13#include "kmp_wait_release.h"
14#include "kmp_barrier.h"
18#include "ompt-specific.h"
20#include "kmp_affinity.h"
24#define USE_NGO_STORES 1
27#if KMP_MIC && USE_NGO_STORES
29#define ngo_load(src) __m512d Vt = _mm512_load_pd((void *)(src))
30#define ngo_store_icvs(dst, src) _mm512_storenrngo_pd((void *)(dst), Vt)
31#define ngo_store_go(dst, src) _mm512_storenrngo_pd((void *)(dst), Vt)
32#define ngo_sync() __asm__ volatile("lock; addl $0,0(%%rsp)" ::: "memory")
34#define ngo_load(src) ((void)0)
35#define ngo_store_icvs(dst, src) copy_icvs((dst), (src))
36#define ngo_store_go(dst, src) KMP_MEMCPY((dst), (src), CACHE_LINE)
37#define ngo_sync() ((void)0)
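// Note on the ngo_* helpers above: on KNC (KMP_MIC) they copy a cache line of
// ICVs with 512-bit non-globally-ordered streaming stores, so ngo_sync() must
// issue a fencing instruction before any later store that other threads have
// to observe in order; on every other target they reduce to
// copy_icvs()/KMP_MEMCPY and ngo_sync() is a no-op.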
void __kmp_print_structure(void); // Forward declaration
void distributedBarrier::computeVarsForN(size_t n) {
  int nsockets = 1;
  if (__kmp_topology) {
    int socket_level = __kmp_topology->get_level(KMP_HW_SOCKET);
    int core_level = __kmp_topology->get_level(KMP_HW_CORE);
    int ncores_per_socket =
        __kmp_topology->calculate_ratio(core_level, socket_level);
    nsockets = __kmp_topology->get_count(socket_level);

    if (nsockets <= 0)
      nsockets = 1;
    if (ncores_per_socket <= 0)
      ncores_per_socket = 1;

    threads_per_go = ncores_per_socket >> 1;
    if (!fix_threads_per_go) {
      // Minimize num_gos
      if (threads_per_go > 4) {
        if (KMP_OPTIMIZE_FOR_REDUCTIONS) {
          threads_per_go = threads_per_go >> 1;
        }
        if (threads_per_go > 4 && nsockets == 1)
          threads_per_go = threads_per_go >> 1;
      }
    }
    if (threads_per_go == 0)
      threads_per_go = 1;
    fix_threads_per_go = true;
    num_gos = n / threads_per_go;
    if (n % threads_per_go)
      num_gos++;
    if (nsockets == 1 || num_gos == 1)
      num_groups = 1;
    else {
      num_groups = num_gos / nsockets;
      if (num_gos % nsockets)
        num_groups++;
    }
    if (num_groups <= 0)
      num_groups = 1;
    gos_per_group = num_gos / num_groups;
    if (num_gos % num_groups)
      gos_per_group++;
    threads_per_group = threads_per_go * gos_per_group;
  } else {
    num_gos = n / threads_per_go;
    if (n % threads_per_go)
      num_gos++;
    if (num_gos == 1)
      num_groups = 1;
    else {
      num_groups = num_gos / 2;
      if (num_gos % 2)
        num_groups++;
    }
    gos_per_group = num_gos / num_groups;
    if (num_gos % num_groups)
      gos_per_group++;
    threads_per_group = threads_per_go * gos_per_group;
  }
}
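// Layout sketch (reader's note, derived from the code above): threads are
// partitioned into "gos" of threads_per_go threads that share one go flag;
// gos are collected into num_groups groups of gos_per_group flags each, so a
// group spans threads_per_group = threads_per_go * gos_per_group consecutive
// thread ids. Group leaders (tid % threads_per_group == 0) form the upper
// level of the two-level gather/release in __kmp_dist_barrier_gather/_release.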
void distributedBarrier::computeGo(size_t n) {
  // Minimize num_gos
  for (num_gos = 1;; num_gos++)
    if (IDEAL_CONTENTION * num_gos >= n)
      break;
  threads_per_go = n / num_gos;
  if (n % num_gos)
    threads_per_go++;
  while (num_gos > MAX_GOS) {
    threads_per_go++;
    num_gos = n / threads_per_go;
    if (n % threads_per_go)
      num_gos++;
  }
  computeVarsForN(n);
}
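// Heuristic note: computeGo() picks the smallest num_gos such that at most
// IDEAL_CONTENTION waiters share each go flag (num_gos * IDEAL_CONTENTION >= n),
// then derives threads_per_go by ceiling division; the trailing while loop only
// shrinks num_gos back under MAX_GOS for very large n.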
void distributedBarrier::resize(size_t nthr) {
  KMP_DEBUG_ASSERT(nthr > max_threads);

  // expand to requested size * 2
  max_threads = nthr * 2;

  // allocate arrays to new max threads
  for (int i = 0; i < MAX_ITERS; ++i) {
    if (flags[i])
      flags[i] = (flags_s *)KMP_INTERNAL_REALLOC(flags[i],
                                                 max_threads * sizeof(flags_s));
    else
      flags[i] = (flags_s *)KMP_INTERNAL_MALLOC(max_threads * sizeof(flags_s));
  }

  if (go)
    go = (go_s *)KMP_INTERNAL_REALLOC(go, max_threads * sizeof(go_s));
  else
    go = (go_s *)KMP_INTERNAL_MALLOC(max_threads * sizeof(go_s));

  if (iter)
    iter = (iter_s *)KMP_INTERNAL_REALLOC(iter, max_threads * sizeof(iter_s));
  else
    iter = (iter_s *)KMP_INTERNAL_MALLOC(max_threads * sizeof(iter_s));

  if (sleep)
    sleep =
        (sleep_s *)KMP_INTERNAL_REALLOC(sleep, max_threads * sizeof(sleep_s));
  else
    sleep = (sleep_s *)KMP_INTERNAL_MALLOC(max_threads * sizeof(sleep_s));
}
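// Growth policy: resize() allocates for nthr * 2 threads so that repeated
// small increases in team size do not reallocate the per-thread
// flags/go/iter/sleep arrays on every re-initialization of the barrier.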
kmp_uint64 distributedBarrier::go_release() {
  kmp_uint64 next_go = iter[0].iter + distributedBarrier::MAX_ITERS;
  for (size_t j = 0; j < num_gos; j++) {
    go[j].go.store(next_go);
  }
  return next_go;
}
void distributedBarrier::go_reset() {
  for (size_t j = 0; j < max_threads; ++j) {
    for (size_t i = 0; i < distributedBarrier::MAX_ITERS; ++i) {
      flags[i][j].stillNeed = 1;
    }
    go[j].go.store(0);
    iter[j].iter = 0;
  }
}
// This function inits/re-inits the distributed barrier for a particular number
// of threads. If a resize of arrays is needed, it calls the resize function.
void distributedBarrier::init(size_t nthr) {
  size_t old_max = max_threads;
  if (nthr > max_threads) { // need more space in arrays
    resize(nthr);
  }

  for (size_t i = 0; i < max_threads; i++) {
    for (size_t j = 0; j < distributedBarrier::MAX_ITERS; j++) {
      flags[j][i].stillNeed = 1;
    }
    go[i].go.store(0);
    iter[i].iter = 0;
    if (i >= old_max)
      sleep[i].sleep = false;
  }

  // Recalculate num_gos, etc. based on new nthr
  computeVarsForN(nthr);

  num_threads = nthr;

  if (team_icvs == NULL)
    team_icvs = __kmp_allocate(sizeof(kmp_internal_control_t));
}
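// Note: team_icvs is allocated lazily above and holds a single
// kmp_internal_control_t; during release with propagate_icvs set, workers copy
// their implicit-task ICVs from team->t.b->team_icvs (see
// __kmp_dist_barrier_release below).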
// This function is used only when KMP_BLOCKTIME is not infinite.
void __kmp_dist_barrier_wakeup(enum barrier_type bt, kmp_team_t *team,
                               size_t start, size_t stop, size_t inc,
                               size_t tid) {
  KMP_DEBUG_ASSERT(__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME);
  if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
    return;

  kmp_info_t **other_threads = team->t.t_threads;
  for (size_t thr = start; thr < stop; thr += inc) {
    KMP_DEBUG_ASSERT(other_threads[thr]);
    int gtid = other_threads[thr]->th.th_info.ds.ds_gtid;
    // Wake up the worker regardless of whether it appears to be sleeping
    __kmp_atomic_resume_64(gtid, (kmp_atomic_flag_64<> *)NULL);
  }
}
static void __kmp_dist_barrier_gather(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_dist_gather);
  kmp_team_t *team;
  distributedBarrier *b;
  kmp_info_t **other_threads;
  kmp_uint64 my_current_iter, my_next_iter;
  size_t nproc;
  bool group_leader;

  team = this_thr->th.th_team;
  nproc = this_thr->th.th_team_nproc;
  other_threads = team->t.t_threads;
  b = team->t.b;
  my_current_iter = b->iter[tid].iter;
  my_next_iter = (my_current_iter + 1) % distributedBarrier::MAX_ITERS;
  group_leader = ((tid % b->threads_per_group) == 0);

  KA_TRACE(20,
           ("__kmp_dist_barrier_gather: T#%d(%d:%d) enter; barrier type %d\n",
            gtid, team->t.t_id, tid, bt));
#if USE_ITT_BUILD && USE_ITT_NOTIFY
  // Barrier imbalance - save arrive time to the thread
  if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
    this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time =
        __itt_get_timestamp();
  }
#endif
  if (group_leader) {
    // Start the gather: wait for the workers in this thread's group
    size_t group_start = tid + 1;
    size_t group_end = tid + b->threads_per_group;
    size_t threads_pending = 0;

    if (group_end > nproc)
      group_end = nproc;
    do {
      // wait for threads in my group; check all flags every iteration
      threads_pending = 0;
      for (size_t thr = group_start; thr < group_end; thr++) {
        threads_pending += b->flags[my_current_iter][thr].stillNeed;
      }
      // Execute tasks here while waiting
      if (__kmp_tasking_mode != tskm_immediate_exec) {
        kmp_task_team_t *task_team = this_thr->th.th_task_team;
        if (task_team != NULL) {
          if (TCR_SYNC_4(task_team->tt.tt_active)) {
            if (KMP_TASKING_ENABLED(task_team)) {
              int tasks_completed = FALSE;
              __kmp_atomic_execute_tasks_64(
                  this_thr, gtid, (kmp_atomic_flag_64<> *)NULL, FALSE,
                  &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);
            } else
              this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
          }
        } else {
          this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
        }
      }
      if (TCR_4(__kmp_global.g.g_done)) {
        if (__kmp_global.g.g_abort)
          __kmp_abort_thread();
        break;
      } else if (__kmp_tasking_mode != tskm_immediate_exec &&
                 this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) {
        this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
      }
    } while (threads_pending > 0);
    if (reduce) { // Perform reduction if needed
      OMPT_REDUCTION_DECL(this_thr, gtid);
      OMPT_REDUCTION_BEGIN;
      // Group leader reduces all threads in its group
      for (size_t thr = group_start; thr < group_end; thr++) {
        (*reduce)(this_thr->th.th_local.reduce_data,
                  other_threads[thr]->th.th_local.reduce_data);
      }
      OMPT_REDUCTION_END;
    }

    // Set flag for next iteration
    b->flags[my_next_iter][tid].stillNeed = 1;
    // Each thread uses a different cache line; resets stillNeed to 0 to
    // indicate it has reached the barrier
    b->flags[my_current_iter][tid].stillNeed = 0;
    do { // wait for all the group leaders
      threads_pending = 0;
      for (size_t thr = 0; thr < nproc; thr += b->threads_per_group) {
        threads_pending += b->flags[my_current_iter][thr].stillNeed;
      }
      // Execute tasks here while waiting
      if (__kmp_tasking_mode != tskm_immediate_exec) {
        kmp_task_team_t *task_team = this_thr->th.th_task_team;
        if (task_team != NULL) {
          if (TCR_SYNC_4(task_team->tt.tt_active)) {
            if (KMP_TASKING_ENABLED(task_team)) {
              int tasks_completed = FALSE;
              __kmp_atomic_execute_tasks_64(
                  this_thr, gtid, (kmp_atomic_flag_64<> *)NULL, FALSE,
                  &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);
            } else
              this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
          }
        } else {
          this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
        }
      }
      if (TCR_4(__kmp_global.g.g_done)) {
        if (__kmp_global.g.g_abort)
          __kmp_abort_thread();
        break;
      } else if (__kmp_tasking_mode != tskm_immediate_exec &&
                 this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) {
        this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
      }
    } while (threads_pending > 0);
    if (reduce) { // Perform reduction if needed
      if (KMP_MASTER_TID(tid)) { // Primary thread reduces over group leaders
        OMPT_REDUCTION_DECL(this_thr, gtid);
        OMPT_REDUCTION_BEGIN;
        for (size_t thr = b->threads_per_group; thr < nproc;
             thr += b->threads_per_group) {
          (*reduce)(this_thr->th.th_local.reduce_data,
                    other_threads[thr]->th.th_local.reduce_data);
        }
        OMPT_REDUCTION_END;
      }
    }
  } else {
    // Worker threads
    // Set flag for next iteration
    b->flags[my_next_iter][tid].stillNeed = 1;
    // Each thread uses a different cache line; resets stillNeed to 0 to
    // indicate it has reached the barrier
    b->flags[my_current_iter][tid].stillNeed = 0;
  }
  KA_TRACE(
      20,
      ("__kmp_dist_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
       gtid, team->t.t_id, tid, bt));
}
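// Gather overview (reader's note): every worker flips its per-iteration
// stillNeed flag; each group leader spins (executing tasks while it waits)
// until all workers in its group of threads_per_group threads have flipped,
// optionally reducing their data, then flips its own flag and polls the other
// group leaders' flags, so arrival of the whole team is detected without any
// single thread polling all nproc flags.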
static void __kmp_dist_barrier_release(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_dist_release);
  kmp_team_t *team;
  distributedBarrier *b;
  kmp_bstate_t *thr_bar;
  kmp_uint64 my_current_iter, next_go;
  size_t my_go_index;
  bool group_leader;

  KA_TRACE(20, ("__kmp_dist_barrier_release: T#%d(%d) enter; barrier type %d\n",
                gtid, tid, bt));

  thr_bar = &this_thr->th.th_bar[bt].bb;
  if (!KMP_MASTER_TID(tid)) {
    // Workers and non-primary group leaders must verify that they are still
    // part of the team before waiting on their go flag.
    if (this_thr->th.th_used_in_team.load() != 1 &&
        this_thr->th.th_used_in_team.load() != 3) {
      // 2 -> 0 advertises that this thread is spinning or sleeping until the
      // location becomes 3 again; 3 is the transition state back to 1
      // (in the team and waiting on go).
      kmp_flag_32<false, false> my_flag(&(this_thr->th.th_used_in_team), 3);
      if (KMP_COMPARE_AND_STORE_ACQ32(&(this_thr->th.th_used_in_team), 2,
                                      0) ||
          this_thr->th.th_used_in_team.load() == 0) {
        my_flag.wait(this_thr, true USE_ITT_BUILD_ARG(itt_sync_obj));
      }
#if USE_ITT_BUILD && USE_ITT_NOTIFY
        if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
          // In fork barrier where we could not get the object reliably
          itt_sync_obj =
              __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
          // Cancel wait on previous parallel region...
          __kmp_itt_task_starting(itt_sync_obj);

          if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
            return;

          itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
          if (itt_sync_obj != NULL)
            __kmp_itt_task_finished(itt_sync_obj);
        } else
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
          if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
            return;
426 if (this_thr->th.th_used_in_team.load() != 1 &&
427 this_thr->th.th_used_in_team.load() != 3)
429 if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
    tid = __kmp_tid_from_gtid(gtid);
    team = this_thr->th.th_team;
    KMP_DEBUG_ASSERT(tid >= 0);
    KMP_DEBUG_ASSERT(team);
    b = team->t.b;
    my_current_iter = b->iter[tid].iter;
    next_go = my_current_iter + distributedBarrier::MAX_ITERS;
    my_go_index = tid / b->threads_per_go;
    if (this_thr->th.th_used_in_team.load() == 3) {
      KMP_COMPARE_AND_STORE_ACQ32(&(this_thr->th.th_used_in_team), 3, 1);
    }
    // Check if the go flag for this thread's go index was already set
    if (b->go[my_go_index].go.load() != next_go) {
      // Wait on the go flag, sleeping if allowed
      kmp_atomic_flag_64<false, true> my_flag(
          &(b->go[my_go_index].go), next_go, &(b->sleep[tid].sleep));
      my_flag.wait(this_thr, true USE_ITT_BUILD_ARG(itt_sync_obj));
      KMP_DEBUG_ASSERT(my_current_iter == b->iter[tid].iter ||
                       b->iter[tid].iter == 0);
      KMP_DEBUG_ASSERT(b->sleep[tid].sleep == false);
    }
460 if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
467 if (this_thr->th.th_used_in_team.load() == 1)
471 if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
    group_leader = ((tid % b->threads_per_group) == 0);
    if (group_leader) {
      // Tell all the gos in my group that they can go!
      for (size_t go_idx = my_go_index + 1;
           go_idx < my_go_index + b->gos_per_group; go_idx++) {
        b->go[go_idx].go.store(next_go);
      }
    }

#if KMP_BARRIER_ICV_PUSH
    if (propagate_icvs) { // copy ICVs to their final destination
      __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team,
                               tid, FALSE);
      copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
                (kmp_internal_control_t *)team->t.b->team_icvs);
      copy_icvs(&thr_bar->th_fixed_icvs,
                &team->t.t_implicit_task_taskdata[tid].td_icvs);
    }
#endif
    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME && group_leader) {
      // This thread is now awake and participating in the barrier;
      // wake up the other threads in its group
      size_t nproc = this_thr->th.th_team_nproc;
      size_t group_end = tid + b->threads_per_group;
      if (nproc < group_end)
        group_end = nproc;
      __kmp_dist_barrier_wakeup(bt, team, tid + 1, group_end, 1, tid);
    }
  } else { // Primary thread
    team = this_thr->th.th_team;
    b = team->t.b;
    my_current_iter = b->iter[tid].iter;
    next_go = my_current_iter + distributedBarrier::MAX_ITERS;
#if KMP_BARRIER_ICV_PUSH
    if (propagate_icvs) {
      // Primary thread keeps its ICV copy in its fixed barrier location
      copy_icvs(&thr_bar->th_fixed_icvs,
                &team->t.t_implicit_task_taskdata[tid].td_icvs);
    }
#endif
    // Tell all the group leaders they can go!
    for (size_t go_idx = 0; go_idx < b->num_gos; go_idx += b->gos_per_group) {
      b->go[go_idx].go.store(next_go);
    }

    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
      // Wake-up the group leaders
      size_t nproc = this_thr->th.th_team_nproc;
      __kmp_dist_barrier_wakeup(bt, team, tid + b->threads_per_group, nproc,
                                b->threads_per_group, tid);
    }

    // Tell all the gos in my group that they can go!
    for (size_t go_idx = 1; go_idx < b->gos_per_group; go_idx++) {
      b->go[go_idx].go.store(next_go);
    }

    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
      // Wake-up the other threads in my group
      size_t nproc = this_thr->th.th_team_nproc;
      size_t group_end = tid + b->threads_per_group;
      if (nproc < group_end)
        group_end = nproc;
      __kmp_dist_barrier_wakeup(bt, team, tid + 1, group_end, 1, tid);
    }
  }
  // Update to next iteration
  KMP_ASSERT(my_current_iter == b->iter[tid].iter);
  b->iter[tid].iter = (b->iter[tid].iter + 1) % distributedBarrier::MAX_ITERS;

  KA_TRACE(
      20, ("__kmp_dist_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
           gtid, team->t.t_id, tid, bt));
}
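// Release overview (reader's note): the primary thread stores the new go value
// to the leading go flag of every group (and, with a finite blocktime, wakes
// the group leaders); each group leader then fans the value out to the go
// flags inside its own group and wakes its workers, so no single thread has to
// signal all nproc threads.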
template <bool cancellable = false>
static bool __kmp_linear_barrier_gather_template(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
559 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_linear_gather);
560 kmp_team_t *team = this_thr->th.th_team;
561 kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
562 kmp_info_t **other_threads = team->t.t_threads;
  KA_TRACE(
      20,
      ("__kmp_linear_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
       gtid, team->t.t_id, tid, bt));
568 KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);
#if USE_ITT_BUILD && USE_ITT_NOTIFY
  // Barrier imbalance - save arrive time to the thread
  if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
    this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time =
        __itt_get_timestamp();
  }
#endif
  if (!KMP_MASTER_TID(tid)) {
    KA_TRACE(20,
             ("__kmp_linear_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d)"
              "arrived(%p): %llu => %llu\n",
              gtid, team->t.t_id, tid, __kmp_gtid_from_tid(0, team),
              team->t.t_id, 0, &thr_bar->b_arrived, thr_bar->b_arrived,
              thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));
    // Mark arrival to primary thread
    kmp_flag_64<> flag(&thr_bar->b_arrived, other_threads[0]);
    flag.release();
  } else {
    kmp_balign_team_t *team_bar = &team->t.t_bar[bt];
    int nproc = this_thr->th.th_team_nproc;
    int i;
    // Don't have to worry about sleep bit here or atomic since team setting
    kmp_uint64 new_state = team_bar->b_arrived + KMP_BARRIER_STATE_BUMP;

    // Collect all the worker team member threads.
    for (i = 1; i < nproc; ++i) {
#if KMP_CACHE_MANAGE
      // Prefetch next thread's arrived count
      if (i + 1 < nproc)
        KMP_CACHE_PREFETCH(&other_threads[i + 1]->th.th_bar[bt].bb.b_arrived);
#endif /* KMP_CACHE_MANAGE */
      KA_TRACE(20,
               ("__kmp_linear_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) "
                "arrived(%p) == %llu\n",
                gtid, team->t.t_id, tid, __kmp_gtid_from_tid(i, team),
                team->t.t_id, i,
                &other_threads[i]->th.th_bar[bt].bb.b_arrived, new_state));
      // Wait for worker thread to arrive
      if (cancellable) {
        kmp_flag_64<true, false> flag(
            &other_threads[i]->th.th_bar[bt].bb.b_arrived, new_state);
        if (flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj)))
          return true;
      } else {
        kmp_flag_64<> flag(&other_threads[i]->th.th_bar[bt].bb.b_arrived,
                           new_state);
        flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
      }
#if USE_ITT_BUILD && USE_ITT_NOTIFY
      // Barrier imbalance - write min of the thread time and the other
      // thread's time to the thread.
      if (__kmp_forkjoin_frames_mode == 2) {
        this_thr->th.th_bar_min_time = KMP_MIN(
            this_thr->th.th_bar_min_time, other_threads[i]->th.th_bar_min_time);
      }
#endif
      if (reduce) {
        KA_TRACE(100,
                 ("__kmp_linear_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n",
                  gtid, team->t.t_id, tid, __kmp_gtid_from_tid(i, team),
                  team->t.t_id, i));
        OMPT_REDUCTION_DECL(this_thr, gtid);
        OMPT_REDUCTION_BEGIN;
        (*reduce)(this_thr->th.th_local.reduce_data,
                  other_threads[i]->th.th_local.reduce_data);
        OMPT_REDUCTION_END;
      }
    }
    // Don't have to worry about sleep bit here or atomic since team setting
    team_bar->b_arrived = new_state;
    KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) set team %d "
                  "arrived(%p) = %llu\n",
                  gtid, team->t.t_id, tid, team->t.t_id, &team_bar->b_arrived,
                  new_state));
  }
  KA_TRACE(
      20,
      ("__kmp_linear_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
       gtid, team->t.t_id, tid, bt));
  return false;
}
template <bool cancellable = false>
static bool __kmp_linear_barrier_release_template(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
661 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_linear_release);
  kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  kmp_team_t *team;

  if (KMP_MASTER_TID(tid)) {
    unsigned int i;
    kmp_uint32 nproc = this_thr->th.th_team_nproc;
    kmp_info_t **other_threads;

    team = __kmp_threads[gtid]->th.th_team;
    KMP_DEBUG_ASSERT(team != NULL);
    other_threads = team->t.t_threads;

    KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d(%d:%d) primary enter for "
                  "barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));
#if KMP_BARRIER_ICV_PUSH
    {
      KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy);
      if (propagate_icvs) {
        ngo_load(&team->t.t_implicit_task_taskdata[0].td_icvs);
        for (i = 1; i < nproc; ++i) {
          __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[i],
                                   team, i, FALSE);
          ngo_store_icvs(&team->t.t_implicit_task_taskdata[i].td_icvs,
                         &team->t.t_implicit_task_taskdata[0].td_icvs);
        }
        ngo_sync();
      }
    }
#endif // KMP_BARRIER_ICV_PUSH
    // Now, release all of the worker threads
    for (i = 1; i < nproc; ++i) {
#if KMP_CACHE_MANAGE
      // Prefetch next thread's go flag
      if (i + 1 < nproc)
        KMP_CACHE_PREFETCH(&other_threads[i + 1]->th.th_bar[bt].bb.b_go);
#endif /* KMP_CACHE_MANAGE */
      KA_TRACE(
          20,
          ("__kmp_linear_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d) "
           "go(%p): %u => %u\n",
           gtid, team->t.t_id, tid, other_threads[i]->th.th_info.ds.ds_gtid,
           team->t.t_id, i, &other_threads[i]->th.th_bar[bt].bb.b_go,
           other_threads[i]->th.th_bar[bt].bb.b_go,
           other_threads[i]->th.th_bar[bt].bb.b_go + KMP_BARRIER_STATE_BUMP));
      kmp_flag_64<> flag(&other_threads[i]->th.th_bar[bt].bb.b_go,
                         other_threads[i]);
      flag.release();
    }
  } else { // Wait for the PRIMARY thread to release us
    KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d wait go(%p) == %u\n",
                  gtid, &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
    if (cancellable) {
      kmp_flag_64<true, false> flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
      if (flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj)))
        return true;
    } else {
      kmp_flag_64<> flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
      flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    }
#if USE_ITT_BUILD && USE_ITT_NOTIFY
    if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
      // In fork barrier where we could not get the object reliably
      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
      // Cancel wait on previous parallel region...
      __kmp_itt_task_starting(itt_sync_obj);

      if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
        return false;

      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
      if (itt_sync_obj != NULL)
        __kmp_itt_task_finished(itt_sync_obj);
    } else
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
        // Early exit for reaping threads releasing forkjoin barrier
        if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
      return false;
    // The worker thread may now assume that the team is valid.
    tid = __kmp_tid_from_gtid(gtid);
    team = __kmp_threads[gtid]->th.th_team;
    KMP_DEBUG_ASSERT(team != NULL);
    TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
    KA_TRACE(20,
             ("__kmp_linear_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
              gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
    KMP_MB(); // Flush all pending memory write invalidates.
  }
  KA_TRACE(
      20,
      ("__kmp_linear_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
       gtid, team->t.t_id, tid, bt));
  return false;
}
static void __kmp_linear_barrier_gather(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  __kmp_linear_barrier_gather_template<false>(
      bt, this_thr, gtid, tid, reduce USE_ITT_BUILD_ARG(itt_sync_obj));
}

static bool __kmp_linear_barrier_gather_cancellable(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  return __kmp_linear_barrier_gather_template<true>(
      bt, this_thr, gtid, tid, reduce USE_ITT_BUILD_ARG(itt_sync_obj));
}

static void __kmp_linear_barrier_release(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  __kmp_linear_barrier_release_template<false>(
      bt, this_thr, gtid, tid, propagate_icvs USE_ITT_BUILD_ARG(itt_sync_obj));
}

static bool __kmp_linear_barrier_release_cancellable(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  return __kmp_linear_barrier_release_template<true>(
      bt, this_thr, gtid, tid, propagate_icvs USE_ITT_BUILD_ARG(itt_sync_obj));
}
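// Note: the *_cancellable wrappers above instantiate the shared template with
// cancellable = true so a pending cancellation request can abort the wait and
// be reported to the caller; the plain wrappers discard the always-false
// result of the cancellable = false instantiation.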
static void __kmp_tree_barrier_gather(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
797 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_tree_gather);
798 kmp_team_t *team = this_thr->th.th_team;
799 kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
800 kmp_info_t **other_threads = team->t.t_threads;
801 kmp_uint32 nproc = this_thr->th.th_team_nproc;
802 kmp_uint32 branch_bits = __kmp_barrier_gather_branch_bits[bt];
803 kmp_uint32 branch_factor = 1 << branch_bits;
  kmp_uint32 child;
  kmp_uint32 child_tid;
  kmp_uint64 new_state = 0;
809 20, (
"__kmp_tree_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
810 gtid, team->t.t_id, tid, bt));
811 KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);
#if USE_ITT_BUILD && USE_ITT_NOTIFY
  // Barrier imbalance - save arrive time to the thread
  if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
    this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time =
        __itt_get_timestamp();
  }
#endif
  // Perform tree gather to wait until all threads have arrived
  child_tid = (tid << branch_bits) + 1;
  if (child_tid < nproc) {
    // Parent threads wait for all their children to arrive
    new_state = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
    child = 1;
    do {
      kmp_info_t *child_thr = other_threads[child_tid];
      kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
#if KMP_CACHE_MANAGE
      if (child + 1 <= branch_factor && child_tid + 1 < nproc)
        KMP_CACHE_PREFETCH(
            &other_threads[child_tid + 1]->th.th_bar[bt].bb.b_arrived);
#endif /* KMP_CACHE_MANAGE */
837 (
"__kmp_tree_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) "
838 "arrived(%p) == %llu\n",
839 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
840 team->t.t_id, child_tid, &child_bar->b_arrived, new_state));
842 kmp_flag_64<> flag(&child_bar->b_arrived, new_state);
843 flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
844#if USE_ITT_BUILD && USE_ITT_NOTIFY
847 if (__kmp_forkjoin_frames_mode == 2) {
848 this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time,
849 child_thr->th.th_bar_min_time);
854 (
"__kmp_tree_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n",
855 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
856 team->t.t_id, child_tid));
857 OMPT_REDUCTION_DECL(this_thr, gtid);
858 OMPT_REDUCTION_BEGIN;
859 (*reduce)(this_thr->th.th_local.reduce_data,
860 child_thr->th.th_local.reduce_data);
      child++;
      child_tid++;
    } while (child <= branch_factor && child_tid < nproc);
  }
  if (!KMP_MASTER_TID(tid)) { // Worker threads
    kmp_int32 parent_tid = (tid - 1) >> branch_bits;

    KA_TRACE(20,
             ("__kmp_tree_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
              "arrived(%p): %llu => %llu\n",
              gtid, team->t.t_id, tid, __kmp_gtid_from_tid(parent_tid, team),
              team->t.t_id, parent_tid, &thr_bar->b_arrived, thr_bar->b_arrived,
              thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));

    // Mark arrival to parent thread
    kmp_flag_64<> flag(&thr_bar->b_arrived, other_threads[parent_tid]);
    flag.release();
  } else {
    // Need to update the team arrived pointer if we are the primary thread
    if (nproc > 1) // New value was already computed above
      team->t.t_bar[bt].b_arrived = new_state;
    else
      team->t.t_bar[bt].b_arrived += KMP_BARRIER_STATE_BUMP;
    KA_TRACE(20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) set team %d "
                  "arrived(%p) = %llu\n",
                  gtid, team->t.t_id, tid, team->t.t_id,
                  &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived));
  }
  KA_TRACE(20,
           ("__kmp_tree_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
            gtid, team->t.t_id, tid, bt));
}
static void __kmp_tree_barrier_release(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
903 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_tree_release);
905 kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
907 kmp_uint32 branch_bits = __kmp_barrier_release_branch_bits[bt];
908 kmp_uint32 branch_factor = 1 << branch_bits;
910 kmp_uint32 child_tid;
915 KA_TRACE(20, (
"__kmp_tree_barrier_release: T#%d wait go(%p) == %u\n", gtid,
916 &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
918 kmp_flag_64<> flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
919 flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
920#if USE_ITT_BUILD && USE_ITT_NOTIFY
921 if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
924 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
926 __kmp_itt_task_starting(itt_sync_obj);
928 if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
931 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
932 if (itt_sync_obj != NULL)
934 __kmp_itt_task_finished(itt_sync_obj);
938 if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
942 team = __kmp_threads[gtid]->th.th_team;
943 KMP_DEBUG_ASSERT(team != NULL);
944 tid = __kmp_tid_from_gtid(gtid);
946 TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
948 (
"__kmp_tree_barrier_release: T#%d(%d:%d) set go(%p) = %u\n", gtid,
949 team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
952 team = __kmp_threads[gtid]->th.th_team;
953 KMP_DEBUG_ASSERT(team != NULL);
954 KA_TRACE(20, (
"__kmp_tree_barrier_release: T#%d(%d:%d) primary enter for "
956 gtid, team->t.t_id, tid, bt));
958 nproc = this_thr->th.th_team_nproc;
959 child_tid = (tid << branch_bits) + 1;
961 if (child_tid < nproc) {
962 kmp_info_t **other_threads = team->t.t_threads;
966 kmp_info_t *child_thr = other_threads[child_tid];
967 kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
970 if (child + 1 <= branch_factor && child_tid + 1 < nproc)
972 &other_threads[child_tid + 1]->th.th_bar[bt].bb.b_go);
975#if KMP_BARRIER_ICV_PUSH
977 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy);
978 if (propagate_icvs) {
979 __kmp_init_implicit_task(team->t.t_ident,
980 team->t.t_threads[child_tid], team,
982 copy_icvs(&team->t.t_implicit_task_taskdata[child_tid].td_icvs,
983 &team->t.t_implicit_task_taskdata[0].td_icvs);
988 (
"__kmp_tree_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)"
989 "go(%p): %u => %u\n",
990 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
991 team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,
992 child_bar->b_go + KMP_BARRIER_STATE_BUMP));
994 kmp_flag_64<> flag(&child_bar->b_go, child_thr);
      flag.release();
      child++;
      child_tid++;
    } while (child <= branch_factor && child_tid < nproc);
1001 20, (
"__kmp_tree_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
1002 gtid, team->t.t_id, tid, bt));
static void __kmp_hyper_barrier_gather(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
1009 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hyper_gather);
1010 kmp_team_t *team = this_thr->th.th_team;
1011 kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
1012 kmp_info_t **other_threads = team->t.t_threads;
1013 kmp_uint64 new_state = KMP_BARRIER_UNUSED_STATE;
1014 kmp_uint32 num_threads = this_thr->th.th_team_nproc;
1015 kmp_uint32 branch_bits = __kmp_barrier_gather_branch_bits[bt];
  kmp_uint32 branch_factor = 1 << branch_bits;
  kmp_uint32 offset;
  kmp_uint32 level;

  KA_TRACE(
      20,
      ("__kmp_hyper_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
       gtid, team->t.t_id, tid, bt));
1024 KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);
#if USE_ITT_BUILD && USE_ITT_NOTIFY
  // Barrier imbalance - save arrive time to the thread
  if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
    this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time =
        __itt_get_timestamp();
  }
#endif
1035 kmp_flag_64<> p_flag(&thr_bar->b_arrived);
1036 for (level = 0, offset = 1; offset < num_threads;
1037 level += branch_bits, offset <<= branch_bits) {
1039 kmp_uint32 child_tid;
1041 if (((tid >> level) & (branch_factor - 1)) != 0) {
1042 kmp_int32 parent_tid = tid & ~((1 << (level + branch_bits)) - 1);
1046 (
"__kmp_hyper_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
1047 "arrived(%p): %llu => %llu\n",
1048 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(parent_tid, team),
1049 team->t.t_id, parent_tid, &thr_bar->b_arrived,
1051 thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));
1056 p_flag.set_waiter(other_threads[parent_tid]);
1062 if (new_state == KMP_BARRIER_UNUSED_STATE)
1063 new_state = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
1064 for (child = 1, child_tid = tid + (1 << level);
1065 child < branch_factor && child_tid < num_threads;
1066 child++, child_tid += (1 << level)) {
1067 kmp_info_t *child_thr = other_threads[child_tid];
1068 kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
1070 kmp_uint32 next_child_tid = child_tid + (1 << level);
1072 if (child + 1 < branch_factor && next_child_tid < num_threads)
1074 &other_threads[next_child_tid]->th.th_bar[bt].bb.b_arrived);
1077 (
"__kmp_hyper_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) "
1078 "arrived(%p) == %llu\n",
1079 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
1080 team->t.t_id, child_tid, &child_bar->b_arrived, new_state));
1082 kmp_flag_64<> c_flag(&child_bar->b_arrived, new_state);
1083 c_flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
1085#if USE_ITT_BUILD && USE_ITT_NOTIFY
1088 if (__kmp_forkjoin_frames_mode == 2) {
1089 this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time,
1090 child_thr->th.th_bar_min_time);
1095 (
"__kmp_hyper_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n",
1096 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
1097 team->t.t_id, child_tid));
1098 OMPT_REDUCTION_DECL(this_thr, gtid);
1099 OMPT_REDUCTION_BEGIN;
1100 (*reduce)(this_thr->th.th_local.reduce_data,
1101 child_thr->th.th_local.reduce_data);
1107 if (KMP_MASTER_TID(tid)) {
1109 if (new_state == KMP_BARRIER_UNUSED_STATE)
1110 team->t.t_bar[bt].b_arrived += KMP_BARRIER_STATE_BUMP;
1112 team->t.t_bar[bt].b_arrived = new_state;
1113 KA_TRACE(20, (
"__kmp_hyper_barrier_gather: T#%d(%d:%d) set team %d "
1114 "arrived(%p) = %llu\n",
1115 gtid, team->t.t_id, tid, team->t.t_id,
1116 &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived));
1119 20, (
"__kmp_hyper_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
1120 gtid, team->t.t_id, tid, bt));
1124#define KMP_REVERSE_HYPER_BAR
static void __kmp_hyper_barrier_release(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
1128 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hyper_release);
1130 kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
1131 kmp_info_t **other_threads;
1132 kmp_uint32 num_threads;
1133 kmp_uint32 branch_bits = __kmp_barrier_release_branch_bits[bt];
1134 kmp_uint32 branch_factor = 1 << branch_bits;
1136 kmp_uint32 child_tid;
1144 if (KMP_MASTER_TID(tid)) {
1145 team = __kmp_threads[gtid]->th.th_team;
1146 KMP_DEBUG_ASSERT(team != NULL);
1147 KA_TRACE(20, (
"__kmp_hyper_barrier_release: T#%d(%d:%d) primary enter for "
1148 "barrier type %d\n",
1149 gtid, team->t.t_id, tid, bt));
1150#if KMP_BARRIER_ICV_PUSH
1151 if (propagate_icvs) {
1152 copy_icvs(&thr_bar->th_fixed_icvs,
1153 &team->t.t_implicit_task_taskdata[tid].td_icvs);
1157 KA_TRACE(20, (
"__kmp_hyper_barrier_release: T#%d wait go(%p) == %u\n", gtid,
1158 &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
1160 kmp_flag_64<> flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
1161 flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
1162#if USE_ITT_BUILD && USE_ITT_NOTIFY
1163 if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
1165 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
1167 __kmp_itt_task_starting(itt_sync_obj);
1169 if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
1172 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
1173 if (itt_sync_obj != NULL)
1175 __kmp_itt_task_finished(itt_sync_obj);
1179 if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
1183 team = __kmp_threads[gtid]->th.th_team;
1184 KMP_DEBUG_ASSERT(team != NULL);
1185 tid = __kmp_tid_from_gtid(gtid);
1187 TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
1189 (
"__kmp_hyper_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
1190 gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
1193 num_threads = this_thr->th.th_team_nproc;
1194 other_threads = team->t.t_threads;
1196#ifdef KMP_REVERSE_HYPER_BAR
1198 for (level = 0, offset = 1;
1199 offset < num_threads && (((tid >> level) & (branch_factor - 1)) == 0);
1200 level += branch_bits, offset <<= branch_bits)
1204 for (level -= branch_bits, offset >>= branch_bits; offset != 0;
1205 level -= branch_bits, offset >>= branch_bits)
1208 for (level = 0, offset = 1; offset < num_threads;
1209 level += branch_bits, offset <<= branch_bits)
1212#ifdef KMP_REVERSE_HYPER_BAR
1215 child = num_threads >> ((level == 0) ? level : level - 1);
1216 for (child = (child < branch_factor - 1) ? child : branch_factor - 1,
1217 child_tid = tid + (child << level);
1218 child >= 1; child--, child_tid -= (1 << level))
1220 if (((tid >> level) & (branch_factor - 1)) != 0)
1225 for (child = 1, child_tid = tid + (1 << level);
1226 child < branch_factor && child_tid < num_threads;
1227 child++, child_tid += (1 << level))
1230 if (child_tid >= num_threads)
1233 kmp_info_t *child_thr = other_threads[child_tid];
1234 kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
1236 kmp_uint32 next_child_tid = child_tid - (1 << level);
1238#ifdef KMP_REVERSE_HYPER_BAR
1239 if (child - 1 >= 1 && next_child_tid < num_threads)
1241 if (child + 1 < branch_factor && next_child_tid < num_threads)
1244 &other_threads[next_child_tid]->th.th_bar[bt].bb.b_go);
1247#if KMP_BARRIER_ICV_PUSH
1249 copy_icvs(&child_bar->th_fixed_icvs, &thr_bar->th_fixed_icvs);
1254 (
"__kmp_hyper_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)"
1255 "go(%p): %u => %u\n",
1256 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
1257 team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,
1258 child_bar->b_go + KMP_BARRIER_STATE_BUMP));
1260 kmp_flag_64<> flag(&child_bar->b_go, child_thr);
1265#if KMP_BARRIER_ICV_PUSH
1266 if (propagate_icvs &&
1267 !KMP_MASTER_TID(tid)) {
1268 __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid,
1270 copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
1271 &thr_bar->th_fixed_icvs);
1276 (
"__kmp_hyper_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
1277 gtid, team->t.t_id, tid, bt));
// Hierarchical Barrier

// Initialize thread barrier data.
// Performs the minimum amount of initialization required based on how the
// team has changed. Returns true if leaf children will require both on-core
// and traditional wake-up mechanisms.
static bool __kmp_init_hierarchical_barrier_thread(enum barrier_type bt,
                                                   kmp_bstate_t *thr_bar,
                                                   kmp_uint32 nproc, int gtid,
                                                   int tid, kmp_team_t *team) {
1295 bool uninitialized = thr_bar->team == NULL;
1296 bool team_changed = team != thr_bar->team;
1297 bool team_sz_changed = nproc != thr_bar->nproc;
1298 bool tid_changed = tid != thr_bar->old_tid;
  bool retval = false;
1301 if (uninitialized || team_sz_changed) {
1302 __kmp_get_hierarchy(nproc, thr_bar);
1305 if (uninitialized || team_sz_changed || tid_changed) {
1306 thr_bar->my_level = thr_bar->depth - 1;
1307 thr_bar->parent_tid = -1;
1308 if (!KMP_MASTER_TID(tid)) {
1311 while (d < thr_bar->depth) {
1314 if (d == thr_bar->depth - 2) {
1315 thr_bar->parent_tid = 0;
1316 thr_bar->my_level = d;
      } else if ((rem = tid % thr_bar->skip_per_level[d + 1]) != 0) {
1321 thr_bar->parent_tid = tid - rem;
1322 thr_bar->my_level = d;
1328 __kmp_type_convert(7 - ((tid - thr_bar->parent_tid) /
1329 (thr_bar->skip_per_level[thr_bar->my_level])),
1330 &(thr_bar->offset));
1331 thr_bar->old_tid = tid;
1332 thr_bar->wait_flag = KMP_BARRIER_NOT_WAITING;
1333 thr_bar->team = team;
1334 thr_bar->parent_bar =
1335 &team->t.t_threads[thr_bar->parent_tid]->th.th_bar[bt].bb;
1337 if (uninitialized || team_changed || tid_changed) {
1338 thr_bar->team = team;
1339 thr_bar->parent_bar =
1340 &team->t.t_threads[thr_bar->parent_tid]->th.th_bar[bt].bb;
1343 if (uninitialized || team_sz_changed || tid_changed) {
1344 thr_bar->nproc = nproc;
1345 thr_bar->leaf_kids = thr_bar->base_leaf_kids;
1346 if (thr_bar->my_level == 0)
1347 thr_bar->leaf_kids = 0;
1348 if (thr_bar->leaf_kids && (kmp_uint32)tid + thr_bar->leaf_kids + 1 > nproc)
1349 __kmp_type_convert(nproc - tid - 1, &(thr_bar->leaf_kids));
1350 thr_bar->leaf_state = 0;
    for (int i = 0; i < thr_bar->leaf_kids; ++i)
      ((char *)&(thr_bar->leaf_state))[7 - i] = 1;
  }
  return retval;
}
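// Reader's note: leaf_state packs one byte per leaf child into the 64-bit
// b_arrived word (highest byte first), so a parent can wait for all of its
// leaf children with a single 64-bit flag comparison and later clear their
// bytes with one atomic AND (see KMP_TEST_THEN_AND64 in the gather below).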
static void __kmp_hierarchical_barrier_gather(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
1360 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hier_gather);
1361 kmp_team_t *team = this_thr->th.th_team;
1362 kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
1363 kmp_uint32 nproc = this_thr->th.th_team_nproc;
1364 kmp_info_t **other_threads = team->t.t_threads;
1365 kmp_uint64 new_state = 0;
  int level = team->t.t_level;
  if (other_threads[0]
          ->th.th_teams_microtask) // are we inside the teams construct?
    if (this_thr->th.th_teams_size.nteams > 1)
      ++level; // level was not increased in teams construct for team_of_masters
  if (level == 1)
    thr_bar->use_oncore_barrier = 1;
  else
    thr_bar->use_oncore_barrier = 0; // Do not use oncore barrier when nested
1377 KA_TRACE(20, (
"__kmp_hierarchical_barrier_gather: T#%d(%d:%d) enter for "
1378 "barrier type %d\n",
1379 gtid, team->t.t_id, tid, bt));
1380 KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);
1382#if USE_ITT_BUILD && USE_ITT_NOTIFY
1384 if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
1385 this_thr->th.th_bar_arrive_time = __itt_get_timestamp();
1389 (void)__kmp_init_hierarchical_barrier_thread(bt, thr_bar, nproc, gtid, tid,
1392 if (thr_bar->my_level) {
1393 kmp_int32 child_tid;
1395 (kmp_uint64)team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
1396 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
1397 thr_bar->use_oncore_barrier) {
1398 if (thr_bar->leaf_kids) {
1400 kmp_uint64 leaf_state =
1402 ? thr_bar->b_arrived | thr_bar->leaf_state
1403 : team->t.t_bar[bt].b_arrived | thr_bar->leaf_state;
1404 KA_TRACE(20, (
"__kmp_hierarchical_barrier_gather: T#%d(%d:%d) waiting "
1406 gtid, team->t.t_id, tid));
1407 kmp_flag_64<> flag(&thr_bar->b_arrived, leaf_state);
1408 flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
1410 OMPT_REDUCTION_DECL(this_thr, gtid);
1411 OMPT_REDUCTION_BEGIN;
1412 for (child_tid = tid + 1; child_tid <= tid + thr_bar->leaf_kids;
1414 KA_TRACE(100, (
"__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += "
1416 gtid, team->t.t_id, tid,
1417 __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
1419 (*reduce)(this_thr->th.th_local.reduce_data,
1420 other_threads[child_tid]->th.th_local.reduce_data);
1425 KMP_TEST_THEN_AND64(&thr_bar->b_arrived, ~(thr_bar->leaf_state));
1428 for (kmp_uint32 d = 1; d < thr_bar->my_level;
1430 kmp_uint32 last = tid + thr_bar->skip_per_level[d + 1],
1431 skip = thr_bar->skip_per_level[d];
1434 for (child_tid = tid + skip; child_tid < (int)last; child_tid += skip) {
1435 kmp_info_t *child_thr = other_threads[child_tid];
1436 kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
1437 KA_TRACE(20, (
"__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait "
1439 "arrived(%p) == %llu\n",
1440 gtid, team->t.t_id, tid,
1441 __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
1442 child_tid, &child_bar->b_arrived, new_state));
1443 kmp_flag_64<> flag(&child_bar->b_arrived, new_state);
1444 flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
1446 KA_TRACE(100, (
"__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += "
1448 gtid, team->t.t_id, tid,
1449 __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
1451 (*reduce)(this_thr->th.th_local.reduce_data,
1452 child_thr->th.th_local.reduce_data);
1457 for (kmp_uint32 d = 0; d < thr_bar->my_level;
1459 kmp_uint32 last = tid + thr_bar->skip_per_level[d + 1],
1460 skip = thr_bar->skip_per_level[d];
1463 for (child_tid = tid + skip; child_tid < (int)last; child_tid += skip) {
1464 kmp_info_t *child_thr = other_threads[child_tid];
1465 kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
1466 KA_TRACE(20, (
"__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait "
1468 "arrived(%p) == %llu\n",
1469 gtid, team->t.t_id, tid,
1470 __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
1471 child_tid, &child_bar->b_arrived, new_state));
1472 kmp_flag_64<> flag(&child_bar->b_arrived, new_state);
1473 flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
1475 KA_TRACE(100, (
"__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += "
1477 gtid, team->t.t_id, tid,
1478 __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
1480 (*reduce)(this_thr->th.th_local.reduce_data,
1481 child_thr->th.th_local.reduce_data);
1489 if (!KMP_MASTER_TID(tid)) {
1490 KA_TRACE(20, (
"__kmp_hierarchical_barrier_gather: T#%d(%d:%d) releasing"
1491 " T#%d(%d:%d) arrived(%p): %llu => %llu\n",
1492 gtid, team->t.t_id, tid,
1493 __kmp_gtid_from_tid(thr_bar->parent_tid, team), team->t.t_id,
1494 thr_bar->parent_tid, &thr_bar->b_arrived, thr_bar->b_arrived,
1495 thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));
1499 if (thr_bar->my_level || __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ||
1500 !thr_bar->use_oncore_barrier) {
1502 kmp_flag_64<> flag(&thr_bar->b_arrived,
1503 other_threads[thr_bar->parent_tid]);
1507 thr_bar->b_arrived = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
1508 kmp_flag_oncore flag(&thr_bar->parent_bar->b_arrived,
1509 thr_bar->offset + 1);
1510 flag.set_waiter(other_threads[thr_bar->parent_tid]);
1514 team->t.t_bar[bt].b_arrived = new_state;
1515 KA_TRACE(20, (
"__kmp_hierarchical_barrier_gather: T#%d(%d:%d) set team %d "
1516 "arrived(%p) = %llu\n",
1517 gtid, team->t.t_id, tid, team->t.t_id,
1518 &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived));
1521 KA_TRACE(20, (
"__kmp_hierarchical_barrier_gather: T#%d(%d:%d) exit for "
1522 "barrier type %d\n",
1523 gtid, team->t.t_id, tid, bt));
static void __kmp_hierarchical_barrier_release(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
1529 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hier_release);
1531 kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  bool team_change = false;
1535 if (KMP_MASTER_TID(tid)) {
1536 team = __kmp_threads[gtid]->th.th_team;
1537 KMP_DEBUG_ASSERT(team != NULL);
1538 KA_TRACE(20, (
"__kmp_hierarchical_barrier_release: T#%d(%d:%d) primary "
1539 "entered barrier type %d\n",
1540 gtid, team->t.t_id, tid, bt));
1543 if (!thr_bar->use_oncore_barrier ||
1544 __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME || thr_bar->my_level != 0 ||
1545 thr_bar->team == NULL) {
1547 thr_bar->wait_flag = KMP_BARRIER_OWN_FLAG;
1548 kmp_flag_64<> flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
1549 flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
1550 TCW_8(thr_bar->b_go,
1551 KMP_INIT_BARRIER_STATE);
1555 thr_bar->wait_flag = KMP_BARRIER_PARENT_FLAG;
1556 kmp_flag_oncore flag(&thr_bar->parent_bar->b_go, KMP_BARRIER_STATE_BUMP,
1557 thr_bar->offset + 1, bt,
1558 this_thr USE_ITT_BUILD_ARG(itt_sync_obj));
1559 flag.wait(this_thr, TRUE);
      if (thr_bar->wait_flag == KMP_BARRIER_SWITCHING) {
        // The thread was switched to waiting on its own b_go; reset it and
        // clear the byte it was waiting on in the parent's b_go flag
        TCW_8(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
        (RCAST(volatile char *,
               &(thr_bar->parent_bar->b_go)))[thr_bar->offset + 1] = 0;
      }
1569 thr_bar->wait_flag = KMP_BARRIER_NOT_WAITING;
1571 if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
1574 team = __kmp_threads[gtid]->th.th_team;
1575 KMP_DEBUG_ASSERT(team != NULL);
1576 tid = __kmp_tid_from_gtid(gtid);
1580 (
"__kmp_hierarchical_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
1581 gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
1585 nproc = this_thr->th.th_team_nproc;
  int level = team->t.t_level;
  if (team->t.t_threads[0]
          ->th.th_teams_microtask) { // are we inside the teams construct?
    if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
        this_thr->th.th_teams_level == level)
      ++level; // level was not increased in teams construct for team_of_workers
    if (this_thr->th.th_teams_size.nteams > 1)
      ++level; // level was not increased in teams construct for team_of_masters
  }
  if (level == 1)
    thr_bar->use_oncore_barrier = 1;
  else
    thr_bar->use_oncore_barrier = 0; // Do not use oncore barrier when nested
1602 unsigned short int old_leaf_kids = thr_bar->leaf_kids;
1603 kmp_uint64 old_leaf_state = thr_bar->leaf_state;
1604 team_change = __kmp_init_hierarchical_barrier_thread(bt, thr_bar, nproc, gtid,
1610#if KMP_BARRIER_ICV_PUSH
1611 if (propagate_icvs) {
1612 __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid,
1616 copy_icvs(&thr_bar->th_fixed_icvs,
1617 &team->t.t_implicit_task_taskdata[tid].td_icvs);
  } else if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
1619 thr_bar->use_oncore_barrier) {
1620 if (!thr_bar->my_level)
1623 copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
1624 &thr_bar->parent_bar->th_fixed_icvs);
1627 if (thr_bar->my_level)
1629 copy_icvs(&thr_bar->th_fixed_icvs, &thr_bar->parent_bar->th_fixed_icvs);
1631 copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
1632 &thr_bar->parent_bar->th_fixed_icvs);
1638 if (thr_bar->my_level) {
1639 kmp_int32 child_tid;
1641 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
1642 thr_bar->use_oncore_barrier) {
1643 if (KMP_MASTER_TID(tid)) {
1646 thr_bar->b_go = KMP_BARRIER_STATE_BUMP;
1649 ngo_load(&thr_bar->th_fixed_icvs);
      for (child_tid = thr_bar->skip_per_level[1]; child_tid < (int)nproc;
           child_tid += thr_bar->skip_per_level[1]) {
1654 kmp_bstate_t *child_bar =
1655 &team->t.t_threads[child_tid]->th.th_bar[bt].bb;
1656 KA_TRACE(20, (
"__kmp_hierarchical_barrier_release: T#%d(%d:%d) "
1657 "releasing T#%d(%d:%d)"
1658 " go(%p): %u => %u\n",
1659 gtid, team->t.t_id, tid,
1660 __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
1661 child_tid, &child_bar->b_go, child_bar->b_go,
1662 child_bar->b_go + KMP_BARRIER_STATE_BUMP));
1665 ngo_store_go(&child_bar->th_fixed_icvs, &thr_bar->th_fixed_icvs);
1669 TCW_8(thr_bar->b_go,
1670 KMP_INIT_BARRIER_STATE);
1672 if (thr_bar->leaf_kids) {
1675 old_leaf_kids < thr_bar->leaf_kids) {
1676 if (old_leaf_kids) {
1677 thr_bar->b_go |= old_leaf_state;
1680 last = tid + thr_bar->skip_per_level[1];
1683 for (child_tid = tid + 1 + old_leaf_kids; child_tid < (int)last;
1685 kmp_info_t *child_thr = team->t.t_threads[child_tid];
1686 kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
1689 (
"__kmp_hierarchical_barrier_release: T#%d(%d:%d) releasing"
1690 " T#%d(%d:%d) go(%p): %u => %u\n",
1691 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
1692 team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,
1693 child_bar->b_go + KMP_BARRIER_STATE_BUMP));
1695 kmp_flag_64<> flag(&child_bar->b_go, child_thr);
1700 thr_bar->b_go |= thr_bar->leaf_state;
    for (int d = thr_bar->my_level - 1; d >= 0;
         --d) { // Release highest-level threads first
      last = tid + thr_bar->skip_per_level[d + 1];
1707 kmp_uint32 skip = thr_bar->skip_per_level[d];
1710 for (child_tid = tid + skip; child_tid < (int)last; child_tid += skip) {
1711 kmp_info_t *child_thr = team->t.t_threads[child_tid];
1712 kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
1713 KA_TRACE(20, (
"__kmp_hierarchical_barrier_release: T#%d(%d:%d) "
1714 "releasing T#%d(%d:%d) go(%p): %u => %u\n",
1715 gtid, team->t.t_id, tid,
1716 __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
1717 child_tid, &child_bar->b_go, child_bar->b_go,
1718 child_bar->b_go + KMP_BARRIER_STATE_BUMP));
1720 kmp_flag_64<> flag(&child_bar->b_go, child_thr);
1725#if KMP_BARRIER_ICV_PUSH
1726 if (propagate_icvs && !KMP_MASTER_TID(tid))
1728 copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
1729 &thr_bar->th_fixed_icvs);
1732 KA_TRACE(20, (
"__kmp_hierarchical_barrier_release: T#%d(%d:%d) exit for "
1733 "barrier type %d\n",
1734 gtid, team->t.t_id, tid, bt));
template <bool cancellable> struct is_cancellable {};
template <> struct is_cancellable<true> {
  bool value;
  is_cancellable() : value(false) {}
  is_cancellable(bool b) : value(b) {}
  is_cancellable &operator=(bool b) {
    value = b;
    return *this;
  }
  operator bool() const { return value; }
};
template <> struct is_cancellable<false> {
  is_cancellable &operator=(bool b) { return *this; }
  constexpr operator bool() const { return false; }
};
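// Reader's note: is_cancellable<false> is an empty shell (assignment is a
// no-op and the bool conversion is constexpr false), so the non-cancellable
// instantiations of __kmp_barrier_template below compile the cancellation
// checks away entirely.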
template <bool cancellable = false>
static int __kmp_barrier_template(enum barrier_type bt, int gtid, int is_split,
                                  size_t reduce_size, void *reduce_data,
                                  void (*reduce)(void *, void *)) {
1770 KMP_TIME_PARTITIONED_BLOCK(OMP_plain_barrier);
1771 KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER);
1772 int tid = __kmp_tid_from_gtid(gtid);
1773 kmp_info_t *this_thr = __kmp_threads[gtid];
1774 kmp_team_t *team = this_thr->th.th_team;
1776 is_cancellable<cancellable> cancelled;
1777#if OMPT_SUPPORT && OMPT_OPTIONAL
1778 ompt_data_t *my_task_data;
1779 ompt_data_t *my_parallel_data;
1780 void *return_address;
1781 ompt_sync_region_t barrier_kind;
1784 KA_TRACE(15, (
"__kmp_barrier: T#%d(%d:%d) has arrived\n", gtid,
1785 __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid)));
1788 if (ompt_enabled.enabled) {
1790 my_task_data = OMPT_CUR_TASK_DATA(this_thr);
1791 my_parallel_data = OMPT_CUR_TEAM_DATA(this_thr);
1792 return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);
1793 barrier_kind = __ompt_get_barrier_kind(bt, this_thr);
1794 if (ompt_enabled.ompt_callback_sync_region) {
1795 ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
1796 barrier_kind, ompt_scope_begin, my_parallel_data, my_task_data,
1799 if (ompt_enabled.ompt_callback_sync_region_wait) {
1800 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
1801 barrier_kind, ompt_scope_begin, my_parallel_data, my_task_data,
1808 this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier;
1812 if (!team->t.t_serialized) {
1815 void *itt_sync_obj = NULL;
1817 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
1818 itt_sync_obj = __kmp_itt_barrier_object(gtid, bt, 1);
1821 if (__kmp_tasking_mode == tskm_extra_barrier) {
1822 __kmp_tasking_barrier(team, this_thr, gtid);
1824 (
"__kmp_barrier: T#%d(%d:%d) past tasking barrier\n", gtid,
1825 __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid)));
1832 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
1834 this_thr->th.th_team_bt_intervals =
1835 team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
1836 this_thr->th.th_team_bt_set =
1837 team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
1839 this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL(team, tid);
1844 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
1845 __kmp_itt_barrier_starting(gtid, itt_sync_obj);
1849 if (KMP_MASTER_TID(tid)) {
1850 team->t.t_bar[bt].b_master_arrived += 1;
1852 this_thr->th.th_bar[bt].bb.b_worker_arrived += 1;
1855 if (reduce != NULL) {
1857 this_thr->th.th_local.reduce_data = reduce_data;
1860 if (KMP_MASTER_TID(tid) && __kmp_tasking_mode != tskm_immediate_exec)
1862 __kmp_task_team_setup(this_thr, team, 0);
    if (cancellable) {
      cancelled = __kmp_linear_barrier_gather_cancellable(
          bt, this_thr, gtid, tid, reduce USE_ITT_BUILD_ARG(itt_sync_obj));
    } else {
      switch (__kmp_barrier_gather_pattern[bt]) {
      case bp_dist_bar: {
        __kmp_dist_barrier_gather(bt, this_thr, gtid, tid,
                                  reduce USE_ITT_BUILD_ARG(itt_sync_obj));
        break;
      }
      case bp_hyper_bar: {
        KMP_ASSERT(__kmp_barrier_gather_branch_bits[bt]);
        __kmp_hyper_barrier_gather(bt, this_thr, gtid, tid,
                                   reduce USE_ITT_BUILD_ARG(itt_sync_obj));
        break;
      }
      case bp_hierarchical_bar: {
        __kmp_hierarchical_barrier_gather(
            bt, this_thr, gtid, tid, reduce USE_ITT_BUILD_ARG(itt_sync_obj));
        break;
      }
      case bp_tree_bar: {
        KMP_ASSERT(__kmp_barrier_gather_branch_bits[bt]);
        __kmp_tree_barrier_gather(bt, this_thr, gtid, tid,
                                  reduce USE_ITT_BUILD_ARG(itt_sync_obj));
        break;
      }
      default: {
        __kmp_linear_barrier_gather(bt, this_thr, gtid, tid,
                                    reduce USE_ITT_BUILD_ARG(itt_sync_obj));
      }
      }
    }
1902 if (KMP_MASTER_TID(tid)) {
1904 if (__kmp_tasking_mode != tskm_immediate_exec && !cancelled) {
1905 __kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj));
1910 team->t.t_bar[bt].b_team_arrived += 1;
1913 if (__kmp_omp_cancellation) {
1914 kmp_int32 cancel_request = KMP_ATOMIC_LD_RLX(&team->t.t_cancel_request);
1916 if (cancel_request == cancel_loop ||
1917 cancel_request == cancel_sections) {
1918 KMP_ATOMIC_ST_RLX(&team->t.t_cancel_request, cancel_noreq);
1926 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
1927 __kmp_itt_barrier_middle(gtid, itt_sync_obj);
1929#if USE_ITT_BUILD && USE_ITT_NOTIFY
1931 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
1932 __kmp_forkjoin_frames_mode &&
1933 (this_thr->th.th_teams_microtask == NULL ||
1934 this_thr->th.th_teams_size.nteams == 1) &&
1935 team->t.t_active_level == 1) {
1936 ident_t *loc = __kmp_threads[gtid]->th.th_ident;
1937 kmp_uint64 cur_time = __itt_get_timestamp();
1938 kmp_info_t **other_threads = team->t.t_threads;
1939 int nproc = this_thr->th.th_team_nproc;
1941 switch (__kmp_forkjoin_frames_mode) {
1943 __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0,
1945 this_thr->th.th_frame_time = cur_time;
1949 __kmp_itt_frame_submit(gtid, this_thr->th.th_bar_min_time, cur_time,
1953 if (__itt_metadata_add_ptr) {
1955 kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time;
1958 this_thr->th.th_bar_arrive_time = 0;
1959 for (i = 1; i < nproc; ++i) {
1960 delta += (cur_time - other_threads[i]->th.th_bar_arrive_time);
1961 other_threads[i]->th.th_bar_arrive_time = 0;
1963 __kmp_itt_metadata_imbalance(gtid, this_thr->th.th_frame_time,
1965 (kmp_uint64)(reduce != NULL));
1967 __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0,
1969 this_thr->th.th_frame_time = cur_time;
1977 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
1978 __kmp_itt_barrier_middle(gtid, itt_sync_obj);
  if ((status == 1 || !is_split) && !cancelled) {
    if (cancellable) {
      cancelled = __kmp_linear_barrier_release_cancellable(
          bt, this_thr, gtid, tid, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
    } else {
      switch (__kmp_barrier_release_pattern[bt]) {
      case bp_dist_bar: {
        KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
        __kmp_dist_barrier_release(bt, this_thr, gtid, tid,
                                   FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
        break;
      }
      case bp_hyper_bar: {
        KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
        __kmp_hyper_barrier_release(bt, this_thr, gtid, tid,
                                    FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
        break;
      }
      case bp_hierarchical_bar: {
        __kmp_hierarchical_barrier_release(
            bt, this_thr, gtid, tid, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
        break;
      }
      case bp_tree_bar: {
        KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
        __kmp_tree_barrier_release(bt, this_thr, gtid, tid,
                                   FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
        break;
      }
      default: {
        __kmp_linear_barrier_release(bt, this_thr, gtid, tid,
                                     FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
      }
      }
    }
2016 if (__kmp_tasking_mode != tskm_immediate_exec && !cancelled) {
2017 __kmp_task_team_sync(this_thr, team);
2025 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
2026 __kmp_itt_barrier_finished(gtid, itt_sync_obj);
2030 if (__kmp_tasking_mode != tskm_immediate_exec) {
2031 if (this_thr->th.th_task_team != NULL) {
2033 void *itt_sync_obj = NULL;
2034 if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
2035 itt_sync_obj = __kmp_itt_barrier_object(gtid, bt, 1);
2036 __kmp_itt_barrier_starting(gtid, itt_sync_obj);
2041 this_thr->th.th_task_team->tt.tt_found_proxy_tasks == TRUE ||
2042 this_thr->th.th_task_team->tt.tt_hidden_helper_task_encountered ==
2044 __kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj));
2045 __kmp_task_team_setup(this_thr, team, 0);
2048 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
2049 __kmp_itt_barrier_finished(gtid, itt_sync_obj);
2054 KA_TRACE(15, (
"__kmp_barrier: T#%d(%d:%d) is leaving with return value %d\n",
2055 gtid, __kmp_team_from_gtid(gtid)->t.t_id,
2056 __kmp_tid_from_gtid(gtid), status));
2059 if (ompt_enabled.enabled) {
2061 if (ompt_enabled.ompt_callback_sync_region_wait) {
2062 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
2063 barrier_kind, ompt_scope_end, my_parallel_data, my_task_data,
2066 if (ompt_enabled.ompt_callback_sync_region) {
2067 ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
2068 barrier_kind, ompt_scope_end, my_parallel_data, my_task_data,
2072 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
  if (cancellable)
    return (int)cancelled;
  return status;
}
int __kmp_barrier(enum barrier_type bt, int gtid, int is_split,
                  size_t reduce_size, void *reduce_data,
                  void (*reduce)(void *, void *)) {
  return __kmp_barrier_template<>(bt, gtid, is_split, reduce_size, reduce_data,
                                  reduce);
}
#if defined(KMP_GOMP_COMPAT)
int __kmp_barrier_gomp_cancel(int gtid) {
  if (__kmp_omp_cancellation) {
    int cancelled = __kmp_barrier_template<true>(bs_plain_barrier, gtid, FALSE,
                                                 0, NULL, NULL);
    if (cancelled) {
      int tid = __kmp_tid_from_gtid(gtid);
      kmp_info_t *this_thr = __kmp_threads[gtid];
      if (KMP_MASTER_TID(tid)) {
        // Primary thread does not need to revert anything
      } else {
        // Workers need to revert their private b_arrived flag
        this_thr->th.th_bar[bs_plain_barrier].bb.b_arrived -=
            KMP_BARRIER_STATE_BUMP;
      }
    }
    return cancelled;
  }
  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
  return FALSE;
}
#endif
void __kmp_end_split_barrier(enum barrier_type bt, int gtid) {
2114 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_end_split_barrier);
2115 KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER);
2116 KMP_DEBUG_ASSERT(bt < bs_last_barrier);
2117 int tid = __kmp_tid_from_gtid(gtid);
2118 kmp_info_t *this_thr = __kmp_threads[gtid];
2119 kmp_team_t *team = this_thr->th.th_team;
2121 if (!team->t.t_serialized) {
2122 if (KMP_MASTER_GTID(gtid)) {
      switch (__kmp_barrier_release_pattern[bt]) {
      case bp_dist_bar: {
        __kmp_dist_barrier_release(bt, this_thr, gtid, tid,
                                   FALSE USE_ITT_BUILD_ARG(NULL));
        break;
      }
      case bp_hyper_bar: {
        KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
        __kmp_hyper_barrier_release(bt, this_thr, gtid, tid,
                                    FALSE USE_ITT_BUILD_ARG(NULL));
        break;
      }
      case bp_hierarchical_bar: {
        __kmp_hierarchical_barrier_release(bt, this_thr, gtid, tid,
                                           FALSE USE_ITT_BUILD_ARG(NULL));
        break;
      }
      case bp_tree_bar: {
        KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
        __kmp_tree_barrier_release(bt, this_thr, gtid, tid,
                                   FALSE USE_ITT_BUILD_ARG(NULL));
        break;
      }
      default: {
        __kmp_linear_barrier_release(bt, this_thr, gtid, tid,
                                     FALSE USE_ITT_BUILD_ARG(NULL));
      }
      }
2151 if (__kmp_tasking_mode != tskm_immediate_exec) {
2152 __kmp_task_team_sync(this_thr, team);
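// Barrier at the end of a parallel region (the "join" barrier). Every team
// thread gathers here; the primary thread additionally drains the task team
// and records ITT/statistics information. The matching release of the workers
// happens later, in __kmp_fork_barrier.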
void __kmp_join_barrier(int gtid) {
  KMP_TIME_PARTITIONED_BLOCK(OMP_join_barrier);
  KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER);
  KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
  kmp_info_t *this_thr = __kmp_threads[gtid];
  void *itt_sync_obj = NULL;
  if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
    itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
#if ((USE_ITT_BUILD && USE_ITT_NOTIFY) || defined KMP_DEBUG)
  int nproc = this_thr->th.th_team_nproc;
  team = this_thr->th.th_team;
  KMP_DEBUG_ASSERT(nproc == team->t.t_nproc);
  tid = __kmp_tid_from_gtid(gtid);
  team_id = team->t.t_id;
  kmp_info_t *master_thread = this_thr->th.th_team_master;
  if (master_thread != team->t.t_threads[0]) {
    __kmp_print_structure();
  KMP_DEBUG_ASSERT(master_thread == team->t.t_threads[0]);
  KMP_DEBUG_ASSERT(TCR_PTR(this_thr->th.th_team));
  KMP_DEBUG_ASSERT(TCR_PTR(this_thr->th.th_root));
  KMP_DEBUG_ASSERT(this_thr == team->t.t_threads[tid]);
  KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) arrived at join barrier\n",
                gtid, team_id, tid));
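  // Notify OMPT tools that the implicit (join) barrier is starting and mark
  // this thread as waiting on it.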
  if (ompt_enabled.enabled) {
    ompt_data_t *my_task_data;
    ompt_data_t *my_parallel_data;
    void *codeptr = NULL;
    int ds_tid = this_thr->th.th_info.ds.ds_tid;
    if (KMP_MASTER_TID(ds_tid) &&
        (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
         ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
      codeptr = team->t.ompt_team_info.master_return_address;
    my_task_data = OMPT_CUR_TASK_DATA(this_thr);
    my_parallel_data = OMPT_CUR_TEAM_DATA(this_thr);
    if (ompt_enabled.ompt_callback_sync_region) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
          ompt_sync_region_barrier_implicit, ompt_scope_begin, my_parallel_data,
          my_task_data, codeptr);
    if (ompt_enabled.ompt_callback_sync_region_wait) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_barrier_implicit, ompt_scope_begin, my_parallel_data,
          my_task_data, codeptr);
    if (!KMP_MASTER_TID(ds_tid))
      this_thr->th.ompt_thread_info.task_data = *OMPT_CUR_TASK_DATA(this_thr);
    this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier_implicit;
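  // With the extra-barrier tasking mode, execute outstanding tasks before
  // joining; otherwise just sanity-check the cached task-team pointer.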
  if (__kmp_tasking_mode == tskm_extra_barrier) {
    __kmp_tasking_barrier(team, this_thr, gtid);
    KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) past tasking barrier\n",
                  gtid, team_id, tid));
  if (__kmp_tasking_mode != tskm_immediate_exec) {
    KA_TRACE(20, ("__kmp_join_barrier: T#%d, old team = %d, old task_team = "
                  "%p, th_task_team = %p\n",
                  __kmp_gtid_from_thread(this_thr), team_id,
                  team->t.t_task_team[this_thr->th.th_task_state],
                  this_thr->th.th_task_team));
    if (this_thr->th.th_task_team)
      KMP_DEBUG_ASSERT(this_thr->th.th_task_team ==
                       team->t.t_task_team[this_thr->th.th_task_state]);
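  // Copy the blocktime-related ICVs from the implicit task into the thread so
  // the barrier wait loops use the current team's settings.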
  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
    this_thr->th.th_team_bt_intervals =
        team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
    this_thr->th.th_team_bt_set =
        team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
    this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL(team, tid);
  if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
    __kmp_itt_barrier_starting(gtid, itt_sync_obj);
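  // Gather phase: every thread signals arrival using the configured fork/join
  // gather pattern (dist, hyper, hierarchical, tree, or linear).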
  switch (__kmp_barrier_gather_pattern[bs_forkjoin_barrier]) {
    __kmp_dist_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
                              NULL USE_ITT_BUILD_ARG(itt_sync_obj));
  case bp_hyper_bar: {
    KMP_ASSERT(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]);
    __kmp_hyper_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
                               NULL USE_ITT_BUILD_ARG(itt_sync_obj));
  case bp_hierarchical_bar: {
    __kmp_hierarchical_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
                                      NULL USE_ITT_BUILD_ARG(itt_sync_obj));
    KMP_ASSERT(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]);
    __kmp_tree_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
                              NULL USE_ITT_BUILD_ARG(itt_sync_obj));
    __kmp_linear_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
                                NULL USE_ITT_BUILD_ARG(itt_sync_obj));
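  // After the gather, only the primary thread continues here: it waits for
  // the task team to drain, clears the display-affinity flag, and (with stats
  // enabled) marks the workers idle, waking any that are sleeping.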
  if (KMP_MASTER_TID(tid)) {
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      __kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj));
    if (__kmp_display_affinity) {
      KMP_CHECK_UPDATE(team->t.t_display_affinity, 0);
#if KMP_STATS_ENABLED
    for (int i = 0; i < team->t.t_nproc; ++i) {
      kmp_info_t *team_thread = team->t.t_threads[i];
      if (team_thread == this_thr)
        continue;
      team_thread->th.th_stats->setIdleFlag();
      if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME &&
          team_thread->th.th_sleep_loc != NULL)
        __kmp_null_resume_wrapper(team_thread);
  if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
    __kmp_itt_barrier_middle(gtid, itt_sync_obj);
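  // With ITT notification enabled, submit fork/join frame and imbalance data
  // for this region, depending on __kmp_forkjoin_frames_mode.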
#if USE_ITT_BUILD && USE_ITT_NOTIFY
  if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
      __kmp_forkjoin_frames_mode &&
      (this_thr->th.th_teams_microtask == NULL ||
       this_thr->th.th_teams_size.nteams == 1) &&
      team->t.t_active_level == 1) {
    kmp_uint64 cur_time = __itt_get_timestamp();
    ident_t *loc = team->t.t_ident;
    kmp_info_t **other_threads = team->t.t_threads;
    switch (__kmp_forkjoin_frames_mode) {
      __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0,
      __kmp_itt_frame_submit(gtid, this_thr->th.th_bar_min_time, cur_time, 1,
      if (__itt_metadata_add_ptr) {
        kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time;
        this_thr->th.th_bar_arrive_time = 0;
        for (int i = 1; i < nproc; ++i) {
          delta += (cur_time - other_threads[i]->th.th_bar_arrive_time);
          other_threads[i]->th.th_bar_arrive_time = 0;
        __kmp_itt_metadata_imbalance(gtid, this_thr->th.th_frame_time,
                                     cur_time, delta, 0);
      __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0,
      this_thr->th.th_frame_time = cur_time;
  if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
    __kmp_itt_barrier_middle(gtid, itt_sync_obj);
  if (KMP_MASTER_TID(tid)) {
    ("__kmp_join_barrier: T#%d(%d:%d) says all %d team threads arrived\n",
     gtid, team_id, tid, nproc));
  ("__kmp_join_barrier: T#%d(%d:%d) leaving\n", gtid, team_id, tid));
void __kmp_fork_barrier(int gtid, int tid) {
  KMP_TIME_PARTITIONED_BLOCK(OMP_fork_barrier);
  KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER);
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *team = (tid == 0) ? this_thr->th.th_team : NULL;
  void *itt_sync_obj = NULL;
  KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d:%d) has arrived\n", gtid,
                (team != NULL) ? team->t.t_id : -1, tid));
  if (KMP_MASTER_TID(tid)) {
#if USE_ITT_BUILD && USE_ITT_NOTIFY
    if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 1);
      __kmp_itt_barrier_middle(gtid, itt_sync_obj);
    KMP_DEBUG_ASSERT(team);
    kmp_info_t **other_threads = team->t.t_threads;
    for (i = 1; i < team->t.t_nproc; ++i) {
      ("__kmp_fork_barrier: T#%d(%d:0) checking T#%d(%d:%d) fork go "
       gtid, team->t.t_id, other_threads[i]->th.th_info.ds.ds_gtid,
       team->t.t_id, other_threads[i]->th.th_info.ds.ds_tid,
       other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go));
      (TCR_4(other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go) &
       ~(KMP_BARRIER_SLEEP_STATE)) == KMP_INIT_BARRIER_STATE);
      KMP_DEBUG_ASSERT(other_threads[i]->th.th_team == team);
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      __kmp_task_team_setup(this_thr, team, 0);
  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
    this_thr->th.th_team_bt_intervals =
        team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
    this_thr->th.th_team_bt_set =
        team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
    this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL(team, tid);
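  // Release phase: wake the workers with the configured fork/join release
  // pattern; the TRUE argument requests ICV propagation where the pattern
  // supports it.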
  switch (__kmp_barrier_release_pattern[bs_forkjoin_barrier]) {
    __kmp_dist_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
                               TRUE USE_ITT_BUILD_ARG(NULL));
  case bp_hyper_bar: {
    KMP_ASSERT(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]);
    __kmp_hyper_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
                                TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
  case bp_hierarchical_bar: {
    __kmp_hierarchical_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
                                       TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    KMP_ASSERT(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]);
    __kmp_tree_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
                               TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    __kmp_linear_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
                                 TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) {
    int ds_tid = this_thr->th.th_info.ds.ds_tid;
    ompt_data_t *task_data = (team)
                                 ? OMPT_CUR_TASK_DATA(this_thr)
                                 : &(this_thr->th.ompt_thread_info.task_data);
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
    void *codeptr = NULL;
    if (KMP_MASTER_TID(ds_tid) &&
        (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
         ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
      codeptr = team ? team->t.ompt_team_info.master_return_address : NULL;
    if (ompt_enabled.ompt_callback_sync_region_wait) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
    if (ompt_enabled.ompt_callback_sync_region) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
    if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, task_data, 0, ds_tid,
          ompt_task_implicit);
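  // If the runtime is shutting down, drop the task-team pointer, report the
  // barrier as finished to ITT, and leave early.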
  if (TCR_4(__kmp_global.g.g_done)) {
    this_thr->th.th_task_team = NULL;
#if USE_ITT_BUILD && USE_ITT_NOTIFY
    if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
      if (!KMP_MASTER_TID(tid)) {
        itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
        __kmp_itt_barrier_finished(gtid, itt_sync_obj);
    KA_TRACE(10, ("__kmp_fork_barrier: T#%d is leaving early\n", gtid));
  team = (kmp_team_t *)TCR_PTR(this_thr->th.th_team);
  KMP_DEBUG_ASSERT(team != NULL);
  tid = __kmp_tid_from_gtid(gtid);
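  // Under KMP_BARRIER_ICV_PULL, worker threads copy the ICVs published by the
  // primary thread into their own implicit task after being released.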
#if KMP_BARRIER_ICV_PULL
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy);
  if (!KMP_MASTER_TID(tid)) {
    ("__kmp_fork_barrier: T#%d(%d) is PULLing ICVs\n", gtid, tid));
    __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team,
    copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
              &team->t.t_threads[0]
                   ->th.th_bar[bs_forkjoin_barrier]
  if (__kmp_tasking_mode != tskm_immediate_exec) {
    __kmp_task_team_sync(this_thr, team);
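  // Re-apply thread placement for the new region: with balanced affinity the
  // placement is recomputed when the team size changed; otherwise a thread
  // moves to its assigned place if it differs from the current one.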
#if KMP_AFFINITY_SUPPORTED
  kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
  if (proc_bind == proc_bind_intel) {
    if (__kmp_affinity.type == affinity_balanced && team->t.t_size_changed) {
      __kmp_balanced_affinity(this_thr, team->t.t_nproc);
  } else if (proc_bind != proc_bind_false) {
    if (this_thr->th.th_new_place == this_thr->th.th_current_place) {
      KA_TRACE(100, ("__kmp_fork_barrier: T#%d already in correct place %d\n",
                     __kmp_gtid_from_thread(this_thr),
                     this_thr->th.th_current_place));
    __kmp_affinity_set_place(gtid);
  if (__kmp_display_affinity) {
    if (team->t.t_display_affinity
#if KMP_AFFINITY_SUPPORTED
        || (__kmp_affinity.type == affinity_balanced && team->t.t_size_changed)
      __kmp_aux_display_affinity(gtid, NULL);
      this_thr->th.th_prev_num_threads = team->t.t_nproc;
      this_thr->th.th_prev_level = team->t.t_level;
  if (!KMP_MASTER_TID(tid))
    KMP_CHECK_UPDATE(this_thr->th.th_def_allocator, team->t.t_def_allocator);
#if USE_ITT_BUILD && USE_ITT_NOTIFY
  if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
    if (!KMP_MASTER_TID(tid)) {
      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
      __kmp_itt_barrier_finished(gtid, itt_sync_obj);
  KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d:%d) is leaving\n", gtid,
                team->t.t_id, tid));
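// Prepare the ICVs that will be propagated to the team's threads, using the
// mechanism selected at build time: PULL stores them in the primary thread's
// barrier struct, PUSH defers the copy to the fork-barrier release, and the
// default (linear) path copies them into every implicit task here.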
void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
                          kmp_internal_control_t *new_icvs, ident_t *loc) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_setup_icv_copy);
  KMP_DEBUG_ASSERT(team && new_nproc && new_icvs);
  KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
#if KMP_BARRIER_ICV_PULL
  KMP_DEBUG_ASSERT(team->t.t_threads[0]);
      &team->t.t_threads[0]->th.th_bar[bs_forkjoin_barrier].bb.th_fixed_icvs,
  KF_TRACE(10, ("__kmp_setup_icv_copy: PULL: T#%d this_thread=%p team=%p\n", 0,
                team->t.t_threads[0], team));
#elif KMP_BARRIER_ICV_PUSH
  KF_TRACE(10, ("__kmp_setup_icv_copy: PUSH: T#%d this_thread=%p team=%p\n", 0,
                team->t.t_threads[0], team));
  KMP_DEBUG_ASSERT(team->t.t_threads[0]);
  for (int f = 1; f < new_nproc; ++f) {
    KF_TRACE(10, ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n",
                  f, team->t.t_threads[f], team));
    __kmp_init_implicit_task(loc, team->t.t_threads[f], team, f, FALSE);
    ngo_store_icvs(&team->t.t_implicit_task_taskdata[f].td_icvs, new_icvs);
    KF_TRACE(10, ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n",
                  f, team->t.t_threads[f], team));