#include "kmp_wait_release.h"
#include "kmp_stats.h"
#include "ompt-specific.h"

#if KMP_MIC
#include <immintrin.h>
#define USE_NGO_STORES 1
#endif // KMP_MIC

#include "tsan_annotations.h"

#if KMP_MIC && USE_NGO_STORES
// ICV copying uses non-globally-ordered stores on KNC
#define ngo_load(src) __m512d Vt = _mm512_load_pd((void *)(src))
#define ngo_store_icvs(dst, src) _mm512_storenrngo_pd((void *)(dst), Vt)
#define ngo_store_go(dst, src) _mm512_storenrngo_pd((void *)(dst), Vt)
#define ngo_sync() __asm__ volatile("lock; addl $0,0(%%rsp)" ::: "memory")
#else
#define ngo_load(src) ((void)0)
#define ngo_store_icvs(dst, src) copy_icvs((dst), (src))
#define ngo_store_go(dst, src) KMP_MEMCPY((dst), (src), CACHE_LINE)
#define ngo_sync() ((void)0)
#endif /* KMP_MIC && USE_NGO_STORES */

void __kmp_print_structure(void); // Forward declaration
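// ---------------------------- Barrier Algorithms ----------------------------

// Linear barrier: each worker posts its arrival on its own b_arrived flag and
// the master sweeps over all workers in turn (O(nproc) work on the master);
// the release phase is the mirror image, with the master bumping each
// worker's b_go flag.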
static void __kmp_linear_barrier_gather(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_linear_gather);
  kmp_team_t *team = this_thr->th.th_team;
  kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  kmp_info_t **other_threads = team->t.t_threads;

  KA_TRACE(
      20,
      ("__kmp_linear_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
       gtid, team->t.t_id, tid, bt));
  KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);

#if USE_ITT_BUILD && USE_ITT_NOTIFY
  // Barrier imbalance - save arrive time to the thread
  if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
    this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time =
        __itt_get_timestamp();
  }
#endif
  // We now perform a linear reduction to signal that all of the threads have
  // arrived.
  if (!KMP_MASTER_TID(tid)) {
    KA_TRACE(20,
             ("__kmp_linear_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d)"
              "arrived(%p): %llu => %llu\n",
              gtid, team->t.t_id, tid, __kmp_gtid_from_tid(0, team),
              team->t.t_id, 0, &thr_bar->b_arrived, thr_bar->b_arrived,
              thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));
    // Mark arrival to master thread.
    /* After performing this write, a worker thread may not assume that the
       team is valid any more - it could be deallocated by the master thread
       at any time. */
    ANNOTATE_BARRIER_BEGIN(this_thr);
    kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[0]);
    flag.release();
  } else {
    kmp_balign_team_t *team_bar = &team->t.t_bar[bt];
    int nproc = this_thr->th.th_team_nproc;
    int i;
    // Don't have to worry about sleep bit here or atomic since team setting
    kmp_uint64 new_state = team_bar->b_arrived + KMP_BARRIER_STATE_BUMP;

    // Collect all the worker team member threads.
    for (i = 1; i < nproc; ++i) {
#if KMP_CACHE_MANAGE
      // Prefetch next thread's arrived count
      if (i + 1 < nproc)
        KMP_CACHE_PREFETCH(&other_threads[i + 1]->th.th_bar[bt].bb.b_arrived);
#endif /* KMP_CACHE_MANAGE */
      KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) "
                    "arrived(%p) == %llu\n",
                    gtid, team->t.t_id, tid, __kmp_gtid_from_tid(i, team),
                    team->t.t_id, i,
                    &other_threads[i]->th.th_bar[bt].bb.b_arrived, new_state));

      // Wait for the worker thread's arrived count to reach new_state
      kmp_flag_64 flag(&other_threads[i]->th.th_bar[bt].bb.b_arrived,
                       new_state);
      flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
      ANNOTATE_BARRIER_END(other_threads[i]);
#if USE_ITT_BUILD && USE_ITT_NOTIFY
      // Barrier imbalance - write min of the thread time and the other thread
      // time to the thread.
      if (__kmp_forkjoin_frames_mode == 2) {
        this_thr->th.th_bar_min_time = KMP_MIN(
            this_thr->th.th_bar_min_time, other_threads[i]->th.th_bar_min_time);
      }
#endif
      if (reduce) {
        KA_TRACE(100,
                 ("__kmp_linear_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n",
                  gtid, team->t.t_id, tid, __kmp_gtid_from_tid(i, team),
                  team->t.t_id, i));
        ANNOTATE_REDUCE_AFTER(reduce);
        (*reduce)(this_thr->th.th_local.reduce_data,
                  other_threads[i]->th.th_local.reduce_data);
        ANNOTATE_REDUCE_BEFORE(reduce);
        ANNOTATE_REDUCE_BEFORE(&team->t.t_bar);
      }
    }
    // Don't have to worry about sleep bit here or atomic since team setting
    team_bar->b_arrived = new_state;
    KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) set team %d "
                  "arrived(%p) = %llu\n",
                  gtid, team->t.t_id, tid, team->t.t_id, &team_bar->b_arrived,
                  new_state));
  }
  KA_TRACE(
      20,
      ("__kmp_linear_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
       gtid, team->t.t_id, tid, bt));
}
static void __kmp_linear_barrier_release(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_linear_release);
  kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  kmp_team_t *team;

  if (KMP_MASTER_TID(tid)) {
    unsigned int i;
    kmp_uint32 nproc = this_thr->th.th_team_nproc;
    kmp_info_t **other_threads;

    team = __kmp_threads[gtid]->th.th_team;
    KMP_DEBUG_ASSERT(team != NULL);
    other_threads = team->t.t_threads;

    KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d(%d:%d) master enter for "
                  "barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));

    if (nproc > 1) {
#if KMP_BARRIER_ICV_PUSH
      {
        KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy);
        if (propagate_icvs) {
          ngo_load(&team->t.t_implicit_task_taskdata[0].td_icvs);
          for (i = 1; i < nproc; ++i) {
            __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[i],
                                     team, i, FALSE);
            ngo_store_icvs(&team->t.t_implicit_task_taskdata[i].td_icvs,
                           &team->t.t_implicit_task_taskdata[0].td_icvs);
          }
          ngo_sync();
        }
      }
#endif // KMP_BARRIER_ICV_PUSH

      // Now, release all of the worker threads
      for (i = 1; i < nproc; ++i) {
#if KMP_CACHE_MANAGE
        // Prefetch next thread's go flag
        if (i + 1 < nproc)
          KMP_CACHE_PREFETCH(&other_threads[i + 1]->th.th_bar[bt].bb.b_go);
#endif /* KMP_CACHE_MANAGE */
        KA_TRACE(
            20,
            ("__kmp_linear_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d) "
             "go(%p): %u => %u\n",
             gtid, team->t.t_id, tid, other_threads[i]->th.th_info.ds.ds_gtid,
             team->t.t_id, i, &other_threads[i]->th.th_bar[bt].bb.b_go,
             other_threads[i]->th.th_bar[bt].bb.b_go,
             other_threads[i]->th.th_bar[bt].bb.b_go + KMP_BARRIER_STATE_BUMP));
        ANNOTATE_BARRIER_BEGIN(other_threads[i]);
        kmp_flag_64 flag(&other_threads[i]->th.th_bar[bt].bb.b_go,
                         other_threads[i]);
        flag.release();
      }
    }
  } else { // Wait for the MASTER thread to release us
    KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d wait go(%p) == %u\n",
                  gtid, &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
    kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
    flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    ANNOTATE_BARRIER_END(this_thr);
#if USE_ITT_BUILD && USE_ITT_NOTIFY
    if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
      // In a fork barrier; cannot get the object reliably (or ITTNOTIFY is
      // disabled); cancel wait on previous parallel region...
      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
      __kmp_itt_task_starting(itt_sync_obj);

      if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
        return;

      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
      if (itt_sync_obj != NULL)
        // Call prepare as early as possible for "new" barrier
        __kmp_itt_task_finished(itt_sync_obj);
    } else
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
        // Early exit for reaping threads releasing forkjoin barrier
        if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
      return;
// The worker thread may now assume that the team is valid.
#ifdef KMP_DEBUG
    tid = __kmp_tid_from_gtid(gtid);
    team = __kmp_threads[gtid]->th.th_team;
#endif
    KMP_DEBUG_ASSERT(team != NULL);
    TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
    KA_TRACE(20,
             ("__kmp_linear_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
              gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
    KMP_MB(); // Flush all pending memory write invalidates.
  }
  KA_TRACE(
      20,
      ("__kmp_linear_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
       gtid, team->t.t_id, tid, bt));
}
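// Tree barrier: threads are arranged in a tree with branching factor
// 2^branch_bits; each parent waits for its children's b_arrived flags, then
// signals its own parent, so gather and release both take O(log nproc) steps.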
static void
__kmp_tree_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid,
                          int tid, void (*reduce)(void *, void *)
                                       USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_tree_gather);
  kmp_team_t *team = this_thr->th.th_team;
  kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  kmp_info_t **other_threads = team->t.t_threads;
  kmp_uint32 nproc = this_thr->th.th_team_nproc;
  kmp_uint32 branch_bits = __kmp_barrier_gather_branch_bits[bt];
  kmp_uint32 branch_factor = 1 << branch_bits;
  kmp_uint32 child;
  kmp_uint32 child_tid;
  kmp_uint64 new_state;

  KA_TRACE(
      20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
           gtid, team->t.t_id, tid, bt));
  KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);

#if USE_ITT_BUILD && USE_ITT_NOTIFY
  // Barrier imbalance - save arrive time to the thread
  if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
    this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time =
        __itt_get_timestamp();
  }
#endif
  // Perform tree gather to wait until all threads have arrived; reduce any
  // required data as we go
  child_tid = (tid << branch_bits) + 1;
  if (child_tid < nproc) {
    // Parent threads wait for all their children to arrive
    new_state = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
    child = 1;
    do {
      kmp_info_t *child_thr = other_threads[child_tid];
      kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
#if KMP_CACHE_MANAGE
      // Prefetch next thread's arrived count
      if (child + 1 <= branch_factor && child_tid + 1 < nproc)
        KMP_CACHE_PREFETCH(
            &other_threads[child_tid + 1]->th.th_bar[bt].bb.b_arrived);
#endif /* KMP_CACHE_MANAGE */
      KA_TRACE(20,
               ("__kmp_tree_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) "
                "arrived(%p) == %llu\n",
                gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                team->t.t_id, child_tid, &child_bar->b_arrived, new_state));
      // Wait for child to arrive
      kmp_flag_64 flag(&child_bar->b_arrived, new_state);
      flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
      ANNOTATE_BARRIER_END(child_thr);
#if USE_ITT_BUILD && USE_ITT_NOTIFY
      // Barrier imbalance - write min of the thread time and a child time to
      // the thread.
      if (__kmp_forkjoin_frames_mode == 2) {
        this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time,
                                               child_thr->th.th_bar_min_time);
      }
#endif
      if (reduce) {
        KA_TRACE(100,
                 ("__kmp_tree_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n",
                  gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                  team->t.t_id, child_tid));
        ANNOTATE_REDUCE_AFTER(reduce);
        (*reduce)(this_thr->th.th_local.reduce_data,
                  child_thr->th.th_local.reduce_data);
        ANNOTATE_REDUCE_BEFORE(reduce);
        ANNOTATE_REDUCE_BEFORE(&team->t.t_bar);
      }
      child++;
      child_tid++;
    } while (child <= branch_factor && child_tid < nproc);
  }

  if (!KMP_MASTER_TID(tid)) { // Worker threads
    kmp_int32 parent_tid = (tid - 1) >> branch_bits;

    KA_TRACE(20,
             ("__kmp_tree_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
              "arrived(%p): %llu => %llu\n",
              gtid, team->t.t_id, tid, __kmp_gtid_from_tid(parent_tid, team),
              team->t.t_id, parent_tid, &thr_bar->b_arrived, thr_bar->b_arrived,
              thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));

    // Mark arrival to parent thread
    /* After performing this write, a worker thread may not assume that the
       team is valid any more - it could be deallocated by the master thread at
       any time. */
    ANNOTATE_BARRIER_BEGIN(this_thr);
    kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[parent_tid]);
    flag.release();
  } else {
    // Need to update the team arrived pointer if we are the master thread
    if (nproc > 1) // New value was already computed above
      team->t.t_bar[bt].b_arrived = new_state;
    else
      team->t.t_bar[bt].b_arrived += KMP_BARRIER_STATE_BUMP;
    KA_TRACE(20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) set team %d "
                  "arrived(%p) = %llu\n",
                  gtid, team->t.t_id, tid, team->t.t_id,
                  &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived));
  }
  KA_TRACE(20,
           ("__kmp_tree_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
            gtid, team->t.t_id, tid, bt));
}
static void __kmp_tree_barrier_release(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_tree_release);
  kmp_team_t *team;
  kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  kmp_uint32 nproc;
  kmp_uint32 branch_bits = __kmp_barrier_release_branch_bits[bt];
  kmp_uint32 branch_factor = 1 << branch_bits;
  kmp_uint32 child;
  kmp_uint32 child_tid;

  // Perform a tree release for all of the threads that have been gathered
  if (!KMP_MASTER_TID(
          tid)) { // Handle fork barrier workers who aren't part of a team yet
    KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d wait go(%p) == %u\n", gtid,
                  &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
    // Wait for parent thread to release us
    kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
    flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    ANNOTATE_BARRIER_END(this_thr);
#if USE_ITT_BUILD && USE_ITT_NOTIFY
    if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
      // In fork barrier where we could not get the object reliably (or
      // ITTNOTIFY is disabled); cancel wait on previous parallel region...
      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
      __kmp_itt_task_starting(itt_sync_obj);

      if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
        return;

      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
      if (itt_sync_obj != NULL)
        // Call prepare as early as possible for "new" barrier
        __kmp_itt_task_finished(itt_sync_obj);
    } else
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
        // Early exit for reaping threads releasing forkjoin barrier
        if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
      return;

    // The worker thread may now assume that the team is valid.
    team = __kmp_threads[gtid]->th.th_team;
    KMP_DEBUG_ASSERT(team != NULL);
    tid = __kmp_tid_from_gtid(gtid);

    TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
    KA_TRACE(20,
             ("__kmp_tree_barrier_release: T#%d(%d:%d) set go(%p) = %u\n", gtid,
              team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
    KMP_MB(); // Flush all pending memory write invalidates.
  } else {
    team = __kmp_threads[gtid]->th.th_team;
    KMP_DEBUG_ASSERT(team != NULL);
    KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d(%d:%d) master enter for "
                  "barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));
  }
  nproc = this_thr->th.th_team_nproc;
  child_tid = (tid << branch_bits) + 1;

  if (child_tid < nproc) {
    kmp_info_t **other_threads = team->t.t_threads;
    child = 1;
    // Parent threads release all their children
    do {
      kmp_info_t *child_thr = other_threads[child_tid];
      kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
#if KMP_CACHE_MANAGE
      // Prefetch next thread's go count
      if (child + 1 <= branch_factor && child_tid + 1 < nproc)
        KMP_CACHE_PREFETCH(
            &other_threads[child_tid + 1]->th.th_bar[bt].bb.b_go);
#endif /* KMP_CACHE_MANAGE */

#if KMP_BARRIER_ICV_PUSH
      {
        KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy);
        if (propagate_icvs) {
          __kmp_init_implicit_task(team->t.t_ident,
                                   team->t.t_threads[child_tid], team,
                                   child_tid, FALSE);
          copy_icvs(&team->t.t_implicit_task_taskdata[child_tid].td_icvs,
                    &team->t.t_implicit_task_taskdata[0].td_icvs);
        }
      }
#endif // KMP_BARRIER_ICV_PUSH
      KA_TRACE(20,
               ("__kmp_tree_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)"
                "go(%p): %u => %u\n",
                gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,
                child_bar->b_go + KMP_BARRIER_STATE_BUMP));
      // Release child from barrier
      ANNOTATE_BARRIER_BEGIN(child_thr);
      kmp_flag_64 flag(&child_bar->b_go, child_thr);
      flag.release();
      child++;
      child_tid++;
    } while (child <= branch_factor && child_tid < nproc);
  }
  KA_TRACE(
      20, ("__kmp_tree_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
           gtid, team->t.t_id, tid, bt));
}
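// Hypercube-embedded tree barrier with minimum branching factor
// 2^branch_bits: at each level a thread either signals its parent for that
// level or collects the children that map onto it, so the communication
// pattern follows a hypercube.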
static void __kmp_hyper_barrier_gather(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hyper_gather);
  kmp_team_t *team = this_thr->th.th_team;
  kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  kmp_info_t **other_threads = team->t.t_threads;
  kmp_uint64 new_state = KMP_BARRIER_UNUSED_STATE;
  kmp_uint32 num_threads = this_thr->th.th_team_nproc;
  kmp_uint32 branch_bits = __kmp_barrier_gather_branch_bits[bt];
  kmp_uint32 branch_factor = 1 << branch_bits;
  kmp_uint32 offset;
  kmp_uint32 level;

  KA_TRACE(
      20,
      ("__kmp_hyper_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
       gtid, team->t.t_id, tid, bt));
  KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);

#if USE_ITT_BUILD && USE_ITT_NOTIFY
  // Barrier imbalance - save arrive time to the thread
  if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
    this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time =
        __itt_get_timestamp();
  }
#endif
  /* Perform a hypercube-embedded tree gather to wait until all of the threads
     have arrived, and reduce any required data as we go. */
  kmp_flag_64 p_flag(&thr_bar->b_arrived);
  for (level = 0, offset = 1; offset < num_threads;
       level += branch_bits, offset <<= branch_bits) {
    kmp_uint32 child;
    kmp_uint32 child_tid;

    if (((tid >> level) & (branch_factor - 1)) != 0) {
      kmp_int32 parent_tid = tid & ~((1 << (level + branch_bits)) - 1);

      KA_TRACE(20,
               ("__kmp_hyper_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
                "arrived(%p): %llu => %llu\n",
                gtid, team->t.t_id, tid, __kmp_gtid_from_tid(parent_tid, team),
                team->t.t_id, parent_tid, &thr_bar->b_arrived,
                thr_bar->b_arrived,
                thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));
      // Mark arrival to parent thread
      /* After performing this write (in the last iteration of the enclosing
         for loop), a worker thread may not assume that the team is valid any
         more - it could be deallocated by the master thread at any time. */
      ANNOTATE_BARRIER_BEGIN(this_thr);
      p_flag.set_waiter(other_threads[parent_tid]);
      p_flag.release();
      break;
    }

    // Parent threads wait for children to arrive
    if (new_state == KMP_BARRIER_UNUSED_STATE)
      new_state = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
    for (child = 1, child_tid = tid + (1 << level);
         child < branch_factor && child_tid < num_threads;
         child++, child_tid += (1 << level)) {
      kmp_info_t *child_thr = other_threads[child_tid];
      kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
#if KMP_CACHE_MANAGE
      kmp_uint32 next_child_tid = child_tid + (1 << level);
      // Prefetch next thread's arrived count
      if (child + 1 < branch_factor && next_child_tid < num_threads)
        KMP_CACHE_PREFETCH(
            &other_threads[next_child_tid]->th.th_bar[bt].bb.b_arrived);
#endif /* KMP_CACHE_MANAGE */
      KA_TRACE(20,
               ("__kmp_hyper_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) "
                "arrived(%p) == %llu\n",
                gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                team->t.t_id, child_tid, &child_bar->b_arrived, new_state));
      // Wait for child to arrive
      kmp_flag_64 c_flag(&child_bar->b_arrived, new_state);
      c_flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
      ANNOTATE_BARRIER_END(child_thr);
#if USE_ITT_BUILD && USE_ITT_NOTIFY
      // Barrier imbalance - write min of the thread time and a child time to
      // the thread.
      if (__kmp_forkjoin_frames_mode == 2) {
        this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time,
                                               child_thr->th.th_bar_min_time);
      }
#endif
      if (reduce) {
        KA_TRACE(100,
                 ("__kmp_hyper_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n",
                  gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                  team->t.t_id, child_tid));
        ANNOTATE_REDUCE_AFTER(reduce);
        (*reduce)(this_thr->th.th_local.reduce_data,
                  child_thr->th.th_local.reduce_data);
        ANNOTATE_REDUCE_BEFORE(reduce);
        ANNOTATE_REDUCE_BEFORE(&team->t.t_bar);
      }
    }
  }

  if (KMP_MASTER_TID(tid)) {
    // Need to update the team arrived pointer if we are the master thread
    if (new_state == KMP_BARRIER_UNUSED_STATE)
      team->t.t_bar[bt].b_arrived += KMP_BARRIER_STATE_BUMP;
    else
      team->t.t_bar[bt].b_arrived = new_state;
    KA_TRACE(20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) set team %d "
                  "arrived(%p) = %llu\n",
                  gtid, team->t.t_id, tid, team->t.t_id,
                  &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived));
  }
  KA_TRACE(
      20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
           gtid, team->t.t_id, tid, bt));
}
// The reverse versions seem to beat the forward versions overall
#define KMP_REVERSE_HYPER_BAR
static void __kmp_hyper_barrier_release(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hyper_release);
  kmp_team_t *team;
  kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  kmp_info_t **other_threads;
  kmp_uint32 num_threads;
  kmp_uint32 branch_bits = __kmp_barrier_release_branch_bits[bt];
  kmp_uint32 branch_factor = 1 << branch_bits;
  kmp_uint32 child;
  kmp_uint32 child_tid;
  kmp_uint32 offset;
  kmp_uint32 level;

  /* Perform a hypercube-embedded tree release for all of the threads that
     have been gathered. If KMP_REVERSE_HYPER_BAR is defined (default), the
     threads are released in the reverse order of the corresponding gather;
     otherwise threads are released in the same order. */
  if (KMP_MASTER_TID(tid)) { // master
    team = __kmp_threads[gtid]->th.th_team;
    KMP_DEBUG_ASSERT(team != NULL);
    KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d(%d:%d) master enter for "
                  "barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));
#if KMP_BARRIER_ICV_PUSH
    if (propagate_icvs) { // master already has ICVs in final destination; copy
      copy_icvs(&thr_bar->th_fixed_icvs,
                &team->t.t_implicit_task_taskdata[tid].td_icvs);
    }
#endif
  } else { // Handle fork barrier workers who aren't part of a team yet
    KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d wait go(%p) == %u\n", gtid,
                  &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
    // Wait for parent thread to release us
    kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
    flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    ANNOTATE_BARRIER_END(this_thr);
#if USE_ITT_BUILD && USE_ITT_NOTIFY
    if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
      // In fork barrier where we could not get the object reliably
      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
      // Cancel wait on previous parallel region...
      __kmp_itt_task_starting(itt_sync_obj);

      if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
        return;

      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
      if (itt_sync_obj != NULL)
        // Call prepare as early as possible for "new" barrier
        __kmp_itt_task_finished(itt_sync_obj);
    } else
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
        // Early exit for reaping threads releasing forkjoin barrier
        if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
      return;

    // The worker thread may now assume that the team is valid.
    team = __kmp_threads[gtid]->th.th_team;
    KMP_DEBUG_ASSERT(team != NULL);
    tid = __kmp_tid_from_gtid(gtid);

    TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
    KA_TRACE(20,
             ("__kmp_hyper_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
              gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
    KMP_MB(); // Flush all pending memory write invalidates.
  }
  num_threads = this_thr->th.th_team_nproc;
  other_threads = team->t.t_threads;

#ifdef KMP_REVERSE_HYPER_BAR
  // Count up to correct level for parent
  for (level = 0, offset = 1;
       offset < num_threads && (((tid >> level) & (branch_factor - 1)) == 0);
       level += branch_bits, offset <<= branch_bits)
    ;

  // Now go down from there
  for (level -= branch_bits, offset >>= branch_bits; offset != 0;
       level -= branch_bits, offset >>= branch_bits)
#else
  // Go down the tree, level by level
  for (level = 0, offset = 1; offset < num_threads;
       level += branch_bits, offset <<= branch_bits)
#endif // KMP_REVERSE_HYPER_BAR
  {
#ifdef KMP_REVERSE_HYPER_BAR
    /* Now go in reverse order through the children, highest to lowest.
       Initial setting of child is conservative here. */
    child = num_threads >> ((level == 0) ? level : level - 1);
    for (child = (child < branch_factor - 1) ? child : branch_factor - 1,
        child_tid = tid + (child << level);
         child >= 1; child--, child_tid -= (1 << level))
#else
    if (((tid >> level) & (branch_factor - 1)) != 0)
      // No need to go lower than this, since this is the level parent would be
      // notified
      break;
    // Iterate through children on this level of the tree
    for (child = 1, child_tid = tid + (1 << level);
         child < branch_factor && child_tid < num_threads;
         child++, child_tid += (1 << level))
#endif // KMP_REVERSE_HYPER_BAR
    {
      if (child_tid >= num_threads)
        continue; // Child doesn't exist so keep going
      else {
        kmp_info_t *child_thr = other_threads[child_tid];
        kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
#if KMP_CACHE_MANAGE
        kmp_uint32 next_child_tid = child_tid - (1 << level);
// Prefetch next thread's go count
#ifdef KMP_REVERSE_HYPER_BAR
        if (child - 1 >= 1 && next_child_tid < num_threads)
#else
        if (child + 1 < branch_factor && next_child_tid < num_threads)
#endif // KMP_REVERSE_HYPER_BAR
          KMP_CACHE_PREFETCH(
              &other_threads[next_child_tid]->th.th_bar[bt].bb.b_go);
#endif /* KMP_CACHE_MANAGE */

#if KMP_BARRIER_ICV_PUSH
        if (propagate_icvs) // push my fixed ICVs to my child
          copy_icvs(&child_bar->th_fixed_icvs, &thr_bar->th_fixed_icvs);
#endif // KMP_BARRIER_ICV_PUSH

        KA_TRACE(
            20,
            ("__kmp_hyper_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)"
             "go(%p): %u => %u\n",
             gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
             team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,
             child_bar->b_go + KMP_BARRIER_STATE_BUMP));
        // Release child from barrier
        ANNOTATE_BARRIER_BEGIN(child_thr);
        kmp_flag_64 flag(&child_bar->b_go, child_thr);
        flag.release();
      }
    }
  }
#if KMP_BARRIER_ICV_PUSH
  if (propagate_icvs &&
      !KMP_MASTER_TID(tid)) { // copy ICVs locally to final destination
    __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid,
                             FALSE);
    copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
              &thr_bar->th_fixed_icvs);
  }
#endif
  KA_TRACE(
      20,
      ("__kmp_hyper_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
       gtid, team->t.t_id, tid, bt));
}
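// Hierarchical Barrier
//
// The hierarchical barrier arranges the team to match the machine hierarchy
// (as computed by __kmp_get_hierarchy). Leaf children can check in on
// dedicated byte-sized slots ("leaf_state") of their parent's b_arrived flag
// and, with infinite blocktime, can be released through the parent's cache
// line (the "on-core" path) instead of their own b_go flags.
//
// __kmp_init_hierarchical_barrier_thread initializes or re-initializes the
// per-thread hierarchical barrier data, doing only the work required by how
// the team has changed; it returns true when the team or the thread's
// position in it changed, which the release path uses to decide whether old
// leaf children can still be woken through the on-core path.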
static bool __kmp_init_hierarchical_barrier_thread(enum barrier_type bt,
                                                   kmp_bstate_t *thr_bar,
                                                   kmp_uint32 nproc, int gtid,
                                                   int tid, kmp_team_t *team) {
  // Checks to determine if (re-)initialization is needed
  bool uninitialized = thr_bar->team == NULL;
  bool team_changed = team != thr_bar->team;
  bool team_sz_changed = nproc != thr_bar->nproc;
  bool tid_changed = tid != thr_bar->old_tid;
  bool retval = false;

  if (uninitialized || team_sz_changed) {
    __kmp_get_hierarchy(nproc, thr_bar);
  }

  if (uninitialized || team_sz_changed || tid_changed) {
    thr_bar->my_level = thr_bar->depth - 1; // default for master
    thr_bar->parent_tid = -1; // default for master
    if (!KMP_MASTER_TID(
            tid)) { // if not master, find parent thread in hierarchy
      kmp_uint32 d = 0;
      while (d < thr_bar->depth) { // find parent based on level of thread in
        // hierarchy, and note level
        kmp_uint32 rem;
        if (d == thr_bar->depth - 2) { // reached level right below the master
          thr_bar->parent_tid = 0;
          thr_bar->my_level = d;
          break;
        } else if ((rem = tid % thr_bar->skip_per_level[d + 1]) !=
                   0) { // TODO: can we make this op faster?
          // thread is not a subtree root at next level, so this is max
          thr_bar->parent_tid = tid - rem;
          thr_bar->my_level = d;
          break;
        }
        ++d;
      }
    }
    thr_bar->offset = 7 - (tid - thr_bar->parent_tid - 1);
    thr_bar->old_tid = tid;
    thr_bar->wait_flag = KMP_BARRIER_NOT_WAITING;
    thr_bar->team = team;
    thr_bar->parent_bar =
        &team->t.t_threads[thr_bar->parent_tid]->th.th_bar[bt].bb;
  }
  if (uninitialized || team_changed || tid_changed) {
    thr_bar->team = team;
    thr_bar->parent_bar =
        &team->t.t_threads[thr_bar->parent_tid]->th.th_bar[bt].bb;
    retval = true;
  }
  if (uninitialized || team_sz_changed || tid_changed) {
    thr_bar->nproc = nproc;
    thr_bar->leaf_kids = thr_bar->base_leaf_kids;
    if (thr_bar->my_level == 0)
      thr_bar->leaf_kids = 0;
    if (thr_bar->leaf_kids && (kmp_uint32)tid + thr_bar->leaf_kids + 1 > nproc)
      thr_bar->leaf_kids = nproc - tid - 1;
    thr_bar->leaf_state = 0;
    for (int i = 0; i < thr_bar->leaf_kids; ++i)
      ((char *)&(thr_bar->leaf_state))[7 - i] = 1;
  }
  return retval;
}
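// Gather phase of the hierarchical barrier: a non-leaf thread first waits for
// its leaf children on the leaf_state bits of its own b_arrived flag (when the
// on-core path is usable), then waits on each higher-level child's b_arrived
// flag, performing the reduction as children check in; finally it reports to
// its own parent, either through a normal flag release or through its byte
// slot in the parent's b_arrived word.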
static void __kmp_hierarchical_barrier_gather(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hier_gather);
  kmp_team_t *team = this_thr->th.th_team;
  kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  kmp_uint32 nproc = this_thr->th.th_team_nproc;
  kmp_info_t **other_threads = team->t.t_threads;
  kmp_uint64 new_state;

#if OMP_40_ENABLED
  int level = team->t.t_level;
  if (other_threads[0]
          ->th.th_teams_microtask) // are we inside the teams construct?
    if (this_thr->th.th_teams_size.nteams > 1)
      ++level; // level was not increased in teams construct for team_of_masters
  if (level == 1)
    thr_bar->use_oncore_barrier = 1;
  else
    thr_bar->use_oncore_barrier = 0; // Do not use oncore barrier when nested
#endif

  KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) enter for "
                "barrier type %d\n",
                gtid, team->t.t_id, tid, bt));
  KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);

#if USE_ITT_BUILD && USE_ITT_NOTIFY
  // Barrier imbalance - save arrive time to the thread
  if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
    this_thr->th.th_bar_arrive_time = __itt_get_timestamp();
  }
#endif

  (void)__kmp_init_hierarchical_barrier_thread(bt, thr_bar, nproc, gtid, tid,
                                               team);

  if (thr_bar->my_level) { // not a leaf (my_level==0 means leaf)
    kmp_int32 child_tid;
    new_state =
        (kmp_uint64)team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
        thr_bar->use_oncore_barrier) {
      if (thr_bar->leaf_kids) {
        // First, wait for leaf children to check-in on my b_arrived flag
        kmp_uint64 leaf_state =
            KMP_MASTER_TID(tid)
                ? thr_bar->b_arrived | thr_bar->leaf_state
                : team->t.t_bar[bt].b_arrived | thr_bar->leaf_state;
        KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) waiting "
                      "for leaf kids\n",
                      gtid, team->t.t_id, tid));
        kmp_flag_64 flag(&thr_bar->b_arrived, leaf_state);
        flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
        if (reduce) {
          ANNOTATE_REDUCE_AFTER(reduce);
          for (child_tid = tid + 1; child_tid <= tid + thr_bar->leaf_kids;
               ++child_tid) {
            KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += "
                           "T#%d(%d:%d)\n",
                           gtid, team->t.t_id, tid,
                           __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
                           child_tid));
            ANNOTATE_BARRIER_END(other_threads[child_tid]);
            (*reduce)(this_thr->th.th_local.reduce_data,
                      other_threads[child_tid]->th.th_local.reduce_data);
          }
          ANNOTATE_REDUCE_BEFORE(reduce);
          ANNOTATE_REDUCE_BEFORE(&team->t.t_bar);
        }
        // clear leaf_state bits
        KMP_TEST_THEN_AND64(&thr_bar->b_arrived, ~(thr_bar->leaf_state));
      }
      // Next, wait for higher level children on each child's b_arrived flag
      for (kmp_uint32 d = 1; d < thr_bar->my_level;
           ++d) { // gather lowest level threads first, but skip 0
        kmp_uint32 last = tid + thr_bar->skip_per_level[d + 1],
                   skip = thr_bar->skip_per_level[d];
        if (last > nproc)
          last = nproc;
        for (child_tid = tid + skip; child_tid < (int)last; child_tid += skip) {
          kmp_info_t *child_thr = other_threads[child_tid];
          kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
          KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait "
                        "T#%d(%d:%d) "
                        "arrived(%p) == %llu\n",
                        gtid, team->t.t_id, tid,
                        __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
                        child_tid, &child_bar->b_arrived, new_state));
          kmp_flag_64 flag(&child_bar->b_arrived, new_state);
          flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
          ANNOTATE_BARRIER_END(child_thr);
          if (reduce) {
            KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += "
                           "T#%d(%d:%d)\n",
                           gtid, team->t.t_id, tid,
                           __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
                           child_tid));
            ANNOTATE_REDUCE_AFTER(reduce);
            (*reduce)(this_thr->th.th_local.reduce_data,
                      child_thr->th.th_local.reduce_data);
            ANNOTATE_REDUCE_BEFORE(reduce);
            ANNOTATE_REDUCE_BEFORE(&team->t.t_bar);
          }
        }
      }
    } else { // Blocktime is not infinite
      for (kmp_uint32 d = 0; d < thr_bar->my_level;
           ++d) { // Gather lowest level threads first
        kmp_uint32 last = tid + thr_bar->skip_per_level[d + 1],
                   skip = thr_bar->skip_per_level[d];
        if (last > nproc)
          last = nproc;
        for (child_tid = tid + skip; child_tid < (int)last; child_tid += skip) {
          kmp_info_t *child_thr = other_threads[child_tid];
          kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
          KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait "
                        "T#%d(%d:%d) "
                        "arrived(%p) == %llu\n",
                        gtid, team->t.t_id, tid,
                        __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
                        child_tid, &child_bar->b_arrived, new_state));
          kmp_flag_64 flag(&child_bar->b_arrived, new_state);
          flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
          ANNOTATE_BARRIER_END(child_thr);
          if (reduce) {
            KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += "
                           "T#%d(%d:%d)\n",
                           gtid, team->t.t_id, tid,
                           __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
                           child_tid));
            ANNOTATE_REDUCE_AFTER(reduce);
            (*reduce)(this_thr->th.th_local.reduce_data,
                      child_thr->th.th_local.reduce_data);
            ANNOTATE_REDUCE_BEFORE(reduce);
            ANNOTATE_REDUCE_BEFORE(&team->t.t_bar);
          }
        }
      }
    }
  }
  // All subordinates are gathered; now release parent if not master thread

  if (!KMP_MASTER_TID(tid)) { // worker threads release parent in hierarchy
    KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) releasing"
                  " T#%d(%d:%d) arrived(%p): %llu => %llu\n",
                  gtid, team->t.t_id, tid,
                  __kmp_gtid_from_tid(thr_bar->parent_tid, team), team->t.t_id,
                  thr_bar->parent_tid, &thr_bar->b_arrived, thr_bar->b_arrived,
                  thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));
    /* Mark arrival to parent: after this write, a worker thread may not assume
       that the team is valid any more - it could be deallocated by the master
       thread at any time. */
    if (thr_bar->my_level || __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ||
        !thr_bar->use_oncore_barrier) {
      // Parent is waiting on my b_arrived flag; release it
      ANNOTATE_BARRIER_BEGIN(this_thr);
      kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[thr_bar->parent_tid]);
      flag.release();
    } else {
      // Leaf does special release on "offset" bits of parent's b_arrived flag
      thr_bar->b_arrived = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
      kmp_flag_oncore flag(&thr_bar->parent_bar->b_arrived, thr_bar->offset);
      flag.set_waiter(other_threads[thr_bar->parent_tid]);
      flag.release();
    }
  } else { // Master thread needs to update the team's b_arrived value
    team->t.t_bar[bt].b_arrived = new_state;
    KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) set team %d "
                  "arrived(%p) = %llu\n",
                  gtid, team->t.t_id, tid, team->t.t_id,
                  &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived));
  }
  KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) exit for "
                "barrier type %d\n",
                gtid, team->t.t_id, tid, bt));
}
static void __kmp_hierarchical_barrier_release(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hier_release);
  kmp_team_t *team;
  kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  kmp_uint32 nproc;
  bool team_change = false; // indicates on-core barrier shouldn't be used

  if (KMP_MASTER_TID(tid)) {
    team = __kmp_threads[gtid]->th.th_team;
    KMP_DEBUG_ASSERT(team != NULL);
    KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) master "
                  "entered barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));
  } else { // Worker threads
    // Wait for parent thread to release me
    if (!thr_bar->use_oncore_barrier ||
        __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME || thr_bar->my_level != 0 ||
        thr_bar->team == NULL) {
      // Use traditional method of waiting on my own b_go flag
      thr_bar->wait_flag = KMP_BARRIER_OWN_FLAG;
      kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
      flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
      ANNOTATE_BARRIER_END(this_thr);
      TCW_8(thr_bar->b_go,
            KMP_INIT_BARRIER_STATE); // Reset my b_go flag for next time
    } else { // Thread barrier data is initialized, this is a leaf, blocktime is
      // infinite, not nested: wait on my "offset" bits on parent's b_go flag
      thr_bar->wait_flag = KMP_BARRIER_PARENT_FLAG;
      kmp_flag_oncore flag(&thr_bar->parent_bar->b_go, KMP_BARRIER_STATE_BUMP,
                           thr_bar->offset, bt,
                           this_thr USE_ITT_BUILD_ARG(itt_sync_obj));
      flag.wait(this_thr, TRUE);
      if (thr_bar->wait_flag ==
          KMP_BARRIER_SWITCHING) { // Thread was switched to own b_go
        TCW_8(thr_bar->b_go,
              KMP_INIT_BARRIER_STATE); // Reset my b_go flag for next time
      } else { // Reset my bits on parent's b_go flag
        (RCAST(volatile char *,
               &(thr_bar->parent_bar->b_go)))[thr_bar->offset] = 0;
      }
    }
    thr_bar->wait_flag = KMP_BARRIER_NOT_WAITING;
    // Early exit for reaping threads releasing forkjoin barrier
    if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
      return;
    // The worker thread may now assume that the team is valid.
    team = __kmp_threads[gtid]->th.th_team;
    KMP_DEBUG_ASSERT(team != NULL);
    tid = __kmp_tid_from_gtid(gtid);

    KA_TRACE(
        20,
        ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
         gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
    KMP_MB(); // Flush all pending memory write invalidates.
  }

  nproc = this_thr->th.th_team_nproc;
#if OMP_40_ENABLED
  int level = team->t.t_level;
  if (team->t.t_threads[0]
          ->th.th_teams_microtask) { // are we inside the teams construct?
    if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
        this_thr->th.th_teams_level == level)
      ++level; // level was not increased in teams construct for team_of_workers
    if (this_thr->th.th_teams_size.nteams > 1)
      ++level; // level was not increased in teams construct for team_of_masters
  }
  if (level == 1)
    thr_bar->use_oncore_barrier = 1;
  else
    thr_bar->use_oncore_barrier = 0; // Do not use oncore barrier when nested
#endif

  // If the team size has increased, we still communicate with old leaves via
  // oncore barrier.
  unsigned short int old_leaf_kids = thr_bar->leaf_kids;
  kmp_uint64 old_leaf_state = thr_bar->leaf_state;
  team_change = __kmp_init_hierarchical_barrier_thread(bt, thr_bar, nproc, gtid,
                                                       tid, team);
  // But if the entire team changes, we won't use oncore barrier at all
  if (team_change)
    old_leaf_kids = 0;

#if KMP_BARRIER_ICV_PUSH
  if (propagate_icvs) {
    __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid,
                             FALSE);
    if (KMP_MASTER_TID(
            tid)) { // master already has copy in final destination; copy
      copy_icvs(&thr_bar->th_fixed_icvs,
                &team->t.t_implicit_task_taskdata[tid].td_icvs);
    } else if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
               thr_bar->use_oncore_barrier) { // optimization for inf blocktime
      if (!thr_bar->my_level) // I'm a leaf in the hierarchy (my_level==0)
        // leaves (on-core children) pull parent's fixed ICVs directly to local
        // ICV store
        copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
                  &thr_bar->parent_bar->th_fixed_icvs);
      // non-leaves will get ICVs piggybacked with b_go via NGO store
    } else { // blocktime is not infinite; pull ICVs from parent's fixed ICVs
      if (thr_bar->my_level) // not a leaf; copy ICVs to my fixed ICVs child can
        // access
        copy_icvs(&thr_bar->th_fixed_icvs, &thr_bar->parent_bar->th_fixed_icvs);
      else // leaves copy parent's fixed ICVs directly to local ICV store
        copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
                  &thr_bar->parent_bar->th_fixed_icvs);
    }
  }
#endif // KMP_BARRIER_ICV_PUSH

  // Now, release my children
  if (thr_bar->my_level) { // not a leaf
    kmp_int32 child_tid;
    kmp_uint32 last;
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
        thr_bar->use_oncore_barrier) {
      if (KMP_MASTER_TID(tid)) { // do a flat release
        // Set local b_go to bump children via NGO store of the cache line
        // containing ICVs and b_go.
        thr_bar->b_go = KMP_BARRIER_STATE_BUMP;
        // Use ngo stores if available; b_go piggybacks in the last 8 bytes of
        // the cache line
        ngo_load(&thr_bar->th_fixed_icvs);
        // This loops over all the threads skipping only the leaf nodes in the
        // hierarchy
        for (child_tid = thr_bar->skip_per_level[1]; child_tid < (int)nproc;
             child_tid += thr_bar->skip_per_level[1]) {
          kmp_bstate_t *child_bar =
              &team->t.t_threads[child_tid]->th.th_bar[bt].bb;
          KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) "
                        "releasing T#%d(%d:%d)"
                        " go(%p): %u => %u\n",
                        gtid, team->t.t_id, tid,
                        __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
                        child_tid, &child_bar->b_go, child_bar->b_go,
                        child_bar->b_go + KMP_BARRIER_STATE_BUMP));
          // Use ngo store (if available) to both store ICVs and release child
          // via child's b_go
          ngo_store_go(&child_bar->th_fixed_icvs, &thr_bar->th_fixed_icvs);
        }
        ngo_sync();
      }
      TCW_8(thr_bar->b_go,
            KMP_INIT_BARRIER_STATE); // Reset my b_go flag for next time
      // Now, release leaf children
      if (thr_bar->leaf_kids) { // if there are any
        // We test team_change on the off-chance that the level 1 team changed.
        if (team_change ||
            old_leaf_kids < thr_bar->leaf_kids) { // some old, some new
          if (old_leaf_kids) { // release old leaf kids
            thr_bar->b_go |= old_leaf_state;
          }
          // Release new leaf kids
          last = tid + thr_bar->skip_per_level[1];
          if (last > nproc)
            last = nproc;
          for (child_tid = tid + 1 + old_leaf_kids; child_tid < (int)last;
               ++child_tid) { // skip_per_level[0]=1
            kmp_info_t *child_thr = team->t.t_threads[child_tid];
            kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
            KA_TRACE(
                20,
                ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) releasing"
                 " T#%d(%d:%d) go(%p): %u => %u\n",
                 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                 team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,
                 child_bar->b_go + KMP_BARRIER_STATE_BUMP));
            // Release child using child's b_go flag
            ANNOTATE_BARRIER_BEGIN(child_thr);
            kmp_flag_64 flag(&child_bar->b_go, child_thr);
            flag.release();
          }
        } else { // Release all children at once with leaf_state bits on my own
          // b_go flag
          thr_bar->b_go |= thr_bar->leaf_state;
        }
      }
    } else { // Blocktime is not infinite; do a simple hierarchical release
      for (int d = thr_bar->my_level - 1; d >= 0;
           --d) { // Release highest level threads first
        last = tid + thr_bar->skip_per_level[d + 1];
        kmp_uint32 skip = thr_bar->skip_per_level[d];
        if (last > nproc)
          last = nproc;
        for (child_tid = tid + skip; child_tid < (int)last; child_tid += skip) {
          kmp_info_t *child_thr = team->t.t_threads[child_tid];
          kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
          KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) "
                        "releasing T#%d(%d:%d) go(%p): %u => %u\n",
                        gtid, team->t.t_id, tid,
                        __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
                        child_tid, &child_bar->b_go, child_bar->b_go,
                        child_bar->b_go + KMP_BARRIER_STATE_BUMP));
          // Release child using child's b_go flag
          ANNOTATE_BARRIER_BEGIN(child_thr);
          kmp_flag_64 flag(&child_bar->b_go, child_thr);
          flag.release();
        }
      }
    }
#if KMP_BARRIER_ICV_PUSH
    if (propagate_icvs && !KMP_MASTER_TID(tid))
      // non-leaves copy ICVs from fixed ICVs to local dest
      copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
                &thr_bar->th_fixed_icvs);
#endif // KMP_BARRIER_ICV_PUSH
  }
  KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) exit for "
                "barrier type %d\n",
                gtid, team->t.t_id, tid, bt));
}
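// End of Barrier Algorithms

// Internal function to do a barrier.
/* If is_split is true, do a split barrier, otherwise, do a plain barrier.
   If reduce is non-NULL, do a split reduction barrier, otherwise, do a split
   barrier.
   Returns 0 if master thread, 1 if worker thread.
   The gather/release pattern actually used is selected per barrier type from
   __kmp_barrier_gather_pattern / __kmp_barrier_release_pattern. */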
int __kmp_barrier(enum barrier_type bt, int gtid, int is_split,
                  size_t reduce_size, void *reduce_data,
                  void (*reduce)(void *, void *)) {
  KMP_TIME_PARTITIONED_BLOCK(OMP_plain_barrier);
  KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER);
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *team = this_thr->th.th_team;
  int status = 0;
  ident_t *loc = __kmp_threads[gtid]->th.th_ident;
#if OMPT_SUPPORT
  ompt_data_t *my_task_data;
  ompt_data_t *my_parallel_data;
  void *return_address;
#endif

  KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) has arrived\n", gtid,
                __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid)));

  ANNOTATE_BARRIER_BEGIN(&team->t.t_bar);
#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    my_task_data = OMPT_CUR_TASK_DATA(this_thr);
    my_parallel_data = OMPT_CUR_TEAM_DATA(this_thr);
    return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_sync_region) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
          ompt_sync_region_barrier, ompt_scope_begin, my_parallel_data,
          my_task_data, return_address);
    }
    if (ompt_enabled.ompt_callback_sync_region_wait) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_barrier, ompt_scope_begin, my_parallel_data,
          my_task_data, return_address);
    }
    // It is OK to report the barrier state after the barrier begin callback.
    this_thr->th.ompt_thread_info.state = omp_state_wait_barrier;
  }
#endif

  if (!team->t.t_serialized) {
#if USE_ITT_BUILD
    // This value will be used in itt notify events below.
    void *itt_sync_obj = NULL;
#if USE_ITT_NOTIFY
    if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
      itt_sync_obj = __kmp_itt_barrier_object(gtid, bt, 1);
#endif
#endif /* USE_ITT_BUILD */
    if (__kmp_tasking_mode == tskm_extra_barrier) {
      __kmp_tasking_barrier(team, this_thr, gtid);
      KA_TRACE(15,
               ("__kmp_barrier: T#%d(%d:%d) past tasking barrier\n", gtid,
                __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid)));
    }

    /* Copy the blocktime info to the thread, where __kmp_wait_template() can
       access it when the team struct is not guaranteed to exist. */
    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
#if KMP_USE_MONITOR
      this_thr->th.th_team_bt_intervals =
          team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
      this_thr->th.th_team_bt_set =
          team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
#else
      this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL(team, tid);
#endif
    }

#if USE_ITT_BUILD
    if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
      __kmp_itt_barrier_starting(gtid, itt_sync_obj);
#endif /* USE_ITT_BUILD */
#if USE_DEBUGGER
    // Let the debugger know: the thread arrived to the barrier and waiting.
    if (KMP_MASTER_TID(tid)) { // Master counter is stored in team structure.
      team->t.t_bar[bt].b_master_arrived += 1;
    } else {
      this_thr->th.th_bar[bt].bb.b_worker_arrived += 1;
    }
#endif /* USE_DEBUGGER */
    if (reduce != NULL) {
      this_thr->th.th_local.reduce_data = reduce_data;
    }

    if (KMP_MASTER_TID(tid) && __kmp_tasking_mode != tskm_immediate_exec)
      __kmp_task_team_setup(
          this_thr, team,
          0); // use 0 to only setup the current team if nthreads > 1

    switch (__kmp_barrier_gather_pattern[bt]) {
    case bp_hyper_bar: {
      // don't set branch bits to 0; use linear
      KMP_ASSERT(__kmp_barrier_gather_branch_bits[bt]);
      __kmp_hyper_barrier_gather(bt, this_thr, gtid, tid,
                                 reduce USE_ITT_BUILD_ARG(itt_sync_obj));
      break;
    }
    case bp_hierarchical_bar: {
      __kmp_hierarchical_barrier_gather(bt, this_thr, gtid, tid,
                                        reduce USE_ITT_BUILD_ARG(itt_sync_obj));
      break;
    }
    case bp_tree_bar: {
      // don't set branch bits to 0; use linear
      KMP_ASSERT(__kmp_barrier_gather_branch_bits[bt]);
      __kmp_tree_barrier_gather(bt, this_thr, gtid, tid,
                                reduce USE_ITT_BUILD_ARG(itt_sync_obj));
      break;
    }
    default: {
      __kmp_linear_barrier_gather(bt, this_thr, gtid, tid,
                                  reduce USE_ITT_BUILD_ARG(itt_sync_obj));
    }
    }

    KMP_MB();

    if (KMP_MASTER_TID(tid)) {
      status = 0;
      if (__kmp_tasking_mode != tskm_immediate_exec) {
        __kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj));
      }
#if USE_DEBUGGER
      // Let the debugger know: all threads have arrived and are leaving.
      team->t.t_bar[bt].b_team_arrived += 1;
#endif

#if OMP_40_ENABLED
      kmp_int32 cancel_request = KMP_ATOMIC_LD_RLX(&team->t.t_cancel_request);
      // Reset cancellation flag for worksharing constructs
      if (cancel_request == cancel_loop || cancel_request == cancel_sections) {
        KMP_ATOMIC_ST_RLX(&team->t.t_cancel_request, cancel_noreq);
      }
#endif
#if USE_ITT_BUILD
      if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
        __kmp_itt_barrier_middle(gtid, itt_sync_obj);
#endif /* USE_ITT_BUILD */
#if USE_ITT_BUILD && USE_ITT_NOTIFY
      // Barrier - report frame end (only if active_level == 1)
      if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
          __kmp_forkjoin_frames_mode &&
#if OMP_40_ENABLED
          this_thr->th.th_teams_microtask == NULL &&
#endif
          team->t.t_active_level == 1) {
        kmp_uint64 cur_time = __itt_get_timestamp();
        kmp_info_t **other_threads = team->t.t_threads;
        int nproc = this_thr->th.th_team_nproc;
        int i;
        switch (__kmp_forkjoin_frames_mode) {
        case 1:
          __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0,
                                 loc, nproc);
          this_thr->th.th_frame_time = cur_time;
          break;
        case 2: // AC 2015-01-19: currently does not work for hierarchical (to
          // be fixed)
          __kmp_itt_frame_submit(gtid, this_thr->th.th_bar_min_time, cur_time,
                                 1, loc, nproc);
          break;
        case 3:
          if (__itt_metadata_add_ptr) {
            // Initialize with master's wait time
            kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time;
            // Set arrive time to zero to be able to check it in
            // __kmp_invoke_task(); the same is done inside the loop below
            this_thr->th.th_bar_arrive_time = 0;
            for (i = 1; i < nproc; ++i) {
              delta += (cur_time - other_threads[i]->th.th_bar_arrive_time);
              other_threads[i]->th.th_bar_arrive_time = 0;
            }
            __kmp_itt_metadata_imbalance(gtid, this_thr->th.th_frame_time,
                                         cur_time, delta,
                                         (kmp_uint64)(reduce != NULL));
          }
          __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0,
                                 loc, nproc);
          this_thr->th.th_frame_time = cur_time;
          break;
        }
      }
#endif /* USE_ITT_BUILD */
    } else {
      status = 1;
#if USE_ITT_BUILD
      if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
        __kmp_itt_barrier_middle(gtid, itt_sync_obj);
#endif /* USE_ITT_BUILD */
    }
    if (status == 1 || !is_split) {
      switch (__kmp_barrier_release_pattern[bt]) {
      case bp_hyper_bar: {
        KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
        __kmp_hyper_barrier_release(bt, this_thr, gtid, tid,
                                    FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
        break;
      }
      case bp_hierarchical_bar: {
        __kmp_hierarchical_barrier_release(
            bt, this_thr, gtid, tid, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
        break;
      }
      case bp_tree_bar: {
        KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
        __kmp_tree_barrier_release(bt, this_thr, gtid, tid,
                                   FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
        break;
      }
      default: {
        __kmp_linear_barrier_release(bt, this_thr, gtid, tid,
                                     FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
      }
      }
      if (__kmp_tasking_mode != tskm_immediate_exec) {
        __kmp_task_team_sync(this_thr, team);
      }
    }

#if USE_ITT_BUILD
    if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
      __kmp_itt_barrier_finished(gtid, itt_sync_obj);
#endif /* USE_ITT_BUILD */
  } else { // Team is serialized.
    status = 0;
    if (__kmp_tasking_mode != tskm_immediate_exec) {
#if OMP_45_ENABLED
      if (this_thr->th.th_task_team != NULL) {
        void *itt_sync_obj = NULL;
#if USE_ITT_NOTIFY
        if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
          itt_sync_obj = __kmp_itt_barrier_object(gtid, bt, 1);
          __kmp_itt_barrier_starting(gtid, itt_sync_obj);
        }
#endif

        KMP_DEBUG_ASSERT(this_thr->th.th_task_team->tt.tt_found_proxy_tasks ==
                         TRUE);
        __kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj));
        __kmp_task_team_setup(this_thr, team, 0);

#if USE_ITT_BUILD
        if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
          __kmp_itt_barrier_finished(gtid, itt_sync_obj);
#endif /* USE_ITT_BUILD */
      }
#else
      // The task team should be NULL for serialized code (tasks will be
      // executed immediately)
      KMP_DEBUG_ASSERT(team->t.t_task_team[this_thr->th.th_task_state] == NULL);
      KMP_DEBUG_ASSERT(this_thr->th.th_task_team == NULL);
#endif
    }
  }
  KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) is leaving with return value %d\n",
                gtid, __kmp_team_from_gtid(gtid)->t.t_id,
                __kmp_tid_from_gtid(gtid), status));

#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_sync_region_wait) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_barrier, ompt_scope_end, my_parallel_data,
          my_task_data, return_address);
    }
    if (ompt_enabled.ompt_callback_sync_region) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
          ompt_sync_region_barrier, ompt_scope_end, my_parallel_data,
          my_task_data, return_address);
    }
    this_thr->th.ompt_thread_info.state = omp_state_work_parallel;
  }
#endif
  ANNOTATE_BARRIER_END(&team->t.t_bar);

  return status;
}
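// Releases the workers of a split barrier that was left "split" by
// __kmp_barrier (master only); it simply runs the configured release pattern
// for the given barrier type.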
void __kmp_end_split_barrier(enum barrier_type bt, int gtid) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_end_split_barrier);
  KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER);
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *team = this_thr->th.th_team;

  ANNOTATE_BARRIER_BEGIN(&team->t.t_bar);
  if (!team->t.t_serialized) {
    if (KMP_MASTER_GTID(gtid)) {
      switch (__kmp_barrier_release_pattern[bt]) {
      case bp_hyper_bar: {
        KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
        __kmp_hyper_barrier_release(bt, this_thr, gtid, tid,
                                    FALSE USE_ITT_BUILD_ARG(NULL));
        break;
      }
      case bp_hierarchical_bar: {
        __kmp_hierarchical_barrier_release(bt, this_thr, gtid, tid,
                                           FALSE USE_ITT_BUILD_ARG(NULL));
        break;
      }
      case bp_tree_bar: {
        KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
        __kmp_tree_barrier_release(bt, this_thr, gtid, tid,
                                   FALSE USE_ITT_BUILD_ARG(NULL));
        break;
      }
      default: {
        __kmp_linear_barrier_release(bt, this_thr, gtid, tid,
                                     FALSE USE_ITT_BUILD_ARG(NULL));
      }
      }
      if (__kmp_tasking_mode != tskm_immediate_exec) {
        __kmp_task_team_sync(this_thr, team);
      }
    }
  }
  ANNOTATE_BARRIER_END(&team->t.t_bar);
}
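// The join barrier is executed at the end of a parallel region: all threads
// gather (no reduction), after which the master may deallocate the team, so
// workers must not touch team data once they have checked in.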
void __kmp_join_barrier(int gtid) {
  KMP_TIME_PARTITIONED_BLOCK(OMP_join_barrier);
  KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER);
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *team;
  kmp_uint nproc;
  kmp_info_t *master_thread;
  int tid;
#ifdef KMP_DEBUG
  int team_id;
#endif /* KMP_DEBUG */
#if USE_ITT_BUILD
  void *itt_sync_obj = NULL;
#if USE_ITT_NOTIFY
  if (__itt_sync_create_ptr || KMP_ITT_DEBUG) // Don't call routine without need
    // Get object created at fork_barrier
    itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
#endif
#endif /* USE_ITT_BUILD */
  KMP_MB();

  // Get current info
  team = this_thr->th.th_team;
  nproc = this_thr->th.th_team_nproc;
  KMP_DEBUG_ASSERT((int)nproc == team->t.t_nproc);
  tid = __kmp_tid_from_gtid(gtid);
#ifdef KMP_DEBUG
  team_id = team->t.t_id;
#endif /* KMP_DEBUG */
  master_thread = this_thr->th.th_team_master;
#ifdef KMP_DEBUG
  if (master_thread != team->t.t_threads[0]) {
    __kmp_print_structure();
  }
#endif /* KMP_DEBUG */
  KMP_DEBUG_ASSERT(master_thread == team->t.t_threads[0]);
  KMP_MB();

  // Verify state
  KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
  KMP_DEBUG_ASSERT(TCR_PTR(this_thr->th.th_team));
  KMP_DEBUG_ASSERT(TCR_PTR(this_thr->th.th_root));
  KMP_DEBUG_ASSERT(this_thr == team->t.t_threads[tid]);
  KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) arrived at join barrier\n",
                gtid, team_id, tid));

  ANNOTATE_BARRIER_BEGIN(&team->t.t_bar);
#if OMPT_SUPPORT
  ompt_data_t *my_task_data;
  ompt_data_t *my_parallel_data;
  if (ompt_enabled.enabled) {
    void *codeptr = NULL;
    int ds_tid = this_thr->th.th_info.ds.ds_tid;
    if (KMP_MASTER_TID(ds_tid) &&
        (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
         ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
      codeptr = team->t.ompt_team_info.master_return_address;
    my_task_data = OMPT_CUR_TASK_DATA(this_thr);
    my_parallel_data = OMPT_CUR_TEAM_DATA(this_thr);
    if (ompt_enabled.ompt_callback_sync_region) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
          ompt_sync_region_barrier, ompt_scope_begin, my_parallel_data,
          my_task_data, codeptr);
    }
    if (ompt_enabled.ompt_callback_sync_region_wait) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_barrier, ompt_scope_begin, my_parallel_data,
          my_task_data, codeptr);
    }
    if (!KMP_MASTER_TID(ds_tid))
      this_thr->th.ompt_thread_info.task_data = *OMPT_CUR_TASK_DATA(this_thr);
    this_thr->th.ompt_thread_info.state = omp_state_wait_barrier_implicit;
  }
#endif

  if (__kmp_tasking_mode == tskm_extra_barrier) {
    __kmp_tasking_barrier(team, this_thr, gtid);
    KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) past taking barrier\n", gtid,
                  team_id, tid));
  }
#ifdef KMP_DEBUG
  if (__kmp_tasking_mode != tskm_immediate_exec) {
    KA_TRACE(20, ("__kmp_join_barrier: T#%d, old team = %d, old task_team = "
                  "%p, th_task_team = %p\n",
                  __kmp_gtid_from_thread(this_thr), team_id,
                  team->t.t_task_team[this_thr->th.th_task_state],
                  this_thr->th.th_task_team));
    KMP_DEBUG_ASSERT(this_thr->th.th_task_team ==
                     team->t.t_task_team[this_thr->th.th_task_state]);
  }
#endif /* KMP_DEBUG */

  /* Copy the blocktime info to the thread, where __kmp_wait_template() can
     access it when the team struct is not guaranteed to exist. The copy is
     skipped when blocktime is infinite, since the values are not used in that
     case. */
  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
#if KMP_USE_MONITOR
    this_thr->th.th_team_bt_intervals =
        team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
    this_thr->th.th_team_bt_set =
        team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
#else
    this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL(team, tid);
#endif
  }

#if USE_ITT_BUILD
  if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
    __kmp_itt_barrier_starting(gtid, itt_sync_obj);
#endif /* USE_ITT_BUILD */

  switch (__kmp_barrier_gather_pattern[bs_forkjoin_barrier]) {
  case bp_hyper_bar: {
    KMP_ASSERT(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]);
    __kmp_hyper_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
                               NULL USE_ITT_BUILD_ARG(itt_sync_obj));
    break;
  }
  case bp_hierarchical_bar: {
    __kmp_hierarchical_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
                                      NULL USE_ITT_BUILD_ARG(itt_sync_obj));
    break;
  }
  case bp_tree_bar: {
    KMP_ASSERT(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]);
    __kmp_tree_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
                              NULL USE_ITT_BUILD_ARG(itt_sync_obj));
    break;
  }
  default: {
    __kmp_linear_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
                                NULL USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  }

  /* From this point on, the team data structure may be deallocated at any time
     by the master thread - it is unsafe to reference it in any of the worker
     threads. Any per-team data items that need to be referenced before the
     end of the barrier should be moved to the kmp_task_team_t structs. */
  if (KMP_MASTER_TID(tid)) {
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      __kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj));
    }
#if KMP_STATS_ENABLED
    // Have master thread flag the workers to indicate they are now waiting for
    // next parallel region, and wake them up so they switch to idle timers.
    for (int i = 0; i < team->t.t_nproc; ++i) {
      kmp_info_t *team_thread = team->t.t_threads[i];
      if (team_thread == this_thr)
        continue;
      team_thread->th.th_stats->setIdleFlag();
      if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME &&
          team_thread->th.th_sleep_loc != NULL)
        __kmp_null_resume_wrapper(__kmp_gtid_from_thread(team_thread),
                                  team_thread->th.th_sleep_loc);
    }
#endif
#if USE_ITT_BUILD
    if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
      __kmp_itt_barrier_middle(gtid, itt_sync_obj);
#endif /* USE_ITT_BUILD */

#if USE_ITT_BUILD && USE_ITT_NOTIFY
    // Join barrier - report frame end
    if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
        __kmp_forkjoin_frames_mode &&
#if OMP_40_ENABLED
        this_thr->th.th_teams_microtask == NULL &&
#endif
        team->t.t_active_level == 1) {
      kmp_uint64 cur_time = __itt_get_timestamp();
      ident_t *loc = team->t.t_ident;
      kmp_info_t **other_threads = team->t.t_threads;
      int nproc = this_thr->th.th_team_nproc;
      int i;
      switch (__kmp_forkjoin_frames_mode) {
      case 1:
        __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0,
                               loc, nproc);
        break;
      case 2:
        __kmp_itt_frame_submit(gtid, this_thr->th.th_bar_min_time, cur_time, 1,
                               loc, nproc);
        break;
      case 3:
        if (__itt_metadata_add_ptr) {
          // Initialize with master's wait time
          kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time;
          // Set arrive time to zero to be able to check it in
          // __kmp_invoke_task(); the same is done inside the loop below
          this_thr->th.th_bar_arrive_time = 0;
          for (i = 1; i < nproc; ++i) {
            delta += (cur_time - other_threads[i]->th.th_bar_arrive_time);
            other_threads[i]->th.th_bar_arrive_time = 0;
          }
          __kmp_itt_metadata_imbalance(gtid, this_thr->th.th_frame_time,
                                       cur_time, delta, 0);
        }
        __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0,
                               loc, nproc);
        this_thr->th.th_frame_time = cur_time;
        break;
      }
    }
#endif /* USE_ITT_BUILD */
  }
#if USE_ITT_BUILD
  else {
    if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
      __kmp_itt_barrier_middle(gtid, itt_sync_obj);
  }
#endif /* USE_ITT_BUILD */

#if KMP_DEBUG
  if (KMP_MASTER_TID(tid)) {
    KA_TRACE(
        15,
        ("__kmp_join_barrier: T#%d(%d:%d) says all %d team threads arrived\n",
         gtid, team_id, tid, nproc));
  }
#endif /* KMP_DEBUG */

  KMP_MB(); // Flush all pending memory write invalidates.
  KA_TRACE(10,
           ("__kmp_join_barrier: T#%d(%d:%d) leaving\n", gtid, team_id, tid));

  ANNOTATE_BARRIER_END(&team->t.t_bar);
}
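// The fork barrier is executed at the start of a parallel region: the master
// releases the workers using the configured release pattern (propagating ICVs
// when pushing), and workers pick up the new team, ICVs, task team, and
// affinity settings once released.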
void __kmp_fork_barrier(int gtid, int tid) {
  KMP_TIME_PARTITIONED_BLOCK(OMP_fork_barrier);
  KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER);
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *team = (tid == 0) ? this_thr->th.th_team : NULL;
#if USE_ITT_BUILD
  void *itt_sync_obj = NULL;
#endif /* USE_ITT_BUILD */
  if (team)
    ANNOTATE_BARRIER_END(&team->t.t_bar);

  KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d:%d) has arrived\n", gtid,
                (team != NULL) ? team->t.t_id : -1, tid));

  // th_team pointer only valid for master thread here
  if (KMP_MASTER_TID(tid)) {
#if USE_ITT_BUILD && USE_ITT_NOTIFY
    if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
      // Create itt barrier object
      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 1);
      __kmp_itt_barrier_middle(gtid, itt_sync_obj); // Call acquired/releasing
    }
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */

#ifdef KMP_DEBUG
    kmp_info_t **other_threads = team->t.t_threads;
    int i;

    // Verify state
    KMP_MB();

    for (i = 1; i < team->t.t_nproc; ++i) {
      KA_TRACE(500,
               ("__kmp_fork_barrier: T#%d(%d:0) checking T#%d(%d:%d) fork go "
                "== %u.\n",
                gtid, team->t.t_id, other_threads[i]->th.th_info.ds.ds_gtid,
                team->t.t_id, other_threads[i]->th.th_info.ds.ds_tid,
                other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go));
      KMP_DEBUG_ASSERT(
          (TCR_4(other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go) &
           ~(KMP_BARRIER_SLEEP_STATE)) == KMP_INIT_BARRIER_STATE);
      KMP_DEBUG_ASSERT(other_threads[i]->th.th_team == team);
    }
#endif

    if (__kmp_tasking_mode != tskm_immediate_exec) {
      // 0 indicates setup current task team if nthreads > 1
      __kmp_task_team_setup(this_thr, team, 0);
    }

    /* The master thread may have changed its blocktime between the join
       barrier and the fork barrier. Copy the blocktime info to the thread,
       where __kmp_wait_template() can access it when the team struct is not
       guaranteed to exist. */
    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
#if KMP_USE_MONITOR
      this_thr->th.th_team_bt_intervals =
          team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
      this_thr->th.th_team_bt_set =
          team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
#else
      this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL(team, tid);
#endif
    }
  } // master

  switch (__kmp_barrier_release_pattern[bs_forkjoin_barrier]) {
  case bp_hyper_bar: {
    KMP_ASSERT(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]);
    __kmp_hyper_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
                                TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    break;
  }
  case bp_hierarchical_bar: {
    __kmp_hierarchical_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
                                       TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    break;
  }
  case bp_tree_bar: {
    KMP_ASSERT(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]);
    __kmp_tree_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
                               TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    break;
  }
  default: {
    __kmp_linear_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
                                 TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  }

#if OMPT_SUPPORT
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state == omp_state_wait_barrier_implicit) {
    int ds_tid = this_thr->th.th_info.ds.ds_tid;
    ompt_data_t *task_data = (team)
                                 ? OMPT_CUR_TASK_DATA(this_thr)
                                 : &(this_thr->th.ompt_thread_info.task_data);
    this_thr->th.ompt_thread_info.state = omp_state_overhead;
    void *codeptr = NULL;
    if (KMP_MASTER_TID(ds_tid) &&
        (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
         ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
      codeptr = team->t.ompt_team_info.master_return_address;
    if (ompt_enabled.ompt_callback_sync_region_wait) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_barrier, ompt_scope_end, NULL, task_data, codeptr);
    }
    if (ompt_enabled.ompt_callback_sync_region) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
          ompt_sync_region_barrier, ompt_scope_end, NULL, task_data, codeptr);
    }
    if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, task_data, 0, ds_tid);
    }
  }
#endif

  // Early exit for reaping threads releasing forkjoin barrier
  if (TCR_4(__kmp_global.g.g_done)) {
    this_thr->th.th_task_team = NULL;

#if USE_ITT_BUILD && USE_ITT_NOTIFY
    if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
      if (!KMP_MASTER_TID(tid)) {
        itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
        if (itt_sync_obj)
          __kmp_itt_barrier_finished(gtid, itt_sync_obj);
      }
    }
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
    KA_TRACE(10, ("__kmp_fork_barrier: T#%d is leaving early\n", gtid));
    return;
  }

  /* We can now assume that a valid team structure has been allocated by the
     master and propagated to all worker threads. The current thread, however,
     may not be part of the team, so we can't blindly assume that the team
     pointer is non-null. */
  team = (kmp_team_t *)TCR_PTR(this_thr->th.th_team);
  KMP_DEBUG_ASSERT(team != NULL);
  tid = __kmp_tid_from_gtid(gtid);

#if KMP_BARRIER_ICV_PULL
  /* Master thread's copy of the ICVs was set up on the implicit taskdata in
     __kmp_reinitialize_team. __kmp_fork_call() assumes the master thread's
     implicit task has this data before this function is called. */
  {
    KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy);
    if (!KMP_MASTER_TID(tid)) { // master thread already has ICVs
      // Copy the initial ICVs from the master's thread struct to the implicit
      // task for this tid.
      KA_TRACE(10,
               ("__kmp_fork_barrier: T#%d(%d) is PULLing ICVs\n", gtid, tid));
      __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team,
                               tid, FALSE);
      copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
                &team->t.t_threads[0]
                     ->th.th_bar[bs_forkjoin_barrier]
                     .bb.th_fixed_icvs);
    }
  }
#endif // KMP_BARRIER_ICV_PULL

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    __kmp_task_team_sync(this_thr, team);
  }

#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
  kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
  if (proc_bind == proc_bind_intel) {
#endif
#if KMP_AFFINITY_SUPPORTED
    // Call dynamic affinity settings
    if (__kmp_affinity_type == affinity_balanced && team->t.t_size_changed) {
      __kmp_balanced_affinity(tid, team->t.t_nproc);
    }
#endif // KMP_AFFINITY_SUPPORTED
#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
  } else if (proc_bind != proc_bind_false) {
    if (this_thr->th.th_new_place == this_thr->th.th_current_place) {
      KA_TRACE(100, ("__kmp_fork_barrier: T#%d already in correct place %d\n",
                     __kmp_gtid_from_thread(this_thr),
                     this_thr->th.th_current_place));
    } else {
      __kmp_affinity_set_place(gtid);
    }
  }
#endif

#if USE_ITT_BUILD && USE_ITT_NOTIFY
  if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
    if (!KMP_MASTER_TID(tid)) {
      // Get correct barrier object
      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
      __kmp_itt_barrier_finished(gtid, itt_sync_obj); // Workers call acquired
    } // (prepare called inside barrier_release)
  }
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
  ANNOTATE_BARRIER_END(&team->t.t_bar);
  KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d:%d) is leaving\n", gtid,
                team->t.t_id, tid));
}
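// Prepares ICV propagation for a (re)forked team: depending on the configured
// mechanism the ICVs are staged in the master's th_fixed_icvs (pull), left for
// the fork barrier to push, or copied linearly to every implicit task here.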
void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
                          kmp_internal_control_t *new_icvs, ident_t *loc) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_setup_icv_copy);

  KMP_DEBUG_ASSERT(team && new_nproc && new_icvs);
  KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);

/* Master thread's copy of the ICVs was set up on the implicit taskdata in
   __kmp_reinitialize_team. __kmp_fork_call() assumes the master thread's
   implicit task has this data before this function is called. */
#if KMP_BARRIER_ICV_PULL
  /* Copy ICVs into the master's th_fixed_icvs, where all of the worker threads
     can access them and make their own copies after the barrier. */
  KMP_DEBUG_ASSERT(team->t.t_threads[0]); // The threads arrays should be
  // allocated at this point
  copy_icvs(
      &team->t.t_threads[0]->th.th_bar[bs_forkjoin_barrier].bb.th_fixed_icvs,
      new_icvs);
  KF_TRACE(10, ("__kmp_setup_icv_copy: PULL: T#%d this_thread=%p team=%p\n", 0,
                team->t.t_threads[0], team));
#elif KMP_BARRIER_ICV_PUSH
  // The ICVs will be propagated in the fork barrier, so nothing needs to be
  // done here.
  KF_TRACE(10, ("__kmp_setup_icv_copy: PUSH: T#%d this_thread=%p team=%p\n", 0,
                team->t.t_threads[0], team));
#else
  // Copy the ICVs to each of the non-master threads. This takes O(nthreads)
  // time.
  ngo_load(new_icvs);
  KMP_DEBUG_ASSERT(team->t.t_threads[0]); // The threads arrays should be
  // allocated at this point
  for (int f = 1; f < new_nproc; ++f) { // Skip the master thread
    // TODO: GEH - pass in better source location info since usually NULL here
    KF_TRACE(10, ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n",
                  f, team->t.t_threads[f], team));
    __kmp_init_implicit_task(loc, team->t.t_threads[f], team, f, FALSE);
    ngo_store_icvs(&team->t.t_implicit_task_taskdata[f].td_icvs, new_icvs);
    KF_TRACE(10, ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n",
                  f, team->t.t_threads[f], team));
  }
  ngo_sync();
#endif // KMP_BARRIER_ICV_PULL
}