17 #include "kmp_wait_release.h" 18 #include "kmp_stats.h" 24 #include <immintrin.h> 25 #define USE_NGO_STORES 1 28 #if KMP_MIC && USE_NGO_STORES 30 #define ngo_load(src) __m512d Vt = _mm512_load_pd((void *)(src)) 31 #define ngo_store_icvs(dst, src) _mm512_storenrngo_pd((void *)(dst), Vt) 32 #define ngo_store_go(dst, src) _mm512_storenrngo_pd((void *)(dst), Vt) 33 #define ngo_sync() __asm__ volatile ("lock; addl $0,0(%%rsp)" ::: "memory") 35 #define ngo_load(src) ((void)0) 36 #define ngo_store_icvs(dst, src) copy_icvs((dst), (src)) 37 #define ngo_store_go(dst, src) KMP_MEMCPY((dst), (src), CACHE_LINE) 38 #define ngo_sync() ((void)0) 41 void __kmp_print_structure(
void);
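
// ----------------------------- Barrier Algorithms -----------------------------
// Each barrier pattern below is split into a gather half (workers signal arrival
// up the topology toward the master) and a release half (the master signals the
// workers to continue).  Arrival and release are tracked with the 64-bit
// b_arrived/b_go flags in each thread's kmp_bstate_t, using the kmp_flag_64
// wait/release primitives from kmp_wait_release.h.

// Linear barrier gather: every worker bumps its own b_arrived flag and the master
// spins on each worker's flag in turn, so the gather is O(nproc) on the master.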
static void
__kmp_linear_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
                            void (*reduce)(void *, void *)
                            USE_ITT_BUILD_ARG(void *itt_sync_obj))
{
    KMP_TIME_DEVELOPER_BLOCK(KMP_linear_gather);
    register kmp_team_t *team = this_thr->th.th_team;
    register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
    register kmp_info_t **other_threads = team->t.t_threads;

    KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));
    KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);

#if USE_ITT_BUILD && USE_ITT_NOTIFY
    // Barrier imbalance - save arrive time to the thread
    if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
        this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time = __itt_get_timestamp();
    }
#endif
    // We now perform a linear reduction to signal that all of the threads have arrived.
    if (!KMP_MASTER_TID(tid)) {
        KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d)"
                      "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid,
                      __kmp_gtid_from_tid(0, team), team->t.t_id, 0, &thr_bar->b_arrived,
                      thr_bar->b_arrived, thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));
        // Mark arrival to master thread
        kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[0]);
        flag.release();
    } else {
        register kmp_balign_team_t *team_bar = &team->t.t_bar[bt];
        register int nproc = this_thr->th.th_team_nproc;
        register int i;
        // Don't have to worry about sleep bit here or atomic since team setting
        register kmp_uint64 new_state = team_bar->b_arrived + KMP_BARRIER_STATE_BUMP;

        // Collect all the worker team member threads.
        for (i=1; i<nproc; ++i) {
#if KMP_CACHE_MANAGE
            // Prefetch next thread's arrived count
            if (i+1 < nproc)
                KMP_CACHE_PREFETCH(&other_threads[i+1]->th.th_bar[bt].bb.b_arrived);
#endif /* KMP_CACHE_MANAGE */
            KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) "
                          "arrived(%p) == %llu\n", gtid, team->t.t_id, tid,
                          __kmp_gtid_from_tid(i, team), team->t.t_id, i,
                          &other_threads[i]->th.th_bar[bt].bb.b_arrived, new_state));

            // Wait for the worker thread to arrive
            kmp_flag_64 flag(&other_threads[i]->th.th_bar[bt].bb.b_arrived, new_state);
            flag.wait(this_thr, FALSE
                      USE_ITT_BUILD_ARG(itt_sync_obj) );
#if USE_ITT_BUILD && USE_ITT_NOTIFY
            // Barrier imbalance - write the min of this thread's time and the other thread's time
            if (__kmp_forkjoin_frames_mode == 2) {
                this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time,
                                                       other_threads[i]->th.th_bar_min_time);
            }
#endif
            if (reduce) {
                KA_TRACE(100, ("__kmp_linear_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n", gtid,
                               team->t.t_id, tid, __kmp_gtid_from_tid(i, team), team->t.t_id, i));
                (*reduce)(this_thr->th.th_local.reduce_data,
                          other_threads[i]->th.th_local.reduce_data);
            }
        }
        // Don't have to worry about sleep bit here or atomic since team setting
        team_bar->b_arrived = new_state;
        KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) set team %d arrived(%p) = %llu\n",
                      gtid, team->t.t_id, tid, team->t.t_id, &team_bar->b_arrived, new_state));
    }
    KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));
}
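
// Linear release: the master thread walks the team and sets each worker's b_go
// flag (optionally pushing updated ICVs first); workers simply wait on their own
// b_go flag and then reset it for the next barrier.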
static void
__kmp_linear_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
                             int propagate_icvs
                             USE_ITT_BUILD_ARG(void *itt_sync_obj))
{
    KMP_TIME_DEVELOPER_BLOCK(KMP_linear_release);
129 register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
130 register kmp_team_t *team;
132 if (KMP_MASTER_TID(tid)) {
133 register unsigned int i;
134 register kmp_uint32 nproc = this_thr->th.th_team_nproc;
135 register kmp_info_t **other_threads;
137 team = __kmp_threads[gtid]->th.th_team;
138 KMP_DEBUG_ASSERT(team != NULL);
139 other_threads = team->t.t_threads;
141 KA_TRACE(20, (
"__kmp_linear_barrier_release: T#%d(%d:%d) master enter for barrier type %d\n",
142 gtid, team->t.t_id, tid, bt));
#if KMP_BARRIER_ICV_PUSH
        {
            KMP_TIME_DEVELOPER_BLOCK(USER_icv_copy);
            if (propagate_icvs) {
                ngo_load(&team->t.t_implicit_task_taskdata[0].td_icvs);
                for (i=1; i<nproc; ++i) {
                    __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[i], team, i, FALSE);
                    ngo_store_icvs(&team->t.t_implicit_task_taskdata[i].td_icvs,
                                   &team->t.t_implicit_task_taskdata[0].td_icvs);
                }
                ngo_sync();
            }
        }
#endif // KMP_BARRIER_ICV_PUSH

        // Now, release all of the worker threads
        for (i=1; i<nproc; ++i) {
165 KMP_CACHE_PREFETCH(&other_threads[i+1]->th.th_bar[bt].bb.b_go);
167 KA_TRACE(20, (
"__kmp_linear_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d) " 168 "go(%p): %u => %u\n", gtid, team->t.t_id, tid,
169 other_threads[i]->th.th_info.ds.ds_gtid, team->t.t_id, i,
170 &other_threads[i]->th.th_bar[bt].bb.b_go,
171 other_threads[i]->th.th_bar[bt].bb.b_go,
172 other_threads[i]->th.th_bar[bt].bb.b_go + KMP_BARRIER_STATE_BUMP));
            kmp_flag_64 flag(&other_threads[i]->th.th_bar[bt].bb.b_go, other_threads[i]);
            flag.release();
        }
    } else { // Wait for the MASTER thread to release us
        KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d wait go(%p) == %u\n",
                      gtid, &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
180 kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
181 flag.wait(this_thr, TRUE
182 USE_ITT_BUILD_ARG(itt_sync_obj) );
#if USE_ITT_BUILD && USE_ITT_NOTIFY
        if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
            // In a fork barrier; cannot get the object reliably (or ITTNOTIFY is disabled)
            itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
            // Cancel wait on previous parallel region...
            __kmp_itt_task_starting(itt_sync_obj);

            if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
                return;

            itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
            if (itt_sync_obj != NULL)
                // Call prepare as early as possible for "new" barrier
                __kmp_itt_task_finished(itt_sync_obj);
        } else
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
            // Early exit for reaping threads releasing forkjoin barrier
            if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
                return;
204 tid = __kmp_tid_from_gtid(gtid);
205 team = __kmp_threads[gtid]->th.th_team;
207 KMP_DEBUG_ASSERT(team != NULL);
208 TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
209 KA_TRACE(20, (
"__kmp_linear_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
210 gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
213 KA_TRACE(20, (
"__kmp_linear_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
214 gtid, team->t.t_id, tid, bt));
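
// Tree barrier: threads are arranged in a tree with branching factor
// 2^branch_bits.  During the gather each parent waits for its direct children's
// b_arrived flags (reducing data as it goes) and then reports to its own parent.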
static void
__kmp_tree_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
                          void (*reduce)(void *, void *)
                          USE_ITT_BUILD_ARG(void *itt_sync_obj))
{
    KMP_TIME_DEVELOPER_BLOCK(KMP_tree_gather);
224 register kmp_team_t *team = this_thr->th.th_team;
225 register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
226 register kmp_info_t **other_threads = team->t.t_threads;
227 register kmp_uint32 nproc = this_thr->th.th_team_nproc;
228 register kmp_uint32 branch_bits = __kmp_barrier_gather_branch_bits[bt];
229 register kmp_uint32 branch_factor = 1 << branch_bits;
230 register kmp_uint32 child;
231 register kmp_uint32 child_tid;
232 register kmp_uint64 new_state;
234 KA_TRACE(20, (
"__kmp_tree_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
235 gtid, team->t.t_id, tid, bt));
236 KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);
238 #if USE_ITT_BUILD && USE_ITT_NOTIFY 240 if(__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
241 this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time = __itt_get_timestamp();
    // Perform tree gather to wait until all threads have arrived; reduce any required data as we go
    child_tid = (tid << branch_bits) + 1;
    if (child_tid < nproc) {
        // Parent threads wait for all their children to arrive
        new_state = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
        child = 1;
        do {
            register kmp_info_t *child_thr = other_threads[child_tid];
252 register kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
255 if (child+1 <= branch_factor && child_tid+1 < nproc)
256 KMP_CACHE_PREFETCH(&other_threads[child_tid+1]->th.th_bar[bt].bb.b_arrived);
258 KA_TRACE(20, (
"__kmp_tree_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) " 259 "arrived(%p) == %llu\n", gtid, team->t.t_id, tid,
260 __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid,
261 &child_bar->b_arrived, new_state));
263 kmp_flag_64 flag(&child_bar->b_arrived, new_state);
264 flag.wait(this_thr, FALSE
265 USE_ITT_BUILD_ARG(itt_sync_obj) );
266 #if USE_ITT_BUILD && USE_ITT_NOTIFY 268 if (__kmp_forkjoin_frames_mode == 2) {
269 this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time,
270 child_thr->th.th_bar_min_time);
274 KA_TRACE(100, (
"__kmp_tree_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n",
275 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
276 team->t.t_id, child_tid));
277 (*reduce)(this_thr->th.th_local.reduce_data, child_thr->th.th_local.reduce_data);
            child++;
            child_tid++;
        }
        while (child <= branch_factor && child_tid < nproc);
    }
285 if (!KMP_MASTER_TID(tid)) {
286 register kmp_int32 parent_tid = (tid - 1) >> branch_bits;
288 KA_TRACE(20, (
"__kmp_tree_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) " 289 "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid,
290 __kmp_gtid_from_tid(parent_tid, team), team->t.t_id, parent_tid,
291 &thr_bar->b_arrived, thr_bar->b_arrived,
292 thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));
        // Mark arrival to parent thread
        kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[parent_tid]);
        flag.release();
    } else {
        // Need to update the team arrived pointer if we are the master thread
        if (nproc > 1) // New value was already computed above
            team->t.t_bar[bt].b_arrived = new_state;
        else
            team->t.t_bar[bt].b_arrived += KMP_BARRIER_STATE_BUMP;
305 KA_TRACE(20, (
"__kmp_tree_barrier_gather: T#%d(%d:%d) set team %d arrived(%p) = %llu\n",
306 gtid, team->t.t_id, tid, team->t.t_id,
307 &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived));
309 KA_TRACE(20, (
"__kmp_tree_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
310 gtid, team->t.t_id, tid, bt));
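
// Tree release: the master wakes its direct children via their b_go flags and
// each released parent in turn wakes its own children, so the release fans out
// down the same tree used for the gather.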
static void
__kmp_tree_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
                           int propagate_icvs
                           USE_ITT_BUILD_ARG(void *itt_sync_obj))
{
    KMP_TIME_DEVELOPER_BLOCK(KMP_tree_release);
319 register kmp_team_t *team;
320 register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
321 register kmp_uint32 nproc;
322 register kmp_uint32 branch_bits = __kmp_barrier_release_branch_bits[bt];
323 register kmp_uint32 branch_factor = 1 << branch_bits;
324 register kmp_uint32 child;
325 register kmp_uint32 child_tid;
328 if (!KMP_MASTER_TID(tid)) {
329 KA_TRACE(20, (
"__kmp_tree_barrier_release: T#%d wait go(%p) == %u\n",
330 gtid, &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
332 kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
333 flag.wait(this_thr, TRUE
334 USE_ITT_BUILD_ARG(itt_sync_obj) );
335 #if USE_ITT_BUILD && USE_ITT_NOTIFY 336 if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
338 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
340 __kmp_itt_task_starting(itt_sync_obj);
342 if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
345 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
346 if (itt_sync_obj != NULL)
348 __kmp_itt_task_finished(itt_sync_obj);
352 if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
356 team = __kmp_threads[gtid]->th.th_team;
357 KMP_DEBUG_ASSERT(team != NULL);
358 tid = __kmp_tid_from_gtid(gtid);
360 TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
361 KA_TRACE(20, (
"__kmp_tree_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
362 gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
365 team = __kmp_threads[gtid]->th.th_team;
366 KMP_DEBUG_ASSERT(team != NULL);
367 KA_TRACE(20, (
"__kmp_tree_barrier_release: T#%d(%d:%d) master enter for barrier type %d\n",
368 gtid, team->t.t_id, tid, bt));
370 nproc = this_thr->th.th_team_nproc;
371 child_tid = (tid << branch_bits) + 1;
    if (child_tid < nproc) {
        register kmp_info_t **other_threads = team->t.t_threads;
        child = 1;
        // Parent threads release all their children
        do {
            register kmp_info_t *child_thr = other_threads[child_tid];
379 register kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
382 if (child+1 <= branch_factor && child_tid+1 < nproc)
383 KMP_CACHE_PREFETCH(&other_threads[child_tid+1]->th.th_bar[bt].bb.b_go);
386 #if KMP_BARRIER_ICV_PUSH 388 KMP_TIME_DEVELOPER_BLOCK(USER_icv_copy);
389 if (propagate_icvs) {
390 __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[child_tid],
391 team, child_tid, FALSE);
392 copy_icvs(&team->t.t_implicit_task_taskdata[child_tid].td_icvs,
393 &team->t.t_implicit_task_taskdata[0].td_icvs);
396 #endif // KMP_BARRIER_ICV_PUSH 397 KA_TRACE(20, (
"__kmp_tree_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)" 398 "go(%p): %u => %u\n", gtid, team->t.t_id, tid,
399 __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
400 child_tid, &child_bar->b_go, child_bar->b_go,
401 child_bar->b_go + KMP_BARRIER_STATE_BUMP));
            // Release child from barrier
            kmp_flag_64 flag(&child_bar->b_go, child_thr);
            flag.release();
            child++;
            child_tid++;
        }
        while (child <= branch_factor && child_tid < nproc);
    }
410 KA_TRACE(20, (
"__kmp_tree_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
411 gtid, team->t.t_id, tid, bt));
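
// Hypercube-embedded tree barrier: at each level a thread either reports to its
// parent for that level (if its low tid bits at that level are non-zero) or
// collects the children that map onto it, giving a gather whose depth grows
// logarithmically in the team size.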
static void
__kmp_hyper_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
                           void (*reduce)(void *, void *)
                           USE_ITT_BUILD_ARG(void *itt_sync_obj))
{
    KMP_TIME_DEVELOPER_BLOCK(KMP_hyper_gather);
422 register kmp_team_t *team = this_thr->th.th_team;
423 register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
424 register kmp_info_t **other_threads = team->t.t_threads;
425 register kmp_uint64 new_state = KMP_BARRIER_UNUSED_STATE;
426 register kmp_uint32 num_threads = this_thr->th.th_team_nproc;
427 register kmp_uint32 branch_bits = __kmp_barrier_gather_branch_bits[bt];
428 register kmp_uint32 branch_factor = 1 << branch_bits;
429 register kmp_uint32 offset;
430 register kmp_uint32 level;
432 KA_TRACE(20, (
"__kmp_hyper_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
433 gtid, team->t.t_id, tid, bt));
435 KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);
437 #if USE_ITT_BUILD && USE_ITT_NOTIFY 439 if(__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
440 this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time = __itt_get_timestamp();
445 kmp_flag_64 p_flag(&thr_bar->b_arrived);
446 for (level=0, offset=1; offset<num_threads; level+=branch_bits, offset<<=branch_bits)
448 register kmp_uint32 child;
449 register kmp_uint32 child_tid;
451 if (((tid >> level) & (branch_factor - 1)) != 0) {
452 register kmp_int32 parent_tid = tid & ~((1 << (level + branch_bits)) -1);
454 KA_TRACE(20, (
"__kmp_hyper_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) " 455 "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid,
456 __kmp_gtid_from_tid(parent_tid, team), team->t.t_id, parent_tid,
457 &thr_bar->b_arrived, thr_bar->b_arrived,
458 thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));
            // Mark arrival to parent thread
            p_flag.set_waiter(other_threads[parent_tid]);
            p_flag.release();
            break; // Only one parent to report to; done gathering at this level
        }

        // Parent threads wait for children to arrive
        if (new_state == KMP_BARRIER_UNUSED_STATE)
            new_state = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
471 for (child=1, child_tid=tid+(1 << level); child<branch_factor && child_tid<num_threads;
472 child++, child_tid+=(1 << level))
474 register kmp_info_t *child_thr = other_threads[child_tid];
475 register kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
477 register kmp_uint32 next_child_tid = child_tid + (1 << level);
479 if (child+1 < branch_factor && next_child_tid < num_threads)
480 KMP_CACHE_PREFETCH(&other_threads[next_child_tid]->th.th_bar[bt].bb.b_arrived);
482 KA_TRACE(20, (
"__kmp_hyper_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) " 483 "arrived(%p) == %llu\n", gtid, team->t.t_id, tid,
484 __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid,
485 &child_bar->b_arrived, new_state));
487 kmp_flag_64 c_flag(&child_bar->b_arrived, new_state);
488 c_flag.wait(this_thr, FALSE
489 USE_ITT_BUILD_ARG(itt_sync_obj) );
490 #if USE_ITT_BUILD && USE_ITT_NOTIFY 492 if (__kmp_forkjoin_frames_mode == 2) {
493 this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time,
494 child_thr->th.th_bar_min_time);
498 KA_TRACE(100, (
"__kmp_hyper_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n",
499 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
500 team->t.t_id, child_tid));
501 (*reduce)(this_thr->th.th_local.reduce_data, child_thr->th.th_local.reduce_data);
    if (KMP_MASTER_TID(tid)) {
        // Need to update the team arrived pointer if we are the master thread
        if (new_state == KMP_BARRIER_UNUSED_STATE)
            team->t.t_bar[bt].b_arrived += KMP_BARRIER_STATE_BUMP;
        else
            team->t.t_bar[bt].b_arrived = new_state;
512 KA_TRACE(20, (
"__kmp_hyper_barrier_gather: T#%d(%d:%d) set team %d arrived(%p) = %llu\n",
513 gtid, team->t.t_id, tid, team->t.t_id,
514 &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived));
516 KA_TRACE(20, (
"__kmp_hyper_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
517 gtid, team->t.t_id, tid, bt));
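
// Hyper release: the master releases subtrees of workers level by level.  With
// KMP_REVERSE_HYPER_BAR defined (the default here), children are released in the
// reverse of the gather order, which generally performs better.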
#define KMP_REVERSE_HYPER_BAR

static void
__kmp_hyper_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
                            int propagate_icvs
                            USE_ITT_BUILD_ARG(void *itt_sync_obj))
{
    KMP_TIME_DEVELOPER_BLOCK(KMP_hyper_release);
528 register kmp_team_t *team;
529 register kmp_bstate_t *thr_bar = & this_thr -> th.th_bar[ bt ].bb;
530 register kmp_info_t **other_threads;
531 register kmp_uint32 num_threads;
532 register kmp_uint32 branch_bits = __kmp_barrier_release_branch_bits[ bt ];
533 register kmp_uint32 branch_factor = 1 << branch_bits;
534 register kmp_uint32 child;
535 register kmp_uint32 child_tid;
536 register kmp_uint32 offset;
537 register kmp_uint32 level;
542 if (KMP_MASTER_TID(tid)) {
543 team = __kmp_threads[gtid]->th.th_team;
544 KMP_DEBUG_ASSERT(team != NULL);
545 KA_TRACE(20, (
"__kmp_hyper_barrier_release: T#%d(%d:%d) master enter for barrier type %d\n",
546 gtid, team->t.t_id, tid, bt));
#if KMP_BARRIER_ICV_PUSH
        if (propagate_icvs) { // Master already has ICVs in final destination; copy
            copy_icvs(&thr_bar->th_fixed_icvs, &team->t.t_implicit_task_taskdata[tid].td_icvs);
        }
#endif
    }
    else { // Handle non-master threads: wait for the master to release us
        KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d wait go(%p) == %u\n",
                      gtid, &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
557 kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
558 flag.wait(this_thr, TRUE
559 USE_ITT_BUILD_ARG(itt_sync_obj) );
560 #if USE_ITT_BUILD && USE_ITT_NOTIFY 561 if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
563 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
565 __kmp_itt_task_starting(itt_sync_obj);
567 if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
570 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
571 if (itt_sync_obj != NULL)
573 __kmp_itt_task_finished(itt_sync_obj);
577 if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
581 team = __kmp_threads[gtid]->th.th_team;
582 KMP_DEBUG_ASSERT(team != NULL);
583 tid = __kmp_tid_from_gtid(gtid);
585 TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
586 KA_TRACE(20, (
"__kmp_hyper_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
587 gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
590 num_threads = this_thr->th.th_team_nproc;
591 other_threads = team->t.t_threads;
#ifdef KMP_REVERSE_HYPER_BAR
    // Count up to correct level for parent
    for (level=0, offset=1; offset<num_threads && (((tid>>level) & (branch_factor-1)) == 0);
         level+=branch_bits, offset<<=branch_bits);

    // Now go down from there
    for (level-=branch_bits, offset>>=branch_bits; offset != 0;
         level-=branch_bits, offset>>=branch_bits)
#else
    // Go down the tree, level by level
    for (level=0, offset=1; offset<num_threads; level+=branch_bits, offset<<=branch_bits)
#endif // KMP_REVERSE_HYPER_BAR
    {
#ifdef KMP_REVERSE_HYPER_BAR
        /* Go through the children in reverse order, highest to lowest.
           Initial setting of child is conservative here. */
        child = num_threads >> ((level==0) ? level : level-1);
        for (child=(child<branch_factor-1) ? child : branch_factor-1, child_tid=tid+(child<<level);
             child>=1; child--, child_tid-=(1<<level))
#else
        if (((tid >> level) & (branch_factor - 1)) != 0)
            // No need to go lower than this, since this is the level parent would be notified
            break;
        // Iterate through children on this level of the tree
        for (child=1, child_tid=tid+(1<<level); child<branch_factor && child_tid<num_threads;
             child++, child_tid+=(1<<level))
#endif // KMP_REVERSE_HYPER_BAR
        {
            if (child_tid >= num_threads) continue; // Child doesn't exist, so keep going
            register kmp_info_t *child_thr = other_threads[child_tid];
624 register kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
626 register kmp_uint32 next_child_tid = child_tid - (1 << level);
# ifdef KMP_REVERSE_HYPER_BAR
            if (child-1 >= 1 && next_child_tid < num_threads)
# else
            if (child+1 < branch_factor && next_child_tid < num_threads)
# endif // KMP_REVERSE_HYPER_BAR
                KMP_CACHE_PREFETCH(&other_threads[next_child_tid]->th.th_bar[bt].bb.b_go);
#if KMP_BARRIER_ICV_PUSH
            if (propagate_icvs) // Push my fixed ICVs to my child
                copy_icvs(&child_bar->th_fixed_icvs, &thr_bar->th_fixed_icvs);
#endif // KMP_BARRIER_ICV_PUSH

            KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u) "
                          "go(%p): %u => %u\n", gtid, team->t.t_id, tid,
643 __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
644 child_tid, &child_bar->b_go, child_bar->b_go,
645 child_bar->b_go + KMP_BARRIER_STATE_BUMP));
            // Release child from barrier
            kmp_flag_64 flag(&child_bar->b_go, child_thr);
            flag.release();
        }
    }

#if KMP_BARRIER_ICV_PUSH
    if (propagate_icvs && !KMP_MASTER_TID(tid)) { // Copy ICVs locally to final destination
654 __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid, FALSE);
        copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, &thr_bar->th_fixed_icvs);
    }
#endif // KMP_BARRIER_ICV_PUSH
    KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
659 gtid, team->t.t_id, tid, bt));
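
// Hierarchical (machine-topology) barrier.  __kmp_init_hierarchical_barrier_thread
// caches each thread's position in the hierarchy (level, parent tid, leaf children)
// in its kmp_bstate_t, recomputing only when the team, team size, or tid changes.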
static bool
__kmp_init_hierarchical_barrier_thread(enum barrier_type bt, kmp_bstate_t *thr_bar, kmp_uint32 nproc,
                                       int gtid, int tid, kmp_team_t *team)
{
675 bool uninitialized = thr_bar->team == NULL;
676 bool team_changed = team != thr_bar->team;
677 bool team_sz_changed = nproc != thr_bar->nproc;
678 bool tid_changed = tid != thr_bar->old_tid;
681 if (uninitialized || team_sz_changed) {
682 __kmp_get_hierarchy(nproc, thr_bar);
685 if (uninitialized || team_sz_changed || tid_changed) {
686 thr_bar->my_level = thr_bar->depth-1;
687 thr_bar->parent_tid = -1;
688 if (!KMP_MASTER_TID(tid)) {
690 while (d<thr_bar->depth) {
692 if (d == thr_bar->depth-2) {
693 thr_bar->parent_tid = 0;
694 thr_bar->my_level = d;
697 else if ((rem = tid%thr_bar->skip_per_level[d+1]) != 0) {
699 thr_bar->parent_tid = tid - rem;
700 thr_bar->my_level = d;
706 thr_bar->offset = 7-(tid-thr_bar->parent_tid-1);
707 thr_bar->old_tid = tid;
708 thr_bar->wait_flag = KMP_BARRIER_NOT_WAITING;
709 thr_bar->team = team;
710 thr_bar->parent_bar = &team->t.t_threads[thr_bar->parent_tid]->th.th_bar[bt].bb;
712 if (uninitialized || team_changed || tid_changed) {
713 thr_bar->team = team;
714 thr_bar->parent_bar = &team->t.t_threads[thr_bar->parent_tid]->th.th_bar[bt].bb;
717 if (uninitialized || team_sz_changed || tid_changed) {
718 thr_bar->nproc = nproc;
719 thr_bar->leaf_kids = thr_bar->base_leaf_kids;
720 if (thr_bar->my_level == 0) thr_bar->leaf_kids=0;
721 if (thr_bar->leaf_kids && (kmp_uint32)tid+thr_bar->leaf_kids+1 > nproc)
722 thr_bar->leaf_kids = nproc - tid - 1;
723 thr_bar->leaf_state = 0;
        for (int i=0; i<thr_bar->leaf_kids; ++i)
            ((char *)&(thr_bar->leaf_state))[7-i] = 1;
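
// Hierarchical gather: with infinite blocktime and the on-core barrier enabled,
// leaf children set bits in this thread's b_arrived word (leaf_state) so a single
// flag wait covers all of them; otherwise each child is waited on individually,
// level by level, before this thread reports arrival to its own parent.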
static void
__kmp_hierarchical_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
                                  void (*reduce)(void *, void *)
                                  USE_ITT_BUILD_ARG(void *itt_sync_obj))
{
    KMP_TIME_DEVELOPER_BLOCK(KMP_hier_gather);
735 register kmp_team_t *team = this_thr->th.th_team;
736 register kmp_bstate_t *thr_bar = & this_thr->th.th_bar[bt].bb;
737 register kmp_uint32 nproc = this_thr->th.th_team_nproc;
738 register kmp_info_t **other_threads = team->t.t_threads;
739 register kmp_uint64 new_state;
741 int level = team->t.t_level;
743 if (other_threads[0]->th.th_teams_microtask)
744 if (this_thr->th.th_teams_size.nteams > 1)
747 if (level == 1) thr_bar->use_oncore_barrier = 1;
748 else thr_bar->use_oncore_barrier = 0;
750 KA_TRACE(20, (
"__kmp_hierarchical_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
751 gtid, team->t.t_id, tid, bt));
752 KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);
754 #if USE_ITT_BUILD && USE_ITT_NOTIFY 756 if(__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
757 this_thr->th.th_bar_arrive_time = __itt_get_timestamp();
761 (void)__kmp_init_hierarchical_barrier_thread(bt, thr_bar, nproc, gtid, tid, team);
763 if (thr_bar->my_level) {
764 register kmp_int32 child_tid;
765 new_state = (kmp_uint64)team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
766 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME && thr_bar->use_oncore_barrier) {
767 if (thr_bar->leaf_kids) {
768 kmp_uint64 leaf_state = KMP_MASTER_TID(tid) ? thr_bar->b_arrived | thr_bar->leaf_state : team->t.t_bar[bt].b_arrived | thr_bar->leaf_state;
769 KA_TRACE(20, (
"__kmp_hierarchical_barrier_gather: T#%d(%d:%d) waiting for leaf kids\n",
770 gtid, team->t.t_id, tid));
771 kmp_flag_64 flag(&thr_bar->b_arrived, leaf_state);
772 flag.wait(this_thr, FALSE
773 USE_ITT_BUILD_ARG(itt_sync_obj) );
775 for (child_tid=tid+1; child_tid<=tid+thr_bar->leaf_kids; ++child_tid) {
776 KA_TRACE(100, (
"__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n",
777 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
778 team->t.t_id, child_tid));
779 (*reduce)(this_thr->th.th_local.reduce_data, other_threads[child_tid]->th.th_local.reduce_data);
            // Clear the leaf_state bits
            (void) KMP_TEST_THEN_AND64((volatile kmp_int64 *)&thr_bar->b_arrived, ~(thr_bar->leaf_state));
785 for (kmp_uint32 d=1; d<thr_bar->my_level; ++d) {
786 kmp_uint32 last = tid+thr_bar->skip_per_level[d+1], skip = thr_bar->skip_per_level[d];
787 if (last > nproc) last = nproc;
788 for (child_tid=tid+skip; child_tid<(int)last; child_tid+=skip) {
789 register kmp_info_t *child_thr = other_threads[child_tid];
790 register kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
791 KA_TRACE(20, (
"__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) " 792 "arrived(%p) == %llu\n",
793 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
794 team->t.t_id, child_tid, &child_bar->b_arrived, new_state));
795 kmp_flag_64 flag(&child_bar->b_arrived, new_state);
796 flag.wait(this_thr, FALSE
797 USE_ITT_BUILD_ARG(itt_sync_obj) );
799 KA_TRACE(100, (
"__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n",
800 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
801 team->t.t_id, child_tid));
802 (*reduce)(this_thr->th.th_local.reduce_data, child_thr->th.th_local.reduce_data);
808 for (kmp_uint32 d=0; d<thr_bar->my_level; ++d) {
809 kmp_uint32 last = tid+thr_bar->skip_per_level[d+1], skip = thr_bar->skip_per_level[d];
810 if (last > nproc) last = nproc;
811 for (child_tid=tid+skip; child_tid<(int)last; child_tid+=skip) {
812 register kmp_info_t *child_thr = other_threads[child_tid];
813 register kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
814 KA_TRACE(20, (
"__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) " 815 "arrived(%p) == %llu\n",
816 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
817 team->t.t_id, child_tid, &child_bar->b_arrived, new_state));
818 kmp_flag_64 flag(&child_bar->b_arrived, new_state);
819 flag.wait(this_thr, FALSE
820 USE_ITT_BUILD_ARG(itt_sync_obj) );
822 KA_TRACE(100, (
"__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n",
823 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
824 team->t.t_id, child_tid));
825 (*reduce)(this_thr->th.th_local.reduce_data, child_thr->th.th_local.reduce_data);
833 if (!KMP_MASTER_TID(tid)) {
834 KA_TRACE(20, (
"__kmp_hierarchical_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) " 835 "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid,
836 __kmp_gtid_from_tid(thr_bar->parent_tid, team), team->t.t_id, thr_bar->parent_tid,
837 &thr_bar->b_arrived, thr_bar->b_arrived, thr_bar->b_arrived+KMP_BARRIER_STATE_BUMP));
840 if (thr_bar->my_level || __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME
841 || !thr_bar->use_oncore_barrier) {
842 kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[thr_bar->parent_tid]);
846 thr_bar->b_arrived = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
847 kmp_flag_oncore flag(&thr_bar->parent_bar->b_arrived, thr_bar->offset);
848 flag.set_waiter(other_threads[thr_bar->parent_tid]);
852 team->t.t_bar[bt].b_arrived = new_state;
853 KA_TRACE(20, (
"__kmp_hierarchical_barrier_gather: T#%d(%d:%d) set team %d arrived(%p) = %llu\n",
854 gtid, team->t.t_id, tid, team->t.t_id, &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived));
857 KA_TRACE(20, (
"__kmp_hierarchical_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
858 gtid, team->t.t_id, tid, bt));
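
// Hierarchical release: the master (or an already-released parent) wakes its
// children level by level.  With the on-core barrier, go flags and fixed ICVs are
// written with ngo stores, and leaf children are released by setting their bits
// in this thread's own b_go word (leaf_state), which the leaves watch via the
// on-core flag.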
static void
__kmp_hierarchical_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
                                   int propagate_icvs
                                   USE_ITT_BUILD_ARG(void *itt_sync_obj))
{
    KMP_TIME_DEVELOPER_BLOCK(KMP_hier_release);
867 register kmp_team_t *team;
868 register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
869 register kmp_uint32 nproc;
    bool team_change = false;
872 if (KMP_MASTER_TID(tid)) {
873 team = __kmp_threads[gtid]->th.th_team;
874 KMP_DEBUG_ASSERT(team != NULL);
875 KA_TRACE(20, (
"__kmp_hierarchical_barrier_release: T#%d(%d:%d) master entered barrier type %d\n",
876 gtid, team->t.t_id, tid, bt));
880 if (!thr_bar->use_oncore_barrier || __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME
881 || thr_bar->my_level != 0 || thr_bar->team == NULL) {
883 thr_bar->wait_flag = KMP_BARRIER_OWN_FLAG;
884 kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
885 flag.wait(this_thr, TRUE
886 USE_ITT_BUILD_ARG(itt_sync_obj) );
887 TCW_8(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
891 thr_bar->wait_flag = KMP_BARRIER_PARENT_FLAG;
            kmp_flag_oncore flag(&thr_bar->parent_bar->b_go, KMP_BARRIER_STATE_BUMP, thr_bar->offset,
                                 bt, this_thr
                                 USE_ITT_BUILD_ARG(itt_sync_obj) );
            flag.wait(this_thr, TRUE);
896 if (thr_bar->wait_flag == KMP_BARRIER_SWITCHING) {
897 TCW_8(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
            }
            else { // Reset my bits on parent's b_go flag
                ((char*)&(thr_bar->parent_bar->b_go))[thr_bar->offset] = 0;
            }
903 thr_bar->wait_flag = KMP_BARRIER_NOT_WAITING;
905 if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
908 team = __kmp_threads[gtid]->th.th_team;
909 KMP_DEBUG_ASSERT(team != NULL);
910 tid = __kmp_tid_from_gtid(gtid);
912 KA_TRACE(20, (
"__kmp_hierarchical_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
913 gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
917 nproc = this_thr->th.th_team_nproc;
918 int level = team->t.t_level;
920 if (team->t.t_threads[0]->th.th_teams_microtask ) {
921 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master && this_thr->th.th_teams_level == level)
923 if( this_thr->th.th_teams_size.nteams > 1 )
927 if (level == 1) thr_bar->use_oncore_barrier = 1;
928 else thr_bar->use_oncore_barrier = 0;
931 unsigned short int old_leaf_kids = thr_bar->leaf_kids;
932 kmp_uint64 old_leaf_state = thr_bar->leaf_state;
933 team_change = __kmp_init_hierarchical_barrier_thread(bt, thr_bar, nproc, gtid, tid, team);
935 if (team_change) old_leaf_kids = 0;
937 #if KMP_BARRIER_ICV_PUSH 938 if (propagate_icvs) {
939 __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid, FALSE);
940 if (KMP_MASTER_TID(tid)) {
941 copy_icvs(&thr_bar->th_fixed_icvs, &team->t.t_implicit_task_taskdata[tid].td_icvs);
943 else if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME && thr_bar->use_oncore_barrier) {
944 if (!thr_bar->my_level)
946 copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
947 &thr_bar->parent_bar->th_fixed_icvs);
951 if (thr_bar->my_level)
952 copy_icvs(&thr_bar->th_fixed_icvs, &thr_bar->parent_bar->th_fixed_icvs);
954 copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
955 &thr_bar->parent_bar->th_fixed_icvs);
958 #endif // KMP_BARRIER_ICV_PUSH 961 if (thr_bar->my_level) {
962 register kmp_int32 child_tid;
964 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME && thr_bar->use_oncore_barrier) {
965 if (KMP_MASTER_TID(tid)) {
967 thr_bar->b_go = KMP_BARRIER_STATE_BUMP;
969 ngo_load(&thr_bar->th_fixed_icvs);
            for (child_tid=thr_bar->skip_per_level[1]; child_tid<(int)nproc; child_tid+=thr_bar->skip_per_level[1]) {
972 register kmp_bstate_t *child_bar = &team->t.t_threads[child_tid]->th.th_bar[bt].bb;
973 KA_TRACE(20, (
"__kmp_hierarchical_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d)" 974 " go(%p): %u => %u\n",
975 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
976 team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,
977 child_bar->b_go + KMP_BARRIER_STATE_BUMP));
979 ngo_store_go(&child_bar->th_fixed_icvs, &thr_bar->th_fixed_icvs);
983 TCW_8(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
985 if (thr_bar->leaf_kids) {
987 if (team_change || old_leaf_kids < thr_bar->leaf_kids) {
989 thr_bar->b_go |= old_leaf_state;
992 last = tid+thr_bar->skip_per_level[1];
993 if (last > nproc) last = nproc;
994 for (child_tid=tid+1+old_leaf_kids; child_tid<(int)last; ++child_tid) {
995 register kmp_info_t *child_thr = team->t.t_threads[child_tid];
996 register kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
997 KA_TRACE(20, (
"__kmp_hierarchical_barrier_release: T#%d(%d:%d) releasing" 998 " T#%d(%d:%d) go(%p): %u => %u\n",
999 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
1000 team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,
1001 child_bar->b_go + KMP_BARRIER_STATE_BUMP));
1003 kmp_flag_64 flag(&child_bar->b_go, child_thr);
1008 thr_bar->b_go |= thr_bar->leaf_state;
            for (int d=thr_bar->my_level-1; d>=0; --d) {
1014 last = tid+thr_bar->skip_per_level[d+1];
1015 kmp_uint32 skip = thr_bar->skip_per_level[d];
1016 if (last > nproc) last = nproc;
1017 for (child_tid=tid+skip; child_tid<(int)last; child_tid+=skip) {
1018 register kmp_info_t *child_thr = team->t.t_threads[child_tid];
1019 register kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
1020 KA_TRACE(20, (
"__kmp_hierarchical_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d)" 1021 " go(%p): %u => %u\n",
1022 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
1023 team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,
1024 child_bar->b_go + KMP_BARRIER_STATE_BUMP));
1026 kmp_flag_64 flag(&child_bar->b_go, child_thr);
1031 #if KMP_BARRIER_ICV_PUSH 1032 if (propagate_icvs && !KMP_MASTER_TID(tid))
1033 copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, &thr_bar->th_fixed_icvs);
1034 #endif // KMP_BARRIER_ICV_PUSH 1036 KA_TRACE(20, (
"__kmp_hierarchical_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
1037 gtid, team->t.t_id, tid, bt));
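
// ----------------------------- Internal barriers -----------------------------
// __kmp_barrier drives a complete barrier for an explicit or implicit OpenMP
// barrier: it selects the gather and release patterns from
// __kmp_barrier_gather_pattern/__kmp_barrier_release_pattern, handles tasking,
// ITT frame reporting and OMPT callbacks; for a split barrier the release half
// may be deferred to __kmp_end_split_barrier.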
int
__kmp_barrier(enum barrier_type bt, int gtid, int is_split, size_t reduce_size,
              void *reduce_data, void (*reduce)(void *, void *))
{
    KMP_TIME_DEVELOPER_BLOCK(KMP_barrier);
1051 KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER);
1052 KMP_TIME_PARTITIONED_BLOCK(OMP_plain_barrier);
1053 register int tid = __kmp_tid_from_gtid(gtid);
1054 register kmp_info_t *this_thr = __kmp_threads[gtid];
1055 register kmp_team_t *team = this_thr->th.th_team;
1056 register int status = 0;
1057 ident_t *loc = __kmp_threads[gtid]->th.th_ident;
1059 ompt_task_id_t my_task_id;
1060 ompt_parallel_id_t my_parallel_id;
1063 KA_TRACE(15, (
"__kmp_barrier: T#%d(%d:%d) has arrived\n",
1064 gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid)));
1069 my_task_id = team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id;
1070 my_parallel_id = team->t.ompt_team_info.parallel_id;
1073 if (this_thr->th.ompt_thread_info.state == ompt_state_wait_single) {
1074 if (ompt_callbacks.ompt_callback(ompt_event_single_others_end)) {
1075 ompt_callbacks.ompt_callback(ompt_event_single_others_end)(
1076 my_parallel_id, my_task_id);
1080 if (ompt_callbacks.ompt_callback(ompt_event_barrier_begin)) {
1081 ompt_callbacks.ompt_callback(ompt_event_barrier_begin)(
1082 my_parallel_id, my_task_id);
1088 this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier;
1092 if (! team->t.t_serialized) {
1095 void *itt_sync_obj = NULL;
1097 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
1098 itt_sync_obj = __kmp_itt_barrier_object(gtid, bt, 1);
1101 if (__kmp_tasking_mode == tskm_extra_barrier) {
1102 __kmp_tasking_barrier(team, this_thr, gtid);
1103 KA_TRACE(15, (
"__kmp_barrier: T#%d(%d:%d) past tasking barrier\n",
1104 gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid)));
1110 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
1111 this_thr->th.th_team_bt_intervals = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
1112 this_thr->th.th_team_bt_set = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
1116 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
1117 __kmp_itt_barrier_starting(gtid, itt_sync_obj);
1121 if (KMP_MASTER_TID(tid)) {
1122 team->t.t_bar[bt].b_master_arrived += 1;
1124 this_thr->th.th_bar[bt].bb.b_worker_arrived += 1;
1127 if (reduce != NULL) {
1129 this_thr->th.th_local.reduce_data = reduce_data;
1132 if (KMP_MASTER_TID(tid) && __kmp_tasking_mode != tskm_immediate_exec)
1133 __kmp_task_team_setup(this_thr, team, 0);
1135 switch (__kmp_barrier_gather_pattern[bt]) {
1136 case bp_hyper_bar: {
1137 KMP_ASSERT(__kmp_barrier_gather_branch_bits[bt]);
1138 __kmp_hyper_barrier_gather(bt, this_thr, gtid, tid, reduce
1139 USE_ITT_BUILD_ARG(itt_sync_obj) );
1142 case bp_hierarchical_bar: {
1143 __kmp_hierarchical_barrier_gather(bt, this_thr, gtid, tid, reduce
1144 USE_ITT_BUILD_ARG(itt_sync_obj));
1148 KMP_ASSERT(__kmp_barrier_gather_branch_bits[bt]);
1149 __kmp_tree_barrier_gather(bt, this_thr, gtid, tid, reduce
1150 USE_ITT_BUILD_ARG(itt_sync_obj) );
1154 __kmp_linear_barrier_gather(bt, this_thr, gtid, tid, reduce
1155 USE_ITT_BUILD_ARG(itt_sync_obj) );
1161 if (KMP_MASTER_TID(tid)) {
1163 if (__kmp_tasking_mode != tskm_immediate_exec) {
1164 __kmp_task_team_wait(this_thr, team
1165 USE_ITT_BUILD_ARG(itt_sync_obj) );
1169 team->t.t_bar[bt].b_team_arrived += 1;
1176 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
1177 __kmp_itt_barrier_middle(gtid, itt_sync_obj);
1179 #if USE_ITT_BUILD && USE_ITT_NOTIFY 1181 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) && __kmp_forkjoin_frames_mode &&
1183 this_thr->th.th_teams_microtask == NULL &&
1185 team->t.t_active_level == 1)
1187 kmp_uint64 cur_time = __itt_get_timestamp();
1188 kmp_info_t **other_threads = team->t.t_threads;
1189 int nproc = this_thr->th.th_team_nproc;
1191 switch(__kmp_forkjoin_frames_mode) {
1193 __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, loc, nproc);
1194 this_thr->th.th_frame_time = cur_time;
1197 __kmp_itt_frame_submit(gtid, this_thr->th.th_bar_min_time, cur_time, 1, loc, nproc);
1200 if( __itt_metadata_add_ptr ) {
1202 kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time;
1204 this_thr->th.th_bar_arrive_time = 0;
1205 for (i=1; i<nproc; ++i) {
1206 delta += ( cur_time - other_threads[i]->th.th_bar_arrive_time );
1207 other_threads[i]->th.th_bar_arrive_time = 0;
1209 __kmp_itt_metadata_imbalance(gtid, this_thr->th.th_frame_time, cur_time, delta, (kmp_uint64)( reduce != NULL));
1211 __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, loc, nproc);
1212 this_thr->th.th_frame_time = cur_time;
1220 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
1221 __kmp_itt_barrier_middle(gtid, itt_sync_obj);
1224 if (status == 1 || ! is_split) {
1225 switch (__kmp_barrier_release_pattern[bt]) {
1226 case bp_hyper_bar: {
1227 KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
1228 __kmp_hyper_barrier_release(bt, this_thr, gtid, tid, FALSE
1229 USE_ITT_BUILD_ARG(itt_sync_obj) );
1232 case bp_hierarchical_bar: {
1233 __kmp_hierarchical_barrier_release(bt, this_thr, gtid, tid, FALSE
1234 USE_ITT_BUILD_ARG(itt_sync_obj) );
1238 KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
1239 __kmp_tree_barrier_release(bt, this_thr, gtid, tid, FALSE
1240 USE_ITT_BUILD_ARG(itt_sync_obj) );
1244 __kmp_linear_barrier_release(bt, this_thr, gtid, tid, FALSE
1245 USE_ITT_BUILD_ARG(itt_sync_obj) );
1248 if (__kmp_tasking_mode != tskm_immediate_exec) {
1249 __kmp_task_team_sync(this_thr, team);
1257 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
1258 __kmp_itt_barrier_finished(gtid, itt_sync_obj);
1262 if (__kmp_tasking_mode != tskm_immediate_exec) {
1264 if ( this_thr->th.th_task_team != NULL ) {
1265 void *itt_sync_obj = NULL;
1267 if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
1268 itt_sync_obj = __kmp_itt_barrier_object(gtid, bt, 1);
1269 __kmp_itt_barrier_starting(gtid, itt_sync_obj);
1273 KMP_DEBUG_ASSERT(this_thr->th.th_task_team->tt.tt_found_proxy_tasks == TRUE);
1274 __kmp_task_team_wait(this_thr, team
1275 USE_ITT_BUILD_ARG(itt_sync_obj));
1276 __kmp_task_team_setup(this_thr, team, 0);
1279 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
1280 __kmp_itt_barrier_finished(gtid, itt_sync_obj);
1285 KMP_DEBUG_ASSERT(team->t.t_task_team[this_thr->th.th_task_state] == NULL);
1286 KMP_DEBUG_ASSERT(this_thr->th.th_task_team == NULL);
1290 KA_TRACE(15, (
"__kmp_barrier: T#%d(%d:%d) is leaving with return value %d\n",
1291 gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid), status));
1296 if (ompt_callbacks.ompt_callback(ompt_event_barrier_end)) {
1297 ompt_callbacks.ompt_callback(ompt_event_barrier_end)(
1298 my_parallel_id, my_task_id);
1301 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
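
// Finish the release half of a split barrier started by __kmp_barrier() with
// is_split set; only the master issues the release, using the configured
// release pattern, then the team re-syncs task teams.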
void
__kmp_end_split_barrier(enum barrier_type bt, int gtid)
{
    KMP_TIME_DEVELOPER_BLOCK(KMP_end_split_barrier);
1313 int tid = __kmp_tid_from_gtid(gtid);
1314 kmp_info_t *this_thr = __kmp_threads[gtid];
1315 kmp_team_t *team = this_thr->th.th_team;
1317 if (!team->t.t_serialized) {
1318 if (KMP_MASTER_GTID(gtid)) {
1319 switch (__kmp_barrier_release_pattern[bt]) {
1320 case bp_hyper_bar: {
1321 KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
1322 __kmp_hyper_barrier_release(bt, this_thr, gtid, tid, FALSE
1323 USE_ITT_BUILD_ARG(NULL) );
1326 case bp_hierarchical_bar: {
1327 __kmp_hierarchical_barrier_release(bt, this_thr, gtid, tid, FALSE
1328 USE_ITT_BUILD_ARG(NULL));
1332 KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
1333 __kmp_tree_barrier_release(bt, this_thr, gtid, tid, FALSE
1334 USE_ITT_BUILD_ARG(NULL) );
1338 __kmp_linear_barrier_release(bt, this_thr, gtid, tid, FALSE
1339 USE_ITT_BUILD_ARG(NULL) );
1342 if (__kmp_tasking_mode != tskm_immediate_exec) {
1343 __kmp_task_team_sync(this_thr, team);
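
// Join barrier: executed by every thread at the end of a parallel region.  Only
// the gather half runs here; workers stay parked until the next fork barrier
// releases them.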
void
__kmp_join_barrier(int gtid)
{
    KMP_TIME_PARTITIONED_BLOCK(OMP_fork_join_barrier);
    KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER);
    KMP_TIME_DEVELOPER_BLOCK(KMP_join_barrier);
1356 register kmp_info_t *this_thr = __kmp_threads[gtid];
1357 register kmp_team_t *team;
1358 register kmp_uint nproc;
1359 kmp_info_t *master_thread;
1365 void *itt_sync_obj = NULL;
1367 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
1369 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
1375 team = this_thr->th.th_team;
1376 nproc = this_thr->th.th_team_nproc;
    KMP_DEBUG_ASSERT((int)nproc == team->t.t_nproc);
1378 tid = __kmp_tid_from_gtid(gtid);
1380 team_id = team->t.t_id;
1382 master_thread = this_thr->th.th_team_master;
1384 if (master_thread != team->t.t_threads[0]) {
1385 __kmp_print_structure();
1388 KMP_DEBUG_ASSERT(master_thread == team->t.t_threads[0]);
1392 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
1393 KMP_DEBUG_ASSERT(TCR_PTR(this_thr->th.th_team));
1394 KMP_DEBUG_ASSERT(TCR_PTR(this_thr->th.th_root));
1395 KMP_DEBUG_ASSERT(this_thr == team->t.t_threads[tid]);
1396 KA_TRACE(10, (
"__kmp_join_barrier: T#%d(%d:%d) arrived at join barrier\n", gtid, team_id, tid));
1401 ompt_callbacks.ompt_callback(ompt_event_barrier_begin)) {
1402 ompt_callbacks.ompt_callback(ompt_event_barrier_begin)(
1403 team->t.ompt_team_info.parallel_id,
1404 team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
1407 this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier;
1410 if (__kmp_tasking_mode == tskm_extra_barrier) {
1411 __kmp_tasking_barrier(team, this_thr, gtid);
1412 KA_TRACE(10, (
"__kmp_join_barrier: T#%d(%d:%d) past taking barrier\n", gtid, team_id, tid));
1415 if (__kmp_tasking_mode != tskm_immediate_exec) {
1416 KA_TRACE(20, (
"__kmp_join_barrier: T#%d, old team = %d, old task_team = %p, th_task_team = %p\n",
1417 __kmp_gtid_from_thread(this_thr), team_id, team->t.t_task_team[this_thr->th.th_task_state],
1418 this_thr->th.th_task_team));
1419 KMP_DEBUG_ASSERT(this_thr->th.th_task_team == team->t.t_task_team[this_thr->th.th_task_state]);
1427 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
1428 this_thr->th.th_team_bt_intervals = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
1429 this_thr->th.th_team_bt_set = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
1433 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
1434 __kmp_itt_barrier_starting(gtid, itt_sync_obj);
1437 switch (__kmp_barrier_gather_pattern[bs_forkjoin_barrier]) {
1438 case bp_hyper_bar: {
1439 KMP_ASSERT(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]);
1440 __kmp_hyper_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, NULL
1441 USE_ITT_BUILD_ARG(itt_sync_obj) );
1444 case bp_hierarchical_bar: {
1445 __kmp_hierarchical_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, NULL
1446 USE_ITT_BUILD_ARG(itt_sync_obj) );
1450 KMP_ASSERT(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]);
1451 __kmp_tree_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, NULL
1452 USE_ITT_BUILD_ARG(itt_sync_obj) );
1456 __kmp_linear_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, NULL
1457 USE_ITT_BUILD_ARG(itt_sync_obj) );
1465 if (KMP_MASTER_TID(tid)) {
1466 if (__kmp_tasking_mode != tskm_immediate_exec) {
1467 __kmp_task_team_wait(this_thr, team
1468 USE_ITT_BUILD_ARG(itt_sync_obj) );
#if KMP_STATS_ENABLED
        // Have the master flag the workers as waiting for the next parallel region
        // and wake them so they switch their timers to idle.
        for (int i=0; i<team->t.t_nproc; ++i) {
1474 kmp_info_t* team_thread = team->t.t_threads[i];
            if (team_thread == this_thr)
                continue;
            team_thread->th.th_stats->setIdleFlag();
1478 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME && team_thread->th.th_sleep_loc != NULL)
1479 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(team_thread), team_thread->th.th_sleep_loc);
1483 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
1484 __kmp_itt_barrier_middle(gtid, itt_sync_obj);
1487 # if USE_ITT_BUILD && USE_ITT_NOTIFY 1489 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) && __kmp_forkjoin_frames_mode &&
1491 this_thr->th.th_teams_microtask == NULL &&
1493 team->t.t_active_level == 1)
1495 kmp_uint64 cur_time = __itt_get_timestamp();
1496 ident_t * loc = team->t.t_ident;
1497 kmp_info_t **other_threads = team->t.t_threads;
1498 int nproc = this_thr->th.th_team_nproc;
1500 switch(__kmp_forkjoin_frames_mode) {
1502 __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, loc, nproc);
1505 __kmp_itt_frame_submit(gtid, this_thr->th.th_bar_min_time, cur_time, 1, loc, nproc);
1508 if( __itt_metadata_add_ptr ) {
1510 kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time;
1512 this_thr->th.th_bar_arrive_time = 0;
1513 for (i=1; i<nproc; ++i) {
1514 delta += ( cur_time - other_threads[i]->th.th_bar_arrive_time );
1515 other_threads[i]->th.th_bar_arrive_time = 0;
1517 __kmp_itt_metadata_imbalance(gtid, this_thr->th.th_frame_time, cur_time, delta, 0);
1519 __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, loc, nproc);
1520 this_thr->th.th_frame_time = cur_time;
1528 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
1529 __kmp_itt_barrier_middle(gtid, itt_sync_obj);
1534 if (KMP_MASTER_TID(tid)) {
1535 KA_TRACE(15, (
"__kmp_join_barrier: T#%d(%d:%d) says all %d team threads arrived\n",
1536 gtid, team_id, tid, nproc));
1542 KA_TRACE(10, (
"__kmp_join_barrier: T#%d(%d:%d) leaving\n", gtid, team_id, tid));
1547 if (ompt_callbacks.ompt_callback(ompt_event_barrier_end)) {
1548 ompt_callbacks.ompt_callback(ompt_event_barrier_end)(
1549 team->t.ompt_team_info.parallel_id,
1550 team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
1555 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
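
// Fork barrier: executed by the master at the start of a fork and by workers at
// the start of each parallel region.  The master sets up the task team and then
// releases the workers; released workers pull/sync ICVs, task teams, and
// affinity before running the new region.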
void
__kmp_fork_barrier(int gtid, int tid)
{
    KMP_TIME_PARTITIONED_BLOCK(OMP_fork_join_barrier);
    KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER);
    KMP_TIME_DEVELOPER_BLOCK(KMP_fork_barrier);
1568 kmp_info_t *this_thr = __kmp_threads[gtid];
1569 kmp_team_t *team = (tid == 0) ? this_thr->th.th_team : NULL;
1571 void * itt_sync_obj = NULL;
1574 KA_TRACE(10, (
"__kmp_fork_barrier: T#%d(%d:%d) has arrived\n",
1575 gtid, (team != NULL) ? team->t.t_id : -1, tid));
1578 if (KMP_MASTER_TID(tid)) {
1579 #if USE_ITT_BUILD && USE_ITT_NOTIFY 1580 if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
1582 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 1);
1583 __kmp_itt_barrier_middle(gtid, itt_sync_obj);
1588 register kmp_info_t **other_threads = team->t.t_threads;
1594 for(i=1; i<team->t.t_nproc; ++i) {
1595 KA_TRACE(500, (
"__kmp_fork_barrier: T#%d(%d:0) checking T#%d(%d:%d) fork go == %u.\n",
1596 gtid, team->t.t_id, other_threads[i]->th.th_info.ds.ds_gtid,
1597 team->t.t_id, other_threads[i]->th.th_info.ds.ds_tid,
1598 other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go));
1599 KMP_DEBUG_ASSERT((TCR_4(other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go)
1600 & ~(KMP_BARRIER_SLEEP_STATE))
1601 == KMP_INIT_BARRIER_STATE);
1602 KMP_DEBUG_ASSERT(other_threads[i]->th.th_team == team);
1606 if (__kmp_tasking_mode != tskm_immediate_exec) {
1607 __kmp_task_team_setup(this_thr, team, 0);
1614 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
1615 this_thr->th.th_team_bt_intervals = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
1616 this_thr->th.th_team_bt_set = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
1620 switch (__kmp_barrier_release_pattern[bs_forkjoin_barrier]) {
1621 case bp_hyper_bar: {
1622 KMP_ASSERT(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]);
1623 __kmp_hyper_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, TRUE
1624 USE_ITT_BUILD_ARG(itt_sync_obj) );
1627 case bp_hierarchical_bar: {
1628 __kmp_hierarchical_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, TRUE
1629 USE_ITT_BUILD_ARG(itt_sync_obj) );
1633 KMP_ASSERT(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]);
1634 __kmp_tree_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, TRUE
1635 USE_ITT_BUILD_ARG(itt_sync_obj) );
1639 __kmp_linear_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, TRUE
1640 USE_ITT_BUILD_ARG(itt_sync_obj) );
1645 if (TCR_4(__kmp_global.g.g_done)) {
1646 this_thr->th.th_task_team = NULL;
1648 #if USE_ITT_BUILD && USE_ITT_NOTIFY 1649 if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
1650 if (!KMP_MASTER_TID(tid)) {
1651 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
1653 __kmp_itt_barrier_finished(gtid, itt_sync_obj);
1657 KA_TRACE(10, (
"__kmp_fork_barrier: T#%d is leaving early\n", gtid));
1664 team = (kmp_team_t *)TCR_PTR(this_thr->th.th_team);
1665 KMP_DEBUG_ASSERT(team != NULL);
1666 tid = __kmp_tid_from_gtid(gtid);
1669 #if KMP_BARRIER_ICV_PULL 1676 KMP_TIME_DEVELOPER_BLOCK(USER_icv_copy);
1677 if (!KMP_MASTER_TID(tid)) {
1679 KA_TRACE(10, (
"__kmp_fork_barrier: T#%d(%d) is PULLing ICVs\n", gtid, tid));
1680 __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid, FALSE);
1681 copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
1682 &team->t.t_threads[0]->th.th_bar[bs_forkjoin_barrier].bb.th_fixed_icvs);
1685 #endif // KMP_BARRIER_ICV_PULL 1687 if (__kmp_tasking_mode != tskm_immediate_exec) {
1688 __kmp_task_team_sync(this_thr, team);
1691 #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED 1692 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
1693 if (proc_bind == proc_bind_intel) {
1695 #if KMP_AFFINITY_SUPPORTED 1697 if(__kmp_affinity_type == affinity_balanced && team->t.t_size_changed) {
1698 __kmp_balanced_affinity(tid, team->t.t_nproc);
1700 #endif // KMP_AFFINITY_SUPPORTED 1701 #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED 1703 else if (proc_bind != proc_bind_false) {
1704 if (this_thr->th.th_new_place == this_thr->th.th_current_place) {
1705 KA_TRACE(100, (
"__kmp_fork_barrier: T#%d already in correct place %d\n",
1706 __kmp_gtid_from_thread(this_thr), this_thr->th.th_current_place));
1709 __kmp_affinity_set_place(gtid);
1714 #if USE_ITT_BUILD && USE_ITT_NOTIFY 1715 if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
1716 if (!KMP_MASTER_TID(tid)) {
1718 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
1719 __kmp_itt_barrier_finished(gtid, itt_sync_obj);
1723 KA_TRACE(10, (
"__kmp_fork_barrier: T#%d(%d:%d) is leaving\n", gtid, team->t.t_id, tid));
void
__kmp_setup_icv_copy(kmp_team_t *team, int new_nproc, kmp_internal_control_t *new_icvs, ident_t *loc)
{
    KMP_TIME_DEVELOPER_BLOCK(KMP_setup_icv_copy);
1732 KMP_DEBUG_ASSERT(team && new_nproc && new_icvs);
1733 KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
1738 #if KMP_BARRIER_ICV_PULL 1741 KMP_DEBUG_ASSERT(team->t.t_threads[0]);
1742 copy_icvs(&team->t.t_threads[0]->th.th_bar[bs_forkjoin_barrier].bb.th_fixed_icvs, new_icvs);
1743 KF_TRACE(10, (
"__kmp_setup_icv_copy: PULL: T#%d this_thread=%p team=%p\n",
1744 0, team->t.t_threads[0], team));
1745 #elif KMP_BARRIER_ICV_PUSH 1747 KF_TRACE(10, (
"__kmp_setup_icv_copy: PUSH: T#%d this_thread=%p team=%p\n",
1748 0, team->t.t_threads[0], team));
1752 KMP_DEBUG_ASSERT(team->t.t_threads[0]);
    for (int f=1; f<new_nproc; ++f) { // Skip the master thread
1755 KF_TRACE(10, (
"__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n",
1756 f, team->t.t_threads[f], team));
1757 __kmp_init_implicit_task(loc, team->t.t_threads[f], team, f, FALSE);
1758 ngo_store_icvs(&team->t.t_implicit_task_taskdata[f].td_icvs, new_icvs);
1759 KF_TRACE(10, (
"__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n",
1760 f, team->t.t_threads[f], team));
1763 #endif // KMP_BARRIER_ICV_PULL