#include "kmp_wait_release.h"
#include "kmp_stats.h"

#if KMP_MIC
#include <immintrin.h>
#define USE_NGO_STORES 1
#endif // KMP_MIC

#if KMP_MIC && USE_NGO_STORES
// ICV copying/propagation uses non-temporal (NGO) stores on KNC
#define ngo_load(src)            __m512d Vt = _mm512_load_pd((void *)(src))
#define ngo_store_icvs(dst, src) _mm512_storenrngo_pd((void *)(dst), Vt)
#define ngo_store_go(dst, src)   _mm512_storenrngo_pd((void *)(dst), Vt)
#define ngo_sync()               __asm__ volatile ("lock; addl $0,0(%%rsp)" ::: "memory")
#else
#define ngo_load(src)            ((void)0)
#define ngo_store_icvs(dst, src) copy_icvs((dst), (src))
#define ngo_store_go(dst, src)   KMP_MEMCPY((dst), (src), CACHE_LINE)
#define ngo_sync()               ((void)0)
#endif /* KMP_MIC && USE_NGO_STORES */

void __kmp_print_structure(void);  // Forward declaration
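// ---------------------------- Barrier Algorithms ----------------------------

// Linear barrier: every worker posts its arrival on its own b_arrived flag and
// the master sweeps over the team, waiting on each worker in turn and folding
// in the optional reduction; O(nproc) steps on the master, O(1) on workers.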
static void
__kmp_linear_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
                            void (*reduce)(void *, void *)
                            USE_ITT_BUILD_ARG(void *itt_sync_obj))
{
    KMP_TIME_DEVELOPER_BLOCK(KMP_linear_gather);
    register kmp_team_t *team = this_thr->th.th_team;
    register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
    register kmp_info_t **other_threads = team->t.t_threads;

    KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));
    KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);

#if USE_ITT_BUILD && USE_ITT_NOTIFY
    // Barrier imbalance - save arrive time to the thread
    if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
        this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time = __itt_get_timestamp();
    }
#endif
    // We now perform a linear reduction to signal that all of the threads have arrived.
    if (!KMP_MASTER_TID(tid)) {
        KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d)"
                      "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid,
                      __kmp_gtid_from_tid(0, team), team->t.t_id, 0, &thr_bar->b_arrived,
                      thr_bar->b_arrived, thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));
        /* Mark arrival to master thread.  After this write, a worker may not assume that
           the team is valid any more - it could be deallocated by the master at any time. */
        kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[0]);
        flag.release();
    } else {
        register kmp_balign_team_t *team_bar = &team->t.t_bar[bt];
        register int nproc = this_thr->th.th_team_nproc;
        register int i;
        // Don't have to worry about sleep bit here or atomic since team setting
        register kmp_uint64 new_state = team_bar->b_arrived + KMP_BARRIER_STATE_BUMP;

        // Collect all the worker team member threads.
        for (i=1; i<nproc; ++i) {
#if KMP_CACHE_MANAGE
            // Prefetch next thread's arrived count
            if (i+1 < nproc)
                KMP_CACHE_PREFETCH(&other_threads[i+1]->th.th_bar[bt].bb.b_arrived);
#endif /* KMP_CACHE_MANAGE */
            KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) "
                          "arrived(%p) == %llu\n", gtid, team->t.t_id, tid,
                          __kmp_gtid_from_tid(i, team), team->t.t_id, i,
                          &other_threads[i]->th.th_bar[bt].bb.b_arrived, new_state));

            // Wait for worker thread to arrive
            kmp_flag_64 flag(&other_threads[i]->th.th_bar[bt].bb.b_arrived, new_state);
            flag.wait(this_thr, FALSE
                      USE_ITT_BUILD_ARG(itt_sync_obj));
#if USE_ITT_BUILD && USE_ITT_NOTIFY
            // Barrier imbalance - write min of the thread time and the other thread time to the thread.
            if (__kmp_forkjoin_frames_mode == 2) {
                this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time,
                                                       other_threads[i]->th.th_bar_min_time);
            }
#endif
            if (reduce) {
                KA_TRACE(100, ("__kmp_linear_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n", gtid,
                               team->t.t_id, tid, __kmp_gtid_from_tid(i, team), team->t.t_id, i));
                (*reduce)(this_thr->th.th_local.reduce_data,
                          other_threads[i]->th.th_local.reduce_data);
            }
        }
        // Don't have to worry about sleep bit here or atomic since team setting
        team_bar->b_arrived = new_state;
        KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) set team %d arrived(%p) = %llu\n",
                      gtid, team->t.t_id, tid, team->t.t_id, &team_bar->b_arrived, new_state));
    }
    KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));
}
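// Linear release: the master (optionally after pushing ICVs to each implicit
// task) bumps every worker's b_go flag one by one; each worker just waits on
// its own b_go flag and then resets it for the next barrier.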
static void
__kmp_linear_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
                             int propagate_icvs
                             USE_ITT_BUILD_ARG(void *itt_sync_obj))
{
    KMP_TIME_DEVELOPER_BLOCK(KMP_linear_release);
    register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
    register kmp_team_t *team;

    if (KMP_MASTER_TID(tid)) {
        register unsigned int i;
        register kmp_uint32 nproc = this_thr->th.th_team_nproc;
        register kmp_info_t **other_threads;

        team = __kmp_threads[gtid]->th.th_team;
        KMP_DEBUG_ASSERT(team != NULL);
        other_threads = team->t.t_threads;

        KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d(%d:%d) master enter for barrier type %d\n",
                      gtid, team->t.t_id, tid, bt));

#if KMP_BARRIER_ICV_PUSH
        {
            KMP_TIME_DEVELOPER_BLOCK(USER_icv_copy);
            if (propagate_icvs) {
                ngo_load(&team->t.t_implicit_task_taskdata[0].td_icvs);
                for (i=1; i<nproc; ++i) {
                    __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[i], team, i, FALSE);
                    ngo_store_icvs(&team->t.t_implicit_task_taskdata[i].td_icvs,
                                   &team->t.t_implicit_task_taskdata[0].td_icvs);
                }
                ngo_sync();
            }
        }
#endif // KMP_BARRIER_ICV_PUSH

        // Now, release all of the worker threads
        for (i=1; i<nproc; ++i) {
#if KMP_CACHE_MANAGE
            // Prefetch next thread's go flag
            if (i+1 < nproc)
                KMP_CACHE_PREFETCH(&other_threads[i+1]->th.th_bar[bt].bb.b_go);
#endif /* KMP_CACHE_MANAGE */
            KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d) "
                          "go(%p): %u => %u\n", gtid, team->t.t_id, tid,
                          other_threads[i]->th.th_info.ds.ds_gtid, team->t.t_id, i,
                          &other_threads[i]->th.th_bar[bt].bb.b_go,
                          other_threads[i]->th.th_bar[bt].bb.b_go,
                          other_threads[i]->th.th_bar[bt].bb.b_go + KMP_BARRIER_STATE_BUMP));
            kmp_flag_64 flag(&other_threads[i]->th.th_bar[bt].bb.b_go, other_threads[i]);
            flag.release();
        }
    } else { // Wait for the MASTER thread to release us
        KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d wait go(%p) == %u\n",
                      gtid, &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
        kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
        flag.wait(this_thr, TRUE
                  USE_ITT_BUILD_ARG(itt_sync_obj));
#if USE_ITT_BUILD && USE_ITT_NOTIFY
        if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
            // In a fork barrier; cannot get the object reliably (or ITTNOTIFY is disabled)
            itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
            // Cancel wait on previous parallel region...
            __kmp_itt_task_starting(itt_sync_obj);

            if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
                return;

            itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
            if (itt_sync_obj != NULL)
                // Call prepare as early as possible for "new" barrier
                __kmp_itt_task_finished(itt_sync_obj);
        } else
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
        // Early exit for reaping threads releasing forkjoin barrier
        if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
            return;

        // The worker thread may now assume that the team is valid.
        tid = __kmp_tid_from_gtid(gtid);
        team = __kmp_threads[gtid]->th.th_team;
        KMP_DEBUG_ASSERT(team != NULL);
        TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
        KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
                      gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
        KMP_MB();  // Flush all pending memory write invalidates.
    }
    KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));
}
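// Tree barrier: threads are arranged in a tree of fan-in 'branch_factor'
// (1 << branch_bits); each parent waits for its children's b_arrived flags,
// reduces their data, and then reports its own arrival to its parent.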
static void
__kmp_tree_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
                          void (*reduce)(void *, void *)
                          USE_ITT_BUILD_ARG(void *itt_sync_obj))
{
    KMP_TIME_DEVELOPER_BLOCK(KMP_tree_gather);
    register kmp_team_t *team = this_thr->th.th_team;
    register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
    register kmp_info_t **other_threads = team->t.t_threads;
    register kmp_uint32 nproc = this_thr->th.th_team_nproc;
    register kmp_uint32 branch_bits = __kmp_barrier_gather_branch_bits[bt];
    register kmp_uint32 branch_factor = 1 << branch_bits;
    register kmp_uint32 child;
    register kmp_uint32 child_tid;
    register kmp_uint64 new_state;

    KA_TRACE(20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));
    KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);

#if USE_ITT_BUILD && USE_ITT_NOTIFY
    // Barrier imbalance - save arrive time to the thread
    if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
        this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time = __itt_get_timestamp();
    }
#endif
    // Perform tree gather to wait until all threads have arrived; reduce any required data as we go
    child_tid = (tid << branch_bits) + 1;
    if (child_tid < nproc) {
        // Parent threads wait for all their children to arrive
        new_state = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
        child = 1;
        do {
            register kmp_info_t *child_thr = other_threads[child_tid];
            register kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
#if KMP_CACHE_MANAGE
            // Prefetch next thread's arrived count
            if (child+1 <= branch_factor && child_tid+1 < nproc)
                KMP_CACHE_PREFETCH(&other_threads[child_tid+1]->th.th_bar[bt].bb.b_arrived);
#endif /* KMP_CACHE_MANAGE */
            KA_TRACE(20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) "
                          "arrived(%p) == %llu\n", gtid, team->t.t_id, tid,
                          __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid,
                          &child_bar->b_arrived, new_state));
            // Wait for child to arrive
            kmp_flag_64 flag(&child_bar->b_arrived, new_state);
            flag.wait(this_thr, FALSE
                      USE_ITT_BUILD_ARG(itt_sync_obj));
#if USE_ITT_BUILD && USE_ITT_NOTIFY
            // Barrier imbalance - write min of the thread time and a child time to the thread.
            if (__kmp_forkjoin_frames_mode == 2) {
                this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time,
                                                       child_thr->th.th_bar_min_time);
            }
#endif
            if (reduce) {
                KA_TRACE(100, ("__kmp_tree_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n",
                               gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                               team->t.t_id, child_tid));
                (*reduce)(this_thr->th.th_local.reduce_data, child_thr->th.th_local.reduce_data);
            }
            child++;
            child_tid++;
        }
        while (child <= branch_factor && child_tid < nproc);
    }

    if (!KMP_MASTER_TID(tid)) { // Worker threads
        register kmp_int32 parent_tid = (tid - 1) >> branch_bits;

        KA_TRACE(20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
                      "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid,
                      __kmp_gtid_from_tid(parent_tid, team), team->t.t_id, parent_tid,
                      &thr_bar->b_arrived, thr_bar->b_arrived,
                      thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));

        // Mark arrival to parent thread
        kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[parent_tid]);
        flag.release();
    } else {
        // Need to update the team arrived pointer if we are the master thread
        if (nproc > 1) // New value was already computed above
            team->t.t_bar[bt].b_arrived = new_state;
        else
            team->t.t_bar[bt].b_arrived += KMP_BARRIER_STATE_BUMP;
        KA_TRACE(20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) set team %d arrived(%p) = %llu\n",
                      gtid, team->t.t_id, tid, team->t.t_id,
                      &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived));
    }
    KA_TRACE(20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));
}
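// Tree release: the master releases its children by bumping their b_go flags;
// each released child in turn releases its own subtree, optionally pushing
// ICVs to each child's implicit task on the way down.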
static void
__kmp_tree_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
                           int propagate_icvs
                           USE_ITT_BUILD_ARG(void *itt_sync_obj))
{
    KMP_TIME_DEVELOPER_BLOCK(KMP_tree_release);
    register kmp_team_t *team;
    register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
    register kmp_uint32 nproc;
    register kmp_uint32 branch_bits = __kmp_barrier_release_branch_bits[bt];
    register kmp_uint32 branch_factor = 1 << branch_bits;
    register kmp_uint32 child;
    register kmp_uint32 child_tid;

    // Perform a tree release for all of the threads that have been gathered
    if (!KMP_MASTER_TID(tid)) { // Handle fork barrier workers who aren't part of a team yet
        KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d wait go(%p) == %u\n",
                      gtid, &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
        // Wait for parent thread to release us
        kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
        flag.wait(this_thr, TRUE
                  USE_ITT_BUILD_ARG(itt_sync_obj));
#if USE_ITT_BUILD && USE_ITT_NOTIFY
        if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
            // In fork barrier where we could not get the object reliably (or ITTNOTIFY is disabled)
            itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
            // Cancel wait on previous parallel region...
            __kmp_itt_task_starting(itt_sync_obj);

            if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
                return;

            itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
            if (itt_sync_obj != NULL)
                // Call prepare as early as possible for "new" barrier
                __kmp_itt_task_finished(itt_sync_obj);
        } else
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
        // Early exit for reaping threads releasing forkjoin barrier
        if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
            return;

        // The worker thread may now assume that the team is valid.
        team = __kmp_threads[gtid]->th.th_team;
        KMP_DEBUG_ASSERT(team != NULL);
        tid = __kmp_tid_from_gtid(gtid);

        TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
        KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
                      gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
        KMP_MB();  // Flush all pending memory write invalidates.
    } else {
        team = __kmp_threads[gtid]->th.th_team;
        KMP_DEBUG_ASSERT(team != NULL);
        KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d(%d:%d) master enter for barrier type %d\n",
                      gtid, team->t.t_id, tid, bt));
    }
    nproc = this_thr->th.th_team_nproc;
    child_tid = (tid << branch_bits) + 1;

    if (child_tid < nproc) {
        register kmp_info_t **other_threads = team->t.t_threads;
        child = 1;
        // Parent threads release all their children
        do {
            register kmp_info_t *child_thr = other_threads[child_tid];
            register kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
#if KMP_CACHE_MANAGE
            // Prefetch next thread's go count
            if (child+1 <= branch_factor && child_tid+1 < nproc)
                KMP_CACHE_PREFETCH(&other_threads[child_tid+1]->th.th_bar[bt].bb.b_go);
#endif /* KMP_CACHE_MANAGE */

#if KMP_BARRIER_ICV_PUSH
            {
                KMP_TIME_DEVELOPER_BLOCK(USER_icv_copy);
                if (propagate_icvs) {
                    __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[child_tid],
                                             team, child_tid, FALSE);
                    copy_icvs(&team->t.t_implicit_task_taskdata[child_tid].td_icvs,
                              &team->t.t_implicit_task_taskdata[0].td_icvs);
                }
            }
#endif // KMP_BARRIER_ICV_PUSH
            KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)"
                          "go(%p): %u => %u\n", gtid, team->t.t_id, tid,
                          __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
                          child_tid, &child_bar->b_go, child_bar->b_go,
                          child_bar->b_go + KMP_BARRIER_STATE_BUMP));
            // Release child from barrier
            kmp_flag_64 flag(&child_bar->b_go, child_thr);
            flag.release();
            child++;
            child_tid++;
        }
        while (child <= branch_factor && child_tid < nproc);
    }
    KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));
}
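// Hypercube-embedded tree barrier: at each level a thread whose tid has a
// non-zero digit in the current radix-(branch_factor) position reports to its
// parent (tid with that digit cleared) and drops out; the remaining threads
// collect children located at offsets of (1 << level).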
static void
__kmp_hyper_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
                           void (*reduce)(void *, void *)
                           USE_ITT_BUILD_ARG(void *itt_sync_obj))
{
    KMP_TIME_DEVELOPER_BLOCK(KMP_hyper_gather);
    register kmp_team_t *team = this_thr->th.th_team;
    register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
    register kmp_info_t **other_threads = team->t.t_threads;
    register kmp_uint64 new_state = KMP_BARRIER_UNUSED_STATE;
    register kmp_uint32 num_threads = this_thr->th.th_team_nproc;
    register kmp_uint32 branch_bits = __kmp_barrier_gather_branch_bits[bt];
    register kmp_uint32 branch_factor = 1 << branch_bits;
    register kmp_uint32 offset;
    register kmp_uint32 level;

    KA_TRACE(20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));

    KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);

#if USE_ITT_BUILD && USE_ITT_NOTIFY
    // Barrier imbalance - save arrive time to the thread
    if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
        this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time = __itt_get_timestamp();
    }
#endif
    /* Perform a hypercube-embedded tree gather to wait until all of the threads have
       arrived, and reduce any required data as we go.  */
    kmp_flag_64 p_flag(&thr_bar->b_arrived);
    for (level=0, offset=1; offset<num_threads; level+=branch_bits, offset<<=branch_bits)
    {
        register kmp_uint32 child;
        register kmp_uint32 child_tid;

        if (((tid >> level) & (branch_factor - 1)) != 0) {
            register kmp_int32 parent_tid = tid & ~((1 << (level + branch_bits)) -1);

            KA_TRACE(20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
                          "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid,
                          __kmp_gtid_from_tid(parent_tid, team), team->t.t_id, parent_tid,
                          &thr_bar->b_arrived, thr_bar->b_arrived,
                          thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));
            // Mark arrival to parent thread
            p_flag.set_waiter(other_threads[parent_tid]);
            p_flag.release();
            break;  // After signaling its parent, the thread only waits to be released
        }

        // Parent threads wait for children to arrive
        if (new_state == KMP_BARRIER_UNUSED_STATE)
            new_state = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
        for (child=1, child_tid=tid+(1 << level); child<branch_factor && child_tid<num_threads;
             child++, child_tid+=(1 << level))
        {
            register kmp_info_t *child_thr = other_threads[child_tid];
            register kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
#if KMP_CACHE_MANAGE
            register kmp_uint32 next_child_tid = child_tid + (1 << level);
            // Prefetch next thread's arrived count
            if (child+1 < branch_factor && next_child_tid < num_threads)
                KMP_CACHE_PREFETCH(&other_threads[next_child_tid]->th.th_bar[bt].bb.b_arrived);
#endif /* KMP_CACHE_MANAGE */
            KA_TRACE(20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) "
                          "arrived(%p) == %llu\n", gtid, team->t.t_id, tid,
                          __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid,
                          &child_bar->b_arrived, new_state));
            // Wait for child to arrive
            kmp_flag_64 c_flag(&child_bar->b_arrived, new_state);
            c_flag.wait(this_thr, FALSE
                        USE_ITT_BUILD_ARG(itt_sync_obj));
#if USE_ITT_BUILD && USE_ITT_NOTIFY
            // Barrier imbalance - write min of the thread time and a child time to the thread.
            if (__kmp_forkjoin_frames_mode == 2) {
                this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time,
                                                       child_thr->th.th_bar_min_time);
            }
#endif
            if (reduce) {
                KA_TRACE(100, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n",
                               gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                               team->t.t_id, child_tid));
                (*reduce)(this_thr->th.th_local.reduce_data, child_thr->th.th_local.reduce_data);
            }
        }
    }

    if (KMP_MASTER_TID(tid)) {
        // Need to update the team arrived pointer if we are the master thread
        if (new_state == KMP_BARRIER_UNUSED_STATE)
            team->t.t_bar[bt].b_arrived += KMP_BARRIER_STATE_BUMP;
        else
            team->t.t_bar[bt].b_arrived = new_state;
        KA_TRACE(20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) set team %d arrived(%p) = %llu\n",
                      gtid, team->t.t_id, tid, team->t.t_id,
                      &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived));
    }
    KA_TRACE(20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));
}
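// Hypercube release: mirrors the gather, walking the levels top-down.  With
// KMP_REVERSE_HYPER_BAR defined (the default) children are released in the
// reverse of the gather order, which tends to reduce release latency.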
// The reverse versions seem to beat the forward versions overall
#define KMP_REVERSE_HYPER_BAR
static void
__kmp_hyper_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
                            int propagate_icvs
                            USE_ITT_BUILD_ARG(void *itt_sync_obj))
{
    KMP_TIME_DEVELOPER_BLOCK(KMP_hyper_release);
    register kmp_team_t *team;
    register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
    register kmp_info_t **other_threads;
    register kmp_uint32 num_threads;
    register kmp_uint32 branch_bits = __kmp_barrier_release_branch_bits[bt];
    register kmp_uint32 branch_factor = 1 << branch_bits;
    register kmp_uint32 child;
    register kmp_uint32 child_tid;
    register kmp_uint32 offset;
    register kmp_uint32 level;

    /* Perform a hypercube-embedded tree release for all of the threads that have been gathered.
       If KMP_REVERSE_HYPER_BAR is defined (default), the threads are released in the reverse
       order of the corresponding gather, otherwise threads are released in the same order. */
    if (KMP_MASTER_TID(tid)) { // master
        team = __kmp_threads[gtid]->th.th_team;
        KMP_DEBUG_ASSERT(team != NULL);
        KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d(%d:%d) master enter for barrier type %d\n",
                      gtid, team->t.t_id, tid, bt));
#if KMP_BARRIER_ICV_PUSH
        if (propagate_icvs) { // master already has ICVs in final destination; copy
            copy_icvs(&thr_bar->th_fixed_icvs, &team->t.t_implicit_task_taskdata[tid].td_icvs);
        }
#endif
    }
    else { // Handle fork barrier workers who aren't part of a team yet
        KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d wait go(%p) == %u\n",
                      gtid, &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
        // Wait for parent thread to release us
        kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
        flag.wait(this_thr, TRUE
                  USE_ITT_BUILD_ARG(itt_sync_obj));
#if USE_ITT_BUILD && USE_ITT_NOTIFY
        if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
            // In fork barrier where we could not get the object reliably
            itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
            // Cancel wait on previous parallel region...
            __kmp_itt_task_starting(itt_sync_obj);

            if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
                return;

            itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
            if (itt_sync_obj != NULL)
                // Call prepare as early as possible for "new" barrier
                __kmp_itt_task_finished(itt_sync_obj);
        } else
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
        // Early exit for reaping threads releasing forkjoin barrier
        if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
            return;

        // The worker thread may now assume that the team is valid.
        team = __kmp_threads[gtid]->th.th_team;
        KMP_DEBUG_ASSERT(team != NULL);
        tid = __kmp_tid_from_gtid(gtid);

        TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
        KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
                      gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
        KMP_MB();  // Flush all pending memory write invalidates.
    }
    num_threads = this_thr->th.th_team_nproc;
    other_threads = team->t.t_threads;

#ifdef KMP_REVERSE_HYPER_BAR
    // Count up to correct level for parent
    for (level=0, offset=1; offset<num_threads && (((tid>>level) & (branch_factor-1)) == 0);
         level+=branch_bits, offset<<=branch_bits);

    // Now go down from there
    for (level-=branch_bits, offset>>=branch_bits; offset != 0;
         level-=branch_bits, offset>>=branch_bits)
#else
    // Go down the tree, level by level
    for (level=0, offset=1; offset<num_threads; level+=branch_bits, offset<<=branch_bits)
#endif // KMP_REVERSE_HYPER_BAR
    {
#ifdef KMP_REVERSE_HYPER_BAR
        /* Now go in reverse order through the children, highest to lowest.
           Initial setting of child is conservative here. */
        child = num_threads >> ((level==0)?level:level-1);
        for (child=(child<branch_factor-1) ? child : branch_factor-1, child_tid=tid+(child<<level);
             child>=1; child--, child_tid-=(1<<level))
#else
        if (((tid >> level) & (branch_factor - 1)) != 0)
            // No need to go lower than this, since this is the level parent would be notified
            break;
        // Iterate through children on this level of the tree
        for (child=1, child_tid=tid+(1<<level); child<branch_factor && child_tid<num_threads;
             child++, child_tid+=(1<<level))
#endif // KMP_REVERSE_HYPER_BAR
        {
            if (child_tid >= num_threads) continue;  // Child doesn't exist so keep going
            else {
                register kmp_info_t *child_thr = other_threads[child_tid];
                register kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
#if KMP_CACHE_MANAGE
                register kmp_uint32 next_child_tid = child_tid - (1 << level);
                // Prefetch next thread's go count
# ifdef KMP_REVERSE_HYPER_BAR
                if (child-1 >= 1 && next_child_tid < num_threads)
# else
                if (child+1 < branch_factor && next_child_tid < num_threads)
# endif // KMP_REVERSE_HYPER_BAR
                    KMP_CACHE_PREFETCH(&other_threads[next_child_tid]->th.th_bar[bt].bb.b_go);
#endif /* KMP_CACHE_MANAGE */

#if KMP_BARRIER_ICV_PUSH
                if (propagate_icvs) // push my fixed ICVs to my child
                    copy_icvs(&child_bar->th_fixed_icvs, &thr_bar->th_fixed_icvs);
#endif // KMP_BARRIER_ICV_PUSH

                KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)"
                              "go(%p): %u => %u\n", gtid, team->t.t_id, tid,
                              __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
                              child_tid, &child_bar->b_go, child_bar->b_go,
                              child_bar->b_go + KMP_BARRIER_STATE_BUMP));
                // Release child from barrier
                kmp_flag_64 flag(&child_bar->b_go, child_thr);
                flag.release();
            }
        }
    }
#if KMP_BARRIER_ICV_PUSH
    if (propagate_icvs && !KMP_MASTER_TID(tid)) { // copy ICVs locally to final destination
        __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid, FALSE);
        copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, &thr_bar->th_fixed_icvs);
    }
#endif
    KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));
}
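// Hierarchical (machine-tree) barrier.  __kmp_init_hierarchical_barrier_thread
// (re-)computes a thread's position in the machine hierarchy: its level, its
// parent tid, its byte offset in the parent's flag word, and the set of "leaf
// kids" that check in directly on bits of this thread's b_arrived flag.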
static bool
__kmp_init_hierarchical_barrier_thread(enum barrier_type bt, kmp_bstate_t *thr_bar, kmp_uint32 nproc,
                                       int gtid, int tid, kmp_team_t *team)
{
    // Checks to determine if (re-)initialization is needed
    bool uninitialized = thr_bar->team == NULL;
    bool team_changed = team != thr_bar->team;
    bool team_sz_changed = nproc != thr_bar->nproc;
    bool tid_changed = tid != thr_bar->old_tid;
    bool retval = false;

    if (uninitialized || team_sz_changed) {
        __kmp_get_hierarchy(nproc, thr_bar);
    }

    if (uninitialized || team_sz_changed || tid_changed) {
        thr_bar->my_level = thr_bar->depth-1; // default for master
        thr_bar->parent_tid = -1;             // default for master
        if (!KMP_MASTER_TID(tid)) { // if not master, find parent thread in hierarchy
            kmp_uint32 d = 0;
            while (d<thr_bar->depth) { // find parent based on level of thread in hierarchy, and note level
                kmp_uint32 rem;
                if (d == thr_bar->depth-2) { // reached level right below the master
                    thr_bar->parent_tid = 0;
                    thr_bar->my_level = d;
                    break;
                }
                else if ((rem = tid%thr_bar->skip_per_level[d+1]) != 0) {
                    // thread is not a subtree root at the next level, so this is its max level
                    thr_bar->parent_tid = tid - rem;
                    thr_bar->my_level = d;
                    break;
                }
                ++d;
            }
        }
        thr_bar->offset = 7-(tid-thr_bar->parent_tid-1);
        thr_bar->old_tid = tid;
        thr_bar->wait_flag = KMP_BARRIER_NOT_WAITING;
        thr_bar->team = team;
        thr_bar->parent_bar = &team->t.t_threads[thr_bar->parent_tid]->th.th_bar[bt].bb;
    }
    if (uninitialized || team_changed || tid_changed) {
        thr_bar->team = team;
        thr_bar->parent_bar = &team->t.t_threads[thr_bar->parent_tid]->th.th_bar[bt].bb;
        retval = true;
    }
    if (uninitialized || team_sz_changed || tid_changed) {
        thr_bar->nproc = nproc;
        thr_bar->leaf_kids = thr_bar->base_leaf_kids;
        if (thr_bar->my_level == 0) thr_bar->leaf_kids = 0;
        if (thr_bar->leaf_kids && (kmp_uint32)tid+thr_bar->leaf_kids+1 > nproc)
            thr_bar->leaf_kids = nproc - tid - 1;
        thr_bar->leaf_state = 0;
        for (int i=0; i<thr_bar->leaf_kids; ++i)
            ((char *)&(thr_bar->leaf_state))[7-i] = 1;
    }
    return retval;
}
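// Hierarchical gather: with infinite blocktime and an unnested region, leaf
// children signal individual bytes of their parent's b_arrived word (the
// on-core optimization); otherwise each parent waits on its children's
// b_arrived flags level by level, applying the reduction as it goes.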
static void
__kmp_hierarchical_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr,
                                  int gtid, int tid, void (*reduce) (void *, void *)
                                  USE_ITT_BUILD_ARG(void * itt_sync_obj))
{
    KMP_TIME_DEVELOPER_BLOCK(KMP_hier_gather);
    register kmp_team_t *team = this_thr->th.th_team;
    register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
    register kmp_uint32 nproc = this_thr->th.th_team_nproc;
    register kmp_info_t **other_threads = team->t.t_threads;
    register kmp_uint64 new_state;

    int level = team->t.t_level;
#if OMP_40_ENABLED
    if (other_threads[0]->th.th_teams_microtask) // are we inside the teams construct?
        if (this_thr->th.th_teams_size.nteams > 1)
            ++level; // level was not increased in teams construct for team_of_masters
#endif
    if (level == 1) thr_bar->use_oncore_barrier = 1;
    else thr_bar->use_oncore_barrier = 0; // Do not use oncore barrier when nested

    KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));
    KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);

#if USE_ITT_BUILD && USE_ITT_NOTIFY
    // Barrier imbalance - save arrive time to the thread
    if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
        this_thr->th.th_bar_arrive_time = __itt_get_timestamp();
    }
#endif

    (void)__kmp_init_hierarchical_barrier_thread(bt, thr_bar, nproc, gtid, tid, team);

    if (thr_bar->my_level) { // not a leaf (my_level==0 means leaf)
        register kmp_int32 child_tid;
        new_state = (kmp_uint64)team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
        if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME && thr_bar->use_oncore_barrier) {
            if (thr_bar->leaf_kids) { // First, wait for leaf children to check-in on my b_arrived flag
                kmp_uint64 leaf_state = KMP_MASTER_TID(tid) ? thr_bar->b_arrived | thr_bar->leaf_state : team->t.t_bar[bt].b_arrived | thr_bar->leaf_state;
                KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) waiting for leaf kids\n",
                              gtid, team->t.t_id, tid));
                kmp_flag_64 flag(&thr_bar->b_arrived, leaf_state);
                flag.wait(this_thr, FALSE
                          USE_ITT_BUILD_ARG(itt_sync_obj));
                if (reduce) {
                    for (child_tid=tid+1; child_tid<=tid+thr_bar->leaf_kids; ++child_tid) {
                        KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n",
                                       gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                                       team->t.t_id, child_tid));
                        (*reduce)(this_thr->th.th_local.reduce_data, other_threads[child_tid]->th.th_local.reduce_data);
                    }
                }
                // clear leaf_state bits
                (void) KMP_TEST_THEN_AND64((volatile kmp_int64 *)&thr_bar->b_arrived, ~(thr_bar->leaf_state));
            }
            // Next, wait for higher level children on each child's b_arrived flag
            for (kmp_uint32 d=1; d<thr_bar->my_level; ++d) { // gather lowest level threads first, but skip 0
                kmp_uint32 last = tid+thr_bar->skip_per_level[d+1], skip = thr_bar->skip_per_level[d];
                if (last > nproc) last = nproc;
                for (child_tid=tid+skip; child_tid<(int)last; child_tid+=skip) {
                    register kmp_info_t *child_thr = other_threads[child_tid];
                    register kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
                    KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) "
                                  "arrived(%p) == %llu\n",
                                  gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                                  team->t.t_id, child_tid, &child_bar->b_arrived, new_state));
                    kmp_flag_64 flag(&child_bar->b_arrived, new_state);
                    flag.wait(this_thr, FALSE
                              USE_ITT_BUILD_ARG(itt_sync_obj));
                    if (reduce) {
                        KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n",
                                       gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                                       team->t.t_id, child_tid));
                        (*reduce)(this_thr->th.th_local.reduce_data, child_thr->th.th_local.reduce_data);
                    }
                }
            }
        }
        else { // Blocktime is not infinite; wait on each child's own b_arrived flag
            for (kmp_uint32 d=0; d<thr_bar->my_level; ++d) { // Gather lowest level threads first
                kmp_uint32 last = tid+thr_bar->skip_per_level[d+1], skip = thr_bar->skip_per_level[d];
                if (last > nproc) last = nproc;
                for (child_tid=tid+skip; child_tid<(int)last; child_tid+=skip) {
                    register kmp_info_t *child_thr = other_threads[child_tid];
                    register kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
                    KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) "
                                  "arrived(%p) == %llu\n",
                                  gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                                  team->t.t_id, child_tid, &child_bar->b_arrived, new_state));
                    kmp_flag_64 flag(&child_bar->b_arrived, new_state);
                    flag.wait(this_thr, FALSE
                              USE_ITT_BUILD_ARG(itt_sync_obj));
                    if (reduce) {
                        KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n",
                                       gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                                       team->t.t_id, child_tid));
                        (*reduce)(this_thr->th.th_local.reduce_data, child_thr->th.th_local.reduce_data);
                    }
                }
            }
        }
    }
    // All subordinates are gathered; now release parent if not master thread

    if (!KMP_MASTER_TID(tid)) { // worker threads release parent in hierarchy
        KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
                      "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid,
                      __kmp_gtid_from_tid(thr_bar->parent_tid, team), team->t.t_id, thr_bar->parent_tid,
                      &thr_bar->b_arrived, thr_bar->b_arrived, thr_bar->b_arrived+KMP_BARRIER_STATE_BUMP));
        /* Mark arrival to parent: After performing this write, a worker thread may not assume that
           the team is valid any more - it could be deallocated by the master thread at any time. */
        if (thr_bar->my_level || __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME
            || !thr_bar->use_oncore_barrier) { // Parent is waiting on my b_arrived flag; release it
            kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[thr_bar->parent_tid]);
            flag.release();
        }
        else { // Leaf does special release on the "offset" bits of parent's b_arrived flag
            thr_bar->b_arrived = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
            kmp_flag_oncore flag(&thr_bar->parent_bar->b_arrived, thr_bar->offset);
            flag.set_waiter(other_threads[thr_bar->parent_tid]);
            flag.release();
        }
    } else { // Master thread needs to update the team's b_arrived value
        team->t.t_bar[bt].b_arrived = new_state;
        KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) set team %d arrived(%p) = %llu\n",
                      gtid, team->t.t_id, tid, team->t.t_id, &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived));
    }
    KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));
}
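// Hierarchical release: workers wait either on their own b_go flag or, for
// on-core leaves, on their byte of the parent's b_go flag; parents then wake
// their subtrees, using NGO stores to broadcast ICVs together with b_go when
// the on-core path is enabled.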
static void
__kmp_hierarchical_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
                                   int propagate_icvs
                                   USE_ITT_BUILD_ARG(void * itt_sync_obj))
{
    KMP_TIME_DEVELOPER_BLOCK(KMP_hier_release);
    register kmp_team_t *team;
    register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
    register kmp_uint32 nproc;
    bool team_change = false; // indicates on-core barrier shouldn't be used

    if (KMP_MASTER_TID(tid)) {
        team = __kmp_threads[gtid]->th.th_team;
        KMP_DEBUG_ASSERT(team != NULL);
        KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) master entered barrier type %d\n",
                      gtid, team->t.t_id, tid, bt));
    }
    else { // Worker threads
        // Wait for parent thread to release me
        if (!thr_bar->use_oncore_barrier || __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME
            || thr_bar->my_level != 0 || thr_bar->team == NULL) {
            // Use traditional method of waiting on my own b_go flag
            thr_bar->wait_flag = KMP_BARRIER_OWN_FLAG;
            kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
            flag.wait(this_thr, TRUE
                      USE_ITT_BUILD_ARG(itt_sync_obj));
            TCW_8(thr_bar->b_go, KMP_INIT_BARRIER_STATE); // Reset my b_go flag for next time
        }
        else { // Thread barrier data is initialized, this is a leaf, blocktime is infinite, not nested
            // Wait on my "offset" bits on parent's b_go flag
            thr_bar->wait_flag = KMP_BARRIER_PARENT_FLAG;
            kmp_flag_oncore flag(&thr_bar->parent_bar->b_go, KMP_BARRIER_STATE_BUMP, thr_bar->offset,
                                 bt, this_thr
                                 USE_ITT_BUILD_ARG(itt_sync_obj));
            flag.wait(this_thr, TRUE);
            if (thr_bar->wait_flag == KMP_BARRIER_SWITCHING) { // Thread was switched to its own b_go flag
                TCW_8(thr_bar->b_go, KMP_INIT_BARRIER_STATE); // Reset my b_go flag for next time
            }
            else { // Reset my bits on parent's b_go flag
                ((char*)&(thr_bar->parent_bar->b_go))[thr_bar->offset] = 0;
            }
        }
        thr_bar->wait_flag = KMP_BARRIER_NOT_WAITING;
        // Early exit for reaping threads releasing forkjoin barrier
        if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
            return;
        // The worker thread may now assume that the team is valid.
        team = __kmp_threads[gtid]->th.th_team;
        KMP_DEBUG_ASSERT(team != NULL);
        tid = __kmp_tid_from_gtid(gtid);

        KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
                      gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
        KMP_MB();  // Flush all pending memory write invalidates.
    }

    nproc = this_thr->th.th_team_nproc;
    int level = team->t.t_level;
#if OMP_40_ENABLED
    if (team->t.t_threads[0]->th.th_teams_microtask) { // are we inside the teams construct?
        if (team->t.t_pkfn != (microtask_t)__kmp_teams_master && this_thr->th.th_teams_level == level)
            ++level; // level was not increased in teams construct for team_of_workers
        if (this_thr->th.th_teams_size.nteams > 1)
            ++level; // level was not increased in teams construct for team_of_masters
    }
#endif
    if (level == 1) thr_bar->use_oncore_barrier = 1;
    else thr_bar->use_oncore_barrier = 0; // Do not use oncore barrier when nested

    // If the team size has increased, we still communicate with old leaves via oncore barrier.
    unsigned short int old_leaf_kids = thr_bar->leaf_kids;
    kmp_uint64 old_leaf_state = thr_bar->leaf_state;
    team_change = __kmp_init_hierarchical_barrier_thread(bt, thr_bar, nproc, gtid, tid, team);
    // But if the entire team changes, we won't use oncore barrier at all
    if (team_change) old_leaf_kids = 0;

#if KMP_BARRIER_ICV_PUSH
    if (propagate_icvs) {
        __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid, FALSE);
        if (KMP_MASTER_TID(tid)) { // master already has copy in final destination; copy
            copy_icvs(&thr_bar->th_fixed_icvs, &team->t.t_implicit_task_taskdata[tid].td_icvs);
        }
        else if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME && thr_bar->use_oncore_barrier) {
            if (!thr_bar->my_level) // leaf: copy parent's fixed ICVs directly to local ICV store
                copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
                          &thr_bar->parent_bar->th_fixed_icvs);
            // non-leaves will get ICVs piggybacked with b_go via NGO store
        }
        else { // blocktime is not infinite
            if (thr_bar->my_level) // not a leaf: copy ICVs to my fixed ICVs so children can access them
                copy_icvs(&thr_bar->th_fixed_icvs, &thr_bar->parent_bar->th_fixed_icvs);
            else // leaf: copy parent's fixed ICVs directly to local ICV store
                copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
                          &thr_bar->parent_bar->th_fixed_icvs);
        }
    }
#endif // KMP_BARRIER_ICV_PUSH

    // Now, release my children
    if (thr_bar->my_level) { // not a leaf
        register kmp_int32 child_tid;
        kmp_uint32 last;
        if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME && thr_bar->use_oncore_barrier) {
            if (KMP_MASTER_TID(tid)) { // do a flat release
                // Set local b_go to bump children via NGO store of the cache line containing the ICVs and b_go.
                thr_bar->b_go = KMP_BARRIER_STATE_BUMP;
                // Use ngo stores if available; b_go piggybacks in the last 8 bytes of the cache line
                ngo_load(&thr_bar->th_fixed_icvs);
                // This loops over all the threads, skipping only the leaf nodes in the hierarchy
                for (child_tid=thr_bar->skip_per_level[1]; child_tid<(int)nproc; child_tid+=thr_bar->skip_per_level[1]) {
                    register kmp_bstate_t *child_bar = &team->t.t_threads[child_tid]->th.th_bar[bt].bb;
                    KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d)"
                                  " go(%p): %u => %u\n",
                                  gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                                  team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,
                                  child_bar->b_go + KMP_BARRIER_STATE_BUMP));
                    // Use ngo store (if available) to both store ICVs and release child via child's b_go
                    ngo_store_go(&child_bar->th_fixed_icvs, &thr_bar->th_fixed_icvs);
                }
                ngo_sync();
            }
            TCW_8(thr_bar->b_go, KMP_INIT_BARRIER_STATE); // Reset my b_go flag for next time
            // Now, release leaf children
            if (thr_bar->leaf_kids) { // if there are any
                // We test team_change on the off-chance that the level 1 team changed.
                if (team_change || old_leaf_kids < thr_bar->leaf_kids) { // some old leaf kids, some new
                    if (old_leaf_kids) // release old leaf kids
                        thr_bar->b_go |= old_leaf_state;
                    // Release new leaf kids
                    last = tid+thr_bar->skip_per_level[1];
                    if (last > nproc) last = nproc;
                    for (child_tid=tid+1+old_leaf_kids; child_tid<(int)last; ++child_tid) { // skip_per_level[0]=1
                        register kmp_info_t *child_thr = team->t.t_threads[child_tid];
                        register kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
                        KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) releasing"
                                      " T#%d(%d:%d) go(%p): %u => %u\n",
                                      gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                                      team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,
                                      child_bar->b_go + KMP_BARRIER_STATE_BUMP));
                        // Release child using child's b_go flag
                        kmp_flag_64 flag(&child_bar->b_go, child_thr);
                        flag.release();
                    }
                }
                else { // Release all children at once with leaf_state bits on my own b_go flag
                    thr_bar->b_go |= thr_bar->leaf_state;
                }
            }
        }
        else { // Blocktime is not infinite; do a simple hierarchical release
            for (int d=thr_bar->my_level-1; d>=0; --d) { // Release highest level threads first
                last = tid+thr_bar->skip_per_level[d+1];
                kmp_uint32 skip = thr_bar->skip_per_level[d];
                if (last > nproc) last = nproc;
                for (child_tid=tid+skip; child_tid<(int)last; child_tid+=skip) {
                    register kmp_info_t *child_thr = team->t.t_threads[child_tid];
                    register kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
                    KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d)"
                                  " go(%p): %u => %u\n",
                                  gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                                  team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,
                                  child_bar->b_go + KMP_BARRIER_STATE_BUMP));
                    // Release child using child's b_go flag
                    kmp_flag_64 flag(&child_bar->b_go, child_thr);
                    flag.release();
                }
            }
        }
#if KMP_BARRIER_ICV_PUSH
        if (propagate_icvs && !KMP_MASTER_TID(tid)) // non-leaves copy ICVs from fixed ICVs to local dest
            copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, &thr_bar->th_fixed_icvs);
#endif // KMP_BARRIER_ICV_PUSH
    }
    KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));
}
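// ------------------------- End of Barrier Algorithms -------------------------

// Internal entry point: dispatches to the gather/release pattern selected by
// __kmp_barrier_gather_pattern / __kmp_barrier_release_pattern for this
// barrier type, and handles tasking, ITT frames, and OMPT callbacks around it.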
int
__kmp_barrier(enum barrier_type bt, int gtid, int is_split, size_t reduce_size,
              void *reduce_data, void (*reduce)(void *, void *))
{
    KMP_TIME_DEVELOPER_BLOCK(KMP_barrier);
    register int tid = __kmp_tid_from_gtid(gtid);
    register kmp_info_t *this_thr = __kmp_threads[gtid];
    register kmp_team_t *team = this_thr->th.th_team;
    register int status = 0;
    ident_t *loc = __kmp_threads[gtid]->th.th_ident;
#if OMPT_SUPPORT
    ompt_task_id_t my_task_id;
    ompt_parallel_id_t my_parallel_id;
#endif

    KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) has arrived\n",
                  gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid)));

#if OMPT_SUPPORT
    if (ompt_enabled) {
        my_task_id = team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id;
        my_parallel_id = team->t.ompt_team_info.parallel_id;

        if (this_thr->th.ompt_thread_info.state == ompt_state_wait_single) {
            if (ompt_callbacks.ompt_callback(ompt_event_single_others_end)) {
                ompt_callbacks.ompt_callback(ompt_event_single_others_end)(
                    my_parallel_id, my_task_id);
            }
        }
        if (ompt_callbacks.ompt_callback(ompt_event_barrier_begin)) {
            ompt_callbacks.ompt_callback(ompt_event_barrier_begin)(
                my_parallel_id, my_task_id);
        }
        this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier;
    }
#endif

    if (! team->t.t_serialized) {
#if USE_ITT_BUILD
        // This value will be used in itt notify events below.
        void *itt_sync_obj = NULL;
        if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
            itt_sync_obj = __kmp_itt_barrier_object(gtid, bt, 1);
#endif /* USE_ITT_BUILD */
        if (__kmp_tasking_mode == tskm_extra_barrier) {
            __kmp_tasking_barrier(team, this_thr, gtid);
            KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) past tasking barrier\n",
                          gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid)));
        }

        /* Copy the blocktime info to the thread, where __kmp_wait_template() can access it when
           the team struct is not guaranteed to exist. */
        if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
            this_thr->th.th_team_bt_intervals = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
            this_thr->th.th_team_bt_set = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
        }

#if USE_ITT_BUILD
        if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
            __kmp_itt_barrier_starting(gtid, itt_sync_obj);
#endif /* USE_ITT_BUILD */
#if USE_DEBUGGER
        // Let the debugger know: the thread arrived to the barrier and is waiting.
        if (KMP_MASTER_TID(tid)) { // Master counter is stored in team structure.
            team->t.t_bar[bt].b_master_arrived += 1;
        } else {
            this_thr->th.th_bar[bt].bb.b_worker_arrived += 1;
        }
#endif /* USE_DEBUGGER */
        if (reduce != NULL) {
            this_thr->th.th_local.reduce_data = reduce_data;
        }

        if (KMP_MASTER_TID(tid) && __kmp_tasking_mode != tskm_immediate_exec)
            __kmp_task_team_setup(this_thr, team, 0); // use 0 to only set up the current team if nthreads > 1

        switch (__kmp_barrier_gather_pattern[bt]) {
        case bp_hyper_bar: {
            KMP_ASSERT(__kmp_barrier_gather_branch_bits[bt]); // don't set branch bits to 0; use linear
            __kmp_hyper_barrier_gather(bt, this_thr, gtid, tid, reduce
                                       USE_ITT_BUILD_ARG(itt_sync_obj));
            break;
        }
        case bp_hierarchical_bar: {
            __kmp_hierarchical_barrier_gather(bt, this_thr, gtid, tid, reduce
                                              USE_ITT_BUILD_ARG(itt_sync_obj));
            break;
        }
        case bp_tree_bar: {
            KMP_ASSERT(__kmp_barrier_gather_branch_bits[bt]); // don't set branch bits to 0; use linear
            __kmp_tree_barrier_gather(bt, this_thr, gtid, tid, reduce
                                      USE_ITT_BUILD_ARG(itt_sync_obj));
            break;
        }
        default: {
            __kmp_linear_barrier_gather(bt, this_thr, gtid, tid, reduce
                                        USE_ITT_BUILD_ARG(itt_sync_obj));
        }
        }

        if (KMP_MASTER_TID(tid)) {
            status = 0;
            if (__kmp_tasking_mode != tskm_immediate_exec) {
                __kmp_task_team_wait(this_thr, team
                                     USE_ITT_BUILD_ARG(itt_sync_obj));
            }
#if USE_DEBUGGER
            // Let the debugger know: All threads have arrived and are starting to leave the barrier.
            team->t.t_bar[bt].b_team_arrived += 1;
#endif

#if USE_ITT_BUILD
            if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
                __kmp_itt_barrier_middle(gtid, itt_sync_obj);
#endif /* USE_ITT_BUILD */
#if USE_ITT_BUILD && USE_ITT_NOTIFY
            // Barrier - report frame end (only if active_level == 1)
            if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) && __kmp_forkjoin_frames_mode &&
#if OMP_40_ENABLED
                this_thr->th.th_teams_microtask == NULL &&
#endif
                team->t.t_active_level == 1)
            {
                kmp_uint64 cur_time = __itt_get_timestamp();
                kmp_info_t **other_threads = team->t.t_threads;
                int nproc = this_thr->th.th_team_nproc;
                int i;
                switch(__kmp_forkjoin_frames_mode) {
                case 1:
                    __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, loc, nproc);
                    this_thr->th.th_frame_time = cur_time;
                    break;
                case 2:
                    __kmp_itt_frame_submit(gtid, this_thr->th.th_bar_min_time, cur_time, 1, loc, nproc);
                    break;
                case 3:
                    if (__itt_metadata_add_ptr) {
                        // Initialize with master's wait time
                        kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time;
                        for (i=1; i<nproc; ++i) {
                            delta += (cur_time - other_threads[i]->th.th_bar_arrive_time);
                        }
                        __kmp_itt_metadata_imbalance(gtid, this_thr->th.th_frame_time, cur_time, delta, (kmp_uint64)(reduce != NULL));
                    }
                    __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, loc, nproc);
                    this_thr->th.th_frame_time = cur_time;
                    break;
                }
            }
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
        } else {
            status = 1;
#if USE_ITT_BUILD
            if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
                __kmp_itt_barrier_middle(gtid, itt_sync_obj);
#endif /* USE_ITT_BUILD */
        }
        if (status == 1 || ! is_split) {
            switch (__kmp_barrier_release_pattern[bt]) {
            case bp_hyper_bar: {
                KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
                __kmp_hyper_barrier_release(bt, this_thr, gtid, tid, FALSE
                                            USE_ITT_BUILD_ARG(itt_sync_obj));
                break;
            }
            case bp_hierarchical_bar: {
                __kmp_hierarchical_barrier_release(bt, this_thr, gtid, tid, FALSE
                                                   USE_ITT_BUILD_ARG(itt_sync_obj));
                break;
            }
            case bp_tree_bar: {
                KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
                __kmp_tree_barrier_release(bt, this_thr, gtid, tid, FALSE
                                           USE_ITT_BUILD_ARG(itt_sync_obj));
                break;
            }
            default: {
                __kmp_linear_barrier_release(bt, this_thr, gtid, tid, FALSE
                                             USE_ITT_BUILD_ARG(itt_sync_obj));
            }
            }
            if (__kmp_tasking_mode != tskm_immediate_exec) {
                __kmp_task_team_sync(this_thr, team);
            }
        }

#if USE_ITT_BUILD
        if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
            __kmp_itt_barrier_finished(gtid, itt_sync_obj);
#endif /* USE_ITT_BUILD */
    } else { // Team is serialized.
        if (__kmp_tasking_mode != tskm_immediate_exec) {
#if OMP_41_ENABLED
            if (this_thr->th.th_task_team != NULL) {
                void *itt_sync_obj = NULL;
#if USE_ITT_NOTIFY
                if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
                    itt_sync_obj = __kmp_itt_barrier_object(gtid, bt, 1);
                    __kmp_itt_barrier_starting(gtid, itt_sync_obj);
                }
#endif
                KMP_DEBUG_ASSERT(this_thr->th.th_task_team->tt.tt_found_proxy_tasks == TRUE);
                __kmp_task_team_wait(this_thr, team
                                     USE_ITT_BUILD_ARG(itt_sync_obj));
                __kmp_task_team_setup(this_thr, team, 0);

#if USE_ITT_BUILD
                if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
                    __kmp_itt_barrier_finished(gtid, itt_sync_obj);
#endif /* USE_ITT_BUILD */
            }
#else
            // The task team should be NULL for serialized code (tasks will be executed immediately)
            KMP_DEBUG_ASSERT(team->t.t_task_team[this_thr->th.th_task_state] == NULL);
            KMP_DEBUG_ASSERT(this_thr->th.th_task_team == NULL);
#endif
        }
    }
    KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) is leaving with return value %d\n",
                  gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid), status));

#if OMPT_SUPPORT
    if (ompt_enabled) {
        if (ompt_callbacks.ompt_callback(ompt_event_barrier_end)) {
            ompt_callbacks.ompt_callback(ompt_event_barrier_end)(
                my_parallel_id, my_task_id);
        }
        this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
    }
#endif

    return status;
}
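// Finish the release phase of a split (no-wait) barrier on behalf of the
// master thread; workers were already released by __kmp_barrier().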
void
__kmp_end_split_barrier(enum barrier_type bt, int gtid)
{
    KMP_TIME_DEVELOPER_BLOCK(KMP_end_split_barrier);
    int tid = __kmp_tid_from_gtid(gtid);
    kmp_info_t *this_thr = __kmp_threads[gtid];
    kmp_team_t *team = this_thr->th.th_team;

    if (!team->t.t_serialized) {
        if (KMP_MASTER_GTID(gtid)) {
            switch (__kmp_barrier_release_pattern[bt]) {
            case bp_hyper_bar: {
                KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
                __kmp_hyper_barrier_release(bt, this_thr, gtid, tid, FALSE
                                            USE_ITT_BUILD_ARG(NULL));
                break;
            }
            case bp_hierarchical_bar: {
                __kmp_hierarchical_barrier_release(bt, this_thr, gtid, tid, FALSE
                                                   USE_ITT_BUILD_ARG(NULL));
                break;
            }
            case bp_tree_bar: {
                KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
                __kmp_tree_barrier_release(bt, this_thr, gtid, tid, FALSE
                                           USE_ITT_BUILD_ARG(NULL));
                break;
            }
            default: {
                __kmp_linear_barrier_release(bt, this_thr, gtid, tid, FALSE
                                             USE_ITT_BUILD_ARG(NULL));
            }
            }
            if (__kmp_tasking_mode != tskm_immediate_exec) {
                __kmp_task_team_sync(this_thr, team);
            }
        }
    }
}
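// Barrier executed by all threads at the end of a parallel region.  Only a
// gather is needed here; the matching release is done by the fork barrier of
// the next parallel region (or at shutdown).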
void
__kmp_join_barrier(int gtid)
{
    KMP_TIME_DEVELOPER_BLOCK(KMP_join_barrier);
    register kmp_info_t *this_thr = __kmp_threads[gtid];
    register kmp_team_t *team;
    register kmp_uint nproc;
    kmp_info_t *master_thread;
    int tid;
#ifdef KMP_DEBUG
    int team_id;
#endif /* KMP_DEBUG */
#if USE_ITT_BUILD
    void *itt_sync_obj = NULL;
# if USE_ITT_NOTIFY
    if (__itt_sync_create_ptr || KMP_ITT_DEBUG) // Don't call routine without need
        // Get object created at fork_barrier
        itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
# endif
#endif /* USE_ITT_BUILD */

    // Get current info
    team = this_thr->th.th_team;
    nproc = this_thr->th.th_team_nproc;
    KMP_DEBUG_ASSERT((int)nproc == team->t.t_nproc);
    tid = __kmp_tid_from_gtid(gtid);
#ifdef KMP_DEBUG
    team_id = team->t.t_id;
#endif /* KMP_DEBUG */
    master_thread = this_thr->th.th_team_master;
#ifdef KMP_DEBUG
    if (master_thread != team->t.t_threads[0]) {
        __kmp_print_structure();
    }
#endif /* KMP_DEBUG */
    KMP_DEBUG_ASSERT(master_thread == team->t.t_threads[0]);
    KMP_MB();

    // Verify state
    KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
    KMP_DEBUG_ASSERT(TCR_PTR(this_thr->th.th_team));
    KMP_DEBUG_ASSERT(TCR_PTR(this_thr->th.th_root));
    KMP_DEBUG_ASSERT(this_thr == team->t.t_threads[tid]);
    KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) arrived at join barrier\n", gtid, team_id, tid));

#if OMPT_SUPPORT
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_barrier_begin)) {
        ompt_callbacks.ompt_callback(ompt_event_barrier_begin)(
            team->t.ompt_team_info.parallel_id,
            team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
    }
    this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier;
#endif

    if (__kmp_tasking_mode == tskm_extra_barrier) {
        __kmp_tasking_barrier(team, this_thr, gtid);
        KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) past tasking barrier\n", gtid, team_id, tid));
    }
#ifdef KMP_DEBUG
    if (__kmp_tasking_mode != tskm_immediate_exec) {
        KA_TRACE(20, ("__kmp_join_barrier: T#%d, old team = %d, old task_team = %p, th_task_team = %p\n",
                      __kmp_gtid_from_thread(this_thr), team_id, team->t.t_task_team[this_thr->th.th_task_state],
                      this_thr->th.th_task_team));
        KMP_DEBUG_ASSERT(this_thr->th.th_task_team == team->t.t_task_team[this_thr->th.th_task_state]);
    }
#endif /* KMP_DEBUG */

    /* Copy the blocktime info to the thread, where __kmp_wait_template() can access it when the
       team struct is not guaranteed to exist. Doing these loads causes a cache miss, so we do
       not perform the copy if blocktime=infinite, since the values are not used in that case. */
    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
        this_thr->th.th_team_bt_intervals = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
        this_thr->th.th_team_bt_set = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
    }

#if USE_ITT_BUILD
    if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
        __kmp_itt_barrier_starting(gtid, itt_sync_obj);
#endif /* USE_ITT_BUILD */

    switch (__kmp_barrier_gather_pattern[bs_forkjoin_barrier]) {
    case bp_hyper_bar: {
        KMP_ASSERT(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]);
        __kmp_hyper_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, NULL
                                   USE_ITT_BUILD_ARG(itt_sync_obj));
        break;
    }
    case bp_hierarchical_bar: {
        __kmp_hierarchical_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, NULL
                                          USE_ITT_BUILD_ARG(itt_sync_obj));
        break;
    }
    case bp_tree_bar: {
        KMP_ASSERT(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]);
        __kmp_tree_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, NULL
                                  USE_ITT_BUILD_ARG(itt_sync_obj));
        break;
    }
    default: {
        __kmp_linear_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, NULL
                                    USE_ITT_BUILD_ARG(itt_sync_obj));
    }
    }

    /* From this point on, the team data structure may be deallocated at any time by the master
       thread - it is unsafe to reference it in any of the worker threads. */
    if (KMP_MASTER_TID(tid)) {
        if (__kmp_tasking_mode != tskm_immediate_exec) {
            __kmp_task_team_wait(this_thr, team
                                 USE_ITT_BUILD_ARG(itt_sync_obj));
        }
#if USE_ITT_BUILD
        if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
            __kmp_itt_barrier_middle(gtid, itt_sync_obj);
#endif /* USE_ITT_BUILD */

# if USE_ITT_BUILD && USE_ITT_NOTIFY
        // Join barrier - report frame end
        if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) && __kmp_forkjoin_frames_mode &&
#if OMP_40_ENABLED
            this_thr->th.th_teams_microtask == NULL &&
#endif
            team->t.t_active_level == 1)
        {
            kmp_uint64 cur_time = __itt_get_timestamp();
            ident_t * loc = team->t.t_ident;
            kmp_info_t **other_threads = team->t.t_threads;
            int nproc = this_thr->th.th_team_nproc;
            int i;
            switch(__kmp_forkjoin_frames_mode) {
            case 1:
                __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, loc, nproc);
                break;
            case 2:
                __kmp_itt_frame_submit(gtid, this_thr->th.th_bar_min_time, cur_time, 1, loc, nproc);
                break;
            case 3:
                if (__itt_metadata_add_ptr) {
                    // Initialize with master's wait time
                    kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time;
                    for (i=1; i<nproc; ++i) {
                        delta += (cur_time - other_threads[i]->th.th_bar_arrive_time);
                    }
                    __kmp_itt_metadata_imbalance(gtid, this_thr->th.th_frame_time, cur_time, delta, 0);
                }
                __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, loc, nproc);
                this_thr->th.th_frame_time = cur_time;
                break;
            }
        }
# endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
    }
#if USE_ITT_BUILD
    else {
        if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
            __kmp_itt_barrier_middle(gtid, itt_sync_obj);
    }
#endif /* USE_ITT_BUILD */

#if KMP_DEBUG
    if (KMP_MASTER_TID(tid)) {
        KA_TRACE(15, ("__kmp_join_barrier: T#%d(%d:%d) says all %d team threads arrived\n",
                      gtid, team_id, tid, nproc));
    }
#endif /* KMP_DEBUG */

    KMP_MB(); // Flush all pending memory write invalidates.
    KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) leaving\n", gtid, team_id, tid));

#if OMPT_SUPPORT
    if (ompt_enabled) {
        if (ompt_callbacks.ompt_callback(ompt_event_barrier_end)) {
            ompt_callbacks.ompt_callback(ompt_event_barrier_end)(
                team->t.ompt_team_info.parallel_id,
                team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
        }
        // return to default state
        this_thr->th.ompt_thread_info.state = ompt_state_overhead;
    }
#endif
}
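// Barrier executed by all threads at the start of a parallel region.  The
// master sets up the task team and releases the workers; workers then pick up
// the new team pointer, ICVs, and affinity settings.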
void
__kmp_fork_barrier(int gtid, int tid)
{
    KMP_TIME_DEVELOPER_BLOCK(KMP_fork_barrier);
    kmp_info_t *this_thr = __kmp_threads[gtid];
    kmp_team_t *team = (tid == 0) ? this_thr->th.th_team : NULL;
#if USE_ITT_BUILD
    void * itt_sync_obj = NULL;
#endif /* USE_ITT_BUILD */

    KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d:%d) has arrived\n",
                  gtid, (team != NULL) ? team->t.t_id : -1, tid));

    // th_team pointer is only valid for the master thread here
    if (KMP_MASTER_TID(tid)) {
#if USE_ITT_BUILD && USE_ITT_NOTIFY
        if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
            // Create itt barrier object
            itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 1);
            __kmp_itt_barrier_middle(gtid, itt_sync_obj); // Call acquired/releasing
        }
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */

#ifdef KMP_DEBUG
        register kmp_info_t **other_threads = team->t.t_threads;
        register int i;

        // Verify state
        KMP_MB();

        for (i=1; i<team->t.t_nproc; ++i) {
            KA_TRACE(500, ("__kmp_fork_barrier: T#%d(%d:0) checking T#%d(%d:%d) fork go == %u.\n",
                           gtid, team->t.t_id, other_threads[i]->th.th_info.ds.ds_gtid,
                           team->t.t_id, other_threads[i]->th.th_info.ds.ds_tid,
                           other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go));
            KMP_DEBUG_ASSERT((TCR_4(other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go)
                              & ~(KMP_BARRIER_SLEEP_STATE))
                             == KMP_INIT_BARRIER_STATE);
            KMP_DEBUG_ASSERT(other_threads[i]->th.th_team == team);
        }
#endif

        if (__kmp_tasking_mode != tskm_immediate_exec) {
            __kmp_task_team_setup(this_thr, team, 0);  // 0 indicates setup current task team if nthreads > 1
        }

        /* The master thread may have changed its blocktime between the join barrier and the
           fork barrier. Copy the blocktime info to the thread, where __kmp_wait_template()
           can access it when the team struct is not guaranteed to exist. */
        if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
            this_thr->th.th_team_bt_intervals = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
            this_thr->th.th_team_bt_set = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
        }
    } // master

    switch (__kmp_barrier_release_pattern[bs_forkjoin_barrier]) {
    case bp_hyper_bar: {
        KMP_ASSERT(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]);
        __kmp_hyper_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, TRUE
                                    USE_ITT_BUILD_ARG(itt_sync_obj));
        break;
    }
    case bp_hierarchical_bar: {
        __kmp_hierarchical_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, TRUE
                                           USE_ITT_BUILD_ARG(itt_sync_obj));
        break;
    }
    case bp_tree_bar: {
        KMP_ASSERT(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]);
        __kmp_tree_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, TRUE
                                   USE_ITT_BUILD_ARG(itt_sync_obj));
        break;
    }
    default: {
        __kmp_linear_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, TRUE
                                     USE_ITT_BUILD_ARG(itt_sync_obj));
    }
    }

    // Early exit for reaping threads releasing forkjoin barrier
    if (TCR_4(__kmp_global.g.g_done)) {
        this_thr->th.th_task_team = NULL;

#if USE_ITT_BUILD && USE_ITT_NOTIFY
        if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
            if (!KMP_MASTER_TID(tid)) {
                itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
                if (itt_sync_obj)
                    __kmp_itt_barrier_finished(gtid, itt_sync_obj);
            }
        }
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
        KA_TRACE(10, ("__kmp_fork_barrier: T#%d is leaving early\n", gtid));
        return;
    }

    /* We can now assume that a valid team structure has been allocated by the master and
       propagated to all worker threads. The current thread, however, may not be part of the
       team, so we can't blindly assume that the team pointer is non-null.  */
    team = (kmp_team_t *)TCR_PTR(this_thr->th.th_team);
    KMP_DEBUG_ASSERT(team != NULL);
    tid = __kmp_tid_from_gtid(gtid);

#if KMP_BARRIER_ICV_PULL
    /* The master thread's copy of the ICVs was set up on its implicit taskdata before this
       point; workers pull it from the master's fixed ICVs in its barrier data. */
    {
        KMP_TIME_DEVELOPER_BLOCK(USER_icv_copy);
        if (!KMP_MASTER_TID(tid)) { // master thread already has ICVs
            // Copy the initial ICVs from the master's thread struct to the implicit task for this tid.
            KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d) is PULLing ICVs\n", gtid, tid));
            __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid, FALSE);
            copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
                      &team->t.t_threads[0]->th.th_bar[bs_forkjoin_barrier].bb.th_fixed_icvs);
        }
    }
#endif // KMP_BARRIER_ICV_PULL

    if (__kmp_tasking_mode != tskm_immediate_exec) {
        __kmp_task_team_sync(this_thr, team);
    }

#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
    kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
    if (proc_bind == proc_bind_intel) {
#endif
#if KMP_AFFINITY_SUPPORTED
        // Call dynamic affinity settings
        if (__kmp_affinity_type == affinity_balanced && team->t.t_size_changed) {
            __kmp_balanced_affinity(tid, team->t.t_nproc);
        }
#endif // KMP_AFFINITY_SUPPORTED
#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
    }
    else if (proc_bind != proc_bind_false) {
        if (this_thr->th.th_new_place == this_thr->th.th_current_place) {
            KA_TRACE(100, ("__kmp_fork_barrier: T#%d already in correct place %d\n",
                           __kmp_gtid_from_thread(this_thr), this_thr->th.th_current_place));
        }
        else {
            __kmp_affinity_set_place(gtid);
        }
    }
#endif

#if USE_ITT_BUILD && USE_ITT_NOTIFY
    if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
        if (!KMP_MASTER_TID(tid)) {
            // Get correct barrier object
            itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
            __kmp_itt_barrier_finished(gtid, itt_sync_obj);  // Workers call acquired
        } // (prepare called inside barrier_release)
    }
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
    KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d:%d) is leaving\n", gtid, team->t.t_id, tid));
}
void
__kmp_setup_icv_copy(kmp_team_t *team, int new_nproc, kmp_internal_control_t *new_icvs, ident_t *loc)
{
    KMP_TIME_DEVELOPER_BLOCK(KMP_setup_icv_copy);

    KMP_DEBUG_ASSERT(team && new_nproc && new_icvs);
    KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);

#if KMP_BARRIER_ICV_PULL
    /* Copy the ICVs into the master thread's fixed ICVs in its barrier data, where all of the
       worker threads can access them and make their own copies after the barrier. */
    KMP_DEBUG_ASSERT(team->t.t_threads[0]);  // The threads arrays should be allocated.
    copy_icvs(&team->t.t_threads[0]->th.th_bar[bs_forkjoin_barrier].bb.th_fixed_icvs, new_icvs);
    KF_TRACE(10, ("__kmp_setup_icv_copy: PULL: T#%d this_thread=%p team=%p\n",
                  0, team->t.t_threads[0], team));
#elif KMP_BARRIER_ICV_PUSH
    // The ICVs will be propagated in the fork barrier, so nothing needs to be done here.
    KF_TRACE(10, ("__kmp_setup_icv_copy: PUSH: T#%d this_thread=%p team=%p\n",
                  0, team->t.t_threads[0], team));
#else
    // Copy the ICVs to each of the non-master threads.  This takes O(nthreads) time.
    ngo_load(new_icvs);
    KMP_DEBUG_ASSERT(team->t.t_threads[0]);  // The threads arrays should be allocated.
    for (int f=1; f<new_nproc; ++f) { // Skip the master thread
        KF_TRACE(10, ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n",
                      f, team->t.t_threads[f], team));
        __kmp_init_implicit_task(loc, team->t.t_threads[f], team, f, FALSE);
        ngo_store_icvs(&team->t.t_implicit_task_taskdata[f].td_icvs, new_icvs);
        KF_TRACE(10, ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n",
                      f, team->t.t_threads[f], team));
    }
    ngo_sync();
#endif // KMP_BARRIER_ICV_PULL
}