#ifndef KMP_WAIT_RELEASE_H
#define KMP_WAIT_RELEASE_H

#include "kmp.h"
#include "kmp_itt.h"
#include "kmp_stats.h"

enum flag_type {
    flag32,       // 32-bit flag
    flag64,       // 64-bit flag
    flag_oncore   // special 64-bit flag for the hierarchical (on-core) barrier
};

// Base class for a wait/release flag: a volatile location that another thread
// will modify, tagged with the kind of flag stored there.
template <typename P>
class kmp_flag {
    volatile P *loc;  // flag location, written by the releasing thread
    flag_type t;      // type of the flag stored at loc
public:
    typedef P flag_t;
    kmp_flag(volatile P *p, flag_type ft) : loc(p), t(ft) {}
    volatile P * get() { return loc; }
    void set(volatile P *new_loc) { loc = new_loc; }
    flag_type get_type() { return t; }
};
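/* Illustrative aside (not part of the runtime): the flag classes below wrap a
   shared word that one thread spins/sleeps on and another thread bumps to
   release it. A minimal standalone analogue, assuming only the C++11 standard
   library, might look like this:

       #include <atomic>
       #include <thread>

       std::atomic<unsigned> go{0};

       void waiter(unsigned expected) {
           // spin until the releaser advances the flag to the expected value
           while (go.load(std::memory_order_acquire) != expected) { }
       }

       void releaser(unsigned next) {
           go.store(next, std::memory_order_release);  // release the waiter
       }

       // e.g. std::thread t(waiter, 1u); releaser(1u); t.join();

   The real classes add what a runtime needs on top of this: a typed location
   (get()/set()), a sleep bit so waiters can block in the OS instead of spinning
   forever, and bookkeeping of which kmp_info_t threads are waiting. */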
#if ! KMP_USE_MONITOR
# if KMP_OS_UNIX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
   // The hardware timestamp is used to reduce polling overhead (ticks, not ns).
   extern double __kmp_ticks_per_nsec;
#  define KMP_NOW() __kmp_hardware_timestamp()
#  define KMP_BLOCKTIME_INTERVAL() (__kmp_dflt_blocktime * KMP_USEC_PER_SEC * __kmp_ticks_per_nsec)
#  define KMP_BLOCKING(goal, count) ((goal) > KMP_NOW())
# else
   // System time is retrieved only sporadically while blocking.
   extern kmp_uint64 __kmp_now_nsec();
#  define KMP_NOW() __kmp_now_nsec()
#  define KMP_BLOCKTIME_INTERVAL() (__kmp_dflt_blocktime * KMP_USEC_PER_SEC)
#  define KMP_BLOCKING(goal, count) ((count) % 1000 != 0 || (goal) > KMP_NOW())
# endif
#endif

/* Spin-wait loop that first pauses, then yields, then sleeps. A thread that
   calls __kmp_wait_* must make certain that another thread calls __kmp_release
   to wake it back up, or deadlock can result. */
template <class C>
static inline void
__kmp_wait_template(kmp_info_t *this_thr, C *flag, int final_spin
                    USE_ITT_BUILD_ARG(void * itt_sync_obj))
{
    // NOTE: We may not belong to a team at this point.
    volatile typename C::flag_t *spin = flag->get();
    kmp_uint32 spins;
    kmp_uint32 hibernate;
    int th_gtid;
    int tasks_completed = FALSE;
    int oversubscribed;
#if ! KMP_USE_MONITOR
    kmp_uint64 poll_count;
    kmp_uint64 hibernate_goal;
#endif

    KMP_FSYNC_SPIN_INIT(spin, NULL);
    if (flag->done_check()) {
        // Flag was already released; nothing to wait for.
        KMP_FSYNC_SPIN_ACQUIRED(spin);
        return;
    }
    th_gtid = this_thr->th.th_info.ds.ds_gtid;
    KA_TRACE(20, ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag));
#if KMP_STATS_ENABLED
    stats_state_e thread_state = KMP_GET_THREAD_STATE();
#endif

#if OMPT_SUPPORT && OMPT_BLAME
    ompt_state_t ompt_state = this_thr->th.ompt_thread_info.state;
    if (ompt_enabled &&
        ompt_state != ompt_state_undefined) {
        if (ompt_state == ompt_state_idle) {
            if (ompt_callbacks.ompt_callback(ompt_event_idle_begin)) {
                ompt_callbacks.ompt_callback(ompt_event_idle_begin)(th_gtid + 1);
            }
        }
        else if (ompt_callbacks.ompt_callback(ompt_event_wait_barrier_begin)) {
            KMP_DEBUG_ASSERT(ompt_state == ompt_state_wait_barrier ||
                             ompt_state == ompt_state_wait_barrier_implicit ||
                             ompt_state == ompt_state_wait_barrier_explicit);

            ompt_lw_taskteam_t *team = this_thr->th.th_team->t.ompt_serialized_team_info;
            ompt_parallel_id_t pId;
            ompt_task_id_t tId;
            if (team) {
                pId = team->ompt_team_info.parallel_id;
                tId = team->ompt_task_info.task_id;
            }
            else {
                pId = this_thr->th.th_team->t.ompt_team_info.parallel_id;
                tId = this_thr->th.th_current_task->ompt_task_info.task_id;
            }
            ompt_callbacks.ompt_callback(ompt_event_wait_barrier_begin)(pId, tId);
        }
    }
#endif

    // Setup for waiting
    KMP_INIT_YIELD(spins);
    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
#if KMP_USE_MONITOR
        // The worker threads cannot rely on the team struct existing at this
        // point; use the blocktime values cached in the thread struct instead.
#ifdef KMP_ADJUST_BLOCKTIME
        if (__kmp_zero_bt && !this_thr->th.th_team_bt_set)
            // Force immediate suspend if blocktime was not set by the user.
            hibernate = 0;
        else
            hibernate = this_thr->th.th_team_bt_intervals;
#else
        hibernate = this_thr->th.th_team_bt_intervals;
#endif /* KMP_ADJUST_BLOCKTIME */

        /* If the blocktime is nonzero, spin-wait for the entirety of the
           specified #intervals, plus up to one interval more, so the thread
           does not go to sleep too soon. */
        if (hibernate != 0)
            hibernate++;

        // Add in the current time value.
        hibernate += TCR_4(__kmp_global.g.g_time.dt.t_value);
        KF_TRACE(20, ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",
                      th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate,
                      hibernate - __kmp_global.g.g_time.dt.t_value));
#else
        hibernate_goal = KMP_NOW() + KMP_BLOCKTIME_INTERVAL();
        poll_count = 0;
#endif // KMP_USE_MONITOR
    }

    oversubscribed = (TCR_4(__kmp_nth) > __kmp_avail_proc);
    // Main wait spin loop
    while (flag->notdone_check()) {
        int in_pool;
        kmp_task_team_t * task_team = NULL;
        if (__kmp_tasking_mode != tskm_immediate_exec) {
            task_team = this_thr->th.th_task_team;
            /* The thread's task team pointer can be NULL if the thread is newly
               created, all tasks have completed, or tasking is off for this
               region (serialized region, or KMP_TASKING=0). */
            if (task_team != NULL) {
                if (TCR_SYNC_4(task_team->tt.tt_active)) {
                    if (KMP_TASKING_ENABLED(task_team))
                        flag->execute_tasks(this_thr, th_gtid, final_spin, &tasks_completed
                                            USE_ITT_BUILD_ARG(itt_sync_obj), 0);
                }
                else {
                    KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid));
                    this_thr->th.th_task_team = NULL;
                }
            }
        }
        KMP_FSYNC_SPIN_PREPARE(spin);
        if (TCR_4(__kmp_global.g.g_done)) {
            if (__kmp_global.g.g_abort)
                __kmp_abort_thread();
            break;
        }

        // If we are oversubscribed, or have waited a bit (and
        // KMP_LIBRARY=throughput), then yield.
        KMP_YIELD(oversubscribed);
        // Yield based on the accumulated spin count.
        KMP_YIELD_SPIN(spins);
        // Check if this thread was transferred between a team and the thread
        // pool while spinning, and update the pool-active count accordingly.
        in_pool = !!TCR_4(this_thr->th.th_in_pool);
        if (in_pool != !!this_thr->th.th_active_in_pool) {
            if (in_pool) { // Recently transferred from team to pool
                KMP_TEST_THEN_INC32((kmp_int32 *)&__kmp_thread_pool_active_nth);
                this_thr->th.th_active_in_pool = TRUE;
                /* We cannot assert that __kmp_thread_pool_active_nth is bounded
                   by __kmp_thread_pool_nth here: the latter is updated under the
                   fork/join lock while the former is updated asynchronously by
                   workers, so the two can briefly disagree. */
            }
            else { // Recently transferred from pool to team
                KMP_TEST_THEN_DEC32((kmp_int32 *)&__kmp_thread_pool_active_nth);
                KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
                this_thr->th.th_active_in_pool = FALSE;
            }
        }

#if KMP_STATS_ENABLED
        // If the thread has been signalled idle at a fork/join barrier, switch
        // the stats state to IDLE and start the idle timer.
        if (this_thr->th.th_stats->isIdle() && KMP_GET_THREAD_STATE() == FORK_JOIN_BARRIER) {
            KMP_SET_THREAD_STATE(IDLE);
            KMP_PUSH_PARTITIONED_TIMER(OMP_idle);
        }
#endif
        // Don't suspend if KMP_BLOCKTIME is set to "infinite".
        if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
            continue;

        // Don't suspend if there is a likelihood of new tasks being spawned.
        if ((task_team != NULL) && TCR_4(task_team->tt.tt_found_tasks))
            continue;

#if KMP_USE_MONITOR
        // If we have waited a bit more, fall asleep.
        if (TCR_4(__kmp_global.g.g_time.dt.t_value) < hibernate)
            continue;
#else
        if (KMP_BLOCKING(hibernate_goal, poll_count++))
            continue;
#endif

        KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid));
        flag->suspend(th_gtid);

        if (TCR_4(__kmp_global.g.g_done)) {
            if (__kmp_global.g.g_abort)
                __kmp_abort_thread();
            break;
        }
    } // while
#if OMPT_SUPPORT && OMPT_BLAME
    if (ompt_enabled &&
        ompt_state != ompt_state_undefined) {
        if (ompt_state == ompt_state_idle) {
            if (ompt_callbacks.ompt_callback(ompt_event_idle_end)) {
                ompt_callbacks.ompt_callback(ompt_event_idle_end)(th_gtid + 1);
            }
        }
        else if (ompt_callbacks.ompt_callback(ompt_event_wait_barrier_end)) {
            KMP_DEBUG_ASSERT(ompt_state == ompt_state_wait_barrier ||
                             ompt_state == ompt_state_wait_barrier_implicit ||
                             ompt_state == ompt_state_wait_barrier_explicit);

            ompt_lw_taskteam_t *team = this_thr->th.th_team->t.ompt_serialized_team_info;
            ompt_parallel_id_t pId;
            ompt_task_id_t tId;
            if (team) {
                pId = team->ompt_team_info.parallel_id;
                tId = team->ompt_task_info.task_id;
            }
            else {
                pId = this_thr->th.th_team->t.ompt_team_info.parallel_id;
                tId = this_thr->th.th_current_task->ompt_task_info.task_id;
            }
            ompt_callbacks.ompt_callback(ompt_event_wait_barrier_end)(pId, tId);
        }
    }
#endif
#if KMP_STATS_ENABLED
    // If we were put into idle state, pop that off the state stack.
    if (KMP_GET_THREAD_STATE() == IDLE) {
        KMP_POP_PARTITIONED_TIMER();
        KMP_SET_THREAD_STATE(thread_state);
        this_thr->th.th_stats->resetIdleFlag();
    }
#endif

    KMP_FSYNC_SPIN_ACQUIRED(spin);
}
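/* Illustrative aside (not part of the runtime): __kmp_wait_template progresses
   from spinning to yielding to an OS sleep once the blocktime budget is spent.
   A minimal standalone sketch of that progression, assuming only the C++11
   standard library (names like spin_then_yield_then_sleep are invented for
   illustration), could be:

       #include <atomic>
       #include <chrono>
       #include <condition_variable>
       #include <mutex>
       #include <thread>

       std::atomic<bool> released{false};
       std::mutex m;
       std::condition_variable cv;

       void spin_then_yield_then_sleep(std::chrono::microseconds blocktime) {
           auto goal = std::chrono::steady_clock::now() + blocktime;
           while (!released.load(std::memory_order_acquire)) {
               if (std::chrono::steady_clock::now() < goal) {
                   std::this_thread::yield();       // cheap: stay runnable
                   continue;
               }
               std::unique_lock<std::mutex> lk(m);  // budget spent: block in the OS
               cv.wait(lk, [] { return released.load(std::memory_order_acquire); });
           }
       }

       void release() {
           {
               std::lock_guard<std::mutex> lk(m);
               released.store(true, std::memory_order_release);
           }
           cv.notify_all();                         // wake any sleeper, as flag->resume() does
       }

   The real code layers task execution, oversubscription-aware yielding, stats,
   and OMPT callbacks on top of the same skeleton. */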
/* Release any threads waiting on the flag by releasing the flag itself and
   resuming any waiter indicated by the sleep bit(s). A thread that entered
   __kmp_wait_template must eventually be released this way, or it can sleep
   forever. */
template <class C>
static inline void
__kmp_release_template(C *flag)
{
#ifdef KMP_DEBUG
    int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
#endif
    KF_TRACE(20, ("__kmp_release: T#%d releasing flag(%p)\n", gtid, flag->get()));
    KMP_DEBUG_ASSERT(flag->get());
    KMP_FSYNC_RELEASING(flag->get());

    flag->internal_release();

    KF_TRACE(100, ("__kmp_release: T#%d set new spin(%p)=%d\n", gtid, flag->get(), *(flag->get())));

    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
        // Only need to check the sleep bit if infinite block time is not set.
        if (flag->is_any_sleeping()) { // Are any waiters on this flag asleep?
            for (unsigned int i = 0; i < flag->get_num_waiters(); ++i) {
                kmp_info_t *waiter = flag->get_waiter(i);
                if (waiter) {
                    int wait_gtid = waiter->th.th_info.ds.ds_gtid;
                    // Wake up the sleeping thread.
                    KF_TRACE(50, ("__kmp_release: T#%d waking up thread T#%d since sleep flag(%p) set\n",
                                  gtid, wait_gtid, flag->get()));
                    flag->resume(wait_gtid);
                }
            }
        }
    }
}
template <typename FlagType>
struct flag_traits {};

template <>
struct flag_traits<kmp_uint32> {
    typedef kmp_uint32 flag_t;
    static const flag_type t = flag32;
    static inline flag_t tcr(flag_t f) { return TCR_4(f); }
    static inline flag_t test_then_add4(volatile flag_t *f) { return KMP_TEST_THEN_ADD4_32((volatile kmp_int32 *)f); }
    static inline flag_t test_then_or(volatile flag_t *f, flag_t v) { return KMP_TEST_THEN_OR32((volatile kmp_int32 *)f, v); }
    static inline flag_t test_then_and(volatile flag_t *f, flag_t v) { return KMP_TEST_THEN_AND32((volatile kmp_int32 *)f, v); }
};
template <>
struct flag_traits<kmp_uint64> {
    typedef kmp_uint64 flag_t;
    static const flag_type t = flag64;
    static inline flag_t tcr(flag_t f) { return TCR_8(f); }
    static inline flag_t test_then_add4(volatile flag_t *f) { return KMP_TEST_THEN_ADD4_64((volatile kmp_int64 *)f); }
    static inline flag_t test_then_or(volatile flag_t *f, flag_t v) { return KMP_TEST_THEN_OR64((volatile kmp_int64 *)f, v); }
    static inline flag_t test_then_and(volatile flag_t *f, flag_t v) { return KMP_TEST_THEN_AND64((volatile kmp_int64 *)f, v); }
};
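/* Illustrative aside (not part of the runtime): the traits structs exist so the
   templated flag code can stay width-agnostic while the atomic primitives are
   width-specific. The same pattern with the C++11 standard library (invented
   names, for illustration only) would look like:

       #include <atomic>
       #include <cstdint>

       template <typename T> struct atomic_traits;

       template <> struct atomic_traits<std::uint32_t> {
           static std::uint32_t fetch_or(std::atomic<std::uint32_t> &f, std::uint32_t v) {
               return f.fetch_or(v, std::memory_order_acq_rel);
           }
       };
       template <> struct atomic_traits<std::uint64_t> {
           static std::uint64_t fetch_or(std::atomic<std::uint64_t> &f, std::uint64_t v) {
               return f.fetch_or(v, std::memory_order_acq_rel);
           }
       };

       template <typename T> T set_bit(std::atomic<T> &flag, T bit) {
           return atomic_traits<T>::fetch_or(flag, bit);  // old value, as test_then_or returns
       }

   Here KMP_TEST_THEN_* play the role of fetch_or/fetch_and/fetch_add, and
   TCR_4/TCR_8 are plain reads of the shared word. */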
template <typename FlagType>
class kmp_basic_flag : public kmp_flag<FlagType> {
    typedef flag_traits<FlagType> traits_type;
    FlagType checker;                  // value the flag must reach to be "done"
    kmp_info_t *waiting_threads[1];    // threads sleeping on this flag
    kmp_uint32 num_waiting_threads;    // number of threads sleeping on this flag
public:
    kmp_basic_flag(volatile FlagType *p) : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
    kmp_basic_flag(volatile FlagType *p, kmp_info_t *thr) : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(1) {
        waiting_threads[0] = thr;
    }
    kmp_basic_flag(volatile FlagType *p, FlagType c) : kmp_flag<FlagType>(p, traits_type::t), checker(c), num_waiting_threads(0) {}
    // Return the thread waiting at index i.
    kmp_info_t * get_waiter(kmp_uint32 i) {
        KMP_DEBUG_ASSERT(i < num_waiting_threads);
        return waiting_threads[i];
    }
    kmp_uint32 get_num_waiters() { return num_waiting_threads; }
    // Register thr as the single waiter on this flag.
    void set_waiter(kmp_info_t *thr) {
        waiting_threads[0] = thr;
        num_waiting_threads = 1;
    }
    // True once the flag has been released (reached the checker value).
    bool done_check() { return traits_type::tcr(*(this->get())) == checker; }
    bool done_check_val(FlagType old_loc) { return old_loc == checker; }
    bool notdone_check() { return traits_type::tcr(*(this->get())) != checker; }
    // Release the flag: atomically advance it by the barrier state bump (4).
    void internal_release() {
        (void) traits_type::test_then_add4((volatile FlagType *)this->get());
    }
    // Set the sleep bit; returns the previous flag value.
    FlagType set_sleeping() {
        return traits_type::test_then_or((volatile FlagType *)this->get(), KMP_BARRIER_SLEEP_STATE);
    }
    // Clear the sleep bit; returns the previous flag value.
    FlagType unset_sleeping() {
        return traits_type::test_then_and((volatile FlagType *)this->get(), ~KMP_BARRIER_SLEEP_STATE);
    }
    bool is_sleeping_val(FlagType old_loc) { return old_loc & KMP_BARRIER_SLEEP_STATE; }
    bool is_sleeping() { return is_sleeping_val(*(this->get())); }
    bool is_any_sleeping() { return is_sleeping_val(*(this->get())); }
    kmp_uint8 *get_stolen() { return NULL; }
    enum barrier_type get_bt() { return bs_last_barrier; }
};
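/* Illustrative aside (not part of the runtime): the flag word does double duty.
   Bits above the low status bits count barrier "go" states (internal_release
   adds a state bump of 4), while the sleep bit is set by a waiter before it
   blocks so the releaser knows a wake-up is needed. A standalone sketch of that
   encoding, assuming a 64-bit word and these bit assignments (illustration
   only):

       #include <atomic>
       #include <cstdint>

       constexpr std::uint64_t SLEEP_BIT  = 1;   // waiter is (or is about to be) asleep
       constexpr std::uint64_t STATE_BUMP = 4;   // one barrier release

       std::atomic<std::uint64_t> flag{0};

       // Returns true if the flag had not been released yet when we marked ourselves asleep.
       bool mark_sleeping()    { return (flag.fetch_or(SLEEP_BIT) & ~SLEEP_BIT) == 0; }
       std::uint64_t release() { return flag.fetch_add(STATE_BUMP); }  // old value: sleep bit says whether to wake someone

   Because set_sleeping()/unset_sleeping() return the *old* value, a waiter can
   detect a release that raced with it going to sleep and back out. */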
class kmp_flag_32 : public kmp_basic_flag<kmp_uint32> {
public:
    kmp_flag_32(volatile kmp_uint32 *p) : kmp_basic_flag<kmp_uint32>(p) {}
    kmp_flag_32(volatile kmp_uint32 *p, kmp_info_t *thr) : kmp_basic_flag<kmp_uint32>(p, thr) {}
    kmp_flag_32(volatile kmp_uint32 *p, kmp_uint32 c) : kmp_basic_flag<kmp_uint32>(p, c) {}
    void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); }
    void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); }
    int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished
                      USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained) {
        return __kmp_execute_tasks_32(this_thr, gtid, this, final_spin, thread_finished
                                      USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
    }
    void wait(kmp_info_t *this_thr, int final_spin
              USE_ITT_BUILD_ARG(void * itt_sync_obj)) {
        __kmp_wait_template(this_thr, this, final_spin
                            USE_ITT_BUILD_ARG(itt_sync_obj));
    }
    void release() { __kmp_release_template(this); }
};
class kmp_flag_64 : public kmp_basic_flag<kmp_uint64> {
public:
    kmp_flag_64(volatile kmp_uint64 *p) : kmp_basic_flag<kmp_uint64>(p) {}
    kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr) : kmp_basic_flag<kmp_uint64>(p, thr) {}
    kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c) : kmp_basic_flag<kmp_uint64>(p, c) {}
    void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); }
    void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); }
    int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished
                      USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained) {
        return __kmp_execute_tasks_64(this_thr, gtid, this, final_spin, thread_finished
                                      USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
    }
    void wait(kmp_info_t *this_thr, int final_spin
              USE_ITT_BUILD_ARG(void * itt_sync_obj)) {
        __kmp_wait_template(this_thr, this, final_spin
                            USE_ITT_BUILD_ARG(itt_sync_obj));
    }
    void release() { __kmp_release_template(this); }
};
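/* Illustrative aside (not part of this header): barrier code elsewhere in the
   runtime builds these objects around a thread's go/arrived counters. A
   simplified picture of the two sides (field names abbreviated, for
   illustration only):

       // Waiting side: block until my b_go counter is bumped to the expected value.
       kmp_flag_64 flag(&thr_bar->b_go, (kmp_uint64)KMP_BARRIER_STATE_BUMP);
       flag.wait(this_thr, TRUE
                 USE_ITT_BUILD_ARG(itt_sync_obj));

       // Releasing side: bump the child's b_go and wake it if it set the sleep bit.
       kmp_flag_64 flag(&child_thr->th.th_bar[bt].bb.b_go, child_thr);
       flag.release();

   The constructor taking a checker value is the waiter's (done_check compares
   against it); the constructor taking a kmp_info_t* is the releaser's, so
   __kmp_release_template knows which thread may need resume(). */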
// 64-bit flag for the hierarchical ("on-core") barrier: each byte of the word
// is a separate sub-flag for one thread on the core.
class kmp_flag_oncore : public kmp_flag<kmp_uint64> {
    kmp_uint64 checker;
    kmp_info_t *waiting_threads[1];
    kmp_uint32 num_waiting_threads;
    kmp_uint32 offset;      // byte of the flag that this operation is interested in
    bool flag_switch;       // indicates a switch to the thread's own flag location
    enum barrier_type bt;   // barrier type
    kmp_info_t *this_thr;   // thread that may be redirected to a different flag location
#if USE_ITT_BUILD
    void *itt_sync_obj;     // ITT object to pass along to the new flag location
#endif
    unsigned char& byteref(volatile kmp_uint64* loc, size_t offset) { return ((unsigned char *)loc)[offset]; }
public:
    kmp_flag_oncore(volatile kmp_uint64 *p)
        : kmp_flag<kmp_uint64>(p, flag_oncore), num_waiting_threads(0), flag_switch(false) {}
    kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx)
        : kmp_flag<kmp_uint64>(p, flag_oncore), num_waiting_threads(0), offset(idx), flag_switch(false) {}
    kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx, enum barrier_type bar_t,
                    kmp_info_t *thr
                    USE_ITT_BUILD_ARG(void *itt))
        : kmp_flag<kmp_uint64>(p, flag_oncore), checker(c), num_waiting_threads(0), offset(idx),
          flag_switch(false), bt(bar_t), this_thr(thr)
          USE_ITT_BUILD_ARG(itt_sync_obj(itt))
        {}
    kmp_info_t * get_waiter(kmp_uint32 i) {
        KMP_DEBUG_ASSERT(i < num_waiting_threads);
        return waiting_threads[i];
    }
    kmp_uint32 get_num_waiters() { return num_waiting_threads; }
    void set_waiter(kmp_info_t *thr) {
        waiting_threads[0] = thr;
        num_waiting_threads = 1;
    }
    bool done_check_val(kmp_uint64 old_loc) { return byteref(&old_loc,offset) == checker; }
    bool done_check() { return done_check_val(*get()); }
    bool notdone_check() {
        // Calculate flag_switch
        if (this_thr->th.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG)
            flag_switch = true;
        if (byteref(get(),offset) != 1 && !flag_switch)
            return true;
        else if (flag_switch) {
            this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING;
            kmp_flag_64 flag(&this_thr->th.th_bar[bt].bb.b_go, (kmp_uint64)KMP_BARRIER_STATE_BUMP);
            __kmp_wait_64(this_thr, &flag, TRUE
                          USE_ITT_BUILD_ARG(itt_sync_obj));
        }
        return false;
    }
    void internal_release() {
        if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
            // No sleepers possible: a plain byte store is enough.
            byteref(get(),offset) = 1;
        }
        else {
            // Preserve the other bytes (and any sleep bit) with an atomic OR.
            kmp_uint64 mask = 0;
            byteref(&mask,offset) = 1;
            (void) KMP_TEST_THEN_OR64((volatile kmp_int64 *)get(), mask);
        }
    }
    kmp_uint64 set_sleeping() {
        return KMP_TEST_THEN_OR64((kmp_int64 volatile *)get(), KMP_BARRIER_SLEEP_STATE);
    }
    kmp_uint64 unset_sleeping() {
        return KMP_TEST_THEN_AND64((kmp_int64 volatile *)get(), ~KMP_BARRIER_SLEEP_STATE);
    }
    bool is_sleeping_val(kmp_uint64 old_loc) { return old_loc & KMP_BARRIER_SLEEP_STATE; }
    bool is_sleeping() { return is_sleeping_val(*get()); }
    bool is_any_sleeping() { return is_sleeping_val(*get()); }
    void wait(kmp_info_t *this_thr, int final_spin) {
        __kmp_wait_template<kmp_flag_oncore>(this_thr, this, final_spin
                                             USE_ITT_BUILD_ARG(itt_sync_obj));
    }
    void release() { __kmp_release_template(this); }
    void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); }
    void resume(int th_gtid) { __kmp_resume_oncore(th_gtid, this); }
    int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished
                      USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained) {
        return __kmp_execute_tasks_oncore(this_thr, gtid, this, final_spin, thread_finished
                                          USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
    }
    kmp_uint8 *get_stolen() { return NULL; }
    enum barrier_type get_bt() { return bt; }
};
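/* Illustrative aside (not part of the runtime): the on-core flag packs one
   one-byte sub-flag per thread sharing the core into a single 64-bit word, so
   a parent can release a child with a byte store (or one atomic OR when a
   sleep bit may be set). A standalone sketch of the byte addressing
   (illustration only):

       #include <cstdint>
       #include <cstdio>

       static unsigned char &byteref(volatile std::uint64_t *loc, std::size_t offset) {
           return ((unsigned char *)loc)[offset];   // same trick as kmp_flag_oncore::byteref
       }

       int main() {
           volatile std::uint64_t word = 0;
           byteref(&word, 3) = 1;                   // release the child at offset 3
           std::printf("%016llx\n", (unsigned long long)word);  // 0000000001000000 on little-endian
           return 0;
       }

   The layout is endian-dependent, but the runtime only ever reads the bytes
   back through the same helper, so that does not matter there. */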
// Used to wake up threads; "flag" is usually the sleep location recorded when
// the thread went to sleep, so its dynamic type may be any of the flag classes.
// The cast below is only used to reach the base-class get_type().
static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) {
    switch (((kmp_flag_64 *)flag)->get_type()) {
    case flag32: __kmp_resume_32(gtid, NULL); break;
    case flag64: __kmp_resume_64(gtid, NULL); break;
    case flag_oncore: __kmp_resume_oncore(gtid, NULL); break;
    }
}
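/* Illustrative aside (not part of this header): a thread that parks itself
   records the flag it slept on, and a generic waker can then resume it without
   knowing the flag's width. A hypothetical call site (field name assumed from
   the rest of the runtime) might look like:

       __kmp_null_resume_wrapper(gtid, thr->th.th_sleep_loc);
*/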
#endif // KMP_WAIT_RELEASE_H