#ifndef KMP_WAIT_RELEASE_H
#define KMP_WAIT_RELEASE_H

#include "kmp.h"
#include "kmp_stats.h"
#include "ompt-specific.h"

enum flag_type { flag32, flag64, flag_oncore };

// Base class for a wait/release flag stored in a plain volatile location.
template <typename P> class kmp_flag_native {
  volatile P *loc; // flag storage, typically modified by another thread
  flag_type t; // concrete type of the flag stored at loc

public:
  kmp_flag_native(volatile P *p, flag_type ft) : loc(p), t(ft) {}
  volatile P *get() { return loc; }
  void set(volatile P *new_loc) { loc = new_loc; }
  flag_type get_type() { return t; }
  P load() { return *loc; }
  void store(P val) { *loc = val; }
};

// Base class for a wait/release flag stored in a std::atomic location.
template <typename P> class kmp_flag {
  std::atomic<P> *loc; // flag storage, typically modified by another thread
  flag_type t; // concrete type of the flag stored at loc

public:
  kmp_flag(std::atomic<P> *p, flag_type ft) : loc(p), t(ft) {}
  std::atomic<P> *get() { return loc; }
  void set(std::atomic<P> *new_loc) { loc = new_loc; }
  flag_type get_type() { return t; }
  P load() { return loc->load(std::memory_order_acquire); }
  void store(P val) { loc->store(val, std::memory_order_release); }
};
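/* Illustrative only (not part of the original header): the acquire/release
   pair on kmp_flag::load()/store() is what lets a waiter safely observe data
   published before the flag was set. A minimal sketch with plain std::atomic,
   mirroring the memory orders used above:

     std::atomic<kmp_uint32> go(0);
     int payload = 0;

     // releasing thread
     payload = 42;                            // publish data first
     go.store(1, std::memory_order_release);  // then release the flag

     // waiting thread
     while (go.load(std::memory_order_acquire) != 1) // acquire pairs w/ release
       ;
     // payload == 42 is now guaranteed to be visible here
*/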
// End the OMPT implicit task for a worker that has finished waiting at an
// implicit barrier, firing the matching sync-region/implicit-task callbacks.
static inline void __ompt_implicit_task_end(kmp_info_t *this_thr,
                                            omp_state_t omp_state,
                                            ompt_data_t *tId,
                                            ompt_data_t *pId) {
  int ds_tid = this_thr->th.th_info.ds.ds_tid;
  if (omp_state == omp_state_wait_barrier_implicit) {
    this_thr->th.ompt_thread_info.state = omp_state_overhead;
    void *codeptr = NULL;
    if (ompt_enabled.ompt_callback_sync_region_wait) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_barrier, ompt_scope_end, NULL, tId, codeptr);
    }
    if (ompt_enabled.ompt_callback_sync_region) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
          ompt_sync_region_barrier, ompt_scope_end, NULL, tId, codeptr);
    }
    if (!KMP_MASTER_TID(ds_tid)) {
      if (ompt_enabled.ompt_callback_implicit_task) {
        ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
            ompt_scope_end, NULL, tId, 0, ds_tid);
      }
      if (ompt_enabled.ompt_callback_idle) {
        ompt_callbacks.ompt_callback(ompt_callback_idle)(ompt_scope_begin);
      }
      // The worker returns to the idle state until the next parallel region.
      this_thr->th.ompt_thread_info.state = omp_state_idle;
    } else {
      this_thr->th.ompt_thread_info.state = omp_state_overhead;
    }
  }
}
/* Spin wait loop that first does pause, then yield, then sleep. A thread that
   calls __kmp_wait_* must make certain that another thread calls __kmp_release
   to wake it back up; otherwise the waiter can sleep forever. */
template <class C, int final_spin>
static inline void
__kmp_wait_template(kmp_info_t *this_thr,
                    C *flag USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  // NOTE: We may not belong to a team at this point.
  volatile void *spin = flag->get();
  kmp_uint32 spins;
  kmp_uint32 hibernate;
  int th_gtid;
  int tasks_completed = FALSE;
  int oversubscribed;
  kmp_uint64 poll_count;
  kmp_uint64 hibernate_goal;

  KMP_FSYNC_SPIN_INIT(spin, NULL);
  if (flag->done_check()) {
    // The flag was already released; nothing to wait for.
    KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
    return;
  }
  th_gtid = this_thr->th.th_info.ds.ds_gtid;
  KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
  KA_TRACE(20,
           ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag));
#if KMP_STATS_ENABLED
  stats_state_e thread_state = KMP_GET_THREAD_STATE();
#endif

  omp_state_t ompt_entry_state;
  ompt_data_t *pId = NULL;
  ompt_data_t *tId;
  if (ompt_enabled.enabled) {
    ompt_entry_state = this_thr->th.ompt_thread_info.state;
    if (!final_spin || ompt_entry_state != omp_state_wait_barrier_implicit ||
        KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)) {
      ompt_lw_taskteam_t *team =
          this_thr->th.th_team->t.ompt_serialized_team_info;
      if (team) {
        pId = &(team->ompt_team_info.parallel_data);
        tId = &(team->ompt_task_info.task_data);
      } else {
        pId = OMPT_CUR_TEAM_DATA(this_thr);
        tId = OMPT_CUR_TASK_DATA(this_thr);
      }
    } else {
      tId = &(this_thr->th.ompt_thread_info.task_data);
    }
    if (ompt_entry_state == omp_state_idle) {
      if (ompt_enabled.ompt_callback_idle) {
        ompt_callbacks.ompt_callback(ompt_callback_idle)(ompt_scope_begin);
      }
    } else if (final_spin && (__kmp_tasking_mode == tskm_immediate_exec ||
                              this_thr->th.th_task_team == NULL)) {
      // The implicit task is already done: either tasking is off, or the
      // task team has finished.
      __ompt_implicit_task_end(this_thr, ompt_entry_state, tId, pId);
    }
  }
  // Setup for waiting
  KMP_INIT_YIELD(spins);

  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
#if KMP_USE_MONITOR
#ifdef KMP_ADJUST_BLOCKTIME
    if (__kmp_zero_bt && !this_thr->th.th_team_bt_set)
      // Force an immediate suspend if blocktime was not set by the user.
      hibernate = 0;
    else
      hibernate = this_thr->th.th_team_bt_intervals;
#else
    hibernate = this_thr->th.th_team_bt_intervals;
#endif /* KMP_ADJUST_BLOCKTIME */
    // Add in the current time value.
    hibernate += TCR_4(__kmp_global.g.g_time.dt.t_value);
    KF_TRACE(20, ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",
                  th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate,
                  hibernate - __kmp_global.g.g_time.dt.t_value));
#else
    hibernate_goal = KMP_NOW() + this_thr->th.th_team_bt_intervals;
    poll_count = 0;
#endif // KMP_USE_MONITOR
  }

  oversubscribed = (TCR_4(__kmp_nth) > __kmp_avail_proc);
  // Main wait spin loop
  while (flag->notdone_check()) {
    int in_pool;
    kmp_task_team_t *task_team = NULL;
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      task_team = this_thr->th.th_task_team;
      if (task_team != NULL) {
        if (TCR_SYNC_4(task_team->tt.tt_active)) {
          if (KMP_TASKING_ENABLED(task_team))
            flag->execute_tasks(
                this_thr, th_gtid, final_spin,
                &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);
          else
            this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
        } else {
          KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid));
          // The task team is done; finish the implicit task here.
          if (final_spin && ompt_enabled.enabled)
            __ompt_implicit_task_end(this_thr, ompt_entry_state, tId, pId);
          this_thr->th.th_task_team = NULL;
          this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
        }
      } else {
        this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
      }
    }

    KMP_FSYNC_SPIN_PREPARE(CCAST(void *, spin));
    if (TCR_4(__kmp_global.g.g_done)) {
      if (__kmp_global.g.g_abort)
        __kmp_abort_thread();
      break;
    }
    if (oversubscribed) {
      KMP_YIELD(1);
    } else {
      KMP_YIELD_SPIN(spins);
    }

    // Keep the thread pool's active count in sync if this thread moved
    // between a team and the pool while spinning.
    in_pool = !!TCR_4(this_thr->th.th_in_pool);
    if (in_pool != !!this_thr->th.th_active_in_pool) {
      if (in_pool) { // Recently transferred from team to pool
        KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
        this_thr->th.th_active_in_pool = TRUE;
      } else { // Recently transferred from pool to team
        KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
        KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
        this_thr->th.th_active_in_pool = FALSE;
      }
    }

#if KMP_STATS_ENABLED
    // Being signalled to idle here means the logical join barrier is done.
    if (this_thr->th.th_stats->isIdle() &&
        KMP_GET_THREAD_STATE() == FORK_JOIN_BARRIER) {
      KMP_SET_THREAD_STATE(IDLE);
      KMP_PUSH_PARTITIONED_TIMER(OMP_idle);
    }
#endif
    // Don't suspend if KMP_BLOCKTIME is "infinite".
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
      continue;

    // Don't suspend if there is a likelihood of new tasks being spawned.
    if ((task_team != NULL) && TCR_4(task_team->tt.tt_found_tasks))
      continue;

#if KMP_USE_MONITOR
    // If we have waited long enough, fall asleep.
    if (TCR_4(__kmp_global.g.g_time.dt.t_value) < hibernate)
      continue;
#else
    if (KMP_BLOCKING(hibernate_goal, poll_count++))
      continue;
#endif

    KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid));
    KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
    flag->suspend(th_gtid);
    KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);

    if (TCR_4(__kmp_global.g.g_done)) {
      if (__kmp_global.g.g_abort)
        __kmp_abort_thread();
      break;
    } else if (__kmp_tasking_mode != tskm_immediate_exec &&
               this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) {
      this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
    }
  } // end of main wait loop
  omp_state_t ompt_exit_state = this_thr->th.ompt_thread_info.state;
  if (ompt_enabled.enabled && ompt_exit_state != omp_state_undefined) {
    // Only implicit tasks are finished here; regular tasks are handled
    // elsewhere.
    __ompt_implicit_task_end(this_thr, ompt_exit_state, tId, pId);
    ompt_exit_state = this_thr->th.ompt_thread_info.state;
  }
  if (ompt_exit_state == omp_state_idle) {
    if (ompt_enabled.ompt_callback_idle) {
      ompt_callbacks.ompt_callback(ompt_callback_idle)(ompt_scope_end);
    }
    this_thr->th.ompt_thread_info.state = omp_state_overhead;
  }

#if KMP_STATS_ENABLED
  // If we were put into the idle state, pop it off the stats state stack.
  if (KMP_GET_THREAD_STATE() == IDLE) {
    KMP_POP_PARTITIONED_TIMER();
    KMP_SET_THREAD_STATE(thread_state);
    this_thr->th.th_stats->resetIdleFlag();
  }
#endif

  KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
  KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
}
/* Release any threads specified as waiting on the flag by releasing the flag
   and resuming the waiting thread(s) if the sleep bit(s) is set. A thread that
   calls __kmp_wait_template must eventually be released this way to avoid
   deadlock. */
template <class C> static inline void __kmp_release_template(C *flag) {
  int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
  KF_TRACE(20, ("__kmp_release: T#%d releasing flag(%x)\n", gtid, flag->get()));
  KMP_DEBUG_ASSERT(flag->get());
  KMP_FSYNC_RELEASING(flag->get());

  flag->internal_release();

  KF_TRACE(100, ("__kmp_release: T#%d set new spin=%d\n", gtid, flag->get(),
                 flag->load()));

  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
    // Only check the sleep bit(s) when a finite blocktime is in use:
    // are any of the threads waiting on this flag actually sleeping?
    if (flag->is_any_sleeping()) {
      for (unsigned int i = 0; i < flag->get_num_waiters(); ++i) {
        kmp_info_t *waiter = flag->get_waiter(i);
        if (waiter) {
          int wait_gtid = waiter->th.th_info.ds.ds_gtid;
          // Wake up the sleeping thread waiting on this flag.
          KF_TRACE(50, ("__kmp_release: T#%d waking up thread T#%d since sleep "
                        "flag(%p) set\n",
                        gtid, wait_gtid, flag->get()));
          flag->resume(wait_gtid);
        }
      }
    }
  }
}
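/* Illustrative only: how the two templates above are meant to pair up. One
   thread parks on a flag via flag->wait() (which calls __kmp_wait_template),
   and some other thread must eventually call flag->release()
   (__kmp_release_template) on the same flag, or the waiter can sleep forever
   once blocktime expires. A hedged sketch using the 32-bit flag defined later
   in this file; this_thr is assumed to be the waiter's kmp_info_t descriptor:

     std::atomic<kmp_uint32> go(0);
     kmp_flag_32 flag(&go, (kmp_uint32)4); // done once go reaches 4

     // waiting thread (e.g., a worker parked at a barrier)
     flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(NULL));

     // releasing thread: internal_release() adds 4 to the flag, making the
     // waiter's done_check() true, then any sleeping waiter is resumed
     flag.release();
*/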
template <typename FlagType> struct flag_traits {};

template <> struct flag_traits<kmp_uint32> {
  typedef kmp_uint32 flag_t;
  static const flag_type t = flag32;
  static inline flag_t tcr(flag_t f) { return TCR_4(f); }
  static inline flag_t test_then_add4(volatile flag_t *f) {
    return KMP_TEST_THEN_ADD4_32(RCAST(volatile kmp_int32 *, f));
  }
  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_OR32(f, v);
  }
  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_AND32(f, v);
  }
};
template <> struct flag_traits<kmp_uint64> {
  typedef kmp_uint64 flag_t;
  static const flag_type t = flag64;
  static inline flag_t tcr(flag_t f) { return TCR_8(f); }
  static inline flag_t test_then_add4(volatile flag_t *f) {
    return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
  }
  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_OR64(f, v);
  }
  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_AND64(f, v);
  }
};
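/* Illustrative only: flag_traits exists so the kmp_basic_flag* templates below
   can pick the primitive of the right width without knowing the flag width
   themselves, e.g. (names as defined above):

     flag_traits<kmp_uint32>::tcr(f);              // -> TCR_4(f)
     flag_traits<kmp_uint64>::test_then_add4(&f);  // -> KMP_TEST_THEN_ADD4_64

   The "+4" release step changes only bits 2 and above, which is what allows
   the sleep state (KMP_BARRIER_SLEEP_STATE, the low bit of the same word) to
   coexist with the barrier state in one flag. */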
// Basic flag implemented on top of a plain volatile location (no C11 atomics).
template <typename FlagType>
class kmp_basic_flag_native : public kmp_flag_native<FlagType> {
  typedef flag_traits<FlagType> traits_type;
  FlagType checker; // value the flag must reach to be considered released
  kmp_info_t *waiting_threads[1]; // threads sleeping on this flag
  kmp_uint32 num_waiting_threads; // number of threads sleeping on this flag

public:
  kmp_basic_flag_native(volatile FlagType *p)
      : kmp_flag_native<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
  kmp_basic_flag_native(volatile FlagType *p, kmp_info_t *thr)
      : kmp_flag_native<FlagType>(p, traits_type::t), num_waiting_threads(1) {
    waiting_threads[0] = thr;
  }
  kmp_basic_flag_native(volatile FlagType *p, FlagType c)
      : kmp_flag_native<FlagType>(p, traits_type::t), checker(c),
        num_waiting_threads(0) {}
  kmp_info_t *get_waiter(kmp_uint32 i) {
    KMP_DEBUG_ASSERT(i < num_waiting_threads);
    return waiting_threads[i];
  }
  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
  // Register a single waiting thread at index 0.
  void set_waiter(kmp_info_t *thr) {
    waiting_threads[0] = thr;
    num_waiting_threads = 1;
  }
  // true once the flag has been released (reached the checker value)
  bool done_check() { return traits_type::tcr(*(this->get())) == checker; }
  bool done_check_val(FlagType old_loc) { return old_loc == checker; }
  bool notdone_check() { return traits_type::tcr(*(this->get())) != checker; }
  // Release all waiting threads by bumping the flag to the released state.
  void internal_release() {
    (void)traits_type::test_then_add4((volatile FlagType *)this->get());
  }
  // Mark that at least one thread is sleeping on the flag; returns the
  // previous flag value.
  FlagType set_sleeping() {
    return traits_type::test_then_or((volatile FlagType *)this->get(),
                                     KMP_BARRIER_SLEEP_STATE);
  }
  // Clear the sleep bit(s); returns the previous flag value.
  FlagType unset_sleeping() {
    return traits_type::test_then_and((volatile FlagType *)this->get(),
                                      ~KMP_BARRIER_SLEEP_STATE);
  }
  // Test whether the sleep bit(s) are set in an old flag value.
  bool is_sleeping_val(FlagType old_loc) {
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  bool is_sleeping() { return is_sleeping_val(*(this->get())); }
  bool is_any_sleeping() { return is_sleeping_val(*(this->get())); }
  kmp_uint8 *get_stolen() { return NULL; }
  enum barrier_type get_bt() { return bs_last_barrier; }
};
// Basic flag implemented on top of a std::atomic location.
template <typename FlagType>
class kmp_basic_flag : public kmp_flag<FlagType> {
  typedef flag_traits<FlagType> traits_type;
  FlagType checker; // value the flag must reach to be considered released
  kmp_info_t *waiting_threads[1]; // threads sleeping on this flag
  kmp_uint32 num_waiting_threads; // number of threads sleeping on this flag

public:
  kmp_basic_flag(std::atomic<FlagType> *p)
      : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
  kmp_basic_flag(std::atomic<FlagType> *p, kmp_info_t *thr)
      : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(1) {
    waiting_threads[0] = thr;
  }
  kmp_basic_flag(std::atomic<FlagType> *p, FlagType c)
      : kmp_flag<FlagType>(p, traits_type::t), checker(c),
        num_waiting_threads(0) {}
  kmp_info_t *get_waiter(kmp_uint32 i) {
    KMP_DEBUG_ASSERT(i < num_waiting_threads);
    return waiting_threads[i];
  }
  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
  void set_waiter(kmp_info_t *thr) {
    waiting_threads[0] = thr;
    num_waiting_threads = 1;
  }
  bool done_check() { return this->load() == checker; }
  bool done_check_val(FlagType old_loc) { return old_loc == checker; }
  bool notdone_check() { return this->load() != checker; }
  // Release all waiting threads by bumping the flag to the released state.
  void internal_release() { KMP_ATOMIC_ADD(this->get(), 4); }
  FlagType set_sleeping() {
    return KMP_ATOMIC_OR(this->get(), KMP_BARRIER_SLEEP_STATE);
  }
  FlagType unset_sleeping() {
    return KMP_ATOMIC_AND(this->get(), ~KMP_BARRIER_SLEEP_STATE);
  }
  bool is_sleeping_val(FlagType old_loc) {
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  bool is_sleeping() { return is_sleeping_val(this->load()); }
  bool is_any_sleeping() { return is_sleeping_val(this->load()); }
  kmp_uint8 *get_stolen() { return NULL; }
  enum barrier_type get_bt() { return bs_last_barrier; }
};
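/* Illustrative only: the sleep-bit protocol that the suspend/resume paths
   build on top of set_sleeping()/unset_sleeping(). The real implementation
   lives elsewhere (e.g., __kmp_suspend_template / __kmp_resume_template in the
   OS-specific runtime files), not in this header; this is a hedged sketch of
   the idea:

     // waiter, about to block
     old = flag->set_sleeping();         // advertise "I may go to sleep"
     if (flag->done_check_val(old)) {    // re-check after setting the bit
       flag->unset_sleeping();           // raced with the release; don't sleep
     } else {
       // actually block on the condition variable / event
     }

     // releaser, after internal_release()
     if (flag->is_any_sleeping())
       flag->resume(wait_gtid);          // wake the parked thread
*/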
class kmp_flag_32 : public kmp_basic_flag<kmp_uint32> {
public:
  kmp_flag_32(std::atomic<kmp_uint32> *p) : kmp_basic_flag<kmp_uint32>(p) {}
  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_info_t *thr)
      : kmp_basic_flag<kmp_uint32>(p, thr) {}
  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_uint32 c)
      : kmp_basic_flag<kmp_uint32>(p, c) {}
  void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); }
  void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_execute_tasks_32(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  void wait(kmp_info_t *this_thr,
            int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
    if (final_spin)
      __kmp_wait_template<kmp_flag_32, TRUE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      __kmp_wait_template<kmp_flag_32, FALSE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  void release() { __kmp_release_template(this); }
};
class kmp_flag_64 : public kmp_basic_flag_native<kmp_uint64> {
public:
  kmp_flag_64(volatile kmp_uint64 *p) : kmp_basic_flag_native<kmp_uint64>(p) {}
  kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr)
      : kmp_basic_flag_native<kmp_uint64>(p, thr) {}
  kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c)
      : kmp_basic_flag_native<kmp_uint64>(p, c) {}
  void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); }
  void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_execute_tasks_64(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  void wait(kmp_info_t *this_thr,
            int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
    if (final_spin)
      __kmp_wait_template<kmp_flag_64, TRUE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      __kmp_wait_template<kmp_flag_64, FALSE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  void release() { __kmp_release_template(this); }
};
// Hierarchical 64-bit on-core barrier flag: each thread owns one byte of a
// shared 64-bit word, selected by 'offset'.
class kmp_flag_oncore : public kmp_flag_native<kmp_uint64> {
  kmp_uint64 checker;
  kmp_info_t *waiting_threads[1];
  kmp_uint32 num_waiting_threads;
  kmp_uint32 offset; // byte of the flag that this operation is interested in
  bool flag_switch; // indicates a switch in flag location
  enum barrier_type bt; // barrier type
  kmp_info_t *this_thr; // thread that may be redirected to a different flag
#if USE_ITT_BUILD
  void *itt_sync_obj; // ITT object to pass to the new flag location
#endif
  unsigned char &byteref(volatile kmp_uint64 *loc, size_t offset) {
    return (RCAST(unsigned char *, CCAST(kmp_uint64 *, loc)))[offset];
  }

public:
  kmp_flag_oncore(volatile kmp_uint64 *p)
      : kmp_flag_native<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
        flag_switch(false) {}
  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx)
      : kmp_flag_native<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
        offset(idx), flag_switch(false) {}
  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx,
                  enum barrier_type bar_t,
                  kmp_info_t *thr USE_ITT_BUILD_ARG(void *itt))
      : kmp_flag_native<kmp_uint64>(p, flag_oncore), checker(c),
        num_waiting_threads(0), offset(idx), flag_switch(false), bt(bar_t),
        this_thr(thr) USE_ITT_BUILD_ARG(itt_sync_obj(itt)) {}
  kmp_info_t *get_waiter(kmp_uint32 i) {
    KMP_DEBUG_ASSERT(i < num_waiting_threads);
    return waiting_threads[i];
  }
  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
  void set_waiter(kmp_info_t *thr) {
    waiting_threads[0] = thr;
    num_waiting_threads = 1;
  }
  bool done_check_val(kmp_uint64 old_loc) {
    return byteref(&old_loc, offset) == checker;
  }
  bool done_check() { return done_check_val(*get()); }
  bool notdone_check() {
    // Calculate flag_switch
    if (this_thr->th.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG)
      flag_switch = true;
    if (byteref(get(), offset) != 1 && !flag_switch)
      return true;
    else if (flag_switch) {
      this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING;
      kmp_flag_64 flag(&this_thr->th.th_bar[bt].bb.b_go,
                       (kmp_uint64)KMP_BARRIER_STATE_BUMP);
      __kmp_wait_64(this_thr, &flag, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    }
    return false;
  }
  void internal_release() {
    // Other threads can write their own bytes of the word simultaneously.
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
      byteref(get(), offset) = 1;
    } else {
      kmp_uint64 mask = 0;
      byteref(&mask, offset) = 1;
      KMP_TEST_THEN_OR64(get(), mask);
    }
  }
  kmp_uint64 set_sleeping() {
    return KMP_TEST_THEN_OR64(get(), KMP_BARRIER_SLEEP_STATE);
  }
  kmp_uint64 unset_sleeping() {
    return KMP_TEST_THEN_AND64(get(), ~KMP_BARRIER_SLEEP_STATE);
  }
  bool is_sleeping_val(kmp_uint64 old_loc) {
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  bool is_sleeping() { return is_sleeping_val(*get()); }
  bool is_any_sleeping() { return is_sleeping_val(*get()); }
  void wait(kmp_info_t *this_thr, int final_spin) {
    if (final_spin)
      __kmp_wait_template<kmp_flag_oncore, TRUE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      __kmp_wait_template<kmp_flag_oncore, FALSE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  void release() { __kmp_release_template(this); }
  void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); }
  void resume(int th_gtid) { __kmp_resume_oncore(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_execute_tasks_oncore(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  kmp_uint8 *get_stolen() { return NULL; }
  enum barrier_type get_bt() { return bt; }
};
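/* Illustrative only: the "oncore" flag packs one byte per thread into a single
   64-bit word, so several threads sharing a core can be tracked (and released)
   through one location. A hedged sketch of the layout assumed by byteref() and
   internal_release(); this_thr is an assumed kmp_info_t* for the current
   thread:

     volatile kmp_uint64 core_go = 0;
     kmp_flag_oncore f0(&core_go, (kmp_uint64)1, 0, bs_forkjoin_barrier,
                        this_thr USE_ITT_BUILD_ARG(NULL)); // watches byte 0
     kmp_flag_oncore f3(&core_go, (kmp_uint64)1, 3, bs_forkjoin_barrier,
                        this_thr USE_ITT_BUILD_ARG(NULL)); // watches byte 3

     f3.internal_release(); // sets only byte 3, leaving the other threads'
                            // bytes untouched
     // f3.done_check() is now true while f0.done_check() is still false
*/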
// Used to wake up a thread; the volatile void* flag is typically the
// th_sleep_loc associated with gtid.
static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) {
  switch (RCAST(kmp_flag_64 *, CCAST(void *, flag))->get_type()) {
  case flag32:
    __kmp_resume_32(gtid, NULL);
    break;
  case flag64:
    __kmp_resume_64(gtid, NULL);
    break;
  case flag_oncore:
    __kmp_resume_oncore(gtid, NULL);
    break;
  }
}

#endif // KMP_WAIT_RELEASE_H