LLVM OpenMP* Runtime Library
kmp_wait_release.h
1 /*
2  * kmp_wait_release.h -- Wait/Release implementation
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // The LLVM Compiler Infrastructure
8 //
9 // This file is dual licensed under the MIT and the University of Illinois Open
10 // Source Licenses. See LICENSE.txt for details.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef KMP_WAIT_RELEASE_H
15 #define KMP_WAIT_RELEASE_H
16 
17 #include "kmp.h"
18 #include "kmp_itt.h"
19 #include "kmp_stats.h"
20 #if OMPT_SUPPORT
21 #include "ompt-specific.h"
22 #endif
23 
40 enum flag_type {
41  flag32, // 32-bit flags
42  flag64, // 64-bit flags
43  flag_oncore // special 64-bit flag for the hierarchical on-core barrier
44 };
45 
49 template <typename P> class kmp_flag_native {
50  volatile P *loc;
51  flag_type t;
52 
53 public:
54  typedef P flag_t;
55  kmp_flag_native(volatile P *p, flag_type ft) : loc(p), t(ft) {}
56  volatile P *get() { return loc; }
57  void *get_void_p() { return RCAST(void *, CCAST(P *, loc)); }
58  void set(volatile P *new_loc) { loc = new_loc; }
59  flag_type get_type() { return t; }
60  P load() { return *loc; }
61  void store(P val) { *loc = val; }
62 };
63 
67 template <typename P> class kmp_flag {
68  std::atomic<P> *loc; // pointer to the flag storage, modified by another thread
71  flag_type t; // "type" of the flag stored in loc
72 public:
73  typedef P flag_t;
74  kmp_flag(std::atomic<P> *p, flag_type ft) : loc(p), t(ft) {}
78  std::atomic<P> *get() { return loc; }
82  void *get_void_p() { return RCAST(void *, loc); }
86  void set(std::atomic<P> *new_loc) { loc = new_loc; }
90  flag_type get_type() { return t; }
94  P load() { return loc->load(std::memory_order_acquire); }
98  void store(P val) { loc->store(val, std::memory_order_release); }
99  // Derived classes must provide the following:
100  /*
101  kmp_info_t * get_waiter(kmp_uint32 i);
102  kmp_uint32 get_num_waiters();
103  bool done_check();
104  bool done_check_val(P old_loc);
105  bool notdone_check();
106  void internal_release();
107  void suspend(int th_gtid);
108  void resume(int th_gtid);
109  P set_sleeping();
110  P unset_sleeping();
111  bool is_sleeping();
112  bool is_any_sleeping();
113  bool is_sleeping_val(P old_loc);
114  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
115  int *thread_finished
116  USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32
117  is_constrained);
118  */
119 };
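
The comment above is the contract that __kmp_wait_template and __kmp_release_template (below) rely on: a waiter spins on notdone_check()/done_check() while a releaser advances the flag with internal_release(). The following stand-alone sketch of that contract uses plain std::atomic and is illustrative only; none of the names in it belong to the runtime.

#include <atomic>
#include <cstdint>
#include <thread>

int main() {
  std::atomic<uint32_t> go{0};
  const uint32_t checker = 1; // analogue of the 'checker' value used by the
                              // kmp_basic_flag classes further down

  std::thread waiter([&] {
    // analogue of notdone_check(): spin until the flag reaches 'checker'
    while (go.load(std::memory_order_acquire) != checker) {
    }
  });

  // analogue of internal_release(): make done_check() become true for the waiter
  go.store(checker, std::memory_order_release);
  waiter.join();
  return 0;
}
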
120 
121 #if OMPT_SUPPORT
122 OMPT_NOINLINE
123 static void __ompt_implicit_task_end(kmp_info_t *this_thr,
124  ompt_state_t ompt_state,
125  ompt_data_t *tId) {
126  int ds_tid = this_thr->th.th_info.ds.ds_tid;
127  if (ompt_state == ompt_state_wait_barrier_implicit) {
128  this_thr->th.ompt_thread_info.state = ompt_state_overhead;
129 #if OMPT_OPTIONAL
130  void *codeptr = NULL;
131  if (ompt_enabled.ompt_callback_sync_region_wait) {
132  ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
133  ompt_sync_region_barrier, ompt_scope_end, NULL, tId, codeptr);
134  }
135  if (ompt_enabled.ompt_callback_sync_region) {
136  ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
137  ompt_sync_region_barrier, ompt_scope_end, NULL, tId, codeptr);
138  }
139 #endif
140  if (!KMP_MASTER_TID(ds_tid)) {
141  if (ompt_enabled.ompt_callback_implicit_task) {
142  ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
143  ompt_scope_end, NULL, tId, 0, ds_tid, ompt_task_implicit);
144  }
145  // return to idle state
146  this_thr->th.ompt_thread_info.state = ompt_state_idle;
147  } else {
148  this_thr->th.ompt_thread_info.state = ompt_state_overhead;
149  }
150  }
151 }
152 #endif
153 
154 /* Spin wait loop that first does pause, then yield, then sleep. A thread that
155  calls __kmp_wait_* must make certain that another thread calls __kmp_release
156  to wake it back up to prevent deadlocks!
157 
158  NOTE: We may not belong to a team at this point. */
159 template <class C, int final_spin>
160 static inline void
161 __kmp_wait_template(kmp_info_t *this_thr,
162  C *flag USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
163 #if USE_ITT_BUILD && USE_ITT_NOTIFY
164  volatile void *spin = flag->get();
165 #endif
166  kmp_uint32 spins;
167  int th_gtid;
168  int tasks_completed = FALSE;
169  int oversubscribed;
170 #if !KMP_USE_MONITOR
171  kmp_uint64 poll_count;
172  kmp_uint64 hibernate_goal;
173 #else
174  kmp_uint32 hibernate;
175 #endif
176 
177  KMP_FSYNC_SPIN_INIT(spin, NULL);
178  if (flag->done_check()) {
179  KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
180  return;
181  }
182  th_gtid = this_thr->th.th_info.ds.ds_gtid;
183 #if KMP_OS_UNIX
184  if (final_spin)
185  KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
186 #endif
187  KA_TRACE(20,
188  ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag));
189 #if KMP_STATS_ENABLED
190  stats_state_e thread_state = KMP_GET_THREAD_STATE();
191 #endif
192 
193 /* OMPT Behavior:
194 THIS function is called from
195  __kmp_barrier (2 times) (implicit or explicit barrier in parallel regions)
196  these have join / fork behavior
197 
198  In these cases, we don't change the state or trigger events in THIS
199 function.
200  Events are triggered in the calling code (__kmp_barrier):
201 
202  state := ompt_state_overhead
203  barrier-begin
204  barrier-wait-begin
205  state := ompt_state_wait_barrier
206  call join-barrier-implementation (finally arrive here)
207  {}
208  call fork-barrier-implementation (finally arrive here)
209  {}
210  state := ompt_state_overhead
211  barrier-wait-end
212  barrier-end
213  state := ompt_state_work_parallel
214 
215 
216  __kmp_fork_barrier (after thread creation, before executing implicit task)
217  call fork-barrier-implementation (finally arrive here)
218  {} // worker arrive here with state = ompt_state_idle
219 
220 
221  __kmp_join_barrier (implicit barrier at end of parallel region)
222  state := ompt_state_wait_barrier_implicit
223  barrier-begin
224  barrier-wait-begin
225  call join-barrier-implementation (finally arrive here
226 final_spin=FALSE)
227  {
228  }
229  __kmp_fork_barrier (implicit barrier at end of parallel region)
230  call fork-barrier-implementation (finally arrive here final_spin=TRUE)
231 
232  Worker after task-team is finished:
233  barrier-wait-end
234  barrier-end
235  implicit-task-end
236  idle-begin
237  state := ompt_state_idle
238 
239  Before leaving, if state = ompt_state_idle
240  idle-end
241  state := ompt_state_overhead
242 */
243 #if OMPT_SUPPORT
244  ompt_state_t ompt_entry_state;
245  ompt_data_t *tId;
246  if (ompt_enabled.enabled) {
247  ompt_entry_state = this_thr->th.ompt_thread_info.state;
248  if (!final_spin || ompt_entry_state != ompt_state_wait_barrier_implicit ||
249  KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)) {
250  ompt_lw_taskteam_t *team =
251  this_thr->th.th_team->t.ompt_serialized_team_info;
252  if (team) {
253  tId = &(team->ompt_task_info.task_data);
254  } else {
255  tId = OMPT_CUR_TASK_DATA(this_thr);
256  }
257  } else {
258  tId = &(this_thr->th.ompt_thread_info.task_data);
259  }
260  if (final_spin && (__kmp_tasking_mode == tskm_immediate_exec ||
261  this_thr->th.th_task_team == NULL)) {
262  // implicit task is done. Either no taskqueue, or task-team finished
263  __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
264  }
265  }
266 #endif
267 
268  // Setup for waiting
269  KMP_INIT_YIELD(spins);
270 
271  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
272 #if KMP_USE_MONITOR
273 // The worker threads cannot rely on the team struct existing at this point.
274 // Use the bt values cached in the thread struct instead.
275 #ifdef KMP_ADJUST_BLOCKTIME
276  if (__kmp_zero_bt && !this_thr->th.th_team_bt_set)
277  // Force immediate suspend if not set by user and more threads than
278  // available procs
279  hibernate = 0;
280  else
281  hibernate = this_thr->th.th_team_bt_intervals;
282 #else
283  hibernate = this_thr->th.th_team_bt_intervals;
284 #endif /* KMP_ADJUST_BLOCKTIME */
285 
286  /* If the blocktime is nonzero, we want to make sure that we spin wait for
287  the entirety of the specified #intervals, plus up to one interval more.
288  This increment makes certain that this thread doesn't go to sleep too
289  soon. */
290  if (hibernate != 0)
291  hibernate++;
292 
293  // Add in the current time value.
294  hibernate += TCR_4(__kmp_global.g.g_time.dt.t_value);
295  KF_TRACE(20, ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",
296  th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate,
297  hibernate - __kmp_global.g.g_time.dt.t_value));
298 #else
299  hibernate_goal = KMP_NOW() + this_thr->th.th_team_bt_intervals;
300  poll_count = 0;
301 #endif // KMP_USE_MONITOR
302  }
303 
304  oversubscribed = (TCR_4(__kmp_nth) > __kmp_avail_proc);
305  KMP_MB();
306 
307  // Main wait spin loop
308  while (flag->notdone_check()) {
309  int in_pool;
310  kmp_task_team_t *task_team = NULL;
311  if (__kmp_tasking_mode != tskm_immediate_exec) {
312  task_team = this_thr->th.th_task_team;
313  /* If the thread's task team pointer is NULL, it means one of 3 things:
314  1) A newly-created thread is first being released by
315  __kmp_fork_barrier(), and its task team has not been set up yet.
316  2) All tasks have been executed to completion.
317  3) Tasking is off for this region. This could be because we are in a
318  serialized region (perhaps the outer one), or else tasking was manually
319  disabled (KMP_TASKING=0). */
320  if (task_team != NULL) {
321  if (TCR_SYNC_4(task_team->tt.tt_active)) {
322  if (KMP_TASKING_ENABLED(task_team))
323  flag->execute_tasks(
324  this_thr, th_gtid, final_spin,
325  &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);
326  else
327  this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
328  } else {
329  KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid));
330 #if OMPT_SUPPORT
331  // task-team is done now, other cases should have been caught above
332  if (final_spin && ompt_enabled.enabled)
333  __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
334 #endif
335  this_thr->th.th_task_team = NULL;
336  this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
337  }
338  } else {
339  this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
340  } // if
341  } // if
342 
343  KMP_FSYNC_SPIN_PREPARE(CCAST(void *, spin));
344  if (TCR_4(__kmp_global.g.g_done)) {
345  if (__kmp_global.g.g_abort)
346  __kmp_abort_thread();
347  break;
348  }
349 
350  // If we are oversubscribed, or have waited a bit (and
351  // KMP_LIBRARY=throughput), then yield
352  // TODO: Should it be number of cores instead of thread contexts? Like:
353  // KMP_YIELD(TCR_4(__kmp_nth) > __kmp_ncores);
354  // Need performance improvement data to make the change...
355  if (oversubscribed) {
356  KMP_YIELD(1);
357  } else {
358  KMP_YIELD_SPIN(spins);
359  }
360  // Check if this thread was transferred from a team
361  // to the thread pool (or vice-versa) while spinning.
362  in_pool = !!TCR_4(this_thr->th.th_in_pool);
363  if (in_pool != !!this_thr->th.th_active_in_pool) {
364  if (in_pool) { // Recently transferred from team to pool
365  KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
366  this_thr->th.th_active_in_pool = TRUE;
367  /* Here, we cannot assert that:
368  KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) <=
369  __kmp_thread_pool_nth);
370  __kmp_thread_pool_nth is inc/dec'd by the master thread while the
371  fork/join lock is held, whereas __kmp_thread_pool_active_nth is
372  inc/dec'd asynchronously by the workers. The two can get out of sync
373  for brief periods of time. */
374  } else { // Recently transferred from pool to team
375  KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
376  KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
377  this_thr->th.th_active_in_pool = FALSE;
378  }
379  }
380 
381 #if KMP_STATS_ENABLED
382  // Check if thread has been signalled to idle state
383  // This indicates that the logical "join-barrier" has finished
384  if (this_thr->th.th_stats->isIdle() &&
385  KMP_GET_THREAD_STATE() == FORK_JOIN_BARRIER) {
386  KMP_SET_THREAD_STATE(IDLE);
387  KMP_PUSH_PARTITIONED_TIMER(OMP_idle);
388  }
389 #endif
390 
391  // Don't suspend if KMP_BLOCKTIME is set to "infinite"
392  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
393  continue;
394 
395  // Don't suspend if there is a likelihood of new tasks being spawned.
396  if ((task_team != NULL) && TCR_4(task_team->tt.tt_found_tasks))
397  continue;
398 
399 #if KMP_USE_MONITOR
400  // If we have waited a bit more, fall asleep
401  if (TCR_4(__kmp_global.g.g_time.dt.t_value) < hibernate)
402  continue;
403 #else
404  if (KMP_BLOCKING(hibernate_goal, poll_count++))
405  continue;
406 #endif
407 
408  KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid));
409 #if KMP_OS_UNIX
410  if (final_spin)
411  KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
412 #endif
413  flag->suspend(th_gtid);
414 #if KMP_OS_UNIX
415  if (final_spin)
416  KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
417 #endif
418 
419  if (TCR_4(__kmp_global.g.g_done)) {
420  if (__kmp_global.g.g_abort)
421  __kmp_abort_thread();
422  break;
423  } else if (__kmp_tasking_mode != tskm_immediate_exec &&
424  this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) {
425  this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
426  }
427  // TODO: If thread is done with work and times out, disband/free
428  }
429 
430 #if OMPT_SUPPORT
431  ompt_state_t ompt_exit_state = this_thr->th.ompt_thread_info.state;
432  if (ompt_enabled.enabled && ompt_exit_state != ompt_state_undefined) {
433 #if OMPT_OPTIONAL
434  if (final_spin) {
435  __ompt_implicit_task_end(this_thr, ompt_exit_state, tId);
436  ompt_exit_state = this_thr->th.ompt_thread_info.state;
437  }
438 #endif
439  if (ompt_exit_state == ompt_state_idle) {
440  this_thr->th.ompt_thread_info.state = ompt_state_overhead;
441  }
442  }
443 #endif
444 #if KMP_STATS_ENABLED
445  // If we were put into idle state, pop that off the state stack
446  if (KMP_GET_THREAD_STATE() == IDLE) {
447  KMP_POP_PARTITIONED_TIMER();
448  KMP_SET_THREAD_STATE(thread_state);
449  this_thr->th.th_stats->resetIdleFlag();
450  }
451 #endif
452 
453 #if KMP_OS_UNIX
454  if (final_spin)
455  KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
456 #endif
457  KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
458 }
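
For readers unfamiliar with this escalation pattern, the stand-alone sketch below mirrors the structure of the loop above using C++11 primitives: spin first, then yield, and only block once the block-time budget is exhausted. The class name, the spin budget, and the 200 ms deadline are arbitrary illustrations of KMP_INIT_YIELD and the blocktime machinery, not the runtime's actual values or implementation; the real code wakes sleepers through the sleep bit and __kmp_suspend/__kmp_resume rather than a condition variable.

#include <atomic>
#include <chrono>
#include <condition_variable>
#include <mutex>
#include <thread>

struct toy_waiter {
  std::atomic<bool> released{false};
  std::mutex mtx;
  std::condition_variable cv;

  void wait() {
    using clock = std::chrono::steady_clock;
    const auto hibernate_goal = clock::now() + std::chrono::milliseconds(200);
    int spins = 0;
    while (!released.load(std::memory_order_acquire)) {
      if (++spins < 4096)
        continue;                    // 1) cheap spin (cf. KMP_YIELD_SPIN)
      if (clock::now() < hibernate_goal) {
        std::this_thread::yield();   // 2) give up the core (cf. KMP_YIELD)
        continue;
      }
      std::unique_lock<std::mutex> lk(mtx); // 3) go to sleep (cf. flag->suspend)
      cv.wait(lk, [&] { return released.load(std::memory_order_acquire); });
    }
  }

  void release() { // cf. __kmp_release_template below
    {
      // publish under the mutex so a waiter cannot miss the wakeup
      std::lock_guard<std::mutex> lk(mtx);
      released.store(true, std::memory_order_release);
    }
    cv.notify_all(); // cf. flag->resume for sleeping waiters
  }
};
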
459 
460 /* Release any threads specified as waiting on the flag by releasing the flag
461  and resuming the waiting thread(s) if indicated by the sleep bit(s). A thread that
462  calls __kmp_wait_template must call this function to wake up the potentially
463  sleeping thread and prevent deadlocks! */
464 template <class C> static inline void __kmp_release_template(C *flag) {
465 #ifdef KMP_DEBUG
466  int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
467 #endif
468  KF_TRACE(20, ("__kmp_release: T#%d releasing flag(%p)\n", gtid, flag->get()));
469  KMP_DEBUG_ASSERT(flag->get());
470  KMP_FSYNC_RELEASING(flag->get_void_p());
471 
472  flag->internal_release();
473 
474  KF_TRACE(100, ("__kmp_release: T#%d set new spin(%p)=%d\n", gtid, flag->get(),
475  flag->load()));
476 
477  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
478  // Only need to check sleep stuff if infinite block time not set.
479  // Are *any* threads waiting on flag sleeping?
480  if (flag->is_any_sleeping()) {
481  for (unsigned int i = 0; i < flag->get_num_waiters(); ++i) {
482  // if sleeping waiter exists at i, sets current_waiter to i inside flag
483  kmp_info_t *waiter = flag->get_waiter(i);
484  if (waiter) {
485  int wait_gtid = waiter->th.th_info.ds.ds_gtid;
486  // Wake up thread if needed
487  KF_TRACE(50, ("__kmp_release: T#%d waking up thread T#%d since sleep "
488  "flag(%p) set\n",
489  gtid, wait_gtid, flag->get()));
490  flag->resume(wait_gtid); // unsets flag's current_waiter when done
491  }
492  }
493  }
494  }
495 }
496 
497 template <typename FlagType> struct flag_traits {};
498 
499 template <> struct flag_traits<kmp_uint32> {
500  typedef kmp_uint32 flag_t;
501  static const flag_type t = flag32;
502  static inline flag_t tcr(flag_t f) { return TCR_4(f); }
503  static inline flag_t test_then_add4(volatile flag_t *f) {
504  return KMP_TEST_THEN_ADD4_32(RCAST(volatile kmp_int32 *, f));
505  }
506  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
507  return KMP_TEST_THEN_OR32(f, v);
508  }
509  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
510  return KMP_TEST_THEN_AND32(f, v);
511  }
512 };
513 
514 template <> struct flag_traits<kmp_uint64> {
515  typedef kmp_uint64 flag_t;
516  static const flag_type t = flag64;
517  static inline flag_t tcr(flag_t f) { return TCR_8(f); }
518  static inline flag_t test_then_add4(volatile flag_t *f) {
519  return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
520  }
521  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
522  return KMP_TEST_THEN_OR64(f, v);
523  }
524  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
525  return KMP_TEST_THEN_AND64(f, v);
526  }
527 };
528 
529 // Basic flag that does not use C11 Atomics
530 template <typename FlagType>
531 class kmp_basic_flag_native : public kmp_flag_native<FlagType> {
532  typedef flag_traits<FlagType> traits_type;
533  FlagType checker;
535  kmp_info_t *waiting_threads[1]; // array of threads sleeping on this flag
537  kmp_uint32 num_waiting_threads; // number of threads sleeping on this flag
539 public:
540  kmp_basic_flag_native(volatile FlagType *p)
541  : kmp_flag_native<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
542  kmp_basic_flag_native(volatile FlagType *p, kmp_info_t *thr)
543  : kmp_flag_native<FlagType>(p, traits_type::t), num_waiting_threads(1) {
544  waiting_threads[0] = thr;
545  }
546  kmp_basic_flag_native(volatile FlagType *p, FlagType c)
547  : kmp_flag_native<FlagType>(p, traits_type::t), checker(c),
548  num_waiting_threads(0) {}
553  kmp_info_t *get_waiter(kmp_uint32 i) {
554  KMP_DEBUG_ASSERT(i < num_waiting_threads);
555  return waiting_threads[i];
556  }
560  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
566  void set_waiter(kmp_info_t *thr) {
567  waiting_threads[0] = thr;
568  num_waiting_threads = 1;
569  }
573  bool done_check() { return traits_type::tcr(*(this->get())) == checker; }
578  bool done_check_val(FlagType old_loc) { return old_loc == checker; }
586  bool notdone_check() { return traits_type::tcr(*(this->get())) != checker; }
591  void internal_release() {
592  (void)traits_type::test_then_add4((volatile FlagType *)this->get());
593  }
599  FlagType set_sleeping() {
600  return traits_type::test_then_or((volatile FlagType *)this->get(),
601  KMP_BARRIER_SLEEP_STATE);
602  }
608  FlagType unset_sleeping() {
609  return traits_type::test_then_and((volatile FlagType *)this->get(),
610  ~KMP_BARRIER_SLEEP_STATE);
611  }
616  bool is_sleeping_val(FlagType old_loc) {
617  return old_loc & KMP_BARRIER_SLEEP_STATE;
618  }
622  bool is_sleeping() { return is_sleeping_val(*(this->get())); }
623  bool is_any_sleeping() { return is_sleeping_val(*(this->get())); }
624  kmp_uint8 *get_stolen() { return NULL; }
625  enum barrier_type get_bt() { return bs_last_barrier; }
626 };
627 
628 template <typename FlagType> class kmp_basic_flag : public kmp_flag<FlagType> {
629  typedef flag_traits<FlagType> traits_type;
630  FlagType checker;
632  kmp_info_t *waiting_threads[1]; // array of threads sleeping on this flag
634  kmp_uint32 num_waiting_threads; // number of threads sleeping on this flag
636 public:
637  kmp_basic_flag(std::atomic<FlagType> *p)
638  : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
639  kmp_basic_flag(std::atomic<FlagType> *p, kmp_info_t *thr)
640  : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(1) {
641  waiting_threads[0] = thr;
642  }
643  kmp_basic_flag(std::atomic<FlagType> *p, FlagType c)
644  : kmp_flag<FlagType>(p, traits_type::t), checker(c),
645  num_waiting_threads(0) {}
650  kmp_info_t *get_waiter(kmp_uint32 i) {
651  KMP_DEBUG_ASSERT(i < num_waiting_threads);
652  return waiting_threads[i];
653  }
657  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
663  void set_waiter(kmp_info_t *thr) {
664  waiting_threads[0] = thr;
665  num_waiting_threads = 1;
666  }
670  bool done_check() { return this->load() == checker; }
675  bool done_check_val(FlagType old_loc) { return old_loc == checker; }
683  bool notdone_check() { return this->load() != checker; }
688  void internal_release() { KMP_ATOMIC_ADD(this->get(), 4); }
694  FlagType set_sleeping() {
695  return KMP_ATOMIC_OR(this->get(), KMP_BARRIER_SLEEP_STATE);
696  }
702  FlagType unset_sleeping() {
703  return KMP_ATOMIC_AND(this->get(), ~KMP_BARRIER_SLEEP_STATE);
704  }
709  bool is_sleeping_val(FlagType old_loc) {
710  return old_loc & KMP_BARRIER_SLEEP_STATE;
711  }
715  bool is_sleeping() { return is_sleeping_val(this->load()); }
716  bool is_any_sleeping() { return is_sleeping_val(this->load()); }
717  kmp_uint8 *get_stolen() { return NULL; }
718  enum barrier_type get_bt() { return bs_last_barrier; }
719 };
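
The add-by-4 in internal_release() and the OR/AND of KMP_BARRIER_SLEEP_STATE in set_sleeping()/unset_sleeping() work together because the sleep bit and the value the releaser bumps occupy disjoint bits of the same word. A stand-alone sketch of that encoding follows; SLEEP_BIT and BUMP are illustrative stand-ins, the real constants being the KMP_BARRIER_* values defined in kmp.h.

#include <atomic>
#include <cstdint>

constexpr uint64_t SLEEP_BIT = 1; // stand-in for KMP_BARRIER_SLEEP_STATE
constexpr uint64_t BUMP = 4;      // stand-in for the add-by-4 release increment

int main() {
  std::atomic<uint64_t> flag{0};

  // Waiter about to block: set_sleeping() analogue. The returned old value
  // lets the waiter check whether the release already happened.
  uint64_t old_val = flag.fetch_or(SLEEP_BIT, std::memory_order_acq_rel);
  (void)old_val;

  // Releaser: internal_release() analogue. Adding BUMP leaves the sleep bit
  // untouched, so the releaser can still see that a waiter went to sleep.
  uint64_t before_release = flag.fetch_add(BUMP, std::memory_order_acq_rel);
  bool need_wakeup = (before_release & SLEEP_BIT) != 0; // is_sleeping_val() analogue
  (void)need_wakeup; // a real releaser would call flag->resume(...) here

  // Waiter waking up: unset_sleeping() analogue.
  flag.fetch_and(~SLEEP_BIT, std::memory_order_acq_rel);
  return 0;
}
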
720 
721 class kmp_flag_32 : public kmp_basic_flag<kmp_uint32> {
722 public:
723  kmp_flag_32(std::atomic<kmp_uint32> *p) : kmp_basic_flag<kmp_uint32>(p) {}
724  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_info_t *thr)
725  : kmp_basic_flag<kmp_uint32>(p, thr) {}
726  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_uint32 c)
727  : kmp_basic_flag<kmp_uint32>(p, c) {}
728  void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); }
729  void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); }
730  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
731  int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
732  kmp_int32 is_constrained) {
733  return __kmp_execute_tasks_32(
734  this_thr, gtid, this, final_spin,
735  thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
736  }
737  void wait(kmp_info_t *this_thr,
738  int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
739  if (final_spin)
740  __kmp_wait_template<kmp_flag_32, TRUE>(
741  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
742  else
743  __kmp_wait_template<kmp_flag_32, FALSE>(
744  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
745  }
746  void release() { __kmp_release_template(this); }
747  flag_type get_ptr_type() { return flag32; }
748 };
749 
750 class kmp_flag_64 : public kmp_basic_flag_native<kmp_uint64> {
751 public:
752  kmp_flag_64(volatile kmp_uint64 *p) : kmp_basic_flag_native<kmp_uint64>(p) {}
753  kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr)
754  : kmp_basic_flag_native<kmp_uint64>(p, thr) {}
755  kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c)
756  : kmp_basic_flag_native<kmp_uint64>(p, c) {}
757  void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); }
758  void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); }
759  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
760  int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
761  kmp_int32 is_constrained) {
762  return __kmp_execute_tasks_64(
763  this_thr, gtid, this, final_spin,
764  thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
765  }
766  void wait(kmp_info_t *this_thr,
767  int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
768  if (final_spin)
769  __kmp_wait_template<kmp_flag_64, TRUE>(
770  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
771  else
772  __kmp_wait_template<kmp_flag_64, FALSE>(
773  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
774  }
775  void release() { __kmp_release_template(this); }
776  flag_type get_ptr_type() { return flag64; }
777 };
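
As a hedged usage sketch: barrier code in the runtime typically builds a kmp_flag_64 over a thread's b_go word and drives it through wait()/release(), much as kmp_flag_oncore::notdone_check() below does with __kmp_wait_64. The helper names here are hypothetical and the snippet assumes the declarations pulled in by kmp.h above (kmp_info_t, KMP_BARRIER_STATE_BUMP, bs_forkjoin_barrier, ...).

// Hypothetical helpers for illustration only.
static inline void example_wait_on_go_flag(kmp_info_t *this_thr) {
  // Waiter: done once its own fork/join b_go word reaches KMP_BARRIER_STATE_BUMP.
  kmp_flag_64 flag(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_go,
                   (kmp_uint64)KMP_BARRIER_STATE_BUMP);
  flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(NULL));
}

static inline void example_release_go_flag(kmp_info_t *waiter_thr) {
  // Releaser: construct the flag with the waiting thread recorded so that
  // __kmp_release_template can resume it if it went to sleep.
  kmp_flag_64 flag(&waiter_thr->th.th_bar[bs_forkjoin_barrier].bb.b_go,
                   waiter_thr);
  flag.release();
}
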
778 
779 // Hierarchical 64-bit on-core barrier instantiation
780 class kmp_flag_oncore : public kmp_flag_native<kmp_uint64> {
781  kmp_uint64 checker;
782  kmp_info_t *waiting_threads[1];
783  kmp_uint32 num_waiting_threads;
784  kmp_uint32 offset; // index of the byte within *loc that this flag operates on
786  bool flag_switch;
787  enum barrier_type bt;
788  kmp_info_t *this_thr;
790 #if USE_ITT_BUILD
791  void *itt_sync_obj; // ITT object passed to the nested 64-bit wait
793 #endif
794  unsigned char &byteref(volatile kmp_uint64 *loc, size_t offset) {
795  return (RCAST(unsigned char *, CCAST(kmp_uint64 *, loc)))[offset];
796  }
797 
798 public:
799  kmp_flag_oncore(volatile kmp_uint64 *p)
800  : kmp_flag_native<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
801  flag_switch(false) {}
802  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx)
803  : kmp_flag_native<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
804  offset(idx), flag_switch(false) {}
805  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx,
806  enum barrier_type bar_t,
807  kmp_info_t *thr USE_ITT_BUILD_ARG(void *itt))
808  : kmp_flag_native<kmp_uint64>(p, flag_oncore), checker(c),
809  num_waiting_threads(0), offset(idx), flag_switch(false), bt(bar_t),
810  this_thr(thr) USE_ITT_BUILD_ARG(itt_sync_obj(itt)) {}
811  kmp_info_t *get_waiter(kmp_uint32 i) {
812  KMP_DEBUG_ASSERT(i < num_waiting_threads);
813  return waiting_threads[i];
814  }
815  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
816  void set_waiter(kmp_info_t *thr) {
817  waiting_threads[0] = thr;
818  num_waiting_threads = 1;
819  }
820  bool done_check_val(kmp_uint64 old_loc) {
821  return byteref(&old_loc, offset) == checker;
822  }
823  bool done_check() { return done_check_val(*get()); }
824  bool notdone_check() {
825  // Calculate flag_switch
826  if (this_thr->th.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG)
827  flag_switch = true;
828  if (byteref(get(), offset) != 1 && !flag_switch)
829  return true;
830  else if (flag_switch) {
831  this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING;
832  kmp_flag_64 flag(&this_thr->th.th_bar[bt].bb.b_go,
833  (kmp_uint64)KMP_BARRIER_STATE_BUMP);
834  __kmp_wait_64(this_thr, &flag, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
835  }
836  return false;
837  }
838  void internal_release() {
839  // Other threads can write their own bytes simultaneously.
840  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
841  byteref(get(), offset) = 1;
842  } else {
843  kmp_uint64 mask = 0;
844  byteref(&mask, offset) = 1;
845  KMP_TEST_THEN_OR64(get(), mask);
846  }
847  }
848  kmp_uint64 set_sleeping() {
849  return KMP_TEST_THEN_OR64(get(), KMP_BARRIER_SLEEP_STATE);
850  }
851  kmp_uint64 unset_sleeping() {
852  return KMP_TEST_THEN_AND64(get(), ~KMP_BARRIER_SLEEP_STATE);
853  }
854  bool is_sleeping_val(kmp_uint64 old_loc) {
855  return old_loc & KMP_BARRIER_SLEEP_STATE;
856  }
857  bool is_sleeping() { return is_sleeping_val(*get()); }
858  bool is_any_sleeping() { return is_sleeping_val(*get()); }
859  void wait(kmp_info_t *this_thr, int final_spin) {
860  if (final_spin)
861  __kmp_wait_template<kmp_flag_oncore, TRUE>(
862  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
863  else
864  __kmp_wait_template<kmp_flag_oncore, FALSE>(
865  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
866  }
867  void release() { __kmp_release_template(this); }
868  void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); }
869  void resume(int th_gtid) { __kmp_resume_oncore(th_gtid, this); }
870  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
871  int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
872  kmp_int32 is_constrained) {
873  return __kmp_execute_tasks_oncore(
874  this_thr, gtid, this, final_spin,
875  thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
876  }
877  kmp_uint8 *get_stolen() { return NULL; }
878  enum barrier_type get_bt() { return bt; }
879  flag_type get_ptr_type() { return flag_oncore; }
880 };
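
The byteref()/offset machinery above packs several threads' flags into a single 64-bit word (the on-core barrier), one byte per thread: a waiter is done once its own byte equals the checker, and a releaser publishes its byte with an atomic OR so concurrent writers of the other bytes are not lost. A stand-alone sketch of that packing follows; it is illustrative only, and the shift-based mask assumes the little-endian layout that byteref()'s byte indexing yields on such targets.

#include <atomic>
#include <cstdint>

// Analogue of internal_release() for finite blocktime: OR in a one-byte mask
// (byteref(&mask, offset) = 1 on a little-endian target) so that concurrent
// releases of other bytes in the same word are preserved.
void release_byte(std::atomic<uint64_t> *word, unsigned offset) {
  uint64_t mask = uint64_t(1) << (8 * offset);
  word->fetch_or(mask, std::memory_order_release);
}

// Analogue of done_check_val(): the waiter at 'offset' is done once its byte
// equals the expected value (1 here).
bool byte_done(uint64_t val, unsigned offset) {
  return ((val >> (8 * offset)) & 0xff) == 1;
}

int main() {
  std::atomic<uint64_t> on_core_flag{0};
  release_byte(&on_core_flag, 3); // the thread occupying byte 3 arrives
  return byte_done(on_core_flag.load(std::memory_order_acquire), 3) ? 0 : 1;
}
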
881 
882 // Used to wake up threads; the volatile void *flag is usually the th_sleep_loc
883 // field associated with the given gtid.
884 static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) {
885  if (!flag)
886  return;
887 
888  switch (RCAST(kmp_flag_64 *, CCAST(void *, flag))->get_type()) {
889  case flag32:
890  __kmp_resume_32(gtid, NULL);
891  break;
892  case flag64:
893  __kmp_resume_64(gtid, NULL);
894  break;
895  case flag_oncore:
896  __kmp_resume_oncore(gtid, NULL);
897  break;
898  }
899 }
900 
905 #endif // KMP_WAIT_RELEASE_H