LLVM OpenMP* Runtime Library
kmp_wait_release.h
/*
 * kmp_wait_release.h -- Wait/Release implementation
 */

//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//

#ifndef KMP_WAIT_RELEASE_H
#define KMP_WAIT_RELEASE_H

#include "kmp.h"
#include "kmp_itt.h"
#include "kmp_stats.h"
#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

// The flag_type describes the storage used for the flag.
enum flag_type {
  flag32, /**< 32 bit flags */
  flag64, /**< 64 bit flags */
  flag_oncore /**< special 64-bit flag for on-core (hierarchical) barrier */
};

// Base class for wait/release of a native (volatile) flag.
template <typename P> class kmp_flag_native {
  volatile P *loc;
  flag_type t;

public:
  typedef P flag_t;
  kmp_flag_native(volatile P *p, flag_type ft) : loc(p), t(ft) {}
  volatile P *get() { return loc; }
  void set(volatile P *new_loc) { loc = new_loc; }
  flag_type get_type() { return t; }
  P load() { return *loc; }
  void store(P val) { *loc = val; }
};

// Base class for wait/release of a std::atomic flag.
template <typename P> class kmp_flag {
  std::atomic<P> *loc; // Pointer to flag storage modified by another thread
  flag_type t; // "Type" of the flag in loc

public:
  typedef P flag_t;
  kmp_flag(std::atomic<P> *p, flag_type ft) : loc(p), t(ft) {}
  std::atomic<P> *get() { return loc; }
  void set(std::atomic<P> *new_loc) { loc = new_loc; }
  flag_type get_type() { return t; }
  P load() { return loc->load(std::memory_order_acquire); }
  void store(P val) { loc->store(val, std::memory_order_release); }
  // Derived classes must provide the following:
  /*
  kmp_info_t * get_waiter(kmp_uint32 i);
  kmp_uint32 get_num_waiters();
  bool done_check();
  bool done_check_val(P old_loc);
  bool notdone_check();
  P internal_release();
  void suspend(int th_gtid);
  void resume(int th_gtid);
  P set_sleeping();
  P unset_sleeping();
  bool is_sleeping();
  bool is_any_sleeping();
  bool is_sleeping_val(P old_loc);
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished
                    USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained);
  */
};
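
// Illustrative sketch (not part of the runtime): a minimal flag type that
// satisfies the contract __kmp_wait_template/__kmp_release_template expect
// from derived classes. The real flags further below (kmp_flag_32/64/oncore)
// additionally handle sleeping, waiter bookkeeping and task execution; this
// toy only shows the done/notdone/release trio over the atomic payload.
// The name toy_flag is invented for the example.
#if 0
class toy_flag : public kmp_flag<kmp_uint32> {
  kmp_uint32 checker; // wait completes once the flag reaches this value
public:
  toy_flag(std::atomic<kmp_uint32> *p, kmp_uint32 c)
      : kmp_flag<kmp_uint32>(p, flag32), checker(c) {}
  bool done_check() { return this->load() == checker; }
  bool notdone_check() { return this->load() != checker; }
  void internal_release() { KMP_ATOMIC_ADD(this->get(), 4); }
};
#endif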

#if OMPT_SUPPORT
static inline void __ompt_implicit_task_end(kmp_info_t *this_thr,
                                            omp_state_t omp_state,
                                            ompt_data_t *tId,
                                            ompt_data_t *pId) {
  int ds_tid = this_thr->th.th_info.ds.ds_tid;
  if (omp_state == omp_state_wait_barrier_implicit) {
    this_thr->th.ompt_thread_info.state = omp_state_overhead;
#if OMPT_OPTIONAL
    void *codeptr = NULL;
    if (ompt_enabled.ompt_callback_sync_region_wait) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_barrier, ompt_scope_end, NULL, tId, codeptr);
    }
    if (ompt_enabled.ompt_callback_sync_region) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
          ompt_sync_region_barrier, ompt_scope_end, NULL, tId, codeptr);
    }
#endif
    if (!KMP_MASTER_TID(ds_tid)) {
      if (ompt_enabled.ompt_callback_implicit_task) {
        ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
            ompt_scope_end, NULL, tId, 0, ds_tid);
      }
#if OMPT_OPTIONAL
      if (ompt_enabled.ompt_callback_idle) {
        ompt_callbacks.ompt_callback(ompt_callback_idle)(ompt_scope_begin);
      }
#endif
      // return to idle state
      this_thr->th.ompt_thread_info.state = omp_state_idle;
    } else {
      this_thr->th.ompt_thread_info.state = omp_state_overhead;
    }
  }
}
#endif

/* Spin wait loop that first does pause, then yield, then sleep. A thread that
   calls __kmp_wait_* must make certain that another thread calls __kmp_release
   to wake it back up to prevent deadlocks! */
template <class C, int final_spin>
static inline void
__kmp_wait_template(kmp_info_t *this_thr,
                    C *flag USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  // NOTE: We may not belong to a team at this point.
  volatile void *spin = flag->get();
  kmp_uint32 spins;
  kmp_uint32 hibernate;
  int th_gtid;
  int tasks_completed = FALSE;
  int oversubscribed;
#if !KMP_USE_MONITOR
  kmp_uint64 poll_count;
  kmp_uint64 hibernate_goal;
#endif

  KMP_FSYNC_SPIN_INIT(spin, NULL);
  if (flag->done_check()) {
    KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
    return;
  }
  th_gtid = this_thr->th.th_info.ds.ds_gtid;
#if KMP_OS_UNIX
  if (final_spin)
    KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
#endif
  KA_TRACE(20,
           ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag));
#if KMP_STATS_ENABLED
  stats_state_e thread_state = KMP_GET_THREAD_STATE();
#endif

/* OMPT Behavior:
   THIS function is called from
     __kmp_barrier  (2 times)  (implicit or explicit barrier in parallel
                                regions)
       these have join / fork behavior

       In these cases, we don't change the state or trigger events in THIS
       function. Events are triggered in the calling code (__kmp_barrier):

         state := omp_state_overhead
           barrier-begin
           barrier-wait-begin
         state := omp_state_wait_barrier
         call join-barrier-implementation (finally arrive here)
         {}
         call fork-barrier-implementation (finally arrive here)
         {}
         state := omp_state_overhead
           barrier-wait-end
           barrier-end
         state := omp_state_work_parallel

     __kmp_fork_barrier  (after thread creation, before executing implicit
                          task)
         call fork-barrier-implementation (finally arrive here)
         {} // worker arrives here with state = omp_state_idle

     __kmp_join_barrier  (implicit barrier at end of parallel region)
         state := omp_state_barrier_implicit
           barrier-begin
           barrier-wait-begin
         call join-barrier-implementation (finally arrive here,
                                           final_spin=FALSE)
         {
         }
     __kmp_fork_barrier  (implicit barrier at end of parallel region)
         call fork-barrier-implementation (finally arrive here,
                                           final_spin=TRUE)

       Worker after task-team is finished:
         barrier-wait-end
         barrier-end
         implicit-task-end
         idle-begin
         state := omp_state_idle

       Before leaving, if state = omp_state_idle
         idle-end
         state := omp_state_overhead
*/
#if OMPT_SUPPORT
  omp_state_t ompt_entry_state;
  ompt_data_t *pId = NULL;
  ompt_data_t *tId;
  if (ompt_enabled.enabled) {
    ompt_entry_state = this_thr->th.ompt_thread_info.state;
    if (!final_spin || ompt_entry_state != omp_state_wait_barrier_implicit ||
        KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)) {
      ompt_lw_taskteam_t *team =
          this_thr->th.th_team->t.ompt_serialized_team_info;
      if (team) {
        pId = &(team->ompt_team_info.parallel_data);
        tId = &(team->ompt_task_info.task_data);
      } else {
        pId = OMPT_CUR_TEAM_DATA(this_thr);
        tId = OMPT_CUR_TASK_DATA(this_thr);
      }
    } else {
      pId = NULL;
      tId = &(this_thr->th.ompt_thread_info.task_data);
    }
#if OMPT_OPTIONAL
    if (ompt_entry_state == omp_state_idle) {
      if (ompt_enabled.ompt_callback_idle) {
        ompt_callbacks.ompt_callback(ompt_callback_idle)(ompt_scope_begin);
      }
    } else
#endif
        if (final_spin && (__kmp_tasking_mode == tskm_immediate_exec ||
                           this_thr->th.th_task_team == NULL)) {
      // implicit task is done. Either no taskqueue, or task-team finished
      __ompt_implicit_task_end(this_thr, ompt_entry_state, tId, pId);
    }
  }
#endif

  // Setup for waiting
  KMP_INIT_YIELD(spins);

  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
#if KMP_USE_MONITOR
// The worker threads cannot rely on the team struct existing at this point.
// Use the bt values cached in the thread struct instead.
#ifdef KMP_ADJUST_BLOCKTIME
    if (__kmp_zero_bt && !this_thr->th.th_team_bt_set)
      // Force immediate suspend if not set by user and more threads than
      // available procs
      hibernate = 0;
    else
      hibernate = this_thr->th.th_team_bt_intervals;
#else
    hibernate = this_thr->th.th_team_bt_intervals;
#endif /* KMP_ADJUST_BLOCKTIME */

    /* If the blocktime is nonzero, we want to make sure that we spin wait for
       the entirety of the specified #intervals, plus up to one interval more.
       This increment makes certain that this thread doesn't go to sleep too
       soon. */
    if (hibernate != 0)
      hibernate++;

    // Add in the current time value.
    hibernate += TCR_4(__kmp_global.g.g_time.dt.t_value);
    KF_TRACE(20, ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",
                  th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate,
                  hibernate - __kmp_global.g.g_time.dt.t_value));
#else
    hibernate_goal = KMP_NOW() + this_thr->th.th_team_bt_intervals;
    poll_count = 0;
#endif // KMP_USE_MONITOR
  }

  oversubscribed = (TCR_4(__kmp_nth) > __kmp_avail_proc);
  KMP_MB();

  // Main wait spin loop
  while (flag->notdone_check()) {
    int in_pool;
    kmp_task_team_t *task_team = NULL;
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      task_team = this_thr->th.th_task_team;
      /* If the thread's task team pointer is NULL, it means one of 3 things:
         1) A newly-created thread is first being released by
            __kmp_fork_barrier(), and its task team has not been set up yet.
         2) All tasks have been executed to completion.
         3) Tasking is off for this region. This could be because we are in a
            serialized region (perhaps the outer one), or else tasking was
            manually disabled (KMP_TASKING=0). */
      if (task_team != NULL) {
        if (TCR_SYNC_4(task_team->tt.tt_active)) {
          if (KMP_TASKING_ENABLED(task_team))
            flag->execute_tasks(
                this_thr, th_gtid, final_spin,
                &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);
          else
            this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
        } else {
          KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid));
#if OMPT_SUPPORT
          // task-team is done now, other cases should be caught above
          if (final_spin && ompt_enabled.enabled)
            __ompt_implicit_task_end(this_thr, ompt_entry_state, tId, pId);
#endif
          this_thr->th.th_task_team = NULL;
          this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
        }
      } else {
        this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
      } // if
    } // if

    KMP_FSYNC_SPIN_PREPARE(CCAST(void *, spin));
    if (TCR_4(__kmp_global.g.g_done)) {
      if (__kmp_global.g.g_abort)
        __kmp_abort_thread();
      break;
    }

    // If we are oversubscribed, or have waited a bit (and
    // KMP_LIBRARY=throughput), then yield
    // TODO: Should it be number of cores instead of thread contexts? Like:
    // KMP_YIELD(TCR_4(__kmp_nth) > __kmp_ncores);
    // Need performance improvement data to make the change...
    if (oversubscribed) {
      KMP_YIELD(1);
    } else {
      KMP_YIELD_SPIN(spins);
    }
    // Check if this thread was transferred from a team
    // to the thread pool (or vice-versa) while spinning.
    in_pool = !!TCR_4(this_thr->th.th_in_pool);
    if (in_pool != !!this_thr->th.th_active_in_pool) {
      if (in_pool) { // Recently transferred from team to pool
        KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
        this_thr->th.th_active_in_pool = TRUE;
        /* Here, we cannot assert that:
           KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) <=
                            __kmp_thread_pool_nth);
           __kmp_thread_pool_nth is inc/dec'd by the master thread while the
           fork/join lock is held, whereas __kmp_thread_pool_active_nth is
           inc/dec'd asynchronously by the workers. The two can get out of sync
           for brief periods of time. */
      } else { // Recently transferred from pool to team
        KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
        KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
        this_thr->th.th_active_in_pool = FALSE;
      }
    }

#if KMP_STATS_ENABLED
    // Check if thread has been signalled to idle state
    // This indicates that the logical "join-barrier" has finished
    if (this_thr->th.th_stats->isIdle() &&
        KMP_GET_THREAD_STATE() == FORK_JOIN_BARRIER) {
      KMP_SET_THREAD_STATE(IDLE);
      KMP_PUSH_PARTITIONED_TIMER(OMP_idle);
    }
#endif

    // Don't suspend if KMP_BLOCKTIME is set to "infinite"
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
      continue;

    // Don't suspend if there is a likelihood of new tasks being spawned.
    if ((task_team != NULL) && TCR_4(task_team->tt.tt_found_tasks))
      continue;

#if KMP_USE_MONITOR
    // If we have waited a bit more, fall asleep
    if (TCR_4(__kmp_global.g.g_time.dt.t_value) < hibernate)
      continue;
#else
    if (KMP_BLOCKING(hibernate_goal, poll_count++))
      continue;
#endif

    KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid));
#if KMP_OS_UNIX
    if (final_spin)
      KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
#endif
    flag->suspend(th_gtid);
#if KMP_OS_UNIX
    if (final_spin)
      KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
#endif

    if (TCR_4(__kmp_global.g.g_done)) {
      if (__kmp_global.g.g_abort)
        __kmp_abort_thread();
      break;
    } else if (__kmp_tasking_mode != tskm_immediate_exec &&
               this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) {
      this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
    }
    // TODO: If thread is done with work and times out, disband/free
  }

#if OMPT_SUPPORT
  omp_state_t ompt_exit_state = this_thr->th.ompt_thread_info.state;
  if (ompt_enabled.enabled && ompt_exit_state != omp_state_undefined) {
#if OMPT_OPTIONAL
    if (final_spin) {
      __ompt_implicit_task_end(this_thr, ompt_exit_state, tId, pId);
      ompt_exit_state = this_thr->th.ompt_thread_info.state;
    }
#endif
    if (ompt_exit_state == omp_state_idle) {
#if OMPT_OPTIONAL
      if (ompt_enabled.ompt_callback_idle) {
        ompt_callbacks.ompt_callback(ompt_callback_idle)(ompt_scope_end);
      }
#endif
      this_thr->th.ompt_thread_info.state = omp_state_overhead;
    }
  }
#endif
#if KMP_STATS_ENABLED
  // If we were put into idle state, pop that off the state stack
  if (KMP_GET_THREAD_STATE() == IDLE) {
    KMP_POP_PARTITIONED_TIMER();
    KMP_SET_THREAD_STATE(thread_state);
    this_thr->th.th_stats->resetIdleFlag();
  }
#endif

#if KMP_OS_UNIX
  if (final_spin)
    KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
#endif
  KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
}
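
// Illustrative sketch (assumptions, not runtime code): the wait loop above in
// miniature -- spin with a cheap check, back off by yielding, and finally
// block once a deadline passes, relying on the releasing side to wake the
// sleeper. The names toy_backoff_wait, deadline and the spin/time thresholds
// are invented for the example; only standard C++ is used.
#if 0
#include <atomic>
#include <chrono>
#include <condition_variable>
#include <mutex>
#include <thread>

static void toy_backoff_wait(std::atomic<unsigned> &flag, unsigned checker,
                             std::mutex &mtx, std::condition_variable &cv) {
  using clock = std::chrono::steady_clock;
  const auto deadline = clock::now() + std::chrono::milliseconds(200);
  int spins = 0;
  while (flag.load(std::memory_order_acquire) != checker) {
    if (++spins < 4096)
      continue; // 1) spin: cheapest check, lowest wake-up latency
    if (clock::now() < deadline) {
      std::this_thread::yield(); // 2) yield: give up the core, stay runnable
      continue;
    }
    // 3) sleep: block until the releaser notifies (cf. flag->suspend()).
    std::unique_lock<std::mutex> lk(mtx);
    cv.wait(lk,
            [&] { return flag.load(std::memory_order_acquire) == checker; });
  }
}
#endif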

/* Release any threads specified as waiting on the flag by releasing the flag
   and resuming the waiting thread if indicated by the sleep bit(s). A thread
   that calls __kmp_wait_template must call this function to wake up the
   potentially sleeping thread and prevent deadlocks! */
template <class C> static inline void __kmp_release_template(C *flag) {
#ifdef KMP_DEBUG
  int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
#endif
  KF_TRACE(20, ("__kmp_release: T#%d releasing flag(%p)\n", gtid, flag->get()));
  KMP_DEBUG_ASSERT(flag->get());
  KMP_FSYNC_RELEASING(flag->get());

  flag->internal_release();

  KF_TRACE(100, ("__kmp_release: T#%d set new spin(%p)=%llu\n", gtid,
                 flag->get(), (unsigned long long)flag->load()));

  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
    // Only need to check sleep stuff if infinite block time not set.
    // Are *any* threads waiting on flag sleeping?
    if (flag->is_any_sleeping()) {
      for (unsigned int i = 0; i < flag->get_num_waiters(); ++i) {
        // if sleeping waiter exists at i, sets current_waiter to i inside flag
        kmp_info_t *waiter = flag->get_waiter(i);
        if (waiter) {
          int wait_gtid = waiter->th.th_info.ds.ds_gtid;
          // Wake up thread if needed
          KF_TRACE(50, ("__kmp_release: T#%d waking up thread T#%d since sleep "
                        "flag(%p) set\n",
                        gtid, wait_gtid, flag->get()));
          flag->resume(wait_gtid); // unsets flag's current_waiter when done
        }
      }
    }
  }
}
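
// Illustrative sketch (assumptions, not runtime code): the release side that
// pairs with the toy wait sketch above. Like __kmp_release_template, it first
// publishes the new flag value and only then deals with waking a sleeper; the
// runtime limits the wake-up to waiters whose sleep bit is set, while the toy
// simply notifies. toy_release and its parameters are invented.
#if 0
static void toy_release(std::atomic<unsigned> &flag, unsigned new_val,
                        std::mutex &mtx, std::condition_variable &cv) {
  flag.store(new_val, std::memory_order_release); // cf. internal_release()
  // Taking the mutex before notifying closes the window in which the waiter
  // has checked the predicate but not yet blocked (no lost wake-up).
  std::lock_guard<std::mutex> lk(mtx);
  cv.notify_all(); // cf. flag->resume(wait_gtid)
}
#endif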

// Maps generic flag operations onto the matching 32-bit or 64-bit primitives.
template <typename FlagType> struct flag_traits {};

template <> struct flag_traits<kmp_uint32> {
  typedef kmp_uint32 flag_t;
  static const flag_type t = flag32;
  static inline flag_t tcr(flag_t f) { return TCR_4(f); }
  static inline flag_t test_then_add4(volatile flag_t *f) {
    return KMP_TEST_THEN_ADD4_32(RCAST(volatile kmp_int32 *, f));
  }
  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_OR32(f, v);
  }
  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_AND32(f, v);
  }
};

template <> struct flag_traits<kmp_uint64> {
  typedef kmp_uint64 flag_t;
  static const flag_type t = flag64;
  static inline flag_t tcr(flag_t f) { return TCR_8(f); }
  static inline flag_t test_then_add4(volatile flag_t *f) {
    return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
  }
  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_OR64(f, v);
  }
  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_AND64(f, v);
  }
};

// Basic flag that does not use C11 Atomics
template <typename FlagType>
class kmp_basic_flag_native : public kmp_flag_native<FlagType> {
  typedef flag_traits<FlagType> traits_type;
  FlagType checker; // Value the flag is compared against to check whether it
                    // has been released.
  kmp_info_t *waiting_threads[1]; // Array of threads sleeping on this thread.
  kmp_uint32 num_waiting_threads; // Number of threads sleeping on this thread.
public:
  kmp_basic_flag_native(volatile FlagType *p)
      : kmp_flag_native<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
  kmp_basic_flag_native(volatile FlagType *p, kmp_info_t *thr)
      : kmp_flag_native<FlagType>(p, traits_type::t), num_waiting_threads(1) {
    waiting_threads[0] = thr;
  }
  kmp_basic_flag_native(volatile FlagType *p, FlagType c)
      : kmp_flag_native<FlagType>(p, traits_type::t), checker(c),
        num_waiting_threads(0) {}
  kmp_info_t *get_waiter(kmp_uint32 i) {
    KMP_DEBUG_ASSERT(i < num_waiting_threads);
    return waiting_threads[i];
  }
  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
  void set_waiter(kmp_info_t *thr) {
    waiting_threads[0] = thr;
    num_waiting_threads = 1;
  }
  bool done_check() { return traits_type::tcr(*(this->get())) == checker; }
  bool done_check_val(FlagType old_loc) { return old_loc == checker; }
  bool notdone_check() { return traits_type::tcr(*(this->get())) != checker; }
  void internal_release() {
    (void)traits_type::test_then_add4((volatile FlagType *)this->get());
  }
  FlagType set_sleeping() {
    return traits_type::test_then_or((volatile FlagType *)this->get(),
                                     KMP_BARRIER_SLEEP_STATE);
  }
  FlagType unset_sleeping() {
    return traits_type::test_then_and((volatile FlagType *)this->get(),
                                      ~KMP_BARRIER_SLEEP_STATE);
  }
  bool is_sleeping_val(FlagType old_loc) {
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  bool is_sleeping() { return is_sleeping_val(*(this->get())); }
  bool is_any_sleeping() { return is_sleeping_val(*(this->get())); }
  kmp_uint8 *get_stolen() { return NULL; }
  enum barrier_type get_bt() { return bs_last_barrier; }
};
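
// Illustrative sketch (assumption, suggested by the add-4 in internal_release
// together with the SLEEP_STATE OR/AND above): the low bits of a go-flag are
// reserved for status such as the sleep bit, which is why releasing bumps the
// value by 4 rather than 1 and never disturbs a sleeper's bookkeeping. The
// names toy_sleep_bit_demo and SLEEP are invented stand-ins.
#if 0
#include <cassert>
static void toy_sleep_bit_demo() {
  unsigned long long go = 0;          // a go-flag, initially 0
  const unsigned long long SLEEP = 1; // stand-in for KMP_BARRIER_SLEEP_STATE

  go |= SLEEP;      // waiter: set_sleeping() before suspending
  assert(go & SLEEP); // releaser: is_any_sleeping() now reports a sleeper

  go += 4;                 // releaser: internal_release() bumps the state
  assert(go & SLEEP);      // the sleep bit survives the bump ...
  assert((go & ~3ULL) == 4); // ... while the payload advanced to the next step

  go &= ~SLEEP; // waiter: unset_sleeping() after being resumed
}
#endif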

// Same as kmp_basic_flag_native, but over a std::atomic flag location.
template <typename FlagType> class kmp_basic_flag : public kmp_flag<FlagType> {
  typedef flag_traits<FlagType> traits_type;
  FlagType checker; // Value the flag is compared against to check whether it
                    // has been released.
  kmp_info_t *waiting_threads[1]; // Array of threads sleeping on this thread.
  kmp_uint32 num_waiting_threads; // Number of threads sleeping on this thread.
public:
  kmp_basic_flag(std::atomic<FlagType> *p)
      : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
  kmp_basic_flag(std::atomic<FlagType> *p, kmp_info_t *thr)
      : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(1) {
    waiting_threads[0] = thr;
  }
  kmp_basic_flag(std::atomic<FlagType> *p, FlagType c)
      : kmp_flag<FlagType>(p, traits_type::t), checker(c),
        num_waiting_threads(0) {}
  kmp_info_t *get_waiter(kmp_uint32 i) {
    KMP_DEBUG_ASSERT(i < num_waiting_threads);
    return waiting_threads[i];
  }
  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
  void set_waiter(kmp_info_t *thr) {
    waiting_threads[0] = thr;
    num_waiting_threads = 1;
  }
  bool done_check() { return this->load() == checker; }
  bool done_check_val(FlagType old_loc) { return old_loc == checker; }
  bool notdone_check() { return this->load() != checker; }
  void internal_release() { KMP_ATOMIC_ADD(this->get(), 4); }
  FlagType set_sleeping() {
    return KMP_ATOMIC_OR(this->get(), KMP_BARRIER_SLEEP_STATE);
  }
  FlagType unset_sleeping() {
    return KMP_ATOMIC_AND(this->get(), ~KMP_BARRIER_SLEEP_STATE);
  }
  bool is_sleeping_val(FlagType old_loc) {
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  bool is_sleeping() { return is_sleeping_val(this->load()); }
  bool is_any_sleeping() { return is_sleeping_val(this->load()); }
  kmp_uint8 *get_stolen() { return NULL; }
  enum barrier_type get_bt() { return bs_last_barrier; }
};

class kmp_flag_32 : public kmp_basic_flag<kmp_uint32> {
public:
  kmp_flag_32(std::atomic<kmp_uint32> *p) : kmp_basic_flag<kmp_uint32>(p) {}
  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_info_t *thr)
      : kmp_basic_flag<kmp_uint32>(p, thr) {}
  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_uint32 c)
      : kmp_basic_flag<kmp_uint32>(p, c) {}
  void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); }
  void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_execute_tasks_32(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  void wait(kmp_info_t *this_thr,
            int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
    if (final_spin)
      __kmp_wait_template<kmp_flag_32, TRUE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      __kmp_wait_template<kmp_flag_32, FALSE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  void release() { __kmp_release_template(this); }
  flag_type get_ptr_type() { return flag32; }
};

class kmp_flag_64 : public kmp_basic_flag_native<kmp_uint64> {
public:
  kmp_flag_64(volatile kmp_uint64 *p) : kmp_basic_flag_native<kmp_uint64>(p) {}
  kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr)
      : kmp_basic_flag_native<kmp_uint64>(p, thr) {}
  kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c)
      : kmp_basic_flag_native<kmp_uint64>(p, c) {}
  void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); }
  void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_execute_tasks_64(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  void wait(kmp_info_t *this_thr,
            int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
    if (final_spin)
      __kmp_wait_template<kmp_flag_64, TRUE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      __kmp_wait_template<kmp_flag_64, FALSE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  void release() { __kmp_release_template(this); }
  flag_type get_ptr_type() { return flag64; }
};
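
// Illustrative sketch (assumption, modeled on the kmp_flag_64 construction
// used inside kmp_flag_oncore::notdone_check() further below): a worker waits
// on its barrier go-flag until it reaches KMP_BARRIER_STATE_BUMP, and the
// releasing thread constructs a flag over the same b_go word to bump it and
// wake the worker. The toy_* helper names are invented; b_go and th_bar are
// the fields already referenced in this header.
#if 0
static void toy_wait_for_go(kmp_info_t *this_thr, enum barrier_type bt) {
  kmp_flag_64 flag(&this_thr->th.th_bar[bt].bb.b_go,
                   (kmp_uint64)KMP_BARRIER_STATE_BUMP);
  flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(NULL)); // spin / yield / sleep
}

static void toy_release_go(kmp_info_t *other_thr, enum barrier_type bt) {
  kmp_flag_64 flag(&other_thr->th.th_bar[bt].bb.b_go, other_thr);
  flag.release(); // bump b_go and resume other_thr if it went to sleep
}
#endif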

// Hierarchical 64-bit on-core barrier instantiation
class kmp_flag_oncore : public kmp_flag_native<kmp_uint64> {
  kmp_uint64 checker;
  kmp_info_t *waiting_threads[1];
  kmp_uint32 num_waiting_threads;
  kmp_uint32 offset; // Portion of flag of interest for an operation.
  bool flag_switch; // Indicates a switch in flag location.
  enum barrier_type bt; // Barrier type.
  kmp_info_t *this_thr; // Thread to redirect to a different flag location.
#if USE_ITT_BUILD
  void *itt_sync_obj; // ITT object to pass to new flag location.
#endif
  unsigned char &byteref(volatile kmp_uint64 *loc, size_t offset) {
    return (RCAST(unsigned char *, CCAST(kmp_uint64 *, loc)))[offset];
  }

public:
  kmp_flag_oncore(volatile kmp_uint64 *p)
      : kmp_flag_native<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
        flag_switch(false) {}
  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx)
      : kmp_flag_native<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
        offset(idx), flag_switch(false) {}
  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx,
                  enum barrier_type bar_t,
                  kmp_info_t *thr USE_ITT_BUILD_ARG(void *itt))
      : kmp_flag_native<kmp_uint64>(p, flag_oncore), checker(c),
        num_waiting_threads(0), offset(idx), flag_switch(false), bt(bar_t),
        this_thr(thr) USE_ITT_BUILD_ARG(itt_sync_obj(itt)) {}
  kmp_info_t *get_waiter(kmp_uint32 i) {
    KMP_DEBUG_ASSERT(i < num_waiting_threads);
    return waiting_threads[i];
  }
  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
  void set_waiter(kmp_info_t *thr) {
    waiting_threads[0] = thr;
    num_waiting_threads = 1;
  }
  bool done_check_val(kmp_uint64 old_loc) {
    return byteref(&old_loc, offset) == checker;
  }
  bool done_check() { return done_check_val(*get()); }
  bool notdone_check() {
    // Calculate flag_switch
    if (this_thr->th.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG)
      flag_switch = true;
    if (byteref(get(), offset) != 1 && !flag_switch)
      return true;
    else if (flag_switch) {
      this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING;
      kmp_flag_64 flag(&this_thr->th.th_bar[bt].bb.b_go,
                       (kmp_uint64)KMP_BARRIER_STATE_BUMP);
      __kmp_wait_64(this_thr, &flag, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    }
    return false;
  }
  void internal_release() {
    // Other threads can write their own bytes simultaneously.
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
      byteref(get(), offset) = 1;
    } else {
      kmp_uint64 mask = 0;
      byteref(&mask, offset) = 1;
      KMP_TEST_THEN_OR64(get(), mask);
    }
  }
  kmp_uint64 set_sleeping() {
    return KMP_TEST_THEN_OR64(get(), KMP_BARRIER_SLEEP_STATE);
  }
  kmp_uint64 unset_sleeping() {
    return KMP_TEST_THEN_AND64(get(), ~KMP_BARRIER_SLEEP_STATE);
  }
  bool is_sleeping_val(kmp_uint64 old_loc) {
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  bool is_sleeping() { return is_sleeping_val(*get()); }
  bool is_any_sleeping() { return is_sleeping_val(*get()); }
  void wait(kmp_info_t *this_thr, int final_spin) {
    if (final_spin)
      __kmp_wait_template<kmp_flag_oncore, TRUE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      __kmp_wait_template<kmp_flag_oncore, FALSE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  void release() { __kmp_release_template(this); }
  void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); }
  void resume(int th_gtid) { __kmp_resume_oncore(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_execute_tasks_oncore(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  kmp_uint8 *get_stolen() { return NULL; }
  enum barrier_type get_bt() { return bt; }
  flag_type get_ptr_type() { return flag_oncore; }
};
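
// Illustrative sketch (assumption): what byteref()/internal_release() above do
// to the shared 64-bit word -- each child of an on-core barrier owns one byte,
// so up to eight children can check in through a single flag word, and a
// non-infinite blocktime forces the byte store to go through an atomic OR of a
// one-byte mask. The demo uses only standard C++; toy_* names are invented.
#if 0
#include <cassert>
#include <cstring>
static void toy_oncore_byte_demo() {
  unsigned long long flags = 0; // shared on-core flag word, one byte per child
  for (size_t offset = 0; offset < sizeof(flags); ++offset) {
    unsigned long long mask = 0;
    std::memcpy(reinterpret_cast<unsigned char *>(&mask) + offset, "\1", 1);
    flags |= mask; // cf. KMP_TEST_THEN_OR64(get(), mask) in internal_release()
  }
  assert(flags == 0x0101010101010101ULL); // every child has checked in
}
#endif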

// Used to wake up threads, volatile void* flag is usually the th_sleep_loc
// associated with int gtid.
static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) {
  if (!flag)
    return;

  switch (RCAST(kmp_flag_64 *, CCAST(void *, flag))->get_type()) {
  case flag32:
    __kmp_resume_32(gtid, NULL);
    break;
  case flag64:
    __kmp_resume_64(gtid, NULL);
    break;
  case flag_oncore:
    __kmp_resume_oncore(gtid, NULL);
    break;
  }
}
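
// Illustrative sketch (assumption, not a call site from this header): the
// typical shape of a caller of the wrapper above -- wake a thread that may be
// sleeping on whatever flag it registered in th_sleep_loc (mentioned in the
// comment above). toy_wake_thread is invented, and the locking the real
// runtime performs around this is omitted.
#if 0
static inline void toy_wake_thread(int gtid, kmp_info_t *thr) {
  volatile void *sleep_loc = thr->th.th_sleep_loc; // flag the thread sleeps on
  __kmp_null_resume_wrapper(gtid, sleep_loc); // dispatch on the flag's type
}
#endif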

#endif // KMP_WAIT_RELEASE_H