LLVM OpenMP* Runtime Library
kmp_wait_release.h
/*
 * kmp_wait_release.h -- Wait/Release implementation
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef KMP_WAIT_RELEASE_H
#define KMP_WAIT_RELEASE_H

#include "kmp.h"
#include "kmp_itt.h"
#include "kmp_stats.h"
#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

struct flag_properties {
  unsigned int type : 16;
  unsigned int reserved : 16;
};

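/* Base class for wait/release flags whose storage is read and written through
   plain volatile accesses rather than std::atomic operations. */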
template <typename P> class kmp_flag_native {
  volatile P *loc;
  flag_properties t;

public:
  typedef P flag_t;
  kmp_flag_native(volatile P *p, flag_type ft)
      : loc(p), t({(short unsigned int)ft, 0U}) {}
  volatile P *get() { return loc; }
  void *get_void_p() { return RCAST(void *, CCAST(P *, loc)); }
  void set(volatile P *new_loc) { loc = new_loc; }
  flag_type get_type() { return (flag_type)(t.type); }
  P load() { return *loc; }
  void store(P val) { *loc = val; }
};

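/* Base class for wait/release flags whose storage is a std::atomic<P>; loads
   use acquire ordering and stores use release ordering. */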
template <typename P> class kmp_flag {
  std::atomic<P> *loc; // Pointer to the flag storage modified by another thread
  flag_properties t;

public:
  typedef P flag_t;
  kmp_flag(std::atomic<P> *p, flag_type ft)
      : loc(p), t({(short unsigned int)ft, 0U}) {}
  std::atomic<P> *get() { return loc; }
  void *get_void_p() { return RCAST(void *, loc); }
  void set(std::atomic<P> *new_loc) { loc = new_loc; }
  flag_type get_type() { return (flag_type)(t.type); }
  P load() { return loc->load(std::memory_order_acquire); }
  void store(P val) { loc->store(val, std::memory_order_release); }
  // Derived classes must provide the following:
  /*
  kmp_info_t * get_waiter(kmp_uint32 i);
  kmp_uint32 get_num_waiters();
  bool done_check();
  bool done_check_val(P old_loc);
  bool notdone_check();
  P internal_release();
  void suspend(int th_gtid);
  void mwait(int th_gtid);
  void resume(int th_gtid);
  P set_sleeping();
  P unset_sleeping();
  bool is_sleeping();
  bool is_any_sleeping();
  bool is_sleeping_val(P old_loc);
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished
                    USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32
                    is_constrained);
  */
};

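// When OMPT is enabled, report that the implicit task tied to a waiting thread
// has finished: fire the sync-region(-wait) end callbacks and the
// implicit-task end callback, then move the thread to the idle (worker) or
// overhead (primary thread) state.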
#if OMPT_SUPPORT
OMPT_NOINLINE
static void __ompt_implicit_task_end(kmp_info_t *this_thr,
                                     ompt_state_t ompt_state,
                                     ompt_data_t *tId) {
  int ds_tid = this_thr->th.th_info.ds.ds_tid;
  if (ompt_state == ompt_state_wait_barrier_implicit) {
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
#if OMPT_OPTIONAL
    void *codeptr = NULL;
    if (ompt_enabled.ompt_callback_sync_region_wait) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, tId,
          codeptr);
    }
    if (ompt_enabled.ompt_callback_sync_region) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, tId,
          codeptr);
    }
#endif
    if (!KMP_MASTER_TID(ds_tid)) {
      if (ompt_enabled.ompt_callback_implicit_task) {
        int flags = this_thr->th.ompt_thread_info.parallel_flags;
        flags = (flags & ompt_parallel_league) ? ompt_task_initial
                                               : ompt_task_implicit;
        ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
            ompt_scope_end, NULL, tId, 0, ds_tid, flags);
      }
      // return to idle state
      this_thr->th.ompt_thread_info.state = ompt_state_idle;
    } else {
      this_thr->th.ompt_thread_info.state = ompt_state_overhead;
    }
  }
}
#endif

/* Spin wait loop that first does pause/yield, then sleep. A thread that calls
   __kmp_wait_* must make certain that another thread calls __kmp_release
   to wake it back up to prevent deadlocks!

   NOTE: We may not belong to a team at this point. */
template <class C, bool final_spin, bool Cancellable = false,
          bool Sleepable = true>
static inline bool
__kmp_wait_template(kmp_info_t *this_thr,
                    C *flag USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
#if USE_ITT_BUILD && USE_ITT_NOTIFY
  volatile void *spin = flag->get();
#endif
  kmp_uint32 spins;
  int th_gtid;
  int tasks_completed = FALSE;
#if !KMP_USE_MONITOR
  kmp_uint64 poll_count;
  kmp_uint64 hibernate_goal;
#else
  kmp_uint32 hibernate;
#endif

  KMP_FSYNC_SPIN_INIT(spin, NULL);
  if (flag->done_check()) {
    KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
    return false;
  }
  th_gtid = this_thr->th.th_info.ds.ds_gtid;
  if (Cancellable) {
    kmp_team_t *team = this_thr->th.th_team;
    if (team && team->t.t_cancel_request == cancel_parallel)
      return true;
  }
#if KMP_OS_UNIX
  if (final_spin)
    KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
#endif
  KA_TRACE(20,
           ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag));
#if KMP_STATS_ENABLED
  stats_state_e thread_state = KMP_GET_THREAD_STATE();
#endif

/* OMPT Behavior:
   THIS function is called from
     __kmp_barrier (2 times)  (implicit or explicit barrier in parallel regions)
       these have join / fork behavior

     In these cases, we don't change the state or trigger events in THIS
     function. Events are triggered in the calling code (__kmp_barrier):

       state := ompt_state_overhead
         barrier-begin
         barrier-wait-begin
       state := ompt_state_wait_barrier
       call join-barrier-implementation (finally arrive here)
       {}
       call fork-barrier-implementation (finally arrive here)
       {}
       state := ompt_state_overhead
         barrier-wait-end
         barrier-end
       state := ompt_state_work_parallel

     __kmp_fork_barrier (after thread creation, before executing implicit task)
       call fork-barrier-implementation (finally arrive here)
       {} // workers arrive here with state = ompt_state_idle

     __kmp_join_barrier (implicit barrier at end of parallel region)
       state := ompt_state_barrier_implicit
         barrier-begin
         barrier-wait-begin
       call join-barrier-implementation (finally arrive here, final_spin=FALSE)
       {
       }
     __kmp_fork_barrier (implicit barrier at end of parallel region)
       call fork-barrier-implementation (finally arrive here, final_spin=TRUE)

       Worker after task-team is finished:
         barrier-wait-end
         barrier-end
         implicit-task-end
         idle-begin
       state := ompt_state_idle

       Before leaving, if state = ompt_state_idle
         idle-end
       state := ompt_state_overhead
*/
#if OMPT_SUPPORT
  ompt_state_t ompt_entry_state;
  ompt_data_t *tId;
  if (ompt_enabled.enabled) {
    ompt_entry_state = this_thr->th.ompt_thread_info.state;
    if (!final_spin || ompt_entry_state != ompt_state_wait_barrier_implicit ||
        KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)) {
      ompt_lw_taskteam_t *team =
          this_thr->th.th_team->t.ompt_serialized_team_info;
      if (team) {
        tId = &(team->ompt_task_info.task_data);
      } else {
        tId = OMPT_CUR_TASK_DATA(this_thr);
      }
    } else {
      tId = &(this_thr->th.ompt_thread_info.task_data);
    }
    if (final_spin && (__kmp_tasking_mode == tskm_immediate_exec ||
                       this_thr->th.th_task_team == NULL)) {
      // implicit task is done. Either no taskqueue, or task-team finished
      __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
    }
  }
#endif

  KMP_INIT_YIELD(spins); // Setup for waiting

  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ||
      __kmp_pause_status == kmp_soft_paused) {
#if KMP_USE_MONITOR
// The worker threads cannot rely on the team struct existing at this point.
// Use the bt values cached in the thread struct instead.
#ifdef KMP_ADJUST_BLOCKTIME
    if (__kmp_pause_status == kmp_soft_paused ||
        (__kmp_zero_bt && !this_thr->th.th_team_bt_set))
      // Force immediate suspend if not set by user and more threads than
      // available procs
      hibernate = 0;
    else
      hibernate = this_thr->th.th_team_bt_intervals;
#else
    hibernate = this_thr->th.th_team_bt_intervals;
#endif /* KMP_ADJUST_BLOCKTIME */

    /* If the blocktime is nonzero, we want to make sure that we spin wait for
       the entirety of the specified #intervals, plus up to one interval more.
       This increment makes certain that this thread doesn't go to sleep too
       soon. */
    if (hibernate != 0)
      hibernate++;

    // Add in the current time value.
    hibernate += TCR_4(__kmp_global.g.g_time.dt.t_value);
    KF_TRACE(20, ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",
                  th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate,
                  hibernate - __kmp_global.g.g_time.dt.t_value));
#else
    if (__kmp_pause_status == kmp_soft_paused) {
      // Force immediate suspend
      hibernate_goal = KMP_NOW();
    } else
      hibernate_goal = KMP_NOW() + this_thr->th.th_team_bt_intervals;
    poll_count = 0;
    (void)poll_count;
#endif // KMP_USE_MONITOR
  }

  KMP_MB();

  // Main wait spin loop
  while (flag->notdone_check()) {
    kmp_task_team_t *task_team = NULL;
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      task_team = this_thr->th.th_task_team;
      /* If the thread's task team pointer is NULL, it means one of 3 things:
         1) A newly-created thread is first being released by
         __kmp_fork_barrier(), and its task team has not been set up yet.
         2) All tasks have been executed to completion.
         3) Tasking is off for this region. This could be because we are in a
         serialized region (perhaps the outer one), or else tasking was manually
         disabled (KMP_TASKING=0). */
      if (task_team != NULL) {
        if (TCR_SYNC_4(task_team->tt.tt_active)) {
          if (KMP_TASKING_ENABLED(task_team))
            flag->execute_tasks(
                this_thr, th_gtid, final_spin,
                &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);
          else
            this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
        } else {
          KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid));
#if OMPT_SUPPORT
          // task-team is done now; other cases should be caught above
          if (final_spin && ompt_enabled.enabled)
            __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
#endif
          this_thr->th.th_task_team = NULL;
          this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
        }
      } else {
        this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
      } // if
    } // if

    KMP_FSYNC_SPIN_PREPARE(CCAST(void *, spin));
    if (TCR_4(__kmp_global.g.g_done)) {
      if (__kmp_global.g.g_abort)
        __kmp_abort_thread();
      break;
    }

    // If we are oversubscribed, or have waited a bit (and
    // KMP_LIBRARY=throughput), then yield
    KMP_YIELD_OVERSUB_ELSE_SPIN(spins);

#if KMP_STATS_ENABLED
    // Check if thread has been signalled to idle state
    // This indicates that the logical "join-barrier" has finished
    if (this_thr->th.th_stats->isIdle() &&
        KMP_GET_THREAD_STATE() == FORK_JOIN_BARRIER) {
      KMP_SET_THREAD_STATE(IDLE);
      KMP_PUSH_PARTITIONED_TIMER(OMP_idle);
    }
#endif
    // Check if the barrier surrounding this wait loop has been cancelled
    if (Cancellable) {
      kmp_team_t *team = this_thr->th.th_team;
      if (team && team->t.t_cancel_request == cancel_parallel)
        break;
    }

    // For a hidden helper thread, a NULL task_team means the main thread has
    // not yet released the barrier. We must not sleep here: if we did, then
    // once the main thread releases all children's barriers, the hidden helper
    // threads would still be asleep and would miss follow-up setup such as the
    // task-team sync, leaving this thread without a task team. Usually that is
    // harmless, but in one corner case, when the first task encountered is an
    // untied task, the check in __kmp_task_alloc crashes because it uses the
    // task team pointer without checking it for NULL.
    if (task_team && KMP_HIDDEN_HELPER_WORKER_THREAD(th_gtid) &&
        !TCR_4(__kmp_hidden_helper_team_done)) {
      // If there are still hidden helper tasks to be executed, the hidden
      // helper thread will not enter a waiting status.
      if (KMP_ATOMIC_LD_ACQ(&__kmp_unexecuted_hidden_helper_tasks) == 0) {
        __kmp_hidden_helper_worker_thread_wait();
      }
      continue;
    }

    // Don't suspend if KMP_BLOCKTIME is set to "infinite"
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
        __kmp_pause_status != kmp_soft_paused)
      continue;

    // Don't suspend if there is a likelihood of new tasks being spawned.
    if ((task_team != NULL) && TCR_4(task_team->tt.tt_found_tasks))
      continue;

#if KMP_USE_MONITOR
    // If we have waited a bit more, fall asleep
    if (TCR_4(__kmp_global.g.g_time.dt.t_value) < hibernate)
      continue;
#else
    if (KMP_BLOCKING(hibernate_goal, poll_count++))
      continue;
#endif
    // Don't suspend if wait loop designated non-sleepable
    // in template parameters
    if (!Sleepable)
      continue;

    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
        __kmp_pause_status != kmp_soft_paused)
      continue;

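    // Past this point the thread actually goes to sleep: via monitor/(u)mwait
    // when available and enabled, otherwise via the OS-level suspend path in
    // flag->suspend().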
#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
    if (__kmp_mwait_enabled || __kmp_umwait_enabled) {
      KF_TRACE(50, ("__kmp_wait_sleep: T#%d using monitor/mwait\n", th_gtid));
      flag->mwait(th_gtid);
    } else {
#endif
      KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid));
#if KMP_OS_UNIX
      if (final_spin)
        KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
#endif
      flag->suspend(th_gtid);
#if KMP_OS_UNIX
      if (final_spin)
        KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
#endif
#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
    }
#endif

    if (TCR_4(__kmp_global.g.g_done)) {
      if (__kmp_global.g.g_abort)
        __kmp_abort_thread();
      break;
    } else if (__kmp_tasking_mode != tskm_immediate_exec &&
               this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) {
      this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
    }
    // TODO: If thread is done with work and times out, disband/free
  }

#if OMPT_SUPPORT
  ompt_state_t ompt_exit_state = this_thr->th.ompt_thread_info.state;
  if (ompt_enabled.enabled && ompt_exit_state != ompt_state_undefined) {
#if OMPT_OPTIONAL
    if (final_spin) {
      __ompt_implicit_task_end(this_thr, ompt_exit_state, tId);
      ompt_exit_state = this_thr->th.ompt_thread_info.state;
    }
#endif
    if (ompt_exit_state == ompt_state_idle) {
      this_thr->th.ompt_thread_info.state = ompt_state_overhead;
    }
  }
#endif
#if KMP_STATS_ENABLED
  // If we were put into idle state, pop that off the state stack
  if (KMP_GET_THREAD_STATE() == IDLE) {
    KMP_POP_PARTITIONED_TIMER();
    KMP_SET_THREAD_STATE(thread_state);
    this_thr->th.th_stats->resetIdleFlag();
  }
#endif

#if KMP_OS_UNIX
  if (final_spin)
    KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
#endif
  KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
  if (Cancellable) {
    kmp_team_t *team = this_thr->th.th_team;
    if (team && team->t.t_cancel_request == cancel_parallel) {
      if (tasks_completed) {
        // undo the previous decrement of unfinished_threads so that the
        // thread can decrement at the join barrier with no problem
        kmp_task_team_t *task_team = this_thr->th.th_task_team;
        std::atomic<kmp_int32> *unfinished_threads =
            &(task_team->tt.tt_unfinished_threads);
        KMP_ATOMIC_INC(unfinished_threads);
      }
      return true;
    }
  }
  return false;
}

#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
// Set up a monitor on the flag variable causing the calling thread to wait in
// a less active state until the flag variable is modified.
template <class C>
static inline void __kmp_mwait_template(int th_gtid, C *flag) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_mwait);
  kmp_info_t *th = __kmp_threads[th_gtid];

  KF_TRACE(30, ("__kmp_mwait_template: T#%d enter for flag = %p\n", th_gtid,
                flag->get()));

  // User-level mwait is available
  KMP_DEBUG_ASSERT(__kmp_mwait_enabled || __kmp_umwait_enabled);

  __kmp_suspend_initialize_thread(th);
  __kmp_lock_suspend_mx(th);

  volatile void *spin = flag->get();
  void *cacheline = (void *)(kmp_uintptr_t(spin) & ~(CACHE_LINE - 1));

  if (!flag->done_check()) {
    // Mark thread as no longer active
    th->th.th_active = FALSE;
    if (th->th.th_active_in_pool) {
      th->th.th_active_in_pool = FALSE;
      KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
      KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
    }
    flag->set_sleeping();
    KF_TRACE(50, ("__kmp_mwait_template: T#%d calling monitor\n", th_gtid));
#if KMP_HAVE_UMWAIT
    if (__kmp_umwait_enabled) {
      __kmp_umonitor(cacheline);
    }
#elif KMP_HAVE_MWAIT
    if (__kmp_mwait_enabled) {
      __kmp_mm_monitor(cacheline, 0, 0);
    }
#endif
    // To avoid a race, check flag between 'monitor' and 'mwait'. A write to
    // the address could happen after the last time we checked and before
    // monitoring started, in which case monitor can't detect the change.
    if (flag->done_check())
      flag->unset_sleeping();
    else {
      // if flag changes here, wake-up happens immediately
      TCW_PTR(th->th.th_sleep_loc, (void *)flag);
      __kmp_unlock_suspend_mx(th);
      KF_TRACE(50, ("__kmp_mwait_template: T#%d calling mwait\n", th_gtid));
#if KMP_HAVE_UMWAIT
      if (__kmp_umwait_enabled) {
        __kmp_umwait(1, 100); // to do: enable ctrl via hints, backoff counter
      }
#elif KMP_HAVE_MWAIT
      if (__kmp_mwait_enabled) {
        __kmp_mm_mwait(0, __kmp_mwait_hints);
      }
#endif
      KF_TRACE(50, ("__kmp_mwait_template: T#%d mwait done\n", th_gtid));
      __kmp_lock_suspend_mx(th);
      // Clean up sleep info; doesn't matter how/why this thread stopped waiting
      if (flag->is_sleeping())
        flag->unset_sleeping();
      TCW_PTR(th->th.th_sleep_loc, NULL);
    }
    // Mark thread as active again
    th->th.th_active = TRUE;
    if (TCR_4(th->th.th_in_pool)) {
      KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
      th->th.th_active_in_pool = TRUE;
    }
  } // Drop out to main wait loop to check flag, handle tasks, etc.
  __kmp_unlock_suspend_mx(th);
  KF_TRACE(30, ("__kmp_mwait_template: T#%d exit\n", th_gtid));
}
#endif // KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT

/* Release any threads specified as waiting on the flag by releasing the flag
   and resuming the waiting thread(s) if indicated by the sleep bit(s). A
   thread that calls __kmp_wait_template must call this function to wake up the
   potentially sleeping thread and prevent deadlocks! */
template <class C> static inline void __kmp_release_template(C *flag) {
#ifdef KMP_DEBUG
  int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
#endif
  KF_TRACE(20, ("__kmp_release: T#%d releasing flag(%x)\n", gtid, flag->get()));
  KMP_DEBUG_ASSERT(flag->get());
  KMP_FSYNC_RELEASING(flag->get_void_p());

  flag->internal_release();

  KF_TRACE(100, ("__kmp_release: T#%d set new spin=%d\n", gtid, flag->get(),
                 flag->load()));

  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
    // Only need to check sleep stuff if infinite block time not set.
    // Are *any* threads waiting on flag sleeping?
    if (flag->is_any_sleeping()) {
      for (unsigned int i = 0; i < flag->get_num_waiters(); ++i) {
        // if sleeping waiter exists at i, sets current_waiter to i inside flag
        kmp_info_t *waiter = flag->get_waiter(i);
        if (waiter) {
          int wait_gtid = waiter->th.th_info.ds.ds_gtid;
          // Wake up thread if needed
          KF_TRACE(50, ("__kmp_release: T#%d waking up thread T#%d since sleep "
                        "flag(%p) set\n",
                        gtid, wait_gtid, flag->get()));
          flag->resume(wait_gtid); // unsets flag's current_waiter when done
        }
      }
    }
  }
}
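
/* A minimal wait/release usage sketch (illustrative only; go_flag, this_thr,
   waiter_thr and itt_sync_obj are hypothetical names; the calls mirror the
   pattern used by the barrier code with the 64-bit flag defined below):

     volatile kmp_uint64 go_flag = 0; // e.g. KMP_INIT_BARRIER_STATE

     // Waiting thread: spin, run tasks, and eventually sleep until go_flag
     // reaches the expected "go" value.
     kmp_flag_64<> wait_flag(&go_flag, (kmp_uint64)KMP_BARRIER_STATE_BUMP);
     wait_flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));

     // Releasing thread: bump the flag and wake the waiter if it is asleep.
     kmp_flag_64<> go(&go_flag, waiter_thr);
     go.release();
*/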
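// flag_traits maps a flag word type (kmp_uint32 or kmp_uint64) to its
// flag_type enumerator and to the interlocked read/RMW primitives used by the
// volatile ("native") flag classes below.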
template <typename FlagType> struct flag_traits {};

template <> struct flag_traits<kmp_uint32> {
  typedef kmp_uint32 flag_t;
  static const flag_type t = flag32;
  static inline flag_t tcr(flag_t f) { return TCR_4(f); }
  static inline flag_t test_then_add4(volatile flag_t *f) {
    return KMP_TEST_THEN_ADD4_32(RCAST(volatile kmp_int32 *, f));
  }
  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_OR32(f, v);
  }
  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_AND32(f, v);
  }
};

template <> struct flag_traits<kmp_uint64> {
  typedef kmp_uint64 flag_t;
  static const flag_type t = flag64;
  static inline flag_t tcr(flag_t f) { return TCR_8(f); }
  static inline flag_t test_then_add4(volatile flag_t *f) {
    return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
  }
  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_OR64(f, v);
  }
  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_AND64(f, v);
  }
};

// Basic flag that does not use C11 Atomics
template <typename FlagType, bool Sleepable>
class kmp_basic_flag_native : public kmp_flag_native<FlagType> {
  typedef flag_traits<FlagType> traits_type;
  FlagType checker;
  kmp_info_t *waiting_threads[1];
  kmp_uint32 num_waiting_threads;

public:
  kmp_basic_flag_native(volatile FlagType *p)
      : kmp_flag_native<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
  kmp_basic_flag_native(volatile FlagType *p, kmp_info_t *thr)
      : kmp_flag_native<FlagType>(p, traits_type::t), num_waiting_threads(1) {
    waiting_threads[0] = thr;
  }
  kmp_basic_flag_native(volatile FlagType *p, FlagType c)
      : kmp_flag_native<FlagType>(p, traits_type::t), checker(c),
        num_waiting_threads(0) {}
  kmp_info_t *get_waiter(kmp_uint32 i) {
    KMP_DEBUG_ASSERT(i < num_waiting_threads);
    return waiting_threads[i];
  }
  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
  void set_waiter(kmp_info_t *thr) {
    waiting_threads[0] = thr;
    num_waiting_threads = 1;
  }
  bool done_check() {
    if (Sleepable)
      return (traits_type::tcr(*(this->get())) & ~KMP_BARRIER_SLEEP_STATE) ==
             checker;
    else
      return traits_type::tcr(*(this->get())) == checker;
  }
  bool done_check_val(FlagType old_loc) { return old_loc == checker; }
  bool notdone_check() { return traits_type::tcr(*(this->get())) != checker; }
  void internal_release() {
    (void)traits_type::test_then_add4((volatile FlagType *)this->get());
  }
  FlagType set_sleeping() {
    return traits_type::test_then_or((volatile FlagType *)this->get(),
                                     KMP_BARRIER_SLEEP_STATE);
  }
  FlagType unset_sleeping() {
    return traits_type::test_then_and((volatile FlagType *)this->get(),
                                      ~KMP_BARRIER_SLEEP_STATE);
  }
  bool is_sleeping_val(FlagType old_loc) {
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  bool is_sleeping() { return is_sleeping_val(*(this->get())); }
  bool is_any_sleeping() { return is_sleeping_val(*(this->get())); }
  kmp_uint8 *get_stolen() { return NULL; }
  enum barrier_type get_bt() { return bs_last_barrier; }
};

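// Same basic flag, but built on kmp_flag<FlagType>, i.e. all reads and
// read-modify-writes go through C++11 std::atomic operations.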
template <typename FlagType, bool Sleepable>
class kmp_basic_flag : public kmp_flag<FlagType> {
  typedef flag_traits<FlagType> traits_type;
  FlagType checker;
  kmp_info_t *waiting_threads[1];
  kmp_uint32 num_waiting_threads;

public:
  kmp_basic_flag(std::atomic<FlagType> *p)
      : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
  kmp_basic_flag(std::atomic<FlagType> *p, kmp_info_t *thr)
      : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(1) {
    waiting_threads[0] = thr;
  }
  kmp_basic_flag(std::atomic<FlagType> *p, FlagType c)
      : kmp_flag<FlagType>(p, traits_type::t), checker(c),
        num_waiting_threads(0) {}
  kmp_info_t *get_waiter(kmp_uint32 i) {
    KMP_DEBUG_ASSERT(i < num_waiting_threads);
    return waiting_threads[i];
  }
  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
  void set_waiter(kmp_info_t *thr) {
    waiting_threads[0] = thr;
    num_waiting_threads = 1;
  }
  bool done_check() {
    if (Sleepable)
      return (this->load() & ~KMP_BARRIER_SLEEP_STATE) == checker;
    else
      return this->load() == checker;
  }
  bool done_check_val(FlagType old_loc) { return old_loc == checker; }
  bool notdone_check() { return this->load() != checker; }
  void internal_release() { KMP_ATOMIC_ADD(this->get(), 4); }
  FlagType set_sleeping() {
    return KMP_ATOMIC_OR(this->get(), KMP_BARRIER_SLEEP_STATE);
  }
  FlagType unset_sleeping() {
    return KMP_ATOMIC_AND(this->get(), ~KMP_BARRIER_SLEEP_STATE);
  }
  bool is_sleeping_val(FlagType old_loc) {
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  bool is_sleeping() { return is_sleeping_val(this->load()); }
  bool is_any_sleeping() { return is_sleeping_val(this->load()); }
  kmp_uint8 *get_stolen() { return NULL; }
  enum barrier_type get_bt() { return bs_last_barrier; }
};

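// 32-bit flag: the atomic basic flag plus the concrete suspend/mwait/resume
// and task-execution hooks that __kmp_wait_template expects.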
template <bool Cancellable = false, bool Sleepable = true>
class kmp_flag_32 : public kmp_basic_flag<kmp_uint32, Sleepable> {
public:
  kmp_flag_32(std::atomic<kmp_uint32> *p)
      : kmp_basic_flag<kmp_uint32, Sleepable>(p) {}
  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_info_t *thr)
      : kmp_basic_flag<kmp_uint32, Sleepable>(p, thr) {}
  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_uint32 c)
      : kmp_basic_flag<kmp_uint32, Sleepable>(p, c) {}
  void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); }
#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
  void mwait(int th_gtid) { __kmp_mwait_32(th_gtid, this); }
#endif
  void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_execute_tasks_32(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  bool wait(kmp_info_t *this_thr,
            int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
    if (final_spin)
      return __kmp_wait_template<kmp_flag_32, TRUE, Cancellable, Sleepable>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      return __kmp_wait_template<kmp_flag_32, FALSE, Cancellable, Sleepable>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  void release() { __kmp_release_template(this); }
  flag_type get_ptr_type() { return flag32; }
};

896
897template <bool Cancellable, bool Sleepable>
898class kmp_flag_64 : public kmp_basic_flag_native<kmp_uint64, Sleepable> {
899public:
900 kmp_flag_64(volatile kmp_uint64 *p)
901 : kmp_basic_flag_native<kmp_uint64, Sleepable>(p) {}
902 kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr)
903 : kmp_basic_flag_native<kmp_uint64, Sleepable>(p, thr) {}
904 kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c)
905 : kmp_basic_flag_native<kmp_uint64, Sleepable>(p, c) {}
906 void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); }
907#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
908 void mwait(int th_gtid) { __kmp_mwait_64(th_gtid, this); }
909#endif
910 void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); }
911 int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
912 int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
913 kmp_int32 is_constrained) {
914 return __kmp_execute_tasks_64(
915 this_thr, gtid, this, final_spin,
916 thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
917 }
918 bool wait(kmp_info_t *this_thr,
919 int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
920 if (final_spin)
921 return __kmp_wait_template<kmp_flag_64, TRUE, Cancellable, Sleepable>(
922 this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
923 else
924 return __kmp_wait_template<kmp_flag_64, FALSE, Cancellable, Sleepable>(
925 this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
926 }
927 void release() { __kmp_release_template(this); }
928 flag_type get_ptr_type() { return flag64; }
929};
930
// Hierarchical 64-bit on-core barrier instantiation
class kmp_flag_oncore : public kmp_flag_native<kmp_uint64> {
  kmp_uint64 checker;
  kmp_info_t *waiting_threads[1];
  kmp_uint32 num_waiting_threads;
  kmp_uint32 offset; // Portion of the flag of interest for an operation
  bool flag_switch; // Indicates a switch in flag location
  enum barrier_type bt; // Barrier type
  kmp_info_t *this_thr; // Thread to redirect to a different flag location
#if USE_ITT_BUILD
  void *itt_sync_obj; // ITT object to pass to the new flag location
#endif
  unsigned char &byteref(volatile kmp_uint64 *loc, size_t offset) {
    return (RCAST(unsigned char *, CCAST(kmp_uint64 *, loc)))[offset];
  }

public:
  kmp_flag_oncore(volatile kmp_uint64 *p)
      : kmp_flag_native<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
        flag_switch(false) {}
  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx)
      : kmp_flag_native<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
        offset(idx), flag_switch(false) {}
  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx,
                  enum barrier_type bar_t,
                  kmp_info_t *thr USE_ITT_BUILD_ARG(void *itt))
      : kmp_flag_native<kmp_uint64>(p, flag_oncore), checker(c),
        num_waiting_threads(0), offset(idx), flag_switch(false), bt(bar_t),
        this_thr(thr) USE_ITT_BUILD_ARG(itt_sync_obj(itt)) {}
  kmp_info_t *get_waiter(kmp_uint32 i) {
    KMP_DEBUG_ASSERT(i < num_waiting_threads);
    return waiting_threads[i];
  }
  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
  void set_waiter(kmp_info_t *thr) {
    waiting_threads[0] = thr;
    num_waiting_threads = 1;
  }
  bool done_check_val(kmp_uint64 old_loc) {
    return byteref(&old_loc, offset) == checker;
  }
  bool done_check() { return done_check_val(*get()); }
  bool notdone_check() {
    // Calculate flag_switch
    if (this_thr->th.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG)
      flag_switch = true;
    if (byteref(get(), offset) != 1 && !flag_switch)
      return true;
    else if (flag_switch) {
      this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING;
      kmp_flag_64<> flag(&this_thr->th.th_bar[bt].bb.b_go,
                         (kmp_uint64)KMP_BARRIER_STATE_BUMP);
      __kmp_wait_64(this_thr, &flag, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    }
    return false;
  }
  void internal_release() {
    // Other threads can write their own bytes simultaneously.
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
      byteref(get(), offset) = 1;
    } else {
      kmp_uint64 mask = 0;
      byteref(&mask, offset) = 1;
      KMP_TEST_THEN_OR64(get(), mask);
    }
  }
  kmp_uint64 set_sleeping() {
    return KMP_TEST_THEN_OR64(get(), KMP_BARRIER_SLEEP_STATE);
  }
  kmp_uint64 unset_sleeping() {
    return KMP_TEST_THEN_AND64(get(), ~KMP_BARRIER_SLEEP_STATE);
  }
  bool is_sleeping_val(kmp_uint64 old_loc) {
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  bool is_sleeping() { return is_sleeping_val(*get()); }
  bool is_any_sleeping() { return is_sleeping_val(*get()); }
  void wait(kmp_info_t *this_thr, int final_spin) {
    if (final_spin)
      __kmp_wait_template<kmp_flag_oncore, TRUE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      __kmp_wait_template<kmp_flag_oncore, FALSE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  void release() { __kmp_release_template(this); }
  void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); }
#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
  void mwait(int th_gtid) { __kmp_mwait_oncore(th_gtid, this); }
#endif
  void resume(int th_gtid) { __kmp_resume_oncore(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
#if OMPD_SUPPORT
    int ret = __kmp_execute_tasks_oncore(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
    if (ompd_state & OMPD_ENABLE_BP)
      ompd_bp_task_end();
    return ret;
#else
    return __kmp_execute_tasks_oncore(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
#endif
  }
  kmp_uint8 *get_stolen() { return NULL; }
  enum barrier_type get_bt() { return bt; }
  flag_type get_ptr_type() { return flag_oncore; }
};

// Used to wake up threads, volatile void* flag is usually the th_sleep_loc
// associated with int gtid.
static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) {
  if (!flag)
    return;

  switch (RCAST(kmp_flag_64<> *, CCAST(void *, flag))->get_type()) {
  case flag32:
    __kmp_resume_32(gtid, (kmp_flag_32<> *)NULL);
    break;
  case flag64:
    __kmp_resume_64(gtid, (kmp_flag_64<> *)NULL);
    break;
  case flag_oncore:
    __kmp_resume_oncore(gtid, (kmp_flag_oncore *)NULL);
    break;
  }
}

#endif // KMP_WAIT_RELEASE_H