// Header guard (KMP_DISPATCH_H), project includes, and forward declarations of
// the hierarchical-scheduling templates kmp_hier_t<T> and
// kmp_hier_top_unit_t<T>, which follow the `#if KMP_USE_HIER_SCHED` test.
// NOTE(review): several original lines are fused onto the first line of this
// excerpt and others are missing; only the visible tokens are described.
14 #ifndef KMP_DISPATCH_H 15 #define KMP_DISPATCH_H 22 #if defined(_WIN32_WINNT) && defined(_M_IX86) 24 #define _WIN32_WINNT 0x0502 28 #include "kmp_error.h" 31 #include "kmp_stats.h" 33 #if KMP_OS_WINDOWS && KMP_ARCH_X86 38 #include "ompt-internal.h" 39 #include "ompt-specific.h" 44 #if KMP_USE_HIER_SCHED 46 template <
typename T>
struct kmp_hier_t;
47 template <
typename T>
struct kmp_hier_top_unit_t;
// Forward declarations of the shared and private dispatch-info templates.
// Both are defined further down in this header; declaring them here lets the
// function prototypes that follow take pointers to them.
48 #endif // KMP_USE_HIER_SCHED 50 template <
typename T>
struct dispatch_shared_info_template;
51 template <
typename T>
struct dispatch_private_info_template;
// Declaration of __kmp_dispatch_init_algorithm (defined elsewhere).
// Visible parameters: source location `loc`, global thread id `gtid`, the
// per-thread private dispatch record `pr`, a signed stride `st`, a
// `cur_chunk` out-pointer and a signed `chunk` value.
// NOTE(review): the template header, the remaining parameters and the closing
// of the parameter list are not visible in this excerpt -- confirm against the
// full header before relying on this signature.
54 extern void __kmp_dispatch_init_algorithm(
ident_t *loc,
int gtid,
55 dispatch_private_info_template<T> *pr,
57 typename traits_t<T>::signed_t st,
59 kmp_uint64 *cur_chunk,
61 typename traits_t<T>::signed_t chunk,
// Declaration of __kmp_dispatch_next_algorithm (defined elsewhere).
// Takes the calling thread's `gtid`, its private record `pr`, the volatile
// shared record `sh`, pointer parameters p_last / p_lb / p_ub / p_st, and the
// values `nproc` and `unit_id`; returns int.
// NOTE(review): presumably p_last/p_lb/p_ub/p_st are outputs describing the
// next chunk and the return value says whether one was obtained -- confirm
// against the definition. The template header line is not visible here.
64 extern int __kmp_dispatch_next_algorithm(
65 int gtid, dispatch_private_info_template<T> *pr,
66 dispatch_shared_info_template<T>
volatile *sh, kmp_int32 *p_last, T *p_lb,
67 T *p_ub,
typename traits_t<T>::signed_t *p_st, T nproc, T unit_id);
// Non-template declaration with the same (gtid_ref, cid_ref, loc_ref)
// parameter list as __kmp_dispatch_dxo<UT> in this header. Its definition is
// not part of this file excerpt.
69 void __kmp_dispatch_dxo_error(
int *gtid_ref,
int *cid_ref,
ident_t *loc_ref);
// Non-template declaration with the same (gtid_ref, cid_ref, loc_ref)
// parameter list as __kmp_dispatch_deo<UT> in this header. Its definition is
// not part of this file excerpt.
70 void __kmp_dispatch_deo_error(
int *gtid_ref,
int *cid_ref,
ident_t *loc_ref);
// dispatch_private_infoXX_template<T>: typed per-thread dispatch data.
// Two definitions of the same template name appear; the first follows
// `#if KMP_STATIC_STEAL_ENABLED`.
// NOTE(review): the #else/#endif separating the variants and most data
// members are among the lines missing from this excerpt.
// UT / ST are the unsigned and signed counterparts of T taken from traits_t<T>.
72 #if KMP_STATIC_STEAL_ENABLED 76 template <
typename T>
struct dispatch_private_infoXX_template {
77 typedef typename traits_t<T>::unsigned_t UT;
78 typedef typename traits_t<T>::signed_t ST;
// Member present only in this (static-steal) variant as far as is visible.
85 T static_steal_counter;
// Anonymous sub-struct declared with KMP_ALIGN(32); its members are not shown.
95 struct KMP_ALIGN(32) {
// Second definition of the template (presumably the variant used when
// KMP_STATIC_STEAL_ENABLED is off -- TODO confirm).
113 template <
typename T>
struct dispatch_private_infoXX_template {
114 typedef typename traits_t<T>::unsigned_t UT;
115 typedef typename traits_t<T>::signed_t ST;
// dispatch_private_info_template<T>: cache-aligned (KMP_ALIGN_CACHE)
// per-thread dispatch record. The functions in this header obtain it by
// reinterpret_cast from th_dispatch->th_dispatch_pr_current.
136 template <
typename T>
struct KMP_ALIGN_CACHE dispatch_private_info_template {
// Union overlaying the T-typed view `p` with the 64-bit record `p64`.
139 union KMP_ALIGN_CACHE private_info_tmpl {
140 dispatch_private_infoXX_template<T> p;
141 dispatch_private_info64_t p64;
144 kmp_sched_flags_t flags;
// Incremented by __kmp_dispatch_dxo and tested by the consistency checks in
// __kmp_dispatch_deo / __kmp_dispatch_dxo.
145 kmp_uint32 ordered_bumped;
// Array of KMP_MAX_ORDERED - 3 entries; NOTE(review): looks like padding,
// confirm the intent in the full header.
147 kmp_int32 ordered_dummy[KMP_MAX_ORDERED - 3];
148 dispatch_private_info *next;
149 kmp_uint32 type_size;
// Members and accessors that follow `#if KMP_USE_HIER_SCHED`; the declaration
// of hier_id is not visible in this excerpt.
150 #if KMP_USE_HIER_SCHED 152 kmp_hier_top_unit_t<T> *hier_parent;
154 kmp_int32 get_hier_id()
const {
return hier_id; }
155 kmp_hier_top_unit_t<T> *get_parent() {
return hier_parent; }
// Compared against ct_none by __kmp_dispatch_deo / __kmp_dispatch_dxo before
// they call __kmp_push_sync / __kmp_pop_sync.
157 enum cons_type pushed_ws;
// dispatch_shared_infoXX_template<T>: typed shared dispatch counters.
// All counters use UT, the unsigned counterpart of T from traits_t<T>.
162 template <
typename T>
struct dispatch_shared_infoXX_template {
163 typedef typename traits_t<T>::unsigned_t UT;
166 volatile UT iteration;
167 volatile UT num_done;
// __kmp_dispatch_deo spins on this value and __kmp_dispatch_dxo increments it.
168 volatile UT ordered_iteration;
// Array of KMP_MAX_ORDERED - 3 entries; NOTE(review): looks like padding,
// confirm the intent in the full header.
170 UT ordered_dummy[KMP_MAX_ORDERED - 3];
// dispatch_shared_info_template<T>: shared dispatch record. The functions in
// this header obtain it by reinterpret_cast from
// th_dispatch->th_dispatch_sh_current.
174 template <
typename T>
struct dispatch_shared_info_template {
175 typedef typename traits_t<T>::unsigned_t UT;
// Union overlaying the typed view `s` with the 64-bit record `s64`.
// Note that `s` is instantiated with UT (the unsigned counterpart), not T.
177 union shared_info_tmpl {
178 dispatch_shared_infoXX_template<UT> s;
179 dispatch_shared_info64_t s64;
181 volatile kmp_uint32 buffer_index;
// doacross-related fields; they are not used by the code visible in this
// excerpt.
183 volatile kmp_int32 doacross_buf_idx;
184 kmp_uint32 *doacross_flags;
185 kmp_int32 doacross_num_done;
// test_then_add<T>(p, d): type-dispatched wrapper declared for any T, with
// specializations for kmp_int32 and kmp_int64 that assign the result of
// KMP_TEST_THEN_ADD32 / KMP_TEST_THEN_ADD64 (p, d) to `r`.
// NOTE(review): the `template <>` headers, the declaration of `r`, the return
// statements and closing braces are missing from this excerpt; presumably `r`
// is returned -- confirm. The first line below also carries the tail of the
// preceding struct and an `#undef USE_TEST_LOCKS`.
187 #if KMP_USE_HIER_SCHED 201 #undef USE_TEST_LOCKS 204 template <
typename T>
static __forceinline T test_then_add(
volatile T *p, T d);
207 __forceinline kmp_int32 test_then_add<kmp_int32>(
volatile kmp_int32 *p,
210 r = KMP_TEST_THEN_ADD32(p, d);
215 __forceinline kmp_int64 test_then_add<kmp_int64>(
volatile kmp_int64 *p,
218 r = KMP_TEST_THEN_ADD64(p, d);
// test_then_inc_acq<T>(p): type-dispatched wrapper with specializations for
// kmp_int32 and kmp_int64 that assign the result of
// KMP_TEST_THEN_INC_ACQ32 / KMP_TEST_THEN_INC_ACQ64 (p) to `r`.
// NOTE(review): the declaration of `r`, the return statements and closing
// braces are not visible here; presumably `r` is returned -- confirm.
223 template <
typename T>
static __forceinline T test_then_inc_acq(
volatile T *p);
226 __forceinline kmp_int32 test_then_inc_acq<kmp_int32>(
volatile kmp_int32 *p) {
228 r = KMP_TEST_THEN_INC_ACQ32(p);
233 __forceinline kmp_int64 test_then_inc_acq<kmp_int64>(
volatile kmp_int64 *p) {
235 r = KMP_TEST_THEN_INC_ACQ64(p);
// test_then_inc<T>(p): type-dispatched wrapper with specializations for
// kmp_int32 and kmp_int64 that assign the result of
// KMP_TEST_THEN_INC32 / KMP_TEST_THEN_INC64 (p) to `r`.
// Used by __kmp_dispatch_dxo to bump the shared ordered_iteration counter.
// NOTE(review): the declaration of `r`, the return statements and closing
// braces are not visible here; presumably `r` is returned -- confirm.
240 template <
typename T>
static __forceinline T test_then_inc(
volatile T *p);
243 __forceinline kmp_int32 test_then_inc<kmp_int32>(
volatile kmp_int32 *p) {
245 r = KMP_TEST_THEN_INC32(p);
250 __forceinline kmp_int64 test_then_inc<kmp_int64>(
volatile kmp_int64 *p) {
252 r = KMP_TEST_THEN_INC64(p);
// compare_and_swap<T>(p, c, s): type-dispatched wrapper; the kmp_int32 and
// kmp_int64 specializations return the result of
// KMP_COMPARE_AND_STORE_REL32 / KMP_COMPARE_AND_STORE_REL64 (p, c, s).
// The return type is kmp_int32 for both widths.
// NOTE(review): presumably `c` is the expected value and `s` the value to
// store, with a non-zero result meaning the store happened -- confirm against
// the macro definitions. Closing braces are missing from this excerpt.
257 template <
typename T>
258 static __forceinline kmp_int32 compare_and_swap(
volatile T *p, T c, T s);
261 __forceinline kmp_int32 compare_and_swap<kmp_int32>(
volatile kmp_int32 *p,
262 kmp_int32 c, kmp_int32 s) {
263 return KMP_COMPARE_AND_STORE_REL32(p, c, s);
267 __forceinline kmp_int32 compare_and_swap<kmp_int64>(
volatile kmp_int64 *p,
268 kmp_int64 c, kmp_int64 s) {
269 return KMP_COMPARE_AND_STORE_REL64(p, c, s);
// Predicate: returns the result of `value >= checker` converted to kmp_uint32.
// Passed as the `pred` argument of __kmp_wait_yield by __kmp_dispatch_deo.
272 template <
typename T> kmp_uint32 __kmp_ge(T value, T checker) {
273 return value >= checker;
// Predicate: returns the result of `value == checker` converted to kmp_uint32.
// Has the (T, T) -> kmp_uint32 shape expected by __kmp_wait_yield's `pred`.
275 template <
typename T> kmp_uint32 __kmp_eq(T value, T checker) {
276 return value == checker;
// __kmp_wait_yield<UT>(spinner, checker, pred [, obj]):
// busy-waits, re-reading *spinner, until pred(value_read, checker) is non-zero.
//   spinner - location to poll (read through a volatile pointer)
//   checker - value handed to the predicate as its second argument
//   pred    - predicate such as __kmp_ge<UT> or __kmp_eq<UT>
//   obj     - extra argument added through USE_ITT_BUILD_ARG and passed to the
//             KMP_FSYNC_* macros
// NOTE(review): the declarations of `check`, `spins` and `r`, the return
// statement and closing braces are missing from this excerpt; presumably the
// last value read (`r`) is returned -- confirm.
298 template <
typename UT>
299 static UT __kmp_wait_yield(
volatile UT *spinner, UT checker,
300 kmp_uint32 (*pred)(UT, UT)
301 USE_ITT_BUILD_ARG(
void *obj)) {
303 volatile UT *spin = spinner;
306 kmp_uint32 (*f)(UT, UT) = pred;
309 KMP_FSYNC_SPIN_INIT(obj, CCAST(UT *, spin));
310 KMP_INIT_YIELD(spins);
// Each iteration stores a fresh read of *spin in r before testing it.
312 while (!f(r = *spin, check)) {
313 KMP_FSYNC_SPIN_PREPARE(obj);
// KMP_YIELD is given the condition "__kmp_nth exceeds __kmp_avail_proc";
// KMP_YIELD_SPIN is then invoked with the spin counter.
323 KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc);
324 KMP_YIELD_SPIN(spins);
326 KMP_FSYNC_SPIN_ACQUIRED(obj);
// __kmp_dispatch_deo<UT>(gtid_ref, cid_ref, loc_ref):
// waits until the shared ordered_iteration counter has reached the calling
// thread's ordered_lower value.
//   gtid_ref - pointer to the caller's global thread id (dereferenced once)
//   cid_ref  - not referenced in the visible body
//   loc_ref  - source location forwarded to the consistency-check helpers
// NOTE(review): the declarations of `lower` and `buff`, several braces and
// the #else/#endif lines are missing from this excerpt.
333 template <
typename UT>
334 void __kmp_dispatch_deo(
int *gtid_ref,
int *cid_ref,
ident_t *loc_ref) {
335 typedef typename traits_t<UT>::signed_t ST;
336 dispatch_private_info_template<UT> *pr;
338 int gtid = *gtid_ref;
340 kmp_info_t *th = __kmp_threads[gtid];
341 KMP_DEBUG_ASSERT(th->th.th_dispatch);
343 KD_TRACE(100, (
"__kmp_dispatch_deo: T#%d called\n", gtid));
// With consistency checking on, load pr now and, if a worksharing construct
// was pushed (pushed_ws != ct_none), call __kmp_push_sync with
// ct_ordered_in_pdo.
344 if (__kmp_env_consistency_check) {
345 pr =
reinterpret_cast<dispatch_private_info_template<UT> *
>(
346 th->th.th_dispatch->th_dispatch_pr_current);
347 if (pr->pushed_ws != ct_none) {
// The dynamic-lock build of __kmp_push_sync takes one extra argument (0).
348 #if KMP_USE_DYNAMIC_LOCK 349 __kmp_push_sync(gtid, ct_ordered_in_pdo, loc_ref, NULL, 0);
351 __kmp_push_sync(gtid, ct_ordered_in_pdo, loc_ref, NULL);
// The wait below is only set up when the team is not serialized.
356 if (!th->th.th_team->t.t_serialized) {
357 dispatch_shared_info_template<UT> *sh =
358 reinterpret_cast<dispatch_shared_info_template<UT> *
>(
359 th->th.th_dispatch->th_dispatch_sh_current);
// pr was already loaded above when consistency checking is on; load it here
// otherwise.
362 if (!__kmp_env_consistency_check) {
363 pr =
reinterpret_cast<dispatch_private_info_template<UT> *
>(
364 th->th.th_dispatch->th_dispatch_pr_current);
366 lower = pr->u.p.ordered_lower;
// Unless KMP_GOMP_COMPAT is defined: report CnsMultipleNesting when
// ordered_bumped is already non-zero on entry.
368 #if !defined(KMP_GOMP_COMPAT) 369 if (__kmp_env_consistency_check) {
370 if (pr->ordered_bumped) {
371 struct cons_header *p = __kmp_threads[gtid]->th.th_cons;
372 __kmp_error_construct2(kmp_i18n_msg_CnsMultipleNesting,
373 ct_ordered_in_pdo, loc_ref,
374 &p->stack_data[p->w_top]);
// Trace message built at run time because the integer format specifier
// depends on UT (traits_t<UT>::spec).
384 buff = __kmp_str_format(
"__kmp_dispatch_deo: T#%%d before wait: " 385 "ordered_iter:%%%s lower:%%%s\n",
386 traits_t<UT>::spec, traits_t<UT>::spec);
387 KD_TRACE(1000, (buff, gtid, sh->u.s.ordered_iteration, lower));
388 __kmp_str_free(&buff);
// Spin until sh->u.s.ordered_iteration >= lower (predicate __kmp_ge<UT>).
391 __kmp_wait_yield<UT>(&sh->u.s.ordered_iteration, lower,
392 __kmp_ge<UT> USE_ITT_BUILD_ARG(NULL));
398 buff = __kmp_str_format(
"__kmp_dispatch_deo: T#%%d after wait: " 399 "ordered_iter:%%%s lower:%%%s\n",
400 traits_t<UT>::spec, traits_t<UT>::spec);
401 KD_TRACE(1000, (buff, gtid, sh->u.s.ordered_iteration, lower));
402 __kmp_str_free(&buff);
406 KD_TRACE(100, (
"__kmp_dispatch_deo: T#%d returned\n", gtid));
// __kmp_dispatch_dxo<UT>(gtid_ref, cid_ref, loc_ref):
// counterpart of __kmp_dispatch_deo; increments the private ordered_bumped
// counter and the shared ordered_iteration counter.
//   gtid_ref - pointer to the caller's global thread id (dereferenced once)
//   cid_ref  - not referenced in the visible body
//   loc_ref  - source location forwarded to the consistency-check helpers
// NOTE(review): several braces, the #endif lines and the KD_TRACE head of the
// "bumping ordered" message are missing from this excerpt.
409 template <
typename UT>
410 void __kmp_dispatch_dxo(
int *gtid_ref,
int *cid_ref,
ident_t *loc_ref) {
411 typedef typename traits_t<UT>::signed_t ST;
412 dispatch_private_info_template<UT> *pr;
414 int gtid = *gtid_ref;
416 kmp_info_t *th = __kmp_threads[gtid];
417 KMP_DEBUG_ASSERT(th->th.th_dispatch);
419 KD_TRACE(100, (
"__kmp_dispatch_dxo: T#%d called\n", gtid));
// With consistency checking on, load pr now and, if a worksharing construct
// was pushed (pushed_ws != ct_none), call __kmp_pop_sync with
// ct_ordered_in_pdo.
420 if (__kmp_env_consistency_check) {
421 pr =
reinterpret_cast<dispatch_private_info_template<UT> *
>(
422 th->th.th_dispatch->th_dispatch_pr_current);
423 if (pr->pushed_ws != ct_none) {
424 __kmp_pop_sync(gtid, ct_ordered_in_pdo, loc_ref);
// The counter updates below are only set up when the team is not serialized.
428 if (!th->th.th_team->t.t_serialized) {
429 dispatch_shared_info_template<UT> *sh =
430 reinterpret_cast<dispatch_shared_info_template<UT> *
>(
431 th->th.th_dispatch->th_dispatch_sh_current);
// pr was already loaded above when consistency checking is on; load it here
// otherwise.
433 if (!__kmp_env_consistency_check) {
434 pr =
reinterpret_cast<dispatch_private_info_template<UT> *
>(
435 th->th.th_dispatch->th_dispatch_pr_current);
// KMP_FSYNC_RELEASING is handed the address of ordered_iteration before the
// increment further down.
438 KMP_FSYNC_RELEASING(CCAST(UT *, &sh->u.s.ordered_iteration));
// Unless KMP_GOMP_COMPAT is defined: report CnsMultipleNesting when
// ordered_bumped is already non-zero.
439 #if !defined(KMP_GOMP_COMPAT) 440 if (__kmp_env_consistency_check) {
441 if (pr->ordered_bumped != 0) {
442 struct cons_header *p = __kmp_threads[gtid]->th.th_cons;
444 __kmp_error_construct2(kmp_i18n_msg_CnsMultipleNesting,
445 ct_ordered_in_pdo, loc_ref,
446 &p->stack_data[p->w_top]);
// Count this bump in the thread's private record.
453 pr->ordered_bumped += 1;
456 (
"__kmp_dispatch_dxo: T#%d bumping ordered ordered_bumped=%d\n",
457 gtid, pr->ordered_bumped));
// Increment the shared counter through test_then_inc<ST>; the unsigned
// counter's address is cast to the signed type ST, for which the
// specializations (KMP_TEST_THEN_INC32/64) exist.
462 test_then_inc<ST>((
volatile ST *)&sh->u.s.ordered_iteration);
466 KD_TRACE(100, (
"__kmp_dispatch_dxo: T#%d returned\n", gtid));
// __kmp_pow<UT>(x, y): long double helper taking a base x and an unsigned
// integer exponent y.
// NOTE(review): presumably returns x raised to the power y -- the loop and
// return statement are not visible in this excerpt, confirm in the full header.
// Precondition (debug builds): 0 < x < 1, enforced by KMP_DEBUG_ASSERT.
471 template <
typename UT>
472 static __forceinline
long double __kmp_pow(
long double x, UT y) {
// Accumulator starts at 1.0L.
473 long double s = 1.0L;
475 KMP_DEBUG_ASSERT(x > 0.0 && x < 1.0);
// __kmp_dispatch_guided_remaining<T>(tc, base, idx): returns a value of the
// unsigned counterpart of T derived from x = tc * __kmp_pow<UT>(base, idx),
// computed in long double.
//   tc   - value of type T multiplied by the power term
//   base - floating-point base handed to __kmp_pow (which asserts 0 < x < 1)
//   idx  - unsigned exponent handed to __kmp_pow
// NOTE(review): how x is rounded and returned is not visible in this excerpt;
// presumably tc is a trip count and the result is the number of iterations
// left after idx guided chunks -- confirm in the full header.
494 template <
typename T>
495 static __inline
typename traits_t<T>::unsigned_t
496 __kmp_dispatch_guided_remaining(T tc,
typename traits_t<T>::floating_t base,
497 typename traits_t<T>::unsigned_t idx) {
505 typedef typename traits_t<T>::unsigned_t UT;
507 long double x = tc * __kmp_pow<UT>(base, idx);
// File-local constants (static const, so each translation unit that includes
// this header gets its own copy).
// NOTE(review): presumably tuning parameters for the guided schedules; their
// uses are not visible in this excerpt -- confirm before changing the values.
520 static const int guided_int_param = 2;
521 static const double guided_flt_param = 0.5;
522 #endif // KMP_DISPATCH_H