#include "kmp_error.h"
#include "kmp_stats.h"
#if KMP_OS_WINDOWS && KMP_ARCH_X86
#include <float.h>
#endif

#if OMPT_SUPPORT
#include "ompt-internal.h"
#include "ompt-specific.h"
#endif

/* ------------------------------------------------------------------------ */

// template for type limits
template< typename T >
struct i_maxmin {
    static const T mx;
    static const T mn;
};
template<>
struct i_maxmin< int > {
    static const int mx = 0x7fffffff;
    static const int mn = 0x80000000;
};
template<>
struct i_maxmin< unsigned int > {
    static const unsigned int mx = 0xffffffff;
    static const unsigned int mn = 0x00000000;
};
template<>
struct i_maxmin< long long > {
    static const long long mx = 0x7fffffffffffffffLL;
    static const long long mn = 0x8000000000000000LL;
};
template<>
struct i_maxmin< unsigned long long > {
    static const unsigned long long mx = 0xffffffffffffffffLL;
    static const unsigned long long mn = 0x0000000000000000LL;
};
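// The i_maxmin<> trait above is used when clamping distributed loop bounds
// (see __kmp_dist_get_bounds below). A minimal usage sketch, assuming only the
// specializations defined in this file:
//
//     kmp_int32  upper_cap = i_maxmin< int >::mx;                  // 0x7fffffff
//     kmp_uint64 lower_cap = i_maxmin< unsigned long long >::mn;   // 0
//
// Any other instantiation only declares mx/mn and will fail to link.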
#ifdef KMP_STATIC_STEAL_ENABLED

    // replaces dispatch_private_info{32,64} structures and their types
    template< typename T >
    struct dispatch_private_infoXX_template {
        typedef typename traits_t< T >::unsigned_t  UT;
        typedef typename traits_t< T >::signed_t    ST;
        // ... (count, bounds, stride and trip-count fields elided in this excerpt)
        T  static_steal_counter; // for static_steal only

        // KMP_ALIGN( 32 ) keeps parm1-parm4 in one cache line, since they are used together
        struct KMP_ALIGN( 32 ) {
            // ... (parm1-parm4 elided in this excerpt)
        };
        // ... (ordered bounds and remaining fields elided in this excerpt)
    };

#else /* KMP_STATIC_STEAL_ENABLED */

    // replaces dispatch_private_info{32,64} structures and their types
    template< typename T >
    struct dispatch_private_infoXX_template {
        typedef typename traits_t< T >::unsigned_t  UT;
        typedef typename traits_t< T >::signed_t    ST;
        // ... (bounds, stride and trip-count fields elided in this excerpt)
    };

#endif /* KMP_STATIC_STEAL_ENABLED */
template< typename T >
struct KMP_ALIGN_CACHE dispatch_private_info_template {
    // duplicate alignment here, otherwise size of structure is not correct in our compiler
    union KMP_ALIGN_CACHE private_info_tmpl {
        dispatch_private_infoXX_template< T >  p;
        dispatch_private_info64_t              p64;
    } u;
    enum sched_type schedule;   /* scheduling algorithm */
    kmp_uint32      ordered;    /* ordered clause specified */
    kmp_uint32      ordered_bumped;
    kmp_int32       ordered_dummy[KMP_MAX_ORDERED-3]; // to retain the structure size
    dispatch_private_info * next;    /* stack of buffers for nest of serial regions */
    kmp_uint32      nomerge;    /* don't merge iters if serialized */
    kmp_uint32      type_size;
    enum cons_type  pushed_ws;
};
// replaces dispatch_shared_info{32,64} structures and their types
template< typename UT >
struct dispatch_shared_infoXX_template {
    /* chunk index under dynamic, number of idle threads under static-steal;
       iteration index otherwise */
    volatile UT     iteration;
    volatile UT     num_done;
    volatile UT     ordered_iteration;
    UT   ordered_dummy[KMP_MAX_ORDERED-1]; // to retain the structure size
};

// replaces dispatch_shared_info structure and its type
template< typename UT >
struct dispatch_shared_info_template {
    // we need union here to keep the structure size
    union shared_info_tmpl {
        dispatch_shared_infoXX_template< UT >  s;
        dispatch_shared_info64_t               s64;
    } u;
    volatile kmp_uint32     buffer_index;
};
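// Note on buffer_index: each team keeps KMP_MAX_DISP_BUF copies of the shared
// dispatch buffer and cycles through them, so a thread that finishes one
// dynamically scheduled loop early can start the next loop without waiting for
// stragglers. A hedged sketch of the indexing used later in this file:
//
//     my_buffer_index = th->th.th_dispatch->th_disp_index++;
//     sh = &team->t.t_disp_buffer[ my_buffer_index % KMP_MAX_DISP_BUF ];
//     // ...then wait until sh->buffer_index == my_buffer_index before using it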
/* ------------------------------------------------------------------------ */

#undef USE_TEST_LOCKS

// test_then_add template (general template should NOT be used)
template< typename T >
static __forceinline T
test_then_add( volatile T *p, T d ) { KMP_ASSERT(0); };

template<>
__forceinline kmp_int32
test_then_add< kmp_int32 >( volatile kmp_int32 *p, kmp_int32 d )
{
    kmp_int32 r;
    r = KMP_TEST_THEN_ADD32( p, d );
    return r;
}

template<>
__forceinline kmp_int64
test_then_add< kmp_int64 >( volatile kmp_int64 *p, kmp_int64 d )
{
    kmp_int64 r;
    r = KMP_TEST_THEN_ADD64( p, d );
    return r;
}

// test_then_inc_acq template (general template should NOT be used)
template< typename T >
static __forceinline T
test_then_inc_acq( volatile T *p ) { KMP_ASSERT(0); };

template<>
__forceinline kmp_int32
test_then_inc_acq< kmp_int32 >( volatile kmp_int32 *p )
{
    kmp_int32 r;
    r = KMP_TEST_THEN_INC_ACQ32( p );
    return r;
}

template<>
__forceinline kmp_int64
test_then_inc_acq< kmp_int64 >( volatile kmp_int64 *p )
{
    kmp_int64 r;
    r = KMP_TEST_THEN_INC_ACQ64( p );
    return r;
}

// test_then_inc template (general template should NOT be used)
template< typename T >
static __forceinline T
test_then_inc( volatile T *p ) { KMP_ASSERT(0); };

template<>
__forceinline kmp_int32
test_then_inc< kmp_int32 >( volatile kmp_int32 *p )
{
    kmp_int32 r;
    r = KMP_TEST_THEN_INC32( p );
    return r;
}

template<>
__forceinline kmp_int64
test_then_inc< kmp_int64 >( volatile kmp_int64 *p )
{
    kmp_int64 r;
    r = KMP_TEST_THEN_INC64( p );
    return r;
}

// compare_and_swap template (general template should NOT be used)
template< typename T >
static __forceinline kmp_int32
compare_and_swap( volatile T *p, T c, T s ) { KMP_ASSERT(0); };

template<>
__forceinline kmp_int32
compare_and_swap< kmp_int32 >( volatile kmp_int32 *p, kmp_int32 c, kmp_int32 s )
{
    return KMP_COMPARE_AND_STORE_REL32( p, c, s );
}

template<>
__forceinline kmp_int32
compare_and_swap< kmp_int64 >( volatile kmp_int64 *p, kmp_int64 c, kmp_int64 s )
{
    return KMP_COMPARE_AND_STORE_REL64( p, c, s );
}
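// These wrappers let the templated scheduling code below pick the right atomic
// primitive from the iteration type alone. A minimal usage sketch, under the
// assumption that the counter is shared across the team (as sh->u.s.iteration is):
//
//     volatile kmp_int64 shared_counter = 0;
//     kmp_int64 mine = test_then_add< kmp_int64 >( &shared_counter, 4 ); // fetch-and-add
//     if ( compare_and_swap< kmp_int64 >( &shared_counter, mine + 4, 0 ) ) {
//         // we observed the expected value and reset the counter
//     }
//
// Instantiating the primary templates with any other type trips KMP_ASSERT(0)
// at run time, which is the intended "not supported" signal.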
template< typename UT >
static UT   // unsigned 4- or 8-byte type
__kmp_wait_yield( volatile UT * spinner,
                  UT            checker,
                  kmp_uint32 (* pred)( UT, UT )
                  USE_ITT_BUILD_ARG( void * obj )  // Higher-level synchronization object, or NULL.
                )
{
    // note: we may not belong to a team at this point
    register volatile UT         * spin  = spinner;
    register          UT           check = checker;
    register          kmp_uint32   spins;
    register          kmp_uint32 (*f) ( UT, UT ) = pred;
    register          UT           r;

    KMP_FSYNC_SPIN_INIT( obj, (void*) spin );
    KMP_INIT_YIELD( spins );
    // main wait spin loop
    while ( !f( r = *spin, check ) ) {
        KMP_FSYNC_SPIN_PREPARE( obj );
        // if we are oversubscribed, or have waited a bit (and KMP_LIBRARY=throughput), then yield
        KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
        KMP_YIELD_SPIN( spins );
    }
    KMP_FSYNC_SPIN_ACQUIRED( obj );
    return r;
}
template< typename UT >
static kmp_uint32 __kmp_eq( UT value, UT checker) {
    return value == checker;
}

template< typename UT >
static kmp_uint32 __kmp_neq( UT value, UT checker) {
    return value != checker;
}

template< typename UT >
static kmp_uint32 __kmp_lt( UT value, UT checker) {
    return value < checker;
}

template< typename UT >
static kmp_uint32 __kmp_ge( UT value, UT checker) {
    return value >= checker;
}

template< typename UT >
static kmp_uint32 __kmp_le( UT value, UT checker) {
    return value <= checker;
}
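// __kmp_wait_yield pairs one of the predicates above with a shared counter.
// A hedged sketch of the dominant pattern in this file (the fields are real,
// the surrounding context is illustrative only):
//
//     // block until every earlier ordered chunk has been released
//     __kmp_wait_yield< UT >( &sh->u.s.ordered_iteration,  // spinner
//                             lower,                       // checker
//                             __kmp_ge< UT >               // predicate
//                             USE_ITT_BUILD_ARG( NULL ) );
//
// i.e. spin (yielding when oversubscribed) until *spinner >= checker.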
static void
__kmp_dispatch_deo_error( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
{
    kmp_info_t *th;

    KMP_DEBUG_ASSERT( gtid_ref );

    if ( __kmp_env_consistency_check ) {
        th = __kmp_threads[*gtid_ref];
        if ( th -> th.th_root -> r.r_active
          && ( th -> th.th_dispatch -> th_dispatch_pr_current -> pushed_ws != ct_none ) ) {
#if KMP_USE_DYNAMIC_LOCK
            __kmp_push_sync( *gtid_ref, ct_ordered_in_pdo, loc_ref, NULL, 0 );
#else
            __kmp_push_sync( *gtid_ref, ct_ordered_in_pdo, loc_ref, NULL );
#endif
        }
    }
}
template< typename UT >
static void
__kmp_dispatch_deo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
{
    typedef typename traits_t< UT >::signed_t    ST;
    dispatch_private_info_template< UT > * pr;

    int gtid = *gtid_ref;
    kmp_info_t *th = __kmp_threads[ gtid ];
    KMP_DEBUG_ASSERT( th -> th.th_dispatch );

    KD_TRACE(100, ("__kmp_dispatch_deo: T#%d called\n", gtid ) );
    if ( __kmp_env_consistency_check ) {
        pr = reinterpret_cast< dispatch_private_info_template< UT >* >
                ( th -> th.th_dispatch -> th_dispatch_pr_current );
        if ( pr -> pushed_ws != ct_none ) {
#if KMP_USE_DYNAMIC_LOCK
            __kmp_push_sync( gtid, ct_ordered_in_pdo, loc_ref, NULL, 0 );
#else
            __kmp_push_sync( gtid, ct_ordered_in_pdo, loc_ref, NULL );
#endif
        }
    }

    if ( ! th -> th.th_team -> t.t_serialized ) {
        dispatch_shared_info_template< UT > * sh = reinterpret_cast< dispatch_shared_info_template< UT >* >
                ( th -> th.th_dispatch -> th_dispatch_sh_current );
        UT lower;

        if ( ! __kmp_env_consistency_check ) {
            pr = reinterpret_cast< dispatch_private_info_template< UT >* >
                    ( th -> th.th_dispatch -> th_dispatch_pr_current );
        }
        lower = pr->u.p.ordered_lower;

        #if ! defined( KMP_GOMP_COMPAT )
        if ( __kmp_env_consistency_check ) {
            if ( pr->ordered_bumped ) {
                struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons;
                __kmp_error_construct2(
                    kmp_i18n_msg_CnsMultipleNesting,
                    ct_ordered_in_pdo, loc_ref,
                    & p->stack_data[ p->w_top ]
                );
            }
        }
        #endif /* !defined(KMP_GOMP_COMPAT) */

        #ifdef KMP_DEBUG
        {
            char *buff;
            // create format specifiers before the debug output
            buff = __kmp_str_format(
                "__kmp_dispatch_deo: T#%%d before wait: ordered_iter:%%%s lower:%%%s\n",
                traits_t< UT >::spec, traits_t< UT >::spec );
            KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower ) );
            __kmp_str_free( &buff );
        }
        #endif

        __kmp_wait_yield< UT >( &sh->u.s.ordered_iteration, lower, __kmp_ge< UT >
                                USE_ITT_BUILD_ARG( NULL )
                              );

        #ifdef KMP_DEBUG
        {
            char *buff;
            buff = __kmp_str_format(
                "__kmp_dispatch_deo: T#%%d after wait: ordered_iter:%%%s lower:%%%s\n",
                traits_t< UT >::spec, traits_t< UT >::spec );
            KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower ) );
            __kmp_str_free( &buff );
        }
        #endif
    }
    KD_TRACE(100, ("__kmp_dispatch_deo: T#%d returned\n", gtid ) );
}
static void
__kmp_dispatch_dxo_error( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
{
    kmp_info_t *th;

    if ( __kmp_env_consistency_check ) {
        th = __kmp_threads[*gtid_ref];
        if ( th -> th.th_dispatch -> th_dispatch_pr_current -> pushed_ws != ct_none ) {
            __kmp_pop_sync( *gtid_ref, ct_ordered_in_pdo, loc_ref );
        }
    }
}
template< typename UT >
static void
__kmp_dispatch_dxo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
{
    typedef typename traits_t< UT >::signed_t    ST;
    dispatch_private_info_template< UT > * pr;

    int gtid = *gtid_ref;
    kmp_info_t *th = __kmp_threads[ gtid ];
    KMP_DEBUG_ASSERT( th -> th.th_dispatch );

    KD_TRACE(100, ("__kmp_dispatch_dxo: T#%d called\n", gtid ) );
    if ( __kmp_env_consistency_check ) {
        pr = reinterpret_cast< dispatch_private_info_template< UT >* >
                ( th -> th.th_dispatch -> th_dispatch_pr_current );
        if ( pr -> pushed_ws != ct_none ) {
            __kmp_pop_sync( gtid, ct_ordered_in_pdo, loc_ref );
        }
    }

    if ( ! th -> th.th_team -> t.t_serialized ) {
        dispatch_shared_info_template< UT > * sh = reinterpret_cast< dispatch_shared_info_template< UT >* >
                ( th -> th.th_dispatch -> th_dispatch_sh_current );

        if ( ! __kmp_env_consistency_check ) {
            pr = reinterpret_cast< dispatch_private_info_template< UT >* >
                    ( th -> th.th_dispatch -> th_dispatch_pr_current );
        }

        KMP_FSYNC_RELEASING( & sh->u.s.ordered_iteration );
        #if ! defined( KMP_GOMP_COMPAT )
        if ( __kmp_env_consistency_check ) {
            if ( pr->ordered_bumped != 0 ) {
                struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons;
                __kmp_error_construct2(
                    kmp_i18n_msg_CnsMultipleNesting,
                    ct_ordered_in_pdo, loc_ref,
                    & p->stack_data[ p->w_top ]
                );
            }
        }
        #endif /* !defined(KMP_GOMP_COMPAT) */

        pr->ordered_bumped += 1;

        KD_TRACE(1000, ("__kmp_dispatch_dxo: T#%d bumping ordered ordered_bumped=%d\n",
                        gtid, pr->ordered_bumped ) );

        /* TODO: use general release procedure? */
        test_then_inc< ST >( (volatile ST *) & sh->u.s.ordered_iteration );
    }
    KD_TRACE(100, ("__kmp_dispatch_dxo: T#%d returned\n", gtid ) );
}
/* Computes and returns x to the power of y, where y must be a non-negative integer */
template< typename UT >
static __forceinline long double
__kmp_pow( long double x, UT y ) {
    long double s = 1.0L;

    KMP_DEBUG_ASSERT(x > 0.0 && x < 1.0);
    // ... (iterative exponentiation by squaring elided in this excerpt)
    return s;
}
/* Computes and returns the number of unassigned iterations after idx chunks have
   been assigned (the total number of unassigned iterations in chunks with index
   greater than or equal to idx). */
template< typename T >
static __inline typename traits_t< T >::unsigned_t
__kmp_dispatch_guided_remaining(
    T                                  tc,
    typename traits_t< T >::floating_t base,
    typename traits_t< T >::unsigned_t idx
) {
    typedef typename traits_t< T >::unsigned_t  UT;

    long double x = tc * __kmp_pow< UT >(base, idx);
    UT r = (UT) x;
    if ( x == r )
        return r;
    return r + 1;
}
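// In other words, with a per-chunk shrink factor `base` (the analytical guided
// scheduler below uses base = 1 - 1/(2*nproc)), the model assumes the work still
// unassigned after idx chunks follows the geometric law remaining(idx) ~= tc *
// base^idx, rounded up to an integer. This closed form is what
// kmp_sch_guided_analytical_chunked relies on in init and next.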
// Parameters of the guided-iterative algorithm:
//   parm2 = n * nproc * ( chunk + 1 )   // point of switching to dynamic
//   parm3 = 1 / ( n * nproc )           // remaining-iterations multiplier
static int    guided_int_param = 2;
static double guided_flt_param = 0.5;   // = 1.0 / guided_int_param
// UT - unsigned flavor of T, ST - signed flavor of T,
// DBL - double if sizeof(T)==4, or long double if sizeof(T)==8
template< typename T >
static void
__kmp_dispatch_init(
    ident_t                        * loc,
    int                              gtid,
    enum sched_type                  schedule,
    T                                lb,
    T                                ub,
    typename traits_t< T >::signed_t st,
    typename traits_t< T >::signed_t chunk,
    int                              push_ws
) {
    typedef typename traits_t< T >::unsigned_t  UT;
    typedef typename traits_t< T >::signed_t    ST;
    typedef typename traits_t< T >::floating_t  DBL;
    static const int ___kmp_size_type = sizeof( UT );

    int          active;
    T            tc;
    kmp_info_t * th;
    kmp_team_t * team;
    kmp_uint32   my_buffer_index;
    dispatch_private_info_template< T >          * pr;
    dispatch_shared_info_template< UT > volatile * sh;

    KMP_BUILD_ASSERT( sizeof( dispatch_private_info_template< T > ) == sizeof( dispatch_private_info ) );
    KMP_BUILD_ASSERT( sizeof( dispatch_shared_info_template< UT > ) == sizeof( dispatch_shared_info ) );

    if ( ! TCR_4( __kmp_init_parallel ) )
        __kmp_parallel_initialize();

#if INCLUDE_SSC_MARKS
    SSC_MARK_DISPATCH_INIT();
#endif
    #ifdef KMP_DEBUG
    {
        char *buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmp_dispatch_init: T#%%d called: schedule:%%d chunk:%%%s lb:%%%s ub:%%%s st:%%%s\n",
            traits_t< ST >::spec, traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
        KD_TRACE(10, ( buff, gtid, schedule, chunk, lb, ub, st ) );
        __kmp_str_free( &buff );
    }
    #endif
    /* setup data */
    th     = __kmp_threads[ gtid ];
    team   = th -> th.th_team;
    active = ! team -> t.t_serialized;
    th->th.th_ident = loc;

    // Chunk used for the ITT loop-metadata report
    kmp_uint64 cur_chunk = chunk;
    int itt_need_metadata_reporting = __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
        KMP_MASTER_GTID(gtid) &&
        th->th.th_teams_microtask == NULL &&
        team->t.t_active_level == 1;

    if ( ! active ) {
        pr = reinterpret_cast< dispatch_private_info_template< T >* >
            ( th -> th.th_dispatch -> th_disp_buffer ); /* top of the stack */
    } else {
        KMP_DEBUG_ASSERT( th->th.th_dispatch ==
                &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid] );

        my_buffer_index = th->th.th_dispatch->th_disp_index ++;

        /* What happens when number of threads changes, need to resize buffer? */
        pr = reinterpret_cast< dispatch_private_info_template< T >* >
            ( &th -> th.th_dispatch -> th_disp_buffer[ my_buffer_index % KMP_MAX_DISP_BUF ] );
        sh = reinterpret_cast< dispatch_shared_info_template< UT > volatile * >
            ( &team -> t.t_disp_buffer[ my_buffer_index % KMP_MAX_DISP_BUF ] );
    }
    /* Pick up the nomerge/ordered bits from the scheduling type */
    if ( (schedule >= kmp_nm_lower) && (schedule < kmp_nm_upper) ) {
        pr->nomerge = TRUE;
        schedule = (enum sched_type)(((int)schedule) - (kmp_nm_lower - kmp_sch_lower));
    } else {
        pr->nomerge = FALSE;
    }
    pr->type_size = ___kmp_size_type; // remember the size of variables
    if ( kmp_ord_lower & schedule ) {
        pr->ordered = TRUE;
        schedule = (enum sched_type)(((int)schedule) - (kmp_ord_lower - kmp_sch_lower));
    } else {
        pr->ordered = FALSE;
    }

    if ( schedule == kmp_sch_static ) {
        schedule = __kmp_static;
    } else {
        if ( schedule == kmp_sch_runtime ) {
            // Use the schedule specified by OMP_SCHEDULE (or default if not specified)
            schedule = team -> t.t_sched.r_sched_type;
            // Detail the schedule if needed
            if ( schedule == kmp_sch_guided_chunked ) {
                schedule = __kmp_guided;
            } else if ( schedule == kmp_sch_static ) {
                schedule = __kmp_static;
            }
            // Use the chunk size specified by OMP_SCHEDULE (or default if not specified)
            chunk = team -> t.t_sched.chunk;

            #ifdef KMP_DEBUG
            {
                char *buff;
                buff = __kmp_str_format(
                    "__kmp_dispatch_init: T#%%d new: schedule:%%d chunk:%%%s\n",
                    traits_t< ST >::spec );
                KD_TRACE(10, ( buff, gtid, schedule, chunk ) );
                __kmp_str_free( &buff );
            }
            #endif
        } else {
            if ( schedule == kmp_sch_guided_chunked ) {
                schedule = __kmp_guided;
            }
            if ( chunk <= 0 ) {
                chunk = KMP_DEFAULT_CHUNK;
            }
        }

        if ( schedule == kmp_sch_auto ) {
            // mapping and differentiation: in the __kmp_do_serial_initialize()
            schedule = __kmp_auto;
            #ifdef KMP_DEBUG
            {
                char *buff;
                buff = __kmp_str_format(
                    "__kmp_dispatch_init: kmp_sch_auto: T#%%d new: schedule:%%d chunk:%%%s\n",
                    traits_t< ST >::spec );
                KD_TRACE(10, ( buff, gtid, schedule, chunk ) );
                __kmp_str_free( &buff );
            }
            #endif
        }

        /* guided analytical not safe for too many threads */
        if ( team->t.t_nproc > 1<<20 && schedule == kmp_sch_guided_analytical_chunked ) {
            schedule = kmp_sch_guided_iterative_chunked;
            KMP_WARNING( DispatchManyThreads );
        }
        pr->u.p.parm1 = chunk;
    }
    KMP_ASSERT2( (kmp_sch_lower < schedule && schedule < kmp_sch_upper),
                 "unknown scheduling type" );
    if ( __kmp_env_consistency_check ) {
        if ( st == 0 ) {
            __kmp_error_construct(
                kmp_i18n_msg_CnsLoopIncrZeroProhibited,
                ( pr->ordered ? ct_pdo_ordered : ct_pdo ), loc
            );
        }
    }

    /* compute trip count */
    tc = ( ub - lb + st );
    if ( st != 1 ) {
        if ( st < 0 ) {
            if ( lb < ub ) {
                tc = 0;              // zero-trip
            } else {                 // lb >= ub
                tc = (ST)tc / st;    // convert to signed division
            }
        } else {                     // st > 0
            if ( ub < lb ) {
                tc = 0;              // zero-trip
            } else {                 // lb <= ub
                tc /= st;
            }
        }
    } else if ( ub < lb ) {          // st == 1
        tc = 0;                      // zero-trip
    }

    // Any half-decent optimizer will remove this test when the blocks are empty
    // since the loops are nested, and there is a logical hierarchy.
    if (schedule == __kmp_static)
    {
        KMP_COUNT_BLOCK(OMP_FOR_static);
        KMP_COUNT_VALUE(FOR_static_iterations, tc);
    }
    else
    {
        KMP_COUNT_BLOCK(OMP_FOR_dynamic);
        KMP_COUNT_VALUE(FOR_dynamic_iterations, tc);
    }

    pr->u.p.lb = lb;
    pr->u.p.ub = ub;
    pr->u.p.st = st;
    pr->u.p.tc = tc;

    #if KMP_OS_WINDOWS
    pr->u.p.last_upper = ub + st;
    #endif /* KMP_OS_WINDOWS */

    /* NOTE: only the active parallel region(s) has active ordered sections */
    if ( active ) {
        if ( pr->ordered == 0 ) {
            th -> th.th_dispatch -> th_deo_fcn = __kmp_dispatch_deo_error;
            th -> th.th_dispatch -> th_dxo_fcn = __kmp_dispatch_dxo_error;
        } else {
            pr->ordered_bumped = 0;

            pr->u.p.ordered_lower = 1;
            pr->u.p.ordered_upper = 0;

            th -> th.th_dispatch -> th_deo_fcn = __kmp_dispatch_deo< UT >;
            th -> th.th_dispatch -> th_dxo_fcn = __kmp_dispatch_dxo< UT >;
        }
    }

    if ( __kmp_env_consistency_check ) {
        enum cons_type ws = pr->ordered ? ct_pdo_ordered : ct_pdo;
        if ( push_ws ) {
            __kmp_push_workshare( gtid, ws, loc );
            pr->pushed_ws = ws;
        } else {
            __kmp_check_workshare( gtid, ws, loc );
            pr->pushed_ws = ct_none;
        }
    }
    /* Pick the scheduling algorithm */
    switch ( schedule ) {
    #if ( KMP_STATIC_STEAL_ENABLED && KMP_ARCH_X86_64 )
    case kmp_sch_static_steal:
        {
            T nproc = team->t.t_nproc;
            T ntc, init;

            KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_static_steal case\n", gtid ) );

            ntc = (tc % chunk ? 1 : 0) + tc / chunk;
            if ( nproc > 1 && ntc >= nproc ) {
                T id = __kmp_tid_from_gtid(gtid);
                T small_chunk, extras;

                small_chunk = ntc / nproc;
                extras = ntc % nproc;

                init = id * small_chunk + ( id < extras ? id : extras );
                pr->u.p.count = init;
                pr->u.p.ub = init + small_chunk + ( id < extras ? 1 : 0 );

                pr->u.p.parm2 = lb;
                // pr->u.p.parm3 is not used by static_steal
                pr->u.p.parm4 = id;   // current victim index for later stealing
                pr->u.p.st = st;
                break;
            } else {
                KD_TRACE(100, ("__kmp_dispatch_init: T#%d falling-through to kmp_sch_static_balanced\n",
                               gtid ) );
                schedule = kmp_sch_static_balanced;
                /* too few iterations: fall-through to kmp_sch_static_balanced */
            } // if
            /* FALL-THROUGH to static balanced */
        } // case
    #endif
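    // Static-steal initialization splits the ntc = ceil(tc/chunk) chunks evenly:
    // each of the nproc threads claims small_chunk = ntc/nproc chunks, and the
    // first (ntc % nproc) threads take one extra. For example (illustrative
    // numbers only), tc=100, chunk=10, nproc=3 gives ntc=10 chunks split 4/3/3,
    // i.e. [count, ub) ranges [0,4), [4,7), [7,10). Threads that run dry later
    // steal a quarter of a victim's remaining [count, ub) range in
    // __kmp_dispatch_next.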
    case kmp_sch_static_balanced:
        {
            T nproc = team->t.t_nproc;
            T init, limit;

            KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_static_balanced case\n",
                           gtid ) );

            if ( nproc > 1 ) {
                T id = __kmp_tid_from_gtid(gtid);

                if ( tc < nproc ) {
                    if ( id < tc ) {
                        init  = id;
                        limit = id;
                        pr->u.p.parm1 = ( id == tc - 1 );  /* parm1 stores *plastiter */
                    } else {
                        pr->u.p.count = 1;  /* means no more chunks to execute */
                        pr->u.p.parm1 = FALSE;
                        break;
                    }
                } else {
                    T small_chunk = tc / nproc;
                    T extras = tc % nproc;
                    init = id * small_chunk + ( id < extras ? id : extras);
                    limit = init + small_chunk - ( id < extras ? 0 : 1);
                    pr->u.p.parm1 = ( id == nproc - 1);
                }
            } else {
                if ( tc > 0 ) {
                    init = 0;
                    limit = tc - 1;
                    pr->u.p.parm1 = TRUE;
                } else {
                    // zero trip count
                    pr->u.p.count = 1;  /* means no more chunks to execute */
                    pr->u.p.parm1 = FALSE;
                    break;
                }
            }
            // Calculate chunk for metadata report
            if ( itt_need_metadata_reporting )
                cur_chunk = limit - init + 1;

            if ( st == 1 ) {
                pr->u.p.lb = lb + init;
                pr->u.p.ub = lb + limit;
            } else {
                T ub_tmp = lb + limit * st;  // calculated upper bound, "ub" is user-defined upper bound
                pr->u.p.lb = lb + init * st;
                // adjust upper bound to "ub" if needed, so that MS lastprivate will match it exactly
                if ( st > 0 ) {
                    pr->u.p.ub = ( ub_tmp + st > ub ? ub : ub_tmp );
                } else {
                    pr->u.p.ub = ( ub_tmp + st < ub ? ub : ub_tmp );
                }
            }
            if ( pr->ordered ) {
                pr->u.p.ordered_lower = init;
                pr->u.p.ordered_upper = limit;
            }
            break;
        } // case
    case kmp_sch_guided_iterative_chunked :
        {
            T nproc = team->t.t_nproc;
            KD_TRACE(100,("__kmp_dispatch_init: T#%d kmp_sch_guided_iterative_chunked case\n",gtid));

            if ( nproc > 1 ) {
                if ( (2L * chunk + 1 ) * nproc >= tc ) {
                    /* chunk size too large, switch to dynamic */
                    schedule = kmp_sch_dynamic_chunked;
                } else {
                    // when remaining iters become less than parm2 - switch to dynamic
                    pr->u.p.parm2 = guided_int_param * nproc * ( chunk + 1 );
                    *(double*)&pr->u.p.parm3 = guided_flt_param / nproc;   // may occupy parm3 and parm4
                }
            } else {
                KD_TRACE(100,("__kmp_dispatch_init: T#%d falling-through to kmp_sch_static_greedy\n",gtid));
                schedule = kmp_sch_static_greedy;
                /* team->t.t_nproc == 1, use kmp_sch_static_greedy */
                KD_TRACE(100,("__kmp_dispatch_init: T#%d kmp_sch_static_greedy case\n",gtid));
                pr->u.p.parm1 = tc;
            } // if
        } // case
        break;
    case kmp_sch_guided_analytical_chunked:
        {
            T nproc = team->t.t_nproc;
            KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_guided_analytical_chunked case\n", gtid));

            if ( nproc > 1 ) {
                if ( (2L * chunk + 1 ) * nproc >= tc ) {
                    /* chunk size too large, switch to dynamic */
                    schedule = kmp_sch_dynamic_chunked;
                } else {
                    DBL x;

                    #if KMP_OS_WINDOWS && KMP_ARCH_X86
                    /* save original FPCW and set precision to 64-bit, as Windows* OS
                       on IA-32 architecture defaults to 53-bit */
                    unsigned int oldFpcw = _control87(0,0);
                    _control87(_PC_64,_MCW_PC);
                    #endif

                    /* value used for comparison in solver for cross-over point */
                    long double target = ((long double)chunk * 2 + 1) * nproc / tc;

                    /* crossover point--chunk indexes equal to or greater than
                       this point switch to dynamic-style scheduling */
                    UT cross;

                    /* commonly used term: (2 nproc - 1)/(2 nproc) */
                    x = (long double)1.0 - (long double)0.5 / nproc;

                    #ifdef KMP_DEBUG
                    { // test natural alignment
                        struct _test_a {
                            char a;
                            union {
                                char b;
                                DBL  d;
                            };
                        } t;
                        ptrdiff_t natural_alignment = (ptrdiff_t)&t.b - (ptrdiff_t)&t - (ptrdiff_t)1;
                        KMP_DEBUG_ASSERT( ( ( (ptrdiff_t)&pr->u.p.parm3 ) & ( natural_alignment ) ) == 0 );
                    }
                    #endif // KMP_DEBUG

                    /* save the term in thread private dispatch structure */
                    *(DBL*)&pr->u.p.parm3 = x;

                    /* solve for the crossover point to the nearest integer i for which C_i <= chunk */
                    {
                        UT          left, right, mid;
                        long double p;

                        /* estimate initial upper and lower bound */
                        // ... (initial seed value of `right` elided in this excerpt)
                        p = __kmp_pow< UT >(x,right);
                        if ( p > target ) {
                            do {
                                p *= p;
                                right <<= 1;
                            } while(p>target && right < (1<<27));
                            left = right >> 1;  /* lower bound is previous (failed) estimate of upper bound */
                        } else {
                            left = 0;
                        }

                        /* bisection root-finding method */
                        while ( left + 1 < right ) {
                            mid = (left + right) / 2;
                            if ( __kmp_pow< UT >(x,mid) > target ) {
                                left = mid;
                            } else {
                                right = mid;
                            }
                        } // while
                        cross = right;
                    }
                    /* assert sanity of computed crossover point */
                    KMP_ASSERT(cross && __kmp_pow< UT >(x, cross - 1) > target && __kmp_pow< UT >(x, cross) <= target);

                    /* save the crossover point in thread private dispatch structure */
                    pr->u.p.parm2 = cross;

                    // C75803
                    #if ( ( KMP_OS_LINUX || KMP_OS_WINDOWS ) && KMP_ARCH_X86 ) && ( ! defined( KMP_I8 ) )
                        #define GUIDED_ANALYTICAL_WORKAROUND (*( DBL * )&pr->u.p.parm3)
                    #else
                        #define GUIDED_ANALYTICAL_WORKAROUND (x)
                    #endif
                    /* dynamic-style scheduling offset */
                    pr->u.p.count = tc - __kmp_dispatch_guided_remaining(tc, GUIDED_ANALYTICAL_WORKAROUND, cross) - cross * chunk;
                    #if KMP_OS_WINDOWS && KMP_ARCH_X86
                    /* restore FPCW */
                    _control87(oldFpcw,_MCW_PC);
                    #endif
                } // if
            } else {
                KD_TRACE(100, ("__kmp_dispatch_init: T#%d falling-through to kmp_sch_static_greedy\n",
                               gtid ) );
                schedule = kmp_sch_static_greedy;
                /* team->t.t_nproc == 1: fall-through to kmp_sch_static_greedy */
            } // if
        } // case
        break;
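    // The analytical guided schedule models chunk k as a fixed fraction of the
    // remaining work, i.e. remaining(k) ~= tc * x^k with x = 1 - 0.5/nproc. The
    // bisection above finds the smallest chunk index `cross` at which the modeled
    // chunk size has shrunk to the user chunk (x^cross <= target); from `cross`
    // onward __kmp_dispatch_next hands out fixed chunks of size `chunk`,
    // dynamic-style, using the precomputed pr->u.p.count offset.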
    case kmp_sch_static_greedy:
        KD_TRACE(100,("__kmp_dispatch_init: T#%d kmp_sch_static_greedy case\n",gtid));
        pr->u.p.parm1 = ( team -> t.t_nproc > 1 ) ?
            ( tc + team->t.t_nproc - 1 ) / team->t.t_nproc :
            tc;
        break;
    case kmp_sch_static_chunked :
    case kmp_sch_dynamic_chunked :
        if ( pr->u.p.parm1 <= 0 ) {
            pr->u.p.parm1 = KMP_DEFAULT_CHUNK;
        }
        KD_TRACE(100,("__kmp_dispatch_init: T#%d kmp_sch_static_chunked/kmp_sch_dynamic_chunked cases\n", gtid));
        break;
    case kmp_sch_trapezoidal :
        {
            /* TSS: trapezoid self-scheduling, minimum chunk_size = parm1 */

            T parm1, parm2, parm3, parm4;
            KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_trapezoidal case\n", gtid ) );

            parm1 = chunk;

            /* F : size of the first cycle */
            parm2 = ( tc / (2 * team->t.t_nproc) );
            if ( parm2 < 1 ) {
                parm2 = 1;
            }

            /* L : size of the last cycle; make sure it is not larger than the first */
            if ( parm1 < 1 ) {
                parm1 = 1;
            } else if ( parm1 > parm2 ) {
                parm1 = parm2;
            }

            /* N : number of cycles */
            parm3 = ( parm2 + parm1 );
            parm3 = ( 2 * tc + parm3 - 1) / parm3;
            if ( parm3 < 2 ) {
                parm3 = 2;
            }

            /* sigma : decreasing increment of the trapezoid */
            parm4 = ( parm3 - 1 );
            parm4 = ( parm2 - parm1 ) / parm4;

            pr->u.p.parm1 = parm1;
            pr->u.p.parm2 = parm2;
            pr->u.p.parm3 = parm3;
            pr->u.p.parm4 = parm4;
        } // case
        break;
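    // Trapezoid self-scheduling hands out linearly shrinking chunks: the first
    // chunk has ~parm2 = tc/(2*nproc) iterations, the last has parm1 (the user
    // chunk, clipped to parm2), there are parm3 ~= 2*tc/(parm1+parm2) chunks in
    // total, and consecutive chunks shrink by parm4 = (parm2-parm1)/(parm3-1).
    // Chunk boundaries are recovered in __kmp_dispatch_next from the closed form
    // init(k) = ( k * (2*parm2 - (k-1)*parm4) ) / 2.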
    default:
        {
            __kmp_msg(
                kmp_ms_fatal,                        // Severity
                KMP_MSG( UnknownSchedTypeDetected ), // Primary message
                KMP_HNT( GetNewerLibrary ),          // Hint
                __kmp_msg_null                       // Variadic argument list terminator
            );
        }
        break;
    } // switch
    pr->schedule = schedule;
    if ( active ) {
        /* The name of this buffer should be my_buffer_index when it's free to use it */

        KD_TRACE(100, ("__kmp_dispatch_init: T#%d before wait: my_buffer_index:%d sh->buffer_index:%d\n",
                       gtid, my_buffer_index, sh->buffer_index) );
        __kmp_wait_yield< kmp_uint32 >( & sh->buffer_index, my_buffer_index, __kmp_eq< kmp_uint32 >
                                        USE_ITT_BUILD_ARG( NULL )
                                      );
        // Note: KMP_WAIT_YIELD() cannot be used here: buffer_index and
        // my_buffer_index are *always* 32-bit integers.
        KD_TRACE(100, ("__kmp_dispatch_init: T#%d after wait: my_buffer_index:%d sh->buffer_index:%d\n",
                       gtid, my_buffer_index, sh->buffer_index) );

        th -> th.th_dispatch -> th_dispatch_pr_current = (dispatch_private_info_t*) pr;
        th -> th.th_dispatch -> th_dispatch_sh_current = (dispatch_shared_info_t*)  sh;

        if ( pr->ordered ) {
            __kmp_itt_ordered_init( gtid );
        }
        // Report loop metadata
        if ( itt_need_metadata_reporting ) {
            // Only report metadata by master of active team at level 1
            kmp_uint64 schedtype = 0;
            switch ( schedule ) {
            case kmp_sch_static_chunked:
            case kmp_sch_static_balanced:  // Chunk is calculated in the switch above
                break;
            case kmp_sch_static_greedy:
                cur_chunk = pr->u.p.parm1;
                break;
            case kmp_sch_dynamic_chunked:
                schedtype = 1;
                break;
            case kmp_sch_guided_iterative_chunked:
            case kmp_sch_guided_analytical_chunked:
                schedtype = 2;
                break;
            default:
                schedtype = 3;
                break;
            }
            __kmp_itt_metadata_loop(loc, schedtype, tc, cur_chunk);
        }
    } // if (active)

    #ifdef KMP_DEBUG
    {
        char *buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmp_dispatch_init: T#%%d returning: schedule:%%d ordered:%%%s lb:%%%s ub:%%%s" \
            " st:%%%s tc:%%%s count:%%%s\n\tordered_lower:%%%s ordered_upper:%%%s" \
            " parm1:%%%s parm2:%%%s parm3:%%%s parm4:%%%s\n",
            traits_t< UT >::spec, traits_t< T >::spec, traits_t< T >::spec,
            traits_t< ST >::spec, traits_t< UT >::spec, traits_t< UT >::spec,
            traits_t< UT >::spec, traits_t< UT >::spec, traits_t< T >::spec,
            traits_t< T >::spec, traits_t< T >::spec, traits_t< T >::spec );
        KD_TRACE(10, ( buff,
            gtid, pr->schedule, pr->ordered, pr->u.p.lb, pr->u.p.ub,
            pr->u.p.st, pr->u.p.tc, pr->u.p.count,
            pr->u.p.ordered_lower, pr->u.p.ordered_upper, pr->u.p.parm1,
            pr->u.p.parm2, pr->u.p.parm3, pr->u.p.parm4 ) );
        __kmp_str_free( &buff );
    }
    #endif
    #if ( KMP_STATIC_STEAL_ENABLED )
    if ( ___kmp_size_type < 8 ) {
        // A dedicated per-loop counter is required so that potential victims know
        // this thread has (re-)initialized its buffer and may be stolen from.
        if ( schedule == kmp_sch_static_steal ) {
            // Other threads will inspect this variable when searching for a victim.
            volatile T * p = &pr->u.p.static_steal_counter;
            *p = *p + 1;
        }
    }
    #endif // ( KMP_STATIC_STEAL_ENABLED )

#if OMPT_SUPPORT && OMPT_TRACE
    if (ompt_enabled &&
        ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
        ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
        ompt_task_info_t *task_info = __ompt_get_taskinfo(0);
        ompt_callbacks.ompt_callback(ompt_event_loop_begin)(
            team_info->parallel_id, task_info->task_id, team_info->microtask);
    }
#endif
}
template< typename UT >
static void
__kmp_dispatch_finish( int gtid, ident_t *loc )
{
    typedef typename traits_t< UT >::signed_t ST;
    kmp_info_t *th = __kmp_threads[ gtid ];

    KD_TRACE(100, ("__kmp_dispatch_finish: T#%d called\n", gtid ) );
    if ( ! th -> th.th_team -> t.t_serialized ) {

        dispatch_private_info_template< UT > * pr =
            reinterpret_cast< dispatch_private_info_template< UT >* >
            ( th->th.th_dispatch->th_dispatch_pr_current );
        dispatch_shared_info_template< UT > volatile * sh =
            reinterpret_cast< dispatch_shared_info_template< UT >volatile* >
            ( th->th.th_dispatch->th_dispatch_sh_current );
        KMP_DEBUG_ASSERT( pr );
        KMP_DEBUG_ASSERT( sh );
        KMP_DEBUG_ASSERT( th->th.th_dispatch ==
                &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid] );

        if ( pr->ordered_bumped ) {
            KD_TRACE(1000, ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n",
                            gtid ) );
            pr->ordered_bumped = 0;
        } else {
            UT lower = pr->u.p.ordered_lower;

            #ifdef KMP_DEBUG
            {
                char *buff;
                // create format specifiers before the debug output
                buff = __kmp_str_format(
                    "__kmp_dispatch_finish: T#%%d before wait: ordered_iteration:%%%s lower:%%%s\n",
                    traits_t< UT >::spec, traits_t< UT >::spec );
                KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower ) );
                __kmp_str_free( &buff );
            }
            #endif

            __kmp_wait_yield< UT >(&sh->u.s.ordered_iteration, lower, __kmp_ge< UT >
                                   USE_ITT_BUILD_ARG(NULL)
                                   );

            #ifdef KMP_DEBUG
            {
                char *buff;
                buff = __kmp_str_format(
                    "__kmp_dispatch_finish: T#%%d after wait: ordered_iteration:%%%s lower:%%%s\n",
                    traits_t< UT >::spec, traits_t< UT >::spec );
                KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower ) );
                __kmp_str_free( &buff );
            }
            #endif

            test_then_inc< ST >( (volatile ST *) & sh->u.s.ordered_iteration );
        } // if
    } // if
    KD_TRACE(100, ("__kmp_dispatch_finish: T#%d returned\n", gtid ) );
}
#ifdef KMP_GOMP_COMPAT

template< typename UT >
static void
__kmp_dispatch_finish_chunk( int gtid, ident_t *loc )
{
    typedef typename traits_t< UT >::signed_t ST;
    kmp_info_t *th = __kmp_threads[ gtid ];

    KD_TRACE(100, ("__kmp_dispatch_finish_chunk: T#%d called\n", gtid ) );
    if ( ! th -> th.th_team -> t.t_serialized ) {
        dispatch_private_info_template< UT > * pr =
            reinterpret_cast< dispatch_private_info_template< UT >* >
            ( th->th.th_dispatch->th_dispatch_pr_current );
        dispatch_shared_info_template< UT > volatile * sh =
            reinterpret_cast< dispatch_shared_info_template< UT >volatile* >
            ( th->th.th_dispatch->th_dispatch_sh_current );
        KMP_DEBUG_ASSERT( pr );
        KMP_DEBUG_ASSERT( sh );
        KMP_DEBUG_ASSERT( th->th.th_dispatch ==
                &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid] );

        UT lower = pr->u.p.ordered_lower;
        UT upper = pr->u.p.ordered_upper;
        UT inc = upper - lower + 1;

        if ( pr->ordered_bumped == inc ) {
            KD_TRACE(1000, ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n",
                            gtid ) );
            pr->ordered_bumped = 0;
        } else {
            inc -= pr->ordered_bumped;

            #ifdef KMP_DEBUG
            {
                char *buff;
                buff = __kmp_str_format(
                    "__kmp_dispatch_finish_chunk: T#%%d before wait: " \
                    "ordered_iteration:%%%s lower:%%%s upper:%%%s\n",
                    traits_t< UT >::spec, traits_t< UT >::spec, traits_t< UT >::spec );
                KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower, upper ) );
                __kmp_str_free( &buff );
            }
            #endif

            __kmp_wait_yield< UT >(&sh->u.s.ordered_iteration, lower, __kmp_ge< UT >
                                   USE_ITT_BUILD_ARG(NULL)
                                   );

            KD_TRACE(1000, ("__kmp_dispatch_finish_chunk: T#%d resetting ordered_bumped to zero\n",
                            gtid ) );
            pr->ordered_bumped = 0;

            #ifdef KMP_DEBUG
            {
                char *buff;
                buff = __kmp_str_format(
                    "__kmp_dispatch_finish_chunk: T#%%d after wait: " \
                    "ordered_iteration:%%%s inc:%%%s lower:%%%s upper:%%%s\n",
                    traits_t< UT >::spec, traits_t< UT >::spec, traits_t< UT >::spec, traits_t< UT >::spec );
                KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, inc, lower, upper ) );
                __kmp_str_free( &buff );
            }
            #endif

            test_then_add< ST >( (volatile ST *) & sh->u.s.ordered_iteration, inc);
        }
    }
    KD_TRACE(100, ("__kmp_dispatch_finish_chunk: T#%d returned\n", gtid ) );
}

#endif /* KMP_GOMP_COMPAT */
#if OMPT_SUPPORT && OMPT_TRACE
#define OMPT_LOOP_END                                                          \
    if (status == 0) {                                                         \
        if (ompt_enabled &&                                                    \
            ompt_callbacks.ompt_callback(ompt_event_loop_end)) {               \
            ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);        \
            ompt_task_info_t *task_info = __ompt_get_taskinfo(0);              \
            ompt_callbacks.ompt_callback(ompt_event_loop_end)(                 \
                team_info->parallel_id, task_info->task_id);                   \
        }                                                                      \
    }
#else
#define OMPT_LOOP_END // no-op
#endif

template< typename T >
static int
__kmp_dispatch_next(
    ident_t *loc, int gtid, kmp_int32 *p_last, T *p_lb, T *p_ub, typename traits_t< T >::signed_t *p_st
) {
    typedef typename traits_t< T >::unsigned_t  UT;
    typedef typename traits_t< T >::signed_t    ST;
    typedef typename traits_t< T >::floating_t  DBL;
#if ( KMP_STATIC_STEAL_ENABLED && KMP_ARCH_X86_64 )
    static const int ___kmp_size_type = sizeof( UT );
#endif

    int                                   status;
    dispatch_private_info_template< T > * pr;
    kmp_info_t                          * th   = __kmp_threads[ gtid ];
    kmp_team_t                          * team = th -> th.th_team;

    KMP_DEBUG_ASSERT( p_lb && p_ub && p_st ); // AC: these cannot be NULL
    #ifdef KMP_DEBUG
    {
        char *buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmp_dispatch_next: T#%%d called p_lb:%%%s p_ub:%%%s p_st:%%%s p_last: %%p\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
        KD_TRACE(1000, ( buff, gtid, *p_lb, *p_ub, p_st ? *p_st : 0, p_last ) );
        __kmp_str_free( &buff );
    }
    #endif

    if ( team -> t.t_serialized ) {
        /* NOTE: serialize this dispatch because we are not at the active level */
        pr = reinterpret_cast< dispatch_private_info_template< T >* >
            ( th -> th.th_dispatch -> th_disp_buffer ); /* top of the stack */
        KMP_DEBUG_ASSERT( pr );

        if ( (status = (pr->u.p.tc != 0)) == 0 ) {
            *p_lb = 0;
            *p_ub = 0;
            if ( p_st != NULL )
                *p_st = 0;
            if ( __kmp_env_consistency_check ) {
                if ( pr->pushed_ws != ct_none ) {
                    pr->pushed_ws = __kmp_pop_workshare( gtid, pr->pushed_ws, loc );
                }
            }
        } else if ( pr->nomerge ) {
            kmp_int32 last;
            T         start;
            UT        limit, trip, init;
            ST        incr;
            T         chunk = pr->u.p.parm1;

            KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_dynamic_chunked case\n", gtid ) );

            init = chunk * pr->u.p.count++;
            trip = pr->u.p.tc - 1;

            if ( (status = (init <= trip)) == 0 ) {
                *p_lb = 0;
                *p_ub = 0;
                if ( p_st != NULL )
                    *p_st = 0;
                if ( __kmp_env_consistency_check ) {
                    if ( pr->pushed_ws != ct_none ) {
                        pr->pushed_ws = __kmp_pop_workshare( gtid, pr->pushed_ws, loc );
                    }
                }
            } else {
                start = pr->u.p.lb;
                limit = chunk + init - 1;
                incr  = pr->u.p.st;

                if ( (last = (limit >= trip)) != 0 ) {
                    limit = trip;
                    #if KMP_OS_WINDOWS
                    pr->u.p.last_upper = pr->u.p.ub;
                    #endif /* KMP_OS_WINDOWS */
                }
                if ( p_last != NULL )
                    *p_last = last;
                if ( p_st != NULL )
                    *p_st = incr;
                if ( incr == 1 ) {
                    *p_lb = start + init;
                    *p_ub = start + limit;
                } else {
                    *p_lb = start + init * incr;
                    *p_ub = start + limit * incr;
                }

                if ( pr->ordered ) {
                    pr->u.p.ordered_lower = init;
                    pr->u.p.ordered_upper = limit;
                    #ifdef KMP_DEBUG
                    {
                        char *buff;
                        buff = __kmp_str_format(
                            "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
                            traits_t< UT >::spec, traits_t< UT >::spec );
                        KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
                        __kmp_str_free( &buff );
                    }
                    #endif
                } // if
            } // if
        } else {
            pr->u.p.tc = 0;
            *p_lb = pr->u.p.lb;
            *p_ub = pr->u.p.ub;
            #if KMP_OS_WINDOWS
            pr->u.p.last_upper = *p_ub;
            #endif /* KMP_OS_WINDOWS */
            if ( p_last != NULL )
                *p_last = TRUE;
            if ( p_st != NULL )
                *p_st = pr->u.p.st;
        } // if

        #ifdef KMP_DEBUG
        {
            char *buff;
            // create format specifiers before the debug output
            buff = __kmp_str_format(
                "__kmp_dispatch_next: T#%%d serialized case: p_lb:%%%s " \
                "p_ub:%%%s p_st:%%%s p_last:%%p %%d  returning:%%d\n",
                traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
            KD_TRACE(10, ( buff, gtid, *p_lb, *p_ub, *p_st, p_last, *p_last, status) );
            __kmp_str_free( &buff );
        }
        #endif
#if INCLUDE_SSC_MARKS
        SSC_MARK_DISPATCH_NEXT();
#endif
        OMPT_LOOP_END;
        return status;
    } else {
        kmp_int32 last = 0;
        dispatch_shared_info_template< UT > *sh;
        T         start;
        ST        incr;
        UT        limit, trip, init;

        KMP_DEBUG_ASSERT( th->th.th_dispatch ==
                &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid] );

        pr = reinterpret_cast< dispatch_private_info_template< T >* >
            ( th->th.th_dispatch->th_dispatch_pr_current );
        KMP_DEBUG_ASSERT( pr );
        sh = reinterpret_cast< dispatch_shared_info_template< UT >* >
            ( th->th.th_dispatch->th_dispatch_sh_current );
        KMP_DEBUG_ASSERT( sh );

        if ( pr->u.p.tc == 0 ) {
            // zero trip count
            status = 0;
        } else {
            switch (pr->schedule) {
            #if ( KMP_STATIC_STEAL_ENABLED && KMP_ARCH_X86_64 )
            case kmp_sch_static_steal:
                {
                    T chunk = pr->u.p.parm1;

                    KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_static_steal case\n", gtid) );

                    trip = pr->u.p.tc - 1;

                    if ( ___kmp_size_type > 4 ) {
                        // Other threads do not look into the data of this thread,
                        // so it is not necessary to make volatile casting.
                        init   = ( pr->u.p.count )++;
                        status = ( init < (UT)pr->u.p.ub );
                    } else {
                        typedef union {
                            struct {
                                UT count;
                                T  ub;
                            } p;
                            kmp_int64 b;
                        } union_i4;
                        // All operations on 'count' and 'ub' must be combined atomically together.
                        {
                            union_i4 vold, vnew;
                            vold.b = *( volatile kmp_int64 * )(&pr->u.p.count);
                            vnew = vold;
                            vnew.p.count++;
                            while( ! KMP_COMPARE_AND_STORE_ACQ64(
                                        ( volatile kmp_int64* )&pr->u.p.count,
                                        *VOLATILE_CAST(kmp_int64 *)&vold.b,
                                        *VOLATILE_CAST(kmp_int64 *)&vnew.b ) ) {
                                KMP_CPU_PAUSE();
                                vold.b = *( volatile kmp_int64 * )(&pr->u.p.count);
                                vnew = vold;
                                vnew.p.count++;
                            }
                            vnew = vold;
                            init   = vnew.p.count;
                            status = ( init < (UT)vnew.p.ub ) ;
                        }

                        if( !status ) {
                            kmp_info_t **other_threads = team->t.t_threads;
                            int          while_limit = 10;
                            int          while_index = 0;

                            // try to steal chunks from another thread
                            while ( ( !status ) && ( while_limit != ++while_index ) ) {
                                union_i4  vold, vnew;
                                kmp_int32 remaining;  // kmp_int32 because KMP_I4 only
                                T         victimIdx    = pr->u.p.parm4;
                                T         oldVictimIdx = victimIdx;
                                dispatch_private_info_template< T > * victim;

                                do {
                                    if( !victimIdx ) {
                                        victimIdx = team->t.t_nproc - 1;
                                    } else {
                                        --victimIdx;
                                    }
                                    victim = reinterpret_cast< dispatch_private_info_template< T >* >
                                        ( other_threads[victimIdx]->th.th_dispatch->th_dispatch_pr_current );
                                } while ( (victim == NULL || victim == pr) && oldVictimIdx != victimIdx );

                                if ( ( !victim ) ||
                                     ( (*( volatile T * )&victim->u.p.static_steal_counter) !=
                                       (*( volatile T * )&pr->u.p.static_steal_counter) ) ) {
                                    // the victim is not ready yet to participate in stealing
                                    // because it is still in __kmp_dispatch_init
                                    continue;
                                }
                                if ( oldVictimIdx == victimIdx ) {
                                    break;
                                }
                                pr->u.p.parm4 = victimIdx;

                                while( 1 ) {
                                    vold.b = *( volatile kmp_int64 * )( &victim->u.p.count );
                                    vnew = vold;

                                    KMP_DEBUG_ASSERT( (vnew.p.ub - 1) * (UT)chunk <= trip );
                                    if ( vnew.p.count >= (UT)vnew.p.ub || (remaining = vnew.p.ub - vnew.p.count) < 4 ) {
                                        break;
                                    }
                                    vnew.p.ub -= (remaining >> 2);   // steal a quarter of the remaining chunks
                                    KMP_DEBUG_ASSERT((vnew.p.ub - 1) * (UT)chunk <= trip);
                                    #pragma warning( push )
                                    // warning about comparison of unsigned expression with 0
                                    #pragma warning( disable: 186 )
                                    KMP_DEBUG_ASSERT(vnew.p.ub >= 0);
                                    #pragma warning( pop )
                                    if ( KMP_COMPARE_AND_STORE_ACQ64(
                                            ( volatile kmp_int64 * )&victim->u.p.count,
                                            *VOLATILE_CAST(kmp_int64 *)&vold.b,
                                            *VOLATILE_CAST(kmp_int64 *)&vnew.b ) ) {
                                        status = 1;
                                        while_index = 0;
                                        // now update own count and ub
                                        #if KMP_ARCH_X86
                                        // atomic 64-bit write on IA-32 is unavailable, so do it in steps
                                        init = vold.p.count;
                                        pr->u.p.ub = 0;
                                        pr->u.p.count = init + 1;
                                        pr->u.p.ub = vnew.p.count;
                                        #else
                                        init = vold.p.count;
                                        vold.p.count = init + 1;
                                        *( volatile kmp_int64 * )(&pr->u.p.count) = vold.b;
                                        #endif // KMP_ARCH_X86
                                        break;
                                    } // if (CAS succeeded)
                                    KMP_CPU_PAUSE();
                                } // while (1)
                            } // while (steal attempts)
                        } // if ( !status )
                    } // if ( ___kmp_size_type > 4 )

                    if ( !status ) {
                        *p_lb = 0;
                        *p_ub = 0;
                        if ( p_st != NULL ) *p_st = 0;
                    } else {
                        start = pr->u.p.parm2;
                        init *= chunk;
                        limit = chunk + init - 1;
                        incr  = pr->u.p.st;

                        KMP_DEBUG_ASSERT(init <= trip);
                        if ( (last = (limit >= trip)) != 0 )
                            limit = trip;
                        if ( p_st != NULL ) *p_st = incr;

                        if ( incr == 1 ) {
                            *p_lb = start + init;
                            *p_ub = start + limit;
                        } else {
                            *p_lb = start + init * incr;
                            *p_ub = start + limit * incr;
                        }

                        if ( pr->ordered ) {
                            pr->u.p.ordered_lower = init;
                            pr->u.p.ordered_upper = limit;
                            #ifdef KMP_DEBUG
                            {
                                char *buff;
                                buff = __kmp_str_format(
                                    "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
                                    traits_t< UT >::spec, traits_t< UT >::spec );
                                KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
                                __kmp_str_free( &buff );
                            }
                            #endif
                        } // if
                    } // if
                    break;
                } // case
            #endif // ( KMP_STATIC_STEAL_ENABLED && KMP_ARCH_X86_64 )
            case kmp_sch_static_balanced:
                {
                    KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_static_balanced case\n", gtid) );
                    if ( (status = !pr->u.p.count) != 0 ) {  /* check if thread has any iteration to do */
                        pr->u.p.count = 1;
                        *p_lb = pr->u.p.lb;
                        *p_ub = pr->u.p.ub;
                        last  = pr->u.p.parm1;
                        if ( p_st != NULL )
                            *p_st = pr->u.p.st;
                    } else {  /* no iterations to do */
                        pr->u.p.lb = pr->u.p.ub + pr->u.p.st;
                    }
                    if ( pr->ordered ) {
                        #ifdef KMP_DEBUG
                        {
                            char *buff;
                            buff = __kmp_str_format(
                                "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
                                traits_t< UT >::spec, traits_t< UT >::spec );
                            KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
                            __kmp_str_free( &buff );
                        }
                        #endif
                    } // if
                } // case
                break;
            case kmp_sch_static_greedy:  /* original code for kmp_sch_static_greedy was merged here */
            case kmp_sch_static_chunked:
                {
                    T parm1;

                    KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_static_[affinity|chunked] case\n",
                                   gtid ) );
                    parm1 = pr->u.p.parm1;

                    trip  = pr->u.p.tc - 1;
                    init  = parm1 * (pr->u.p.count + __kmp_tid_from_gtid(gtid));

                    if ( (status = (init <= trip)) != 0 ) {
                        start = pr->u.p.lb;
                        incr  = pr->u.p.st;
                        limit = parm1 + init - 1;

                        if ( (last = (limit >= trip)) != 0 )
                            limit = trip;

                        if ( p_st != NULL ) *p_st = incr;

                        pr->u.p.count += team->t.t_nproc;

                        if ( incr == 1 ) {
                            *p_lb = start + init;
                            *p_ub = start + limit;
                        } else {
                            *p_lb = start + init * incr;
                            *p_ub = start + limit * incr;
                        }

                        if ( pr->ordered ) {
                            pr->u.p.ordered_lower = init;
                            pr->u.p.ordered_upper = limit;
                            #ifdef KMP_DEBUG
                            {
                                char *buff;
                                buff = __kmp_str_format(
                                    "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
                                    traits_t< UT >::spec, traits_t< UT >::spec );
                                KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
                                __kmp_str_free( &buff );
                            }
                            #endif
                        } // if
                    } // if
                } // case
                break;
            case kmp_sch_dynamic_chunked:
                {
                    T chunk = pr->u.p.parm1;

                    KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_dynamic_chunked case\n",
                                   gtid ) );

                    init = chunk * test_then_inc_acq< ST >((volatile ST *) & sh->u.s.iteration );
                    trip = pr->u.p.tc - 1;

                    if ( (status = (init <= trip)) == 0 ) {
                        *p_lb = 0;
                        *p_ub = 0;
                        if ( p_st != NULL ) *p_st = 0;
                    } else {
                        start = pr->u.p.lb;
                        limit = chunk + init - 1;
                        incr  = pr->u.p.st;

                        if ( (last = (limit >= trip)) != 0 )
                            limit = trip;

                        if ( p_st != NULL ) *p_st = incr;

                        if ( incr == 1 ) {
                            *p_lb = start + init;
                            *p_ub = start + limit;
                        } else {
                            *p_lb = start + init * incr;
                            *p_ub = start + limit * incr;
                        }

                        if ( pr->ordered ) {
                            pr->u.p.ordered_lower = init;
                            pr->u.p.ordered_upper = limit;
                            #ifdef KMP_DEBUG
                            {
                                char *buff;
                                buff = __kmp_str_format(
                                    "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
                                    traits_t< UT >::spec, traits_t< UT >::spec );
                                KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
                                __kmp_str_free( &buff );
                            }
                            #endif
                        } // if
                    } // if
                } // case
                break;
            case kmp_sch_guided_iterative_chunked:
                {
                    T chunkspec = pr->u.p.parm1;
                    KD_TRACE(100,
                        ("__kmp_dispatch_next: T#%d kmp_sch_guided_chunked iterative case\n",gtid));
                    trip = pr->u.p.tc;
                    // Start atomic part of calculations
                    while(1) {
                        ST remaining;              // signed, because can be < 0
                        init = sh->u.s.iteration;  // shared value
                        remaining = trip - init;
                        if ( remaining <= 0 ) {    // AC: need to compare with 0 first
                            // nothing to do, don't try atomic op
                            status = 0;
                            break;
                        }
                        if ( (T)remaining < pr->u.p.parm2 ) { // compare with K*nproc*(chunk+1), K=2 by default
                            // use dynamic-style schedule
                            // atomically increment iterations, get old value
                            init = test_then_add<ST>( (ST*)&sh->u.s.iteration, (ST)chunkspec );
                            remaining = trip - init;
                            if (remaining <= 0) {
                                status = 0;    // all iterations got by other threads
                            } else {
                                // got some iterations to work on
                                status = 1;
                                if ( (T)remaining > chunkspec ) {
                                    limit = init + chunkspec - 1;
                                } else {
                                    last = 1;   // the last chunk
                                    limit = init + remaining - 1;
                                } // if
                            } // if
                            break;
                        } // if
                        limit = init + (UT)( remaining * *(double*)&pr->u.p.parm3 ); // divide by K*nproc
                        if ( compare_and_swap<ST>( (ST*)&sh->u.s.iteration, (ST)init, (ST)limit ) ) {
                            // CAS was successful, chunk obtained
                            status = 1;
                            --limit;
                            break;
                        } // if
                    } // while
                    if ( status != 0 ) {
                        start = pr->u.p.lb;
                        incr = pr->u.p.st;
                        if ( p_st != NULL )
                            *p_st = incr;
                        *p_lb = start + init * incr;
                        *p_ub = start + limit * incr;
                        if ( pr->ordered ) {
                            pr->u.p.ordered_lower = init;
                            pr->u.p.ordered_upper = limit;
                            #ifdef KMP_DEBUG
                            {
                                char *buff;
                                buff = __kmp_str_format(
                                    "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
                                    traits_t< UT >::spec, traits_t< UT >::spec );
                                KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
                                __kmp_str_free( &buff );
                            }
                            #endif
                        } // if
                    } else {
                        *p_lb = 0;
                        *p_ub = 0;
                        if ( p_st != NULL )
                            *p_st = 0;
                    } // if
                } // case
                break;
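            // The iterative guided path above lets each thread carve its own chunk
            // with a CAS on sh->u.s.iteration: the attempted chunk is
            // remaining * parm3, i.e. remaining/(guided_int_param*nproc) with the
            // defaults, and once fewer than parm2 = guided_int_param*nproc*(chunk+1)
            // iterations remain it degrades to plain dynamic chunks of the user
            // size via test_then_add.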
            case kmp_sch_guided_analytical_chunked:
                {
                    T  chunkspec = pr->u.p.parm1;
                    UT chunkIdx;
                    #if KMP_OS_WINDOWS && KMP_ARCH_X86
                    /* for storing original FPCW value for Windows* OS on IA-32 */
                    unsigned int oldFpcw;
                    unsigned int fpcwSet = 0;
                    #endif
                    KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_guided_chunked analytical case\n",
                                   gtid ) );

                    trip = pr->u.p.tc;

                    KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
                    KMP_DEBUG_ASSERT((2UL * chunkspec + 1) * (UT)team->t.t_nproc < trip);

                    while(1) { /* this while loop is a safeguard against unexpected zero chunk sizes */
                        chunkIdx = test_then_inc_acq< ST >((volatile ST *) & sh->u.s.iteration );
                        if ( chunkIdx >= (UT)pr->u.p.parm2 ) {
                            --trip;
                            /* use dynamic-style scheduling */
                            init = chunkIdx * chunkspec + pr->u.p.count;
                            /* need to verify init > 0 in case of overflow in the above calculation */
                            if ( (status = (init > 0 && init <= trip)) != 0 ) {
                                limit = init + chunkspec -1;

                                if ( (last = (limit >= trip)) != 0 )
                                    limit = trip;
                            }
                            break;
                        } else {
                            /* use exponential-style scheduling */
                            #if KMP_OS_WINDOWS && KMP_ARCH_X86
                            /* if not done yet, save original FPCW and set precision to
                               64-bit, as Windows* OS on IA-32 defaults to 53-bit */
                            if ( !fpcwSet ) {
                                oldFpcw = _control87(0,0);
                                _control87(_PC_64,_MCW_PC);
                                fpcwSet = 0x30000;
                            }
                            #endif
                            if ( chunkIdx ) {
                                init = __kmp_dispatch_guided_remaining< T >(
                                           trip, *( DBL * )&pr->u.p.parm3, chunkIdx );
                                KMP_DEBUG_ASSERT(init);
                                init = trip - init;
                            } else
                                init = 0;
                            limit = trip - __kmp_dispatch_guided_remaining< T >(
                                               trip, *( DBL * )&pr->u.p.parm3, chunkIdx + 1 );
                            KMP_ASSERT(init <= limit);
                            if ( init < limit ) {
                                KMP_DEBUG_ASSERT(limit <= trip);
                                --limit;
                                status = 1;
                                break;
                            } // if
                        } // if
                    } // while (1)
                    #if KMP_OS_WINDOWS && KMP_ARCH_X86
                    /* restore FPCW if necessary; check fpcwSet first because
                       oldFpcw can be uninitialized here */
                    if ( fpcwSet && ( oldFpcw & fpcwSet ) )
                        _control87(oldFpcw,_MCW_PC);
                    #endif
                    if ( status != 0 ) {
                        start = pr->u.p.lb;
                        incr = pr->u.p.st;
                        if ( p_st != NULL )
                            *p_st = incr;
                        *p_lb = start + init * incr;
                        *p_ub = start + limit * incr;
                        if ( pr->ordered ) {
                            pr->u.p.ordered_lower = init;
                            pr->u.p.ordered_upper = limit;
                            #ifdef KMP_DEBUG
                            {
                                char *buff;
                                buff = __kmp_str_format(
                                    "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
                                    traits_t< UT >::spec, traits_t< UT >::spec );
                                KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
                                __kmp_str_free( &buff );
                            }
                            #endif
                        } // if
                    } else {
                        *p_lb = 0;
                        *p_ub = 0;
                        if ( p_st != NULL )
                            *p_st = 0;
                    } // if
                } // case
                break;
            case kmp_sch_trapezoidal:
                {
                    UT index;
                    T  parm2 = pr->u.p.parm2;
                    T  parm3 = pr->u.p.parm3;
                    T  parm4 = pr->u.p.parm4;
                    KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_trapezoidal case\n",
                                   gtid ) );

                    index = test_then_inc< ST >( (volatile ST *) & sh->u.s.iteration );

                    init = ( index * ( (2*parm2) - (index-1)*parm4 ) ) / 2;
                    trip = pr->u.p.tc - 1;

                    if ( (status = ((T)index < parm3 && init <= trip)) == 0 ) {
                        *p_lb = 0;
                        *p_ub = 0;
                        if ( p_st != NULL ) *p_st = 0;
                    } else {
                        start = pr->u.p.lb;
                        limit = ( (index+1) * ( 2*parm2 - index*parm4 ) ) / 2 - 1;
                        incr  = pr->u.p.st;

                        if ( (last = (limit >= trip)) != 0 )
                            limit = trip;

                        if ( p_st != NULL ) *p_st = incr;

                        if ( incr == 1 ) {
                            *p_lb = start + init;
                            *p_ub = start + limit;
                        } else {
                            *p_lb = start + init * incr;
                            *p_ub = start + limit * incr;
                        }

                        if ( pr->ordered ) {
                            pr->u.p.ordered_lower = init;
                            pr->u.p.ordered_upper = limit;
                            #ifdef KMP_DEBUG
                            {
                                char *buff;
                                buff = __kmp_str_format(
                                    "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
                                    traits_t< UT >::spec, traits_t< UT >::spec );
                                KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
                                __kmp_str_free( &buff );
                            }
                            #endif
                        } // if
                    } // if
                } // case
                break;
            default:
                {
                    status = 0; // to avoid complaints on uninitialized variable use
                    __kmp_msg(
                        kmp_ms_fatal,                        // Severity
                        KMP_MSG( UnknownSchedTypeDetected ), // Primary message
                        KMP_HNT( GetNewerLibrary ),          // Hint
                        __kmp_msg_null                       // Variadic argument list terminator
                    );
                }
                break;
            } // switch
        } // else (tc != 0)

        if ( status == 0 ) {
            UT num_done;

            num_done = test_then_inc< ST >( (volatile ST *) & sh->u.s.num_done );
            #ifdef KMP_DEBUG
            {
                char *buff;
                buff = __kmp_str_format(
                    "__kmp_dispatch_next: T#%%d increment num_done:%%%s\n",
                    traits_t< UT >::spec );
                KD_TRACE(100, ( buff, gtid, sh->u.s.num_done ) );
                __kmp_str_free( &buff );
            }
            #endif

            if ( (ST)num_done == team->t.t_nproc-1 ) {
                /* NOTE: release this buffer to be reused */

                sh->u.s.num_done = 0;
                sh->u.s.iteration = 0;

                /* TODO replace with general release procedure? */
                if ( pr->ordered ) {
                    sh->u.s.ordered_iteration = 0;
                }

                sh -> buffer_index += KMP_MAX_DISP_BUF;
                KD_TRACE(100, ("__kmp_dispatch_next: T#%d change buffer_index:%d\n",
                               gtid, sh->buffer_index) );
            } // if

            if ( __kmp_env_consistency_check ) {
                if ( pr->pushed_ws != ct_none ) {
                    pr->pushed_ws = __kmp_pop_workshare( gtid, pr->pushed_ws, loc );
                }
            }

            th -> th.th_dispatch -> th_deo_fcn = NULL;
            th -> th.th_dispatch -> th_dxo_fcn = NULL;
            th -> th.th_dispatch -> th_dispatch_sh_current = NULL;
            th -> th.th_dispatch -> th_dispatch_pr_current = NULL;
        } // if (status == 0)
#if KMP_OS_WINDOWS
        else if ( last ) {
            pr->u.p.last_upper = pr->u.p.ub;
        }
#endif /* KMP_OS_WINDOWS */
        if ( p_last != NULL && status != 0 )
            *p_last = last;
    } // if (serialized) / else

    #ifdef KMP_DEBUG
    {
        char *buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format(
            "__kmp_dispatch_next: T#%%d normal case: " \
            "p_lb:%%%s p_ub:%%%s p_st:%%%s p_last:%%p  returning:%%d\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
        KD_TRACE(10, ( buff, gtid, *p_lb, *p_ub, p_st ? *p_st : 0, p_last, status ) );
        __kmp_str_free( &buff );
    }
    #endif
#if INCLUDE_SSC_MARKS
    SSC_MARK_DISPATCH_NEXT();
#endif
    OMPT_LOOP_END;
    return status;
}
template< typename T >
static void
__kmp_dist_get_bounds(
    ident_t                          *loc,
    kmp_int32                         gtid,
    kmp_int32                        *plastiter,
    T                                *plower,
    T                                *pupper,
    typename traits_t< T >::signed_t  incr
) {
    typedef typename traits_t< T >::unsigned_t  UT;
    typedef typename traits_t< T >::signed_t    ST;
    register kmp_uint32  team_id;
    register kmp_uint32  nteams;
    register UT          trip_count;
    register kmp_team_t *team;
    kmp_info_t          *th;

    KMP_DEBUG_ASSERT( plastiter && plower && pupper );
    KE_TRACE( 10, ("__kmpc_dist_get_bounds called (%d)\n", gtid));
    #ifdef KMP_DEBUG
    {
        char *buff;
        // create format specifiers before the debug output
        buff = __kmp_str_format( "__kmpc_dist_get_bounds: T#%%d liter=%%d "\
            "iter=(%%%s, %%%s, %%%s) signed?<%s>\n",
            traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec,
            traits_t< T >::spec );
        KD_TRACE(100, ( buff, gtid, *plastiter, *plower, *pupper, incr ) );
        __kmp_str_free( &buff );
    }
    #endif

    if( __kmp_env_consistency_check ) {
        if( incr == 0 ) {
            __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc );
        }
        if( incr > 0 ? (*pupper < *plower) : (*plower < *pupper) ) {
            // The loop is illegal; some zero-trip loops are maintained by the compiler,
            // but loops with a wrong-signed increment are reported here.
            __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc );
        }
    }
    th = __kmp_threads[gtid];
    team = th->th.th_team;
    #if OMP_40_ENABLED
    KMP_DEBUG_ASSERT(th->th.th_teams_microtask);   // we are in the teams construct
    nteams = th->th.th_teams_size.nteams;
    #endif
    team_id = team->t.t_master_tid;
    KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc);

    // compute global trip count
    if( incr == 1 ) {
        trip_count = *pupper - *plower + 1;
    } else if(incr == -1) {
        trip_count = *plower - *pupper + 1;
    } else {
        trip_count = (ST)(*pupper - *plower) / incr + 1; // cast to signed to cover incr<0 case
    }
    if( trip_count <= nteams ) {
        KMP_DEBUG_ASSERT(
            __kmp_static == kmp_sch_static_greedy || \
            __kmp_static == kmp_sch_static_balanced
        ); // Unknown static scheduling type.
        // only some teams get single iteration, others get nothing
        if( team_id < trip_count ) {
            *pupper = *plower = *plower + team_id * incr;
        } else {
            *plower = *pupper + incr; // zero-trip loop
        }
        if( plastiter != NULL )
            *plastiter = ( team_id == trip_count - 1 );
    } else {
        if( __kmp_static == kmp_sch_static_balanced ) {
            register UT chunk = trip_count / nteams;
            register UT extras = trip_count % nteams;
            *plower += incr * ( team_id * chunk + ( team_id < extras ? team_id : extras ) );
            *pupper = *plower + chunk * incr - ( team_id < extras ? 0 : incr );
            if( plastiter != NULL )
                *plastiter = ( team_id == nteams - 1 );
        } else {
            register T chunk_inc_count =
                ( trip_count / nteams + ( ( trip_count % nteams ) ? 1 : 0) ) * incr;
            register T upper = *pupper;
            KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy );
                // Unknown static scheduling type.
            *plower += team_id * chunk_inc_count;
            *pupper = *plower + chunk_inc_count - incr;
            // Check/correct bounds if needed
            if( incr > 0 ) {
                if( *pupper < *plower )
                    *pupper = i_maxmin< T >::mx;
                if( plastiter != NULL )
                    *plastiter = *plower <= upper && *pupper > upper - incr;
                if( *pupper > upper )
                    *pupper = upper;
            } else {
                if( *pupper > *plower )
                    *pupper = i_maxmin< T >::mn;
                if( plastiter != NULL )
                    *plastiter = *plower >= upper && *pupper < upper - incr;
                if( *pupper < upper )
                    *pupper = upper;
            }
        }
    }
}
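// __kmp_dist_get_bounds splits the global iteration space across the teams of a
// teams construct before the per-team dispatch is initialized. Illustrative
// numbers only: trip_count=10, nteams=4, incr=1 under kmp_sch_static_balanced
// gives the teams 3,3,2,2 iterations (chunk=2, extras=2); under
// kmp_sch_static_greedy each team is offered ceil(10/4)=3 iterations starting at
// team_id*3, with the last team's range clipped to the original upper bound (so
// it ends up with a single iteration).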
/* Compiler entry points for dynamically scheduled loops: save the loop bounds
   and schedule. These functions are identical apart from the argument types. */
void
__kmpc_dispatch_init_4( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                        kmp_int32 lb, kmp_int32 ub, kmp_int32 st, kmp_int32 chunk )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_dispatch_init< kmp_int32 >( loc, gtid, schedule, lb, ub, st, chunk, true );
}

void
__kmpc_dispatch_init_4u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                         kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_dispatch_init< kmp_uint32 >( loc, gtid, schedule, lb, ub, st, chunk, true );
}

void
__kmpc_dispatch_init_8( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                        kmp_int64 lb, kmp_int64 ub,
                        kmp_int64 st, kmp_int64 chunk )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_dispatch_init< kmp_int64 >( loc, gtid, schedule, lb, ub, st, chunk, true );
}

void
__kmpc_dispatch_init_8u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                         kmp_uint64 lb, kmp_uint64 ub,
                         kmp_int64 st, kmp_int64 chunk )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_dispatch_init< kmp_uint64 >( loc, gtid, schedule, lb, ub, st, chunk, true );
}
/* Same as the init entry points above, but for distribute-parallel-for:
   the bounds are first adjusted per team via __kmp_dist_get_bounds. */
void
__kmpc_dist_dispatch_init_4( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
    kmp_int32 *p_last, kmp_int32 lb, kmp_int32 ub, kmp_int32 st, kmp_int32 chunk )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_dist_get_bounds< kmp_int32 >( loc, gtid, p_last, &lb, &ub, st );
    __kmp_dispatch_init< kmp_int32 >( loc, gtid, schedule, lb, ub, st, chunk, true );
}

void
__kmpc_dist_dispatch_init_4u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
    kmp_int32 *p_last, kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_dist_get_bounds< kmp_uint32 >( loc, gtid, p_last, &lb, &ub, st );
    __kmp_dispatch_init< kmp_uint32 >( loc, gtid, schedule, lb, ub, st, chunk, true );
}

void
__kmpc_dist_dispatch_init_8( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
    kmp_int32 *p_last, kmp_int64 lb, kmp_int64 ub, kmp_int64 st, kmp_int64 chunk )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_dist_get_bounds< kmp_int64 >( loc, gtid, p_last, &lb, &ub, st );
    __kmp_dispatch_init< kmp_int64 >( loc, gtid, schedule, lb, ub, st, chunk, true );
}

void
__kmpc_dist_dispatch_init_8u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
    kmp_int32 *p_last, kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st, kmp_int64 chunk )
{
    KMP_DEBUG_ASSERT( __kmp_init_serial );
    __kmp_dist_get_bounds< kmp_uint64 >( loc, gtid, p_last, &lb, &ub, st );
    __kmp_dispatch_init< kmp_uint64 >( loc, gtid, schedule, lb, ub, st, chunk, true );
}
/* Grab the next chunk of work; returns one if there is work to be done, zero otherwise. */
int
__kmpc_dispatch_next_4( ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                        kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st )
{
    return __kmp_dispatch_next< kmp_int32 >( loc, gtid, p_last, p_lb, p_ub, p_st );
}

int
__kmpc_dispatch_next_4u( ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                         kmp_uint32 *p_lb, kmp_uint32 *p_ub, kmp_int32 *p_st )
{
    return __kmp_dispatch_next< kmp_uint32 >( loc, gtid, p_last, p_lb, p_ub, p_st );
}

int
__kmpc_dispatch_next_8( ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                        kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st )
{
    return __kmp_dispatch_next< kmp_int64 >( loc, gtid, p_last, p_lb, p_ub, p_st );
}

int
__kmpc_dispatch_next_8u( ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                         kmp_uint64 *p_lb, kmp_uint64 *p_ub, kmp_int64 *p_st )
{
    return __kmp_dispatch_next< kmp_uint64 >( loc, gtid, p_last, p_lb, p_ub, p_st );
}
/* Mark the end of a dynamic loop. */
void
__kmpc_dispatch_fini_4( ident_t *loc, kmp_int32 gtid )
{
    __kmp_dispatch_finish< kmp_uint32 >( gtid, loc );
}

void
__kmpc_dispatch_fini_8( ident_t *loc, kmp_int32 gtid )
{
    __kmp_dispatch_finish< kmp_uint64 >( gtid, loc );
}

void
__kmpc_dispatch_fini_4u( ident_t *loc, kmp_int32 gtid )
{
    __kmp_dispatch_finish< kmp_uint32 >( gtid, loc );
}

void
__kmpc_dispatch_fini_8u( ident_t *loc, kmp_int32 gtid )
{
    __kmp_dispatch_finish< kmp_uint64 >( gtid, loc );
}
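// How compiler-generated code for a dynamically scheduled loop drives these
// entry points, as a hedged sketch (exact codegen differs by compiler; the `loc`
// object and schedule constant are illustrative assumptions):
//
//     // #pragma omp for schedule(dynamic, 4) over i = 0..N-1
//     __kmpc_dispatch_init_4( &loc, gtid, kmp_sch_dynamic_chunked, 0, N - 1, 1, 4 );
//     kmp_int32 lb, ub, st, last;
//     while ( __kmpc_dispatch_next_4( &loc, gtid, &last, &lb, &ub, &st ) ) {
//         for ( kmp_int32 i = lb; i <= ub; i += st )
//             body( i );
//     }
//     // __kmpc_dispatch_fini_4( &loc, gtid ) is additionally emitted for ordered loops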
kmp_uint32 __kmp_eq_4( kmp_uint32 value, kmp_uint32 checker) {
    return value == checker;
}

kmp_uint32 __kmp_neq_4( kmp_uint32 value, kmp_uint32 checker) {
    return value != checker;
}

kmp_uint32 __kmp_lt_4( kmp_uint32 value, kmp_uint32 checker) {
    return value < checker;
}

kmp_uint32 __kmp_ge_4( kmp_uint32 value, kmp_uint32 checker) {
    return value >= checker;
}

kmp_uint32 __kmp_le_4( kmp_uint32 value, kmp_uint32 checker) {
    return value <= checker;
}
kmp_uint32
__kmp_wait_yield_4( volatile kmp_uint32 * spinner,
                    kmp_uint32            checker,
                    kmp_uint32 (* pred)( kmp_uint32, kmp_uint32 )
                    , void * obj    // Higher-level synchronization object, or NULL.
                  )
{
    // note: we may not belong to a team at this point
    register volatile kmp_uint32         * spin  = spinner;
    register          kmp_uint32           check = checker;
    register          kmp_uint32           spins;
    register          kmp_uint32 (*f) ( kmp_uint32, kmp_uint32 ) = pred;
    register          kmp_uint32           r;

    KMP_FSYNC_SPIN_INIT( obj, (void*) spin );
    KMP_INIT_YIELD( spins );
    // main wait spin loop
    while(!f(r = TCR_4(*spin), check)) {
        KMP_FSYNC_SPIN_PREPARE( obj );
        // if we are oversubscribed, or have waited a bit (and KMP_LIBRARY=throughput), then yield
        KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
        KMP_YIELD_SPIN( spins );
    }
    KMP_FSYNC_SPIN_ACQUIRED( obj );
    return r;
}
#ifdef KMP_GOMP_COMPAT

void
__kmp_aux_dispatch_init_4( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                           kmp_int32 lb, kmp_int32 ub, kmp_int32 st,
                           kmp_int32 chunk, int push_ws )
{
    __kmp_dispatch_init< kmp_int32 >( loc, gtid, schedule, lb, ub, st, chunk,
                                      push_ws );
}

void
__kmp_aux_dispatch_init_4u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                            kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st,
                            kmp_int32 chunk, int push_ws )
{
    __kmp_dispatch_init< kmp_uint32 >( loc, gtid, schedule, lb, ub, st, chunk,
                                       push_ws );
}

void
__kmp_aux_dispatch_init_8( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                           kmp_int64 lb, kmp_int64 ub, kmp_int64 st,
                           kmp_int64 chunk, int push_ws )
{
    __kmp_dispatch_init< kmp_int64 >( loc, gtid, schedule, lb, ub, st, chunk,
                                      push_ws );
}

void
__kmp_aux_dispatch_init_8u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
                            kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st,
                            kmp_int64 chunk, int push_ws )
{
    __kmp_dispatch_init< kmp_uint64 >( loc, gtid, schedule, lb, ub, st, chunk,
                                       push_ws );
}

void
__kmp_aux_dispatch_fini_chunk_4( ident_t *loc, kmp_int32 gtid )
{
    __kmp_dispatch_finish_chunk< kmp_uint32 >( gtid, loc );
}

void
__kmp_aux_dispatch_fini_chunk_8( ident_t *loc, kmp_int32 gtid )
{
    __kmp_dispatch_finish_chunk< kmp_uint64 >( gtid, loc );
}

void
__kmp_aux_dispatch_fini_chunk_4u( ident_t *loc, kmp_int32 gtid )
{
    __kmp_dispatch_finish_chunk< kmp_uint32 >( gtid, loc );
}

void
__kmp_aux_dispatch_fini_chunk_8u( ident_t *loc, kmp_int32 gtid )
{
    __kmp_dispatch_finish_chunk< kmp_uint64 >( gtid, loc );
}

#endif /* KMP_GOMP_COMPAT */