/*
 * kmp_sched.cpp -- static scheduling -- iteration initialization
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

/* Static scheduling initialization.

   NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however
   it may change values between parallel regions. __kmp_max_nth
   is the largest value __kmp_nth may take, 1 is the smallest. */

#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_stats.h"
#include "kmp_str.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

#ifdef KMP_DEBUG
//-------------------------------------------------------------------------
// template for debug prints specification ( d, u, lld, llu )
char const *traits_t<int>::spec = "d";
char const *traits_t<unsigned int>::spec = "u";
char const *traits_t<long long>::spec = "lld";
char const *traits_t<unsigned long long>::spec = "llu";
char const *traits_t<long>::spec = "ld";
//-------------------------------------------------------------------------
#endif

#if KMP_STATS_ENABLED
#define KMP_STATS_LOOP_END(stat) \
  { \
    kmp_int64 t; \
    kmp_int64 u = (kmp_int64)(*pupper); \
    kmp_int64 l = (kmp_int64)(*plower); \
    kmp_int64 i = (kmp_int64)incr; \
    if (i == 1) { \
      t = u - l + 1; \
    } else if (i == -1) { \
      t = l - u + 1; \
    } else if (i > 0) { \
      t = (u - l) / i + 1; \
    } else { \
      t = (l - u) / (-i) + 1; \
    } \
    KMP_COUNT_VALUE(stat, t); \
    KMP_POP_PARTITIONED_TIMER(); \
  }
#else
#define KMP_STATS_LOOP_END(stat) /* Nothing */
#endif
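// A quick worked example of the trip-count arithmetic above (illustrative
// only, not part of the runtime logic): for a loop with *plower = 10,
// *pupper = 1 and incr = -3, the general negative-stride branch computes
// t = (l - u) / (-i) + 1 = (10 - 1) / 3 + 1 = 4, matching the iterations
// {10, 7, 4, 1}.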

static ident_t loc_stub = {0, KMP_IDENT_KMPC, 0, 0, ";unknown;unknown;0;0;;"};
static inline void check_loc(ident_t *&loc) {
  if (loc == NULL)
    loc = &loc_stub; // may need to report location info to ittnotify
}

template <typename T>
static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
                                  kmp_int32 schedtype, kmp_int32 *plastiter,
                                  T *plower, T *pupper,
                                  typename traits_t<T>::signed_t *pstride,
                                  typename traits_t<T>::signed_t incr,
                                  typename traits_t<T>::signed_t chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                  ,
                                  void *codeptr
#endif
) {
  KMP_COUNT_BLOCK(OMP_LOOP_STATIC);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static_scheduling);

  // Clear monotonic/nonmonotonic bits (ignore them)
  schedtype = SCHEDULE_WITHOUT_MODIFIERS(schedtype);

  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  /* this all has to be changed back to TID and such.. */
  kmp_int32 gtid = global_tid;
  kmp_uint32 tid;
  kmp_uint32 nth;
  UT trip_count;
  kmp_team_t *team;
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_team_info_t *team_info = NULL;
  ompt_task_info_t *task_info = NULL;
  ompt_work_t ompt_work_type = ompt_work_loop;

  static kmp_int8 warn = 0;

  if (ompt_enabled.ompt_callback_work || ompt_enabled.ompt_callback_dispatch) {
    // Only fully initialize variables needed by OMPT if OMPT is enabled.
    team_info = __ompt_get_teaminfo(0, NULL);
    task_info = __ompt_get_task_info_object(0);
    // Determine workshare type
    if (loc != NULL) {
      if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
        ompt_work_type = ompt_work_loop;
      } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
        ompt_work_type = ompt_work_sections;
      } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
        ompt_work_type = ompt_work_distribute;
      } else {
        kmp_int8 bool_res =
            KMP_COMPARE_AND_STORE_ACQ8(&warn, (kmp_int8)0, (kmp_int8)1);
        if (bool_res)
          KMP_WARNING(OmptOutdatedWorkshare);
      }
      KMP_DEBUG_ASSERT(ompt_work_type);
    }
  }
#endif

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pstride);
  KE_TRACE(10, ("__kmpc_for_static_init called (%d)\n", global_tid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s,"
        " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, global_tid, schedtype, *plastiter, *plower, *pupper,
                   *pstride, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(global_tid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
  }
  /* special handling for zero-trip loops */
  if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
    if (plastiter != NULL)
      *plastiter = FALSE;
    /* leave pupper and plower set to entire iteration space */
    *pstride = incr; /* value should never be used */
// *plower = *pupper - incr;
// let compiler bypass the illegal loop (like for(i=1;i<10;i--))
// THE LINE COMMENTED ABOVE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE
// ON A ZERO-TRIP LOOP (lower=1, upper=0,stride=1) - JPH June 23, 2009.
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init:(ZERO TRIP) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s "
                              "signed?<%s>, loc = %%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec, traits_t<T>::spec);
      check_loc(loc);
      KD_TRACE(100,
               (buff, *plastiter, *plower, *pupper, *pstride, loc->psource));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), 0, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }

  // Schedule enumerations above kmp_ord_upper that are not "distribute"
  // schedules are all dynamic, so they cannot reach this code path, which is
  // executed only for static schedules. Anything above kmp_ord_upper seen
  // here must therefore come from a "distribute" construct.
  if (schedtype > kmp_ord_upper) {
    // we are in DISTRIBUTE construct
    schedtype += kmp_sch_static -
                 kmp_distribute_static; // AC: convert to usual schedule type
    if (th->th.th_team->t.t_serialized > 1) {
      tid = 0;
      team = th->th.th_team;
    } else {
      tid = th->th.th_team->t.t_master_tid;
      team = th->th.th_team->t.t_parent;
    }
  } else {
    tid = __kmp_tid_from_gtid(global_tid);
    team = th->th.th_team;
  }

  /* determine if "for" loop is an active worksharing construct */
  if (team->t.t_serialized) {
    /* serialized parallel, each thread executes whole iteration space */
    if (plastiter != NULL)
      *plastiter = TRUE;
    /* leave pupper and plower set to entire iteration space */
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));

#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }
  nth = team->t.t_nproc;
  if (nth == 1) {
    if (plastiter != NULL)
      *plastiter = TRUE;
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }

  /* compute trip count */
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }
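  // Worked example (illustrative only): for *plower = 0, *pupper = 9 and
  // incr = 2, the incr > 0 branch gives trip_count = (9 - 0) / 2 + 1 = 5,
  // i.e. the iterations {0, 2, 4, 6, 8}. The cast to UT matters because
  // upper - lower can overflow the signed type T.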

#if KMP_STATS_ENABLED
  if (KMP_MASTER_GTID(gtid)) {
    KMP_COUNT_VALUE(OMP_loop_static_total_iterations, trip_count);
  }
#endif

  if (__kmp_env_consistency_check) {
    /* tripcount overflow? */
    if (trip_count == 0 && *pupper != *plower) {
      __kmp_error_construct(kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo,
                            loc);
    }
  }
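  // The overflow check above catches ranges whose trip count wraps the
  // unsigned type to exactly 0. Illustrative case for 32-bit T: lower =
  // INT32_MIN, upper = INT32_MAX, incr = 1 gives upper - lower + 1 = 2^32,
  // which is 0 in a kmp_uint32 even though the bounds differ.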

  /* compute remaining parameters */
  switch (schedtype) {
  case kmp_sch_static: {
    if (trip_count < nth) {
      KMP_DEBUG_ASSERT(
          __kmp_static == kmp_sch_static_greedy ||
          __kmp_static ==
              kmp_sch_static_balanced); // Unknown static scheduling type.
      if (tid < trip_count) {
        *pupper = *plower = *plower + tid * incr;
      } else {
        // set bounds so non-active threads execute no iterations
        *plower = *pupper + (incr > 0 ? 1 : -1);
      }
      if (plastiter != NULL)
        *plastiter = (tid == trip_count - 1);
    } else {
      if (__kmp_static == kmp_sch_static_balanced) {
        UT small_chunk = trip_count / nth;
        UT extras = trip_count % nth;
        *plower += incr * (tid * small_chunk + (tid < extras ? tid : extras));
        *pupper = *plower + small_chunk * incr - (tid < extras ? 0 : incr);
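        // Balanced partition, worked through (illustrative only): with
        // trip_count = 10, nth = 4, incr = 1 and an initial lower bound of
        // 0, small_chunk = 2 and extras = 2, so tids 0..3 receive the
        // ranges [0,2], [3,5], [6,7] and [8,9] -- the first 'extras'
        // threads each absorb one extra iteration.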
        if (plastiter != NULL)
          *plastiter = (tid == nth - 1);
      } else {
        T big_chunk_inc_count =
            (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
        T old_upper = *pupper;

        KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
        // Unknown static scheduling type.

        *plower += tid * big_chunk_inc_count;
        *pupper = *plower + big_chunk_inc_count - incr;
        if (incr > 0) {
          if (*pupper < *plower)
            *pupper = traits_t<T>::max_value;
          if (plastiter != NULL)
            *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
          if (*pupper > old_upper)
            *pupper = old_upper; // tracker C73258
        } else {
          if (*pupper > *plower)
            *pupper = traits_t<T>::min_value;
          if (plastiter != NULL)
            *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
          if (*pupper < old_upper)
            *pupper = old_upper; // tracker C73258
        }
      }
    }
    *pstride = trip_count;
    break;
  }
  case kmp_sch_static_chunked: {
    ST span;
    UT nchunks;
    if (chunk < 1)
      chunk = 1;
    else if ((UT)chunk > trip_count)
      chunk = trip_count;
    nchunks = (trip_count) / (UT)chunk + (trip_count % (UT)chunk ? 1 : 0);
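    // Illustrative only: trip_count = 10 with chunk = 3 yields nchunks = 4
    // (three full chunks of 3 plus a final chunk of 1); chunks are handed
    // out to threads round-robin with a stride of span * nth below.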
    span = chunk * incr;
    if (nchunks < nth) {
      *pstride = span * nchunks;
      if (tid < nchunks) {
        *plower = *plower + (span * tid);
        *pupper = *plower + span - incr;
      } else {
        *plower = *pupper + (incr > 0 ? 1 : -1);
      }
    } else {
      *pstride = span * nth;
      *plower = *plower + (span * tid);
      *pupper = *plower + span - incr;
    }
    if (plastiter != NULL)
      *plastiter = (tid == (nchunks - 1) % nth);
    break;
  }
  case kmp_sch_static_balanced_chunked: {
    T old_upper = *pupper;
    // round up to make sure the chunk is enough to cover all iterations
    UT span = (trip_count + nth - 1) / nth;

    // perform chunk adjustment
    chunk = (span + chunk - 1) & ~(chunk - 1);
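    // The mask form above rounds span up to a multiple of chunk, which is
    // exact only when chunk is a power of two (e.g. span = 10, chunk = 8
    // gives (10 + 7) & ~7 = 16); this schedule appears to be intended for
    // simd loops, where chunk is a power-of-two simd width.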

    span = chunk * incr;
    *plower = *plower + (span * tid);
    *pupper = *plower + span - incr;
    if (incr > 0) {
      if (*pupper > old_upper)
        *pupper = old_upper;
    } else if (*pupper < old_upper)
      *pupper = old_upper;

    if (plastiter != NULL)
      *plastiter = (tid == ((trip_count - 1) / (UT)chunk));
    break;
  }
  default:
    KMP_ASSERT2(0, "__kmpc_for_static_init: unknown scheduling type");
    break;
  }

#if USE_ITT_BUILD
  // Report loop metadata
  if (KMP_MASTER_TID(tid) && __itt_metadata_add_ptr &&
      __kmp_forkjoin_frames_mode == 3 && th->th.th_teams_microtask == NULL &&
      team->t.t_active_level == 1) {
    kmp_uint64 cur_chunk = chunk;
    check_loc(loc);
    // Calculate chunk in case it was not specified; it is specified for
    // kmp_sch_static_chunked
    if (schedtype == kmp_sch_static) {
      cur_chunk = trip_count / nth + ((trip_count % nth) ? 1 : 0);
    }
    // 0 - "static" schedule
    __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
  }
#endif
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmpc_for_static_init: liter=%%d lower=%%%s "
                            "upper=%%%s stride = %%%s signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
        &(task_info->task_data), trip_count, codeptr);
  }
  if (ompt_enabled.ompt_callback_dispatch) {
    ompt_dispatch_t dispatch_type;
    ompt_data_t instance = ompt_data_none;
    ompt_dispatch_chunk_t dispatch_chunk;
    if (ompt_work_type == ompt_work_sections) {
      dispatch_type = ompt_dispatch_section;
      instance.ptr = codeptr;
    } else {
      OMPT_GET_DISPATCH_CHUNK(dispatch_chunk, *plower, *pupper, incr);
      dispatch_type = (ompt_work_type == ompt_work_distribute)
                          ? ompt_dispatch_distribute_chunk
                          : ompt_dispatch_ws_loop_chunk;
      instance.ptr = &dispatch_chunk;
    }
    ompt_callbacks.ompt_callback(ompt_callback_dispatch)(
        &(team_info->parallel_data), &(task_info->task_data), dispatch_type,
        instance);
  }
#endif

  KMP_STATS_LOOP_END(OMP_loop_static_iterations);
  return;
}

template <typename T>
static void __kmp_dist_for_static_init(ident_t *loc, kmp_int32 gtid,
                                       kmp_int32 schedule, kmp_int32 *plastiter,
                                       T *plower, T *pupper, T *pupperDist,
                                       typename traits_t<T>::signed_t *pstride,
                                       typename traits_t<T>::signed_t incr,
                                       typename traits_t<T>::signed_t chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                       ,
                                       void *codeptr
#endif
) {
  KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute);
  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute_scheduling);
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 tid;
  kmp_uint32 nth;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pupperDist && pstride);
  KE_TRACE(10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
  __kmp_assert_valid_gtid(gtid);
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "
        "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100,
             (buff, gtid, schedule, *plastiter, *plower, *pupper, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(gtid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
      // The loop is illegal.
      // Some zero-trip loops are kept by the compiler, e.g.:
      // for(i=10;i<0;++i) // lower >= upper - run-time check
      // for(i=0;i>10;--i) // lower <= upper - run-time check
      // for(i=0;i>10;++i) // incr > 0 - compile-time check
      // for(i=10;i<0;--i) // incr < 0 - compile-time check
      // Compiler does not check the following illegal loops:
      // for(i=0;i<10;i+=incr) // where incr<0
      // for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  tid = __kmp_tid_from_gtid(gtid);
  th = __kmp_threads[gtid];
  nth = th->th.th_team_nproc;
  team = th->th.th_team;
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute global trip count
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

  *pstride = *pupper - *plower; // just in case (can be unused)
  if (trip_count <= nteams) {
    KMP_DEBUG_ASSERT(
        __kmp_static == kmp_sch_static_greedy ||
        __kmp_static ==
            kmp_sch_static_balanced); // Unknown static scheduling type.
    // only the primary threads of the first trip_count teams get a single
    // iteration each; all other threads get nothing
    if (team_id < trip_count && tid == 0) {
      *pupper = *pupperDist = *plower = *plower + team_id * incr;
    } else {
      *pupperDist = *pupper;
      *plower = *pupper + incr; // compiler should skip loop body
    }
    if (plastiter != NULL)
      *plastiter = (tid == 0 && team_id == trip_count - 1);
  } else {
    // Get the team's chunk first (each team gets at most one chunk)
    if (__kmp_static == kmp_sch_static_balanced) {
      UT chunkD = trip_count / nteams;
      UT extras = trip_count % nteams;
      *plower +=
          incr * (team_id * chunkD + (team_id < extras ? team_id : extras));
      *pupperDist = *plower + chunkD * incr - (team_id < extras ? 0 : incr);
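      // Illustrative only: trip_count = 100 over nteams = 3 gives
      // chunkD = 33 and extras = 1, so team 0 covers 34 iterations and
      // teams 1 and 2 cover 33 each -- the same balanced split used for
      // threads in __kmp_for_static_init, applied at team granularity.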
      if (plastiter != NULL)
        *plastiter = (team_id == nteams - 1);
    } else {
      T chunk_inc_count =
          (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr;
      T upper = *pupper;
      KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
      // Unknown static scheduling type.
      *plower += team_id * chunk_inc_count;
      *pupperDist = *plower + chunk_inc_count - incr;
      // Check/correct bounds if needed
      if (incr > 0) {
        if (*pupperDist < *plower)
          *pupperDist = traits_t<T>::max_value;
        if (plastiter != NULL)
          *plastiter = *plower <= upper && *pupperDist > upper - incr;
        if (*pupperDist > upper)
          *pupperDist = upper; // tracker C73258
        if (*plower > *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      } else {
        if (*pupperDist > *plower)
          *pupperDist = traits_t<T>::min_value;
        if (plastiter != NULL)
          *plastiter = *plower >= upper && *pupperDist < upper - incr;
        if (*pupperDist < upper)
          *pupperDist = upper; // tracker C73258
        if (*plower < *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      }
    }
    // Get the parallel loop chunk now (for thread)
    // compute trip count for team's chunk
    if (incr == 1) {
      trip_count = *pupperDist - *plower + 1;
    } else if (incr == -1) {
      trip_count = *plower - *pupperDist + 1;
    } else if (incr > 1) {
      // upper-lower can exceed the limit of signed type
      trip_count = (UT)(*pupperDist - *plower) / incr + 1;
    } else {
      trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1;
    }
    KMP_DEBUG_ASSERT(trip_count);
    switch (schedule) {
    case kmp_sch_static: {
      if (trip_count <= nth) {
        KMP_DEBUG_ASSERT(
            __kmp_static == kmp_sch_static_greedy ||
            __kmp_static ==
                kmp_sch_static_balanced); // Unknown static scheduling type.
        if (tid < trip_count)
          *pupper = *plower = *plower + tid * incr;
        else
          *plower = *pupper + incr; // no iterations available
        if (plastiter != NULL)
          if (*plastiter != 0 && !(tid == trip_count - 1))
            *plastiter = 0;
      } else {
        if (__kmp_static == kmp_sch_static_balanced) {
          UT chunkL = trip_count / nth;
          UT extras = trip_count % nth;
          *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
          *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
          if (plastiter != NULL)
            if (*plastiter != 0 && !(tid == nth - 1))
              *plastiter = 0;
        } else {
          T chunk_inc_count =
              (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
          T upper = *pupperDist;
          KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
          // Unknown static scheduling type.
          *plower += tid * chunk_inc_count;
          *pupper = *plower + chunk_inc_count - incr;
          if (incr > 0) {
            if (*pupper < *plower)
              *pupper = traits_t<T>::max_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower <= upper && *pupper > upper - incr))
                *plastiter = 0;
            if (*pupper > upper)
              *pupper = upper; // tracker C73258
          } else {
            if (*pupper > *plower)
              *pupper = traits_t<T>::min_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower >= upper && *pupper < upper - incr))
                *plastiter = 0;
            if (*pupper < upper)
              *pupper = upper; // tracker C73258
          }
        }
      }
      break;
    }
    case kmp_sch_static_chunked: {
      ST span;
      if (chunk < 1)
        chunk = 1;
      span = chunk * incr;
      *pstride = span * nth;
      *plower = *plower + (span * tid);
      *pupper = *plower + span - incr;
      if (plastiter != NULL)
        if (*plastiter != 0 && !(tid == ((trip_count - 1) / (UT)chunk) % nth))
          *plastiter = 0;
      break;
    }
    default:
      KMP_ASSERT2(0,
                  "__kmpc_dist_for_static_init: unknown loop scheduling type");
      break;
    }
  }
end:;
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "
        "stride=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pupperDist, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid));
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work || ompt_enabled.ompt_callback_dispatch) {
    ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_distribute, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), 0, codeptr);
    }
    if (ompt_enabled.ompt_callback_dispatch) {
      ompt_data_t instance = ompt_data_none;
      ompt_dispatch_chunk_t dispatch_chunk;
      OMPT_GET_DISPATCH_CHUNK(dispatch_chunk, *plower, *pupperDist, incr);
      instance.ptr = &dispatch_chunk;
      ompt_callbacks.ompt_callback(ompt_callback_dispatch)(
          &(team_info->parallel_data), &(task_info->task_data),
          ompt_dispatch_distribute_chunk, instance);
    }
  }
#endif // OMPT_SUPPORT && OMPT_OPTIONAL
  KMP_STATS_LOOP_END(OMP_distribute_iterations);
  return;
}

template <typename T>
static void __kmp_team_static_init(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 *p_last, T *p_lb, T *p_ub,
                                   typename traits_t<T>::signed_t *p_st,
                                   typename traits_t<T>::signed_t incr,
                                   typename traits_t<T>::signed_t chunk) {
  // The routine returns the first chunk distributed to the team and the
  // stride for computing subsequent chunks. The last-iteration flag is set
  // for the team that will execute the last iteration of the loop.
  // The routine is called for dist_schedule(static, chunk) only.
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  T lower;
  T upper;
  ST span;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(p_last && p_lb && p_ub && p_st);
  KE_TRACE(10, ("__kmp_team_static_init called (%d)\n", gtid));
  __kmp_assert_valid_gtid(gtid);
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmp_team_static_init enter: T#%%d liter=%%d "
                            "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<ST>::spec,
                            traits_t<T>::spec);
    KD_TRACE(100, (buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif

  lower = *p_lb;
  upper = *p_ub;
  if (__kmp_env_consistency_check) {
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (upper < lower) : (lower < upper)) {
      // The loop is illegal.
      // Some zero-trip loops are kept by the compiler, e.g.:
      // for(i=10;i<0;++i) // lower >= upper - run-time check
      // for(i=0;i>10;--i) // lower <= upper - run-time check
      // for(i=0;i>10;++i) // incr > 0 - compile-time check
      // for(i=10;i<0;--i) // incr < 0 - compile-time check
      // Compiler does not check the following illegal loops:
      // for(i=0;i<10;i+=incr) // where incr<0
      // for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  th = __kmp_threads[gtid];
  team = th->th.th_team;
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute trip count
  if (incr == 1) {
    trip_count = upper - lower + 1;
  } else if (incr == -1) {
    trip_count = lower - upper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(upper - lower) / incr + 1;
  } else {
    trip_count = (UT)(lower - upper) / (-incr) + 1;
  }
  if (chunk < 1)
    chunk = 1;
  span = chunk * incr;
  *p_st = span * nteams;
  *p_lb = lower + (span * team_id);
  *p_ub = *p_lb + span - incr;
  if (p_last != NULL)
    *p_last = (team_id == ((trip_count - 1) / (UT)chunk) % nteams);
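  // Worked example (illustrative only): lower = 0, upper = 99, incr = 1,
  // chunk = 10 and nteams = 4 gives trip_count = 100, span = 10 and
  // *p_st = 40; team 2 starts with [20, 29] and advances by 40 per chunk.
  // The last chunk has index (100 - 1) / 10 = 9, and 9 % 4 = 1, so team 1
  // gets the last-iteration flag.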
  // Correct upper bound if needed
  if (incr > 0) {
    if (*p_ub < *p_lb) // overflow?
      *p_ub = traits_t<T>::max_value;
    if (*p_ub > upper)
      *p_ub = upper; // tracker C73258
  } else { // incr < 0
    if (*p_ub > *p_lb)
      *p_ub = traits_t<T>::min_value;
    if (*p_ub < upper)
      *p_ub = upper; // tracker C73258
  }
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff =
        __kmp_str_format("__kmp_team_static_init exit: T#%%d team%%u liter=%%d "
                         "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
                         traits_t<T>::spec, traits_t<T>::spec,
                         traits_t<ST>::spec, traits_t<ST>::spec);
    KD_TRACE(100, (buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif
}

//------------------------------------------------------------------------------
extern "C" {
void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                              kmp_int32 *plastiter, kmp_int32 *plower,
                              kmp_int32 *pupper, kmp_int32 *pstride,
                              kmp_int32 incr, kmp_int32 chunk) {
  __kmp_for_static_init<kmp_int32>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}
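
// A minimal sketch (illustrative pseudo-codegen, not the output of any
// particular compiler) of how a front end typically lowers
//   #pragma omp for schedule(static)
//   for (int i = 0; i < n; ++i) body(i);
// into a per-thread call to __kmpc_for_static_init_4:
//   kmp_int32 last = 0, lower = 0, upper = n - 1, stride = 1;
//   __kmpc_for_static_init_4(&loc, gtid, kmp_sch_static, &last, &lower,
//                            &upper, &stride, /*incr=*/1, /*chunk=*/0);
//   for (kmp_int32 i = lower; i <= upper; ++i)
//     body(i);
//   __kmpc_for_static_fini(&loc, gtid);
// The runtime rewrites lower/upper to the calling thread's sub-range and
// sets last when that thread owns the sequentially-final iteration.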

void __kmpc_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint32 *plower, kmp_uint32 *pupper,
                               kmp_int32 *pstride, kmp_int32 incr,
                               kmp_int32 chunk) {
  __kmp_for_static_init<kmp_uint32>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                    ,
                                    OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}

void __kmpc_for_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                              kmp_int32 *plastiter, kmp_int64 *plower,
                              kmp_int64 *pupper, kmp_int64 *pstride,
                              kmp_int64 incr, kmp_int64 chunk) {
  __kmp_for_static_init<kmp_int64>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}

void __kmpc_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint64 *plower, kmp_uint64 *pupper,
                               kmp_int64 *pstride, kmp_int64 incr,
                               kmp_int64 chunk) {
  __kmp_for_static_init<kmp_uint64>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                    ,
                                    OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}

#if OMPT_SUPPORT && OMPT_OPTIONAL
#define OMPT_CODEPTR_ARG , OMPT_GET_RETURN_ADDRESS(0)
#else
#define OMPT_CODEPTR_ARG
#endif

void __kmpc_dist_for_static_init_4(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int32 *plower, kmp_int32 *pupper,
                                   kmp_int32 *pupperD, kmp_int32 *pstride,
                                   kmp_int32 incr, kmp_int32 chunk) {
  __kmp_dist_for_static_init<kmp_int32>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr,
                                        chunk OMPT_CODEPTR_ARG);
}

void __kmpc_dist_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint32 *plower, kmp_uint32 *pupper,
                                    kmp_uint32 *pupperD, kmp_int32 *pstride,
                                    kmp_int32 incr, kmp_int32 chunk) {
  __kmp_dist_for_static_init<kmp_uint32>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr,
                                         chunk OMPT_CODEPTR_ARG);
}

void __kmpc_dist_for_static_init_8(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int64 *plower, kmp_int64 *pupper,
                                   kmp_int64 *pupperD, kmp_int64 *pstride,
                                   kmp_int64 incr, kmp_int64 chunk) {
  __kmp_dist_for_static_init<kmp_int64>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr,
                                        chunk OMPT_CODEPTR_ARG);
}

void __kmpc_dist_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint64 *plower, kmp_uint64 *pupper,
                                    kmp_uint64 *pupperD, kmp_int64 *pstride,
                                    kmp_int64 incr, kmp_int64 chunk) {
  __kmp_dist_for_static_init<kmp_uint64>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr,
                                         chunk OMPT_CODEPTR_ARG);
}
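
// A minimal sketch (illustrative pseudo-codegen, same assumptions as the
// sketch above) of the dist variant: for
//   #pragma omp distribute parallel for dist_schedule(static)
// the compiler passes an extra upper bound, pupperD, which receives the
// team's portion of the iteration space, while pupper receives the calling
// thread's portion within it:
//   kmp_int32 last = 0, lower = 0, upper = n - 1, upperD = n - 1, stride = 1;
//   __kmpc_dist_for_static_init_4(&loc, gtid, kmp_sch_static, &last, &lower,
//                                 &upper, &upperD, &stride, 1, 0);
//   for (kmp_int32 i = lower; i <= upper; ++i)
//     body(i);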

//------------------------------------------------------------------------------
// Auxiliary routines for Distribute Parallel Loop construct implementation
// Transfer call to template< type T >
// __kmp_team_static_init( ident_t *loc, int gtid,
// int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )

void __kmpc_team_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                               kmp_int32 *p_lb, kmp_int32 *p_ub,
                               kmp_int32 *p_st, kmp_int32 incr,
                               kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                    chunk);
}

void __kmpc_team_static_init_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                                kmp_uint32 *p_lb, kmp_uint32 *p_ub,
                                kmp_int32 *p_st, kmp_int32 incr,
                                kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                     chunk);
}

void __kmpc_team_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                               kmp_int64 *p_lb, kmp_int64 *p_ub,
                               kmp_int64 *p_st, kmp_int64 incr,
                               kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                    chunk);
}

void __kmpc_team_static_init_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                                kmp_uint64 *p_lb, kmp_uint64 *p_ub,
                                kmp_int64 *p_st, kmp_int64 incr,
                                kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                     chunk);
}
} // extern "C"