LLVM OpenMP* Runtime Library
kmp.h
1 
2 /*
3  * kmp.h -- KPTS runtime header file.
4  */
5 
6 
7 //===----------------------------------------------------------------------===//
8 //
9 // The LLVM Compiler Infrastructure
10 //
11 // This file is dual licensed under the MIT and the University of Illinois Open
12 // Source Licenses. See LICENSE.txt for details.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 
17 #ifndef KMP_H
18 #define KMP_H
19 
20 #include "kmp_config.h"
21 
22 /* #define BUILD_PARALLEL_ORDERED 1 */
23 
24 /* This fix replaces gettimeofday with clock_gettime for better scalability on
25  the Altix. Requires user code to be linked with -lrt.
26 */
27 //#define FIX_SGI_CLOCK
28 
29 /* Defines for OpenMP 3.0 tasking and auto scheduling */
30 
31 # ifndef KMP_STATIC_STEAL_ENABLED
32 # define KMP_STATIC_STEAL_ENABLED 1
33 # endif
34 
35 #define TASK_CURRENT_NOT_QUEUED 0
36 #define TASK_CURRENT_QUEUED 1
37 
38 #ifdef BUILD_TIED_TASK_STACK
39 #define TASK_STACK_EMPTY 0 // entries when the stack is empty
40 
41 #define TASK_STACK_BLOCK_BITS 5 // Used to define TASK_STACK_SIZE and TASK_STACK_MASK
42 #define TASK_STACK_BLOCK_SIZE ( 1 << TASK_STACK_BLOCK_BITS ) // Number of entries in each task stack array
43 #define TASK_STACK_INDEX_MASK ( TASK_STACK_BLOCK_SIZE - 1 ) // Mask for determining index into stack block
44 #endif // BUILD_TIED_TASK_STACK
45 
46 #define TASK_NOT_PUSHED 1
47 #define TASK_SUCCESSFULLY_PUSHED 0
48 #define TASK_TIED 1
49 #define TASK_UNTIED 0
50 #define TASK_EXPLICIT 1
51 #define TASK_IMPLICIT 0
52 #define TASK_PROXY 1
53 #define TASK_FULL 0
54 
55 #define KMP_CANCEL_THREADS
56 #define KMP_THREAD_ATTR
57 
58 #include <stdio.h>
59 #include <stdlib.h>
60 #include <stddef.h>
61 #include <stdarg.h>
62 #include <string.h>
63 #include <signal.h>
64 /* do not include <ctype.h>; it causes problems with /MD on Windows* OS NT due to a bad Microsoft library */
65 /* some macros provided below to replace some of these functions */
66 #ifndef __ABSOFT_WIN
67 #include <sys/types.h>
68 #endif
69 #include <limits.h>
70 #include <time.h>
71 
72 #include <errno.h>
73 
74 #include "kmp_os.h"
75 
76 #include "kmp_safe_c_api.h"
77 
78 #if KMP_STATS_ENABLED
79 class kmp_stats_list;
80 #endif
81 
82 #if KMP_USE_HWLOC && KMP_AFFINITY_SUPPORTED
83 # include "hwloc.h"
84 #endif
85 
86 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
87 #include <xmmintrin.h>
88 #endif
89 
90 #include "kmp_version.h"
91 #include "kmp_debug.h"
92 #include "kmp_lock.h"
93 #if USE_DEBUGGER
94 #include "kmp_debugger.h"
95 #endif
96 #include "kmp_i18n.h"
97 
98 #define KMP_HANDLE_SIGNALS (KMP_OS_UNIX || KMP_OS_WINDOWS)
99 
100 #include "kmp_wrapper_malloc.h"
101 #if KMP_OS_UNIX
102 # include <unistd.h>
103 # if !defined NSIG && defined _NSIG
104 # define NSIG _NSIG
105 # endif
106 #endif
107 
108 #if KMP_OS_LINUX
109 # pragma weak clock_gettime
110 #endif
111 
112 #if OMPT_SUPPORT
113 #include "ompt-internal.h"
114 #endif
115 
116 /* Select data placement in NUMA memory */
117 #define NO_FIRST_TOUCH 0
118 #define FIRST_TOUCH 1 /* Exploit SGI's first touch page placement algo */
119 
120 /* If not specified on compile command line, assume no first touch */
121 #ifndef BUILD_MEMORY
122 #define BUILD_MEMORY NO_FIRST_TOUCH
123 #endif
124 
125 // 0 - no fast memory allocation, alignment: 8-byte on x86, 16-byte on x64.
126 // 3 - fast allocation using sync, non-sync free lists of any size, non-self free lists of limited size.
127 #ifndef USE_FAST_MEMORY
128 #define USE_FAST_MEMORY 3
129 #endif
130 
131 #ifndef KMP_NESTED_HOT_TEAMS
132 # define KMP_NESTED_HOT_TEAMS 0
133 # define USE_NESTED_HOT_ARG(x)
134 #else
135 # if KMP_NESTED_HOT_TEAMS
136 # if OMP_40_ENABLED
137 # define USE_NESTED_HOT_ARG(x) ,x
138 # else
139 // Nested hot teams feature depends on omp 4.0, disable it for earlier versions
140 # undef KMP_NESTED_HOT_TEAMS
141 # define KMP_NESTED_HOT_TEAMS 0
142 # define USE_NESTED_HOT_ARG(x)
143 # endif
144 # else
145 # define USE_NESTED_HOT_ARG(x)
146 # endif
147 #endif
148 
149 // By default, assume BGET uses a compare-exchange instruction instead of a lock.
150 #ifndef USE_CMP_XCHG_FOR_BGET
151 #define USE_CMP_XCHG_FOR_BGET 1
152 #endif
153 
154 // Test to see if queuing lock is better than bootstrap lock for bget
155 // #ifndef USE_QUEUING_LOCK_FOR_BGET
156 // #define USE_QUEUING_LOCK_FOR_BGET
157 // #endif
158 
159 #define KMP_NSEC_PER_SEC 1000000000L
160 #define KMP_USEC_PER_SEC 1000000L
161 
167 // FIXME DOXYGEN... need to group these flags somehow (Making them an anonymous enum would do it...)
172 #define KMP_IDENT_IMB 0x01
173 
174 #define KMP_IDENT_KMPC 0x02
175 /* 0x04 is no longer used */
177 #define KMP_IDENT_AUTOPAR 0x08
178 
179 #define KMP_IDENT_ATOMIC_REDUCE 0x10
180 
181 #define KMP_IDENT_BARRIER_EXPL 0x20
182 
183 #define KMP_IDENT_BARRIER_IMPL 0x0040
184 #define KMP_IDENT_BARRIER_IMPL_MASK 0x01C0
185 #define KMP_IDENT_BARRIER_IMPL_FOR 0x0040
186 #define KMP_IDENT_BARRIER_IMPL_SECTIONS 0x00C0
187 
188 #define KMP_IDENT_BARRIER_IMPL_SINGLE 0x0140
189 #define KMP_IDENT_BARRIER_IMPL_WORKSHARE 0x01C0
190 
194 typedef struct ident {
195  kmp_int32 reserved_1;
196  kmp_int32 flags;
197  kmp_int32 reserved_2;
198 #if USE_ITT_BUILD
199  /* reserved_2 is currently used for storing region-specific ITT */
200  /* contextual information. */
201 #endif /* USE_ITT_BUILD */
202  kmp_int32 reserved_3;
203  char const *psource;
207 } ident_t;
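/* Editorial usage sketch (not part of the original header): a location record
 * as a KMP-aware compiler might emit it. The semicolon-separated psource
 * layout shown (";file;function;line_begin;line_end;;") and the file/function
 * contents are assumptions; the field order follows the struct above. */
#if 0
static ident_t loc_example = {
    0,                          /* reserved_1 */
    KMP_IDENT_KMPC,             /* flags: set by a KMP-aware compiler */
    0,                          /* reserved_2 */
    0,                          /* reserved_3 */
    ";example.c;main;10;12;;"   /* psource */
};
#endif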
212 // Some forward declarations.
213 
214 typedef union kmp_team kmp_team_t;
215 typedef struct kmp_taskdata kmp_taskdata_t;
216 typedef union kmp_task_team kmp_task_team_t;
217 typedef union kmp_team kmp_team_p;
218 typedef union kmp_info kmp_info_p;
219 typedef union kmp_root kmp_root_p;
220 
221 #ifdef __cplusplus
222 extern "C" {
223 #endif
224 
225 /* ------------------------------------------------------------------------ */
226 /* ------------------------------------------------------------------------ */
227 
228 /* Pack two 32-bit signed integers into a 64-bit signed integer */
229 /* ToDo: Fix word ordering for big-endian machines. */
230 #define KMP_PACK_64(HIGH_32,LOW_32) \
231  ( (kmp_int64) ((((kmp_uint64)(HIGH_32))<<32) | (kmp_uint64)(LOW_32)) )
232 
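/* Worked example (editorial sketch): KMP_PACK_64(0x1, 0x2) evaluates to
 * 0x0000000100000002, i.e. HIGH_32 lands in bits 63..32 and LOW_32 in bits
 * 31..0 of the resulting kmp_int64. */
#if 0
kmp_int64 packed = KMP_PACK_64(0x1, 0x2); /* == 0x0000000100000002LL */
#endif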
233 
234 /*
235  * Generic string manipulation macros.
236  * Assume that _x is of type char *
237  */
238 #define SKIP_WS(_x) { while (*(_x) == ' ' || *(_x) == '\t') (_x)++; }
239 #define SKIP_DIGITS(_x) { while (*(_x) >= '0' && *(_x) <= '9') (_x)++; }
240 #define SKIP_TO(_x,_c) { while (*(_x) != '\0' && *(_x) != (_c)) (_x)++; }
241 
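/* Usage sketch (editorial, hypothetical buffer contents): walking a settings
 * string with the macros above. */
#if 0
char buf[] = "  42,dynamic";
char *scan = buf;
SKIP_WS(scan);           /* scan -> "42,dynamic" */
char *digits = scan;     /* remember where the number starts */
SKIP_DIGITS(scan);       /* scan -> ",dynamic" */
SKIP_TO(scan, ',');      /* already at the comma, so this is a no-op here */
#endif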
242 /* ------------------------------------------------------------------------ */
243 /* ------------------------------------------------------------------------ */
244 
245 #define KMP_MAX( x, y ) ( (x) > (y) ? (x) : (y) )
246 #define KMP_MIN( x, y ) ( (x) < (y) ? (x) : (y) )
247 
248 /* ------------------------------------------------------------------------ */
249 /* ------------------------------------------------------------------------ */
250 
251 
252 /* Enumeration types */
253 
254 enum kmp_state_timer {
255  ts_stop,
256  ts_start,
257  ts_pause,
258 
259  ts_last_state
260 };
261 
262 enum dynamic_mode {
263  dynamic_default,
264 #ifdef USE_LOAD_BALANCE
265  dynamic_load_balance,
266 #endif /* USE_LOAD_BALANCE */
267  dynamic_random,
268  dynamic_thread_limit,
269  dynamic_max
270 };
271 
272 /* external schedule constants; duplicates enum omp_sched from omp.h so that omp.h need not be included here */
273 #ifndef KMP_SCHED_TYPE_DEFINED
274 #define KMP_SCHED_TYPE_DEFINED
275 typedef enum kmp_sched {
276  kmp_sched_lower = 0, // lower and upper bounds are for routine parameter check
277  // Note: need to adjust __kmp_sch_map global array in case this enum is changed
278  kmp_sched_static = 1, // mapped to kmp_sch_static_chunked (33)
279  kmp_sched_dynamic = 2, // mapped to kmp_sch_dynamic_chunked (35)
280  kmp_sched_guided = 3, // mapped to kmp_sch_guided_chunked (36)
281  kmp_sched_auto = 4, // mapped to kmp_sch_auto (38)
282  kmp_sched_upper_std = 5, // upper bound for standard schedules
283  kmp_sched_lower_ext = 100, // lower bound of Intel extension schedules
284  kmp_sched_trapezoidal = 101, // mapped to kmp_sch_trapezoidal (39)
285 // kmp_sched_static_steal = 102, // mapped to kmp_sch_static_steal (44)
286  kmp_sched_upper = 102,
287  kmp_sched_default = kmp_sched_static // default scheduling
288 } kmp_sched_t;
289 #endif
290 
297  kmp_sch_static_chunked = 33,
299  kmp_sch_dynamic_chunked = 35,
301  kmp_sch_runtime = 37,
303  kmp_sch_trapezoidal = 39,
304 
305  /* accessible only through KMP_SCHEDULE environment variable */
306  kmp_sch_static_greedy = 40,
307  kmp_sch_static_balanced = 41,
308  /* accessible only through KMP_SCHEDULE environment variable */
309  kmp_sch_guided_iterative_chunked = 42,
310  kmp_sch_guided_analytical_chunked = 43,
311 
314 #if OMP_45_ENABLED
315  kmp_sch_static_balanced_chunked = 45,
316 #endif
317 
318  /* accessible only through KMP_SCHEDULE environment variable */
322  kmp_ord_static_chunked = 65,
324  kmp_ord_dynamic_chunked = 67,
325  kmp_ord_guided_chunked = 68,
326  kmp_ord_runtime = 69,
328  kmp_ord_trapezoidal = 71,
331 #if OMP_40_ENABLED
332  /* Schedules for Distribute construct */
335 #endif
336 
337  /*
338  * For the "nomerge" versions, kmp_dispatch_next*() will always return
339  * a single iteration/chunk, even if the loop is serialized. For the
340  * schedule types listed above, the entire iteration vector is returned
341  * if the loop is serialized. This doesn't work for gcc/gcomp sections.
342  */
343  kmp_nm_lower = 160,
345  kmp_nm_static_chunked = (kmp_sch_static_chunked - kmp_sch_lower + kmp_nm_lower),
347  kmp_nm_dynamic_chunked = 163,
349  kmp_nm_runtime = 165,
350  kmp_nm_auto = 166,
351  kmp_nm_trapezoidal = 167,
352 
353  /* accessible only through KMP_SCHEDULE environment variable */
354  kmp_nm_static_greedy = 168,
355  kmp_nm_static_balanced = 169,
356  /* accessible only through KMP_SCHEDULE environment variable */
357  kmp_nm_guided_iterative_chunked = 170,
358  kmp_nm_guided_analytical_chunked = 171,
359  kmp_nm_static_steal = 172, /* accessible only through OMP_SCHEDULE environment variable */
360 
361  kmp_nm_ord_static_chunked = 193,
363  kmp_nm_ord_dynamic_chunked = 195,
364  kmp_nm_ord_guided_chunked = 196,
365  kmp_nm_ord_runtime = 197,
367  kmp_nm_ord_trapezoidal = 199,
368  kmp_nm_upper = 200,
370 #if OMP_45_ENABLED
371  /* Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
372  * Since we need to distinguish the three possible cases (no modifier, monotonic modifier,
373  * nonmonotonic modifier), we need separate bits for each modifier.
374  * The absence of monotonic does not imply nonmonotonic, especially since 4.5 says
375  * that the behaviour of the "no modifier" case is implementation defined,
376  * but will become "nonmonotonic" in 5.0.
377  *
378  * Since we're passing a full 32 bit value, we can use a couple of high bits for these
379  * flags; out of paranoia we avoid the sign bit.
380  *
381  * These modifiers can be or-ed into non-static schedules by the compiler to pass
382  * the additional information.
383  * They will be stripped early in the processing in __kmp_dispatch_init when setting up schedules, so
384  * most of the code won't ever see schedules with these bits set.
385  */
386  kmp_sch_modifier_monotonic = (1<<29),
387  kmp_sch_modifier_nonmonotonic = (1<<30),
389 # define SCHEDULE_WITHOUT_MODIFIERS(s) (enum sched_type)((s) & ~ (kmp_sch_modifier_nonmonotonic | kmp_sch_modifier_monotonic))
390 # define SCHEDULE_HAS_MONOTONIC(s) (((s) & kmp_sch_modifier_monotonic) != 0)
391 # define SCHEDULE_HAS_NONMONOTONIC(s) (((s) & kmp_sch_modifier_nonmonotonic) != 0)
392 # define SCHEDULE_HAS_NO_MODIFIERS(s) (((s) & (kmp_sch_modifier_nonmonotonic | kmp_sch_modifier_monotonic)) == 0)
393 #else
394  /* By doing this we hope to avoid multiple tests on OMP_45_ENABLED. Compilers can now eliminate tests on compile time
395  * constants and dead code that results from them, so we can leave code guarded by such an if in place.
396  */
397 # define SCHEDULE_WITHOUT_MODIFIERS(s) (s)
398 # define SCHEDULE_HAS_MONOTONIC(s) false
399 # define SCHEDULE_HAS_NONMONOTONIC(s) false
400 # define SCHEDULE_HAS_NO_MODIFIERS(s) true
401 #endif
402 
404 };
405 
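/* Usage sketch (editorial): stripping and testing the OpenMP 4.5 schedule
 * modifiers with the macros above; assumes OMP_45_ENABLED so that the
 * kmp_sch_modifier_* enumerators exist. */
#if 0
enum sched_type s = (enum sched_type)
    (kmp_sch_dynamic_chunked | kmp_sch_modifier_nonmonotonic);
enum sched_type base = SCHEDULE_WITHOUT_MODIFIERS(s); /* kmp_sch_dynamic_chunked */
int has_nm = SCHEDULE_HAS_NONMONOTONIC(s);            /* non-zero */
int plain  = SCHEDULE_HAS_NO_MODIFIERS(s);            /* zero */
#endif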
406 /* Type to keep runtime schedule set via OMP_SCHEDULE or omp_set_schedule() */
407 typedef struct kmp_r_sched {
408  enum sched_type r_sched_type;
409  int chunk;
410 } kmp_r_sched_t;
411 
412 extern enum sched_type __kmp_sch_map[]; // map OMP 3.0 schedule types to our internal schedule types
413 
414 enum library_type {
415  library_none,
416  library_serial,
417  library_turnaround,
418  library_throughput
419 };
420 
421 #if KMP_OS_LINUX
422 enum clock_function_type {
423  clock_function_gettimeofday,
424  clock_function_clock_gettime
425 };
426 #endif /* KMP_OS_LINUX */
427 
428 #if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
429 enum mic_type {
430  non_mic,
431  mic1,
432  mic2,
433  mic3,
434  dummy
435 };
436 #endif
437 
438 /* ------------------------------------------------------------------------ */
439 /* -- fast reduction stuff ------------------------------------------------ */
440 
441 #undef KMP_FAST_REDUCTION_BARRIER
442 #define KMP_FAST_REDUCTION_BARRIER 1
443 
444 #undef KMP_FAST_REDUCTION_CORE_DUO
445 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
446  #define KMP_FAST_REDUCTION_CORE_DUO 1
447 #endif
448 
449 enum _reduction_method {
450  reduction_method_not_defined = 0,
451  critical_reduce_block = ( 1 << 8 ),
452  atomic_reduce_block = ( 2 << 8 ),
453  tree_reduce_block = ( 3 << 8 ),
454  empty_reduce_block = ( 4 << 8 )
455 };
456 
457 // Description of the packed_reduction_method variable:
458 // the packed_reduction_method variable packs two enum values into byte 0 and byte 1 of an int:
459 // byte 0: ( packed_reduction_method & 0x000000FF ) is the 'enum barrier_type' value of the barrier that will be used in fast reduction: bs_plain_barrier or bs_reduction_barrier
460 // byte 1: ( packed_reduction_method & 0x0000FF00 ) is the reduction method that will be used in fast reduction;
461 // the reduction method is of 'enum _reduction_method' type and is defined so that the bits of byte 0 stay empty,
462 // so no shift instruction is needed when packing/unpacking
463 
464 #if KMP_FAST_REDUCTION_BARRIER
465  #define PACK_REDUCTION_METHOD_AND_BARRIER(reduction_method,barrier_type) \
466  ( ( reduction_method ) | ( barrier_type ) )
467 
468  #define UNPACK_REDUCTION_METHOD(packed_reduction_method) \
469  ( ( enum _reduction_method )( ( packed_reduction_method ) & ( 0x0000FF00 ) ) )
470 
471  #define UNPACK_REDUCTION_BARRIER(packed_reduction_method) \
472  ( ( enum barrier_type )( ( packed_reduction_method ) & ( 0x000000FF ) ) )
473 #else
474  #define PACK_REDUCTION_METHOD_AND_BARRIER(reduction_method,barrier_type) \
475  ( reduction_method )
476 
477  #define UNPACK_REDUCTION_METHOD(packed_reduction_method) \
478  ( packed_reduction_method )
479 
480  #define UNPACK_REDUCTION_BARRIER(packed_reduction_method) \
481  ( bs_plain_barrier )
482 #endif
483 
484 #define TEST_REDUCTION_METHOD(packed_reduction_method,which_reduction_block) \
485  ( ( UNPACK_REDUCTION_METHOD( packed_reduction_method ) ) == ( which_reduction_block ) )
486 
487 #if KMP_FAST_REDUCTION_BARRIER
488  #define TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER \
489  ( PACK_REDUCTION_METHOD_AND_BARRIER( tree_reduce_block, bs_reduction_barrier ) )
490 
491  #define TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER \
492  ( PACK_REDUCTION_METHOD_AND_BARRIER( tree_reduce_block, bs_plain_barrier ) )
493 #endif
494 
495 typedef int PACKED_REDUCTION_METHOD_T;
496 
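/* Usage sketch (editorial): packing a reduction method together with the
 * barrier to use, then unpacking both halves again (KMP_FAST_REDUCTION_BARRIER
 * case). */
#if 0
PACKED_REDUCTION_METHOD_T prm =
    PACK_REDUCTION_METHOD_AND_BARRIER(tree_reduce_block, bs_reduction_barrier);
enum _reduction_method rm = UNPACK_REDUCTION_METHOD(prm);    /* tree_reduce_block */
enum barrier_type bt = UNPACK_REDUCTION_BARRIER(prm);        /* bs_reduction_barrier */
int is_tree = TEST_REDUCTION_METHOD(prm, tree_reduce_block); /* non-zero */
#endif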
497 /* -- end of fast reduction stuff ----------------------------------------- */
498 
499 /* ------------------------------------------------------------------------ */
500 /* ------------------------------------------------------------------------ */
501 
502 #if KMP_OS_WINDOWS
503 # define USE_CBLKDATA
504 # pragma warning( push )
505 # pragma warning( disable: 271 310 )
506 # include <windows.h>
507 # pragma warning( pop )
508 #endif
509 
510 #if KMP_OS_UNIX
511 # include <pthread.h>
512 # include <dlfcn.h>
513 #endif
514 
515 /* ------------------------------------------------------------------------ */
516 /* ------------------------------------------------------------------------ */
517 
518 /*
519  * Only Linux* OS and Windows* OS support thread affinity.
520  */
521 #if KMP_AFFINITY_SUPPORTED
522 
523 # if KMP_GROUP_AFFINITY
524 // GROUP_AFFINITY is already defined for _MSC_VER>=1600 (VS2010 and later).
525 # if _MSC_VER < 1600
526 typedef struct GROUP_AFFINITY {
527  KAFFINITY Mask;
528  WORD Group;
529  WORD Reserved[3];
530 } GROUP_AFFINITY;
531 # endif /* _MSC_VER < 1600 */
532 extern int __kmp_num_proc_groups;
533 typedef DWORD (*kmp_GetActiveProcessorCount_t)(WORD);
534 extern kmp_GetActiveProcessorCount_t __kmp_GetActiveProcessorCount;
535 
536 typedef WORD (*kmp_GetActiveProcessorGroupCount_t)(void);
537 extern kmp_GetActiveProcessorGroupCount_t __kmp_GetActiveProcessorGroupCount;
538 
539 typedef BOOL (*kmp_GetThreadGroupAffinity_t)(HANDLE, GROUP_AFFINITY *);
540 extern kmp_GetThreadGroupAffinity_t __kmp_GetThreadGroupAffinity;
541 
542 typedef BOOL (*kmp_SetThreadGroupAffinity_t)(HANDLE, const GROUP_AFFINITY *, GROUP_AFFINITY *);
543 extern kmp_SetThreadGroupAffinity_t __kmp_SetThreadGroupAffinity;
544 # endif /* KMP_GROUP_AFFINITY */
545 
546 extern size_t __kmp_affin_mask_size;
547 # define KMP_AFFINITY_CAPABLE() (__kmp_affin_mask_size > 0)
548 # define KMP_AFFINITY_DISABLE() (__kmp_affin_mask_size = 0)
549 # define KMP_AFFINITY_ENABLE(mask_size) (__kmp_affin_mask_size = mask_size)
550 # if !KMP_USE_HWLOC
551 # define KMP_CPU_SETSIZE (__kmp_affin_mask_size * CHAR_BIT)
552 # define KMP_CPU_SET_ITERATE(i,mask) \
553  for(i = 0; (size_t)i < KMP_CPU_SETSIZE; ++i)
554 # endif
555 
556 #if KMP_USE_HWLOC
557 
558 extern hwloc_topology_t __kmp_hwloc_topology;
559 extern int __kmp_hwloc_error;
560 typedef hwloc_cpuset_t kmp_affin_mask_t;
561 # define KMP_CPU_SET(i,mask) hwloc_bitmap_set((hwloc_cpuset_t)mask, (unsigned)i)
562 # define KMP_CPU_ISSET(i,mask) hwloc_bitmap_isset((hwloc_cpuset_t)mask, (unsigned)i)
563 # define KMP_CPU_CLR(i,mask) hwloc_bitmap_clr((hwloc_cpuset_t)mask, (unsigned)i)
564 # define KMP_CPU_ZERO(mask) hwloc_bitmap_zero((hwloc_cpuset_t)mask)
565 # define KMP_CPU_COPY(dest, src) hwloc_bitmap_copy((hwloc_cpuset_t)dest, (hwloc_cpuset_t)src)
566 # define KMP_CPU_AND(dest, src) hwloc_bitmap_and((hwloc_cpuset_t)dest, (hwloc_cpuset_t)dest, (hwloc_cpuset_t)src)
567 # define KMP_CPU_COMPLEMENT(max_bit_number, mask) \
568  { \
569  unsigned i; \
570  for(i=0;i<(unsigned)max_bit_number+1;i++) { \
571  if(hwloc_bitmap_isset((hwloc_cpuset_t)mask, i)) { \
572  hwloc_bitmap_clr((hwloc_cpuset_t)mask, i); \
573  } else { \
574  hwloc_bitmap_set((hwloc_cpuset_t)mask, i); \
575  } \
576  } \
577  hwloc_bitmap_and((hwloc_cpuset_t)mask, (hwloc_cpuset_t)mask, \
578  (hwloc_cpuset_t)__kmp_affin_fullMask); \
579  } \
580 
581 # define KMP_CPU_UNION(dest, src) hwloc_bitmap_or((hwloc_cpuset_t)dest, (hwloc_cpuset_t)dest, (hwloc_cpuset_t)src)
582 # define KMP_CPU_SET_ITERATE(i,mask) \
583  for(i = hwloc_bitmap_first((hwloc_cpuset_t)mask); (int)i != -1; i = hwloc_bitmap_next((hwloc_cpuset_t)mask, i))
584 
585 # define KMP_CPU_ALLOC(ptr) ptr = (kmp_affin_mask_t*)hwloc_bitmap_alloc()
586 # define KMP_CPU_FREE(ptr) hwloc_bitmap_free((hwloc_bitmap_t)ptr);
587 # define KMP_CPU_ALLOC_ON_STACK(ptr) KMP_CPU_ALLOC(ptr)
588 # define KMP_CPU_FREE_FROM_STACK(ptr) KMP_CPU_FREE(ptr)
589 # define KMP_CPU_INTERNAL_ALLOC(ptr) KMP_CPU_ALLOC(ptr)
590 # define KMP_CPU_INTERNAL_FREE(ptr) KMP_CPU_FREE(ptr)
591 
592 //
593 // The following macro should be used to index an array of masks.
594 // The array should be declared as "kmp_affin_mask_t *" and allocated with
595 // size "__kmp_affin_mask_size * len". The macro takes care of the fact
596 // that on Windows* OS, sizeof(kmp_affin_mask_t) is really the size of the mask, but
597 // on Linux* OS, sizeof(kmp_affin_mask_t) is 1.
598 //
599 # define KMP_CPU_INDEX(array,i) ((kmp_affin_mask_t*)(array[i]))
600 # define KMP_CPU_ALLOC_ARRAY(arr, n) { \
601  arr = (kmp_affin_mask_t *)__kmp_allocate(n*sizeof(kmp_affin_mask_t)); \
602  unsigned i; \
603  for(i=0;i<(unsigned)n;i++) { \
604  arr[i] = hwloc_bitmap_alloc(); \
605  } \
606  }
607 # define KMP_CPU_FREE_ARRAY(arr, n) { \
608  unsigned i; \
609  for(i=0;i<(unsigned)n;i++) { \
610  hwloc_bitmap_free(arr[i]); \
611  } \
612  __kmp_free(arr); \
613  }
614 # define KMP_CPU_INTERNAL_ALLOC_ARRAY(arr, n) { \
615  arr = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(n*sizeof(kmp_affin_mask_t)); \
616  unsigned i; \
617  for(i=0;i<(unsigned)n;i++) { \
618  arr[i] = hwloc_bitmap_alloc(); \
619  } \
620  }
621 # define KMP_CPU_INTERNAL_FREE_ARRAY(arr, n) { \
622  unsigned i; \
623  for(i=0;i<(unsigned)n;i++) { \
624  hwloc_bitmap_free(arr[i]); \
625  } \
626  KMP_INTERNAL_FREE(arr); \
627  }
628 
629 #else /* KMP_USE_HWLOC */
630 # if KMP_OS_LINUX
631 //
632 // On Linux* OS, the mask is actually a vector of length __kmp_affin_mask_size
633 // (in bytes). It should be allocated on a word boundary.
634 //
635 // WARNING!!! We have made the base type of the affinity mask unsigned char,
636 // in order to eliminate a lot of checks that the true system mask size is
637 // really a multiple of 4 bytes (on Linux* OS).
638 //
639 // THESE MACROS WON'T WORK PROPERLY ON BIG ENDIAN MACHINES!!!
640 //
641 
642 typedef unsigned char kmp_affin_mask_t;
643 
644 # define _KMP_CPU_SET(i,mask) (mask[i/CHAR_BIT] |= (((kmp_affin_mask_t)1) << (i % CHAR_BIT)))
645 # define KMP_CPU_SET(i,mask) _KMP_CPU_SET((i), ((kmp_affin_mask_t *)(mask)))
646 # define _KMP_CPU_ISSET(i,mask) (!!(mask[i/CHAR_BIT] & (((kmp_affin_mask_t)1) << (i % CHAR_BIT))))
647 # define KMP_CPU_ISSET(i,mask) _KMP_CPU_ISSET((i), ((kmp_affin_mask_t *)(mask)))
648 # define _KMP_CPU_CLR(i,mask) (mask[i/CHAR_BIT] &= ~(((kmp_affin_mask_t)1) << (i % CHAR_BIT)))
649 # define KMP_CPU_CLR(i,mask) _KMP_CPU_CLR((i), ((kmp_affin_mask_t *)(mask)))
650 
651 # define KMP_CPU_ZERO(mask) \
652  { \
653  size_t __i; \
654  for (__i = 0; __i < __kmp_affin_mask_size; __i++) { \
655  ((kmp_affin_mask_t *)(mask))[__i] = 0; \
656  } \
657  }
658 
659 # define KMP_CPU_COPY(dest, src) \
660  { \
661  size_t __i; \
662  for (__i = 0; __i < __kmp_affin_mask_size; __i++) { \
663  ((kmp_affin_mask_t *)(dest))[__i] \
664  = ((kmp_affin_mask_t *)(src))[__i]; \
665  } \
666  }
667 
668 # define KMP_CPU_AND(dest, src) \
669  { \
670  size_t __i; \
671  for (__i = 0; __i < __kmp_affin_mask_size; __i++) { \
672  ((kmp_affin_mask_t *)(dest))[__i] \
673  &= ((kmp_affin_mask_t *)(src))[__i]; \
674  } \
675  }
676 
677 # define KMP_CPU_COMPLEMENT(max_bit_number, mask) \
678  { \
679  size_t __i; \
680  for (__i = 0; __i < __kmp_affin_mask_size; __i++) { \
681  ((kmp_affin_mask_t *)(mask))[__i] \
682  = ~((kmp_affin_mask_t *)(mask))[__i]; \
683  } \
684  KMP_CPU_AND(mask, __kmp_affin_fullMask); \
685  }
686 
687 # define KMP_CPU_UNION(dest, src) \
688  { \
689  size_t __i; \
690  for (__i = 0; __i < __kmp_affin_mask_size; __i++) { \
691  ((kmp_affin_mask_t *)(dest))[__i] \
692  |= ((kmp_affin_mask_t *)(src))[__i]; \
693  } \
694  }
695 
696 # endif /* KMP_OS_LINUX */
697 
698 # if KMP_OS_WINDOWS
699 //
700 // On Windows* OS, the mask size is 4 bytes for IA-32 architecture, and on
701 // Intel(R) 64 it is 8 bytes times the number of processor groups.
702 //
703 
704 # if KMP_GROUP_AFFINITY
705 typedef DWORD_PTR kmp_affin_mask_t;
706 
707 # define _KMP_CPU_SET(i,mask) \
708  (mask[i/(CHAR_BIT * sizeof(kmp_affin_mask_t))] |= \
709  (((kmp_affin_mask_t)1) << (i % (CHAR_BIT * sizeof(kmp_affin_mask_t)))))
710 
711 # define KMP_CPU_SET(i,mask) \
712  _KMP_CPU_SET((i), ((kmp_affin_mask_t *)(mask)))
713 
714 # define _KMP_CPU_ISSET(i,mask) \
715  (!!(mask[i/(CHAR_BIT * sizeof(kmp_affin_mask_t))] & \
716  (((kmp_affin_mask_t)1) << (i % (CHAR_BIT * sizeof(kmp_affin_mask_t))))))
717 
718 # define KMP_CPU_ISSET(i,mask) \
719  _KMP_CPU_ISSET((i), ((kmp_affin_mask_t *)(mask)))
720 
721 # define _KMP_CPU_CLR(i,mask) \
722  (mask[i/(CHAR_BIT * sizeof(kmp_affin_mask_t))] &= \
723  ~(((kmp_affin_mask_t)1) << (i % (CHAR_BIT * sizeof(kmp_affin_mask_t)))))
724 
725 # define KMP_CPU_CLR(i,mask) \
726  _KMP_CPU_CLR((i), ((kmp_affin_mask_t *)(mask)))
727 
728 # define KMP_CPU_ZERO(mask) \
729  { \
730  int __i; \
731  for (__i = 0; __i < __kmp_num_proc_groups; __i++) { \
732  ((kmp_affin_mask_t *)(mask))[__i] = 0; \
733  } \
734  }
735 
736 # define KMP_CPU_COPY(dest, src) \
737  { \
738  int __i; \
739  for (__i = 0; __i < __kmp_num_proc_groups; __i++) { \
740  ((kmp_affin_mask_t *)(dest))[__i] \
741  = ((kmp_affin_mask_t *)(src))[__i]; \
742  } \
743  }
744 
745 # define KMP_CPU_AND(dest, src) \
746  { \
747  int __i; \
748  for (__i = 0; __i < __kmp_num_proc_groups; __i++) { \
749  ((kmp_affin_mask_t *)(dest))[__i] \
750  &= ((kmp_affin_mask_t *)(src))[__i]; \
751  } \
752  }
753 
754 # define KMP_CPU_COMPLEMENT(max_bit_number, mask) \
755  { \
756  int __i; \
757  for (__i = 0; __i < __kmp_num_proc_groups; __i++) { \
758  ((kmp_affin_mask_t *)(mask))[__i] \
759  = ~((kmp_affin_mask_t *)(mask))[__i]; \
760  } \
761  KMP_CPU_AND(mask, __kmp_affin_fullMask); \
762  }
763 
764 # define KMP_CPU_UNION(dest, src) \
765  { \
766  int __i; \
767  for (__i = 0; __i < __kmp_num_proc_groups; __i++) { \
768  ((kmp_affin_mask_t *)(dest))[__i] \
769  |= ((kmp_affin_mask_t *)(src))[__i]; \
770  } \
771  }
772 
773 
774 # else /* KMP_GROUP_AFFINITY */
775 
776 typedef DWORD kmp_affin_mask_t; /* for compatibility with older winbase.h */
777 
778 # define KMP_CPU_SET(i,mask) (*(mask) |= (((kmp_affin_mask_t)1) << (i)))
779 # define KMP_CPU_ISSET(i,mask) (!!(*(mask) & (((kmp_affin_mask_t)1) << (i))))
780 # define KMP_CPU_CLR(i,mask) (*(mask) &= ~(((kmp_affin_mask_t)1) << (i)))
781 # define KMP_CPU_ZERO(mask) (*(mask) = 0)
782 # define KMP_CPU_COPY(dest, src) (*(dest) = *(src))
783 # define KMP_CPU_AND(dest, src) (*(dest) &= *(src))
784 # define KMP_CPU_COMPLEMENT(max_bit_number, mask) (*(mask) = ~*(mask)); KMP_CPU_AND(mask, __kmp_affin_fullMask)
785 # define KMP_CPU_UNION(dest, src) (*(dest) |= *(src))
786 
787 # endif /* KMP_GROUP_AFFINITY */
788 
789 # endif /* KMP_OS_WINDOWS */
790 
791 //
792 // __kmp_allocate() will return memory allocated on a 4-byte boundary,
793 // after zeroing it; this takes care of the assumptions stated above.
794 //
795 # define KMP_CPU_ALLOC(ptr) \
796  (ptr = ((kmp_affin_mask_t *)__kmp_allocate(__kmp_affin_mask_size)))
797 # define KMP_CPU_FREE(ptr) __kmp_free(ptr)
798 # define KMP_CPU_ALLOC_ON_STACK(ptr) (ptr = ((kmp_affin_mask_t *)KMP_ALLOCA(__kmp_affin_mask_size)))
799 # define KMP_CPU_FREE_FROM_STACK(ptr) /* Nothing */
800 # define KMP_CPU_INTERNAL_ALLOC(ptr) (ptr = ((kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(__kmp_affin_mask_size)))
801 # define KMP_CPU_INTERNAL_FREE(ptr) KMP_INTERNAL_FREE(ptr)
802 
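/* Usage sketch (editorial): allocating a single affinity mask and manipulating
 * individual logical CPUs with the macros above (non-hwloc build). */
#if 0
kmp_affin_mask_t *mask;
KMP_CPU_ALLOC(mask);            /* zeroed, __kmp_affin_mask_size bytes */
KMP_CPU_SET(3, mask);           /* mark logical CPU 3 */
if (KMP_CPU_ISSET(3, mask)) {
    KMP_CPU_CLR(3, mask);
}
KMP_CPU_FREE(mask);
#endif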
803 //
804 // The following macro should be used to index an array of masks.
805 // The array should be declared as "kmp_affinity_t *" and allocated with
806 // size "__kmp_affinity_mask_size * len". The macro takes care of the fact
807 // that on Windows* OS, sizeof(kmp_affin_t) is really the size of the mask, but
808 // on Linux* OS, sizeof(kmp_affin_t) is 1.
809 //
810 # define KMP_CPU_INDEX(array,i) \
811  ((kmp_affin_mask_t *)(((char *)(array)) + (i) * __kmp_affin_mask_size))
812 # define KMP_CPU_ALLOC_ARRAY(arr, n) arr = (kmp_affin_mask_t *)__kmp_allocate(n * __kmp_affin_mask_size)
813 # define KMP_CPU_FREE_ARRAY(arr, n) __kmp_free(arr);
814 # define KMP_CPU_INTERNAL_ALLOC_ARRAY(arr, n) arr = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(n * __kmp_affin_mask_size)
815 # define KMP_CPU_INTERNAL_FREE_ARRAY(arr, n) KMP_INTERNAL_FREE(arr);
816 
817 #endif /* KMP_USE_HWLOC */
818 
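/* Usage sketch (editorial): an array of affinity masks, indexed with
 * KMP_CPU_INDEX; 'n' is a hypothetical element count. The same pattern works
 * with both the hwloc and non-hwloc definitions above. */
#if 0
kmp_affin_mask_t *mask_array;
KMP_CPU_ALLOC_ARRAY(mask_array, n);
kmp_affin_mask_t *third = KMP_CPU_INDEX(mask_array, 2);
KMP_CPU_ZERO(third);
KMP_CPU_FREE_ARRAY(mask_array, n);
#endif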
819 // prototype after typedef of kmp_affin_mask_t
820 #if KMP_GROUP_AFFINITY
821 extern int __kmp_get_proc_group(kmp_affin_mask_t const *mask);
822 #endif
823 
824 //
825 // Declare local char buffers with this size for printing debug and info
826 // messages, using __kmp_affinity_print_mask().
827 //
828 #define KMP_AFFIN_MASK_PRINT_LEN 1024
829 
830 enum affinity_type {
831  affinity_none = 0,
832  affinity_physical,
833  affinity_logical,
834  affinity_compact,
835  affinity_scatter,
836  affinity_explicit,
837  affinity_balanced,
838  affinity_disabled, // not used outside the env var parser
839  affinity_default
840 };
841 
842 enum affinity_gran {
843  affinity_gran_fine = 0,
844  affinity_gran_thread,
845  affinity_gran_core,
846  affinity_gran_package,
847  affinity_gran_node,
848 #if KMP_GROUP_AFFINITY
849  //
850  // The "group" granularity isn't necessarily coarser than all of the
851  // other levels, but we put it last in the enum.
852  //
853  affinity_gran_group,
854 #endif /* KMP_GROUP_AFFINITY */
855  affinity_gran_default
856 };
857 
858 enum affinity_top_method {
859  affinity_top_method_all = 0, // try all (supported) methods, in order
860 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
861  affinity_top_method_apicid,
862  affinity_top_method_x2apicid,
863 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
864  affinity_top_method_cpuinfo, // KMP_CPUINFO_FILE is usable on Windows* OS, too
865 #if KMP_GROUP_AFFINITY
866  affinity_top_method_group,
867 #endif /* KMP_GROUP_AFFINITY */
868  affinity_top_method_flat,
869 #if KMP_USE_HWLOC
870  affinity_top_method_hwloc,
871 #endif
872  affinity_top_method_default
873 };
874 
875 #define affinity_respect_mask_default (-1)
876 
877 extern enum affinity_type __kmp_affinity_type; /* Affinity type */
878 extern enum affinity_gran __kmp_affinity_gran; /* Affinity granularity */
879 extern int __kmp_affinity_gran_levels; /* corresponding int value */
880 extern int __kmp_affinity_dups; /* Affinity duplicate masks */
881 extern enum affinity_top_method __kmp_affinity_top_method;
882 extern int __kmp_affinity_compact; /* Affinity 'compact' value */
883 extern int __kmp_affinity_offset; /* Affinity offset value */
884 extern int __kmp_affinity_verbose; /* Was verbose specified for KMP_AFFINITY? */
885 extern int __kmp_affinity_warnings; /* KMP_AFFINITY warnings enabled ? */
886 extern int __kmp_affinity_respect_mask; /* Respect process' initial affinity mask? */
887 extern char * __kmp_affinity_proclist; /* proc ID list */
888 extern kmp_affin_mask_t *__kmp_affinity_masks;
889 extern unsigned __kmp_affinity_num_masks;
890 extern int __kmp_get_system_affinity(kmp_affin_mask_t *mask, int abort_on_error);
891 extern int __kmp_set_system_affinity(kmp_affin_mask_t const *mask, int abort_on_error);
892 extern void __kmp_affinity_bind_thread(int which);
893 
894 extern kmp_affin_mask_t *__kmp_affin_fullMask;
895 extern char const * __kmp_cpuinfo_file;
896 
897 #endif /* KMP_AFFINITY_SUPPORTED */
898 
899 #if OMP_40_ENABLED
900 
901 //
902 // This needs to be kept in sync with the values in omp.h !!!
903 //
904 typedef enum kmp_proc_bind_t {
905  proc_bind_false = 0,
906  proc_bind_true,
907  proc_bind_master,
908  proc_bind_close,
909  proc_bind_spread,
910  proc_bind_intel, // use KMP_AFFINITY interface
911  proc_bind_default
912 } kmp_proc_bind_t;
913 
914 typedef struct kmp_nested_proc_bind_t {
915  kmp_proc_bind_t *bind_types;
916  int size;
917  int used;
918 } kmp_nested_proc_bind_t;
919 
920 extern kmp_nested_proc_bind_t __kmp_nested_proc_bind;
921 
922 #endif /* OMP_40_ENABLED */
923 
924 # if KMP_AFFINITY_SUPPORTED
925 # define KMP_PLACE_ALL (-1)
926 # define KMP_PLACE_UNDEFINED (-2)
927 # endif /* KMP_AFFINITY_SUPPORTED */
928 
929 extern int __kmp_affinity_num_places;
930 
931 
932 #if OMP_40_ENABLED
933 typedef enum kmp_cancel_kind_t {
934  cancel_noreq = 0,
935  cancel_parallel = 1,
936  cancel_loop = 2,
937  cancel_sections = 3,
938  cancel_taskgroup = 4
939 } kmp_cancel_kind_t;
940 #endif // OMP_40_ENABLED
941 
942 extern int __kmp_place_num_sockets;
943 extern int __kmp_place_socket_offset;
944 extern int __kmp_place_num_cores;
945 extern int __kmp_place_core_offset;
946 extern int __kmp_place_num_threads_per_core;
947 
948 /* ------------------------------------------------------------------------ */
949 /* ------------------------------------------------------------------------ */
950 
951 #define KMP_PAD(type, sz) (sizeof(type) + (sz - ((sizeof(type) - 1) % (sz)) - 1))
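/* Worked example (editorial sketch): with sizeof(type) == 12 and sz == 64,
 * KMP_PAD(type, 64) == 12 + (64 - (11 % 64) - 1) == 64, i.e. the size is
 * rounded up to the next multiple of sz (here, a cache line). */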
952 
953 //
954 // We need to avoid using -1 as a GTID as +1 is added to the gtid
955 // when storing it in a lock, and the value 0 is reserved.
956 //
957 #define KMP_GTID_DNE (-2) /* Does not exist */
958 #define KMP_GTID_SHUTDOWN (-3) /* Library is shutting down */
959 #define KMP_GTID_MONITOR (-4) /* Monitor thread ID */
960 #define KMP_GTID_UNKNOWN (-5) /* Is not known */
961 #define KMP_GTID_MIN (-6) /* Minimal gtid for low bound check in DEBUG */
962 
963 #define __kmp_get_gtid() __kmp_get_global_thread_id()
964 #define __kmp_entry_gtid() __kmp_get_global_thread_id_reg()
965 
966 #define __kmp_tid_from_gtid(gtid) ( KMP_DEBUG_ASSERT( (gtid) >= 0 ), \
967  __kmp_threads[ (gtid) ]->th.th_info.ds.ds_tid )
968 
969 #define __kmp_get_tid() ( __kmp_tid_from_gtid( __kmp_get_gtid() ) )
970 #define __kmp_gtid_from_tid(tid,team) ( KMP_DEBUG_ASSERT( (tid) >= 0 && (team) != NULL ), \
971  team -> t.t_threads[ (tid) ] -> th.th_info .ds.ds_gtid )
972 
973 #define __kmp_get_team() ( __kmp_threads[ (__kmp_get_gtid()) ]-> th.th_team )
974 #define __kmp_team_from_gtid(gtid) ( KMP_DEBUG_ASSERT( (gtid) >= 0 ), \
975  __kmp_threads[ (gtid) ]-> th.th_team )
976 
977 #define __kmp_thread_from_gtid(gtid) ( KMP_DEBUG_ASSERT( (gtid) >= 0 ), __kmp_threads[ (gtid) ] )
978 #define __kmp_get_thread() ( __kmp_thread_from_gtid( __kmp_get_gtid() ) )
979 
980  // Returns the gtid of the given thread (kmp_info_t *). In contrast to __kmp_get_gtid(), it
981  // works with registered and not-yet-registered threads.
982 #define __kmp_gtid_from_thread(thr) ( KMP_DEBUG_ASSERT( (thr) != NULL ), \
983  (thr)->th.th_info.ds.ds_gtid )
984 
985 // AT: Which way is correct?
986 // AT: 1. nproc = __kmp_threads[ ( gtid ) ] -> th.th_team -> t.t_nproc;
987 // AT: 2. nproc = __kmp_threads[ ( gtid ) ] -> th.th_team_nproc;
988 #define __kmp_get_team_num_threads(gtid) ( __kmp_threads[ ( gtid ) ] -> th.th_team -> t.t_nproc )
989 
990 
991 /* ------------------------------------------------------------------------ */
992 /* ------------------------------------------------------------------------ */
993 
994 #define KMP_UINT64_MAX (~((kmp_uint64)1<<((sizeof(kmp_uint64)*(1<<3))-1)))
995 
996 #define KMP_MIN_NTH 1
997 
998 #ifndef KMP_MAX_NTH
999 # if defined(PTHREAD_THREADS_MAX) && PTHREAD_THREADS_MAX < INT_MAX
1000 # define KMP_MAX_NTH PTHREAD_THREADS_MAX
1001 # else
1002 # define KMP_MAX_NTH INT_MAX
1003 # endif
1004 #endif /* KMP_MAX_NTH */
1005 
1006 #ifdef PTHREAD_STACK_MIN
1007 # define KMP_MIN_STKSIZE PTHREAD_STACK_MIN
1008 #else
1009 # define KMP_MIN_STKSIZE ((size_t)(32 * 1024))
1010 #endif
1011 
1012 #define KMP_MAX_STKSIZE (~((size_t)1<<((sizeof(size_t)*(1<<3))-1)))
1013 
1014 #if KMP_ARCH_X86
1015 # define KMP_DEFAULT_STKSIZE ((size_t)(2 * 1024 * 1024))
1016 #elif KMP_ARCH_X86_64
1017 # define KMP_DEFAULT_STKSIZE ((size_t)(4 * 1024 * 1024))
1018 # define KMP_BACKUP_STKSIZE ((size_t)(2 * 1024 * 1024))
1019 #else
1020 # define KMP_DEFAULT_STKSIZE ((size_t)(1024 * 1024))
1021 #endif
1022 
1023 #define KMP_DEFAULT_MONITOR_STKSIZE ((size_t)(64 * 1024))
1024 
1025 #define KMP_DEFAULT_MALLOC_POOL_INCR ((size_t) (1024 * 1024))
1026 #define KMP_MIN_MALLOC_POOL_INCR ((size_t) (4 * 1024))
1027 #define KMP_MAX_MALLOC_POOL_INCR (~((size_t)1<<((sizeof(size_t)*(1<<3))-1)))
1028 
1029 #define KMP_MIN_STKOFFSET (0)
1030 #define KMP_MAX_STKOFFSET KMP_MAX_STKSIZE
1031 #if KMP_OS_DARWIN
1032 # define KMP_DEFAULT_STKOFFSET KMP_MIN_STKOFFSET
1033 #else
1034 # define KMP_DEFAULT_STKOFFSET CACHE_LINE
1035 #endif
1036 
1037 #define KMP_MIN_STKPADDING (0)
1038 #define KMP_MAX_STKPADDING (2 * 1024 * 1024)
1039 
1040 #define KMP_MIN_MONITOR_WAKEUPS (1) /* min number of times monitor wakes up per second */
1041 #define KMP_MAX_MONITOR_WAKEUPS (1000) /* maximum number of times monitor can wake up per second */
1042 #define KMP_BLOCKTIME_MULTIPLIER (1000) /* number of blocktime units per second */
1043 #define KMP_MIN_BLOCKTIME (0)
1044 #define KMP_MAX_BLOCKTIME (INT_MAX) /* Must be this for "infinite" setting the work */
1045 #define KMP_DEFAULT_BLOCKTIME (200) /* __kmp_blocktime is in milliseconds */
1046 /* Calculate new number of monitor wakeups for a specific block time based on previous monitor_wakeups */
1047 /* Only allow increasing number of wakeups */
1048 #define KMP_WAKEUPS_FROM_BLOCKTIME(blocktime, monitor_wakeups) \
1049  ( ((blocktime) == KMP_MAX_BLOCKTIME) ? (monitor_wakeups) : \
1050  ((blocktime) == KMP_MIN_BLOCKTIME) ? KMP_MAX_MONITOR_WAKEUPS : \
1051  ((monitor_wakeups) > (KMP_BLOCKTIME_MULTIPLIER / (blocktime))) ? (monitor_wakeups) : \
1052  (KMP_BLOCKTIME_MULTIPLIER) / (blocktime) )
1053 
1054 /* Calculate number of intervals for a specific block time based on monitor_wakeups */
1055 #define KMP_INTERVALS_FROM_BLOCKTIME(blocktime, monitor_wakeups) \
1056  ( ( (blocktime) + (KMP_BLOCKTIME_MULTIPLIER / (monitor_wakeups)) - 1 ) / \
1057  (KMP_BLOCKTIME_MULTIPLIER / (monitor_wakeups)) )
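/* Worked example (editorial sketch): with the default 200 ms blocktime and an
 * initial monitor_wakeups of 1, KMP_WAKEUPS_FROM_BLOCKTIME(200, 1) ==
 * 1000/200 == 5 wakeups per second, and then
 * KMP_INTERVALS_FROM_BLOCKTIME(200, 5) == (200 + 200 - 1) / 200 == 1 interval. */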
1058 
1059 #define KMP_MIN_STATSCOLS 40
1060 #define KMP_MAX_STATSCOLS 4096
1061 #define KMP_DEFAULT_STATSCOLS 80
1062 
1063 #define KMP_MIN_INTERVAL 0
1064 #define KMP_MAX_INTERVAL (INT_MAX-1)
1065 #define KMP_DEFAULT_INTERVAL 0
1066 
1067 #define KMP_MIN_CHUNK 1
1068 #define KMP_MAX_CHUNK (INT_MAX-1)
1069 #define KMP_DEFAULT_CHUNK 1
1070 
1071 #define KMP_MIN_INIT_WAIT 1
1072 #define KMP_MAX_INIT_WAIT (INT_MAX/2)
1073 #define KMP_DEFAULT_INIT_WAIT 2048U
1074 
1075 #define KMP_MIN_NEXT_WAIT 1
1076 #define KMP_MAX_NEXT_WAIT (INT_MAX/2)
1077 #define KMP_DEFAULT_NEXT_WAIT 1024U
1078 
1079 #define KMP_DFLT_DISP_NUM_BUFF 7
1080 #define KMP_MAX_ORDERED 8
1081 
1082 #define KMP_MAX_FIELDS 32
1083 
1084 #define KMP_MAX_BRANCH_BITS 31
1085 
1086 #define KMP_MAX_ACTIVE_LEVELS_LIMIT INT_MAX
1087 
1088 #define KMP_MAX_TASK_PRIORITY_LIMIT INT_MAX
1089 
1090 /* Minimum number of threads before switch to TLS gtid (experimentally determined) */
1091 /* josh TODO: what about OS X* tuning? */
1092 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1093 # define KMP_TLS_GTID_MIN 5
1094 #else
1095 # define KMP_TLS_GTID_MIN INT_MAX
1096 #endif
1097 
1098 #define KMP_MASTER_TID(tid) ( (tid) == 0 )
1099 #define KMP_WORKER_TID(tid) ( (tid) != 0 )
1100 
1101 #define KMP_MASTER_GTID(gtid) ( __kmp_tid_from_gtid((gtid)) == 0 )
1102 #define KMP_WORKER_GTID(gtid) ( __kmp_tid_from_gtid((gtid)) != 0 )
1103 #define KMP_UBER_GTID(gtid) \
1104  ( \
1105  KMP_DEBUG_ASSERT( (gtid) >= KMP_GTID_MIN ), \
1106  KMP_DEBUG_ASSERT( (gtid) < __kmp_threads_capacity ), \
1107  (gtid) >= 0 && __kmp_root[(gtid)] && __kmp_threads[(gtid)] && \
1108  (__kmp_threads[(gtid)] == __kmp_root[(gtid)]->r.r_uber_thread)\
1109  )
1110 #define KMP_INITIAL_GTID(gtid) ( (gtid) == 0 )
1111 
1112 #ifndef TRUE
1113 #define FALSE 0
1114 #define TRUE (! FALSE)
1115 #endif
1116 
1117 /* NOTE: all of the following constants must be even */
1118 
1119 #if KMP_OS_WINDOWS
1120 # define KMP_INIT_WAIT 64U /* initial number of spin-tests */
1121 # define KMP_NEXT_WAIT 32U /* subsequent number of spin-tests */
1122 #elif KMP_OS_CNK
1123 # define KMP_INIT_WAIT 16U /* initial number of spin-tests */
1124 # define KMP_NEXT_WAIT 8U /* subsequent number of spin-tests */
1125 #elif KMP_OS_LINUX
1126 # define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
1127 # define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
1128 #elif KMP_OS_DARWIN
1129 /* TODO: tune for KMP_OS_DARWIN */
1130 # define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
1131 # define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
1132 #elif KMP_OS_FREEBSD
1133 /* TODO: tune for KMP_OS_FREEBSD */
1134 # define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
1135 # define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
1136 #elif KMP_OS_NETBSD
1137 /* TODO: tune for KMP_OS_NETBSD */
1138 # define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
1139 # define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
1140 #endif
1141 
1142 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1143 typedef struct kmp_cpuid {
1144  kmp_uint32 eax;
1145  kmp_uint32 ebx;
1146  kmp_uint32 ecx;
1147  kmp_uint32 edx;
1148 } kmp_cpuid_t;
1149 extern void __kmp_x86_cpuid( int mode, int mode2, struct kmp_cpuid *p );
1150 # if KMP_ARCH_X86
1151  extern void __kmp_x86_pause( void );
1152 # elif KMP_MIC
1153  static void __kmp_x86_pause( void ) { _mm_delay_32( 100 ); }
1154 # else
1155  static void __kmp_x86_pause( void ) { _mm_pause(); }
1156 # endif
1157 # define KMP_CPU_PAUSE() __kmp_x86_pause()
1158 #elif KMP_ARCH_PPC64
1159 # define KMP_PPC64_PRI_LOW() __asm__ volatile ("or 1, 1, 1")
1160 # define KMP_PPC64_PRI_MED() __asm__ volatile ("or 2, 2, 2")
1161 # define KMP_PPC64_PRI_LOC_MB() __asm__ volatile ("" : : : "memory")
1162 # define KMP_CPU_PAUSE() do { KMP_PPC64_PRI_LOW(); KMP_PPC64_PRI_MED(); KMP_PPC64_PRI_LOC_MB(); } while (0)
1163 #else
1164 # define KMP_CPU_PAUSE() /* nothing to do */
1165 #endif
1166 
1167 #define KMP_INIT_YIELD(count) { (count) = __kmp_yield_init; }
1168 
1169 #define KMP_YIELD(cond) { KMP_CPU_PAUSE(); __kmp_yield( (cond) ); }
1170 
1171 // Note the decrement of 2 in the following Macros. With KMP_LIBRARY=turnaround,
1172 // there should be no yielding since the starting value from KMP_INIT_YIELD() is odd.
1173 
1174 #define KMP_YIELD_WHEN(cond,count) { KMP_CPU_PAUSE(); (count) -= 2; \
1175  if (!(count)) { KMP_YIELD(cond); (count) = __kmp_yield_next; } }
1176 #define KMP_YIELD_SPIN(count) { KMP_CPU_PAUSE(); (count) -=2; \
1177  if (!(count)) { KMP_YIELD(1); (count) = __kmp_yield_next; } }
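/* Usage sketch (editorial): the typical spin-wait pattern built from these
 * macros; 'flag_is_set' is a hypothetical condition. */
#if 0
kmp_uint32 spins;
KMP_INIT_YIELD(spins);      /* seeds the counter from __kmp_yield_init */
while (!flag_is_set) {
    KMP_YIELD_SPIN(spins);  /* pause, and yield once the counter runs out */
}
#endif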
1178 
1179 /* ------------------------------------------------------------------------ */
1180 /* Support datatypes for the orphaned construct nesting checks. */
1181 /* ------------------------------------------------------------------------ */
1182 
1183 enum cons_type {
1184  ct_none,
1185  ct_parallel,
1186  ct_pdo,
1187  ct_pdo_ordered,
1188  ct_psections,
1189  ct_psingle,
1190 
1191  /* the following must be left in order and not split up */
1192  ct_taskq,
1193  ct_task, /* really task inside non-ordered taskq, considered a worksharing type */
1194  ct_task_ordered, /* really task inside ordered taskq, considered a worksharing type */
1195  /* the preceding must be left in order and not split up */
1196 
1197  ct_critical,
1198  ct_ordered_in_parallel,
1199  ct_ordered_in_pdo,
1200  ct_ordered_in_taskq,
1201  ct_master,
1202  ct_reduce,
1203  ct_barrier
1204 };
1205 
1206 /* test to see if we are in a taskq construct */
1207 # define IS_CONS_TYPE_TASKQ( ct ) ( ((int)(ct)) >= ((int)ct_taskq) && ((int)(ct)) <= ((int)ct_task_ordered) )
1208 # define IS_CONS_TYPE_ORDERED( ct ) ((ct) == ct_pdo_ordered || (ct) == ct_task_ordered)
1209 
1210 struct cons_data {
1211  ident_t const *ident;
1212  enum cons_type type;
1213  int prev;
1214  kmp_user_lock_p name; /* address exclusively for critical section name comparison */
1215 };
1216 
1217 struct cons_header {
1218  int p_top, w_top, s_top;
1219  int stack_size, stack_top;
1220  struct cons_data *stack_data;
1221 };
1222 
1223 struct kmp_region_info {
1224  char *text;
1225  int offset[KMP_MAX_FIELDS];
1226  int length[KMP_MAX_FIELDS];
1227 };
1228 
1229 
1230 /* ---------------------------------------------------------------------- */
1231 /* ---------------------------------------------------------------------- */
1232 
1233 #if KMP_OS_WINDOWS
1234  typedef HANDLE kmp_thread_t;
1235  typedef DWORD kmp_key_t;
1236 #endif /* KMP_OS_WINDOWS */
1237 
1238 #if KMP_OS_UNIX
1239  typedef pthread_t kmp_thread_t;
1240  typedef pthread_key_t kmp_key_t;
1241 #endif
1242 
1243 extern kmp_key_t __kmp_gtid_threadprivate_key;
1244 
1245 typedef struct kmp_sys_info {
1246  long maxrss; /* the maximum resident set size utilized (in kilobytes) */
1247  long minflt; /* the number of page faults serviced without any I/O */
1248  long majflt; /* the number of page faults serviced that required I/O */
1249  long nswap; /* the number of times a process was "swapped" out of memory */
1250  long inblock; /* the number of times the file system had to perform input */
1251  long oublock; /* the number of times the file system had to perform output */
1252  long nvcsw; /* the number of times a context switch was performed voluntarily */
1253  long nivcsw; /* the number of times a context switch was forced */
1254 } kmp_sys_info_t;
1255 
1256 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1257 typedef struct kmp_cpuinfo {
1258  int initialized; // If 0, other fields are not initialized.
1259  int signature; // CPUID(1).EAX
1260  int family; // CPUID(1).EAX[27:20] + CPUID(1).EAX[11:8] ( Extended Family + Family )
1261  int model; // ( CPUID(1).EAX[19:16] << 4 ) + CPUID(1).EAX[7:4] ( ( Extended Model << 4 ) + Model)
1262  int stepping; // CPUID(1).EAX[3:0] ( Stepping )
1263  int sse2; // 0 if SSE2 instructions are not supported, 1 otherwise.
1264  int rtm; // 0 if RTM instructions are not supported, 1 otherwise.
1265  int cpu_stackoffset;
1266  int apic_id;
1267  int physical_id;
1268  int logical_id;
1269  kmp_uint64 frequency; // Nominal CPU frequency in Hz.
1270  char name [3*sizeof (kmp_cpuid_t)]; // CPUID(0x80000002,0x80000003,0x80000004)
1271 } kmp_cpuinfo_t;
1272 #endif
1273 
1274 #ifdef BUILD_TV
1275 
1276 struct tv_threadprivate {
1277  /* Record type #1 */
1278  void *global_addr;
1279  void *thread_addr;
1280 };
1281 
1282 struct tv_data {
1283  struct tv_data *next;
1284  void *type;
1285  union tv_union {
1286  struct tv_threadprivate tp;
1287  } u;
1288 };
1289 
1290 extern kmp_key_t __kmp_tv_key;
1291 
1292 #endif /* BUILD_TV */
1293 
1294 /* ------------------------------------------------------------------------ */
1295 
1296 #if USE_ITT_BUILD
1297 // We cannot include "kmp_itt.h" due to circular dependency. Declare the only required type here.
1298 // Later we will check the type meets requirements.
1299 typedef int kmp_itt_mark_t;
1300 #define KMP_ITT_DEBUG 0
1301 #endif /* USE_ITT_BUILD */
1302 
1303 /* ------------------------------------------------------------------------ */
1304 
1305 /*
1306  * Taskq data structures
1307  */
1308 
1309 #define HIGH_WATER_MARK(nslots) (((nslots) * 3) / 4)
1310 #define __KMP_TASKQ_THUNKS_PER_TH 1 /* num thunks that each thread can simultaneously execute from a task queue */
1311 
1312 /* flags for taskq_global_flags, kmp_task_queue_t tq_flags, kmpc_thunk_t th_flags */
1313 
1314 #define TQF_IS_ORDERED 0x0001 /* __kmpc_taskq interface, taskq ordered */
1315 #define TQF_IS_LASTPRIVATE 0x0002 /* __kmpc_taskq interface, taskq with lastprivate list */
1316 #define TQF_IS_NOWAIT 0x0004 /* __kmpc_taskq interface, end taskq nowait */
1317 #define TQF_HEURISTICS 0x0008 /* __kmpc_taskq interface, use heuristics to decide task queue size */
1318 #define TQF_INTERFACE_RESERVED1 0x0010 /* __kmpc_taskq interface, reserved for future use */
1319 #define TQF_INTERFACE_RESERVED2 0x0020 /* __kmpc_taskq interface, reserved for future use */
1320 #define TQF_INTERFACE_RESERVED3 0x0040 /* __kmpc_taskq interface, reserved for future use */
1321 #define TQF_INTERFACE_RESERVED4 0x0080 /* __kmpc_taskq interface, reserved for future use */
1322 
1323 #define TQF_INTERFACE_FLAGS 0x00ff /* all the __kmpc_taskq interface flags */
1324 
1325 #define TQF_IS_LAST_TASK 0x0100 /* internal/read by instrumentation; only used with TQF_IS_LASTPRIVATE */
1326 #define TQF_TASKQ_TASK 0x0200 /* internal use only; this thunk->th_task is the taskq_task */
1327 #define TQF_RELEASE_WORKERS 0x0400 /* internal use only; must release worker threads once ANY queued task exists (global) */
1328 #define TQF_ALL_TASKS_QUEUED 0x0800 /* internal use only; notify workers that master has finished enqueuing tasks */
1329 #define TQF_PARALLEL_CONTEXT 0x1000 /* internal use only: this queue encountered in a parallel context: not serialized */
1330 #define TQF_DEALLOCATED 0x2000 /* internal use only; this queue is on the freelist and not in use */
1331 
1332 #define TQF_INTERNAL_FLAGS 0x3f00 /* all the internal use only flags */
1333 
1334 typedef struct KMP_ALIGN_CACHE kmpc_aligned_int32_t {
1335  kmp_int32 ai_data;
1336 } kmpc_aligned_int32_t;
1337 
1338 typedef struct KMP_ALIGN_CACHE kmpc_aligned_queue_slot_t {
1339  struct kmpc_thunk_t *qs_thunk;
1340 } kmpc_aligned_queue_slot_t;
1341 
1342 typedef struct kmpc_task_queue_t {
1343  /* task queue linkage fields for n-ary tree of queues (locked with global taskq_tree_lck) */
1344  kmp_lock_t tq_link_lck; /* lock for child link, child next/prev links and child ref counts */
1345  union {
1346  struct kmpc_task_queue_t *tq_parent; /* pointer to parent taskq, not locked */
1347  struct kmpc_task_queue_t *tq_next_free; /* for taskq internal freelists, locked with global taskq_freelist_lck */
1348  } tq;
1349  volatile struct kmpc_task_queue_t *tq_first_child; /* pointer to linked-list of children, locked by tq's tq_link_lck */
1350  struct kmpc_task_queue_t *tq_next_child; /* next child in linked-list, locked by parent tq's tq_link_lck */
1351  struct kmpc_task_queue_t *tq_prev_child; /* previous child in linked-list, locked by parent tq's tq_link_lck */
1352  volatile kmp_int32 tq_ref_count; /* reference count of threads with access to this task queue */
1353  /* (other than the thread executing the kmpc_end_taskq call) */
1354  /* locked by parent tq's tq_link_lck */
1355 
1356  /* shared data for task queue */
1357  struct kmpc_aligned_shared_vars_t *tq_shareds; /* per-thread array of pointers to shared variable structures */
1358  /* only one array element exists for all but outermost taskq */
1359 
1360  /* bookkeeping for ordered task queue */
1361  kmp_uint32 tq_tasknum_queuing; /* ordered task number assigned while queuing tasks */
1362  volatile kmp_uint32 tq_tasknum_serving; /* ordered number of next task to be served (executed) */
1363 
1364  /* thunk storage management for task queue */
1365  kmp_lock_t tq_free_thunks_lck; /* lock for thunk freelist manipulation */
1366  struct kmpc_thunk_t *tq_free_thunks; /* thunk freelist, chained via th.th_next_free */
1367  struct kmpc_thunk_t *tq_thunk_space; /* space allocated for thunks for this task queue */
1368 
1369  /* data fields for queue itself */
1370  kmp_lock_t tq_queue_lck; /* lock for [de]enqueue operations: tq_queue, tq_head, tq_tail, tq_nfull */
1371  kmpc_aligned_queue_slot_t *tq_queue; /* array of queue slots to hold thunks for tasks */
1372  volatile struct kmpc_thunk_t *tq_taskq_slot; /* special slot for taskq task thunk, occupied if not NULL */
1373  kmp_int32 tq_nslots; /* # of tq_thunk_space thunks alloc'd (not incl. tq_taskq_slot space) */
1374  kmp_int32 tq_head; /* enqueue puts next item in here (index into tq_queue array) */
1375  kmp_int32 tq_tail; /* dequeue takes next item out of here (index into tq_queue array) */
1376  volatile kmp_int32 tq_nfull; /* # of occupied entries in task queue right now */
1377  kmp_int32 tq_hiwat; /* high-water mark for tq_nfull and queue scheduling */
1378  volatile kmp_int32 tq_flags; /* TQF_xxx */
1379 
1380  /* bookkeeping for outstanding thunks */
1381  struct kmpc_aligned_int32_t *tq_th_thunks; /* per-thread array for # of regular thunks currently being executed */
1382  kmp_int32 tq_nproc; /* number of thunks in the th_thunks array */
1383 
1384  /* statistics library bookkeeping */
1385  ident_t *tq_loc; /* source location information for taskq directive */
1386 } kmpc_task_queue_t;
1387 
1388 typedef void (*kmpc_task_t) (kmp_int32 global_tid, struct kmpc_thunk_t *thunk);
1389 
1390 /* sizeof_shareds passed as arg to __kmpc_taskq call */
1391 typedef struct kmpc_shared_vars_t { /* aligned during dynamic allocation */
1392  kmpc_task_queue_t *sv_queue;
1393  /* (pointers to) shared vars */
1394 } kmpc_shared_vars_t;
1395 
1396 typedef struct KMP_ALIGN_CACHE kmpc_aligned_shared_vars_t {
1397  volatile struct kmpc_shared_vars_t *ai_data;
1398 } kmpc_aligned_shared_vars_t;
1399 
1400 /* sizeof_thunk passed as arg to kmpc_taskq call */
1401 typedef struct kmpc_thunk_t { /* aligned during dynamic allocation */
1402  union { /* field used for internal freelists too */
1403  kmpc_shared_vars_t *th_shareds;
1404  struct kmpc_thunk_t *th_next_free; /* freelist of individual thunks within queue, head at tq_free_thunks */
1405  } th;
1406  kmpc_task_t th_task; /* taskq_task if flags & TQF_TASKQ_TASK */
1407  struct kmpc_thunk_t *th_encl_thunk; /* pointer to dynamically enclosing thunk on this thread's call stack */
1408  kmp_int32 th_flags; /* TQF_xxx (tq_flags interface plus possible internal flags) */
1409  kmp_int32 th_status;
1410  kmp_uint32 th_tasknum; /* task number assigned in order of queuing, used for ordered sections */
1411  /* private vars */
1412 } kmpc_thunk_t;
1413 
1414 typedef struct KMP_ALIGN_CACHE kmp_taskq {
1415  int tq_curr_thunk_capacity;
1416 
1417  kmpc_task_queue_t *tq_root;
1418  kmp_int32 tq_global_flags;
1419 
1420  kmp_lock_t tq_freelist_lck;
1421  kmpc_task_queue_t *tq_freelist;
1422 
1423  kmpc_thunk_t **tq_curr_thunk;
1424 } kmp_taskq_t;
1425 
1426 /* END Taskq data structures */
1427 /* --------------------------------------------------------------------------- */
1428 
1429 typedef kmp_int32 kmp_critical_name[8];
1430 
1439 typedef void (*kmpc_micro) ( kmp_int32 * global_tid, kmp_int32 * bound_tid, ... );
1440 typedef void (*kmpc_micro_bound) ( kmp_int32 * bound_tid, kmp_int32 * bound_nth, ... );
1441 
1446 /* --------------------------------------------------------------------------- */
1447 /* Threadprivate initialization/finalization function declarations */
1448 
1449 /* for non-array objects: __kmpc_threadprivate_register() */
1450 
1455 typedef void *(*kmpc_ctor) (void *);
1456 
1461 typedef void (*kmpc_dtor) (void * /*, size_t */); /* 2nd arg: magic number for KCC unused by Intel compiler */
1466 typedef void *(*kmpc_cctor) (void *, void *);
1467 
1468 /* for array objects: __kmpc_threadprivate_register_vec() */
1469  /* First arg: "this" pointer */
1470  /* Last arg: number of array elements */
1476 typedef void *(*kmpc_ctor_vec) (void *, size_t);
1482 typedef void (*kmpc_dtor_vec) (void *, size_t);
1488 typedef void *(*kmpc_cctor_vec) (void *, void *, size_t); /* function unused by compiler */
1489 
1495 /* ------------------------------------------------------------------------ */
1496 
1497 /* keeps tracked of threadprivate cache allocations for cleanup later */
1498 typedef struct kmp_cached_addr {
1499  void **addr; /* address of allocated cache */
1500  struct kmp_cached_addr *next; /* pointer to next cached address */
1501 } kmp_cached_addr_t;
1502 
1503 struct private_data {
1504  struct private_data *next; /* The next descriptor in the list */
1505  void *data; /* The data buffer for this descriptor */
1506  int more; /* The repeat count for this descriptor */
1507  size_t size; /* The data size for this descriptor */
1508 };
1509 
1510 struct private_common {
1511  struct private_common *next;
1512  struct private_common *link;
1513  void *gbl_addr;
1514  void *par_addr; /* par_addr == gbl_addr for MASTER thread */
1515  size_t cmn_size;
1516 };
1517 
1518 struct shared_common
1519 {
1520  struct shared_common *next;
1521  struct private_data *pod_init;
1522  void *obj_init;
1523  void *gbl_addr;
1524  union {
1525  kmpc_ctor ctor;
1526  kmpc_ctor_vec ctorv;
1527  } ct;
1528  union {
1529  kmpc_cctor cctor;
1530  kmpc_cctor_vec cctorv;
1531  } cct;
1532  union {
1533  kmpc_dtor dtor;
1534  kmpc_dtor_vec dtorv;
1535  } dt;
1536  size_t vec_len;
1537  int is_vec;
1538  size_t cmn_size;
1539 };
1540 
1541 #define KMP_HASH_TABLE_LOG2 9 /* log2 of the hash table size */
1542 #define KMP_HASH_TABLE_SIZE (1 << KMP_HASH_TABLE_LOG2) /* size of the hash table */
1543 #define KMP_HASH_SHIFT 3 /* throw away this many low bits from the address */
1544 #define KMP_HASH(x) ((((kmp_uintptr_t) x) >> KMP_HASH_SHIFT) & (KMP_HASH_TABLE_SIZE-1))
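/* Worked example (editorial sketch): for a global address 0x7f001230,
 * KMP_HASH drops the low KMP_HASH_SHIFT bits and keeps KMP_HASH_TABLE_LOG2
 * bits: (0x7f001230 >> 3) & 0x1FF == 0x0FE00246 & 0x1FF == 0x46, i.e.
 * bucket 70 of the 512-entry table. */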
1545 
1546 struct common_table {
1547  struct private_common *data[ KMP_HASH_TABLE_SIZE ];
1548 };
1549 
1550 struct shared_table {
1551  struct shared_common *data[ KMP_HASH_TABLE_SIZE ];
1552 };
1553 /* ------------------------------------------------------------------------ */
1554 /* ------------------------------------------------------------------------ */
1555 
1556 #if KMP_STATIC_STEAL_ENABLED
1557 typedef struct KMP_ALIGN_CACHE dispatch_private_info32 {
1558  kmp_int32 count;
1559  kmp_int32 ub;
1560  /* Adding KMP_ALIGN_CACHE here doesn't help / can hurt performance */
1561  kmp_int32 lb;
1562  kmp_int32 st;
1563  kmp_int32 tc;
1564  kmp_int32 static_steal_counter; /* for static_steal only; maybe better to put after ub */
1565 
1566  // KMP_ALIGN( 32 ) ensures ( if the KMP_ALIGN macro is turned on )
1567  // a) parm3 is properly aligned and
1568  // b) all of parm1-4 are in the same cache line.
1569  // Because parm1-4 are used together, performance seems to be better
1570  // if they are in the same line (not measured, though).
1571 
1572  struct KMP_ALIGN( 32 ) { // AC: changed 16 to 32 in order to simplify template
1573  kmp_int32 parm1; // structures in kmp_dispatch.cpp. This should
1574  kmp_int32 parm2; // make no real change at least while padding is off.
1575  kmp_int32 parm3;
1576  kmp_int32 parm4;
1577  };
1578 
1579  kmp_uint32 ordered_lower;
1580  kmp_uint32 ordered_upper;
1581 #if KMP_OS_WINDOWS
1582  // This var can be placed in the hole between 'tc' and 'parm1', instead of 'static_steal_counter'.
1583  // It would be nice to measure execution times.
1584  // The conditional #if/#endif could be removed entirely.
1585  kmp_int32 last_upper;
1586 #endif /* KMP_OS_WINDOWS */
1587 } dispatch_private_info32_t;
1588 
1589 typedef struct KMP_ALIGN_CACHE dispatch_private_info64 {
1590  kmp_int64 count; /* current chunk number for static and static-steal scheduling*/
1591  kmp_int64 ub; /* upper-bound */
1592  /* Adding KMP_ALIGN_CACHE here doesn't help / can hurt performance */
1593  kmp_int64 lb; /* lower-bound */
1594  kmp_int64 st; /* stride */
1595  kmp_int64 tc; /* trip count (number of iterations) */
1596  kmp_int64 static_steal_counter; /* for static_steal only; maybe better to put after ub */
1597 
1598  /* parm[1-4] are used in different ways by different scheduling algorithms */
1599 
1600  // KMP_ALIGN( 32 ) ensures ( if the KMP_ALIGN macro is turned on )
1601  // a) parm3 is properly aligned and
1602  // b) all parm1-4 are in the same cache line.
1603  // Because parm1-4 are used together, performance seems to be better
1604  // if they are in the same line (not measured though).
1605 
1606  struct KMP_ALIGN( 32 ) {
1607  kmp_int64 parm1;
1608  kmp_int64 parm2;
1609  kmp_int64 parm3;
1610  kmp_int64 parm4;
1611  };
1612 
1613  kmp_uint64 ordered_lower;
1614  kmp_uint64 ordered_upper;
1615 #if KMP_OS_WINDOWS
1616  // This var can be placed in the hole between 'tc' and 'parm1', instead of 'static_steal_counter'.
1617  // It would be nice to measure execution times.
1618  // The conditional #if/#endif could be removed entirely.
1619  kmp_int64 last_upper;
1620 #endif /* KMP_OS_WINDOWS */
1621 } dispatch_private_info64_t;
1622 #else /* KMP_STATIC_STEAL_ENABLED */
1623 typedef struct KMP_ALIGN_CACHE dispatch_private_info32 {
1624  kmp_int32 lb;
1625  kmp_int32 ub;
1626  kmp_int32 st;
1627  kmp_int32 tc;
1628 
1629  kmp_int32 parm1;
1630  kmp_int32 parm2;
1631  kmp_int32 parm3;
1632  kmp_int32 parm4;
1633 
1634  kmp_int32 count;
1635 
1636  kmp_uint32 ordered_lower;
1637  kmp_uint32 ordered_upper;
1638 #if KMP_OS_WINDOWS
1639  kmp_int32 last_upper;
1640 #endif /* KMP_OS_WINDOWS */
1641 } dispatch_private_info32_t;
1642 
1643 typedef struct KMP_ALIGN_CACHE dispatch_private_info64 {
1644  kmp_int64 lb; /* lower-bound */
1645  kmp_int64 ub; /* upper-bound */
1646  kmp_int64 st; /* stride */
1647  kmp_int64 tc; /* trip count (number of iterations) */
1648 
1649  /* parm[1-4] are used in different ways by different scheduling algorithms */
1650  kmp_int64 parm1;
1651  kmp_int64 parm2;
1652  kmp_int64 parm3;
1653  kmp_int64 parm4;
1654 
1655  kmp_int64 count; /* current chunk number for static scheduling */
1656 
1657  kmp_uint64 ordered_lower;
1658  kmp_uint64 ordered_upper;
1659 #if KMP_OS_WINDOWS
1660  kmp_int64 last_upper;
1661 #endif /* KMP_OS_WINDOWS */
1662 } dispatch_private_info64_t;
1663 #endif /* KMP_STATIC_STEAL_ENABLED */
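
// Editorial example (sketch only): the lb/ub/st/tc fields above describe a loop in
// canonical form, where logical iteration i maps to lb + i*st and tc is the trip
// count. A simple block-static split over nproc threads could derive per-thread
// bounds as below; this is an illustration, not the runtime's dispatch algorithm,
// and the function name is hypothetical.
#if 0
static void
example_block_static_split( const dispatch_private_info32_t *pr,
                            kmp_int32 tid, kmp_int32 nproc,
                            kmp_int32 *chunk_lb, kmp_int32 *chunk_ub )
{
    kmp_int32 base  = pr->tc / nproc;                    /* iterations per thread      */
    kmp_int32 extra = pr->tc % nproc;                    /* first 'extra' get one more */
    kmp_int32 start = tid * base + ( tid < extra ? tid : extra );
    kmp_int32 size  = base + ( tid < extra ? 1 : 0 );
    *chunk_lb = pr->lb + start * pr->st;
    *chunk_ub = pr->lb + ( start + size - 1 ) * pr->st;  /* empty chunk if size == 0 */
}
#endif
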
1664 
1665 typedef struct KMP_ALIGN_CACHE dispatch_private_info {
1666  union private_info {
1667  dispatch_private_info32_t p32;
1668  dispatch_private_info64_t p64;
1669  } u;
1670  enum sched_type schedule; /* scheduling algorithm */
1671  kmp_int32 ordered; /* ordered clause specified */
1672  kmp_int32 ordered_bumped;
1673  kmp_int32 ordered_dummy[KMP_MAX_ORDERED-3]; // to retain the structure size after making ordered_iteration scalar
1674  struct dispatch_private_info * next; /* stack of buffers for nest of serial regions */
1675  kmp_int32 nomerge; /* don't merge iters if serialized */
1676  kmp_int32 type_size; /* the size of types in private_info */
1677  enum cons_type pushed_ws;
1678 } dispatch_private_info_t;
1679 
1680 typedef struct dispatch_shared_info32 {
1681  /* chunk index under dynamic, number of idle threads under static-steal;
1682  iteration index otherwise */
1683  volatile kmp_uint32 iteration;
1684  volatile kmp_uint32 num_done;
1685  volatile kmp_uint32 ordered_iteration;
1686  kmp_int32 ordered_dummy[KMP_MAX_ORDERED-1]; // to retain the structure size after making ordered_iteration scalar
1687 } dispatch_shared_info32_t;
1688 
1689 typedef struct dispatch_shared_info64 {
1690  /* chunk index under dynamic, number of idle threads under static-steal;
1691  iteration index otherwise */
1692  volatile kmp_uint64 iteration;
1693  volatile kmp_uint64 num_done;
1694  volatile kmp_uint64 ordered_iteration;
1695  kmp_int64 ordered_dummy[KMP_MAX_ORDERED-3]; // to retain the structure size after making ordered_iteration scalar
1696 } dispatch_shared_info64_t;
1697 
1698 typedef struct dispatch_shared_info {
1699  union shared_info {
1700  dispatch_shared_info32_t s32;
1701  dispatch_shared_info64_t s64;
1702  } u;
1703  volatile kmp_uint32 buffer_index;
1704 #if OMP_45_ENABLED
1705  volatile kmp_int32 doacross_buf_idx; // teamwise index
1706  volatile kmp_uint32 *doacross_flags; // shared array of iteration flags (0/1)
1707  kmp_int32 doacross_num_done; // count finished threads
1708 #endif
1709 #if KMP_USE_HWLOC
1710  // When linking with libhwloc, the ORDERED EPCC test slows down on big
1711  // machines (> 48 cores). Performance analysis showed that a cache thrash
1712  // was occurring and this padding helps alleviate the problem.
1713  char padding[64];
1714 #endif
1715 } dispatch_shared_info_t;
1716 
1717 typedef struct kmp_disp {
1718  /* Vector for ORDERED SECTION */
1719  void (*th_deo_fcn)( int * gtid, int * cid, ident_t *);
1720  /* Vector for END ORDERED SECTION */
1721  void (*th_dxo_fcn)( int * gtid, int * cid, ident_t *);
1722 
1723  dispatch_shared_info_t *th_dispatch_sh_current;
1724  dispatch_private_info_t *th_dispatch_pr_current;
1725 
1726  dispatch_private_info_t *th_disp_buffer;
1727  kmp_int32 th_disp_index;
1728 #if OMP_45_ENABLED
1729  kmp_int32 th_doacross_buf_idx; // thread's doacross buffer index
1730  volatile kmp_uint32 *th_doacross_flags; // pointer to shared array of flags
1731  union { // we can use union here because doacross cannot be used in nonmonotonic loops
1732  kmp_int64 *th_doacross_info; // info on loop bounds
1733  kmp_lock_t *th_steal_lock; // lock used for chunk stealing (8-byte variable)
1734  };
1735 #else
1736  void* dummy_padding[2]; // make it 64 bytes on Intel(R) 64
1737 #endif
1738 #if KMP_USE_INTERNODE_ALIGNMENT
1739  char more_padding[INTERNODE_CACHE_LINE];
1740 #endif
1741 } kmp_disp_t;
1742 
1743 /* ------------------------------------------------------------------------ */
1744 /* ------------------------------------------------------------------------ */
1745 
1746 /* Barrier stuff */
1747 
1748 /* constants for barrier state update */
1749 #define KMP_INIT_BARRIER_STATE 0 /* should probably start from zero */
1750 #define KMP_BARRIER_SLEEP_BIT 0 /* bit used for suspend/sleep part of state */
1751 #define KMP_BARRIER_UNUSED_BIT 1 /* bit that must never be set for valid state */
1752 #define KMP_BARRIER_BUMP_BIT 2 /* lsb used for bump of go/arrived state */
1753 
1754 #define KMP_BARRIER_SLEEP_STATE (1 << KMP_BARRIER_SLEEP_BIT)
1755 #define KMP_BARRIER_UNUSED_STATE (1 << KMP_BARRIER_UNUSED_BIT)
1756 #define KMP_BARRIER_STATE_BUMP (1 << KMP_BARRIER_BUMP_BIT)
1757 
1758 #if (KMP_BARRIER_SLEEP_BIT >= KMP_BARRIER_BUMP_BIT)
1759 # error "Barrier sleep bit must be smaller than barrier bump bit"
1760 #endif
1761 #if (KMP_BARRIER_UNUSED_BIT >= KMP_BARRIER_BUMP_BIT)
1762 # error "Barrier unused bit must be smaller than barrier bump bit"
1763 #endif
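
// Editorial example (sketch only): how the bit layout above is meant to be used.
// The go/arrived counter lives in the bits at and above KMP_BARRIER_BUMP_BIT, so a
// release bumps the state by KMP_BARRIER_STATE_BUMP, while the low sleep bit tells
// the releaser whether the waiter had gone to sleep and must be woken explicitly.
// The flag variable and function name below are illustrative; the real runtime uses
// atomic operations on the barrier flags.
#if 0
static int
example_release_barrier_flag( volatile kmp_uint64 *go_flag )
{
    kmp_uint64 old_state = *go_flag;
    *go_flag = old_state + KMP_BARRIER_STATE_BUMP;          /* advance go/arrived state */
    return ( old_state & KMP_BARRIER_SLEEP_STATE ) != 0;    /* was the waiter sleeping? */
}
#endif
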
1764 
1765 // Constants for release barrier wait state: currently, hierarchical only
1766 #define KMP_BARRIER_NOT_WAITING 0 // Normal state; worker not in wait_sleep
1767 #define KMP_BARRIER_OWN_FLAG 1 // Normal state; worker waiting on own b_go flag in release
1768 #define KMP_BARRIER_PARENT_FLAG 2 // Special state; worker waiting on parent's b_go flag in release
1769 #define KMP_BARRIER_SWITCH_TO_OWN_FLAG 3 // Special state; tells worker to shift from parent to own b_go
1770 #define KMP_BARRIER_SWITCHING 4 // Special state; worker resets appropriate flag on wake-up
1771 
1772 enum barrier_type {
1773  bs_plain_barrier = 0, /* 0, All non-fork/join barriers (except reduction barriers if enabled) */
1774  bs_forkjoin_barrier, /* 1, All fork/join (parallel region) barriers */
1775  #if KMP_FAST_REDUCTION_BARRIER
1776  bs_reduction_barrier, /* 2, All barriers that are used in reduction */
1777  #endif // KMP_FAST_REDUCTION_BARRIER
1778  bs_last_barrier /* Just a placeholder to mark the end */
1779 };
1780 
1781 // to work with reduction barriers just like with plain barriers
1782 #if !KMP_FAST_REDUCTION_BARRIER
1783  #define bs_reduction_barrier bs_plain_barrier
1784 #endif // KMP_FAST_REDUCTION_BARRIER
1785 
1786 typedef enum kmp_bar_pat { /* Barrier communication patterns */
1787  bp_linear_bar = 0, /* Single level (degenerate) tree */
1788  bp_tree_bar = 1, /* Balanced tree with branching factor 2^n */
1789  bp_hyper_bar = 2, /* Hypercube-embedded tree with min branching factor 2^n */
1790  bp_hierarchical_bar = 3, /* Machine hierarchy tree */
1791  bp_last_bar = 4 /* Placeholder to mark the end */
1792 } kmp_bar_pat_e;
1793 
1794 # define KMP_BARRIER_ICV_PUSH 1
1795 
1796 /* Record for holding the values of the internal controls stack records */
1797 typedef struct kmp_internal_control {
1798  int serial_nesting_level; /* corresponds to the value of the th_team_serialized field */
1799  kmp_int8 nested; /* internal control for nested parallelism (per thread) */
1800  kmp_int8 dynamic; /* internal control for dynamic adjustment of threads (per thread) */
1801  kmp_int8 bt_set; /* internal control for whether blocktime is explicitly set */
1802  int blocktime; /* internal control for blocktime */
1803  int bt_intervals; /* internal control for blocktime intervals */
1804  int nproc; /* internal control for #threads for next parallel region (per thread) */
1805  int max_active_levels; /* internal control for max_active_levels */
1806  kmp_r_sched_t sched; /* internal control for runtime schedule {sched,chunk} pair */
1807 #if OMP_40_ENABLED
1808  kmp_proc_bind_t proc_bind; /* internal control for affinity */
1809 #endif // OMP_40_ENABLED
1810  struct kmp_internal_control *next;
1811 } kmp_internal_control_t;
1812 
1813 static inline void
1814 copy_icvs( kmp_internal_control_t *dst, kmp_internal_control_t *src ) {
1815  *dst = *src;
1816 }
1817 
1818 /* Thread barrier needs volatile barrier fields */
1819 typedef struct KMP_ALIGN_CACHE kmp_bstate {
1820  // th_fixed_icvs is aligned by virtue of kmp_bstate being aligned (and all uses of it).
1821  // It is not explicitly aligned below, because we *don't* want it to be padded -- instead,
1822  // we fit b_go into the same cache line with th_fixed_icvs, enabling NGO cache-line
1823  // stores in the hierarchical barrier.
1824  kmp_internal_control_t th_fixed_icvs; // Initial ICVs for the thread
1825  // Tuck b_go into end of th_fixed_icvs cache line, so it can be stored with same NGO store
1826  volatile kmp_uint64 b_go; // STATE => task should proceed (hierarchical)
1827  KMP_ALIGN_CACHE volatile kmp_uint64 b_arrived; // STATE => task reached synch point.
1828  kmp_uint32 *skip_per_level;
1829  kmp_uint32 my_level;
1830  kmp_int32 parent_tid;
1831  kmp_int32 old_tid;
1832  kmp_uint32 depth;
1833  struct kmp_bstate *parent_bar;
1834  kmp_team_t *team;
1835  kmp_uint64 leaf_state;
1836  kmp_uint32 nproc;
1837  kmp_uint8 base_leaf_kids;
1838  kmp_uint8 leaf_kids;
1839  kmp_uint8 offset;
1840  kmp_uint8 wait_flag;
1841  kmp_uint8 use_oncore_barrier;
1842 #if USE_DEBUGGER
1843  // The following field is intended solely for the debugger. Only the worker thread itself accesses this
1844  // field: the worker increments it by 1 when it arrives at a barrier.
1845  KMP_ALIGN_CACHE kmp_uint b_worker_arrived;
1846 #endif /* USE_DEBUGGER */
1847 } kmp_bstate_t;
1848 
1849 union KMP_ALIGN_CACHE kmp_barrier_union {
1850  double b_align; /* use worst case alignment */
1851  char b_pad[ KMP_PAD(kmp_bstate_t, CACHE_LINE) ];
1852  kmp_bstate_t bb;
1853 };
1854 
1855 typedef union kmp_barrier_union kmp_balign_t;
1856 
1857 /* Team barrier needs only non-volatile arrived counter */
1858 union KMP_ALIGN_CACHE kmp_barrier_team_union {
1859  double b_align; /* use worst case alignment */
1860  char b_pad[ CACHE_LINE ];
1861  struct {
1862  kmp_uint64 b_arrived; /* STATE => task reached synch point. */
1863 #if USE_DEBUGGER
1864  // The following two fields are intended solely for the debugger. Only the master of the team accesses
1865  // these fields: the first one is incremented by 1 when the master arrives at a barrier, the
1866  // second one is incremented by 1 when all the threads have arrived.
1867  kmp_uint b_master_arrived;
1868  kmp_uint b_team_arrived;
1869 #endif
1870  };
1871 };
1872 
1873 typedef union kmp_barrier_team_union kmp_balign_team_t;
1874 
1875 /*
1876  * Padding for Linux* OS pthreads condition variables and mutexes used to signal
1877  * threads when a condition changes. This is to work around an NPTL bug
1878  * where padding was added to pthread_cond_t, which caused the initialization
1879  * routine to write outside the structure if compiled on pre-NPTL threads.
1880  */
1881 
1882 #if KMP_OS_WINDOWS
1883 typedef struct kmp_win32_mutex
1884 {
1885  /* The Lock */
1886  CRITICAL_SECTION cs;
1887 } kmp_win32_mutex_t;
1888 
1889 typedef struct kmp_win32_cond
1890 {
1891  /* Count of the number of waiters. */
1892  int waiters_count_;
1893 
1894  /* Serialize access to <waiters_count_> */
1895  kmp_win32_mutex_t waiters_count_lock_;
1896 
1897  /* Number of threads to release via a <cond_broadcast> or a */
1898  /* <cond_signal> */
1899  int release_count_;
1900 
1901  /* Keeps track of the current "generation" so that we don't allow */
1902  /* one thread to steal all the "releases" from the broadcast. */
1903  int wait_generation_count_;
1904 
1905  /* A manual-reset event that's used to block and release waiting */
1906  /* threads. */
1907  HANDLE event_;
1908 } kmp_win32_cond_t;
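
// Editorial example (sketch only, not the runtime's implementation): one plausible
// broadcast path over the fields above, following the classic generation-count
// scheme the comments describe -- release every current waiter, start a new
// generation, and signal the manual-reset event. The function name is hypothetical.
#if 0
static void
example_cond_broadcast( kmp_win32_cond_t *cv )
{
    EnterCriticalSection( &cv->waiters_count_lock_.cs );
    if ( cv->waiters_count_ > 0 ) {
        cv->release_count_ = cv->waiters_count_;   /* wake everyone currently waiting */
        cv->wait_generation_count_++;              /* start a new "generation"        */
        SetEvent( cv->event_ );                    /* manual-reset event releases all */
    }
    LeaveCriticalSection( &cv->waiters_count_lock_.cs );
}
#endif
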
1909 #endif
1910 
1911 #if KMP_OS_UNIX
1912 
1913 union KMP_ALIGN_CACHE kmp_cond_union {
1914  double c_align;
1915  char c_pad[ CACHE_LINE ];
1916  pthread_cond_t c_cond;
1917 };
1918 
1919 typedef union kmp_cond_union kmp_cond_align_t;
1920 
1921 union KMP_ALIGN_CACHE kmp_mutex_union {
1922  double m_align;
1923  char m_pad[ CACHE_LINE ];
1924  pthread_mutex_t m_mutex;
1925 };
1926 
1927 typedef union kmp_mutex_union kmp_mutex_align_t;
1928 
1929 #endif /* KMP_OS_UNIX */
1930 
1931 typedef struct kmp_desc_base {
1932  void *ds_stackbase;
1933  size_t ds_stacksize;
1934  int ds_stackgrow;
1935  kmp_thread_t ds_thread;
1936  volatile int ds_tid;
1937  int ds_gtid;
1938 #if KMP_OS_WINDOWS
1939  volatile int ds_alive;
1940  DWORD ds_thread_id;
1941  /*
1942  ds_thread keeps the thread handle on Windows* OS. That is enough for RTL purposes. However,
1943  debugger support (libomp_db) cannot work with handles, because they are not comparable. For
1944  example, the debugger requests info about a thread with handle h; h is valid within the debugger
1945  process but meaningless within the debuggee process. Even if h is duplicated via
1946  DuplicateHandle() so that the resulting h' is valid within the debuggee process, it is a *new*
1947  handle which does *not* equal any other handle in the debuggee... The only way to
1948  compare handles is to convert them to system-wide ids. The GetThreadId() function is
1949  available only on Longhorn and Server 2003. :-( In contrast, GetCurrentThreadId() is
1950  available on all Windows* OS flavours (including Windows* 95). Thus, we have to get the thread id by
1951  calling GetCurrentThreadId() from within the thread and save it to let libomp_db
1952  identify threads.
1953  */
1954 #endif /* KMP_OS_WINDOWS */
1955 } kmp_desc_base_t;
1956 
1957 typedef union KMP_ALIGN_CACHE kmp_desc {
1958  double ds_align; /* use worst case alignment */
1959  char ds_pad[ KMP_PAD(kmp_desc_base_t, CACHE_LINE) ];
1960  kmp_desc_base_t ds;
1961 } kmp_desc_t;
1962 
1963 
1964 typedef struct kmp_local {
1965  volatile int this_construct; /* count of 'single' constructs encountered by thread */
1966  void *reduce_data;
1967 #if KMP_USE_BGET
1968  void *bget_data;
1969  void *bget_list;
1970 #if ! USE_CMP_XCHG_FOR_BGET
1971 #ifdef USE_QUEUING_LOCK_FOR_BGET
1972  kmp_lock_t bget_lock; /* Lock for accessing bget free list */
1973 #else
1974  kmp_bootstrap_lock_t bget_lock; /* Lock for accessing bget free list */
1975  /* Must be bootstrap lock so we can use it at library shutdown */
1976 #endif /* USE_QUEUING_LOCK_FOR_BGET */
1977 #endif /* ! USE_CMP_XCHG_FOR_BGET */
1978 #endif /* KMP_USE_BGET */
1979 
1980 #ifdef BUILD_TV
1981  struct tv_data *tv_data;
1982 #endif
1983 
1984  PACKED_REDUCTION_METHOD_T packed_reduction_method; /* stored by __kmpc_reduce*(), used by __kmpc_end_reduce*() */
1985 
1986 } kmp_local_t;
1987 
1988 #define KMP_CHECK_UPDATE(a, b) if ((a) != (b)) (a) = (b)
1989 #define KMP_CHECK_UPDATE_SYNC(a, b) if ((a) != (b)) TCW_SYNC_PTR((a), (b))
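
// Editorial example (sketch only): these macros avoid dirtying a shared cache line
// when the stored value is already correct. A typical use when refreshing team
// fields might look like the snippet below; team, argc, and microtask are
// placeholders (kmp_team_t is defined later in this file).
#if 0
    KMP_CHECK_UPDATE( team->t.t_argc, argc );            /* plain store, only if changed  */
    KMP_CHECK_UPDATE_SYNC( team->t.t_pkfn, microtask );  /* synchronized store if changed */
#endif
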
1990 
1991 #define get__blocktime( xteam, xtid ) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.blocktime)
1992 #define get__bt_set( xteam, xtid ) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_set)
1993 #define get__bt_intervals( xteam, xtid ) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_intervals)
1994 
1995 #define get__nested_2(xteam,xtid) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.nested)
1996 #define get__dynamic_2(xteam,xtid) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.dynamic)
1997 #define get__nproc_2(xteam,xtid) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.nproc)
1998 #define get__sched_2(xteam,xtid) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.sched)
1999 
2000 #define set__blocktime_team( xteam, xtid, xval ) \
2001  ( ( (xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.blocktime ) = (xval) )
2002 
2003 #define set__bt_intervals_team( xteam, xtid, xval ) \
2004  ( ( (xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_intervals ) = (xval) )
2005 
2006 #define set__bt_set_team( xteam, xtid, xval ) \
2007  ( ( (xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_set ) = (xval) )
2008 
2009 
2010 #define set__nested( xthread, xval ) \
2011  ( ( (xthread)->th.th_current_task->td_icvs.nested ) = (xval) )
2012 #define get__nested( xthread ) \
2013  ( ( (xthread)->th.th_current_task->td_icvs.nested ) ? (FTN_TRUE) : (FTN_FALSE) )
2014 
2015 #define set__dynamic( xthread, xval ) \
2016  ( ( (xthread)->th.th_current_task->td_icvs.dynamic ) = (xval) )
2017 #define get__dynamic( xthread ) \
2018  ( ( (xthread)->th.th_current_task->td_icvs.dynamic ) ? (FTN_TRUE) : (FTN_FALSE) )
2019 
2020 #define set__nproc( xthread, xval ) \
2021  ( ( (xthread)->th.th_current_task->td_icvs.nproc ) = (xval) )
2022 
2023 #define set__max_active_levels( xthread, xval ) \
2024  ( ( (xthread)->th.th_current_task->td_icvs.max_active_levels ) = (xval) )
2025 
2026 #define set__sched( xthread, xval ) \
2027  ( ( (xthread)->th.th_current_task->td_icvs.sched ) = (xval) )
2028 
2029 #if OMP_40_ENABLED
2030 
2031 #define set__proc_bind( xthread, xval ) \
2032  ( ( (xthread)->th.th_current_task->td_icvs.proc_bind ) = (xval) )
2033 #define get__proc_bind( xthread ) \
2034  ( (xthread)->th.th_current_task->td_icvs.proc_bind )
2035 
2036 #endif /* OMP_40_ENABLED */
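
// Editorial example (sketch only): the accessor macros above read and write the ICVs
// stored in the thread's current implicit task. 'thr' below is a placeholder for a
// kmp_info_t * obtained elsewhere (e.g. from __kmp_threads[gtid]).
#if 0
    set__nproc( thr, 4 );                      /* next parallel region asks for 4 threads */
    set__dynamic( thr, 1 );                    /* allow dynamic adjustment of team size   */
    if ( get__dynamic( thr ) == FTN_TRUE ) {
        /* the runtime may shrink the team under load */
    }
#endif
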
2037 
2038 
2039 /* ------------------------------------------------------------------------ */
2040 // OpenMP tasking data structures
2041 //
2042 
2043 typedef enum kmp_tasking_mode {
2044  tskm_immediate_exec = 0,
2045  tskm_extra_barrier = 1,
2046  tskm_task_teams = 2,
2047  tskm_max = 2
2048 } kmp_tasking_mode_t;
2049 
2050 extern kmp_tasking_mode_t __kmp_tasking_mode; /* determines how/when to execute tasks */
2051 extern kmp_int32 __kmp_task_stealing_constraint;
2052 #if OMP_45_ENABLED
2053  extern kmp_int32 __kmp_max_task_priority; // Set via OMP_MAX_TASK_PRIORITY if specified, defaults to 0 otherwise
2054 #endif
2055 
2056 /* NOTE: kmp_taskdata_t and kmp_task_t structures are allocated in a single block, with taskdata first */
2057 #define KMP_TASK_TO_TASKDATA(task) (((kmp_taskdata_t *) task) - 1)
2058 #define KMP_TASKDATA_TO_TASK(taskdata) (kmp_task_t *) (taskdata + 1)
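
// Editorial example (sketch only): the two structures are carved out of one
// allocation, taskdata first, so the conversion macros above reduce to simple
// pointer arithmetic. 'block' is a placeholder for the start of that allocation.
#if 0
    /*  block -> [ kmp_taskdata_t | kmp_task_t, private & shared data ... ]  */
    kmp_taskdata_t *taskdata = (kmp_taskdata_t *) block;
    kmp_task_t     *task     = KMP_TASKDATA_TO_TASK( taskdata );
    KMP_DEBUG_ASSERT( KMP_TASK_TO_TASKDATA( task ) == taskdata );
#endif
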
2059 
2060 // The tt_found_tasks flag is a signal to all threads in the team that tasks were spawned and
2061 // queued since the previous barrier release.
2062 #define KMP_TASKING_ENABLED(task_team) \
2063  (TCR_SYNC_4((task_team)->tt.tt_found_tasks) == TRUE)
2064 
2071 typedef kmp_int32 (* kmp_routine_entry_t)( kmp_int32, void * );
2072 
2073 #if OMP_40_ENABLED || OMP_45_ENABLED
2074 typedef union kmp_cmplrdata {
2075 #if OMP_45_ENABLED
2076  kmp_int32 priority;
2077 #endif // OMP_45_ENABLED
2078 #if OMP_40_ENABLED
2079  kmp_routine_entry_t destructors; /* pointer to function to invoke destructors of firstprivate C++ objects */
2080 #endif // OMP_40_ENABLED
2081  /* future data */
2082 } kmp_cmplrdata_t;
2083 #endif
2084 
2085 /* sizeof_kmp_task_t passed as arg to kmpc_omp_task call */
2088 typedef struct kmp_task { /* GEH: Shouldn't this be aligned somehow? */
2089  void * shareds;
2090  kmp_routine_entry_t routine;
2091  kmp_int32 part_id;
2092 #if OMP_40_ENABLED || OMP_45_ENABLED
2093  kmp_cmplrdata_t data1; /* Two known optional additions: destructors and priority */
2094  kmp_cmplrdata_t data2; /* Process destructors first, priority second */
2095  /* future data */
2096 #endif
2097  /* private vars */
2098 } kmp_task_t;
2099 
2104 #if OMP_40_ENABLED
2105 typedef struct kmp_taskgroup {
2106  kmp_uint32 count; // number of allocated and not yet complete tasks
2107  kmp_int32 cancel_request; // request for cancellation of this taskgroup
2108  struct kmp_taskgroup *parent; // parent taskgroup
2109 } kmp_taskgroup_t;
2110 
2111 
2112 // forward declarations
2113 typedef union kmp_depnode kmp_depnode_t;
2114 typedef struct kmp_depnode_list kmp_depnode_list_t;
2115 typedef struct kmp_dephash_entry kmp_dephash_entry_t;
2116 
2117 typedef struct kmp_depend_info {
2118  kmp_intptr_t base_addr;
2119  size_t len;
2120  struct {
2121  bool in:1;
2122  bool out:1;
2123  } flags;
2124 } kmp_depend_info_t;
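
// Editorial example (sketch only): how a compiler might fill one entry for a
// depend(inout: x) clause; 'x' is a placeholder variable.
#if 0
    kmp_depend_info_t dep;
    dep.base_addr = (kmp_intptr_t) &x;   /* address identifying the dependence object */
    dep.len       = sizeof( x );         /* length of the object                      */
    dep.flags.in  = 1;                   /* inout sets both the in and out bits       */
    dep.flags.out = 1;
#endif
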
2125 
2126 struct kmp_depnode_list {
2127  kmp_depnode_t * node;
2128  kmp_depnode_list_t * next;
2129 };
2130 
2131 typedef struct kmp_base_depnode {
2132  kmp_depnode_list_t * successors;
2133  kmp_task_t * task;
2134 
2135  kmp_lock_t lock;
2136 
2137 #if KMP_SUPPORT_GRAPH_OUTPUT
2138  kmp_uint32 id;
2139 #endif
2140 
2141  volatile kmp_int32 npredecessors;
2142  volatile kmp_int32 nrefs;
2143 } kmp_base_depnode_t;
2144 
2145 union KMP_ALIGN_CACHE kmp_depnode {
2146  double dn_align; /* use worst case alignment */
2147  char dn_pad[ KMP_PAD(kmp_base_depnode_t, CACHE_LINE) ];
2148  kmp_base_depnode_t dn;
2149 };
2150 
2151 struct kmp_dephash_entry {
2152  kmp_intptr_t addr;
2153  kmp_depnode_t * last_out;
2154  kmp_depnode_list_t * last_ins;
2155  kmp_dephash_entry_t * next_in_bucket;
2156 };
2157 
2158 typedef struct kmp_dephash {
2159  kmp_dephash_entry_t ** buckets;
2160  size_t size;
2161 #ifdef KMP_DEBUG
2162  kmp_uint32 nelements;
2163  kmp_uint32 nconflicts;
2164 #endif
2165 } kmp_dephash_t;
2166 
2167 #endif
2168 
2169 #ifdef BUILD_TIED_TASK_STACK
2170 
2171 /* Tied Task stack definitions */
2172 typedef struct kmp_stack_block {
2173  kmp_taskdata_t * sb_block[ TASK_STACK_BLOCK_SIZE ];
2174  struct kmp_stack_block * sb_next;
2175  struct kmp_stack_block * sb_prev;
2176 } kmp_stack_block_t;
2177 
2178 typedef struct kmp_task_stack {
2179  kmp_stack_block_t ts_first_block; // first block of stack entries
2180  kmp_taskdata_t ** ts_top; // pointer to the top of stack
2181  kmp_int32 ts_entries; // number of entries on the stack
2182 } kmp_task_stack_t;
2183 
2184 #endif // BUILD_TIED_TASK_STACK
2185 
2186 typedef struct kmp_tasking_flags { /* Total struct must be exactly 32 bits */
2187  /* Compiler flags */ /* Total compiler flags must be 16 bits */
2188  unsigned tiedness : 1; /* task is either tied (1) or untied (0) */
2189  unsigned final : 1; /* task is final(1) so execute immediately */
2190  unsigned merged_if0 : 1; /* no __kmpc_task_{begin/complete}_if0 calls in if0 code path */
2191 #if OMP_40_ENABLED
2192  unsigned destructors_thunk : 1; /* set if the compiler creates a thunk to invoke destructors from the runtime */
2193 #if OMP_45_ENABLED
2194  unsigned proxy : 1; /* task is a proxy task (it will be executed outside the context of the RTL) */
2195  unsigned priority_specified :1; /* set if the compiler provides priority setting for the task */
2196  unsigned reserved : 10; /* reserved for compiler use */
2197 #else
2198  unsigned reserved : 12; /* reserved for compiler use */
2199 #endif
2200 #else // OMP_40_ENABLED
2201  unsigned reserved : 13; /* reserved for compiler use */
2202 #endif // OMP_40_ENABLED
2203 
2204  /* Library flags */ /* Total library flags must be 16 bits */
2205  unsigned tasktype : 1; /* task is either explicit(1) or implicit (0) */
2206  unsigned task_serial : 1; /* this task is executed immediately (1) or deferred (0) */
2207  unsigned tasking_ser : 1; /* all tasks in team are either executed immediately (1) or may be deferred (0) */
2208  unsigned team_serial : 1; /* entire team is serial (1) [1 thread] or parallel (0) [>= 2 threads] */
2209  /* If either team_serial or tasking_ser is set, task team may be NULL */
2210  /* Task State Flags: */
2211  unsigned started : 1; /* 1==started, 0==not started */
2212  unsigned executing : 1; /* 1==executing, 0==not executing */
2213  unsigned complete : 1; /* 1==complete, 0==not complete */
2214  unsigned freed : 1; /* 1==freed, 0==allocated */
2215  unsigned native : 1; /* 1==gcc-compiled task, 0==intel */
2216  unsigned reserved31 : 7; /* reserved for library use */
2217 
2218 } kmp_tasking_flags_t;
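
// Editorial example (sketch only): the comments above require the flag bits to total
// exactly 32. A compile-time check in the style already used in this file could be:
#if 0
KMP_BUILD_ASSERT( sizeof( kmp_tasking_flags_t ) == 4 );
#endif
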
2219 
2220 
2221 struct kmp_taskdata { /* aligned during dynamic allocation */
2222  kmp_int32 td_task_id; /* id, assigned by debugger */
2223  kmp_tasking_flags_t td_flags; /* task flags */
2224  kmp_team_t * td_team; /* team for this task */
2225  kmp_info_p * td_alloc_thread; /* thread that allocated data structures */
2226  /* Currently not used except for perhaps IDB */
2227  kmp_taskdata_t * td_parent; /* parent task */
2228  kmp_int32 td_level; /* task nesting level */
2229  kmp_int32 td_untied_count; /* untied task active parts counter */
2230  ident_t * td_ident; /* task identifier */
2231  // Taskwait data.
2232  ident_t * td_taskwait_ident;
2233  kmp_uint32 td_taskwait_counter;
2234  kmp_int32 td_taskwait_thread; /* gtid + 1 of thread encountered taskwait */
2235  KMP_ALIGN_CACHE kmp_internal_control_t td_icvs; /* Internal control variables for the task */
2236  KMP_ALIGN_CACHE volatile kmp_uint32 td_allocated_child_tasks; /* Child tasks (+ current task) not yet deallocated */
2237  volatile kmp_uint32 td_incomplete_child_tasks; /* Child tasks not yet complete */
2238 #if OMP_40_ENABLED
2239  kmp_taskgroup_t * td_taskgroup; // Each task keeps pointer to its current taskgroup
2240  kmp_dephash_t * td_dephash; // Dependencies for children tasks are tracked from here
2241  kmp_depnode_t * td_depnode; // Pointer to graph node if this task has dependencies
2242 #endif
2243 #if OMPT_SUPPORT
2244  ompt_task_info_t ompt_task_info;
2245 #endif
2246 #if OMP_45_ENABLED
2247  kmp_task_team_t * td_task_team;
2248  kmp_int32 td_size_alloc; // The size of task structure, including shareds etc.
2249 #endif
2250 }; // struct kmp_taskdata
2251 
2252 // Make sure padding above worked
2253 KMP_BUILD_ASSERT( sizeof(kmp_taskdata_t) % sizeof(void *) == 0 );
2254 
2255 // Data for task team but per thread
2256 typedef struct kmp_base_thread_data {
2257  kmp_info_p * td_thr; // Pointer back to thread info
2258  // Used only in __kmp_execute_tasks_template, maybe not avail until task is queued?
2259  kmp_bootstrap_lock_t td_deque_lock; // Lock for accessing deque
2260  kmp_taskdata_t ** td_deque; // Deque of tasks encountered by td_thr, dynamically allocated
2261  kmp_int32 td_deque_size; // Size of deque
2262  kmp_uint32 td_deque_head; // Head of deque (will wrap)
2263  kmp_uint32 td_deque_tail; // Tail of deque (will wrap)
2264  kmp_int32 td_deque_ntasks; // Number of tasks in deque
2265  // GEH: shouldn't this be volatile since used in while-spin?
2266  kmp_int32 td_deque_last_stolen; // Thread number of last successful steal
2267 #ifdef BUILD_TIED_TASK_STACK
2268  kmp_task_stack_t td_susp_tied_tasks; // Stack of suspended tied tasks for task scheduling constraint
2269 #endif // BUILD_TIED_TASK_STACK
2270 } kmp_base_thread_data_t;
2271 
2272 #define TASK_DEQUE_BITS 8 // Used solely to define INITIAL_TASK_DEQUE_SIZE
2273 #define INITIAL_TASK_DEQUE_SIZE ( 1 << TASK_DEQUE_BITS )
2274 
2275 #define TASK_DEQUE_SIZE(td) ((td).td_deque_size)
2276 #define TASK_DEQUE_MASK(td) ((td).td_deque_size - 1)
2277 
2278 typedef union KMP_ALIGN_CACHE kmp_thread_data {
2279  kmp_base_thread_data_t td;
2280  double td_align; /* use worst case alignment */
2281  char td_pad[ KMP_PAD(kmp_base_thread_data_t, CACHE_LINE) ];
2282 } kmp_thread_data_t;
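
// Editorial example (sketch only): because td_deque_size is kept a power of two,
// TASK_DEQUE_MASK gives cheap wrap-around indexing. A simplified push (the caller is
// assumed to hold td_deque_lock and to have checked that the deque is not full)
// might look like the hypothetical helper below.
#if 0
static void
example_push_task( kmp_thread_data_t *thread_data, kmp_taskdata_t *taskdata )
{
    thread_data->td.td_deque[ thread_data->td.td_deque_tail ] = taskdata;
    thread_data->td.td_deque_tail =
        ( thread_data->td.td_deque_tail + 1 ) & TASK_DEQUE_MASK( thread_data->td );
    thread_data->td.td_deque_ntasks++;
}
#endif
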
2283 
2284 
2285 // Data for task teams which are used when tasking is enabled for the team
2286 typedef struct kmp_base_task_team {
2287  kmp_bootstrap_lock_t tt_threads_lock; /* Lock used to allocate per-thread part of task team */
2288  /* must be bootstrap lock since used at library shutdown*/
2289  kmp_task_team_t * tt_next; /* For linking the task team free list */
2290  kmp_thread_data_t * tt_threads_data; /* Array of per-thread structures for task team */
2291  /* Data survives task team deallocation */
2292  kmp_int32 tt_found_tasks; /* Have we found tasks and queued them while executing this team? */
2293  /* TRUE means tt_threads_data is set up and initialized */
2294  kmp_int32 tt_nproc; /* #threads in team */
2295  kmp_int32 tt_max_threads; /* number of entries allocated for threads_data array */
2296 #if OMP_45_ENABLED
2297  kmp_int32 tt_found_proxy_tasks; /* Have we found proxy tasks since last barrier */
2298 #endif
2299 
2300  KMP_ALIGN_CACHE
2301  volatile kmp_uint32 tt_unfinished_threads; /* #threads still active */
2302 
2303  KMP_ALIGN_CACHE
2304  volatile kmp_uint32 tt_active; /* is the team still actively executing tasks */
2305 } kmp_base_task_team_t;
2306 
2307 union KMP_ALIGN_CACHE kmp_task_team {
2308  kmp_base_task_team_t tt;
2309  double tt_align; /* use worst case alignment */
2310  char tt_pad[ KMP_PAD(kmp_base_task_team_t, CACHE_LINE) ];
2311 };
2312 
2313 #if ( USE_FAST_MEMORY == 3 ) || ( USE_FAST_MEMORY == 5 )
2314 // Free lists keep same-size free memory slots for fast memory allocation routines
2315 typedef struct kmp_free_list {
2316  void *th_free_list_self; // Self-allocated tasks free list
2317  void *th_free_list_sync; // Self-allocated tasks stolen/returned by other threads
2318  void *th_free_list_other; // Non-self free list (to be returned to owner's sync list)
2319 } kmp_free_list_t;
2320 #endif
2321 #if KMP_NESTED_HOT_TEAMS
2322 // The hot teams array keeps hot teams and their sizes for a given thread.
2323 // Hot teams are not put in the teams pool, and they don't put threads in the threads pool.
2324 typedef struct kmp_hot_team_ptr {
2325  kmp_team_p *hot_team; // pointer to hot_team of given nesting level
2326  kmp_int32 hot_team_nth; // number of threads allocated for the hot_team
2327 } kmp_hot_team_ptr_t;
2328 #endif
2329 #if OMP_40_ENABLED
2330 typedef struct kmp_teams_size {
2331  kmp_int32 nteams; // number of teams in a league
2332  kmp_int32 nth; // number of threads in each team of the league
2333 } kmp_teams_size_t;
2334 #endif
2335 
2336 /* ------------------------------------------------------------------------ */
2337 // OpenMP thread data structures
2338 //
2339 
2340 typedef struct KMP_ALIGN_CACHE kmp_base_info {
2341 /*
2342  * Start with the read-only data, which is cache aligned and padded.
2343  * It is written by the master before the thread starts working.
2344  * (uber masters may update themselves later)
2345  * (usage does not consider serialized regions)
2346  */
2347  kmp_desc_t th_info;
2348  kmp_team_p *th_team; /* team we belong to */
2349  kmp_root_p *th_root; /* pointer to root of task hierarchy */
2350  kmp_info_p *th_next_pool; /* next available thread in the pool */
2351  kmp_disp_t *th_dispatch; /* thread's dispatch data */
2352  int th_in_pool; /* in thread pool (32 bits for TCR/TCW) */
2353 
2354  /* The following are cached from the team info structure */
2355  /* TODO use these in more places as determined to be needed via profiling */
2356  int th_team_nproc; /* number of threads in a team */
2357  kmp_info_p *th_team_master; /* the team's master thread */
2358  int th_team_serialized; /* team is serialized */
2359 #if OMP_40_ENABLED
2360  microtask_t th_teams_microtask; /* save entry address for teams construct */
2361  int th_teams_level; /* save initial level of teams construct */
2362  /* it is 0 on the device but may be any value on the host */
2363 #endif
2364 
2365  /* The blocktime info is copied from the team struct to the thread struct */
2366  /* at the start of a barrier, and the values stored in the team are used */
2367  /* at points in the code where the team struct is no longer guaranteed */
2368  /* to exist (from the POV of worker threads). */
2369  int th_team_bt_intervals;
2370  int th_team_bt_set;
2371 
2372 
2373 #if KMP_AFFINITY_SUPPORTED
2374  kmp_affin_mask_t *th_affin_mask; /* thread's current affinity mask */
2375 #endif
2376 
2377 /*
2378  * The data set by the master at reinit, then R/W by the worker
2379  */
2380  KMP_ALIGN_CACHE int th_set_nproc; /* if > 0, then only use this request for the next fork */
2381 #if KMP_NESTED_HOT_TEAMS
2382  kmp_hot_team_ptr_t *th_hot_teams; /* array of hot teams */
2383 #endif
2384 #if OMP_40_ENABLED
2385  kmp_proc_bind_t th_set_proc_bind; /* if != proc_bind_default, use request for next fork */
2386  kmp_teams_size_t th_teams_size; /* number of teams/threads in teams construct */
2387 # if KMP_AFFINITY_SUPPORTED
2388  int th_current_place; /* place currently bound to */
2389  int th_new_place; /* place to bind to in par reg */
2390  int th_first_place; /* first place in partition */
2391  int th_last_place; /* last place in partition */
2392 # endif
2393 #endif
2394 #if USE_ITT_BUILD
2395  kmp_uint64 th_bar_arrive_time; /* arrival to barrier timestamp */
2396  kmp_uint64 th_bar_min_time; /* minimum arrival time at the barrier */
2397  kmp_uint64 th_frame_time; /* frame timestamp */
2398 #endif /* USE_ITT_BUILD */
2399  kmp_local_t th_local;
2400  struct private_common *th_pri_head;
2401 
2402 /*
2403  * Now the data only used by the worker (after initial allocation)
2404  */
2405  /* TODO the first serial team should actually be stored in the info_t
2406  * structure. this will help reduce initial allocation overhead */
2407  KMP_ALIGN_CACHE kmp_team_p *th_serial_team; /*serialized team held in reserve*/
2408 
2409 #if OMPT_SUPPORT
2410  ompt_thread_info_t ompt_thread_info;
2411 #endif
2412 
2413 /* The following are also read by the master during reinit */
2414  struct common_table *th_pri_common;
2415 
2416  volatile kmp_uint32 th_spin_here; /* thread-local location for spinning */
2417  /* while awaiting queuing lock acquire */
2418 
2419  volatile void *th_sleep_loc; // this points at a kmp_flag<T>
2420 
2421  ident_t *th_ident;
2422  unsigned th_x; // Random number generator data
2423  unsigned th_a; // Random number generator data
2424 
2425 /*
2426  * Tasking-related data for the thread
2427  */
2428  kmp_task_team_t * th_task_team; // Task team struct
2429  kmp_taskdata_t * th_current_task; // Innermost Task being executed
2430  kmp_uint8 th_task_state; // alternating 0/1 for task team identification
2431  kmp_uint8 * th_task_state_memo_stack; // Stack holding memos of th_task_state at nested levels
2432  kmp_uint32 th_task_state_top; // Top element of th_task_state_memo_stack
2433  kmp_uint32 th_task_state_stack_sz; // Size of th_task_state_memo_stack
2434 
2435  /*
2436  * More stuff for keeping track of active/sleeping threads
2437  * (this part is written by the worker thread)
2438  */
2439  kmp_uint8 th_active_in_pool; // included in count of
2440  // #active threads in pool
2441  int th_active; // ! sleeping
2442  // 32 bits for TCR/TCW
2443 
2444  struct cons_header * th_cons; // used for consistency check
2445 
2446 /*
2447  * Add the synchronizing data which is cache aligned and padded.
2448  */
2449  KMP_ALIGN_CACHE kmp_balign_t th_bar[ bs_last_barrier ];
2450 
2451  KMP_ALIGN_CACHE volatile kmp_int32 th_next_waiting; /* gtid+1 of next thread on lock wait queue, 0 if none */
2452 
2453 #if ( USE_FAST_MEMORY == 3 ) || ( USE_FAST_MEMORY == 5 )
2454  #define NUM_LISTS 4
2455  kmp_free_list_t th_free_lists[NUM_LISTS]; // Free lists for fast memory allocation routines
2456 #endif
2457 
2458 #if KMP_OS_WINDOWS
2459  kmp_win32_cond_t th_suspend_cv;
2460  kmp_win32_mutex_t th_suspend_mx;
2461  int th_suspend_init;
2462 #endif
2463 #if KMP_OS_UNIX
2464  kmp_cond_align_t th_suspend_cv;
2465  kmp_mutex_align_t th_suspend_mx;
2466  int th_suspend_init_count;
2467 #endif
2468 
2469 #if USE_ITT_BUILD
2470  kmp_itt_mark_t th_itt_mark_single;
2471  // alignment ???
2472 #endif /* USE_ITT_BUILD */
2473 #if KMP_STATS_ENABLED
2474  kmp_stats_list* th_stats;
2475 #endif
2476 } kmp_base_info_t;
2477 
2478 typedef union KMP_ALIGN_CACHE kmp_info {
2479  double th_align; /* use worst case alignment */
2480  char th_pad[ KMP_PAD(kmp_base_info_t, CACHE_LINE) ];
2481  kmp_base_info_t th;
2482 } kmp_info_t;
2483 
2484 /* ------------------------------------------------------------------------ */
2485 // OpenMP thread team data structures
2486 //
2487 typedef struct kmp_base_data {
2488  volatile kmp_uint32 t_value;
2489 } kmp_base_data_t;
2490 
2491 typedef union KMP_ALIGN_CACHE kmp_sleep_team {
2492  double dt_align; /* use worst case alignment */
2493  char dt_pad[ KMP_PAD(kmp_base_data_t, CACHE_LINE) ];
2494  kmp_base_data_t dt;
2495 } kmp_sleep_team_t;
2496 
2497 typedef union KMP_ALIGN_CACHE kmp_ordered_team {
2498  double dt_align; /* use worst case alignment */
2499  char dt_pad[ KMP_PAD(kmp_base_data_t, CACHE_LINE) ];
2500  kmp_base_data_t dt;
2501 } kmp_ordered_team_t;
2502 
2503 typedef int (*launch_t)( int gtid );
2504 
2505 /* Minimum number of ARGV entries to malloc if necessary */
2506 #define KMP_MIN_MALLOC_ARGV_ENTRIES 100
2507 
2508 // Set up how many argv pointers will fit in cache lines containing t_inline_argv. Historically, we
2509 // have supported at least 96 bytes. Using a larger value to put more space between the master-write/worker-
2510 // read section and the read/write-by-all section seems to buy more performance on EPCC PARALLEL.
2511 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
2512 # define KMP_INLINE_ARGV_BYTES ( 4 * CACHE_LINE - ( ( 3 * KMP_PTR_SKIP + 2 * sizeof(int) + 2 * sizeof(kmp_int8) + sizeof(kmp_int16) + sizeof(kmp_uint32) ) % CACHE_LINE ) )
2513 #else
2514 # define KMP_INLINE_ARGV_BYTES ( 2 * CACHE_LINE - ( ( 3 * KMP_PTR_SKIP + 2 * sizeof(int) ) % CACHE_LINE ) )
2515 #endif
2516 #define KMP_INLINE_ARGV_ENTRIES (int)( KMP_INLINE_ARGV_BYTES / KMP_PTR_SKIP )
2517 
2518 typedef struct KMP_ALIGN_CACHE kmp_base_team {
2519  // Synchronization Data ---------------------------------------------------------------------------------
2520  KMP_ALIGN_CACHE kmp_ordered_team_t t_ordered;
2521  kmp_balign_team_t t_bar[ bs_last_barrier ];
2522  volatile int t_construct; // count of single directive encountered by team
2523  kmp_lock_t t_single_lock; // team specific lock
2524 
2525  // Master only -----------------------------------------------------------------------------------------
2526  KMP_ALIGN_CACHE int t_master_tid; // tid of master in parent team
2527  int t_master_this_cons; // "this_construct" single counter of master in parent team
2528  ident_t *t_ident; // if volatile, have to change too much other crud to volatile too
2529  kmp_team_p *t_parent; // parent team
2530  kmp_team_p *t_next_pool; // next free team in the team pool
2531  kmp_disp_t *t_dispatch; // thread's dispatch data
2532  kmp_task_team_t *t_task_team[2]; // Task team struct; switch between 2
2533 #if OMP_40_ENABLED
2534  kmp_proc_bind_t t_proc_bind; // bind type for par region
2535 #endif // OMP_40_ENABLED
2536 #if USE_ITT_BUILD
2537  kmp_uint64 t_region_time; // region begin timestamp
2538 #endif /* USE_ITT_BUILD */
2539 
2540  // Master write, workers read --------------------------------------------------------------------------
2541  KMP_ALIGN_CACHE void **t_argv;
2542  int t_argc;
2543  int t_nproc; // number of threads in team
2544  microtask_t t_pkfn;
2545  launch_t t_invoke; // procedure to launch the microtask
2546 
2547 #if OMPT_SUPPORT
2548  ompt_team_info_t ompt_team_info;
2549  ompt_lw_taskteam_t *ompt_serialized_team_info;
2550 #endif
2551 
2552 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
2553  kmp_int8 t_fp_control_saved;
2554  kmp_int8 t_pad2b;
2555  kmp_int16 t_x87_fpu_control_word; // FP control regs
2556  kmp_uint32 t_mxcsr;
2557 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
2558 
2559  void *t_inline_argv[ KMP_INLINE_ARGV_ENTRIES ];
2560 
2561  KMP_ALIGN_CACHE kmp_info_t **t_threads;
2562  kmp_taskdata_t *t_implicit_task_taskdata; // Taskdata for the thread's implicit task
2563  int t_level; // nested parallel level
2564 
2565  KMP_ALIGN_CACHE int t_max_argc;
2566  int t_max_nproc; // maximum threads this team can handle (dynamically expandable)
2567  int t_serialized; // levels deep of serialized teams
2568  dispatch_shared_info_t *t_disp_buffer; // buffers for dispatch system
2569  int t_id; // team's id, assigned by debugger.
2570  int t_active_level; // nested active parallel level
2571  kmp_r_sched_t t_sched; // run-time schedule for the team
2572 #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
2573  int t_first_place; // first & last place in parent thread's partition.
2574  int t_last_place; // Restore these values to master after par region.
2575 #endif // OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
2576  int t_size_changed; // team size was changed?: 0: no, 1: yes, -1: changed via omp_set_num_threads() call
2577 
2578  // Read/write by workers as well -----------------------------------------------------------------------
2579 #if (KMP_ARCH_X86 || KMP_ARCH_X86_64) && !KMP_USE_HWLOC
2580  // Using CACHE_LINE=64 reduces memory footprint, but causes a big perf regression of epcc 'parallel'
2581  // and 'barrier' on fxe256lin01. This extra padding serves to fix the performance of epcc 'parallel'
2582  // and 'barrier' when CACHE_LINE=64. TODO: investigate more and get rid of this padding.
2583  char dummy_padding[1024];
2584 #endif
2585  KMP_ALIGN_CACHE kmp_internal_control_t *t_control_stack_top; // internal control stack for additional nested teams.
2586  // for SERIALIZED teams nested 2 or more levels deep
2587 #if OMP_40_ENABLED
2588  kmp_int32 t_cancel_request; // typed flag to store request state of cancellation
2589 #endif
2590  int t_master_active; // save on fork, restore on join
2591  kmp_taskq_t t_taskq; // this team's task queue
2592  void *t_copypriv_data; // team specific pointer to copyprivate data array
2593  kmp_uint32 t_copyin_counter;
2594 #if USE_ITT_BUILD
2595  void *t_stack_id; // team specific stack stitching id (for ittnotify)
2596 #endif /* USE_ITT_BUILD */
2597 } kmp_base_team_t;
2598 
2599 union KMP_ALIGN_CACHE kmp_team {
2600  kmp_base_team_t t;
2601  double t_align; /* use worst case alignment */
2602  char t_pad[ KMP_PAD(kmp_base_team_t, CACHE_LINE) ];
2603 };
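
// Editorial example (sketch only): how the inline argv area declared above might be
// chosen over heap storage when setting up a team's arguments. malloc() stands in
// for the runtime's own allocator, and 'team' and 'argc' are placeholders.
#if 0
    if ( argc <= KMP_INLINE_ARGV_ENTRIES ) {
        team->t.t_argv = &team->t.t_inline_argv[0];       /* fits in the padded inline area */
    } else {
        int slots = ( argc > KMP_MIN_MALLOC_ARGV_ENTRIES ) ? argc : KMP_MIN_MALLOC_ARGV_ENTRIES;
        team->t.t_max_argc = slots;
        team->t.t_argv     = (void **) malloc( sizeof( void * ) * slots );
    }
#endif
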
2604 
2605 
2606 typedef union KMP_ALIGN_CACHE kmp_time_global {
2607  double dt_align; /* use worst case alignment */
2608  char dt_pad[ KMP_PAD(kmp_base_data_t, CACHE_LINE) ];
2609  kmp_base_data_t dt;
2610 } kmp_time_global_t;
2611 
2612 typedef struct kmp_base_global {
2613  /* cache-aligned */
2614  kmp_time_global_t g_time;
2615 
2616  /* non cache-aligned */
2617  volatile int g_abort;
2618  volatile int g_done;
2619 
2620  int g_dynamic;
2621  enum dynamic_mode g_dynamic_mode;
2622 } kmp_base_global_t;
2623 
2624 typedef union KMP_ALIGN_CACHE kmp_global {
2625  kmp_base_global_t g;
2626  double g_align; /* use worst case alignment */
2627  char g_pad[ KMP_PAD(kmp_base_global_t, CACHE_LINE) ];
2628 } kmp_global_t;
2629 
2630 
2631 typedef struct kmp_base_root {
2632  // TODO: GEH - combine r_active with r_in_parallel, then r_active == (r_in_parallel >= 0)
2633  // TODO: GEH - then replace r_active with t_active_levels if we can, to reduce the synch
2634  // overhead of keeping r_active
2635 
2636  volatile int r_active; /* TRUE if some region in a nest has > 1 thread */
2637  // GEH: This is misnamed, should be r_in_parallel
2638  volatile int r_nested; // TODO: GEH - This is unused, just remove it entirely.
2639  int r_in_parallel; /* keeps a count of active parallel regions per root */
2640  // GEH: This is misnamed, should be r_active_levels
2641  kmp_team_t *r_root_team;
2642  kmp_team_t *r_hot_team;
2643  kmp_info_t *r_uber_thread;
2644  kmp_lock_t r_begin_lock;
2645  volatile int r_begin;
2646  int r_blocktime; /* blocktime for this root and descendants */
2647 } kmp_base_root_t;
2648 
2649 typedef union KMP_ALIGN_CACHE kmp_root {
2650  kmp_base_root_t r;
2651  double r_align; /* use worst case alignment */
2652  char r_pad[ KMP_PAD(kmp_base_root_t, CACHE_LINE) ];
2653 } kmp_root_t;
2654 
2655 struct fortran_inx_info {
2656  kmp_int32 data;
2657 };
2658 
2659 /* ------------------------------------------------------------------------ */
2660 
2661 /* ------------------------------------------------------------------------ */
2662 /* ------------------------------------------------------------------------ */
2663 
2664 extern int __kmp_settings;
2665 extern int __kmp_duplicate_library_ok;
2666 #if USE_ITT_BUILD
2667 extern int __kmp_forkjoin_frames;
2668 extern int __kmp_forkjoin_frames_mode;
2669 #endif
2670 extern PACKED_REDUCTION_METHOD_T __kmp_force_reduction_method;
2671 extern int __kmp_determ_red;
2672 
2673 #ifdef KMP_DEBUG
2674 extern int kmp_a_debug;
2675 extern int kmp_b_debug;
2676 extern int kmp_c_debug;
2677 extern int kmp_d_debug;
2678 extern int kmp_e_debug;
2679 extern int kmp_f_debug;
2680 #endif /* KMP_DEBUG */
2681 
2682 /* For debug information logging using rotating buffer */
2683 #define KMP_DEBUG_BUF_LINES_INIT 512
2684 #define KMP_DEBUG_BUF_LINES_MIN 1
2685 
2686 #define KMP_DEBUG_BUF_CHARS_INIT 128
2687 #define KMP_DEBUG_BUF_CHARS_MIN 2
2688 
2689 extern int __kmp_debug_buf; /* TRUE means use buffer, FALSE means print to stderr */
2690 extern int __kmp_debug_buf_lines; /* How many lines of debug stored in buffer */
2691 extern int __kmp_debug_buf_chars; /* How many characters allowed per line in buffer */
2692 extern int __kmp_debug_buf_atomic; /* TRUE means use atomic update of buffer entry pointer */
2693 
2694 extern char *__kmp_debug_buffer; /* Debug buffer itself */
2695 extern int __kmp_debug_count; /* Counter for number of lines printed in buffer so far */
2696 extern int __kmp_debug_buf_warn_chars; /* Keep track of char increase recommended in warnings */
2697 /* end rotating debug buffer */
2698 
2699 #ifdef KMP_DEBUG
2700 extern int __kmp_par_range; /* +1 => only go par for constructs in range */
2701 
2702 #define KMP_PAR_RANGE_ROUTINE_LEN 1024
2703 extern char __kmp_par_range_routine[KMP_PAR_RANGE_ROUTINE_LEN];
2704 #define KMP_PAR_RANGE_FILENAME_LEN 1024
2705 extern char __kmp_par_range_filename[KMP_PAR_RANGE_FILENAME_LEN];
2706 extern int __kmp_par_range_lb;
2707 extern int __kmp_par_range_ub;
2708 #endif
2709 
2710 /* For printing out dynamic storage map for threads and teams */
2711 extern int __kmp_storage_map; /* True means print storage map for threads and teams */
2712 extern int __kmp_storage_map_verbose; /* True means storage map includes placement info */
2713 extern int __kmp_storage_map_verbose_specified;
2714 
2715 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
2716 extern kmp_cpuinfo_t __kmp_cpuinfo;
2717 #endif
2718 
2719 extern volatile int __kmp_init_serial;
2720 extern volatile int __kmp_init_gtid;
2721 extern volatile int __kmp_init_common;
2722 extern volatile int __kmp_init_middle;
2723 extern volatile int __kmp_init_parallel;
2724 extern volatile int __kmp_init_monitor;
2725 extern volatile int __kmp_init_user_locks;
2726 extern int __kmp_init_counter;
2727 extern int __kmp_root_counter;
2728 extern int __kmp_version;
2729 
2730 /* list of address of allocated caches for commons */
2731 extern kmp_cached_addr_t *__kmp_threadpriv_cache_list;
2732 
2733 /* Barrier algorithm types and options */
2734 extern kmp_uint32 __kmp_barrier_gather_bb_dflt;
2735 extern kmp_uint32 __kmp_barrier_release_bb_dflt;
2736 extern kmp_bar_pat_e __kmp_barrier_gather_pat_dflt;
2737 extern kmp_bar_pat_e __kmp_barrier_release_pat_dflt;
2738 extern kmp_uint32 __kmp_barrier_gather_branch_bits [ bs_last_barrier ];
2739 extern kmp_uint32 __kmp_barrier_release_branch_bits [ bs_last_barrier ];
2740 extern kmp_bar_pat_e __kmp_barrier_gather_pattern [ bs_last_barrier ];
2741 extern kmp_bar_pat_e __kmp_barrier_release_pattern [ bs_last_barrier ];
2742 extern char const *__kmp_barrier_branch_bit_env_name [ bs_last_barrier ];
2743 extern char const *__kmp_barrier_pattern_env_name [ bs_last_barrier ];
2744 extern char const *__kmp_barrier_type_name [ bs_last_barrier ];
2745 extern char const *__kmp_barrier_pattern_name [ bp_last_bar ];
2746 
2747 /* Global Locks */
2748 extern kmp_bootstrap_lock_t __kmp_initz_lock; /* control initialization */
2749 extern kmp_bootstrap_lock_t __kmp_forkjoin_lock; /* control fork/join access */
2750 extern kmp_bootstrap_lock_t __kmp_exit_lock; /* exit() is not always thread-safe */
2751 extern kmp_bootstrap_lock_t __kmp_monitor_lock; /* control monitor thread creation */
2752 extern kmp_bootstrap_lock_t __kmp_tp_cached_lock; /* used for the hack to allow threadprivate cache and __kmp_threads expansion to co-exist */
2753 
2754 extern kmp_lock_t __kmp_global_lock; /* control OS/global access */
2755 extern kmp_queuing_lock_t __kmp_dispatch_lock; /* control dispatch access */
2756 extern kmp_lock_t __kmp_debug_lock; /* control I/O access for KMP_DEBUG */
2757 
2758 /* used for yielding spin-waits */
2759 extern unsigned int __kmp_init_wait; /* initial number of spin-tests */
2760 extern unsigned int __kmp_next_wait; /* subsequent number of spin-tests */
2761 
2762 extern enum library_type __kmp_library;
2763 
2764 extern enum sched_type __kmp_sched; /* default runtime scheduling */
2765 extern enum sched_type __kmp_static; /* default static scheduling method */
2766 extern enum sched_type __kmp_guided; /* default guided scheduling method */
2767 extern enum sched_type __kmp_auto; /* default auto scheduling method */
2768 extern int __kmp_chunk; /* default runtime chunk size */
2769 
2770 extern size_t __kmp_stksize; /* stack size per thread */
2771 extern size_t __kmp_monitor_stksize;/* stack size for monitor thread */
2772 extern size_t __kmp_stkoffset; /* stack offset per thread */
2773 extern int __kmp_stkpadding; /* Should we pad root thread(s) stack */
2774 
2775 extern size_t __kmp_malloc_pool_incr; /* incremental size of pool for kmp_malloc() */
2776 extern int __kmp_env_chunk; /* was KMP_CHUNK specified? */
2777 extern int __kmp_env_stksize; /* was KMP_STACKSIZE specified? */
2778 extern int __kmp_env_omp_stksize;/* was OMP_STACKSIZE specified? */
2779 extern int __kmp_env_all_threads; /* was KMP_ALL_THREADS or KMP_MAX_THREADS specified? */
2780 extern int __kmp_env_omp_all_threads;/* was OMP_THREAD_LIMIT specified? */
2781 extern int __kmp_env_blocktime; /* was KMP_BLOCKTIME specified? */
2782 extern int __kmp_env_checks; /* was KMP_CHECKS specified? */
2783 extern int __kmp_env_consistency_check; /* was KMP_CONSISTENCY_CHECK specified? */
2784 extern int __kmp_generate_warnings; /* should we issue warnings? */
2785 extern int __kmp_reserve_warn; /* have we issued reserve_threads warning? */
2786 
2787 #ifdef DEBUG_SUSPEND
2788 extern int __kmp_suspend_count; /* count inside __kmp_suspend_template() */
2789 #endif
2790 
2791 extern kmp_uint32 __kmp_yield_init;
2792 extern kmp_uint32 __kmp_yield_next;
2793 extern kmp_uint32 __kmp_yielding_on;
2794 extern kmp_uint32 __kmp_yield_cycle;
2795 extern kmp_int32 __kmp_yield_on_count;
2796 extern kmp_int32 __kmp_yield_off_count;
2797 
2798 /* ------------------------------------------------------------------------- */
2799 extern int __kmp_allThreadsSpecified;
2800 
2801 extern size_t __kmp_align_alloc;
2802 /* following data protected by initialization routines */
2803 extern int __kmp_xproc; /* number of processors in the system */
2804 extern int __kmp_avail_proc; /* number of processors available to the process */
2805 extern size_t __kmp_sys_min_stksize; /* system-defined minimum stack size */
2806 extern int __kmp_sys_max_nth; /* system-imposed maximum number of threads */
2807 extern int __kmp_max_nth; /* maximum total number of concurrently-existing threads */
2808 extern int __kmp_threads_capacity; /* capacity of the arrays __kmp_threads and __kmp_root */
2809 extern int __kmp_dflt_team_nth; /* default number of threads in a parallel region a la OMP_NUM_THREADS */
2810 extern int __kmp_dflt_team_nth_ub; /* upper bound on "" determined at serial initialization */
2811 extern int __kmp_tp_capacity; /* capacity of __kmp_threads if threadprivate is used (fixed) */
2812 extern int __kmp_tp_cached; /* whether threadprivate cache has been created (__kmpc_threadprivate_cached()) */
2813 extern int __kmp_dflt_nested; /* nested parallelism enabled by default a la OMP_NESTED */
2814 extern int __kmp_dflt_blocktime; /* number of milliseconds to wait before blocking (env setting) */
2815 extern int __kmp_monitor_wakeups;/* number of times monitor wakes up per second */
2816 extern int __kmp_bt_intervals; /* number of monitor timestamp intervals before blocking */
2817 #ifdef KMP_ADJUST_BLOCKTIME
2818 extern int __kmp_zero_bt; /* whether blocktime has been forced to zero */
2819 #endif /* KMP_ADJUST_BLOCKTIME */
2820 #ifdef KMP_DFLT_NTH_CORES
2821 extern int __kmp_ncores; /* Total number of cores for threads placement */
2822 #endif
2823 extern int __kmp_abort_delay; /* Number of millisecs to delay on abort for VTune */
2824 
2825 extern int __kmp_need_register_atfork_specified;
2826 extern int __kmp_need_register_atfork;/* At initialization, call pthread_atfork to install fork handler */
2827 extern int __kmp_gtid_mode; /* Method of getting gtid, values:
2828  0 - not set, will be set at runtime
2829  1 - using stack search
2830  2 - dynamic TLS (pthread_getspecific(Linux* OS/OS X*) or TlsGetValue(Windows* OS))
2831  3 - static TLS (__declspec(thread) __kmp_gtid), Linux* OS .so only.
2832  */
2833 extern int __kmp_adjust_gtid_mode; /* If true, adjust method based on #threads */
2834 #ifdef KMP_TDATA_GTID
2835 #if KMP_OS_WINDOWS
2836 extern __declspec(thread) int __kmp_gtid; /* This thread's gtid, if __kmp_gtid_mode == 3 */
2837 #else
2838 extern __thread int __kmp_gtid;
2839 #endif /* KMP_OS_WINDOWS - workaround because Intel(R) Many Integrated Core compiler 20110316 doesn't accept __declspec */
2840 #endif
2841 extern int __kmp_tls_gtid_min; /* #threads below which use sp search for gtid */
2842 extern int __kmp_foreign_tp; /* If true, separate TP var for each foreign thread */
2843 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
2844 extern int __kmp_inherit_fp_control; /* copy fp creg(s) parent->workers at fork */
2845 extern kmp_int16 __kmp_init_x87_fpu_control_word; /* init thread's FP control reg */
2846 extern kmp_uint32 __kmp_init_mxcsr; /* init thread's mxscr */
2847 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
2848 
2849 extern int __kmp_dflt_max_active_levels; /* max_active_levels for nested parallelism enabled by default a la OMP_MAX_ACTIVE_LEVELS */
2850 extern int __kmp_dispatch_num_buffers; /* max possible dynamic loops in concurrent execution per team */
2851 #if KMP_NESTED_HOT_TEAMS
2852 extern int __kmp_hot_teams_mode;
2853 extern int __kmp_hot_teams_max_level;
2854 #endif
2855 
2856 # if KMP_OS_LINUX
2857 extern enum clock_function_type __kmp_clock_function;
2858 extern int __kmp_clock_function_param;
2859 # endif /* KMP_OS_LINUX */
2860 
2861 #if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
2862 extern enum mic_type __kmp_mic_type;
2863 #endif
2864 
2865 # ifdef USE_LOAD_BALANCE
2866 extern double __kmp_load_balance_interval; /* Interval for the load balance algorithm */
2867 # endif /* USE_LOAD_BALANCE */
2868 
2869 // OpenMP 3.1 - Nested num threads array
2870 typedef struct kmp_nested_nthreads_t {
2871  int * nth;
2872  int size;
2873  int used;
2874 } kmp_nested_nthreads_t;
2875 
2876 extern kmp_nested_nthreads_t __kmp_nested_nth;
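/* Illustrative sketch, not part of the runtime sources: with OMP_NUM_THREADS="4,3,2"
   the settings parser would leave nth = {4, 3, 2} and used = 3 (size is just the
   allocated capacity). A per-level lookup, assuming that layout, might read: */
static inline int __example_nested_nth_at( int level )
{
    return ( level < __kmp_nested_nth.used )
               ? __kmp_nested_nth.nth[ level ]      /* explicit value for this nesting level */
               : __kmp_dflt_team_nth;               /* fall back to the flat default */
}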
2877 
2878 #if KMP_USE_ADAPTIVE_LOCKS
2879 
2880 // Parameters for the speculative lock backoff system.
2881 struct kmp_adaptive_backoff_params_t {
2882  // Number of soft retries before it counts as a hard retry.
2883  kmp_uint32 max_soft_retries;
2884  // Badness is a bit mask: 0, 1, 3, 7, 15, ...; each hard failure sets one more bit, widening the mask.
2885  kmp_uint32 max_badness;
2886 };
2887 
2888 extern kmp_adaptive_backoff_params_t __kmp_adaptive_backoff_params;
2889 
2890 #if KMP_DEBUG_ADAPTIVE_LOCKS
2891 extern char * __kmp_speculative_statsfile;
2892 #endif
2893 
2894 #endif // KMP_USE_ADAPTIVE_LOCKS
2895 
2896 #if OMP_40_ENABLED
2897 extern int __kmp_display_env; /* TRUE or FALSE */
2898 extern int __kmp_display_env_verbose; /* TRUE if OMP_DISPLAY_ENV=VERBOSE */
2899 extern int __kmp_omp_cancellation; /* TRUE or FALSE */
2900 #endif
2901 
2902 /* ------------------------------------------------------------------------- */
2903 
2904 /* --------------------------------------------------------------------------- */
2905 /* the following are protected by the fork/join lock */
2906 /* write: lock read: anytime */
2907 extern kmp_info_t **__kmp_threads; /* Descriptors for the threads */
2908 /* read/write: lock */
2909 extern volatile kmp_team_t * __kmp_team_pool;
2910 extern volatile kmp_info_t * __kmp_thread_pool;
2911 
2912 /* total number of threads reachable from some root thread including all root threads*/
2913 extern volatile int __kmp_nth;
2914 /* total number of threads reachable from some root thread including all root threads,
2915  and those in the thread pool */
2916 extern volatile int __kmp_all_nth;
2917 extern int __kmp_thread_pool_nth;
2918 extern volatile int __kmp_thread_pool_active_nth;
2919 
2920 extern kmp_root_t **__kmp_root; /* root of thread hierarchy */
2921 /* end data protected by fork/join lock */
2922 /* --------------------------------------------------------------------------- */
2923 
2924 extern kmp_global_t __kmp_global; /* global status */
2925 
2926 extern kmp_info_t __kmp_monitor;
2927 extern volatile kmp_uint32 __kmp_team_counter; // Used by Debugging Support Library.
2928 extern volatile kmp_uint32 __kmp_task_counter; // Used by Debugging Support Library.
2929 
2930 #if USE_DEBUGGER
2931 
2932 #define _KMP_GEN_ID( counter ) \
2933  ( \
2934  __kmp_debugging \
2935  ? \
2936  KMP_TEST_THEN_INC32( (volatile kmp_int32 *) & counter ) + 1 \
2937  : \
2938  ~ 0 \
2939  )
2940 #else
2941 #define _KMP_GEN_ID( counter ) \
2942  ( \
2943  ~ 0 \
2944  )
2945 #endif /* USE_DEBUGGER */
2946 
2947 #define KMP_GEN_TASK_ID() _KMP_GEN_ID( __kmp_task_counter )
2948 #define KMP_GEN_TEAM_ID() _KMP_GEN_ID( __kmp_team_counter )
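/* Illustrative sketch, not part of the runtime sources: with a debugger attached
   (__kmp_debugging set), each expansion hands out a fresh positive ID by atomically
   bumping the corresponding counter; otherwise both macros collapse to ~0, i.e. no
   ID is assigned. Hypothetical use at creation time:
       int new_team_id = KMP_GEN_TEAM_ID();
       int new_task_id = KMP_GEN_TASK_ID();
*/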
2949 
2950 /* ------------------------------------------------------------------------ */
2951 /* ------------------------------------------------------------------------ */
2952 
2953 extern void __kmp_print_storage_map_gtid( int gtid, void *p1, void* p2, size_t size, char const *format, ... );
2954 
2955 extern void __kmp_serial_initialize( void );
2956 extern void __kmp_middle_initialize( void );
2957 extern void __kmp_parallel_initialize( void );
2958 
2959 extern void __kmp_internal_begin( void );
2960 extern void __kmp_internal_end_library( int gtid );
2961 extern void __kmp_internal_end_thread( int gtid );
2962 extern void __kmp_internal_end_atexit( void );
2963 extern void __kmp_internal_end_fini( void );
2964 extern void __kmp_internal_end_dtor( void );
2965 extern void __kmp_internal_end_dest( void* );
2966 
2967 extern int __kmp_register_root( int initial_thread );
2968 extern void __kmp_unregister_root( int gtid );
2969 
2970 extern int __kmp_ignore_mppbeg( void );
2971 extern int __kmp_ignore_mppend( void );
2972 
2973 extern int __kmp_enter_single( int gtid, ident_t *id_ref, int push_ws );
2974 extern void __kmp_exit_single( int gtid );
2975 
2976 extern void __kmp_parallel_deo( int *gtid_ref, int *cid_ref, ident_t *loc_ref );
2977 extern void __kmp_parallel_dxo( int *gtid_ref, int *cid_ref, ident_t *loc_ref );
2978 
2979 #ifdef USE_LOAD_BALANCE
2980 extern int __kmp_get_load_balance( int );
2981 #endif
2982 
2983 #ifdef BUILD_TV
2984 extern void __kmp_tv_threadprivate_store( kmp_info_t *th, void *global_addr, void *thread_addr );
2985 #endif
2986 
2987 extern int __kmp_get_global_thread_id( void );
2988 extern int __kmp_get_global_thread_id_reg( void );
2989 extern void __kmp_exit_thread( int exit_status );
2990 extern void __kmp_abort( char const * format, ... );
2991 extern void __kmp_abort_thread( void );
2992 extern void __kmp_abort_process( void );
2993 extern void __kmp_warn( char const * format, ... );
2994 
2995 extern void __kmp_set_num_threads( int new_nth, int gtid );
2996 
2997 // Returns current thread (pointer to kmp_info_t). Current thread *must* be registered.
2998 static inline kmp_info_t * __kmp_entry_thread()
2999 {
3000  int gtid = __kmp_entry_gtid();
3001 
3002  return __kmp_threads[gtid];
3003 }
3004 
3005 extern void __kmp_set_max_active_levels( int gtid, int new_max_active_levels );
3006 extern int __kmp_get_max_active_levels( int gtid );
3007 extern int __kmp_get_ancestor_thread_num( int gtid, int level );
3008 extern int __kmp_get_team_size( int gtid, int level );
3009 extern void __kmp_set_schedule( int gtid, kmp_sched_t new_sched, int chunk );
3010 extern void __kmp_get_schedule( int gtid, kmp_sched_t * sched, int * chunk );
3011 
3012 extern unsigned short __kmp_get_random( kmp_info_t * thread );
3013 extern void __kmp_init_random( kmp_info_t * thread );
3014 
3015 extern kmp_r_sched_t __kmp_get_schedule_global( void );
3016 extern void __kmp_adjust_num_threads( int new_nproc );
3017 
3018 extern void * ___kmp_allocate( size_t size KMP_SRC_LOC_DECL );
3019 extern void * ___kmp_page_allocate( size_t size KMP_SRC_LOC_DECL );
3020 extern void ___kmp_free( void * ptr KMP_SRC_LOC_DECL );
3021 #define __kmp_allocate( size ) ___kmp_allocate( (size) KMP_SRC_LOC_CURR )
3022 #define __kmp_page_allocate( size ) ___kmp_page_allocate( (size) KMP_SRC_LOC_CURR )
3023 #define __kmp_free( ptr ) ___kmp_free( (ptr) KMP_SRC_LOC_CURR )
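/* Illustrative sketch, not part of the runtime sources: internal callers use the
   macro forms so that, in debug builds, KMP_SRC_LOC_CURR is assumed to append the
   caller's file/line for the underscore-prefixed allocator. The buffer and its
   size below are placeholders. */
static void __example_allocate_use( void )
{
    void *scratch = __kmp_allocate( 256 );   /* 256 bytes, aligned per __kmp_align_alloc */
    /* ... use scratch ... */
    __kmp_free( scratch );
}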
3024 
3025 #if USE_FAST_MEMORY
3026 extern void * ___kmp_fast_allocate( kmp_info_t *this_thr, size_t size KMP_SRC_LOC_DECL );
3027 extern void ___kmp_fast_free( kmp_info_t *this_thr, void *ptr KMP_SRC_LOC_DECL );
3028 extern void __kmp_free_fast_memory( kmp_info_t *this_thr );
3029 extern void __kmp_initialize_fast_memory( kmp_info_t *this_thr );
3030 #define __kmp_fast_allocate( this_thr, size ) ___kmp_fast_allocate( (this_thr), (size) KMP_SRC_LOC_CURR )
3031 #define __kmp_fast_free( this_thr, ptr ) ___kmp_fast_free( (this_thr), (ptr) KMP_SRC_LOC_CURR )
3032 #endif
3033 
3034 extern void * ___kmp_thread_malloc( kmp_info_t *th, size_t size KMP_SRC_LOC_DECL );
3035 extern void * ___kmp_thread_calloc( kmp_info_t *th, size_t nelem, size_t elsize KMP_SRC_LOC_DECL );
3036 extern void * ___kmp_thread_realloc( kmp_info_t *th, void *ptr, size_t size KMP_SRC_LOC_DECL );
3037 extern void ___kmp_thread_free( kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL );
3038 #define __kmp_thread_malloc( th, size ) ___kmp_thread_malloc( (th), (size) KMP_SRC_LOC_CURR )
3039 #define __kmp_thread_calloc( th, nelem, elsize ) ___kmp_thread_calloc( (th), (nelem), (elsize) KMP_SRC_LOC_CURR )
3040 #define __kmp_thread_realloc( th, ptr, size ) ___kmp_thread_realloc( (th), (ptr), (size) KMP_SRC_LOC_CURR )
3041 #define __kmp_thread_free( th, ptr ) ___kmp_thread_free( (th), (ptr) KMP_SRC_LOC_CURR )
3042 
3043 #define KMP_INTERNAL_MALLOC(sz) malloc(sz)
3044 #define KMP_INTERNAL_FREE(p) free(p)
3045 #define KMP_INTERNAL_REALLOC(p,sz) realloc((p),(sz))
3046 #define KMP_INTERNAL_CALLOC(n,sz) calloc((n),(sz))
3047 
3048 extern void __kmp_push_num_threads( ident_t *loc, int gtid, int num_threads );
3049 
3050 #if OMP_40_ENABLED
3051 extern void __kmp_push_proc_bind( ident_t *loc, int gtid, kmp_proc_bind_t proc_bind );
3052 extern void __kmp_push_num_teams( ident_t *loc, int gtid, int num_teams, int num_threads );
3053 #endif
3054 
3055 extern void __kmp_yield( int cond );
3056 
3057 extern void __kmpc_dispatch_init_4( ident_t *loc, kmp_int32 gtid,
3058  enum sched_type schedule, kmp_int32 lb, kmp_int32 ub, kmp_int32 st,
3059  kmp_int32 chunk );
3060 extern void __kmpc_dispatch_init_4u( ident_t *loc, kmp_int32 gtid,
3061  enum sched_type schedule, kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st,
3062  kmp_int32 chunk );
3063 extern void __kmpc_dispatch_init_8( ident_t *loc, kmp_int32 gtid,
3064  enum sched_type schedule, kmp_int64 lb, kmp_int64 ub, kmp_int64 st,
3065  kmp_int64 chunk );
3066 extern void __kmpc_dispatch_init_8u( ident_t *loc, kmp_int32 gtid,
3067  enum sched_type schedule, kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st,
3068  kmp_int64 chunk );
3069 
3070 extern int __kmpc_dispatch_next_4( ident_t *loc, kmp_int32 gtid,
3071  kmp_int32 *p_last, kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st );
3072 extern int __kmpc_dispatch_next_4u( ident_t *loc, kmp_int32 gtid,
3073  kmp_int32 *p_last, kmp_uint32 *p_lb, kmp_uint32 *p_ub, kmp_int32 *p_st );
3074 extern int __kmpc_dispatch_next_8( ident_t *loc, kmp_int32 gtid,
3075  kmp_int32 *p_last, kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st );
3076 extern int __kmpc_dispatch_next_8u( ident_t *loc, kmp_int32 gtid,
3077  kmp_int32 *p_last, kmp_uint64 *p_lb, kmp_uint64 *p_ub, kmp_int64 *p_st );
3078 
3079 extern void __kmpc_dispatch_fini_4( ident_t *loc, kmp_int32 gtid );
3080 extern void __kmpc_dispatch_fini_8( ident_t *loc, kmp_int32 gtid );
3081 extern void __kmpc_dispatch_fini_4u( ident_t *loc, kmp_int32 gtid );
3082 extern void __kmpc_dispatch_fini_8u( ident_t *loc, kmp_int32 gtid );
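/* Illustrative sketch, not part of the runtime sources: how compiler-generated code
   might drive the 32-bit dispatcher for a loop with schedule(dynamic,4). The bounds,
   the body and the helper name are placeholders; kmp_sch_dynamic_chunked is a member
   of enum sched_type earlier in this file. */
static void __example_dynamic_loop( ident_t *loc, kmp_int32 gtid )
{
    kmp_int32 lb = 0, ub = 999, st = 1, last = 0;

    __kmpc_dispatch_init_4( loc, gtid, kmp_sch_dynamic_chunked, lb, ub, st, 4 );
    while ( __kmpc_dispatch_next_4( loc, gtid, &last, &lb, &ub, &st ) ) {
        for ( kmp_int32 i = lb; i <= ub; i += st ) {
            /* ... user loop body for iteration i ... */
        }
    }
}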
3083 
3084 
3085 #ifdef KMP_GOMP_COMPAT
3086 
3087 extern void __kmp_aux_dispatch_init_4( ident_t *loc, kmp_int32 gtid,
3088  enum sched_type schedule, kmp_int32 lb, kmp_int32 ub, kmp_int32 st,
3089  kmp_int32 chunk, int push_ws );
3090 extern void __kmp_aux_dispatch_init_4u( ident_t *loc, kmp_int32 gtid,
3091  enum sched_type schedule, kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st,
3092  kmp_int32 chunk, int push_ws );
3093 extern void __kmp_aux_dispatch_init_8( ident_t *loc, kmp_int32 gtid,
3094  enum sched_type schedule, kmp_int64 lb, kmp_int64 ub, kmp_int64 st,
3095  kmp_int64 chunk, int push_ws );
3096 extern void __kmp_aux_dispatch_init_8u( ident_t *loc, kmp_int32 gtid,
3097  enum sched_type schedule, kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st,
3098  kmp_int64 chunk, int push_ws );
3099 extern void __kmp_aux_dispatch_fini_chunk_4( ident_t *loc, kmp_int32 gtid );
3100 extern void __kmp_aux_dispatch_fini_chunk_8( ident_t *loc, kmp_int32 gtid );
3101 extern void __kmp_aux_dispatch_fini_chunk_4u( ident_t *loc, kmp_int32 gtid );
3102 extern void __kmp_aux_dispatch_fini_chunk_8u( ident_t *loc, kmp_int32 gtid );
3103 
3104 #endif /* KMP_GOMP_COMPAT */
3105 
3106 
3107 extern kmp_uint32 __kmp_eq_4( kmp_uint32 value, kmp_uint32 checker );
3108 extern kmp_uint32 __kmp_neq_4( kmp_uint32 value, kmp_uint32 checker );
3109 extern kmp_uint32 __kmp_lt_4( kmp_uint32 value, kmp_uint32 checker );
3110 extern kmp_uint32 __kmp_ge_4( kmp_uint32 value, kmp_uint32 checker );
3111 extern kmp_uint32 __kmp_le_4( kmp_uint32 value, kmp_uint32 checker );
3112 extern kmp_uint32 __kmp_wait_yield_4( kmp_uint32 volatile * spinner, kmp_uint32 checker, kmp_uint32 (*pred) (kmp_uint32, kmp_uint32), void * obj );
3113 extern void __kmp_wait_yield_4_ptr( void * spinner, kmp_uint32 checker, kmp_uint32 (* pred)( void *, kmp_uint32 ), void * obj );
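/* Illustrative sketch, not part of the runtime sources: spinning (with yields) until
   a shared 32-bit flag reaches a value, using one of the predicates above. The flag
   is a placeholder. */
static void __example_wait_flag( kmp_uint32 volatile *flag )
{
    __kmp_wait_yield_4( flag, 1, __kmp_eq_4, NULL );   /* returns once *flag == 1 */
}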
3114 
3115 class kmp_flag_32;
3116 class kmp_flag_64;
3117 class kmp_flag_oncore;
3118 extern void __kmp_wait_64(kmp_info_t *this_thr, kmp_flag_64 *flag, int final_spin
3119 #if USE_ITT_BUILD
3120  , void * itt_sync_obj
3121 #endif
3122  );
3123 extern void __kmp_release_64(kmp_flag_64 *flag);
3124 
3125 extern void __kmp_infinite_loop( void );
3126 
3127 extern void __kmp_cleanup( void );
3128 
3129 #if KMP_HANDLE_SIGNALS
3130  extern int __kmp_handle_signals;
3131  extern void __kmp_install_signals( int parallel_init );
3132  extern void __kmp_remove_signals( void );
3133 #endif
3134 
3135 extern void __kmp_clear_system_time( void );
3136 extern void __kmp_read_system_time( double *delta );
3137 
3138 extern void __kmp_check_stack_overlap( kmp_info_t *thr );
3139 
3140 extern void __kmp_expand_host_name( char *buffer, size_t size );
3141 extern void __kmp_expand_file_name( char *result, size_t rlen, char *pattern );
3142 
3143 #if KMP_OS_WINDOWS
3144 extern void __kmp_initialize_system_tick( void ); /* Initialize timer tick value */
3145 #endif
3146 
3147 extern void __kmp_runtime_initialize( void ); /* machine specific initialization */
3148 extern void __kmp_runtime_destroy( void );
3149 
3150 #if KMP_AFFINITY_SUPPORTED
3151 extern char *__kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask);
3152 extern void __kmp_affinity_initialize(void);
3153 extern void __kmp_affinity_uninitialize(void);
3154 extern void __kmp_affinity_set_init_mask(int gtid, int isa_root); /* set affinity according to KMP_AFFINITY */
3155 #if OMP_40_ENABLED
3156 extern void __kmp_affinity_set_place(int gtid);
3157 #endif
3158 extern void __kmp_affinity_determine_capable( const char *env_var );
3159 extern int __kmp_aux_set_affinity(void **mask);
3160 extern int __kmp_aux_get_affinity(void **mask);
3161 extern int __kmp_aux_set_affinity_mask_proc(int proc, void **mask);
3162 extern int __kmp_aux_unset_affinity_mask_proc(int proc, void **mask);
3163 extern int __kmp_aux_get_affinity_mask_proc(int proc, void **mask);
3164 extern void __kmp_balanced_affinity( int tid, int team_size );
3165 #endif /* KMP_AFFINITY_SUPPORTED */
3166 
3167 extern void __kmp_cleanup_hierarchy();
3168 extern void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar);
3169 
3170 #if KMP_USE_FUTEX
3171 
3172 extern int __kmp_futex_determine_capable( void );
3173 
3174 #endif // KMP_USE_FUTEX
3175 
3176 extern void __kmp_gtid_set_specific( int gtid );
3177 extern int __kmp_gtid_get_specific( void );
3178 
3179 extern double __kmp_read_cpu_time( void );
3180 
3181 extern int __kmp_read_system_info( struct kmp_sys_info *info );
3182 
3183 extern void __kmp_create_monitor( kmp_info_t *th );
3184 
3185 extern void *__kmp_launch_thread( kmp_info_t *thr );
3186 
3187 extern void __kmp_create_worker( int gtid, kmp_info_t *th, size_t stack_size );
3188 
3189 #if KMP_OS_WINDOWS
3190 extern int __kmp_still_running(kmp_info_t *th);
3191 extern int __kmp_is_thread_alive( kmp_info_t * th, DWORD *exit_val );
3192 extern void __kmp_free_handle( kmp_thread_t tHandle );
3193 #endif
3194 
3195 extern void __kmp_reap_monitor( kmp_info_t *th );
3196 extern void __kmp_reap_worker( kmp_info_t *th );
3197 extern void __kmp_terminate_thread( int gtid );
3198 
3199 extern void __kmp_suspend_32( int th_gtid, kmp_flag_32 *flag );
3200 extern void __kmp_suspend_64( int th_gtid, kmp_flag_64 *flag );
3201 extern void __kmp_suspend_oncore( int th_gtid, kmp_flag_oncore *flag );
3202 extern void __kmp_resume_32( int target_gtid, kmp_flag_32 *flag );
3203 extern void __kmp_resume_64( int target_gtid, kmp_flag_64 *flag );
3204 extern void __kmp_resume_oncore( int target_gtid, kmp_flag_oncore *flag );
3205 
3206 extern void __kmp_elapsed( double * );
3207 extern void __kmp_elapsed_tick( double * );
3208 
3209 extern void __kmp_enable( int old_state );
3210 extern void __kmp_disable( int *old_state );
3211 
3212 extern void __kmp_thread_sleep( int millis );
3213 
3214 extern void __kmp_common_initialize( void );
3215 extern void __kmp_common_destroy( void );
3216 extern void __kmp_common_destroy_gtid( int gtid );
3217 
3218 #if KMP_OS_UNIX
3219 extern void __kmp_register_atfork( void );
3220 #endif
3221 extern void __kmp_suspend_initialize( void );
3222 extern void __kmp_suspend_uninitialize_thread( kmp_info_t *th );
3223 
3224 extern kmp_info_t * __kmp_allocate_thread( kmp_root_t *root,
3225  kmp_team_t *team, int tid);
3226 #if OMP_40_ENABLED
3227 extern kmp_team_t * __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc,
3228 #if OMPT_SUPPORT
3229  ompt_parallel_id_t ompt_parallel_id,
3230 #endif
3231  kmp_proc_bind_t proc_bind,
3232  kmp_internal_control_t *new_icvs,
3233  int argc USE_NESTED_HOT_ARG(kmp_info_t *thr) );
3234 #else
3235 extern kmp_team_t * __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc,
3236 #if OMPT_SUPPORT
3237  ompt_parallel_id_t ompt_parallel_id,
3238 #endif
3239  kmp_internal_control_t *new_icvs,
3240  int argc USE_NESTED_HOT_ARG(kmp_info_t *thr) );
3241 #endif // OMP_40_ENABLED
3242 extern void __kmp_free_thread( kmp_info_t * );
3243 extern void __kmp_free_team( kmp_root_t *, kmp_team_t * USE_NESTED_HOT_ARG(kmp_info_t *) );
3244 extern kmp_team_t * __kmp_reap_team( kmp_team_t * );
3245 
3246 /* ------------------------------------------------------------------------ */
3247 
3248 extern void __kmp_initialize_bget( kmp_info_t *th );
3249 extern void __kmp_finalize_bget( kmp_info_t *th );
3250 
3251 KMP_EXPORT void *kmpc_malloc( size_t size );
3252 KMP_EXPORT void *kmpc_aligned_malloc( size_t size, size_t alignment );
3253 KMP_EXPORT void *kmpc_calloc( size_t nelem, size_t elsize );
3254 KMP_EXPORT void *kmpc_realloc( void *ptr, size_t size );
3255 KMP_EXPORT void kmpc_free( void *ptr );
3256 
3257 /* ------------------------------------------------------------------------ */
3258 /* declarations for internal use */
3259 
3260 extern int __kmp_barrier( enum barrier_type bt, int gtid, int is_split,
3261  size_t reduce_size, void *reduce_data, void (*reduce)(void *, void *) );
3262 extern void __kmp_end_split_barrier ( enum barrier_type bt, int gtid );
3263 
3267 enum fork_context_e
3268 {
3269  fork_context_gnu,
3270  fork_context_intel,
3271  fork_context_last
3272 };
3273 extern int __kmp_fork_call( ident_t *loc, int gtid, enum fork_context_e fork_context,
3274  kmp_int32 argc,
3275 #if OMPT_SUPPORT
3276  void *unwrapped_task,
3277 #endif
3278  microtask_t microtask, launch_t invoker,
3279 /* TODO: revert workaround for Intel(R) 64 tracker #96 */
3280 #if (KMP_ARCH_ARM || KMP_ARCH_X86_64 || KMP_ARCH_AARCH64) && KMP_OS_LINUX
3281  va_list *ap
3282 #else
3283  va_list ap
3284 #endif
3285  );
3286 
3287 extern void __kmp_join_call( ident_t *loc, int gtid
3288 #if OMPT_SUPPORT
3289  , enum fork_context_e fork_context
3290 #endif
3291 #if OMP_40_ENABLED
3292  , int exit_teams = 0
3293 #endif
3294  );
3295 
3296 extern void __kmp_serialized_parallel(ident_t *id, kmp_int32 gtid);
3297 extern void __kmp_internal_fork( ident_t *id, int gtid, kmp_team_t *team );
3298 extern void __kmp_internal_join( ident_t *id, int gtid, kmp_team_t *team );
3299 extern int __kmp_invoke_task_func( int gtid );
3300 extern void __kmp_run_before_invoked_task( int gtid, int tid, kmp_info_t *this_thr, kmp_team_t *team );
3301 extern void __kmp_run_after_invoked_task( int gtid, int tid, kmp_info_t *this_thr, kmp_team_t *team );
3302 
3303 // should never have been exported
3304 KMP_EXPORT int __kmpc_invoke_task_func( int gtid );
3305 #if OMP_40_ENABLED
3306 extern int __kmp_invoke_teams_master( int gtid );
3307 extern void __kmp_teams_master( int gtid );
3308 #endif
3309 extern void __kmp_save_internal_controls( kmp_info_t * thread );
3310 extern void __kmp_user_set_library (enum library_type arg);
3311 extern void __kmp_aux_set_library (enum library_type arg);
3312 extern void __kmp_aux_set_stacksize( size_t arg);
3313 extern void __kmp_aux_set_blocktime (int arg, kmp_info_t *thread, int tid);
3314 extern void __kmp_aux_set_defaults( char const * str, int len );
3315 
3316 /* Functions below put here to call them from __kmp_aux_env_initialize() in kmp_settings.c */
3317 void kmpc_set_blocktime (int arg);
3318 void ompc_set_nested( int flag );
3319 void ompc_set_dynamic( int flag );
3320 void ompc_set_num_threads( int arg );
3321 
3322 extern void __kmp_push_current_task_to_thread( kmp_info_t *this_thr,
3323  kmp_team_t *team, int tid );
3324 extern void __kmp_pop_current_task_from_thread( kmp_info_t *this_thr );
3325 extern kmp_task_t* __kmp_task_alloc( ident_t *loc_ref, kmp_int32 gtid,
3326  kmp_tasking_flags_t *flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
3327  kmp_routine_entry_t task_entry );
3328 extern void __kmp_init_implicit_task( ident_t *loc_ref, kmp_info_t *this_thr,
3329  kmp_team_t *team, int tid, int set_curr_task );
3330 
3331 int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32 *flag, int final_spin,
3332  int *thread_finished,
3333 #if USE_ITT_BUILD
3334  void * itt_sync_obj,
3335 #endif /* USE_ITT_BUILD */
3336  kmp_int32 is_constrained);
3337 int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64 *flag, int final_spin,
3338  int *thread_finished,
3339 #if USE_ITT_BUILD
3340  void * itt_sync_obj,
3341 #endif /* USE_ITT_BUILD */
3342  kmp_int32 is_constrained);
3343 int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin,
3344  int *thread_finished,
3345 #if USE_ITT_BUILD
3346  void * itt_sync_obj,
3347 #endif /* USE_ITT_BUILD */
3348  kmp_int32 is_constrained);
3349 
3350 extern void __kmp_free_task_team( kmp_info_t *thread, kmp_task_team_t *task_team );
3351 extern void __kmp_reap_task_teams( void );
3352 extern void __kmp_wait_to_unref_task_teams( void );
3353 extern void __kmp_task_team_setup ( kmp_info_t *this_thr, kmp_team_t *team, int always );
3354 extern void __kmp_task_team_sync ( kmp_info_t *this_thr, kmp_team_t *team );
3355 extern void __kmp_task_team_wait ( kmp_info_t *this_thr, kmp_team_t *team
3356 #if USE_ITT_BUILD
3357  , void * itt_sync_obj
3358 #endif /* USE_ITT_BUILD */
3359  , int wait=1
3360 );
3361 extern void __kmp_tasking_barrier( kmp_team_t *team, kmp_info_t *thread, int gtid );
3362 
3363 extern int __kmp_is_address_mapped( void *addr );
3364 extern kmp_uint64 __kmp_hardware_timestamp(void);
3365 
3366 #if KMP_OS_UNIX
3367 extern int __kmp_read_from_file( char const *path, char const *format, ... );
3368 #endif
3369 
3370 /* ------------------------------------------------------------------------ */
3371 //
3372 // Assembly routines that have no compiler intrinsic replacement
3373 //
3374 
3375 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
3376 
3377 extern void __kmp_query_cpuid( kmp_cpuinfo_t *p );
3378 
3379 #define __kmp_load_mxcsr(p) _mm_setcsr(*(p))
3380 static inline void __kmp_store_mxcsr( kmp_uint32 *p ) { *p = _mm_getcsr(); }
3381 
3382 extern void __kmp_load_x87_fpu_control_word( kmp_int16 *p );
3383 extern void __kmp_store_x87_fpu_control_word( kmp_int16 *p );
3384 extern void __kmp_clear_x87_fpu_status_word();
3385 # define KMP_X86_MXCSR_MASK 0xffffffc0 /* ignore status flags (6 lsb) */
3386 
3387 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
3388 
3389 extern int __kmp_invoke_microtask( microtask_t pkfn, int gtid, int npr, int argc, void *argv[]
3390 #if OMPT_SUPPORT
3391  , void **exit_frame_ptr
3392 #endif
3393 );
3394 
3395 
3396 /* ------------------------------------------------------------------------ */
3397 
3398 KMP_EXPORT void __kmpc_begin ( ident_t *, kmp_int32 flags );
3399 KMP_EXPORT void __kmpc_end ( ident_t * );
3400 
3401 KMP_EXPORT void __kmpc_threadprivate_register_vec ( ident_t *, void * data, kmpc_ctor_vec ctor,
3402  kmpc_cctor_vec cctor, kmpc_dtor_vec dtor, size_t vector_length );
3403 KMP_EXPORT void __kmpc_threadprivate_register ( ident_t *, void * data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor );
3404 KMP_EXPORT void * __kmpc_threadprivate ( ident_t *, kmp_int32 global_tid, void * data, size_t size );
3405 
3406 KMP_EXPORT kmp_int32 __kmpc_global_thread_num ( ident_t * );
3407 KMP_EXPORT kmp_int32 __kmpc_global_num_threads ( ident_t * );
3408 KMP_EXPORT kmp_int32 __kmpc_bound_thread_num ( ident_t * );
3409 KMP_EXPORT kmp_int32 __kmpc_bound_num_threads ( ident_t * );
3410 
3411 KMP_EXPORT kmp_int32 __kmpc_ok_to_fork ( ident_t * );
3412 KMP_EXPORT void __kmpc_fork_call ( ident_t *, kmp_int32 nargs, kmpc_micro microtask, ... );
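/* Illustrative sketch, not part of the runtime sources: the outlined-function pattern
   behind __kmpc_fork_call for "#pragma omp parallel shared(x)". The names are
   placeholders; the first two parameters follow the kmpc_micro signature defined
   earlier in this file. */
static void __example_outlined( kmp_int32 *gtid, kmp_int32 *btid, int *x )
{
    /* ... parallel region body, executed once per team thread ... */
}

static void __example_parallel( ident_t *loc, int *x )
{
    __kmpc_fork_call( loc, 1, (kmpc_micro) __example_outlined, x );
}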
3413 
3414 KMP_EXPORT void __kmpc_serialized_parallel ( ident_t *, kmp_int32 global_tid );
3415 KMP_EXPORT void __kmpc_end_serialized_parallel ( ident_t *, kmp_int32 global_tid );
3416 
3417 KMP_EXPORT void __kmpc_flush ( ident_t *);
3418 KMP_EXPORT void __kmpc_barrier ( ident_t *, kmp_int32 global_tid );
3419 KMP_EXPORT kmp_int32 __kmpc_master ( ident_t *, kmp_int32 global_tid );
3420 KMP_EXPORT void __kmpc_end_master ( ident_t *, kmp_int32 global_tid );
3421 KMP_EXPORT void __kmpc_ordered ( ident_t *, kmp_int32 global_tid );
3422 KMP_EXPORT void __kmpc_end_ordered ( ident_t *, kmp_int32 global_tid );
3423 KMP_EXPORT void __kmpc_critical ( ident_t *, kmp_int32 global_tid, kmp_critical_name * );
3424 KMP_EXPORT void __kmpc_end_critical ( ident_t *, kmp_int32 global_tid, kmp_critical_name * );
3425 
3426 #if OMP_45_ENABLED
3427 KMP_EXPORT void __kmpc_critical_with_hint ( ident_t *, kmp_int32 global_tid, kmp_critical_name *, uintptr_t hint );
3428 #endif
3429 
3430 KMP_EXPORT kmp_int32 __kmpc_barrier_master ( ident_t *, kmp_int32 global_tid );
3431 KMP_EXPORT void __kmpc_end_barrier_master ( ident_t *, kmp_int32 global_tid );
3432 
3433 KMP_EXPORT kmp_int32 __kmpc_barrier_master_nowait ( ident_t *, kmp_int32 global_tid );
3434 
3435 KMP_EXPORT kmp_int32 __kmpc_single ( ident_t *, kmp_int32 global_tid );
3436 KMP_EXPORT void __kmpc_end_single ( ident_t *, kmp_int32 global_tid );
3437 
3438 KMP_EXPORT void KMPC_FOR_STATIC_INIT ( ident_t *loc, kmp_int32 global_tid, kmp_int32 schedtype, kmp_int32 *plastiter,
3439  kmp_int *plower, kmp_int *pupper, kmp_int *pstride, kmp_int incr, kmp_int chunk );
3440 
3441 KMP_EXPORT void __kmpc_for_static_fini ( ident_t *loc, kmp_int32 global_tid );
3442 
3443 KMP_EXPORT void __kmpc_copyprivate( ident_t *loc, kmp_int32 global_tid, size_t cpy_size, void *cpy_data, void(*cpy_func)(void*,void*), kmp_int32 didit );
3444 
3445 extern void KMPC_SET_NUM_THREADS ( int arg );
3446 extern void KMPC_SET_DYNAMIC ( int flag );
3447 extern void KMPC_SET_NESTED ( int flag );
3448 
3449 /* --------------------------------------------------------------------------- */
3450 
3451 /*
3452  * Taskq interface routines
3453  */
3454 
3455 KMP_EXPORT kmpc_thunk_t * __kmpc_taskq (ident_t *loc, kmp_int32 global_tid, kmpc_task_t taskq_task, size_t sizeof_thunk,
3456  size_t sizeof_shareds, kmp_int32 flags, kmpc_shared_vars_t **shareds);
3457 KMP_EXPORT void __kmpc_end_taskq (ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk);
3458 KMP_EXPORT kmp_int32 __kmpc_task (ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk);
3459 KMP_EXPORT void __kmpc_taskq_task (ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk, kmp_int32 status);
3460 KMP_EXPORT void __kmpc_end_taskq_task (ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk);
3461 KMP_EXPORT kmpc_thunk_t * __kmpc_task_buffer (ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *taskq_thunk, kmpc_task_t task);
3462 
3463 /* ------------------------------------------------------------------------ */
3464 
3465 /*
3466  * OMP 3.0 tasking interface routines
3467  */
3468 
3469 KMP_EXPORT kmp_int32
3470 __kmpc_omp_task( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task );
3471 KMP_EXPORT kmp_task_t*
3472 __kmpc_omp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags,
3473  size_t sizeof_kmp_task_t, size_t sizeof_shareds,
3474  kmp_routine_entry_t task_entry );
3475 KMP_EXPORT void
3476 __kmpc_omp_task_begin_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task );
3477 KMP_EXPORT void
3478 __kmpc_omp_task_complete_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task );
3479 KMP_EXPORT kmp_int32
3480 __kmpc_omp_task_parts( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task );
3481 KMP_EXPORT kmp_int32
3482 __kmpc_omp_taskwait( ident_t *loc_ref, kmp_int32 gtid );
3483 
3484 KMP_EXPORT kmp_int32
3485 __kmpc_omp_taskyield( ident_t *loc_ref, kmp_int32 gtid, int end_part );
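/* Illustrative sketch, not part of the runtime sources: explicit-task creation roughly
   as a compiler emits it for "#pragma omp task" followed by "taskwait". The entry
   routine, the flag value (1 = tied) and the sizes are placeholders. */
static kmp_int32 __example_task_entry( kmp_int32 gtid, kmp_task_t *task )
{
    /* ... task body; private data is laid out past the end of *task ... */
    return 0;
}

static void __example_task( ident_t *loc, kmp_int32 gtid )
{
    kmp_task_t *t = __kmpc_omp_task_alloc( loc, gtid, 1 /* tied */,
                                           sizeof( kmp_task_t ), 0 /* sizeof_shareds */,
                                           (kmp_routine_entry_t) __example_task_entry );
    __kmpc_omp_task( loc, gtid, t );
    __kmpc_omp_taskwait( loc, gtid );
}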
3486 
3487 #if TASK_UNUSED
3488 void __kmpc_omp_task_begin( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task );
3489 void __kmpc_omp_task_complete( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task );
3490 #endif // TASK_UNUSED
3491 
3492 /* ------------------------------------------------------------------------ */
3493 
3494 #if OMP_40_ENABLED
3495 
3496 KMP_EXPORT void __kmpc_taskgroup( ident_t * loc, int gtid );
3497 KMP_EXPORT void __kmpc_end_taskgroup( ident_t * loc, int gtid );
3498 
3499 KMP_EXPORT kmp_int32 __kmpc_omp_task_with_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task,
3500  kmp_int32 ndeps, kmp_depend_info_t *dep_list,
3501  kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list );
3502 KMP_EXPORT void __kmpc_omp_wait_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
3503  kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list );
3504 extern void __kmp_release_deps ( kmp_int32 gtid, kmp_taskdata_t *task );
3505 
3506 extern kmp_int32 __kmp_omp_task( kmp_int32 gtid, kmp_task_t * new_task, bool serialize_immediate );
3507 
3508 KMP_EXPORT kmp_int32 __kmpc_cancel(ident_t* loc_ref, kmp_int32 gtid, kmp_int32 cncl_kind);
3509 KMP_EXPORT kmp_int32 __kmpc_cancellationpoint(ident_t* loc_ref, kmp_int32 gtid, kmp_int32 cncl_kind);
3510 KMP_EXPORT kmp_int32 __kmpc_cancel_barrier(ident_t* loc_ref, kmp_int32 gtid);
3511 KMP_EXPORT int __kmp_get_cancellation_status(int cancel_kind);
3512 
3513 #if OMP_45_ENABLED
3514 
3515 KMP_EXPORT void __kmpc_proxy_task_completed( kmp_int32 gtid, kmp_task_t *ptask );
3516 KMP_EXPORT void __kmpc_proxy_task_completed_ooo ( kmp_task_t *ptask );
3517 KMP_EXPORT void __kmpc_taskloop(ident_t *loc, kmp_int32 gtid, kmp_task_t *task, kmp_int32 if_val,
3518  kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
3519  kmp_int32 nogroup, kmp_int32 sched, kmp_uint64 grainsize, void * task_dup );
3520 #endif
3521 
3522 #endif
3523 
3524 
3525 /*
3526  * Lock interface routines (fast versions with gtid passed in)
3527  */
3528 KMP_EXPORT void __kmpc_init_lock( ident_t *loc, kmp_int32 gtid, void **user_lock );
3529 KMP_EXPORT void __kmpc_init_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock );
3530 KMP_EXPORT void __kmpc_destroy_lock( ident_t *loc, kmp_int32 gtid, void **user_lock );
3531 KMP_EXPORT void __kmpc_destroy_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock );
3532 KMP_EXPORT void __kmpc_set_lock( ident_t *loc, kmp_int32 gtid, void **user_lock );
3533 KMP_EXPORT void __kmpc_set_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock );
3534 KMP_EXPORT void __kmpc_unset_lock( ident_t *loc, kmp_int32 gtid, void **user_lock );
3535 KMP_EXPORT void __kmpc_unset_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock );
3536 KMP_EXPORT int __kmpc_test_lock( ident_t *loc, kmp_int32 gtid, void **user_lock );
3537 KMP_EXPORT int __kmpc_test_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock );
3538 
3539 #if OMP_45_ENABLED
3540 KMP_EXPORT void __kmpc_init_lock_with_hint( ident_t *loc, kmp_int32 gtid, void **user_lock, uintptr_t hint );
3541 KMP_EXPORT void __kmpc_init_nest_lock_with_hint( ident_t *loc, kmp_int32 gtid, void **user_lock, uintptr_t hint );
3542 #endif
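/* Illustrative sketch, not part of the runtime sources: the call sequence a compiler
   may emit for lock operations when it can pass the gtid directly. The local lock
   storage is a placeholder; the runtime may install its own pointer through user_lock. */
static void __example_lock_use( ident_t *loc, kmp_int32 gtid )
{
    void *lck = NULL;

    __kmpc_init_lock( loc, gtid, &lck );
    __kmpc_set_lock( loc, gtid, &lck );
    /* ... critical section ... */
    __kmpc_unset_lock( loc, gtid, &lck );
    __kmpc_destroy_lock( loc, gtid, &lck );
}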
3543 
3544 /* ------------------------------------------------------------------------ */
3545 
3546 /*
3547  * Interface to fast scalable reduce methods routines
3548  */
3549 
3550 KMP_EXPORT kmp_int32 __kmpc_reduce_nowait( ident_t *loc, kmp_int32 global_tid,
3551  kmp_int32 num_vars, size_t reduce_size,
3552  void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
3553  kmp_critical_name *lck );
3554 KMP_EXPORT void __kmpc_end_reduce_nowait( ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck );
3555 KMP_EXPORT kmp_int32 __kmpc_reduce( ident_t *loc, kmp_int32 global_tid,
3556  kmp_int32 num_vars, size_t reduce_size,
3557  void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
3558  kmp_critical_name *lck );
3559 KMP_EXPORT void __kmpc_end_reduce( ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck );
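/* Illustrative sketch, not part of the runtime sources: the branch a compiler typically
   emits around __kmpc_reduce_nowait() for a single int sum. Names are placeholders,
   and the meaning of the return codes (1 = combine here and call the matching end_
   routine, 2 = use atomics, 0 = nothing to do on this thread) is assumed from the
   reduction protocol rather than restated here. */
static void __example_combine( void *lhs, void *rhs ) { *(int *) lhs += *(int *) rhs; }

static void __example_reduce( ident_t *loc, kmp_int32 gtid, kmp_critical_name *lck,
                              int *shared_sum, int local_sum )
{
    switch ( __kmpc_reduce_nowait( loc, gtid, 1, sizeof( int ),
                                   &local_sum, __example_combine, lck ) ) {
    case 1:
        *shared_sum += local_sum;                    /* combine under the critical/tree path */
        __kmpc_end_reduce_nowait( loc, gtid, lck );
        break;
    case 2:
        /* atomic path: combine with an atomic add on *shared_sum instead */
        break;
    default:
        break;
    }
}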
3560 
3561 /*
3562  * internal fast reduction routines
3563  */
3564 
3565 extern PACKED_REDUCTION_METHOD_T
3566 __kmp_determine_reduction_method( ident_t *loc, kmp_int32 global_tid,
3567  kmp_int32 num_vars, size_t reduce_size,
3568  void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
3569  kmp_critical_name *lck );
3570 
3571 // this function is for testing set/get/determine reduce method
3572 KMP_EXPORT kmp_int32 __kmp_get_reduce_method( void );
3573 
3574 KMP_EXPORT kmp_uint64 __kmpc_get_taskid();
3575 KMP_EXPORT kmp_uint64 __kmpc_get_parent_taskid();
3576 
3577 // this function is exported for testing of the KMP_PLACE_THREADS functionality
3578 KMP_EXPORT void __kmpc_place_threads(int,int,int,int,int);
3579 
3580 /* ------------------------------------------------------------------------ */
3581 /* ------------------------------------------------------------------------ */
3582 
3583 // C++ port
3584 // missing 'extern "C"' declarations
3585 
3586 KMP_EXPORT kmp_int32 __kmpc_in_parallel( ident_t *loc );
3587 KMP_EXPORT void __kmpc_pop_num_threads( ident_t *loc, kmp_int32 global_tid );
3588 KMP_EXPORT void __kmpc_push_num_threads( ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads );
3589 
3590 #if OMP_40_ENABLED
3591 KMP_EXPORT void __kmpc_push_proc_bind( ident_t *loc, kmp_int32 global_tid, int proc_bind );
3592 KMP_EXPORT void __kmpc_push_num_teams( ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams, kmp_int32 num_threads );
3593 KMP_EXPORT void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...);
3594 #endif
3595 #if OMP_45_ENABLED
3596 struct kmp_dim { // loop bounds info casted to kmp_int64
3597  kmp_int64 lo; // lower
3598  kmp_int64 up; // upper
3599  kmp_int64 st; // stride
3600 };
3601 KMP_EXPORT void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32 num_dims, struct kmp_dim * dims);
3602 KMP_EXPORT void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 *vec);
3603 KMP_EXPORT void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64 *vec);
3604 KMP_EXPORT void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
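/* Illustrative sketch, not part of the runtime sources: a one-dimensional doacross
   loop, as for "#pragma omp for ordered(1)" with depend(sink: i-1) and depend(source).
   Bounds and the body are placeholders, and the hand-out of iterations by the
   worksharing construct is elided. */
static void __example_doacross( ident_t *loc, kmp_int32 gtid )
{
    struct kmp_dim dims = { 0, 99, 1 };              /* lo, up, st */
    kmp_int64 vec;

    __kmpc_doacross_init( loc, gtid, 1, &dims );
    for ( kmp_int64 i = 0; i <= 99; ++i ) {
        if ( i > 0 ) {
            vec = i - 1;
            __kmpc_doacross_wait( loc, gtid, &vec ); /* depend(sink: i-1) */
        }
        /* ... ordered body for iteration i ... */
        vec = i;
        __kmpc_doacross_post( loc, gtid, &vec );     /* depend(source) */
    }
    __kmpc_doacross_fini( loc, gtid );
}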
3605 #endif
3606 
3607 KMP_EXPORT void*
3608 __kmpc_threadprivate_cached( ident_t * loc, kmp_int32 global_tid,
3609  void * data, size_t size, void *** cache );
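/* Illustrative sketch, not part of the runtime sources: compiler-style access to a
   threadprivate variable through the cached entry point. Both names are placeholders;
   the runtime allocates and fills the cache on first use. */
static int __example_counter;                 /* the threadprivate original */
static void **__example_counter_cache;        /* per-variable cache */

static int * __example_counter_addr( ident_t *loc, kmp_int32 gtid )
{
    return (int *) __kmpc_threadprivate_cached( loc, gtid, &__example_counter,
                                                sizeof( __example_counter ),
                                                &__example_counter_cache );
}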
3610 
3611 // Symbols for mutual detection of mixed OpenMP libraries at link time (MS linker check).
3612 extern int _You_must_link_with_exactly_one_OpenMP_library;
3613 extern int _You_must_link_with_Intel_OpenMP_library;
3614 #if KMP_OS_WINDOWS && ( KMP_VERSION_MAJOR > 4 )
3615  extern int _You_must_link_with_Microsoft_OpenMP_library;
3616 #endif
3617 
3618 
3619 // The routines below are not exported.
3620 // Consider making them 'static' in corresponding source files.
3621 void
3622 kmp_threadprivate_insert_private_data( int gtid, void *pc_addr, void *data_addr, size_t pc_size );
3623 struct private_common *
3624 kmp_threadprivate_insert( int gtid, void *pc_addr, void *data_addr, size_t pc_size );
3625 
3626 //
3627 // ompc_, kmpc_ entries moved from omp.h.
3628 //
3629 #if KMP_OS_WINDOWS
3630 # define KMPC_CONVENTION __cdecl
3631 #else
3632 # define KMPC_CONVENTION
3633 #endif
3634 
3635 #ifndef __OMP_H
3636 typedef enum omp_sched_t {
3637  omp_sched_static = 1,
3638  omp_sched_dynamic = 2,
3639  omp_sched_guided = 3,
3640  omp_sched_auto = 4
3641 } omp_sched_t;
3642 typedef void * kmp_affinity_mask_t;
3643 #endif
3644 
3645 KMP_EXPORT void KMPC_CONVENTION ompc_set_max_active_levels(int);
3646 KMP_EXPORT void KMPC_CONVENTION ompc_set_schedule(omp_sched_t, int);
3647 KMP_EXPORT int KMPC_CONVENTION ompc_get_ancestor_thread_num(int);
3648 KMP_EXPORT int KMPC_CONVENTION ompc_get_team_size(int);
3649 KMP_EXPORT int KMPC_CONVENTION kmpc_set_affinity_mask_proc(int, kmp_affinity_mask_t *);
3650 KMP_EXPORT int KMPC_CONVENTION kmpc_unset_affinity_mask_proc(int, kmp_affinity_mask_t *);
3651 KMP_EXPORT int KMPC_CONVENTION kmpc_get_affinity_mask_proc(int, kmp_affinity_mask_t *);
3652 
3653 KMP_EXPORT void KMPC_CONVENTION kmpc_set_stacksize(int);
3654 KMP_EXPORT void KMPC_CONVENTION kmpc_set_stacksize_s(size_t);
3655 KMP_EXPORT void KMPC_CONVENTION kmpc_set_library(int);
3656 KMP_EXPORT void KMPC_CONVENTION kmpc_set_defaults(char const *);
3657 KMP_EXPORT void KMPC_CONVENTION kmpc_set_disp_num_buffers(int);
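/* Illustrative sketch, not part of the runtime sources: these calling-convention-pinned
   entry points are assumed to sit behind the user-level kmp_ and omp_ setters, so
   direct calls like the following would mirror the omp.h wrappers. */
static void __example_defaults( void )
{
    kmpc_set_stacksize_s( 4 * 1024 * 1024 );      /* 4 MB worker stacks */
    ompc_set_schedule( omp_sched_dynamic, 4 );    /* runtime schedule: dynamic,4 */
    kmpc_set_disp_num_buffers( 7 );               /* dispatch buffers per team */
}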
3658 
3659 #ifdef __cplusplus
3660 }
3661 #endif
3662 
3663 #endif /* KMP_H */
3664 