LLVM OpenMP* Runtime Library
kmp_lock.h
1 /*
2  * kmp_lock.h -- lock header file
3  */
4 
5 
6 //===----------------------------------------------------------------------===//
7 //
8 // The LLVM Compiler Infrastructure
9 //
10 // This file is dual licensed under the MIT and the University of Illinois Open
11 // Source Licenses. See LICENSE.txt for details.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 
16 #ifndef KMP_LOCK_H
17 #define KMP_LOCK_H
18 
19 #include <limits.h> // CHAR_BIT
20 #include <stddef.h> // offsetof
21 
22 #include "kmp_os.h"
23 #include "kmp_debug.h"
24 
25 #ifdef __cplusplus
26 extern "C" {
27 #endif // __cplusplus
28 
29 // ----------------------------------------------------------------------------
30 // Have to copy these definitions from kmp.h because kmp.h cannot be included
31 // due to circular dependencies. Will undef these at end of file.
32 
33 #define KMP_PAD(type, sz) (sizeof(type) + (sz - ((sizeof(type) - 1) % (sz)) - 1))
34 #define KMP_GTID_DNE (-2)
35 
36 // Forward declaration of ident and ident_t
37 
38 struct ident;
39 typedef struct ident ident_t;
40 
41 // End of copied code.
42 // ----------------------------------------------------------------------------
43 
44 //
45 // We need to know the size of the area we can assume that the compiler(s)
46 // allocated for objects of type omp_lock_t and omp_nest_lock_t. The Intel
47 // compiler always allocates a pointer-sized area, as does Visual Studio.
48 //
49 // gcc, however, only allocates 4 bytes for regular locks, even on 64-bit
50 // Intel architectures. It allocates at least 8 bytes for nested locks (more
51 // on recent versions), but we are bounded by the pointer-sized chunks that
52 // the Intel compiler allocates.
53 //
54 
55 #if KMP_OS_LINUX && defined(KMP_GOMP_COMPAT)
56 # define OMP_LOCK_T_SIZE sizeof(int)
57 # define OMP_NEST_LOCK_T_SIZE sizeof(void *)
58 #else
59 # define OMP_LOCK_T_SIZE sizeof(void *)
60 # define OMP_NEST_LOCK_T_SIZE sizeof(void *)
61 #endif
62 
63 //
64 // The Intel compiler allocates a 32-byte chunk for a critical section.
65 // Both gcc and Visual Studio only allocate enough space for a pointer.
66 // Sometimes we know that the space was allocated by the Intel compiler.
67 //
68 #define OMP_CRITICAL_SIZE sizeof(void *)
69 #define INTEL_CRITICAL_SIZE 32
70 
71 //
72 // lock flags
73 //
74 typedef kmp_uint32 kmp_lock_flags_t;
75 
76 #define kmp_lf_critical_section 1
77 
78 //
79 // When a lock table is used, the indices are of kmp_lock_index_t
80 //
81 typedef kmp_uint32 kmp_lock_index_t;
82 
83 //
84 // When memory allocated for locks is on the lock pool (free list),
85 // it is treated as structs of this type.
86 //
87 struct kmp_lock_pool {
88  union kmp_user_lock *next;
89  kmp_lock_index_t index;
90 };
91 
92 typedef struct kmp_lock_pool kmp_lock_pool_t;
93 
94 
95 extern void __kmp_validate_locks( void );
96 
97 
98 // ----------------------------------------------------------------------------
99 //
100 // There are 5 lock implementations:
101 //
102 // 1. Test and set locks.
103 // 2. futex locks (Linux* OS on x86 and Intel(R) Many Integrated Core architecture)
104 // 3. Ticket (Lamport bakery) locks.
105 // 4. Queuing locks (with separate spin fields).
106 // 5. DRPA (Dynamically Reconfigurable Distributed Polling Area) locks
107 //
108 // and 3 lock purposes:
109 //
110 // 1. Bootstrap locks -- Used for a few locks available at library startup/shutdown time.
111 // These do not require non-negative global thread IDs.
112 // 2. Internal RTL locks -- Used everywhere else in the RTL
113 // 3. User locks (includes critical sections)
114 //
115 // ----------------------------------------------------------------------------
116 
117 
118 // ============================================================================
119 // Lock implementations.
120 // ============================================================================
121 
122 
123 // ----------------------------------------------------------------------------
124 // Test and set locks.
125 //
126 // Non-nested test and set locks differ from the other lock kinds (except
127 // futex) in that we use the memory allocated by the compiler for the lock,
128 // rather than a pointer to it.
129 //
130 // On lin32, lin_32e, and win_32, the space allocated may be as small as 4
131 // bytes, so we have to use a lock table for nested locks, and avoid accessing
132 // the depth_locked field for non-nested locks.
133 //
134 // Information normally available to the tools, such as lock location,
135 // lock usage (normal lock vs. critical section), etc. is not available with
136 // test and set locks.
137 // ----------------------------------------------------------------------------
138 
139 struct kmp_base_tas_lock {
140  volatile kmp_int32 poll; // 0 => unlocked
141  // locked: (gtid+1) of owning thread
142  kmp_int32 depth_locked; // depth locked, for nested locks only
143 };
144 
145 typedef struct kmp_base_tas_lock kmp_base_tas_lock_t;
146 
147 union kmp_tas_lock {
148  kmp_base_tas_lock_t lk;
149  kmp_lock_pool_t pool; // make certain struct is large enough
150  double lk_align; // use worst case alignment
151  // no cache line padding
152 };
153 
154 typedef union kmp_tas_lock kmp_tas_lock_t;
155 
156 //
157 // Static initializer for test and set lock variables. Usage:
158 // kmp_tas_lock_t xlock = KMP_TAS_LOCK_INITIALIZER( xlock );
159 //
160 #define KMP_TAS_LOCK_INITIALIZER( lock ) { { 0, 0 } }
161 
162 extern int __kmp_acquire_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid );
163 extern int __kmp_test_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid );
164 extern int __kmp_release_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid );
165 extern void __kmp_init_tas_lock( kmp_tas_lock_t *lck );
166 extern void __kmp_destroy_tas_lock( kmp_tas_lock_t *lck );
167 
168 extern int __kmp_acquire_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid );
169 extern int __kmp_test_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid );
170 extern int __kmp_release_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid );
171 extern void __kmp_init_nested_tas_lock( kmp_tas_lock_t *lck );
172 extern void __kmp_destroy_nested_tas_lock( kmp_tas_lock_t *lck );
173 
174 #define KMP_LOCK_RELEASED 1
175 #define KMP_LOCK_STILL_HELD 0
176 #define KMP_LOCK_ACQUIRED_FIRST 1
177 #define KMP_LOCK_ACQUIRED_NEXT 0
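
//
// Illustrative sketch (not part of the original header): how the non-nested TAS API
// above is typically driven. `gtid' stands for a valid non-negative global thread id
// obtained elsewhere in the RTL; the snippet is kept as a comment so it does not
// affect compilation, and return-value handling is only indicative.
//
// void tas_example( kmp_int32 gtid ) {
//     kmp_tas_lock_t xlock;
//     __kmp_init_tas_lock( &xlock );
//     if ( ! __kmp_test_tas_lock( &xlock, gtid ) ) {   // non-blocking attempt failed,
//         __kmp_acquire_tas_lock( &xlock, gtid );      // so block until acquired
//     }
//     __kmp_release_tas_lock( &xlock, gtid );
//     __kmp_destroy_tas_lock( &xlock );
// }
//
// The nested variants (__kmp_*_nested_tas_lock) additionally track depth_locked;
// KMP_LOCK_ACQUIRED_FIRST/KMP_LOCK_ACQUIRED_NEXT and KMP_LOCK_RELEASED/KMP_LOCK_STILL_HELD
// above are the values those acquire/release paths report.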
178 
179 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
180 
181 // ----------------------------------------------------------------------------
182 // futex locks. futex locks are only available on Linux* OS.
183 //
184 // Like non-nested test and set locks, non-nested futex locks use the memory
185 // allocated by the compiler for the lock, rather than a pointer to it.
186 //
187 // Information normally available to the tools, such as lock location,
188 // lock usage (normal lock vs. critical section), etc. is not available with
189 // futex locks. With non-nested futex locks, the lock owner is not
190 // even available.
191 // ----------------------------------------------------------------------------
192 
193 struct kmp_base_futex_lock {
194  volatile kmp_int32 poll; // 0 => unlocked
195  // locked: 2*(gtid+1) of owning thread
196  // (i.e. the owner's gtid+1, shifted left by one bit)
197  kmp_int32 depth_locked; // depth locked, for nested locks only
198 };
199 
200 typedef struct kmp_base_futex_lock kmp_base_futex_lock_t;
201 
202 union kmp_futex_lock {
203  kmp_base_futex_lock_t lk;
204  kmp_lock_pool_t pool; // make certain struct is large enough
205  double lk_align; // use worst case alignment
206  // no cache line padding
207 };
208 
209 typedef union kmp_futex_lock kmp_futex_lock_t;
210 
211 //
212 // Static initializer for futex lock variables. Usage:
213 // kmp_futex_lock_t xlock = KMP_FUTEX_LOCK_INITIALIZER( xlock );
214 //
215 #define KMP_FUTEX_LOCK_INITIALIZER( lock ) { { 0, 0 } }
216 
217 extern int __kmp_acquire_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid );
218 extern int __kmp_test_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid );
219 extern int __kmp_release_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid );
220 extern void __kmp_init_futex_lock( kmp_futex_lock_t *lck );
221 extern void __kmp_destroy_futex_lock( kmp_futex_lock_t *lck );
222 
223 extern int __kmp_acquire_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid );
224 extern int __kmp_test_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid );
225 extern int __kmp_release_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid );
226 extern void __kmp_init_nested_futex_lock( kmp_futex_lock_t *lck );
227 extern void __kmp_destroy_nested_futex_lock( kmp_futex_lock_t *lck );
228 
229 #endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
230 
231 
232 // ----------------------------------------------------------------------------
233 // Ticket locks.
234 // ----------------------------------------------------------------------------
235 
236 struct kmp_base_ticket_lock {
237  // `initialized' must be the first entry in the lock data structure!
238  volatile union kmp_ticket_lock * initialized; // points to the lock union if in initialized state
239  ident_t const * location; // Source code location of omp_init_lock().
240  volatile kmp_uint32 next_ticket; // ticket number to give to next thread which acquires
241  volatile kmp_uint32 now_serving; // ticket number for thread which holds the lock
242  volatile kmp_int32 owner_id; // (gtid+1) of owning thread, 0 if unlocked
243  kmp_int32 depth_locked; // depth locked, for nested locks only
244  kmp_lock_flags_t flags; // lock specifics, e.g. critical section lock
245 };
246 
247 typedef struct kmp_base_ticket_lock kmp_base_ticket_lock_t;
248 
249 union KMP_ALIGN_CACHE kmp_ticket_lock {
250  kmp_base_ticket_lock_t lk; // This field must be first to allow static initializing.
251  kmp_lock_pool_t pool;
252  double lk_align; // use worst case alignment
253  char lk_pad[ KMP_PAD( kmp_base_ticket_lock_t, CACHE_LINE ) ];
254 };
255 
256 typedef union kmp_ticket_lock kmp_ticket_lock_t;
257 
258 //
259 // Static initializer for simple ticket lock variables. Usage:
260 // kmp_ticket_lock_t xlock = KMP_TICKET_LOCK_INITIALIZER( xlock );
261 // Note the macro argument: it is needed so the `initialized' field can point to the lock itself.
262 //
263 #define KMP_TICKET_LOCK_INITIALIZER( lock ) { { (kmp_ticket_lock_t *) & (lock), NULL, 0, 0, 0, -1 } }
264 
265 extern int __kmp_acquire_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid );
266 extern int __kmp_test_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid );
267 extern int __kmp_test_ticket_lock_with_cheks( kmp_ticket_lock_t *lck, kmp_int32 gtid );
268 extern int __kmp_release_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid );
269 extern void __kmp_init_ticket_lock( kmp_ticket_lock_t *lck );
270 extern void __kmp_destroy_ticket_lock( kmp_ticket_lock_t *lck );
271 
272 extern int __kmp_acquire_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid );
273 extern int __kmp_test_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid );
274 extern int __kmp_release_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid );
275 extern void __kmp_init_nested_ticket_lock( kmp_ticket_lock_t *lck );
276 extern void __kmp_destroy_nested_ticket_lock( kmp_ticket_lock_t *lck );
277 
278 
279 // ----------------------------------------------------------------------------
280 // Queuing locks.
281 // ----------------------------------------------------------------------------
282 
283 #if KMP_USE_ADAPTIVE_LOCKS
284 
285 struct kmp_adaptive_lock_info;
286 
287 typedef struct kmp_adaptive_lock_info kmp_adaptive_lock_info_t;
288 
289 #if KMP_DEBUG_ADAPTIVE_LOCKS
290 
291 struct kmp_adaptive_lock_statistics {
292  /* So we can get stats from locks that haven't been destroyed. */
293  kmp_adaptive_lock_info_t * next;
294  kmp_adaptive_lock_info_t * prev;
295 
296  /* Other statistics */
297  kmp_uint32 successfulSpeculations;
298  kmp_uint32 hardFailedSpeculations;
299  kmp_uint32 softFailedSpeculations;
300  kmp_uint32 nonSpeculativeAcquires;
301  kmp_uint32 nonSpeculativeAcquireAttempts;
302  kmp_uint32 lemmingYields;
303 };
304 
305 typedef struct kmp_adaptive_lock_statistics kmp_adaptive_lock_statistics_t;
306 
307 extern void __kmp_print_speculative_stats();
308 extern void __kmp_init_speculative_stats();
309 
310 #endif // KMP_DEBUG_ADAPTIVE_LOCKS
311 
312 struct kmp_adaptive_lock_info
313 {
314  /* Values used for adaptivity.
315  * Although these are accessed from multiple threads we don't access them atomically,
316  * because if we miss updates it probably doesn't matter much. (It just affects our
317  * decision about whether to try speculation on the lock).
318  */
319  kmp_uint32 volatile badness;
320  kmp_uint32 volatile acquire_attempts;
321  /* Parameters of the lock. */
322  kmp_uint32 max_badness;
323  kmp_uint32 max_soft_retries;
324 
325 #if KMP_DEBUG_ADAPTIVE_LOCKS
326  kmp_adaptive_lock_statistics_t volatile stats;
327 #endif
328 };
329 
330 #endif // KMP_USE_ADAPTIVE_LOCKS
331 
332 
333 struct kmp_base_queuing_lock {
334 
335  // `initialized' must be the first entry in the lock data structure!
336  volatile union kmp_queuing_lock *initialized; // Points to the lock union if in initialized state.
337 
338  ident_t const * location; // Source code location of omp_init_lock().
339 
340  KMP_ALIGN( 8 ) // tail_id must be 8-byte aligned!
341 
342  volatile kmp_int32 tail_id; // (gtid+1) of thread at tail of wait queue, 0 if empty
343  // Must be no padding here since head/tail used in 8-byte CAS
344  volatile kmp_int32 head_id; // (gtid+1) of thread at head of wait queue, 0 if empty
345  // Decl order assumes little endian
346  // bakery-style lock
347  volatile kmp_uint32 next_ticket; // ticket number to give to next thread which acquires
348  volatile kmp_uint32 now_serving; // ticket number for thread which holds the lock
349  volatile kmp_int32 owner_id; // (gtid+1) of owning thread, 0 if unlocked
350  kmp_int32 depth_locked; // depth locked, for nested locks only
351 
352  kmp_lock_flags_t flags; // lock specifics, e.g. critical section lock
353 };
354 
355 typedef struct kmp_base_queuing_lock kmp_base_queuing_lock_t;
356 
357 KMP_BUILD_ASSERT( offsetof( kmp_base_queuing_lock_t, tail_id ) % 8 == 0 );
358 
359 union KMP_ALIGN_CACHE kmp_queuing_lock {
360  kmp_base_queuing_lock_t lk; // This field must be first to allow static initializing.
361  kmp_lock_pool_t pool;
362  double lk_align; // use worst case alignment
363  char lk_pad[ KMP_PAD( kmp_base_queuing_lock_t, CACHE_LINE ) ];
364 };
365 
366 typedef union kmp_queuing_lock kmp_queuing_lock_t;
367 
368 extern int __kmp_acquire_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid );
369 extern int __kmp_test_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid );
370 extern int __kmp_release_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid );
371 extern void __kmp_init_queuing_lock( kmp_queuing_lock_t *lck );
372 extern void __kmp_destroy_queuing_lock( kmp_queuing_lock_t *lck );
373 
374 extern int __kmp_acquire_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid );
375 extern int __kmp_test_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid );
376 extern int __kmp_release_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid );
377 extern void __kmp_init_nested_queuing_lock( kmp_queuing_lock_t *lck );
378 extern void __kmp_destroy_nested_queuing_lock( kmp_queuing_lock_t *lck );
379 
380 #if KMP_USE_ADAPTIVE_LOCKS
381 
382 // ----------------------------------------------------------------------------
383 // Adaptive locks.
384 // ----------------------------------------------------------------------------
385 struct kmp_base_adaptive_lock {
386  kmp_base_queuing_lock qlk;
387  KMP_ALIGN(CACHE_LINE)
388  kmp_adaptive_lock_info_t adaptive; // Information for the speculative adaptive lock
389 };
390 
391 typedef struct kmp_base_adaptive_lock kmp_base_adaptive_lock_t;
392 
393 union KMP_ALIGN_CACHE kmp_adaptive_lock {
394  kmp_base_adaptive_lock_t lk;
395  kmp_lock_pool_t pool;
396  double lk_align;
397  char lk_pad[ KMP_PAD(kmp_base_adaptive_lock_t, CACHE_LINE) ];
398 };
399 typedef union kmp_adaptive_lock kmp_adaptive_lock_t;
400 
401 # define GET_QLK_PTR(l) ((kmp_queuing_lock_t *) & (l)->lk.qlk)
402 
403 #endif // KMP_USE_ADAPTIVE_LOCKS
404 
405 // ----------------------------------------------------------------------------
406 // DRDPA ticket locks.
407 // ----------------------------------------------------------------------------
408 
409 struct kmp_base_drdpa_lock {
410  //
411  // All of the fields on the first cache line are only written when
412  // initializing or reconfiguring the lock. These are relatively rare
413  // operations, so data from the first cache line will usually stay
414  // resident in the cache of each thread trying to acquire the lock.
415  //
416  // initialized must be the first entry in the lock data structure!
417  //
418  KMP_ALIGN_CACHE
419 
420  volatile union kmp_drdpa_lock * initialized; // points to the lock union if in initialized state
421  ident_t const * location; // Source code location of omp_init_lock().
422  volatile struct kmp_lock_poll {
423  kmp_uint64 poll;
424  } * volatile polls;
425  volatile kmp_uint64 mask; // is 2**num_polls-1 for mod op
426  kmp_uint64 cleanup_ticket; // thread with cleanup ticket
427  volatile struct kmp_lock_poll * old_polls; // will deallocate old_polls
428  kmp_uint32 num_polls; // must be power of 2
429 
430  //
431 // next_ticket needs to exist in a separate cache line, as it is
432  // invalidated every time a thread takes a new ticket.
433  //
434  KMP_ALIGN_CACHE
435 
436  volatile kmp_uint64 next_ticket;
437 
438  //
439  // now_serving is used to store our ticket value while we hold the lock.
440  // It has a slightly different meaning in the DRDPA ticket locks (where
441  // it is written by the acquiring thread) than it does in the simple
442  // ticket locks (where it is written by the releasing thread).
443  //
444 // Since now_serving is only read and written in the critical section,
445  // it is non-volatile, but it needs to exist on a separate cache line,
446  // as it is invalidated at every lock acquire.
447  //
448  // Likewise, the vars used for nested locks (owner_id and depth_locked)
449  // are only written by the thread owning the lock, so they are put in
450  // this cache line. owner_id is read by other threads, so it must be
451  // declared volatile.
452  //
453  KMP_ALIGN_CACHE
454 
455  kmp_uint64 now_serving; // doesn't have to be volatile
456  volatile kmp_uint32 owner_id; // (gtid+1) of owning thread, 0 if unlocked
457  kmp_int32 depth_locked; // depth locked
458  kmp_lock_flags_t flags; // lock specifics, e.g. critical section lock
459 };
460 
461 typedef struct kmp_base_drdpa_lock kmp_base_drdpa_lock_t;
462 
463 union KMP_ALIGN_CACHE kmp_drdpa_lock {
464  kmp_base_drdpa_lock_t lk; // This field must be first to allow static initializing.
465  kmp_lock_pool_t pool;
466  double lk_align; // use worst case alignment
467  char lk_pad[ KMP_PAD( kmp_base_drdpa_lock_t, CACHE_LINE ) ];
468 };
469 
470 typedef union kmp_drdpa_lock kmp_drdpa_lock_t;
471 
472 extern int __kmp_acquire_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid );
473 extern int __kmp_test_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid );
474 extern int __kmp_release_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid );
475 extern void __kmp_init_drdpa_lock( kmp_drdpa_lock_t *lck );
476 extern void __kmp_destroy_drdpa_lock( kmp_drdpa_lock_t *lck );
477 
478 extern int __kmp_acquire_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid );
479 extern int __kmp_test_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid );
480 extern int __kmp_release_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid );
481 extern void __kmp_init_nested_drdpa_lock( kmp_drdpa_lock_t *lck );
482 extern void __kmp_destroy_nested_drdpa_lock( kmp_drdpa_lock_t *lck );
483 
484 
485 // ============================================================================
486 // Lock purposes.
487 // ============================================================================
488 
489 
490 // ----------------------------------------------------------------------------
491 // Bootstrap locks.
492 // ----------------------------------------------------------------------------
493 
494 // Bootstrap locks -- very few locks used at library initialization time.
495 // Bootstrap locks are currently implemented as ticket locks.
496 // They could also be implemented as test and set locks, but cannot be
497 // implemented with the other lock kinds, as those require gtids, which are
498 // not available at initialization time.
499 
500 typedef kmp_ticket_lock_t kmp_bootstrap_lock_t;
501 
502 #define KMP_BOOTSTRAP_LOCK_INITIALIZER( lock ) KMP_TICKET_LOCK_INITIALIZER( (lock) )
503 
504 static inline int
505 __kmp_acquire_bootstrap_lock( kmp_bootstrap_lock_t *lck )
506 {
507  return __kmp_acquire_ticket_lock( lck, KMP_GTID_DNE );
508 }
509 
510 static inline int
511 __kmp_test_bootstrap_lock( kmp_bootstrap_lock_t *lck )
512 {
513  return __kmp_test_ticket_lock( lck, KMP_GTID_DNE );
514 }
515 
516 static inline void
517 __kmp_release_bootstrap_lock( kmp_bootstrap_lock_t *lck )
518 {
519  __kmp_release_ticket_lock( lck, KMP_GTID_DNE );
520 }
521 
522 static inline void
523 __kmp_init_bootstrap_lock( kmp_bootstrap_lock_t *lck )
524 {
525  __kmp_init_ticket_lock( lck );
526 }
527 
528 static inline void
529 __kmp_destroy_bootstrap_lock( kmp_bootstrap_lock_t *lck )
530 {
531  __kmp_destroy_ticket_lock( lck );
532 }
533 
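
//
// Illustrative sketch (not part of the original header): bootstrap locks are meant
// to be statically initialized and used before gtids exist, which is why the
// wrappers above pass KMP_GTID_DNE. Kept as a comment so it does not affect
// compilation; the lock and function names below are hypothetical.
//
// static kmp_bootstrap_lock_t example_lock =
//     KMP_BOOTSTRAP_LOCK_INITIALIZER( example_lock );
//
// void one_time_setup( void ) {
//     __kmp_acquire_bootstrap_lock( &example_lock );
//     /* ... library startup/shutdown work ... */
//     __kmp_release_bootstrap_lock( &example_lock );
// }
//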
534 
535 // ----------------------------------------------------------------------------
536 // Internal RTL locks.
537 // ----------------------------------------------------------------------------
538 
539 //
540 // Internal RTL locks are also implemented as ticket locks, for now.
541 //
542 // FIXME - We should go through and figure out which lock kind works best for
543 // each internal lock, and use the type declaration and function calls for
544 // that explicit lock kind (and get rid of this section).
545 //
546 
547 typedef kmp_ticket_lock_t kmp_lock_t;
548 
549 static inline int
550 __kmp_acquire_lock( kmp_lock_t *lck, kmp_int32 gtid )
551 {
552  return __kmp_acquire_ticket_lock( lck, gtid );
553 }
554 
555 static inline int
556 __kmp_test_lock( kmp_lock_t *lck, kmp_int32 gtid )
557 {
558  return __kmp_test_ticket_lock( lck, gtid );
559 }
560 
561 static inline void
562 __kmp_release_lock( kmp_lock_t *lck, kmp_int32 gtid )
563 {
564  __kmp_release_ticket_lock( lck, gtid );
565 }
566 
567 static inline void
568 __kmp_init_lock( kmp_lock_t *lck )
569 {
570  __kmp_init_ticket_lock( lck );
571 }
572 
573 static inline void
574 __kmp_destroy_lock( kmp_lock_t *lck )
575 {
576  __kmp_destroy_ticket_lock( lck );
577 }
578 
579 
580 // ----------------------------------------------------------------------------
581 // User locks.
582 // ----------------------------------------------------------------------------
583 
584 //
585 // Do not allocate objects of type union kmp_user_lock!!!
586 // This will waste space unless __kmp_user_lock_kind == lk_drdpa.
587 // Instead, check the value of __kmp_user_lock_kind and allocate objects of
588 // the type of the appropriate union member, and cast their addresses to
589 // kmp_user_lock_p.
590 //
591 
592 enum kmp_lock_kind {
593  lk_default = 0,
594  lk_tas,
595 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
596  lk_futex,
597 #endif
598 #if KMP_USE_DYNAMIC_LOCK && KMP_USE_TSX
599  lk_hle,
600  lk_rtm,
601 #endif
602  lk_ticket,
603  lk_queuing,
604  lk_drdpa,
605 #if KMP_USE_ADAPTIVE_LOCKS
606  lk_adaptive
607 #endif // KMP_USE_ADAPTIVE_LOCKS
608 };
609 
610 typedef enum kmp_lock_kind kmp_lock_kind_t;
611 
612 extern kmp_lock_kind_t __kmp_user_lock_kind;
613 
614 union kmp_user_lock {
615  kmp_tas_lock_t tas;
616 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
617  kmp_futex_lock_t futex;
618 #endif
619  kmp_ticket_lock_t ticket;
620  kmp_queuing_lock_t queuing;
621  kmp_drdpa_lock_t drdpa;
622 #if KMP_USE_ADAPTIVE_LOCKS
623  kmp_adaptive_lock_t adaptive;
624 #endif // KMP_USE_ADAPTIVE_LOCKS
625  kmp_lock_pool_t pool;
626 };
627 
628 typedef union kmp_user_lock *kmp_user_lock_p;
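
//
// Illustrative sketch (not part of the original header) of the allocation rule
// described above: allocate an object of the concrete lock type selected by
// __kmp_user_lock_kind and refer to it through kmp_user_lock_p. The malloc()
// call and the function name stand in for whatever allocator the RTL actually
// uses; the real entry point is __kmp_user_lock_allocate(), declared below.
//
// kmp_user_lock_p alloc_one_lock( void ) {
//     size_t sz;
//     switch ( __kmp_user_lock_kind ) {
//         case lk_tas:    sz = sizeof( kmp_tas_lock_t );    break;
//         case lk_ticket: sz = sizeof( kmp_ticket_lock_t ); break;
//         case lk_drdpa:  sz = sizeof( kmp_drdpa_lock_t );  break;
//         default:        sz = sizeof( union kmp_user_lock );   // fallback; wastes space
//     }
//     return (kmp_user_lock_p) malloc( sz );
// }
//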
629 
630 #if ! KMP_USE_DYNAMIC_LOCK
631 
632 extern size_t __kmp_base_user_lock_size;
633 extern size_t __kmp_user_lock_size;
634 
635 extern kmp_int32 ( *__kmp_get_user_lock_owner_ )( kmp_user_lock_p lck );
636 
637 static inline kmp_int32
638 __kmp_get_user_lock_owner( kmp_user_lock_p lck )
639 {
640  KMP_DEBUG_ASSERT( __kmp_get_user_lock_owner_ != NULL );
641  return ( *__kmp_get_user_lock_owner_ )( lck );
642 }
643 
644 extern int ( *__kmp_acquire_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid );
645 
646 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
647 
648 #define __kmp_acquire_user_lock_with_checks(lck,gtid) \
649  if (__kmp_user_lock_kind == lk_tas) { \
650  if ( __kmp_env_consistency_check ) { \
651  char const * const func = "omp_set_lock"; \
652  if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE ) \
653  && lck->tas.lk.depth_locked != -1 ) { \
654  KMP_FATAL( LockNestableUsedAsSimple, func ); \
655  } \
656  if ( ( gtid >= 0 ) && ( lck->tas.lk.poll - 1 == gtid ) ) { \
657  KMP_FATAL( LockIsAlreadyOwned, func ); \
658  } \
659  } \
660  if ( ( lck->tas.lk.poll != 0 ) || \
661  ( ! KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) ) ) { \
662  kmp_uint32 spins; \
663  KMP_FSYNC_PREPARE( lck ); \
664  KMP_INIT_YIELD( spins ); \
665  if ( TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) ) { \
666  KMP_YIELD( TRUE ); \
667  } else { \
668  KMP_YIELD_SPIN( spins ); \
669  } \
670  while ( ( lck->tas.lk.poll != 0 ) || \
671  ( ! KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) ) ) { \
672  if ( TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) ) { \
673  KMP_YIELD( TRUE ); \
674  } else { \
675  KMP_YIELD_SPIN( spins ); \
676  } \
677  } \
678  } \
679  KMP_FSYNC_ACQUIRED( lck ); \
680  } else { \
681  KMP_DEBUG_ASSERT( __kmp_acquire_user_lock_with_checks_ != NULL ); \
682  ( *__kmp_acquire_user_lock_with_checks_ )( lck, gtid ); \
683  }
684 
685 #else
686 static inline int
687 __kmp_acquire_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid )
688 {
689  KMP_DEBUG_ASSERT( __kmp_acquire_user_lock_with_checks_ != NULL );
690  return ( *__kmp_acquire_user_lock_with_checks_ )( lck, gtid );
691 }
692 #endif
693 
694 extern int ( *__kmp_test_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid );
695 
696 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
697 
698 #include "kmp_i18n.h" /* AC: KMP_FATAL definition */
699 extern int __kmp_env_consistency_check; /* AC: copy from kmp.h here */
700 static inline int
701 __kmp_test_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid )
702 {
703  if ( __kmp_user_lock_kind == lk_tas ) {
704  if ( __kmp_env_consistency_check ) {
705  char const * const func = "omp_test_lock";
706  if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
707  && lck->tas.lk.depth_locked != -1 ) {
708  KMP_FATAL( LockNestableUsedAsSimple, func );
709  }
710  }
711  return ( ( lck->tas.lk.poll == 0 ) &&
712  KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) );
713  } else {
714  KMP_DEBUG_ASSERT( __kmp_test_user_lock_with_checks_ != NULL );
715  return ( *__kmp_test_user_lock_with_checks_ )( lck, gtid );
716  }
717 }
718 #else
719 static inline int
720 __kmp_test_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid )
721 {
722  KMP_DEBUG_ASSERT( __kmp_test_user_lock_with_checks_ != NULL );
723  return ( *__kmp_test_user_lock_with_checks_ )( lck, gtid );
724 }
725 #endif
726 
727 extern int ( *__kmp_release_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid );
728 
729 static inline void
730 __kmp_release_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid )
731 {
732  KMP_DEBUG_ASSERT( __kmp_release_user_lock_with_checks_ != NULL );
733  ( *__kmp_release_user_lock_with_checks_ ) ( lck, gtid );
734 }
735 
736 extern void ( *__kmp_init_user_lock_with_checks_ )( kmp_user_lock_p lck );
737 
738 static inline void
739 __kmp_init_user_lock_with_checks( kmp_user_lock_p lck )
740 {
741  KMP_DEBUG_ASSERT( __kmp_init_user_lock_with_checks_ != NULL );
742  ( *__kmp_init_user_lock_with_checks_ )( lck );
743 }
744 
745 //
746 // We need a non-checking version of destroy lock for when the RTL is
747 // doing the cleanup as it can't always tell if the lock is nested or not.
748 //
749 extern void ( *__kmp_destroy_user_lock_ )( kmp_user_lock_p lck );
750 
751 static inline void
752 __kmp_destroy_user_lock( kmp_user_lock_p lck )
753 {
754  KMP_DEBUG_ASSERT( __kmp_destroy_user_lock_ != NULL );
755  ( *__kmp_destroy_user_lock_ )( lck );
756 }
757 
758 extern void ( *__kmp_destroy_user_lock_with_checks_ )( kmp_user_lock_p lck );
759 
760 static inline void
761 __kmp_destroy_user_lock_with_checks( kmp_user_lock_p lck )
762 {
763  KMP_DEBUG_ASSERT( __kmp_destroy_user_lock_with_checks_ != NULL );
764  ( *__kmp_destroy_user_lock_with_checks_ )( lck );
765 }
766 
767 extern int ( *__kmp_acquire_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid );
768 
769 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
770 
771 #define __kmp_acquire_nested_user_lock_with_checks(lck,gtid,depth) \
772  if (__kmp_user_lock_kind == lk_tas) { \
773  if ( __kmp_env_consistency_check ) { \
774  char const * const func = "omp_set_nest_lock"; \
775  if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_NEST_LOCK_T_SIZE ) \
776  && lck->tas.lk.depth_locked == -1 ) { \
777  KMP_FATAL( LockSimpleUsedAsNestable, func ); \
778  } \
779  } \
780  if ( lck->tas.lk.poll - 1 == gtid ) { \
781  lck->tas.lk.depth_locked += 1; \
782  *depth = KMP_LOCK_ACQUIRED_NEXT; \
783  } else { \
784  if ( ( lck->tas.lk.poll != 0 ) || \
785  ( ! KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) ) ) { \
786  kmp_uint32 spins; \
787  KMP_FSYNC_PREPARE( lck ); \
788  KMP_INIT_YIELD( spins ); \
789  if ( TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) ) { \
790  KMP_YIELD( TRUE ); \
791  } else { \
792  KMP_YIELD_SPIN( spins ); \
793  } \
794  while ( ( lck->tas.lk.poll != 0 ) || \
795  ( ! KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) ) ) { \
796  if ( TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) ) { \
797  KMP_YIELD( TRUE ); \
798  } else { \
799  KMP_YIELD_SPIN( spins ); \
800  } \
801  } \
802  } \
803  lck->tas.lk.depth_locked = 1; \
804  *depth = KMP_LOCK_ACQUIRED_FIRST; \
805  } \
806  KMP_FSYNC_ACQUIRED( lck ); \
807  } else { \
808  KMP_DEBUG_ASSERT( __kmp_acquire_nested_user_lock_with_checks_ != NULL ); \
809  *depth = ( *__kmp_acquire_nested_user_lock_with_checks_ )( lck, gtid ); \
810  }
811 
812 #else
813 static inline void
814 __kmp_acquire_nested_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid, int* depth )
815 {
816  KMP_DEBUG_ASSERT( __kmp_acquire_nested_user_lock_with_checks_ != NULL );
817  *depth = ( *__kmp_acquire_nested_user_lock_with_checks_ )( lck, gtid );
818 }
819 #endif
820 
821 extern int ( *__kmp_test_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid );
822 
823 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
824 static inline int
825 __kmp_test_nested_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid )
826 {
827  if ( __kmp_user_lock_kind == lk_tas ) {
828  int retval;
829  if ( __kmp_env_consistency_check ) {
830  char const * const func = "omp_test_nest_lock";
831  if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_NEST_LOCK_T_SIZE )
832  && lck->tas.lk.depth_locked == -1 ) {
833  KMP_FATAL( LockSimpleUsedAsNestable, func );
834  }
835  }
836  KMP_DEBUG_ASSERT( gtid >= 0 );
837  if ( lck->tas.lk.poll - 1 == gtid ) { /* __kmp_get_tas_lock_owner( lck ) == gtid */
838  return ++lck->tas.lk.depth_locked; /* same owner, depth increased */
839  }
840  retval = ( ( lck->tas.lk.poll == 0 ) &&
841  KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) );
842  if ( retval ) {
843  KMP_MB();
844  lck->tas.lk.depth_locked = 1;
845  }
846  return retval;
847  } else {
848  KMP_DEBUG_ASSERT( __kmp_test_nested_user_lock_with_checks_ != NULL );
849  return ( *__kmp_test_nested_user_lock_with_checks_ )( lck, gtid );
850  }
851 }
852 #else
853 static inline int
854 __kmp_test_nested_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid )
855 {
856  KMP_DEBUG_ASSERT( __kmp_test_nested_user_lock_with_checks_ != NULL );
857  return ( *__kmp_test_nested_user_lock_with_checks_ )( lck, gtid );
858 }
859 #endif
860 
861 extern int ( *__kmp_release_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid );
862 
863 static inline int
864 __kmp_release_nested_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid )
865 {
866  KMP_DEBUG_ASSERT( __kmp_release_nested_user_lock_with_checks_ != NULL );
867  return ( *__kmp_release_nested_user_lock_with_checks_ )( lck, gtid );
868 }
869 
870 extern void ( *__kmp_init_nested_user_lock_with_checks_ )( kmp_user_lock_p lck );
871 
872 static inline void __kmp_init_nested_user_lock_with_checks( kmp_user_lock_p lck )
873 {
874  KMP_DEBUG_ASSERT( __kmp_init_nested_user_lock_with_checks_ != NULL );
875  ( *__kmp_init_nested_user_lock_with_checks_ )( lck );
876 }
877 
878 extern void ( *__kmp_destroy_nested_user_lock_with_checks_ )( kmp_user_lock_p lck );
879 
880 static inline void
881 __kmp_destroy_nested_user_lock_with_checks( kmp_user_lock_p lck )
882 {
883  KMP_DEBUG_ASSERT( __kmp_destroy_nested_user_lock_with_checks_ != NULL );
884  ( *__kmp_destroy_nested_user_lock_with_checks_ )( lck );
885 }
886 
887 //
888 // user lock functions which do not necessarily exist for all lock kinds.
889 //
890 // The "set" functions usually have wrapper routines that check for a NULL set
891 // function pointer and call it if non-NULL.
892 //
893 // In some cases, it makes sense to have a "get" wrapper function check for a
894 // NULL get function pointer and return NULL / invalid value / error code if
895 // the function pointer is NULL.
896 //
897 // In other cases, the calling code really should differentiate between an
898 // unimplemented function and one that is implemented but returning NULL /
899 // invalid value. If this is the case, no get function wrapper exists.
900 //
901 
902 extern int ( *__kmp_is_user_lock_initialized_ )( kmp_user_lock_p lck );
903 
904 // no set function; fields are set during local allocation
905 
906 extern const ident_t * ( *__kmp_get_user_lock_location_ )( kmp_user_lock_p lck );
907 
908 static inline const ident_t *
909 __kmp_get_user_lock_location( kmp_user_lock_p lck )
910 {
911  if ( __kmp_get_user_lock_location_ != NULL ) {
912  return ( *__kmp_get_user_lock_location_ )( lck );
913  }
914  else {
915  return NULL;
916  }
917 }
918 
919 extern void ( *__kmp_set_user_lock_location_ )( kmp_user_lock_p lck, const ident_t *loc );
920 
921 static inline void
922 __kmp_set_user_lock_location( kmp_user_lock_p lck, const ident_t *loc )
923 {
924  if ( __kmp_set_user_lock_location_ != NULL ) {
925  ( *__kmp_set_user_lock_location_ )( lck, loc );
926  }
927 }
928 
929 extern kmp_lock_flags_t ( *__kmp_get_user_lock_flags_ )( kmp_user_lock_p lck );
930 
931 extern void ( *__kmp_set_user_lock_flags_ )( kmp_user_lock_p lck, kmp_lock_flags_t flags );
932 
933 static inline void
934 __kmp_set_user_lock_flags( kmp_user_lock_p lck, kmp_lock_flags_t flags )
935 {
936  if ( __kmp_set_user_lock_flags_ != NULL ) {
937  ( *__kmp_set_user_lock_flags_ )( lck, flags );
938  }
939 }
940 
941 //
942 // The function which sets up all of the vtbl pointers for kmp_user_lock_t.
943 //
944 extern void __kmp_set_user_lock_vptrs( kmp_lock_kind_t user_lock_kind );
945 
946 //
947 // Macros for binding user lock functions.
948 //
949 #define KMP_BIND_USER_LOCK_TEMPLATE(nest, kind, suffix) { \
950  __kmp_acquire##nest##user_lock_with_checks_ = ( int (*)( kmp_user_lock_p, kmp_int32 ) ) \
951  __kmp_acquire##nest##kind##_##suffix; \
952  __kmp_release##nest##user_lock_with_checks_ = ( int (*)( kmp_user_lock_p, kmp_int32 ) ) \
953  __kmp_release##nest##kind##_##suffix; \
954  __kmp_test##nest##user_lock_with_checks_ = ( int (*)( kmp_user_lock_p, kmp_int32 ) ) \
955  __kmp_test##nest##kind##_##suffix; \
956  __kmp_init##nest##user_lock_with_checks_ = ( void (*)( kmp_user_lock_p ) ) \
957  __kmp_init##nest##kind##_##suffix; \
958  __kmp_destroy##nest##user_lock_with_checks_ = ( void (*)( kmp_user_lock_p ) ) \
959  __kmp_destroy##nest##kind##_##suffix; \
960 }
961 
962 #define KMP_BIND_USER_LOCK(kind) KMP_BIND_USER_LOCK_TEMPLATE(_, kind, lock)
963 #define KMP_BIND_USER_LOCK_WITH_CHECKS(kind) KMP_BIND_USER_LOCK_TEMPLATE(_, kind, lock_with_checks)
964 #define KMP_BIND_NESTED_USER_LOCK(kind) KMP_BIND_USER_LOCK_TEMPLATE(_nested_, kind, lock)
965 #define KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(kind) KMP_BIND_USER_LOCK_TEMPLATE(_nested_, kind, lock_with_checks)
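
//
// Illustrative expansion (not part of the original header): KMP_BIND_USER_LOCK(ticket)
// pastes nest="_", kind="ticket", suffix="lock", so it assigns the vtbl pointers to the
// plain ticket lock entry points declared earlier in this file, e.g.
//
//     __kmp_acquire_user_lock_with_checks_ =
//         ( int (*)( kmp_user_lock_p, kmp_int32 ) ) __kmp_acquire_ticket_lock;
//
// and likewise for release/test/init/destroy. The *_WITH_CHECKS and *_NESTED_* variants
// merely select the "lock_with_checks" suffix and/or the "_nested_" name component.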
966 
967 // ----------------------------------------------------------------------------
968 // User lock table & lock allocation
969 // ----------------------------------------------------------------------------
970 
971 /*
972  On 64-bit Linux* OS (and OS X*) the GNU compiler allocates only 4 bytes of memory for a lock
973  variable, which is not enough to store a pointer, so we have to use lock indexes instead of
974  pointers and maintain a lock table to map indexes to pointers.
975 
976 
977  Note: The first element of the table is not a pointer to a lock! It is a pointer to the
978  previously allocated table (or NULL if it is the first table).
979 
980  Usage:
981 
982  if ( OMP_LOCK_T_SIZE < sizeof( <lock> ) ) { // or OMP_NEST_LOCK_T_SIZE
983  The lock table is fully utilized. User locks are indexes, so the table is
984  used on every user lock operation.
985  Note: it may be the case (lin_32) that we don't need to use a lock
986  table for regular locks, but do need the table for nested locks.
987  }
988  else {
989  Lock table initialized but not actually used.
990  }
991 */
992 
993 struct kmp_lock_table {
994  kmp_lock_index_t used; // Number of used elements
995  kmp_lock_index_t allocated; // Number of allocated elements
996  kmp_user_lock_p * table; // Lock table.
997 };
998 
999 typedef struct kmp_lock_table kmp_lock_table_t;
1000 
1001 extern kmp_lock_table_t __kmp_user_lock_table;
1002 extern kmp_user_lock_p __kmp_lock_pool;
1003 
1004 struct kmp_block_of_locks {
1005  struct kmp_block_of_locks * next_block;
1006  void * locks;
1007 };
1008 
1009 typedef struct kmp_block_of_locks kmp_block_of_locks_t;
1010 
1011 extern kmp_block_of_locks_t *__kmp_lock_blocks;
1012 extern int __kmp_num_locks_in_block;
1013 
1014 extern kmp_user_lock_p __kmp_user_lock_allocate( void **user_lock, kmp_int32 gtid, kmp_lock_flags_t flags );
1015 extern void __kmp_user_lock_free( void **user_lock, kmp_int32 gtid, kmp_user_lock_p lck );
1016 extern kmp_user_lock_p __kmp_lookup_user_lock( void **user_lock, char const *func );
1017 extern void __kmp_cleanup_user_locks();
1018 
1019 #define KMP_CHECK_USER_LOCK_INIT() \
1020  { \
1021  if ( ! TCR_4( __kmp_init_user_locks ) ) { \
1022  __kmp_acquire_bootstrap_lock( &__kmp_initz_lock ); \
1023  if ( ! TCR_4( __kmp_init_user_locks ) ) { \
1024  TCW_4( __kmp_init_user_locks, TRUE ); \
1025  } \
1026  __kmp_release_bootstrap_lock( &__kmp_initz_lock ); \
1027  } \
1028  }
1029 
1030 #endif // KMP_USE_DYNAMIC_LOCK
1031 
1032 #undef KMP_PAD
1033 #undef KMP_GTID_DNE
1034 
1035 #if KMP_USE_DYNAMIC_LOCK
1036 
1037 //
1038 // KMP_USE_DYNAMIC_LOCK enables dynamic dispatch of lock functions without breaking compatibility
1039 // with the existing code. The essential functionality of this new code is dynamic dispatch, but it
1040 // also implements (or enables implementation of) hinted user locks and critical sections, which
1041 // will be part of OpenMP 4.1 soon.
1042 //
1043 // The lock type is decided at creation time (i.e., lock initialization), and each subsequent lock
1044 // function call on the created lock object requires type extraction and a call through a jump table
1045 // using the extracted type. This type information is stored in two different ways depending on
1046 // the size of the lock object, and we differentiate lock types by this size requirement: direct
1047 // and indirect locks.
1048 //
1049 // Direct locks:
1050 // A direct lock object fits into the space created by the compiler for an omp_lock_t object, and
1051 // TAS/Futex locks fall into this category. We use the low byte of the lock object as the storage
1052 // for the lock type, and appropriate bit operations are required to access the data meaningful to
1053 // the lock algorithms. Also, to differentiate a direct lock from an indirect lock, 1 is written to
1054 // the LSB of the lock object. The newly introduced "hle" lock is also a direct lock.
1055 //
1056 // Indirect locks:
1057 // An indirect lock object requires more space than the compiler-generated space, and it should be
1058 // allocated from the heap. Depending on the size of the compiler-generated space for the lock (i.e.,
1059 // the size of omp_lock_t), the omp_lock_t object stores either the address of the heap-allocated
1060 // indirect lock (if void * fits in the object) or an index to the indirect lock table entry that
1061 // holds the address. Ticket/Queuing/DRDPA/Adaptive locks fall into this category, and the newly
1062 // introduced "rtm" lock is also an indirect lock, implemented on top of the Queuing lock.
1063 // When the omp_lock_t object holds an index (not a lock address), 0 is written to the LSB to
1064 // differentiate the lock from a direct lock, and the remaining bits are the actual index into the
1065 // indirect lock table.
1066 //
1067 
1068 #include <stdint.h> // for uintptr_t
1069 
1070 // Shortcuts
1071 #define KMP_USE_FUTEX (KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64))
1072 #define KMP_USE_INLINED_TAS (KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)) && 1
1073 #define KMP_USE_INLINED_FUTEX KMP_USE_FUTEX && 0
1074 
1075 // List of lock definitions; all nested locks are indirect locks.
1076 // hle lock is xchg lock prefixed with XACQUIRE/XRELEASE.
1077 // All nested locks are indirect lock types.
1078 #if KMP_USE_TSX
1079 # if KMP_USE_FUTEX
1080 # define KMP_FOREACH_D_LOCK(m, a) m(tas, a) m(futex, a) m(hle, a)
1081 # define KMP_FOREACH_I_LOCK(m, a) m(ticket, a) m(queuing, a) m(adaptive, a) m(drdpa, a) m(rtm, a) \
1082  m(nested_tas, a) m(nested_futex, a) m(nested_ticket, a) \
1083  m(nested_queuing, a) m(nested_drdpa, a)
1084 # else
1085 # define KMP_FOREACH_D_LOCK(m, a) m(tas, a) m(hle, a)
1086 # define KMP_FOREACH_I_LOCK(m, a) m(ticket, a) m(queuing, a) m(adaptive, a) m(drdpa, a) m(rtm, a) \
1087  m(nested_tas, a) m(nested_ticket, a) \
1088  m(nested_queuing, a) m(nested_drdpa, a)
1089 # endif // KMP_USE_FUTEX
1090 # define KMP_LAST_D_LOCK lockseq_hle
1091 #else
1092 # if KMP_USE_FUTEX
1093 # define KMP_FOREACH_D_LOCK(m, a) m(tas, a) m(futex, a)
1094 # define KMP_FOREACH_I_LOCK(m, a) m(ticket, a) m(queuing, a) m(drdpa, a) \
1095  m(nested_tas, a) m(nested_futex, a) m(nested_ticket, a) \
1096  m(nested_queuing, a) m(nested_drdpa, a)
1097 # define KMP_LAST_D_LOCK lockseq_futex
1098 # else
1099 # define KMP_FOREACH_D_LOCK(m, a) m(tas, a)
1100 # define KMP_FOREACH_I_LOCK(m, a) m(ticket, a) m(queuing, a) m(drdpa, a) \
1101  m(nested_tas, a) m(nested_ticket, a) \
1102  m(nested_queuing, a) m(nested_drdpa, a)
1103 # define KMP_LAST_D_LOCK lockseq_tas
1104 # endif // KMP_USE_FUTEX
1105 #endif // KMP_USE_TSX
1106 
1107 // Information used in dynamic dispatch
1108 #define KMP_LOCK_SHIFT 8 // number of low bits to be used as tag for direct locks
1109 #define KMP_FIRST_D_LOCK lockseq_tas
1110 #define KMP_FIRST_I_LOCK lockseq_ticket
1111 #define KMP_LAST_I_LOCK lockseq_nested_drdpa
1112 #define KMP_NUM_I_LOCKS (locktag_nested_drdpa+1) // number of indirect lock types
1113 
1114 // Base type for dynamic locks.
1115 typedef kmp_uint32 kmp_dyna_lock_t;
1116 
1117 // Lock sequence that enumerates all lock kinds.
1118 // Always make this enumeration consistent with kmp_lockseq_t in the include directory.
1119 typedef enum {
1120  lockseq_indirect = 0,
1121 #define expand_seq(l,a) lockseq_##l,
1122  KMP_FOREACH_D_LOCK(expand_seq, 0)
1123  KMP_FOREACH_I_LOCK(expand_seq, 0)
1124 #undef expand_seq
1125 } kmp_dyna_lockseq_t;
1126 
1127 // Enumerates indirect lock tags.
1128 typedef enum {
1129 #define expand_tag(l,a) locktag_##l,
1130  KMP_FOREACH_I_LOCK(expand_tag, 0)
1131 #undef expand_tag
1132 } kmp_indirect_locktag_t;
1133 
1134 // Utility macros that extract information from lock sequences.
1135 #define KMP_IS_D_LOCK(seq) ((seq) >= KMP_FIRST_D_LOCK && (seq) <= KMP_LAST_D_LOCK)
1136 #define KMP_IS_I_LOCK(seq) ((seq) >= KMP_FIRST_I_LOCK && (seq) <= KMP_LAST_I_LOCK)
1137 #define KMP_GET_I_TAG(seq) (kmp_indirect_locktag_t)((seq) - KMP_FIRST_I_LOCK)
1138 #define KMP_GET_D_TAG(seq) ((seq)<<1 | 1)
1139 
1140 // Enumerates direct lock tags starting from indirect tag.
1141 typedef enum {
1142 #define expand_tag(l,a) locktag_##l = KMP_GET_D_TAG(lockseq_##l),
1143  KMP_FOREACH_D_LOCK(expand_tag, 0)
1144 #undef expand_tag
1145 } kmp_direct_locktag_t;
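
//
// Worked example (not part of the original header), assuming the enumeration order
// above: lockseq_indirect == 0 and lockseq_tas == 1, so
//
//     KMP_GET_D_TAG(lockseq_tas) == (1 << 1 | 1) == 3   ==>   locktag_tas == 3
//
// i.e. every direct lock tag is odd (LSB == 1), while an omp_lock_t word that holds an
// indirect lock index has LSB == 0; KMP_EXTRACT_D_TAG below relies on this to tell the
// two kinds apart.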
1146 
1147 // Indirect lock type
1148 typedef struct {
1149  kmp_user_lock_p lock;
1150  kmp_indirect_locktag_t type;
1151 } kmp_indirect_lock_t;
1152 
1153 // Function tables for direct locks. Set/unset/test differentiate functions with/without consistency checking.
1154 extern void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t);
1155 extern void (*__kmp_direct_destroy[])(kmp_dyna_lock_t *);
1156 extern void (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32);
1157 extern int (*(*__kmp_direct_unset))(kmp_dyna_lock_t *, kmp_int32);
1158 extern int (*(*__kmp_direct_test))(kmp_dyna_lock_t *, kmp_int32);
1159 
1160 // Function tables for indirect locks. Set/unset/test differentiate functions with/without consistency checking.
1161 extern void (*__kmp_indirect_init[])(kmp_user_lock_p);
1162 extern void (*__kmp_indirect_destroy[])(kmp_user_lock_p);
1163 extern void (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32);
1164 extern int (*(*__kmp_indirect_unset))(kmp_user_lock_p, kmp_int32);
1165 extern int (*(*__kmp_indirect_test))(kmp_user_lock_p, kmp_int32);
1166 
1167 // Extracts direct lock tag from a user lock pointer
1168 #define KMP_EXTRACT_D_TAG(l) (*((kmp_dyna_lock_t *)(l)) & ((1<<KMP_LOCK_SHIFT)-1) & -(*((kmp_dyna_lock_t *)(l)) & 1))
1169 
1170 // Extracts indirect lock index from a user lock pointer
1171 #define KMP_EXTRACT_I_INDEX(l) (*(kmp_lock_index_t *)(l) >> 1)
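
//
// Note on the bit trick above (added commentary): in KMP_EXTRACT_D_TAG the term
// -(*(kmp_dyna_lock_t *)(l) & 1) is all-ones when the LSB is 1 (direct lock) and 0 when
// the LSB is 0 (indirect lock), so the macro yields the low-byte tag for a direct lock
// and 0 for an indirect lock. KMP_EXTRACT_I_INDEX simply drops that LSB to recover the
// table index.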
1172 
1173 // Returns function pointer to the direct lock function with l (kmp_dyna_lock_t *) and op (operation type).
1174 #define KMP_D_LOCK_FUNC(l, op) __kmp_direct_##op[KMP_EXTRACT_D_TAG(l)]
1175 
1176 // Returns function pointer to the indirect lock function with l (kmp_indirect_lock_t *) and op (operation type).
1177 #define KMP_I_LOCK_FUNC(l, op) __kmp_indirect_##op[((kmp_indirect_lock_t *)(l))->type]
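
//
// Illustrative use (not part of the original header): dispatch through the jump tables
// looks roughly like the following, assuming `lck' is the user's omp_lock_t pointer and
// `ilk' is a kmp_indirect_lock_t * obtained from the indirect lock table.
//
// KMP_D_LOCK_FUNC( lck, set )( (kmp_dyna_lock_t *)lck, gtid );   // direct lock acquire
// KMP_I_LOCK_FUNC( ilk, set )( ilk->lock, gtid );                // indirect lock acquire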
1178 
1179 // Initializes a direct lock with the given lock pointer and lock sequence.
1180 #define KMP_INIT_D_LOCK(l, seq) __kmp_direct_init[KMP_GET_D_TAG(seq)]((kmp_dyna_lock_t *)l, seq)
1181 
1182 // Initializes an indirect lock with the given lock pointer and lock sequence.
1183 #define KMP_INIT_I_LOCK(l, seq) __kmp_direct_init[0]((kmp_dyna_lock_t *)(l), seq)
1184 
1185 // Returns "free" lock value for the given lock type.
1186 #define KMP_LOCK_FREE(type) (locktag_##type)
1187 
1188 // Returns "busy" lock value for the given lock teyp.
1189 #define KMP_LOCK_BUSY(v, type) ((v)<<KMP_LOCK_SHIFT | locktag_##type)
1190 
1191 // Returns lock value after removing (shifting) lock tag.
1192 #define KMP_LOCK_STRIP(v) ((v)>>KMP_LOCK_SHIFT)
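
//
// Worked example (not part of the original header), using the tas tag derived above
// (locktag_tas == 3): an unlocked direct TAS word equals KMP_LOCK_FREE(tas) == 3, a word
// holding value v is KMP_LOCK_BUSY(v, tas) == (v << 8 | 3), and KMP_LOCK_STRIP() recovers v:
//
//     KMP_LOCK_STRIP( KMP_LOCK_BUSY( gtid + 1, tas ) ) == gtid + 1
//
// The exact value stored (e.g. gtid + 1) is up to the individual lock algorithm in
// kmp_lock.c; these macros only handle the tag packing.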
1193 
1194 // Initializes global states and data structures for managing dynamic user locks.
1195 extern void __kmp_init_dynamic_user_locks();
1196 
1197 // Allocates and returns an indirect lock with the given indirect lock tag.
1198 extern kmp_indirect_lock_t * __kmp_allocate_indirect_lock(void **, kmp_int32, kmp_indirect_locktag_t);
1199 
1200 // Cleans up global states and data structures for managing dynamic user locks.
1201 extern void __kmp_cleanup_indirect_user_locks();
1202 
1203 // Default user lock sequence when not using hinted locks.
1204 extern kmp_dyna_lockseq_t __kmp_user_lock_seq;
1205 
1206 // Jump table for "set lock location", available only for indirect locks.
1207 extern void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p, const ident_t *);
1208 #define KMP_SET_I_LOCK_LOCATION(lck, loc) { \
1209  if (__kmp_indirect_set_location[(lck)->type] != NULL) \
1210  __kmp_indirect_set_location[(lck)->type]((lck)->lock, loc); \
1211 }
1212 
1213 // Jump table for "set lock flags", available only for indirect locks.
1214 extern void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p, kmp_lock_flags_t);
1215 #define KMP_SET_I_LOCK_FLAGS(lck, flag) { \
1216  if (__kmp_indirect_set_flags[(lck)->type] != NULL) \
1217  __kmp_indirect_set_flags[(lck)->type]((lck)->lock, flag); \
1218 }
1219 
1220 // Jump table for "get lock location", available only for indirect locks.
1221 extern const ident_t * (*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p);
1222 #define KMP_GET_I_LOCK_LOCATION(lck) ( __kmp_indirect_get_location[(lck)->type] != NULL \
1223  ? __kmp_indirect_get_location[(lck)->type]((lck)->lock) \
1224  : NULL )
1225 
1226 // Jump table for "get lock flags", available only for indirect locks.
1227 extern kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p);
1228 #define KMP_GET_I_LOCK_FLAGS(lck) ( __kmp_indirect_get_flags[(lck)->type] != NULL \
1229  ? __kmp_indirect_get_flags[(lck)->type]((lck)->lock) \
1230  : NULL )
1231 
1232 #define KMP_I_LOCK_CHUNK 1024 // number of kmp_indirect_lock_t objects to be allocated together
1233 
1234 // Lock table for indirect locks.
1235 typedef struct kmp_indirect_lock_table {
1236  kmp_indirect_lock_t **table; // blocks of indirect locks allocated
1237  kmp_lock_index_t size; // size of the indirect lock table
1238  kmp_lock_index_t next; // index to the next lock to be allocated
1239 } kmp_indirect_lock_table_t;
1240 
1241 extern kmp_indirect_lock_table_t __kmp_i_lock_table;
1242 
1243 // Returns the indirect lock associated with the given index.
1244 #define KMP_GET_I_LOCK(index) (*(__kmp_i_lock_table.table + (index)/KMP_I_LOCK_CHUNK) + (index)%KMP_I_LOCK_CHUNK)
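
//
// Worked example (not part of the original header): with KMP_I_LOCK_CHUNK == 1024,
// index 1500 resolves to block 1500/1024 == 1 and offset 1500%1024 == 476, i.e.
// KMP_GET_I_LOCK(1500) == __kmp_i_lock_table.table[1] + 476.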
1245 
1246 // Number of locks in a lock block, which is currently fixed to "1".
1247 // TODO: There is no lock block implementation yet. If we do support it, we need to manage a
1248 // lock block data structure for each indirect lock type.
1249 extern int __kmp_num_locks_in_block;
1250 
1251 // Fast lock table lookup without consistency checking
1252 #define KMP_LOOKUP_I_LOCK(l) ( (OMP_LOCK_T_SIZE < sizeof(void *)) \
1253  ? KMP_GET_I_LOCK(KMP_EXTRACT_I_INDEX(l)) \
1254  : *((kmp_indirect_lock_t **)(l)) )
1255 
1256 // Used once in kmp_error.c
1257 extern kmp_int32
1258 __kmp_get_user_lock_owner(kmp_user_lock_p, kmp_uint32);
1259 
1260 #else // KMP_USE_DYNAMIC_LOCK
1261 
1262 # define KMP_LOCK_BUSY(v, type) (v)
1263 # define KMP_LOCK_FREE(type) 0
1264 # define KMP_LOCK_STRIP(v) (v)
1265 
1266 #endif // KMP_USE_DYNAMIC_LOCK
1267 
1268 #ifdef __cplusplus
1269 } // extern "C"
1270 #endif // __cplusplus
1271 
1272 #endif /* KMP_LOCK_H */
1273 