LLVM OpenMP* Runtime Library
kmp_lock.cpp
1 /*
2  * kmp_lock.cpp -- lock-related functions
3  */
4 
5 
6 //===----------------------------------------------------------------------===//
7 //
8 // The LLVM Compiler Infrastructure
9 //
10 // This file is dual licensed under the MIT and the University of Illinois Open
11 // Source Licenses. See LICENSE.txt for details.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 
16 #include <stddef.h>
17 
18 #include "kmp.h"
19 #include "kmp_itt.h"
20 #include "kmp_i18n.h"
21 #include "kmp_lock.h"
22 #include "kmp_io.h"
23 
24 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
25 # include <unistd.h>
26 # include <sys/syscall.h>
27 // We should really include <futex.h>, but that causes compatibility problems on different
28 // Linux* OS distributions that either require that you include (or break when you try to include)
29 // <pci/types.h>.
30 // Since all we need are the two macros below (which are part of the kernel ABI, so cannot change),
31 // we just define the constants here and don't include <futex.h>.
32 # ifndef FUTEX_WAIT
33 # define FUTEX_WAIT 0
34 # endif
35 # ifndef FUTEX_WAKE
36 # define FUTEX_WAKE 1
37 # endif
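
// Editorial note: FUTEX_WAIT parks the caller only while the word at the given address
// still holds the expected value (otherwise the call returns immediately), and FUTEX_WAKE
// wakes up to N parked waiters. That contract is why the futex acquire path later in this
// file re-reads the poll word and simply retries whenever the wait call fails.
// Condensed, hypothetical sketch (editorial only, not compiled into the runtime):
#if 0
static void __example_futex_wait_wake( volatile kmp_int32 *addr, kmp_int32 expected )
{
    // Sleep only while *addr == expected; a value mismatch or spurious wakeup just
    // returns to the caller, whose loop re-checks the lock word.
    syscall( __NR_futex, addr, FUTEX_WAIT, expected, NULL, NULL, 0 );
    // Wake at most one thread parked on addr.
    syscall( __NR_futex, addr, FUTEX_WAKE, 1, NULL, NULL, 0 );
}
#endif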
38 #endif
39 
40 /* Implement spin locks for internal library use. */
41 /* The algorithm implemented is Lamport's bakery lock [1974]. */
42 
43 void
44 __kmp_validate_locks( void )
45 {
46  int i;
47  kmp_uint32 x, y;
48 
49  /* Check to make sure unsigned arithmetic wraps properly */
50  x = ~((kmp_uint32) 0) - 2;
51  y = x - 2;
52 
53  for (i = 0; i < 8; ++i, ++x, ++y) {
54  kmp_uint32 z = (x - y);
55  KMP_ASSERT( z == 2 );
56  }
57 
58  KMP_ASSERT( offsetof( kmp_base_queuing_lock, tail_id ) % 8 == 0 );
59 }
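
// Editorial sketch (not part of the runtime): the wraparound check above matters because
// the ticket/bakery code later in this file relies on modular kmp_uint32 arithmetic, so a
// ticket difference stays correct even when the counters wrap past UINT_MAX. The helper
// name below is hypothetical and the block is not compiled.
#if 0
static void __example_unsigned_wrap( void )
{
    kmp_uint32 next_ticket = 0xFFFFFFFEu;   // about to wrap
    kmp_uint32 now_serving = 0xFFFFFFFCu;
    KMP_ASSERT( (kmp_uint32)( next_ticket - now_serving ) == 2 );
    next_ticket += 4;                       // wraps around to 2
    now_serving += 4;                       // wraps around to 0
    KMP_ASSERT( (kmp_uint32)( next_ticket - now_serving ) == 2 );  // distance is preserved
}
#endif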
60 
61 
62 /* ------------------------------------------------------------------------ */
63 /* test and set locks */
64 
65 //
66 // For the non-nested locks, we can only assume that the first 4 bytes were
67 // allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel
68 // compiler only allocates a 4 byte pointer on IA-32 architecture. On
69 // Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated.
70 //
71 // gcc reserves >= 8 bytes for nested locks, so we can assume that the
72 // entire 8 bytes were allocated for nested locks on all 64-bit platforms.
73 //
74 
75 static kmp_int32
76 __kmp_get_tas_lock_owner( kmp_tas_lock_t *lck )
77 {
78  return KMP_LOCK_STRIP(TCR_4( lck->lk.poll )) - 1;
79 }
80 
81 static inline bool
82 __kmp_is_tas_lock_nestable( kmp_tas_lock_t *lck )
83 {
84  return lck->lk.depth_locked != -1;
85 }
86 
87 __forceinline static int
88 __kmp_acquire_tas_lock_timed_template( kmp_tas_lock_t *lck, kmp_int32 gtid )
89 {
90  KMP_MB();
91 
92 #ifdef USE_LOCK_PROFILE
93  kmp_uint32 curr = TCR_4( lck->lk.poll );
94  if ( ( curr != 0 ) && ( curr != gtid + 1 ) )
95  __kmp_printf( "LOCK CONTENTION: %p\n", lck );
96  /* else __kmp_printf( "." );*/
97 #endif /* USE_LOCK_PROFILE */
98 
99  if ( ( lck->lk.poll == KMP_LOCK_FREE(tas) )
100  && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas) ) ) {
101  KMP_FSYNC_ACQUIRED(lck);
102  return KMP_LOCK_ACQUIRED_FIRST;
103  }
104 
105  kmp_uint32 spins;
106  KMP_FSYNC_PREPARE( lck );
107  KMP_INIT_YIELD( spins );
108  if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
109  __kmp_xproc ) ) {
110  KMP_YIELD( TRUE );
111  }
112  else {
113  KMP_YIELD_SPIN( spins );
114  }
115 
116  while ( ( lck->lk.poll != KMP_LOCK_FREE(tas) ) ||
117  ( ! KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas) ) ) ) {
118  //
119  // FIXME - use exponential backoff here
120  //
121  if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
122  __kmp_xproc ) ) {
123  KMP_YIELD( TRUE );
124  }
125  else {
126  KMP_YIELD_SPIN( spins );
127  }
128  }
129  KMP_FSYNC_ACQUIRED( lck );
130  return KMP_LOCK_ACQUIRED_FIRST;
131 }
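
// Editorial sketch: one possible shape of the exponential backoff that the FIXME above
// asks for. The helper and its constants are hypothetical (they are not part of this
// runtime); the actual spin loop above relies on KMP_YIELD / KMP_YIELD_SPIN instead.
#if 0
static void __example_tas_backoff( kmp_tas_lock_t *lck, kmp_int32 gtid )
{
    kmp_uint32 delay = 1;                   // pause count, doubled after each failed attempt
    kmp_uint32 const max_delay = 1024;      // cap so waiters stay responsive
    while ( ( lck->lk.poll != KMP_LOCK_FREE(tas) ) ||
            ( ! KMP_COMPARE_AND_STORE_ACQ32( &( lck->lk.poll ), KMP_LOCK_FREE(tas),
                                             KMP_LOCK_BUSY( gtid + 1, tas ) ) ) ) {
        for ( kmp_uint32 i = 0; i < delay; ++i ) {
            KMP_CPU_PAUSE();                // spin-wait hint to the core
        }
        if ( delay < max_delay ) {
            delay <<= 1;                    // exponential growth, bounded
        }
    }
}
#endif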
132 
133 int
134 __kmp_acquire_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
135 {
136  return __kmp_acquire_tas_lock_timed_template( lck, gtid );
137 }
138 
139 static int
140 __kmp_acquire_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
141 {
142  char const * const func = "omp_set_lock";
143  if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
144  && __kmp_is_tas_lock_nestable( lck ) ) {
145  KMP_FATAL( LockNestableUsedAsSimple, func );
146  }
147  if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) == gtid ) ) {
148  KMP_FATAL( LockIsAlreadyOwned, func );
149  }
150  return __kmp_acquire_tas_lock( lck, gtid );
151 }
152 
153 int
154 __kmp_test_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
155 {
156  if ( ( lck->lk.poll == KMP_LOCK_FREE(tas) )
157  && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas) ) ) {
158  KMP_FSYNC_ACQUIRED( lck );
159  return TRUE;
160  }
161  return FALSE;
162 }
163 
164 static int
165 __kmp_test_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
166 {
167  char const * const func = "omp_test_lock";
168  if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
169  && __kmp_is_tas_lock_nestable( lck ) ) {
170  KMP_FATAL( LockNestableUsedAsSimple, func );
171  }
172  return __kmp_test_tas_lock( lck, gtid );
173 }
174 
175 int
176 __kmp_release_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
177 {
178  KMP_MB(); /* Flush all pending memory write invalidates. */
179 
180  KMP_FSYNC_RELEASING(lck);
181  KMP_ST_REL32( &(lck->lk.poll), KMP_LOCK_FREE(tas) );
182  KMP_MB(); /* Flush all pending memory write invalidates. */
183 
184  KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
185  __kmp_xproc ) );
186  return KMP_LOCK_RELEASED;
187 }
188 
189 static int
190 __kmp_release_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
191 {
192  char const * const func = "omp_unset_lock";
193  KMP_MB(); /* in case another processor initialized lock */
194  if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
195  && __kmp_is_tas_lock_nestable( lck ) ) {
196  KMP_FATAL( LockNestableUsedAsSimple, func );
197  }
198  if ( __kmp_get_tas_lock_owner( lck ) == -1 ) {
199  KMP_FATAL( LockUnsettingFree, func );
200  }
201  if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) >= 0 )
202  && ( __kmp_get_tas_lock_owner( lck ) != gtid ) ) {
203  KMP_FATAL( LockUnsettingSetByAnother, func );
204  }
205  return __kmp_release_tas_lock( lck, gtid );
206 }
207 
208 void
209 __kmp_init_tas_lock( kmp_tas_lock_t * lck )
210 {
211  TCW_4( lck->lk.poll, KMP_LOCK_FREE(tas) );
212 }
213 
214 static void
215 __kmp_init_tas_lock_with_checks( kmp_tas_lock_t * lck )
216 {
217  __kmp_init_tas_lock( lck );
218 }
219 
220 void
221 __kmp_destroy_tas_lock( kmp_tas_lock_t *lck )
222 {
223  lck->lk.poll = 0;
224 }
225 
226 static void
227 __kmp_destroy_tas_lock_with_checks( kmp_tas_lock_t *lck )
228 {
229  char const * const func = "omp_destroy_lock";
230  if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
231  && __kmp_is_tas_lock_nestable( lck ) ) {
232  KMP_FATAL( LockNestableUsedAsSimple, func );
233  }
234  if ( __kmp_get_tas_lock_owner( lck ) != -1 ) {
235  KMP_FATAL( LockStillOwned, func );
236  }
237  __kmp_destroy_tas_lock( lck );
238 }
239 
240 
241 //
242 // nested test and set locks
243 //
244 
245 int
246 __kmp_acquire_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
247 {
248  KMP_DEBUG_ASSERT( gtid >= 0 );
249 
250  if ( __kmp_get_tas_lock_owner( lck ) == gtid ) {
251  lck->lk.depth_locked += 1;
252  return KMP_LOCK_ACQUIRED_NEXT;
253  }
254  else {
255  __kmp_acquire_tas_lock_timed_template( lck, gtid );
256  lck->lk.depth_locked = 1;
257  return KMP_LOCK_ACQUIRED_FIRST;
258  }
259 }
260 
261 static int
262 __kmp_acquire_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
263 {
264  char const * const func = "omp_set_nest_lock";
265  if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
266  KMP_FATAL( LockSimpleUsedAsNestable, func );
267  }
268  return __kmp_acquire_nested_tas_lock( lck, gtid );
269 }
270 
271 int
272 __kmp_test_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
273 {
274  int retval;
275 
276  KMP_DEBUG_ASSERT( gtid >= 0 );
277 
278  if ( __kmp_get_tas_lock_owner( lck ) == gtid ) {
279  retval = ++lck->lk.depth_locked;
280  }
281  else if ( !__kmp_test_tas_lock( lck, gtid ) ) {
282  retval = 0;
283  }
284  else {
285  KMP_MB();
286  retval = lck->lk.depth_locked = 1;
287  }
288  return retval;
289 }
290 
291 static int
292 __kmp_test_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
293 {
294  char const * const func = "omp_test_nest_lock";
295  if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
296  KMP_FATAL( LockSimpleUsedAsNestable, func );
297  }
298  return __kmp_test_nested_tas_lock( lck, gtid );
299 }
300 
301 int
302 __kmp_release_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
303 {
304  KMP_DEBUG_ASSERT( gtid >= 0 );
305 
306  KMP_MB();
307  if ( --(lck->lk.depth_locked) == 0 ) {
308  __kmp_release_tas_lock( lck, gtid );
309  return KMP_LOCK_RELEASED;
310  }
311  return KMP_LOCK_STILL_HELD;
312 }
313 
314 static int
315 __kmp_release_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
316 {
317  char const * const func = "omp_unset_nest_lock";
318  KMP_MB(); /* in case another processor initialized lock */
319  if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
320  KMP_FATAL( LockSimpleUsedAsNestable, func );
321  }
322  if ( __kmp_get_tas_lock_owner( lck ) == -1 ) {
323  KMP_FATAL( LockUnsettingFree, func );
324  }
325  if ( __kmp_get_tas_lock_owner( lck ) != gtid ) {
326  KMP_FATAL( LockUnsettingSetByAnother, func );
327  }
328  return __kmp_release_nested_tas_lock( lck, gtid );
329 }
330 
331 void
332 __kmp_init_nested_tas_lock( kmp_tas_lock_t * lck )
333 {
334  __kmp_init_tas_lock( lck );
335  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
336 }
337 
338 static void
339 __kmp_init_nested_tas_lock_with_checks( kmp_tas_lock_t * lck )
340 {
341  __kmp_init_nested_tas_lock( lck );
342 }
343 
344 void
345 __kmp_destroy_nested_tas_lock( kmp_tas_lock_t *lck )
346 {
347  __kmp_destroy_tas_lock( lck );
348  lck->lk.depth_locked = 0;
349 }
350 
351 static void
352 __kmp_destroy_nested_tas_lock_with_checks( kmp_tas_lock_t *lck )
353 {
354  char const * const func = "omp_destroy_nest_lock";
355  if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
356  KMP_FATAL( LockSimpleUsedAsNestable, func );
357  }
358  if ( __kmp_get_tas_lock_owner( lck ) != -1 ) {
359  KMP_FATAL( LockStillOwned, func );
360  }
361  __kmp_destroy_nested_tas_lock( lck );
362 }
363 
364 
365 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
366 
367 /* ------------------------------------------------------------------------ */
368 /* futex locks */
369 
370 // futex locks are really just test and set locks, with a different method
371 // of handling contention. They take the same amount of space as test and
372 // set locks, and are allocated the same way (i.e. use the area allocated by
373 // the compiler for non-nested locks / allocate nested locks on the heap).
374 
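// Editorial note: the futex lock packs its whole state into the single 32-bit poll word:
// 0 (KMP_LOCK_FREE) means unlocked, otherwise the upper bits hold (gtid + 1) of the owner
// shifted left by one, and bit 0 is set once some waiter has parked in the kernel and a
// FUTEX_WAKE is therefore required on release. Conceptual helpers (hypothetical names,
// ignoring the extra tagging applied by KMP_LOCK_BUSY / KMP_LOCK_STRIP, not compiled):
#if 0
static kmp_int32 __example_futex_encode( kmp_int32 gtid )      { return ( gtid + 1 ) << 1; }
static kmp_int32 __example_futex_owner( kmp_int32 poll )       { return ( poll >> 1 ) - 1; }
static int       __example_futex_has_waiters( kmp_int32 poll ) { return poll & 1; }
#endif
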
375 static kmp_int32
376 __kmp_get_futex_lock_owner( kmp_futex_lock_t *lck )
377 {
378  return KMP_LOCK_STRIP(( TCR_4( lck->lk.poll ) >> 1 )) - 1;
379 }
380 
381 static inline bool
382 __kmp_is_futex_lock_nestable( kmp_futex_lock_t *lck )
383 {
384  return lck->lk.depth_locked != -1;
385 }
386 
387 __forceinline static int
388 __kmp_acquire_futex_lock_timed_template( kmp_futex_lock_t *lck, kmp_int32 gtid )
389 {
390  kmp_int32 gtid_code = ( gtid + 1 ) << 1;
391 
392  KMP_MB();
393 
394 #ifdef USE_LOCK_PROFILE
395  kmp_uint32 curr = TCR_4( lck->lk.poll );
396  if ( ( curr != 0 ) && ( curr != gtid_code ) )
397  __kmp_printf( "LOCK CONTENTION: %p\n", lck );
398  /* else __kmp_printf( "." );*/
399 #endif /* USE_LOCK_PROFILE */
400 
401  KMP_FSYNC_PREPARE( lck );
402  KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n",
403  lck, lck->lk.poll, gtid ) );
404 
405  kmp_int32 poll_val;
406 
407  while ( ( poll_val = KMP_COMPARE_AND_STORE_RET32( & ( lck->lk.poll ), KMP_LOCK_FREE(futex),
408  KMP_LOCK_BUSY(gtid_code, futex) ) ) != KMP_LOCK_FREE(futex) ) {
409 
410  kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;
411  KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n",
412  lck, gtid, poll_val, cond ) );
413 
414  //
415  // NOTE: if you try to use the following condition for this branch
416  //
417  // if ( poll_val & 1 == 0 )
418  //
419  // Then the 12.0 compiler has a bug where the following block will
420  // always be skipped, regardless of the value of the LSB of poll_val.
421  //
422  if ( ! cond ) {
423  //
424  // Try to set the lsb in the poll to indicate to the owner
425  // thread that they need to wake this thread up.
426  //
427  if ( ! KMP_COMPARE_AND_STORE_REL32( & ( lck->lk.poll ), poll_val, poll_val | KMP_LOCK_BUSY(1, futex) ) ) {
428  KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n",
429  lck, lck->lk.poll, gtid ) );
430  continue;
431  }
432  poll_val |= KMP_LOCK_BUSY(1, futex);
433 
434  KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n",
435  lck, lck->lk.poll, gtid ) );
436  }
437 
438  KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n",
439  lck, gtid, poll_val ) );
440 
441  kmp_int32 rc;
442  if ( ( rc = syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAIT,
443  poll_val, NULL, NULL, 0 ) ) != 0 ) {
444  KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) failed (rc=%d errno=%d)\n",
445  lck, gtid, poll_val, rc, errno ) );
446  continue;
447  }
448 
449  KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n",
450  lck, gtid, poll_val ) );
451  //
452  // This thread has now done a successful futex wait call and was
453  // entered on the OS futex queue. We must now perform a futex
454  // wake call when releasing the lock, as we have no idea how many
455  // other threads are in the queue.
456  //
457  gtid_code |= 1;
458  }
459 
460  KMP_FSYNC_ACQUIRED( lck );
461  KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n",
462  lck, lck->lk.poll, gtid ) );
463  return KMP_LOCK_ACQUIRED_FIRST;
464 }
465 
466 int
467 __kmp_acquire_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
468 {
469  return __kmp_acquire_futex_lock_timed_template( lck, gtid );
470 }
471 
472 static int
473 __kmp_acquire_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
474 {
475  char const * const func = "omp_set_lock";
476  if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
477  && __kmp_is_futex_lock_nestable( lck ) ) {
478  KMP_FATAL( LockNestableUsedAsSimple, func );
479  }
480  if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) == gtid ) ) {
481  KMP_FATAL( LockIsAlreadyOwned, func );
482  }
483  return __kmp_acquire_futex_lock( lck, gtid );
484 }
485 
486 int
487 __kmp_test_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
488 {
489  if ( KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(futex), KMP_LOCK_BUSY(gtid+1, futex) << 1 ) ) {
490  KMP_FSYNC_ACQUIRED( lck );
491  return TRUE;
492  }
493  return FALSE;
494 }
495 
496 static int
497 __kmp_test_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
498 {
499  char const * const func = "omp_test_lock";
500  if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
501  && __kmp_is_futex_lock_nestable( lck ) ) {
502  KMP_FATAL( LockNestableUsedAsSimple, func );
503  }
504  return __kmp_test_futex_lock( lck, gtid );
505 }
506 
507 int
508 __kmp_release_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
509 {
510  KMP_MB(); /* Flush all pending memory write invalidates. */
511 
512  KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n",
513  lck, lck->lk.poll, gtid ) );
514 
515  KMP_FSYNC_RELEASING(lck);
516 
517  kmp_int32 poll_val = KMP_XCHG_FIXED32( & ( lck->lk.poll ), KMP_LOCK_FREE(futex) );
518 
519  KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n",
520  lck, gtid, poll_val ) );
521 
522  if ( KMP_LOCK_STRIP(poll_val) & 1 ) {
523  KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n",
524  lck, gtid ) );
525  syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAKE, KMP_LOCK_BUSY(1, futex), NULL, NULL, 0 );
526  }
527 
528  KMP_MB(); /* Flush all pending memory write invalidates. */
529 
530  KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n",
531  lck, lck->lk.poll, gtid ) );
532 
533  KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
534  __kmp_xproc ) );
535  return KMP_LOCK_RELEASED;
536 }
537 
538 static int
539 __kmp_release_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
540 {
541  char const * const func = "omp_unset_lock";
542  KMP_MB(); /* in case another processor initialized lock */
543  if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
544  && __kmp_is_futex_lock_nestable( lck ) ) {
545  KMP_FATAL( LockNestableUsedAsSimple, func );
546  }
547  if ( __kmp_get_futex_lock_owner( lck ) == -1 ) {
548  KMP_FATAL( LockUnsettingFree, func );
549  }
550  if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) >= 0 )
551  && ( __kmp_get_futex_lock_owner( lck ) != gtid ) ) {
552  KMP_FATAL( LockUnsettingSetByAnother, func );
553  }
554  return __kmp_release_futex_lock( lck, gtid );
555 }
556 
557 void
558 __kmp_init_futex_lock( kmp_futex_lock_t * lck )
559 {
560  TCW_4( lck->lk.poll, KMP_LOCK_FREE(futex) );
561 }
562 
563 static void
564 __kmp_init_futex_lock_with_checks( kmp_futex_lock_t * lck )
565 {
566  __kmp_init_futex_lock( lck );
567 }
568 
569 void
570 __kmp_destroy_futex_lock( kmp_futex_lock_t *lck )
571 {
572  lck->lk.poll = 0;
573 }
574 
575 static void
576 __kmp_destroy_futex_lock_with_checks( kmp_futex_lock_t *lck )
577 {
578  char const * const func = "omp_destroy_lock";
579  if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
580  && __kmp_is_futex_lock_nestable( lck ) ) {
581  KMP_FATAL( LockNestableUsedAsSimple, func );
582  }
583  if ( __kmp_get_futex_lock_owner( lck ) != -1 ) {
584  KMP_FATAL( LockStillOwned, func );
585  }
586  __kmp_destroy_futex_lock( lck );
587 }
588 
589 
590 //
591 // nested futex locks
592 //
593 
594 int
595 __kmp_acquire_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
596 {
597  KMP_DEBUG_ASSERT( gtid >= 0 );
598 
599  if ( __kmp_get_futex_lock_owner( lck ) == gtid ) {
600  lck->lk.depth_locked += 1;
601  return KMP_LOCK_ACQUIRED_NEXT;
602  }
603  else {
604  __kmp_acquire_futex_lock_timed_template( lck, gtid );
605  lck->lk.depth_locked = 1;
606  return KMP_LOCK_ACQUIRED_FIRST;
607  }
608 }
609 
610 static int
611 __kmp_acquire_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
612 {
613  char const * const func = "omp_set_nest_lock";
614  if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
615  KMP_FATAL( LockSimpleUsedAsNestable, func );
616  }
617  return __kmp_acquire_nested_futex_lock( lck, gtid );
618 }
619 
620 int
621 __kmp_test_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
622 {
623  int retval;
624 
625  KMP_DEBUG_ASSERT( gtid >= 0 );
626 
627  if ( __kmp_get_futex_lock_owner( lck ) == gtid ) {
628  retval = ++lck->lk.depth_locked;
629  }
630  else if ( !__kmp_test_futex_lock( lck, gtid ) ) {
631  retval = 0;
632  }
633  else {
634  KMP_MB();
635  retval = lck->lk.depth_locked = 1;
636  }
637  return retval;
638 }
639 
640 static int
641 __kmp_test_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
642 {
643  char const * const func = "omp_test_nest_lock";
644  if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
645  KMP_FATAL( LockSimpleUsedAsNestable, func );
646  }
647  return __kmp_test_nested_futex_lock( lck, gtid );
648 }
649 
650 int
651 __kmp_release_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
652 {
653  KMP_DEBUG_ASSERT( gtid >= 0 );
654 
655  KMP_MB();
656  if ( --(lck->lk.depth_locked) == 0 ) {
657  __kmp_release_futex_lock( lck, gtid );
658  return KMP_LOCK_RELEASED;
659  }
660  return KMP_LOCK_STILL_HELD;
661 }
662 
663 static int
664 __kmp_release_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
665 {
666  char const * const func = "omp_unset_nest_lock";
667  KMP_MB(); /* in case another processor initialized lock */
668  if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
669  KMP_FATAL( LockSimpleUsedAsNestable, func );
670  }
671  if ( __kmp_get_futex_lock_owner( lck ) == -1 ) {
672  KMP_FATAL( LockUnsettingFree, func );
673  }
674  if ( __kmp_get_futex_lock_owner( lck ) != gtid ) {
675  KMP_FATAL( LockUnsettingSetByAnother, func );
676  }
677  return __kmp_release_nested_futex_lock( lck, gtid );
678 }
679 
680 void
681 __kmp_init_nested_futex_lock( kmp_futex_lock_t * lck )
682 {
683  __kmp_init_futex_lock( lck );
684  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
685 }
686 
687 static void
688 __kmp_init_nested_futex_lock_with_checks( kmp_futex_lock_t * lck )
689 {
690  __kmp_init_nested_futex_lock( lck );
691 }
692 
693 void
694 __kmp_destroy_nested_futex_lock( kmp_futex_lock_t *lck )
695 {
696  __kmp_destroy_futex_lock( lck );
697  lck->lk.depth_locked = 0;
698 }
699 
700 static void
701 __kmp_destroy_nested_futex_lock_with_checks( kmp_futex_lock_t *lck )
702 {
703  char const * const func = "omp_destroy_nest_lock";
704  if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
705  KMP_FATAL( LockSimpleUsedAsNestable, func );
706  }
707  if ( __kmp_get_futex_lock_owner( lck ) != -1 ) {
708  KMP_FATAL( LockStillOwned, func );
709  }
710  __kmp_destroy_nested_futex_lock( lck );
711 }
712 
713 #endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
714 
715 
716 /* ------------------------------------------------------------------------ */
717 /* ticket (bakery) locks */
718 
719 static kmp_int32
720 __kmp_get_ticket_lock_owner( kmp_ticket_lock_t *lck )
721 {
722  return TCR_4( lck->lk.owner_id ) - 1;
723 }
724 
725 static inline bool
726 __kmp_is_ticket_lock_nestable( kmp_ticket_lock_t *lck )
727 {
728  return lck->lk.depth_locked != -1;
729 }
730 
731 static kmp_uint32
732 __kmp_bakery_check(kmp_uint32 value, kmp_uint32 checker)
733 {
734  register kmp_uint32 pause;
735 
736  if (value == checker) {
737  return TRUE;
738  }
739  for (pause = checker - value; pause != 0; --pause); // empty delay loop: back off in rough proportion to how many tickets are still ahead of ours
740  return FALSE;
741 }
742 
743 __forceinline static int
744 __kmp_acquire_ticket_lock_timed_template( kmp_ticket_lock_t *lck, kmp_int32 gtid )
745 {
746  kmp_uint32 my_ticket;
747  KMP_MB();
748 
749  my_ticket = KMP_TEST_THEN_INC32( (kmp_int32 *) &lck->lk.next_ticket );
750 
751 #ifdef USE_LOCK_PROFILE
752  if ( TCR_4( lck->lk.now_serving ) != my_ticket )
753  __kmp_printf( "LOCK CONTENTION: %p\n", lck );
754  /* else __kmp_printf( "." );*/
755 #endif /* USE_LOCK_PROFILE */
756 
757  if ( TCR_4( lck->lk.now_serving ) == my_ticket ) {
758  KMP_FSYNC_ACQUIRED(lck);
759  return KMP_LOCK_ACQUIRED_FIRST;
760  }
761  KMP_WAIT_YIELD( &lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck );
762  KMP_FSYNC_ACQUIRED(lck);
763  return KMP_LOCK_ACQUIRED_FIRST;
764 }
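
// Editorial sketch: the acquire path above is the classic ticket (bakery) protocol.
// Stripped of profiling, yielding and consistency checks, it amounts to the following
// condensed form (hypothetical helpers, not compiled):
#if 0
static void __example_ticket_acquire( kmp_ticket_lock_t *lck )
{
    kmp_uint32 my_ticket =
        KMP_TEST_THEN_INC32( (kmp_int32 *) &lck->lk.next_ticket );     // take a ticket
    while ( TCR_4( lck->lk.now_serving ) != my_ticket ) {
        KMP_CPU_PAUSE();                                               // wait for our turn
    }
}
static void __example_ticket_release( kmp_ticket_lock_t *lck )
{
    KMP_ST_REL32( &( lck->lk.now_serving ), lck->lk.now_serving + 1 ); // serve the next ticket
}
#endif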
765 
766 int
767 __kmp_acquire_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
768 {
769  return __kmp_acquire_ticket_lock_timed_template( lck, gtid );
770 }
771 
772 static int
773 __kmp_acquire_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
774 {
775  char const * const func = "omp_set_lock";
776  if ( lck->lk.initialized != lck ) {
777  KMP_FATAL( LockIsUninitialized, func );
778  }
779  if ( __kmp_is_ticket_lock_nestable( lck ) ) {
780  KMP_FATAL( LockNestableUsedAsSimple, func );
781  }
782  if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) == gtid ) ) {
783  KMP_FATAL( LockIsAlreadyOwned, func );
784  }
785 
786  __kmp_acquire_ticket_lock( lck, gtid );
787 
788  lck->lk.owner_id = gtid + 1;
789  return KMP_LOCK_ACQUIRED_FIRST;
790 }
791 
792 int
793 __kmp_test_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
794 {
795  kmp_uint32 my_ticket = TCR_4( lck->lk.next_ticket );
796  if ( TCR_4( lck->lk.now_serving ) == my_ticket ) {
797  kmp_uint32 next_ticket = my_ticket + 1;
798  if ( KMP_COMPARE_AND_STORE_ACQ32( (kmp_int32 *) &lck->lk.next_ticket,
799  my_ticket, next_ticket ) ) {
800  KMP_FSYNC_ACQUIRED( lck );
801  return TRUE;
802  }
803  }
804  return FALSE;
805 }
806 
807 static int
808 __kmp_test_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
809 {
810  char const * const func = "omp_test_lock";
811  if ( lck->lk.initialized != lck ) {
812  KMP_FATAL( LockIsUninitialized, func );
813  }
814  if ( __kmp_is_ticket_lock_nestable( lck ) ) {
815  KMP_FATAL( LockNestableUsedAsSimple, func );
816  }
817 
818  int retval = __kmp_test_ticket_lock( lck, gtid );
819 
820  if ( retval ) {
821  lck->lk.owner_id = gtid + 1;
822  }
823  return retval;
824 }
825 
826 int
827 __kmp_release_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
828 {
829  kmp_uint32 distance;
830 
831  KMP_MB(); /* Flush all pending memory write invalidates. */
832 
833  KMP_FSYNC_RELEASING(lck);
834  distance = ( TCR_4( lck->lk.next_ticket ) - TCR_4( lck->lk.now_serving ) );
835 
836  KMP_ST_REL32( &(lck->lk.now_serving), lck->lk.now_serving + 1 );
837 
838  KMP_MB(); /* Flush all pending memory write invalidates. */
839 
840  KMP_YIELD( distance
841  > (kmp_uint32) (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) );
842  return KMP_LOCK_RELEASED;
843 }
844 
845 static int
846 __kmp_release_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
847 {
848  char const * const func = "omp_unset_lock";
849  KMP_MB(); /* in case another processor initialized lock */
850  if ( lck->lk.initialized != lck ) {
851  KMP_FATAL( LockIsUninitialized, func );
852  }
853  if ( __kmp_is_ticket_lock_nestable( lck ) ) {
854  KMP_FATAL( LockNestableUsedAsSimple, func );
855  }
856  if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) {
857  KMP_FATAL( LockUnsettingFree, func );
858  }
859  if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) >= 0 )
860  && ( __kmp_get_ticket_lock_owner( lck ) != gtid ) ) {
861  KMP_FATAL( LockUnsettingSetByAnother, func );
862  }
863  lck->lk.owner_id = 0;
864  return __kmp_release_ticket_lock( lck, gtid );
865 }
866 
867 void
868 __kmp_init_ticket_lock( kmp_ticket_lock_t * lck )
869 {
870  lck->lk.location = NULL;
871  TCW_4( lck->lk.next_ticket, 0 );
872  TCW_4( lck->lk.now_serving, 0 );
873  lck->lk.owner_id = 0; // no thread owns the lock.
874  lck->lk.depth_locked = -1; // -1 => not a nested lock.
875  lck->lk.initialized = (kmp_ticket_lock *)lck;
876 }
877 
878 static void
879 __kmp_init_ticket_lock_with_checks( kmp_ticket_lock_t * lck )
880 {
881  __kmp_init_ticket_lock( lck );
882 }
883 
884 void
885 __kmp_destroy_ticket_lock( kmp_ticket_lock_t *lck )
886 {
887  lck->lk.initialized = NULL;
888  lck->lk.location = NULL;
889  lck->lk.next_ticket = 0;
890  lck->lk.now_serving = 0;
891  lck->lk.owner_id = 0;
892  lck->lk.depth_locked = -1;
893 }
894 
895 static void
896 __kmp_destroy_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
897 {
898  char const * const func = "omp_destroy_lock";
899  if ( lck->lk.initialized != lck ) {
900  KMP_FATAL( LockIsUninitialized, func );
901  }
902  if ( __kmp_is_ticket_lock_nestable( lck ) ) {
903  KMP_FATAL( LockNestableUsedAsSimple, func );
904  }
905  if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) {
906  KMP_FATAL( LockStillOwned, func );
907  }
908  __kmp_destroy_ticket_lock( lck );
909 }
910 
911 
912 //
913 // nested ticket locks
914 //
915 
916 int
917 __kmp_acquire_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
918 {
919  KMP_DEBUG_ASSERT( gtid >= 0 );
920 
921  if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) {
922  lck->lk.depth_locked += 1;
923  return KMP_LOCK_ACQUIRED_NEXT;
924  }
925  else {
926  __kmp_acquire_ticket_lock_timed_template( lck, gtid );
927  KMP_MB();
928  lck->lk.depth_locked = 1;
929  KMP_MB();
930  lck->lk.owner_id = gtid + 1;
931  return KMP_LOCK_ACQUIRED_FIRST;
932  }
933 }
934 
935 static int
936 __kmp_acquire_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
937 {
938  char const * const func = "omp_set_nest_lock";
939  if ( lck->lk.initialized != lck ) {
940  KMP_FATAL( LockIsUninitialized, func );
941  }
942  if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
943  KMP_FATAL( LockSimpleUsedAsNestable, func );
944  }
945  return __kmp_acquire_nested_ticket_lock( lck, gtid );
946 }
947 
948 int
949 __kmp_test_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
950 {
951  int retval;
952 
953  KMP_DEBUG_ASSERT( gtid >= 0 );
954 
955  if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) {
956  retval = ++lck->lk.depth_locked;
957  }
958  else if ( !__kmp_test_ticket_lock( lck, gtid ) ) {
959  retval = 0;
960  }
961  else {
962  KMP_MB();
963  retval = lck->lk.depth_locked = 1;
964  KMP_MB();
965  lck->lk.owner_id = gtid + 1;
966  }
967  return retval;
968 }
969 
970 static int
971 __kmp_test_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck,
972  kmp_int32 gtid )
973 {
974  char const * const func = "omp_test_nest_lock";
975  if ( lck->lk.initialized != lck ) {
976  KMP_FATAL( LockIsUninitialized, func );
977  }
978  if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
979  KMP_FATAL( LockSimpleUsedAsNestable, func );
980  }
981  return __kmp_test_nested_ticket_lock( lck, gtid );
982 }
983 
984 int
985 __kmp_release_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
986 {
987  KMP_DEBUG_ASSERT( gtid >= 0 );
988 
989  KMP_MB();
990  if ( --(lck->lk.depth_locked) == 0 ) {
991  KMP_MB();
992  lck->lk.owner_id = 0;
993  __kmp_release_ticket_lock( lck, gtid );
994  return KMP_LOCK_RELEASED;
995  }
996  return KMP_LOCK_STILL_HELD;
997 }
998 
999 static int
1000 __kmp_release_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
1001 {
1002  char const * const func = "omp_unset_nest_lock";
1003  KMP_MB(); /* in case another processor initialized lock */
1004  if ( lck->lk.initialized != lck ) {
1005  KMP_FATAL( LockIsUninitialized, func );
1006  }
1007  if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
1008  KMP_FATAL( LockSimpleUsedAsNestable, func );
1009  }
1010  if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) {
1011  KMP_FATAL( LockUnsettingFree, func );
1012  }
1013  if ( __kmp_get_ticket_lock_owner( lck ) != gtid ) {
1014  KMP_FATAL( LockUnsettingSetByAnother, func );
1015  }
1016  return __kmp_release_nested_ticket_lock( lck, gtid );
1017 }
1018 
1019 void
1020 __kmp_init_nested_ticket_lock( kmp_ticket_lock_t * lck )
1021 {
1022  __kmp_init_ticket_lock( lck );
1023  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
1024 }
1025 
1026 static void
1027 __kmp_init_nested_ticket_lock_with_checks( kmp_ticket_lock_t * lck )
1028 {
1029  __kmp_init_nested_ticket_lock( lck );
1030 }
1031 
1032 void
1033 __kmp_destroy_nested_ticket_lock( kmp_ticket_lock_t *lck )
1034 {
1035  __kmp_destroy_ticket_lock( lck );
1036  lck->lk.depth_locked = 0;
1037 }
1038 
1039 static void
1040 __kmp_destroy_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
1041 {
1042  char const * const func = "omp_destroy_nest_lock";
1043  if ( lck->lk.initialized != lck ) {
1044  KMP_FATAL( LockIsUninitialized, func );
1045  }
1046  if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
1047  KMP_FATAL( LockSimpleUsedAsNestable, func );
1048  }
1049  if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) {
1050  KMP_FATAL( LockStillOwned, func );
1051  }
1052  __kmp_destroy_nested_ticket_lock( lck );
1053 }
1054 
1055 
1056 //
1057 // access functions to fields which don't exist for all lock kinds.
1058 //
1059 
1060 static int
1061 __kmp_is_ticket_lock_initialized( kmp_ticket_lock_t *lck )
1062 {
1063  return lck == lck->lk.initialized;
1064 }
1065 
1066 static const ident_t *
1067 __kmp_get_ticket_lock_location( kmp_ticket_lock_t *lck )
1068 {
1069  return lck->lk.location;
1070 }
1071 
1072 static void
1073 __kmp_set_ticket_lock_location( kmp_ticket_lock_t *lck, const ident_t *loc )
1074 {
1075  lck->lk.location = loc;
1076 }
1077 
1078 static kmp_lock_flags_t
1079 __kmp_get_ticket_lock_flags( kmp_ticket_lock_t *lck )
1080 {
1081  return lck->lk.flags;
1082 }
1083 
1084 static void
1085 __kmp_set_ticket_lock_flags( kmp_ticket_lock_t *lck, kmp_lock_flags_t flags )
1086 {
1087  lck->lk.flags = flags;
1088 }
1089 
1090 /* ------------------------------------------------------------------------ */
1091 /* queuing locks */
1092 
1093 /*
1094  * First the states
1095  * (head,tail) = 0, 0 means lock is unheld, nobody on queue
1096  * UINT_MAX or -1, 0 means lock is held, nobody on queue
1097  * h, h means lock is held or about to transition, 1 element on queue
1098  * h, t h <> t, means lock is held or about to transition, >1 elements on queue
1099  *
1100  * Now the transitions
1101  * Acquire(0,0) = -1 ,0
1102  * Release(0,0) = Error
1103  * Acquire(-1,0) = h ,h h > 0
1104  * Release(-1,0) = 0 ,0
1105  * Acquire(h,h) = h ,t h > 0, t > 0, h <> t
1106  * Release(h,h) = -1 ,0 h > 0
1107  * Acquire(h,t) = h ,t' h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t'
1108  * Release(h,t) = h',t h > 0, t > 0, h <> t, h <> h', h' maybe = t
1109  *
1110  * And pictorially
1111  *
1112  *
1113  *          +-----+
1114  *          | 0, 0|------- release -------> Error
1115  *          +-----+
1116  *            |  ^
1117  *     acquire|  |release
1118  *            |  |
1119  *            |  |
1120  *            v  |
1121  *          +-----+
1122  *          |-1, 0|
1123  *          +-----+
1124  *            |  ^
1125  *     acquire|  |release
1126  *            |  |
1127  *            |  |
1128  *            v  |
1129  *          +-----+
1130  *          | h, h|
1131  *          +-----+
1132  *            |  ^
1133  *     acquire|  |release
1134  *            |  |
1135  *            |  |
1136  *            v  |
1137  *          +-----+
1138  *          | h, t|----- acquire, release loopback ---+
1139  *          +-----+                                   |
1140  *             ^                                      |
1141  *             |                                      |
1142  *             +--------------------------------------+
1143  *
1144  */
1145 
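// Editorial sketch: the transitions above are realized with two kinds of compare-and-swap
// in the code below -- a 32-bit CAS on head_id alone for (0,0)->(-1,0), and a 64-bit CAS
// spanning the adjacent tail_id/head_id words (packed with KMP_PACK_64) when both halves
// must change atomically, e.g. (-1,0)->(tid,tid). The 8-byte alignment asserted in
// __kmp_validate_locks() is what makes the wide CAS legal. Condensed, hypothetical form
// (not compiled; layout/packing details are handled by KMP_PACK_64 in the real code):
#if 0
static int __example_queuing_grab_or_enqueue( kmp_queuing_lock_t *lck, kmp_int32 gtid )
{
    /* grab a free lock: head 0 -> -1, tail untouched */
    if ( KMP_COMPARE_AND_STORE_ACQ32( &lck->lk.head_id, 0, -1 ) )
        return 1;
    /* enqueue as the only waiter: (head,tail) goes (-1,0) -> (gtid+1,gtid+1) in one step */
    return KMP_COMPARE_AND_STORE_ACQ64( (volatile kmp_int64 *) &lck->lk.tail_id,
                                        KMP_PACK_64( -1, 0 ),
                                        KMP_PACK_64( gtid + 1, gtid + 1 ) );
}
#endif
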
1146 #ifdef DEBUG_QUEUING_LOCKS
1147 
1148 /* Stuff for circular trace buffer */
1149 #define TRACE_BUF_ELE 1024
1150 static char traces[TRACE_BUF_ELE][128] = { 0 };
1151 static int tc = 0;
1152 #define TRACE_LOCK(X,Y) KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s\n", X, Y );
1153 #define TRACE_LOCK_T(X,Y,Z) KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s%d\n", X,Y,Z );
1154 #define TRACE_LOCK_HT(X,Y,Z,Q) KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s %d,%d\n", X, Y, Z, Q );
1155 
1156 static void
1157 __kmp_dump_queuing_lock( kmp_info_t *this_thr, kmp_int32 gtid,
1158  kmp_queuing_lock_t *lck, kmp_int32 head_id, kmp_int32 tail_id )
1159 {
1160  kmp_int32 t, i;
1161 
1162  __kmp_printf_no_lock( "\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! \n" );
1163 
1164  i = tc % TRACE_BUF_ELE;
1165  __kmp_printf_no_lock( "%s\n", traces[i] );
1166  i = (i+1) % TRACE_BUF_ELE;
1167  while ( i != (tc % TRACE_BUF_ELE) ) {
1168  __kmp_printf_no_lock( "%s", traces[i] );
1169  i = (i+1) % TRACE_BUF_ELE;
1170  }
1171  __kmp_printf_no_lock( "\n" );
1172 
1173  __kmp_printf_no_lock(
1174  "\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, next_wait:%d, head_id:%d, tail_id:%d\n",
1175  gtid+1, this_thr->th.th_spin_here, this_thr->th.th_next_waiting,
1176  head_id, tail_id );
1177 
1178  __kmp_printf_no_lock( "\t\thead: %d ", lck->lk.head_id );
1179 
1180  if ( lck->lk.head_id >= 1 ) {
1181  t = __kmp_threads[lck->lk.head_id-1]->th.th_next_waiting;
1182  while (t > 0) {
1183  __kmp_printf_no_lock( "-> %d ", t );
1184  t = __kmp_threads[t-1]->th.th_next_waiting;
1185  }
1186  }
1187  __kmp_printf_no_lock( "; tail: %d ", lck->lk.tail_id );
1188  __kmp_printf_no_lock( "\n\n" );
1189 }
1190 
1191 #endif /* DEBUG_QUEUING_LOCKS */
1192 
1193 static kmp_int32
1194 __kmp_get_queuing_lock_owner( kmp_queuing_lock_t *lck )
1195 {
1196  return TCR_4( lck->lk.owner_id ) - 1;
1197 }
1198 
1199 static inline bool
1200 __kmp_is_queuing_lock_nestable( kmp_queuing_lock_t *lck )
1201 {
1202  return lck->lk.depth_locked != -1;
1203 }
1204 
1205 /* Acquire a lock using the queuing lock implementation */
1206 template <bool takeTime>
1207 /* [TLW] The unused template above is left behind because of what BEB believes is a
1208  potential compiler problem with __forceinline. */
1209 __forceinline static int
1210 __kmp_acquire_queuing_lock_timed_template( kmp_queuing_lock_t *lck,
1211  kmp_int32 gtid )
1212 {
1213  register kmp_info_t *this_thr = __kmp_thread_from_gtid( gtid );
1214  volatile kmp_int32 *head_id_p = & lck->lk.head_id;
1215  volatile kmp_int32 *tail_id_p = & lck->lk.tail_id;
1216  volatile kmp_uint32 *spin_here_p;
1217  kmp_int32 need_mf = 1;
1218 
1219 #if OMPT_SUPPORT
1220  ompt_state_t prev_state = ompt_state_undefined;
1221 #endif
1222 
1223  KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid ));
1224 
1225  KMP_FSYNC_PREPARE( lck );
1226  KMP_DEBUG_ASSERT( this_thr != NULL );
1227  spin_here_p = & this_thr->th.th_spin_here;
1228 
1229 #ifdef DEBUG_QUEUING_LOCKS
1230  TRACE_LOCK( gtid+1, "acq ent" );
1231  if ( *spin_here_p )
1232  __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1233  if ( this_thr->th.th_next_waiting != 0 )
1234  __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1235 #endif
1236  KMP_DEBUG_ASSERT( !*spin_here_p );
1237  KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
1238 
1239 
1240  /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to head_id_p
1241  that may follow, not just in execution order, but also in visibility order. This way,
1242  when a releasing thread observes the changes to the queue by this thread, it can
1243  rightly assume that spin_here_p has already been set to TRUE, so that when it sets
1244  spin_here_p to FALSE, it is not premature. If the releasing thread sets spin_here_p
1245  to FALSE before this thread sets it to TRUE, this thread will hang.
1246  */
1247  *spin_here_p = TRUE; /* before enqueuing to prevent race */
1248 
1249  while( 1 ) {
1250  kmp_int32 enqueued;
1251  kmp_int32 head;
1252  kmp_int32 tail;
1253 
1254  head = *head_id_p;
1255 
1256  switch ( head ) {
1257 
1258  case -1:
1259  {
1260 #ifdef DEBUG_QUEUING_LOCKS
1261  tail = *tail_id_p;
1262  TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
1263 #endif
1264  tail = 0; /* to make sure next link asynchronously read is not set accidentally;
1265  this assignment prevents us from entering the if ( t > 0 )
1266  condition in the enqueued case below, which is not necessary for
1267  this state transition */
1268 
1269  need_mf = 0;
1270  /* try (-1,0)->(tid,tid) */
1271  enqueued = KMP_COMPARE_AND_STORE_ACQ64( (volatile kmp_int64 *) tail_id_p,
1272  KMP_PACK_64( -1, 0 ),
1273  KMP_PACK_64( gtid+1, gtid+1 ) );
1274 #ifdef DEBUG_QUEUING_LOCKS
1275  if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (-1,0)->(tid,tid)" );
1276 #endif
1277  }
1278  break;
1279 
1280  default:
1281  {
1282  tail = *tail_id_p;
1283  KMP_DEBUG_ASSERT( tail != gtid + 1 );
1284 
1285 #ifdef DEBUG_QUEUING_LOCKS
1286  TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
1287 #endif
1288 
1289  if ( tail == 0 ) {
1290  enqueued = FALSE;
1291  }
1292  else {
1293  need_mf = 0;
1294  /* try (h,t) or (h,h)->(h,tid) */
1295  enqueued = KMP_COMPARE_AND_STORE_ACQ32( tail_id_p, tail, gtid+1 );
1296 
1297 #ifdef DEBUG_QUEUING_LOCKS
1298  if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (h,t)->(h,tid)" );
1299 #endif
1300  }
1301  }
1302  break;
1303 
1304  case 0: /* empty queue */
1305  {
1306  kmp_int32 grabbed_lock;
1307 
1308 #ifdef DEBUG_QUEUING_LOCKS
1309  tail = *tail_id_p;
1310  TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
1311 #endif
1312  /* try (0,0)->(-1,0) */
1313 
1314  /* only legal transition out of head = 0 is head = -1 with no change to tail */
1315  grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 );
1316 
1317  if ( grabbed_lock ) {
1318 
1319  *spin_here_p = FALSE;
1320 
1321  KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n",
1322  lck, gtid ));
1323 #ifdef DEBUG_QUEUING_LOCKS
1324  TRACE_LOCK_HT( gtid+1, "acq exit: ", head, 0 );
1325 #endif
1326 
1327 #if OMPT_SUPPORT
1328  if (ompt_enabled && prev_state != ompt_state_undefined) {
1329  /* change the state before clearing wait_id */
1330  this_thr->th.ompt_thread_info.state = prev_state;
1331  this_thr->th.ompt_thread_info.wait_id = 0;
1332  }
1333 #endif
1334 
1335  KMP_FSYNC_ACQUIRED( lck );
1336  return KMP_LOCK_ACQUIRED_FIRST; /* lock holder cannot be on queue */
1337  }
1338  enqueued = FALSE;
1339  }
1340  break;
1341  }
1342 
1343 #if OMPT_SUPPORT
1344  if (ompt_enabled && prev_state == ompt_state_undefined) {
1345  /* this thread will spin; set wait_id before entering wait state */
1346  prev_state = this_thr->th.ompt_thread_info.state;
1347  this_thr->th.ompt_thread_info.wait_id = (uint64_t) lck;
1348  this_thr->th.ompt_thread_info.state = ompt_state_wait_lock;
1349  }
1350 #endif
1351 
1352  if ( enqueued ) {
1353  if ( tail > 0 ) {
1354  kmp_info_t *tail_thr = __kmp_thread_from_gtid( tail - 1 );
1355  KMP_ASSERT( tail_thr != NULL );
1356  tail_thr->th.th_next_waiting = gtid+1;
1357  /* corresponding wait for this write in release code */
1358  }
1359  KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n", lck, gtid ));
1360 
1361 
1362  /* ToDo: May want to consider using __kmp_wait_sleep or something that sleeps for
1363  * throughput only here.
1364  */
1365  KMP_MB();
1366  KMP_WAIT_YIELD(spin_here_p, FALSE, KMP_EQ, lck);
1367 
1368 #ifdef DEBUG_QUEUING_LOCKS
1369  TRACE_LOCK( gtid+1, "acq spin" );
1370 
1371  if ( this_thr->th.th_next_waiting != 0 )
1372  __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1373 #endif
1374  KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
1375  KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after waiting on queue\n",
1376  lck, gtid ));
1377 
1378 #ifdef DEBUG_QUEUING_LOCKS
1379  TRACE_LOCK( gtid+1, "acq exit 2" );
1380 #endif
1381 
1382 #if OMPT_SUPPORT
1383  /* change the state before clearing wait_id */
1384  this_thr->th.ompt_thread_info.state = prev_state;
1385  this_thr->th.ompt_thread_info.wait_id = 0;
1386 #endif
1387 
1388  /* got lock, we were dequeued by the thread that released lock */
1389  return KMP_LOCK_ACQUIRED_FIRST;
1390  }
1391 
1392  /* Yield if number of threads > number of logical processors */
1393  /* ToDo: Not sure why this should only be in oversubscription case,
1394  maybe should be traditional YIELD_INIT/YIELD_WHEN loop */
1395  KMP_YIELD( TCR_4( __kmp_nth ) > (__kmp_avail_proc ? __kmp_avail_proc :
1396  __kmp_xproc ) );
1397 #ifdef DEBUG_QUEUING_LOCKS
1398  TRACE_LOCK( gtid+1, "acq retry" );
1399 #endif
1400 
1401  }
1402  KMP_ASSERT2( 0, "should not get here" );
1403  return KMP_LOCK_ACQUIRED_FIRST;
1404 }
1405 
1406 int
1407 __kmp_acquire_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1408 {
1409  KMP_DEBUG_ASSERT( gtid >= 0 );
1410 
1411  return __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid );
1412 }
1413 
1414 static int
1415 __kmp_acquire_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1416  kmp_int32 gtid )
1417 {
1418  char const * const func = "omp_set_lock";
1419  if ( lck->lk.initialized != lck ) {
1420  KMP_FATAL( LockIsUninitialized, func );
1421  }
1422  if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1423  KMP_FATAL( LockNestableUsedAsSimple, func );
1424  }
1425  if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1426  KMP_FATAL( LockIsAlreadyOwned, func );
1427  }
1428 
1429  __kmp_acquire_queuing_lock( lck, gtid );
1430 
1431  lck->lk.owner_id = gtid + 1;
1432  return KMP_LOCK_ACQUIRED_FIRST;
1433 }
1434 
1435 int
1436 __kmp_test_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1437 {
1438  volatile kmp_int32 *head_id_p = & lck->lk.head_id;
1439  kmp_int32 head;
1440 #ifdef KMP_DEBUG
1441  kmp_info_t *this_thr;
1442 #endif
1443 
1444  KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid ));
1445  KMP_DEBUG_ASSERT( gtid >= 0 );
1446 #ifdef KMP_DEBUG
1447  this_thr = __kmp_thread_from_gtid( gtid );
1448  KMP_DEBUG_ASSERT( this_thr != NULL );
1449  KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
1450 #endif
1451 
1452  head = *head_id_p;
1453 
1454  if ( head == 0 ) { /* nobody on queue, nobody holding */
1455 
1456  /* try (0,0)->(-1,0) */
1457 
1458  if ( KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 ) ) {
1459  KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid ));
1460  KMP_FSYNC_ACQUIRED(lck);
1461  return TRUE;
1462  }
1463  }
1464 
1465  KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid ));
1466  return FALSE;
1467 }
1468 
1469 static int
1470 __kmp_test_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1471 {
1472  char const * const func = "omp_test_lock";
1473  if ( lck->lk.initialized != lck ) {
1474  KMP_FATAL( LockIsUninitialized, func );
1475  }
1476  if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1477  KMP_FATAL( LockNestableUsedAsSimple, func );
1478  }
1479 
1480  int retval = __kmp_test_queuing_lock( lck, gtid );
1481 
1482  if ( retval ) {
1483  lck->lk.owner_id = gtid + 1;
1484  }
1485  return retval;
1486 }
1487 
1488 int
1489 __kmp_release_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1490 {
1491  register kmp_info_t *this_thr;
1492  volatile kmp_int32 *head_id_p = & lck->lk.head_id;
1493  volatile kmp_int32 *tail_id_p = & lck->lk.tail_id;
1494 
1495  KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid ));
1496  KMP_DEBUG_ASSERT( gtid >= 0 );
1497  this_thr = __kmp_thread_from_gtid( gtid );
1498  KMP_DEBUG_ASSERT( this_thr != NULL );
1499 #ifdef DEBUG_QUEUING_LOCKS
1500  TRACE_LOCK( gtid+1, "rel ent" );
1501 
1502  if ( this_thr->th.th_spin_here )
1503  __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1504  if ( this_thr->th.th_next_waiting != 0 )
1505  __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1506 #endif
1507  KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
1508  KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
1509 
1510  KMP_FSYNC_RELEASING(lck);
1511 
1512  while( 1 ) {
1513  kmp_int32 dequeued;
1514  kmp_int32 head;
1515  kmp_int32 tail;
1516 
1517  head = *head_id_p;
1518 
1519 #ifdef DEBUG_QUEUING_LOCKS
1520  tail = *tail_id_p;
1521  TRACE_LOCK_HT( gtid+1, "rel read: ", head, tail );
1522  if ( head == 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1523 #endif
1524  KMP_DEBUG_ASSERT( head != 0 ); /* holding the lock, head must be -1 or queue head */
1525 
1526  if ( head == -1 ) { /* nobody on queue */
1527 
1528  /* try (-1,0)->(0,0) */
1529  if ( KMP_COMPARE_AND_STORE_REL32( head_id_p, -1, 0 ) ) {
1530  KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n",
1531  lck, gtid ));
1532 #ifdef DEBUG_QUEUING_LOCKS
1533  TRACE_LOCK_HT( gtid+1, "rel exit: ", 0, 0 );
1534 #endif
1535 
1536 #if OMPT_SUPPORT
1537  /* nothing to do - no other thread is trying to shift blame */
1538 #endif
1539 
1540  return KMP_LOCK_RELEASED;
1541  }
1542  dequeued = FALSE;
1543 
1544  }
1545  else {
1546 
1547  tail = *tail_id_p;
1548  if ( head == tail ) { /* only one thread on the queue */
1549 
1550 #ifdef DEBUG_QUEUING_LOCKS
1551  if ( head <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1552 #endif
1553  KMP_DEBUG_ASSERT( head > 0 );
1554 
1555  /* try (h,h)->(-1,0) */
1556  dequeued = KMP_COMPARE_AND_STORE_REL64( (kmp_int64 *) tail_id_p,
1557  KMP_PACK_64( head, head ), KMP_PACK_64( -1, 0 ) );
1558 #ifdef DEBUG_QUEUING_LOCKS
1559  TRACE_LOCK( gtid+1, "rel deq: (h,h)->(-1,0)" );
1560 #endif
1561 
1562  }
1563  else {
1564  volatile kmp_int32 *waiting_id_p;
1565  kmp_info_t *head_thr = __kmp_thread_from_gtid( head - 1 );
1566  KMP_DEBUG_ASSERT( head_thr != NULL );
1567  waiting_id_p = & head_thr->th.th_next_waiting;
1568 
1569  /* Does this require synchronous reads? */
1570 #ifdef DEBUG_QUEUING_LOCKS
1571  if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1572 #endif
1573  KMP_DEBUG_ASSERT( head > 0 && tail > 0 );
1574 
1575  /* try (h,t)->(h',t) or (t,t) */
1576 
1577  KMP_MB();
1578  /* make sure enqueuing thread has time to update next waiting thread field */
1579  *head_id_p = KMP_WAIT_YIELD((volatile kmp_uint32*)waiting_id_p, 0, KMP_NEQ, NULL);
1580 #ifdef DEBUG_QUEUING_LOCKS
1581  TRACE_LOCK( gtid+1, "rel deq: (h,t)->(h',t)" );
1582 #endif
1583  dequeued = TRUE;
1584  }
1585  }
1586 
1587  if ( dequeued ) {
1588  kmp_info_t *head_thr = __kmp_thread_from_gtid( head - 1 );
1589  KMP_DEBUG_ASSERT( head_thr != NULL );
1590 
1591  /* Does this require synchronous reads? */
1592 #ifdef DEBUG_QUEUING_LOCKS
1593  if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1594 #endif
1595  KMP_DEBUG_ASSERT( head > 0 && tail > 0 );
1596 
1597  /* For clean code only.
1598  * Thread not released until next statement prevents race with acquire code.
1599  */
1600  head_thr->th.th_next_waiting = 0;
1601 #ifdef DEBUG_QUEUING_LOCKS
1602  TRACE_LOCK_T( gtid+1, "rel nw=0 for t=", head );
1603 #endif
1604 
1605  KMP_MB();
1606  /* reset spin value */
1607  head_thr->th.th_spin_here = FALSE;
1608 
1609  KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after dequeuing\n",
1610  lck, gtid ));
1611 #ifdef DEBUG_QUEUING_LOCKS
1612  TRACE_LOCK( gtid+1, "rel exit 2" );
1613 #endif
1614  return KMP_LOCK_RELEASED;
1615  }
1616  /* KMP_CPU_PAUSE( ); don't want to make releasing thread hold up acquiring threads */
1617 
1618 #ifdef DEBUG_QUEUING_LOCKS
1619  TRACE_LOCK( gtid+1, "rel retry" );
1620 #endif
1621 
1622  } /* while */
1623  KMP_ASSERT2( 0, "should not get here" );
1624  return KMP_LOCK_RELEASED;
1625 }
1626 
1627 static int
1628 __kmp_release_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1629  kmp_int32 gtid )
1630 {
1631  char const * const func = "omp_unset_lock";
1632  KMP_MB(); /* in case another processor initialized lock */
1633  if ( lck->lk.initialized != lck ) {
1634  KMP_FATAL( LockIsUninitialized, func );
1635  }
1636  if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1637  KMP_FATAL( LockNestableUsedAsSimple, func );
1638  }
1639  if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
1640  KMP_FATAL( LockUnsettingFree, func );
1641  }
1642  if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
1643  KMP_FATAL( LockUnsettingSetByAnother, func );
1644  }
1645  lck->lk.owner_id = 0;
1646  return __kmp_release_queuing_lock( lck, gtid );
1647 }
1648 
1649 void
1650 __kmp_init_queuing_lock( kmp_queuing_lock_t *lck )
1651 {
1652  lck->lk.location = NULL;
1653  lck->lk.head_id = 0;
1654  lck->lk.tail_id = 0;
1655  lck->lk.next_ticket = 0;
1656  lck->lk.now_serving = 0;
1657  lck->lk.owner_id = 0; // no thread owns the lock.
1658  lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
1659  lck->lk.initialized = lck;
1660 
1661  KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck));
1662 }
1663 
1664 static void
1665 __kmp_init_queuing_lock_with_checks( kmp_queuing_lock_t * lck )
1666 {
1667  __kmp_init_queuing_lock( lck );
1668 }
1669 
1670 void
1671 __kmp_destroy_queuing_lock( kmp_queuing_lock_t *lck )
1672 {
1673  lck->lk.initialized = NULL;
1674  lck->lk.location = NULL;
1675  lck->lk.head_id = 0;
1676  lck->lk.tail_id = 0;
1677  lck->lk.next_ticket = 0;
1678  lck->lk.now_serving = 0;
1679  lck->lk.owner_id = 0;
1680  lck->lk.depth_locked = -1;
1681 }
1682 
1683 static void
1684 __kmp_destroy_queuing_lock_with_checks( kmp_queuing_lock_t *lck )
1685 {
1686  char const * const func = "omp_destroy_lock";
1687  if ( lck->lk.initialized != lck ) {
1688  KMP_FATAL( LockIsUninitialized, func );
1689  }
1690  if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1691  KMP_FATAL( LockNestableUsedAsSimple, func );
1692  }
1693  if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
1694  KMP_FATAL( LockStillOwned, func );
1695  }
1696  __kmp_destroy_queuing_lock( lck );
1697 }
1698 
1699 
1700 //
1701 // nested queuing locks
1702 //
1703 
1704 int
1705 __kmp_acquire_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1706 {
1707  KMP_DEBUG_ASSERT( gtid >= 0 );
1708 
1709  if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1710  lck->lk.depth_locked += 1;
1711  return KMP_LOCK_ACQUIRED_NEXT;
1712  }
1713  else {
1714  __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid );
1715  KMP_MB();
1716  lck->lk.depth_locked = 1;
1717  KMP_MB();
1718  lck->lk.owner_id = gtid + 1;
1719  return KMP_LOCK_ACQUIRED_FIRST;
1720  }
1721 }
1722 
1723 static int
1724 __kmp_acquire_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1725 {
1726  char const * const func = "omp_set_nest_lock";
1727  if ( lck->lk.initialized != lck ) {
1728  KMP_FATAL( LockIsUninitialized, func );
1729  }
1730  if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1731  KMP_FATAL( LockSimpleUsedAsNestable, func );
1732  }
1733  return __kmp_acquire_nested_queuing_lock( lck, gtid );
1734 }
1735 
1736 int
1737 __kmp_test_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1738 {
1739  int retval;
1740 
1741  KMP_DEBUG_ASSERT( gtid >= 0 );
1742 
1743  if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1744  retval = ++lck->lk.depth_locked;
1745  }
1746  else if ( !__kmp_test_queuing_lock( lck, gtid ) ) {
1747  retval = 0;
1748  }
1749  else {
1750  KMP_MB();
1751  retval = lck->lk.depth_locked = 1;
1752  KMP_MB();
1753  lck->lk.owner_id = gtid + 1;
1754  }
1755  return retval;
1756 }
1757 
1758 static int
1759 __kmp_test_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1760  kmp_int32 gtid )
1761 {
1762  char const * const func = "omp_test_nest_lock";
1763  if ( lck->lk.initialized != lck ) {
1764  KMP_FATAL( LockIsUninitialized, func );
1765  }
1766  if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1767  KMP_FATAL( LockSimpleUsedAsNestable, func );
1768  }
1769  return __kmp_test_nested_queuing_lock( lck, gtid );
1770 }
1771 
1772 int
1773 __kmp_release_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1774 {
1775  KMP_DEBUG_ASSERT( gtid >= 0 );
1776 
1777  KMP_MB();
1778  if ( --(lck->lk.depth_locked) == 0 ) {
1779  KMP_MB();
1780  lck->lk.owner_id = 0;
1781  __kmp_release_queuing_lock( lck, gtid );
1782  return KMP_LOCK_RELEASED;
1783  }
1784  return KMP_LOCK_STILL_HELD;
1785 }
1786 
1787 static int
1788 __kmp_release_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1789 {
1790  char const * const func = "omp_unset_nest_lock";
1791  KMP_MB(); /* in case another processor initialized lock */
1792  if ( lck->lk.initialized != lck ) {
1793  KMP_FATAL( LockIsUninitialized, func );
1794  }
1795  if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1796  KMP_FATAL( LockSimpleUsedAsNestable, func );
1797  }
1798  if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
1799  KMP_FATAL( LockUnsettingFree, func );
1800  }
1801  if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
1802  KMP_FATAL( LockUnsettingSetByAnother, func );
1803  }
1804  return __kmp_release_nested_queuing_lock( lck, gtid );
1805 }
1806 
1807 void
1808 __kmp_init_nested_queuing_lock( kmp_queuing_lock_t * lck )
1809 {
1810  __kmp_init_queuing_lock( lck );
1811  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
1812 }
1813 
1814 static void
1815 __kmp_init_nested_queuing_lock_with_checks( kmp_queuing_lock_t * lck )
1816 {
1817  __kmp_init_nested_queuing_lock( lck );
1818 }
1819 
1820 void
1821 __kmp_destroy_nested_queuing_lock( kmp_queuing_lock_t *lck )
1822 {
1823  __kmp_destroy_queuing_lock( lck );
1824  lck->lk.depth_locked = 0;
1825 }
1826 
1827 static void
1828 __kmp_destroy_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck )
1829 {
1830  char const * const func = "omp_destroy_nest_lock";
1831  if ( lck->lk.initialized != lck ) {
1832  KMP_FATAL( LockIsUninitialized, func );
1833  }
1834  if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1835  KMP_FATAL( LockSimpleUsedAsNestable, func );
1836  }
1837  if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
1838  KMP_FATAL( LockStillOwned, func );
1839  }
1840  __kmp_destroy_nested_queuing_lock( lck );
1841 }
1842 
1843 
1844 //
1845 // access functions to fields which don't exist for all lock kinds.
1846 //
1847 
1848 static int
1849 __kmp_is_queuing_lock_initialized( kmp_queuing_lock_t *lck )
1850 {
1851  return lck == lck->lk.initialized;
1852 }
1853 
1854 static const ident_t *
1855 __kmp_get_queuing_lock_location( kmp_queuing_lock_t *lck )
1856 {
1857  return lck->lk.location;
1858 }
1859 
1860 static void
1861 __kmp_set_queuing_lock_location( kmp_queuing_lock_t *lck, const ident_t *loc )
1862 {
1863  lck->lk.location = loc;
1864 }
1865 
1866 static kmp_lock_flags_t
1867 __kmp_get_queuing_lock_flags( kmp_queuing_lock_t *lck )
1868 {
1869  return lck->lk.flags;
1870 }
1871 
1872 static void
1873 __kmp_set_queuing_lock_flags( kmp_queuing_lock_t *lck, kmp_lock_flags_t flags )
1874 {
1875  lck->lk.flags = flags;
1876 }
1877 
1878 #if KMP_USE_ADAPTIVE_LOCKS
1879 
1880 /*
1881  RTM Adaptive locks
1882 */
1883 
1884 #if KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300
1885 
1886 #include <immintrin.h>
1887 #define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
1888 
1889 #else
1890 
1891 // Values from the status register after failed speculation.
1892 #define _XBEGIN_STARTED (~0u)
1893 #define _XABORT_EXPLICIT (1 << 0)
1894 #define _XABORT_RETRY (1 << 1)
1895 #define _XABORT_CONFLICT (1 << 2)
1896 #define _XABORT_CAPACITY (1 << 3)
1897 #define _XABORT_DEBUG (1 << 4)
1898 #define _XABORT_NESTED (1 << 5)
1899 #define _XABORT_CODE(x) ((unsigned char)(((x) >> 24) & 0xFF))
1900 
1901 // Aborts for which it's worth trying again immediately
1902 #define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
1903 
1904 #define STRINGIZE_INTERNAL(arg) #arg
1905 #define STRINGIZE(arg) STRINGIZE_INTERNAL(arg)
1906 
1907 // Access to RTM instructions
1908 
1909 /*
1910  A version of XBegin which returns -1 on speculation, and the value of EAX on an abort.
1911  This is the same definition as the compiler intrinsic that will be supported at some point.
1912 */
1913 static __inline int _xbegin()
1914 {
1915  int res = -1;
1916 
1917 #if KMP_OS_WINDOWS
1918 #if KMP_ARCH_X86_64
1919  _asm {
1920  _emit 0xC7
1921  _emit 0xF8
1922  _emit 2
1923  _emit 0
1924  _emit 0
1925  _emit 0
1926  jmp L2
1927  mov res, eax
1928  L2:
1929  }
1930 #else /* IA32 */
1931  _asm {
1932  _emit 0xC7
1933  _emit 0xF8
1934  _emit 2
1935  _emit 0
1936  _emit 0
1937  _emit 0
1938  jmp L2
1939  mov res, eax
1940  L2:
1941  }
1942 #endif // KMP_ARCH_X86_64
1943 #else
1944  /* Note that %eax must be noted as killed (clobbered), because
1945  * the XSR is returned in %eax(%rax) on abort. Other register
1946  * values are restored, so don't need to be killed.
1947  *
1948  * We must also mark 'res' as an input and an output, since otherwise
1949  * 'res=-1' may be dropped as being dead, whereas we do need the
1950  * assignment on the successful (i.e., non-abort) path.
1951  */
1952  __asm__ volatile ("1: .byte 0xC7; .byte 0xF8;\n"
1953  " .long 1f-1b-6\n"
1954  " jmp 2f\n"
1955  "1: movl %%eax,%0\n"
1956  "2:"
1957  :"+r"(res)::"memory","%eax");
1958 #endif // KMP_OS_WINDOWS
1959  return res;
1960 }
1961 
1962 /*
1963  Transaction end
1964 */
1965 static __inline void _xend()
1966 {
1967 #if KMP_OS_WINDOWS
1968  __asm {
1969  _emit 0x0f
1970  _emit 0x01
1971  _emit 0xd5
1972  }
1973 #else
1974  __asm__ volatile (".byte 0x0f; .byte 0x01; .byte 0xd5" :::"memory");
1975 #endif
1976 }
1977 
1978 /*
1979  This is a macro; the argument must be a single-byte constant that
1980  can be evaluated by the inline assembler, since it is emitted as a
1981  byte into the assembly code.
1982 */
1983 #if KMP_OS_WINDOWS
1984 #define _xabort(ARG) \
1985  _asm _emit 0xc6 \
1986  _asm _emit 0xf8 \
1987  _asm _emit ARG
1988 #else
1989 #define _xabort(ARG) \
1990  __asm__ volatile (".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG) :::"memory");
1991 #endif
1992 
1993 #endif // KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300
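
// Illustrative sketch, kept out of the build with #if 0: the usual way the
// _xbegin/_xend/_xabort primitives above are combined. Begin a transaction,
// confirm the fallback lock is free (which also puts its cache line in the
// read set so a later real acquisition aborts us), do the protected work,
// then commit. The fallback_* and do_protected_work helpers are placeholders
// for whatever non-speculative lock backs the speculation; they are not
// runtime functions.
#if 0
extern int  fallback_lock_is_free(void);
extern void fallback_acquire(void);
extern void fallback_release(void);
extern void do_protected_work(void);

static void rtm_guarded_update(void)
{
    int retries = 3;
    do {
        unsigned status = _xbegin();
        if (status == _XBEGIN_STARTED) {
            if (! fallback_lock_is_free()) {
                _xabort(0x01);          // lock is really held: abort back to _xbegin
            }
            do_protected_work();        // runs transactionally
            _xend();                    // commit
            return;
        }
        if (!(status & SOFT_ABORT_MASK))
            break;                      // hard failure: no point retrying speculation
    } while (retries--);

    fallback_acquire();                 // give up on speculation entirely
    do_protected_work();
    fallback_release();
}
#endif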
1994 
1995 //
1996 // Statistics are collected for testing purposes.
1997 //
1998 #if KMP_DEBUG_ADAPTIVE_LOCKS
1999 
2000 // We accumulate speculative lock statistics when the lock is destroyed.
2001 // We keep locks that haven't been destroyed in the liveLocks list
2002 // so that we can grab their statistics too.
2003 static kmp_adaptive_lock_statistics_t destroyedStats;
2004 
2005 // To hold the list of live locks.
2006 static kmp_adaptive_lock_info_t liveLocks;
2007 
2008 // A lock so we can safely update the list of locks.
2009 static kmp_bootstrap_lock_t chain_lock;
2010 
2011 // Initialize the list of stats.
2012 void
2013 __kmp_init_speculative_stats()
2014 {
2015  kmp_adaptive_lock_info_t *lck = &liveLocks;
2016 
2017  memset( ( void * ) & ( lck->stats ), 0, sizeof( lck->stats ) );
2018  lck->stats.next = lck;
2019  lck->stats.prev = lck;
2020 
2021  KMP_ASSERT( lck->stats.next->stats.prev == lck );
2022  KMP_ASSERT( lck->stats.prev->stats.next == lck );
2023 
2024  __kmp_init_bootstrap_lock( &chain_lock );
2025 
2026 }
2027 
2028 // Insert the lock into the circular list
2029 static void
2030 __kmp_remember_lock( kmp_adaptive_lock_info_t * lck )
2031 {
2032  __kmp_acquire_bootstrap_lock( &chain_lock );
2033 
2034  lck->stats.next = liveLocks.stats.next;
2035  lck->stats.prev = &liveLocks;
2036 
2037  liveLocks.stats.next = lck;
2038  lck->stats.next->stats.prev = lck;
2039 
2040  KMP_ASSERT( lck->stats.next->stats.prev == lck );
2041  KMP_ASSERT( lck->stats.prev->stats.next == lck );
2042 
2043  __kmp_release_bootstrap_lock( &chain_lock );
2044 }
2045 
2046 static void
2047 __kmp_forget_lock( kmp_adaptive_lock_info_t * lck )
2048 {
2049  KMP_ASSERT( lck->stats.next->stats.prev == lck );
2050  KMP_ASSERT( lck->stats.prev->stats.next == lck );
2051 
2052  kmp_adaptive_lock_info_t * n = lck->stats.next;
2053  kmp_adaptive_lock_info_t * p = lck->stats.prev;
2054 
2055  n->stats.prev = p;
2056  p->stats.next = n;
2057 }
2058 
2059 static void
2060 __kmp_zero_speculative_stats( kmp_adaptive_lock_info_t * lck )
2061 {
2062  memset( ( void * )&lck->stats, 0, sizeof( lck->stats ) );
2063  __kmp_remember_lock( lck );
2064 }
2065 
2066 static void
2067 __kmp_add_stats( kmp_adaptive_lock_statistics_t * t, kmp_adaptive_lock_info_t * lck )
2068 {
2069  kmp_adaptive_lock_statistics_t volatile *s = &lck->stats;
2070 
2071  t->nonSpeculativeAcquireAttempts += lck->acquire_attempts;
2072  t->successfulSpeculations += s->successfulSpeculations;
2073  t->hardFailedSpeculations += s->hardFailedSpeculations;
2074  t->softFailedSpeculations += s->softFailedSpeculations;
2075  t->nonSpeculativeAcquires += s->nonSpeculativeAcquires;
2076  t->lemmingYields += s->lemmingYields;
2077 }
2078 
2079 static void
2080 __kmp_accumulate_speculative_stats( kmp_adaptive_lock_info_t * lck)
2081 {
2082  kmp_adaptive_lock_statistics_t *t = &destroyedStats;
2083 
2084  __kmp_acquire_bootstrap_lock( &chain_lock );
2085 
2086  __kmp_add_stats( &destroyedStats, lck );
2087  __kmp_forget_lock( lck );
2088 
2089  __kmp_release_bootstrap_lock( &chain_lock );
2090 }
2091 
2092 static float
2093 percent (kmp_uint32 count, kmp_uint32 total)
2094 {
2095  return (total == 0) ? 0.0: (100.0 * count)/total;
2096 }
2097 
2098 static
2099 FILE * __kmp_open_stats_file()
2100 {
2101  if (strcmp (__kmp_speculative_statsfile, "-") == 0)
2102  return stdout;
2103 
2104  size_t buffLen = KMP_STRLEN( __kmp_speculative_statsfile ) + 20;
2105  char buffer[buffLen];
2106  KMP_SNPRINTF (&buffer[0], buffLen, __kmp_speculative_statsfile,
2107  (kmp_int32)getpid());
2108  FILE * result = fopen(&buffer[0], "w");
2109 
2110  // Maybe we should issue a warning here...
2111  return result ? result : stdout;
2112 }
2113 
2114 void
2115 __kmp_print_speculative_stats()
2116 {
2117  if (__kmp_user_lock_kind != lk_adaptive)
2118  return;
2119 
2120  FILE * statsFile = __kmp_open_stats_file();
2121 
2122  kmp_adaptive_lock_statistics_t total = destroyedStats;
2123  kmp_adaptive_lock_info_t *lck;
2124 
2125  for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) {
2126  __kmp_add_stats( &total, lck );
2127  }
2128  kmp_adaptive_lock_statistics_t *t = &total;
2129  kmp_uint32 totalSections = t->nonSpeculativeAcquires + t->successfulSpeculations;
2130  kmp_uint32 totalSpeculations = t->successfulSpeculations + t->hardFailedSpeculations +
2131  t->softFailedSpeculations;
2132 
2133  fprintf ( statsFile, "Speculative lock statistics (all approximate!)\n");
2134  fprintf ( statsFile, " Lock parameters: \n"
2135  " max_soft_retries : %10d\n"
2136  " max_badness : %10d\n",
2137  __kmp_adaptive_backoff_params.max_soft_retries,
2138  __kmp_adaptive_backoff_params.max_badness);
2139  fprintf( statsFile, " Non-speculative acquire attempts : %10d\n", t->nonSpeculativeAcquireAttempts );
2140  fprintf( statsFile, " Total critical sections : %10d\n", totalSections );
2141  fprintf( statsFile, " Successful speculations : %10d (%5.1f%%)\n",
2142  t->successfulSpeculations, percent( t->successfulSpeculations, totalSections ) );
2143  fprintf( statsFile, " Non-speculative acquires : %10d (%5.1f%%)\n",
2144  t->nonSpeculativeAcquires, percent( t->nonSpeculativeAcquires, totalSections ) );
2145  fprintf( statsFile, " Lemming yields : %10d\n\n", t->lemmingYields );
2146 
2147  fprintf( statsFile, " Speculative acquire attempts : %10d\n", totalSpeculations );
2148  fprintf( statsFile, " Successes : %10d (%5.1f%%)\n",
2149  t->successfulSpeculations, percent( t->successfulSpeculations, totalSpeculations ) );
2150  fprintf( statsFile, " Soft failures : %10d (%5.1f%%)\n",
2151  t->softFailedSpeculations, percent( t->softFailedSpeculations, totalSpeculations ) );
2152  fprintf( statsFile, " Hard failures : %10d (%5.1f%%)\n",
2153  t->hardFailedSpeculations, percent( t->hardFailedSpeculations, totalSpeculations ) );
2154 
2155  if (statsFile != stdout)
2156  fclose( statsFile );
2157 }
2158 
2159 # define KMP_INC_STAT(lck,stat) ( lck->lk.adaptive.stats.stat++ )
2160 #else
2161 # define KMP_INC_STAT(lck,stat)
2162 
2163 #endif // KMP_DEBUG_ADAPTIVE_LOCKS
2164 
2165 static inline bool
2166 __kmp_is_unlocked_queuing_lock( kmp_queuing_lock_t *lck )
2167 {
2168  // It is enough to check that the head_id is zero;
2169  // we don't need to check the tail as well.
2170  bool res = lck->lk.head_id == 0;
2171 
2172  // We need a fence here, since we must ensure that no memory operations
2173  // from later in this thread float above that read.
2174 #if KMP_COMPILER_ICC
2175  _mm_mfence();
2176 #else
2177  __sync_synchronize();
2178 #endif
2179 
2180  return res;
2181 }
2182 
2183 // Functions for manipulating the badness
2184 static __inline void
2185 __kmp_update_badness_after_success( kmp_adaptive_lock_t *lck )
2186 {
2187  // Reset the badness to zero so we eagerly try to speculate again
2188  lck->lk.adaptive.badness = 0;
2189  KMP_INC_STAT(lck,successfulSpeculations);
2190 }
2191 
2192 // Create a bit mask with one more set bit.
2193 static __inline void
2194 __kmp_step_badness( kmp_adaptive_lock_t *lck )
2195 {
2196  kmp_uint32 newBadness = ( lck->lk.adaptive.badness << 1 ) | 1;
2197  if ( newBadness > lck->lk.adaptive.max_badness) {
2198  return;
2199  } else {
2200  lck->lk.adaptive.badness = newBadness;
2201  }
2202 }
2203 
2204 // Check whether speculation should be attempted.
2205 static __inline int
2206 __kmp_should_speculate( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2207 {
2208  kmp_uint32 badness = lck->lk.adaptive.badness;
2209  kmp_uint32 attempts= lck->lk.adaptive.acquire_attempts;
2210  int res = (attempts & badness) == 0;
2211  return res;
2212 }
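
// Illustrative sketch, kept out of the build with #if 0: how the badness mask
// above throttles speculation. With badness == 0x7, (attempts & badness) == 0
// only once every 8 acquire attempts, so repeated failures quickly back the
// lock off to occasional speculative retries. Plain unsigned ints stand in
// for the kmp_uint32 fields, and the cap of 16 is an arbitrary stand-in for
// __kmp_adaptive_backoff_params.max_badness.
#if 0
#include <stdio.h>

static void badness_demo(void)
{
    unsigned badness = 0;              // starts eager: every attempt may speculate
    unsigned max_badness = 16;         // arbitrary cap for the demo
    for (unsigned attempts = 0; attempts < 20; ++attempts) {
        int speculate = ((attempts & badness) == 0);
        printf("attempt %2u  badness 0x%02x -> %s\n",
               attempts, badness, speculate ? "speculate" : "skip");
        if (speculate) {
            // Pretend the speculation failed: widen the mask by one bit,
            // exactly as __kmp_step_badness() does, but never past the cap.
            unsigned newBadness = (badness << 1) | 1;
            if (newBadness <= max_badness)
                badness = newBadness;
        }
    }
}
#endif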
2213 
2214 // Attempt to acquire only the speculative lock.
2215 // Does not back off to the non-speculative lock.
2216 //
2217 static int
2218 __kmp_test_adaptive_lock_only( kmp_adaptive_lock_t * lck, kmp_int32 gtid )
2219 {
2220  int retries = lck->lk.adaptive.max_soft_retries;
2221 
2222  // We don't explicitly count the start of speculation; rather, we record
2223  // the results (success, hard fail, soft fail). The sum of all of those
2224  // is the total number of times we started speculation, since every
2225  // speculation must end in one of those ways.
2226  do
2227  {
2228  kmp_uint32 status = _xbegin();
2229  // Switch this in to disable actual speculation but exercise
2230  // at least some of the rest of the code. Useful for debugging...
2231  // kmp_uint32 status = _XABORT_NESTED;
2232 
2233  if (status == _XBEGIN_STARTED )
2234  { /* We have successfully started speculation.
2235  * Check that no one acquired the lock for real between when we last looked
2236  * and now. This also gets the lock cache line into our read-set,
2237  * which we need so that we'll abort if anyone later claims it for real.
2238  */
2239  if (! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
2240  {
2241  // Lock is now visibly acquired, so someone beat us to it.
2242  // Abort the transaction so we'll restart from _xbegin with the
2243  // failure status.
2244  _xabort(0x01);
2245  KMP_ASSERT2( 0, "should not get here" );
2246  }
2247  return 1; // Lock has been acquired (speculatively)
2248  } else {
2249  // We have aborted, update the statistics
2250  if ( status & SOFT_ABORT_MASK)
2251  {
2252  KMP_INC_STAT(lck,softFailedSpeculations);
2253  // and loop round to retry.
2254  }
2255  else
2256  {
2257  KMP_INC_STAT(lck,hardFailedSpeculations);
2258  // Give up if we had a hard failure.
2259  break;
2260  }
2261  }
2262  } while( retries-- ); // Loop while we have retries, and didn't fail hard.
2263 
2264  // Either we had a hard failure or we didn't succeed softly after
2265  // the full set of attempts, so step up the badness to back off from speculation.
2266  __kmp_step_badness( lck );
2267  return 0;
2268 }
2269 
2270 // Attempt to acquire the speculative lock, or back off to the non-speculative one
2271 // if the speculative lock cannot be acquired.
2272 // We can succeed speculatively, non-speculatively, or fail.
2273 static int
2274 __kmp_test_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2275 {
2276  // First try to acquire the lock speculatively
2277  if ( __kmp_should_speculate( lck, gtid ) && __kmp_test_adaptive_lock_only( lck, gtid ) )
2278  return 1;
2279 
2280  // Speculative acquisition failed, so try to acquire it non-speculatively.
2281  // Count the non-speculative acquire attempt
2282  lck->lk.adaptive.acquire_attempts++;
2283 
2284  // Use base, non-speculative lock.
2285  if ( __kmp_test_queuing_lock( GET_QLK_PTR(lck), gtid ) )
2286  {
2287  KMP_INC_STAT(lck,nonSpeculativeAcquires);
2288  return 1; // Lock is acquired (non-speculatively)
2289  }
2290  else
2291  {
2292  return 0; // Failed to acquire the lock, it's already visibly locked.
2293  }
2294 }
2295 
2296 static int
2297 __kmp_test_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2298 {
2299  char const * const func = "omp_test_lock";
2300  if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2301  KMP_FATAL( LockIsUninitialized, func );
2302  }
2303 
2304  int retval = __kmp_test_adaptive_lock( lck, gtid );
2305 
2306  if ( retval ) {
2307  lck->lk.qlk.owner_id = gtid + 1;
2308  }
2309  return retval;
2310 }
2311 
2312 // Block until we can acquire a speculative, adaptive lock.
2313 // We check whether we should be trying to speculate.
2314 // If we should be, we check the real lock to see if it is free,
2315 // and, if not, pause without attempting to acquire it until it is.
2316 // Then we try the speculative acquire.
2317 // This means that although we suffer from lemmings a little
2318 // (because we can't acquire the lock speculatively until
2319 // the queue of waiting threads has cleared), we don't get into a
2320 // state where we can never acquire the lock speculatively (because we
2321 // force the queue to clear by preventing new arrivals from entering the
2322 // queue).
2323 // This does mean that when we're trying to break lemmings, the lock
2324 // is no longer fair. However OpenMP makes no guarantee that its
2325 // locks are fair, so this isn't a real problem.
2326 static void
2327 __kmp_acquire_adaptive_lock( kmp_adaptive_lock_t * lck, kmp_int32 gtid )
2328 {
2329  if ( __kmp_should_speculate( lck, gtid ) )
2330  {
2331  if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
2332  {
2333  if ( __kmp_test_adaptive_lock_only( lck , gtid ) )
2334  return;
2335  // We tried speculation and failed, so give up.
2336  }
2337  else
2338  {
2339  // We can't try speculation until the lock is free, so we
2340  // pause here (without suspending on the queuing lock)
2341  // to allow it to drain, then try again.
2342  // All other threads will also see the same result for
2343  // __kmp_should_speculate, so will be doing the same if they
2344  // try to claim the lock from now on.
2345  while ( ! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
2346  {
2347  KMP_INC_STAT(lck,lemmingYields);
2348  __kmp_yield (TRUE);
2349  }
2350 
2351  if ( __kmp_test_adaptive_lock_only( lck, gtid ) )
2352  return;
2353  }
2354  }
2355 
2356  // Speculative acquisition failed, so acquire it non-speculatively.
2357  // Count the non-speculative acquire attempt
2358  lck->lk.adaptive.acquire_attempts++;
2359 
2360  __kmp_acquire_queuing_lock_timed_template<FALSE>( GET_QLK_PTR(lck), gtid );
2361  // We have acquired the base lock, so count that.
2362  KMP_INC_STAT(lck,nonSpeculativeAcquires );
2363 }
2364 
2365 static void
2366 __kmp_acquire_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2367 {
2368  char const * const func = "omp_set_lock";
2369  if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2370  KMP_FATAL( LockIsUninitialized, func );
2371  }
2372  if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == gtid ) {
2373  KMP_FATAL( LockIsAlreadyOwned, func );
2374  }
2375 
2376  __kmp_acquire_adaptive_lock( lck, gtid );
2377 
2378  lck->lk.qlk.owner_id = gtid + 1;
2379 }
2380 
2381 static int
2382 __kmp_release_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2383 {
2384  if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
2385  { // If the lock doesn't look claimed we must be speculating.
2386  // (Or the user's code is buggy and they're releasing without locking;
2387  // if we had XTEST we'd be able to check that case...)
2388  _xend(); // Exit speculation
2389  __kmp_update_badness_after_success( lck );
2390  }
2391  else
2392  { // Since the lock *is* visibly locked we're not speculating,
2393  // so should use the underlying lock's release scheme.
2394  __kmp_release_queuing_lock( GET_QLK_PTR(lck), gtid );
2395  }
2396  return KMP_LOCK_RELEASED;
2397 }
2398 
2399 static int
2400 __kmp_release_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2401 {
2402  char const * const func = "omp_unset_lock";
2403  KMP_MB(); /* in case another processor initialized lock */
2404  if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2405  KMP_FATAL( LockIsUninitialized, func );
2406  }
2407  if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == -1 ) {
2408  KMP_FATAL( LockUnsettingFree, func );
2409  }
2410  if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != gtid ) {
2411  KMP_FATAL( LockUnsettingSetByAnother, func );
2412  }
2413  lck->lk.qlk.owner_id = 0;
2414  __kmp_release_adaptive_lock( lck, gtid );
2415  return KMP_LOCK_RELEASED;
2416 }
2417 
2418 static void
2419 __kmp_init_adaptive_lock( kmp_adaptive_lock_t *lck )
2420 {
2421  __kmp_init_queuing_lock( GET_QLK_PTR(lck) );
2422  lck->lk.adaptive.badness = 0;
2423  lck->lk.adaptive.acquire_attempts = 0; //nonSpeculativeAcquireAttempts = 0;
2424  lck->lk.adaptive.max_soft_retries = __kmp_adaptive_backoff_params.max_soft_retries;
2425  lck->lk.adaptive.max_badness = __kmp_adaptive_backoff_params.max_badness;
2426 #if KMP_DEBUG_ADAPTIVE_LOCKS
2427  __kmp_zero_speculative_stats( &lck->lk.adaptive );
2428 #endif
2429  KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck));
2430 }
2431 
2432 static void
2433 __kmp_init_adaptive_lock_with_checks( kmp_adaptive_lock_t * lck )
2434 {
2435  __kmp_init_adaptive_lock( lck );
2436 }
2437 
2438 static void
2439 __kmp_destroy_adaptive_lock( kmp_adaptive_lock_t *lck )
2440 {
2441 #if KMP_DEBUG_ADAPTIVE_LOCKS
2442  __kmp_accumulate_speculative_stats( &lck->lk.adaptive );
2443 #endif
2444  __kmp_destroy_queuing_lock (GET_QLK_PTR(lck));
2445  // Nothing needed for the speculative part.
2446 }
2447 
2448 static void
2449 __kmp_destroy_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck )
2450 {
2451  char const * const func = "omp_destroy_lock";
2452  if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2453  KMP_FATAL( LockIsUninitialized, func );
2454  }
2455  if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != -1 ) {
2456  KMP_FATAL( LockStillOwned, func );
2457  }
2458  __kmp_destroy_adaptive_lock( lck );
2459 }
2460 
2461 
2462 #endif // KMP_USE_ADAPTIVE_LOCKS
2463 
2464 
2465 /* ------------------------------------------------------------------------ */
2466 /* DRDPA ticket locks */
2467 /* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */
2468 
2469 static kmp_int32
2470 __kmp_get_drdpa_lock_owner( kmp_drdpa_lock_t *lck )
2471 {
2472  return TCR_4( lck->lk.owner_id ) - 1;
2473 }
2474 
2475 static inline bool
2476 __kmp_is_drdpa_lock_nestable( kmp_drdpa_lock_t *lck )
2477 {
2478  return lck->lk.depth_locked != -1;
2479 }
2480 
2481 __forceinline static int
2482 __kmp_acquire_drdpa_lock_timed_template( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2483 {
2484  kmp_uint64 ticket = KMP_TEST_THEN_INC64((kmp_int64 *)&lck->lk.next_ticket);
2485  kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load
2486  volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2487  = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2488  TCR_PTR(lck->lk.polls); // volatile load
2489 
2490 #ifdef USE_LOCK_PROFILE
2491  if (TCR_8(polls[ticket & mask].poll) != ticket)
2492  __kmp_printf("LOCK CONTENTION: %p\n", lck);
2493  /* else __kmp_printf( "." );*/
2494 #endif /* USE_LOCK_PROFILE */
2495 
2496  //
2497  // Now spin-wait, but reload the polls pointer and mask, in case the
2498  // polling area has been reconfigured. Unless it is reconfigured, the
2499  // reloads stay in L1 cache and are cheap.
2500  //
2501  // Keep this code in sync with KMP_WAIT_YIELD, in kmp_dispatch.c !!!
2502  //
2503  // The current implementation of KMP_WAIT_YIELD doesn't allow for mask
2504  // and poll to be re-read every spin iteration.
2505  //
2506  kmp_uint32 spins;
2507 
2508  KMP_FSYNC_PREPARE(lck);
2509  KMP_INIT_YIELD(spins);
2510  while (TCR_8(polls[ticket & mask]).poll < ticket) { // volatile load
2511  // If we are oversubscribed,
2512  // or have waited a bit (and KMP_LIBRARY=turnaround), then yield.
2513  // CPU Pause is in the macros for yield.
2514  //
2515  KMP_YIELD(TCR_4(__kmp_nth)
2516  > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
2517  KMP_YIELD_SPIN(spins);
2518 
2519  // Re-read the mask and the poll pointer from the lock structure.
2520  //
2521  // Make certain that "mask" is read before "polls" !!!
2522  //
2523  // If another thread reconfigures the polling area and updates
2524  // these values, and we get the new value of mask and the old polls
2525  // pointer, we could access memory beyond the end of the old polling
2526  // area.
2527  //
2528  mask = TCR_8(lck->lk.mask); // volatile load
2529  polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2530  TCR_PTR(lck->lk.polls); // volatile load
2531  }
2532 
2533  //
2534  // Critical section starts here
2535  //
2536  KMP_FSYNC_ACQUIRED(lck);
2537  KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n",
2538  ticket, lck));
2539  lck->lk.now_serving = ticket; // non-volatile store
2540 
2541  //
2542  // Deallocate a garbage polling area if we know that we are the last
2543  // thread that could possibly access it.
2544  //
2545  // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup
2546  // ticket.
2547  //
2548  if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) {
2549  __kmp_free((void *)lck->lk.old_polls);
2550  lck->lk.old_polls = NULL;
2551  lck->lk.cleanup_ticket = 0;
2552  }
2553 
2554  //
2555  // Check to see if we should reconfigure the polling area.
2556  // If there is still a garbage polling area to be deallocated from a
2557  // previous reconfiguration, let a later thread reconfigure it.
2558  //
2559  if (lck->lk.old_polls == NULL) {
2560  bool reconfigure = false;
2561  volatile struct kmp_base_drdpa_lock::kmp_lock_poll *old_polls = polls;
2562  kmp_uint32 num_polls = TCR_4(lck->lk.num_polls);
2563 
2564  if (TCR_4(__kmp_nth)
2565  > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
2566  //
2567  // We are in oversubscription mode. Contract the polling area
2568  // down to a single location, if that hasn't been done already.
2569  //
2570  if (num_polls > 1) {
2571  reconfigure = true;
2572  num_polls = TCR_4(lck->lk.num_polls);
2573  mask = 0;
2574  num_polls = 1;
2575  polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2576  __kmp_allocate(num_polls * sizeof(*polls));
2577  polls[0].poll = ticket;
2578  }
2579  }
2580  else {
2581  //
2582  // We are in under/fully subscribed mode. Check the number of
2583  // threads waiting on the lock. The size of the polling area
2584  // should be at least the number of threads waiting.
2585  //
2586  kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1;
2587  if (num_waiting > num_polls) {
2588  kmp_uint32 old_num_polls = num_polls;
2589  reconfigure = true;
2590  do {
2591  mask = (mask << 1) | 1;
2592  num_polls *= 2;
2593  } while (num_polls <= num_waiting);
2594 
2595  //
2596  // Allocate the new polling area, and copy the relevant portion
2597  // of the old polling area to the new area. __kmp_allocate()
2598  // zeroes the memory it allocates, and most of the old area is
2599  // just zero padding, so we only copy the release counters.
2600  //
2601  polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2602  __kmp_allocate(num_polls * sizeof(*polls));
2603  kmp_uint32 i;
2604  for (i = 0; i < old_num_polls; i++) {
2605  polls[i].poll = old_polls[i].poll;
2606  }
2607  }
2608  }
2609 
2610  if (reconfigure) {
2611  //
2612  // Now write the updated fields back to the lock structure.
2613  //
2614  // Make certain that "polls" is written before "mask" !!!
2615  //
2616  // If another thread picks up the new value of mask and the old
2617  // polls pointer, it could access memory beyond the end of the
2618  // old polling area.
2619  //
2620  // On x86, we need memory fences.
2621  //
2622  KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring lock %p to %d polls\n",
2623  ticket, lck, num_polls));
2624 
2625  lck->lk.old_polls = old_polls; // non-volatile store
2626  lck->lk.polls = polls; // volatile store
2627 
2628  KMP_MB();
2629 
2630  lck->lk.num_polls = num_polls; // non-volatile store
2631  lck->lk.mask = mask; // volatile store
2632 
2633  KMP_MB();
2634 
2635  //
2636  // Only after the new polling area and mask have been flushed
2637  // to main memory can we update the cleanup ticket field.
2638  //
2639  // volatile load / non-volatile store
2640  //
2641  lck->lk.cleanup_ticket = TCR_8(lck->lk.next_ticket);
2642  }
2643  }
2644  return KMP_LOCK_ACQUIRED_FIRST;
2645 }
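
// Illustrative sketch, kept out of the build with #if 0: the expansion rule
// used in the reconfiguration path above. num_polls stays a power of two
// with mask == num_polls - 1, so each ticket spins on slot (ticket & mask);
// when the waiters outnumber the polls, the area is doubled until it covers
// every waiter again. Plain C types stand in for the kmp_uint32/kmp_uint64
// lock fields.
#if 0
static void drdpa_grow_demo(void)
{
    unsigned long long num_waiting = 5;    // e.g. 5 threads already queued
    unsigned num_polls = 1;
    unsigned long long mask = 0;
    while (num_polls <= num_waiting) {     // same doubling as the acquire path
        mask = (mask << 1) | 1;
        num_polls *= 2;
    }
    // Result: num_polls == 8, mask == 0x7, so tickets 8..15 poll
    // slots 0..7 respectively, one waiter per slot.
}
#endif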
2646 
2647 int
2648 __kmp_acquire_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2649 {
2650  return __kmp_acquire_drdpa_lock_timed_template( lck, gtid );
2651 }
2652 
2653 static int
2654 __kmp_acquire_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2655 {
2656  char const * const func = "omp_set_lock";
2657  if ( lck->lk.initialized != lck ) {
2658  KMP_FATAL( LockIsUninitialized, func );
2659  }
2660  if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2661  KMP_FATAL( LockNestableUsedAsSimple, func );
2662  }
2663  if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) ) {
2664  KMP_FATAL( LockIsAlreadyOwned, func );
2665  }
2666 
2667  __kmp_acquire_drdpa_lock( lck, gtid );
2668 
2669  lck->lk.owner_id = gtid + 1;
2670  return KMP_LOCK_ACQUIRED_FIRST;
2671 }
2672 
2673 int
2674 __kmp_test_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2675 {
2676  //
2677  // First get a ticket, then read the polls pointer and the mask.
2678  // The polls pointer must be read before the mask!!! (See above)
2679  //
2680  kmp_uint64 ticket = TCR_8(lck->lk.next_ticket); // volatile load
2681  volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2682  = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2683  TCR_PTR(lck->lk.polls); // volatile load
2684  kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load
2685  if (TCR_8(polls[ticket & mask].poll) == ticket) {
2686  kmp_uint64 next_ticket = ticket + 1;
2687  if (KMP_COMPARE_AND_STORE_ACQ64((kmp_int64 *)&lck->lk.next_ticket,
2688  ticket, next_ticket)) {
2689  KMP_FSYNC_ACQUIRED(lck);
2690  KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n",
2691  ticket, lck));
2692  lck->lk.now_serving = ticket; // non-volatile store
2693 
2694  //
2695  // Since no threads are waiting, there is no possibility that
2696  // we would want to reconfigure the polling area. We might
2697  // have the cleanup ticket value (which says that it is now
2698  // safe to deallocate old_polls), but we'll let a later thread
2699  // which calls __kmp_acquire_lock do that - this routine
2700  // isn't supposed to block, and we would risk blocks if we
2701  // called __kmp_free() to do the deallocation.
2702  //
2703  return TRUE;
2704  }
2705  }
2706  return FALSE;
2707 }
2708 
2709 static int
2710 __kmp_test_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2711 {
2712  char const * const func = "omp_test_lock";
2713  if ( lck->lk.initialized != lck ) {
2714  KMP_FATAL( LockIsUninitialized, func );
2715  }
2716  if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2717  KMP_FATAL( LockNestableUsedAsSimple, func );
2718  }
2719 
2720  int retval = __kmp_test_drdpa_lock( lck, gtid );
2721 
2722  if ( retval ) {
2723  lck->lk.owner_id = gtid + 1;
2724  }
2725  return retval;
2726 }
2727 
2728 int
2729 __kmp_release_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2730 {
2731  //
2732  // Read the ticket value from the lock data struct, then the polls
2733  // pointer and the mask. The polls pointer must be read before the
2734  // mask!!! (See above)
2735  //
2736  kmp_uint64 ticket = lck->lk.now_serving + 1; // non-volatile load
2737  volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2738  = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2739  TCR_PTR(lck->lk.polls); // volatile load
2740  kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load
2741  KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n",
2742  ticket - 1, lck));
2743  KMP_FSYNC_RELEASING(lck);
2744  KMP_ST_REL64(&(polls[ticket & mask].poll), ticket); // volatile store
2745  return KMP_LOCK_RELEASED;
2746 }
2747 
2748 static int
2749 __kmp_release_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2750 {
2751  char const * const func = "omp_unset_lock";
2752  KMP_MB(); /* in case another processor initialized lock */
2753  if ( lck->lk.initialized != lck ) {
2754  KMP_FATAL( LockIsUninitialized, func );
2755  }
2756  if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2757  KMP_FATAL( LockNestableUsedAsSimple, func );
2758  }
2759  if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) {
2760  KMP_FATAL( LockUnsettingFree, func );
2761  }
2762  if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) >= 0 )
2763  && ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) ) {
2764  KMP_FATAL( LockUnsettingSetByAnother, func );
2765  }
2766  lck->lk.owner_id = 0;
2767  return __kmp_release_drdpa_lock( lck, gtid );
2768 }
2769 
2770 void
2771 __kmp_init_drdpa_lock( kmp_drdpa_lock_t *lck )
2772 {
2773  lck->lk.location = NULL;
2774  lck->lk.mask = 0;
2775  lck->lk.num_polls = 1;
2776  lck->lk.polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2777  __kmp_allocate(lck->lk.num_polls * sizeof(*(lck->lk.polls)));
2778  lck->lk.cleanup_ticket = 0;
2779  lck->lk.old_polls = NULL;
2780  lck->lk.next_ticket = 0;
2781  lck->lk.now_serving = 0;
2782  lck->lk.owner_id = 0; // no thread owns the lock.
2783  lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
2784  lck->lk.initialized = lck;
2785 
2786  KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck));
2787 }
2788 
2789 static void
2790 __kmp_init_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck )
2791 {
2792  __kmp_init_drdpa_lock( lck );
2793 }
2794 
2795 void
2796 __kmp_destroy_drdpa_lock( kmp_drdpa_lock_t *lck )
2797 {
2798  lck->lk.initialized = NULL;
2799  lck->lk.location = NULL;
2800  if (lck->lk.polls != NULL) {
2801  __kmp_free((void *)lck->lk.polls);
2802  lck->lk.polls = NULL;
2803  }
2804  if (lck->lk.old_polls != NULL) {
2805  __kmp_free((void *)lck->lk.old_polls);
2806  lck->lk.old_polls = NULL;
2807  }
2808  lck->lk.mask = 0;
2809  lck->lk.num_polls = 0;
2810  lck->lk.cleanup_ticket = 0;
2811  lck->lk.next_ticket = 0;
2812  lck->lk.now_serving = 0;
2813  lck->lk.owner_id = 0;
2814  lck->lk.depth_locked = -1;
2815 }
2816 
2817 static void
2818 __kmp_destroy_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck )
2819 {
2820  char const * const func = "omp_destroy_lock";
2821  if ( lck->lk.initialized != lck ) {
2822  KMP_FATAL( LockIsUninitialized, func );
2823  }
2824  if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2825  KMP_FATAL( LockNestableUsedAsSimple, func );
2826  }
2827  if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) {
2828  KMP_FATAL( LockStillOwned, func );
2829  }
2830  __kmp_destroy_drdpa_lock( lck );
2831 }
2832 
2833 
2834 //
2835 // nested drdpa ticket locks
2836 //
2837 
2838 int
2839 __kmp_acquire_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2840 {
2841  KMP_DEBUG_ASSERT( gtid >= 0 );
2842 
2843  if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) {
2844  lck->lk.depth_locked += 1;
2845  return KMP_LOCK_ACQUIRED_NEXT;
2846  }
2847  else {
2848  __kmp_acquire_drdpa_lock_timed_template( lck, gtid );
2849  KMP_MB();
2850  lck->lk.depth_locked = 1;
2851  KMP_MB();
2852  lck->lk.owner_id = gtid + 1;
2853  return KMP_LOCK_ACQUIRED_FIRST;
2854  }
2855 }
2856 
2857 static void
2858 __kmp_acquire_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2859 {
2860  char const * const func = "omp_set_nest_lock";
2861  if ( lck->lk.initialized != lck ) {
2862  KMP_FATAL( LockIsUninitialized, func );
2863  }
2864  if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2865  KMP_FATAL( LockSimpleUsedAsNestable, func );
2866  }
2867  __kmp_acquire_nested_drdpa_lock( lck, gtid );
2868 }
2869 
2870 int
2871 __kmp_test_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2872 {
2873  int retval;
2874 
2875  KMP_DEBUG_ASSERT( gtid >= 0 );
2876 
2877  if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) {
2878  retval = ++lck->lk.depth_locked;
2879  }
2880  else if ( !__kmp_test_drdpa_lock( lck, gtid ) ) {
2881  retval = 0;
2882  }
2883  else {
2884  KMP_MB();
2885  retval = lck->lk.depth_locked = 1;
2886  KMP_MB();
2887  lck->lk.owner_id = gtid + 1;
2888  }
2889  return retval;
2890 }
2891 
2892 static int
2893 __kmp_test_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2894 {
2895  char const * const func = "omp_test_nest_lock";
2896  if ( lck->lk.initialized != lck ) {
2897  KMP_FATAL( LockIsUninitialized, func );
2898  }
2899  if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2900  KMP_FATAL( LockSimpleUsedAsNestable, func );
2901  }
2902  return __kmp_test_nested_drdpa_lock( lck, gtid );
2903 }
2904 
2905 int
2906 __kmp_release_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2907 {
2908  KMP_DEBUG_ASSERT( gtid >= 0 );
2909 
2910  KMP_MB();
2911  if ( --(lck->lk.depth_locked) == 0 ) {
2912  KMP_MB();
2913  lck->lk.owner_id = 0;
2914  __kmp_release_drdpa_lock( lck, gtid );
2915  return KMP_LOCK_RELEASED;
2916  }
2917  return KMP_LOCK_STILL_HELD;
2918 }
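
// Small user-level sketch, kept out of the build with #if 0, of the behavior
// the depth counter above implements. Each omp_set_nest_lock() by the owning
// thread bumps the depth instead of deadlocking, and the lock is only truly
// released once the matching number of omp_unset_nest_lock() calls brings
// the depth back to zero.
#if 0
#include <omp.h>

static void nest_lock_demo(void)
{
    omp_nest_lock_t l;
    omp_init_nest_lock(&l);
    omp_set_nest_lock(&l);      // depth 1: lock acquired
    omp_set_nest_lock(&l);      // depth 2: same owner, no deadlock
    omp_unset_nest_lock(&l);    // depth 1: still held
    omp_unset_nest_lock(&l);    // depth 0: released for other threads
    omp_destroy_nest_lock(&l);
}
#endif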
2919 
2920 static int
2921 __kmp_release_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2922 {
2923  char const * const func = "omp_unset_nest_lock";
2924  KMP_MB(); /* in case another processor initialized lock */
2925  if ( lck->lk.initialized != lck ) {
2926  KMP_FATAL( LockIsUninitialized, func );
2927  }
2928  if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2929  KMP_FATAL( LockSimpleUsedAsNestable, func );
2930  }
2931  if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) {
2932  KMP_FATAL( LockUnsettingFree, func );
2933  }
2934  if ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) {
2935  KMP_FATAL( LockUnsettingSetByAnother, func );
2936  }
2937  return __kmp_release_nested_drdpa_lock( lck, gtid );
2938 }
2939 
2940 void
2941 __kmp_init_nested_drdpa_lock( kmp_drdpa_lock_t * lck )
2942 {
2943  __kmp_init_drdpa_lock( lck );
2944  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
2945 }
2946 
2947 static void
2948 __kmp_init_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck )
2949 {
2950  __kmp_init_nested_drdpa_lock( lck );
2951 }
2952 
2953 void
2954 __kmp_destroy_nested_drdpa_lock( kmp_drdpa_lock_t *lck )
2955 {
2956  __kmp_destroy_drdpa_lock( lck );
2957  lck->lk.depth_locked = 0;
2958 }
2959 
2960 static void
2961 __kmp_destroy_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck )
2962 {
2963  char const * const func = "omp_destroy_nest_lock";
2964  if ( lck->lk.initialized != lck ) {
2965  KMP_FATAL( LockIsUninitialized, func );
2966  }
2967  if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2968  KMP_FATAL( LockSimpleUsedAsNestable, func );
2969  }
2970  if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) {
2971  KMP_FATAL( LockStillOwned, func );
2972  }
2973  __kmp_destroy_nested_drdpa_lock( lck );
2974 }
2975 
2976 
2977 //
2978 // access functions to fields which don't exist for all lock kinds.
2979 //
2980 
2981 static int
2982 __kmp_is_drdpa_lock_initialized( kmp_drdpa_lock_t *lck )
2983 {
2984  return lck == lck->lk.initialized;
2985 }
2986 
2987 static const ident_t *
2988 __kmp_get_drdpa_lock_location( kmp_drdpa_lock_t *lck )
2989 {
2990  return lck->lk.location;
2991 }
2992 
2993 static void
2994 __kmp_set_drdpa_lock_location( kmp_drdpa_lock_t *lck, const ident_t *loc )
2995 {
2996  lck->lk.location = loc;
2997 }
2998 
2999 static kmp_lock_flags_t
3000 __kmp_get_drdpa_lock_flags( kmp_drdpa_lock_t *lck )
3001 {
3002  return lck->lk.flags;
3003 }
3004 
3005 static void
3006 __kmp_set_drdpa_lock_flags( kmp_drdpa_lock_t *lck, kmp_lock_flags_t flags )
3007 {
3008  lck->lk.flags = flags;
3009 }
3010 
3011 #if KMP_USE_DYNAMIC_LOCK
3012 
3013 // Direct lock initializer: it simply writes a tag to the low 8 bits of the lock word.
3014 static void __kmp_init_direct_lock(kmp_dyna_lock_t *lck, kmp_dyna_lockseq_t seq)
3015 {
3016  TCW_4(*lck, KMP_GET_D_TAG(seq));
3017  KA_TRACE(20, ("__kmp_init_direct_lock: initialized direct lock with type#%d\n", seq));
3018 }
3019 
3020 #if KMP_USE_TSX
3021 
3022 // HLE lock functions - imported from the testbed runtime.
3023 #define HLE_ACQUIRE ".byte 0xf2;"
3024 #define HLE_RELEASE ".byte 0xf3;"
3025 
3026 static inline kmp_uint32
3027 swap4(kmp_uint32 volatile *p, kmp_uint32 v)
3028 {
3029  __asm__ volatile(HLE_ACQUIRE "xchg %1,%0"
3030  : "+r"(v), "+m"(*p)
3031  :
3032  : "memory");
3033  return v;
3034 }
3035 
3036 static void
3037 __kmp_destroy_hle_lock(kmp_dyna_lock_t *lck)
3038 {
3039  TCW_4(*lck, 0);
3040 }
3041 
3042 static void
3043 __kmp_acquire_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3044 {
3045  // Use gtid for KMP_LOCK_BUSY if necessary
3046  if (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)) {
3047  int delay = 1;
3048  do {
3049  while (*(kmp_uint32 volatile *)lck != KMP_LOCK_FREE(hle)) {
3050  for (int i = delay; i != 0; --i)
3051  KMP_CPU_PAUSE();
3052  delay = ((delay << 1) | 1) & 7;
3053  }
3054  } while (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle));
3055  }
3056 }
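
// Illustrative sketch, kept out of the build with #if 0: the pause-count
// sequence produced by the capped backoff above. Starting from 1,
// delay = ((delay << 1) | 1) & 7 yields 1, 3, 7, 7, ..., so a contended
// waiter never burns more than 7 KMP_CPU_PAUSE()s between re-reads of the
// lock word.
#if 0
static void hle_backoff_demo(void)
{
    int delay = 1;
    for (int round = 0; round < 5; ++round) {
        // __kmp_acquire_hle_lock() would execute 'delay' pauses here.
        delay = ((delay << 1) | 1) & 7;     // 1 -> 3 -> 7 -> 7 -> 7
    }
}
#endif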
3057 
3058 static void
3059 __kmp_acquire_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3060 {
3061  __kmp_acquire_hle_lock(lck, gtid); // TODO: add checks
3062 }
3063 
3064 static int
3065 __kmp_release_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3066 {
3067  __asm__ volatile(HLE_RELEASE "movl %1,%0"
3068  : "=m"(*lck)
3069  : "r"(KMP_LOCK_FREE(hle))
3070  : "memory");
3071  return KMP_LOCK_RELEASED;
3072 }
3073 
3074 static int
3075 __kmp_release_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3076 {
3077  return __kmp_release_hle_lock(lck, gtid); // TODO: add checks
3078 }
3079 
3080 static int
3081 __kmp_test_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3082 {
3083  return swap4(lck, KMP_LOCK_BUSY(1, hle)) == KMP_LOCK_FREE(hle);
3084 }
3085 
3086 static int
3087 __kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3088 {
3089  return __kmp_test_hle_lock(lck, gtid); // TODO: add checks
3090 }
3091 
3092 static void
3093 __kmp_init_rtm_lock(kmp_queuing_lock_t *lck)
3094 {
3095  __kmp_init_queuing_lock(lck);
3096 }
3097 
3098 static void
3099 __kmp_destroy_rtm_lock(kmp_queuing_lock_t *lck)
3100 {
3101  __kmp_destroy_queuing_lock(lck);
3102 }
3103 
3104 static void
3105 __kmp_acquire_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid)
3106 {
3107  unsigned retries=3, status;
3108  do {
3109  status = _xbegin();
3110  if (status == _XBEGIN_STARTED) {
3111  if (__kmp_is_unlocked_queuing_lock(lck))
3112  return;
3113  _xabort(0xff);
3114  }
3115  if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) {
3116  // Wait until lock becomes free
3117  while (! __kmp_is_unlocked_queuing_lock(lck))
3118  __kmp_yield(TRUE);
3119  }
3120  else if (!(status & _XABORT_RETRY))
3121  break;
3122  } while (retries--);
3123 
3124  // Fall back to the non-speculative queuing lock
3125  __kmp_acquire_queuing_lock(lck, gtid);
3126 }
3127 
3128 static void
3129 __kmp_acquire_rtm_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid)
3130 {
3131  __kmp_acquire_rtm_lock(lck, gtid);
3132 }
3133 
3134 static int
3135 __kmp_release_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid)
3136 {
3137  if (__kmp_is_unlocked_queuing_lock(lck)) {
3138  // Releasing from speculation
3139  _xend();
3140  }
3141  else {
3142  // Releasing from a real lock
3143  __kmp_release_queuing_lock(lck, gtid);
3144  }
3145  return KMP_LOCK_RELEASED;
3146 }
3147 
3148 static int
3149 __kmp_release_rtm_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid)
3150 {
3151  return __kmp_release_rtm_lock(lck, gtid);
3152 }
3153 
3154 static int
3155 __kmp_test_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid)
3156 {
3157  unsigned retries=3, status;
3158  do {
3159  status = _xbegin();
3160  if (status == _XBEGIN_STARTED && __kmp_is_unlocked_queuing_lock(lck)) {
3161  return 1;
3162  }
3163  if (!(status & _XABORT_RETRY))
3164  break;
3165  } while (retries--);
3166 
3167  return (__kmp_is_unlocked_queuing_lock(lck))? 1: 0;
3168 }
3169 
3170 static int
3171 __kmp_test_rtm_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid)
3172 {
3173  return __kmp_test_rtm_lock(lck, gtid);
3174 }
3175 
3176 #endif // KMP_USE_TSX
3177 
3178 // Entry functions for indirect locks (first element of direct lock jump tables).
3179 static void __kmp_init_indirect_lock(kmp_dyna_lock_t * l, kmp_dyna_lockseq_t tag);
3180 static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t * lock);
3181 static void __kmp_set_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
3182 static int __kmp_unset_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
3183 static int __kmp_test_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
3184 static void __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32);
3185 static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32);
3186 static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32);
3187 
3188 //
3189 // Jump tables for the direct lock functions.
3190 // Only fill in the odd entries; that avoids the need to shift out the low bit.
3191 //
3192 
3193 // init functions
3194 #define expand(l, op) 0,__kmp_init_direct_lock,
3195 void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t)
3196  = { __kmp_init_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, init) };
3197 #undef expand
3198 
3199 // destroy functions
3200 #define expand(l, op) 0,(void (*)(kmp_dyna_lock_t *))__kmp_##op##_##l##_lock,
3201 void (*__kmp_direct_destroy[])(kmp_dyna_lock_t *)
3202  = { __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy) };
3203 #undef expand
3204 
3205 // set/acquire functions
3206 #define expand(l, op) 0,(void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
3207 static void (*direct_set[])(kmp_dyna_lock_t *, kmp_int32)
3208  = { __kmp_set_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, acquire) };
3209 #undef expand
3210 #define expand(l, op) 0,(void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
3211 static void (*direct_set_check[])(kmp_dyna_lock_t *, kmp_int32)
3212  = { __kmp_set_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, acquire) };
3213 #undef expand
3214 
3215 // unset/release and test functions
3216 #define expand(l, op) 0,(int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
3217 static int (*direct_unset[])(kmp_dyna_lock_t *, kmp_int32)
3218  = { __kmp_unset_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, release) };
3219 static int (*direct_test[])(kmp_dyna_lock_t *, kmp_int32)
3220  = { __kmp_test_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, test) };
3221 #undef expand
3222 #define expand(l, op) 0,(int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
3223 static int (*direct_unset_check[])(kmp_dyna_lock_t *, kmp_int32)
3224  = { __kmp_unset_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, release) };
3225 static int (*direct_test_check[])(kmp_dyna_lock_t *, kmp_int32)
3226  = { __kmp_test_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, test) };
3227 #undef expand
3228 
3229 // Exposes only one set of jump tables (*lock or *lock_with_checks).
3230 void (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32) = 0;
3231 int (*(*__kmp_direct_unset))(kmp_dyna_lock_t *, kmp_int32) = 0;
3232 int (*(*__kmp_direct_test))(kmp_dyna_lock_t *, kmp_int32) = 0;
3233 
3234 //
3235 // Jump tables for the indirect lock functions.
3236 //
3237 #define expand(l, op) (void (*)(kmp_user_lock_p))__kmp_##op##_##l##_##lock,
3238 void (*__kmp_indirect_init[])(kmp_user_lock_p) = { KMP_FOREACH_I_LOCK(expand, init) };
3239 void (*__kmp_indirect_destroy[])(kmp_user_lock_p) = { KMP_FOREACH_I_LOCK(expand, destroy) };
3240 #undef expand
3241 
3242 // set/acquire functions
3243 #define expand(l, op) (void (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock,
3244 static void (*indirect_set[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, acquire) };
3245 #undef expand
3246 #define expand(l, op) (void (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock_with_checks,
3247 static void (*indirect_set_check[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, acquire) };
3248 #undef expand
3249 
3250 // unset/release and test functions
3251 #define expand(l, op) (int (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock,
3252 static int (*indirect_unset[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, release) };
3253 static int (*indirect_test[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, test) };
3254 #undef expand
3255 #define expand(l, op) (int (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock_with_checks,
3256 static int (*indirect_unset_check[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, release) };
3257 static int (*indirect_test_check[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, test) };
3258 #undef expand
3259 
3260 // Exposes only one set of jump tables (*lock or *lock_with_checks).
3261 void (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32) = 0;
3262 int (*(*__kmp_indirect_unset))(kmp_user_lock_p, kmp_int32) = 0;
3263 int (*(*__kmp_indirect_test))(kmp_user_lock_p, kmp_int32) = 0;
3264 
3265 // Lock index table.
3266 kmp_indirect_lock_table_t __kmp_i_lock_table;
3267 
3268 // Size of indirect locks.
3269 static kmp_uint32 __kmp_indirect_lock_size[KMP_NUM_I_LOCKS] = { 0 };
3270 
3271 // Jump tables for lock accessor/modifier.
3272 void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p, const ident_t *) = { 0 };
3273 void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p, kmp_lock_flags_t) = { 0 };
3274 const ident_t * (*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p) = { 0 };
3275 kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p) = { 0 };
3276 
3277 // Use different lock pools for different lock types.
3278 static kmp_indirect_lock_t * __kmp_indirect_lock_pool[KMP_NUM_I_LOCKS] = { 0 };
3279 
3280 // User lock allocator for dynamically dispatched indirect locks.
3281 // Every entry of the indirect lock table holds the address and type of the allocated indirect lock
3282 // (kmp_indirect_lock_t), and the size of the table doubles when it is full. A destroyed indirect lock
3283 // object is returned to the reusable pool of locks, unique to each lock type.
3284 kmp_indirect_lock_t *
3285 __kmp_allocate_indirect_lock(void **user_lock, kmp_int32 gtid, kmp_indirect_locktag_t tag)
3286 {
3287  kmp_indirect_lock_t *lck;
3288  kmp_lock_index_t idx;
3289 
3290  __kmp_acquire_lock(&__kmp_global_lock, gtid);
3291 
3292  if (__kmp_indirect_lock_pool[tag] != NULL) {
3293  // Reuse the allocated and destroyed lock object
3294  lck = __kmp_indirect_lock_pool[tag];
3295  if (OMP_LOCK_T_SIZE < sizeof(void *))
3296  idx = lck->lock->pool.index;
3297  __kmp_indirect_lock_pool[tag] = (kmp_indirect_lock_t *)lck->lock->pool.next;
3298  KA_TRACE(20, ("__kmp_allocate_indirect_lock: reusing an existing lock %p\n", lck));
3299  } else {
3300  idx = __kmp_i_lock_table.next;
3301  // Check capacity and double the size if it is full
3302  if (idx == __kmp_i_lock_table.size) {
3303  // Double up the space for block pointers
3304  int row = __kmp_i_lock_table.size/KMP_I_LOCK_CHUNK;
3305  kmp_indirect_lock_t **old_table = __kmp_i_lock_table.table;
3306  __kmp_i_lock_table.table = (kmp_indirect_lock_t **)__kmp_allocate(2*row*sizeof(kmp_indirect_lock_t *));
3307  KMP_MEMCPY(__kmp_i_lock_table.table, old_table, row*sizeof(kmp_indirect_lock_t *));
3308  __kmp_free(old_table);
3309  // Allocate new objects in the new blocks
3310  for (int i = row; i < 2*row; ++i)
3311  *(__kmp_i_lock_table.table + i) = (kmp_indirect_lock_t *)
3312  __kmp_allocate(KMP_I_LOCK_CHUNK*sizeof(kmp_indirect_lock_t));
3313  __kmp_i_lock_table.size = 2*idx;
3314  }
3315  __kmp_i_lock_table.next++;
3316  lck = KMP_GET_I_LOCK(idx);
3317  // Allocate a new base lock object
3318  lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]);
3319  KA_TRACE(20, ("__kmp_allocate_indirect_lock: allocated a new lock %p\n", lck));
3320  }
3321 
3322  __kmp_release_lock(&__kmp_global_lock, gtid);
3323 
3324  lck->type = tag;
3325 
3326  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3327  *((kmp_lock_index_t *)user_lock) = idx << 1; // indirect lock word must be even.
3328  } else {
3329  *((kmp_indirect_lock_t **)user_lock) = lck;
3330  }
3331 
3332  return lck;
3333 }
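
// Illustrative sketch, kept out of the build with #if 0: the index encoding
// used just above when OMP_LOCK_T_SIZE < sizeof(void *). The user's lock word
// stores the indirect-table index shifted left by one, so indirect lock words
// are always even (direct locks, by contrast, sit in the odd jump-table
// slots). The real decoding macro is KMP_EXTRACT_I_INDEX() in kmp_lock.h;
// this is only a reading aid, not the runtime's decoder.
#if 0
static kmp_lock_index_t encode_indirect_index(kmp_lock_index_t idx)
{
    return idx << 1;                    // even lock word marks "indirect"
}

static kmp_lock_index_t decode_indirect_index(kmp_lock_index_t word)
{
    return word >> 1;                   // recover the table index
}
#endif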
3334 
3335 // User lock lookup for dynamically dispatched locks.
3336 static __forceinline
3337 kmp_indirect_lock_t *
3338 __kmp_lookup_indirect_lock(void **user_lock, const char *func)
3339 {
3340  if (__kmp_env_consistency_check) {
3341  kmp_indirect_lock_t *lck = NULL;
3342  if (user_lock == NULL) {
3343  KMP_FATAL(LockIsUninitialized, func);
3344  }
3345  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3346  kmp_lock_index_t idx = KMP_EXTRACT_I_INDEX(user_lock);
3347  if (idx >= __kmp_i_lock_table.size) {
3348  KMP_FATAL(LockIsUninitialized, func);
3349  }
3350  lck = KMP_GET_I_LOCK(idx);
3351  } else {
3352  lck = *((kmp_indirect_lock_t **)user_lock);
3353  }
3354  if (lck == NULL) {
3355  KMP_FATAL(LockIsUninitialized, func);
3356  }
3357  return lck;
3358  } else {
3359  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3360  return KMP_GET_I_LOCK(KMP_EXTRACT_I_INDEX(user_lock));
3361  } else {
3362  return *((kmp_indirect_lock_t **)user_lock);
3363  }
3364  }
3365 }
3366 
3367 static void
3368 __kmp_init_indirect_lock(kmp_dyna_lock_t * lock, kmp_dyna_lockseq_t seq)
3369 {
3370 #if KMP_USE_ADAPTIVE_LOCKS
3371  if (seq == lockseq_adaptive && !__kmp_cpuinfo.rtm) {
3372  KMP_WARNING(AdaptiveNotSupported, "kmp_lockseq_t", "adaptive");
3373  seq = lockseq_queuing;
3374  }
3375 #endif
3376 #if KMP_USE_TSX
3377  if (seq == lockseq_rtm && !__kmp_cpuinfo.rtm) {
3378  seq = lockseq_queuing;
3379  }
3380 #endif
3381  kmp_indirect_locktag_t tag = KMP_GET_I_TAG(seq);
3382  kmp_indirect_lock_t *l = __kmp_allocate_indirect_lock((void **)lock, __kmp_entry_gtid(), tag);
3383  KMP_I_LOCK_FUNC(l, init)(l->lock);
3384  KA_TRACE(20, ("__kmp_init_indirect_lock: initialized indirect lock with type#%d\n", seq));
3385 }
3386 
3387 static void
3388 __kmp_destroy_indirect_lock(kmp_dyna_lock_t * lock)
3389 {
3390  kmp_uint32 gtid = __kmp_entry_gtid();
3391  kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_destroy_lock");
3392  KMP_I_LOCK_FUNC(l, destroy)(l->lock);
3393  kmp_indirect_locktag_t tag = l->type;
3394 
3395  __kmp_acquire_lock(&__kmp_global_lock, gtid);
3396 
3397  // Use the base lock's space to keep the pool chain.
3398  l->lock->pool.next = (kmp_user_lock_p)__kmp_indirect_lock_pool[tag];
3399  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3400  l->lock->pool.index = KMP_EXTRACT_I_INDEX(lock);
3401  }
3402  __kmp_indirect_lock_pool[tag] = l;
3403 
3404  __kmp_release_lock(&__kmp_global_lock, gtid);
3405 }
3406 
3407 static void
3408 __kmp_set_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3409 {
3410  kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3411  KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
3412 }
3413 
3414 static int
3415 __kmp_unset_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3416 {
3417  kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3418  return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid);
3419 }
3420 
3421 static int
3422 __kmp_test_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3423 {
3424  kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3425  return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid);
3426 }
3427 
3428 static void
3429 __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3430 {
3431  kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_set_lock");
3432  KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
3433 }
3434 
3435 static int
3436 __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3437 {
3438  kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_unset_lock");
3439  return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid);
3440 }
3441 
3442 static int
3443 __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3444 {
3445  kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_test_lock");
3446  return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid);
3447 }
3448 
3449 kmp_dyna_lockseq_t __kmp_user_lock_seq = lockseq_queuing;
3450 
3451 // This is used only in kmp_error.c when consistency checking is on.
3452 kmp_int32
3453 __kmp_get_user_lock_owner(kmp_user_lock_p lck, kmp_uint32 seq)
3454 {
3455  switch (seq) {
3456  case lockseq_tas:
3457  case lockseq_nested_tas:
3458  return __kmp_get_tas_lock_owner((kmp_tas_lock_t *)lck);
3459 #if KMP_HAS_FUTEX
3460  case lockseq_futex:
3461  case lockseq_nested_futex:
3462  return __kmp_get_futex_lock_owner((kmp_futex_lock_t *)lck);
3463 #endif
3464  case lockseq_ticket:
3465  case lockseq_nested_ticket:
3466  return __kmp_get_ticket_lock_owner((kmp_ticket_lock_t *)lck);
3467  case lockseq_queuing:
3468  case lockseq_nested_queuing:
3469 #if KMP_USE_ADAPTIVE_LOCKS
3470  case lockseq_adaptive:
3471  return __kmp_get_queuing_lock_owner((kmp_queuing_lock_t *)lck);
3472 #endif
3473  case lockseq_drdpa:
3474  case lockseq_nested_drdpa:
3475  return __kmp_get_drdpa_lock_owner((kmp_drdpa_lock_t *)lck);
3476  default:
3477  return 0;
3478  }
3479 }
3480 
3481 // Initializes data for dynamic user locks.
3482 void
3483 __kmp_init_dynamic_user_locks()
3484 {
3485  // Initialize jump table for the lock functions
3486  if (__kmp_env_consistency_check) {
3487  __kmp_direct_set = direct_set_check;
3488  __kmp_direct_unset = direct_unset_check;
3489  __kmp_direct_test = direct_test_check;
3490  __kmp_indirect_set = indirect_set_check;
3491  __kmp_indirect_unset = indirect_unset_check;
3492  __kmp_indirect_test = indirect_test_check;
3493  }
3494  else {
3495  __kmp_direct_set = direct_set;
3496  __kmp_direct_unset = direct_unset;
3497  __kmp_direct_test = direct_test;
3498  __kmp_indirect_set = indirect_set;
3499  __kmp_indirect_unset = indirect_unset;
3500  __kmp_indirect_test = indirect_test;
3501  }
3502 
3503  // Initialize lock index table
3504  __kmp_i_lock_table.size = KMP_I_LOCK_CHUNK;
3505  __kmp_i_lock_table.table = (kmp_indirect_lock_t **)__kmp_allocate(sizeof(kmp_indirect_lock_t *));
3506  *(__kmp_i_lock_table.table) = (kmp_indirect_lock_t *)
3507  __kmp_allocate(KMP_I_LOCK_CHUNK*sizeof(kmp_indirect_lock_t));
3508  __kmp_i_lock_table.next = 0;
3509 
3510  // Indirect lock size
3511  __kmp_indirect_lock_size[locktag_ticket] = sizeof(kmp_ticket_lock_t);
3512  __kmp_indirect_lock_size[locktag_queuing] = sizeof(kmp_queuing_lock_t);
3513 #if KMP_USE_ADAPTIVE_LOCKS
3514  __kmp_indirect_lock_size[locktag_adaptive] = sizeof(kmp_adaptive_lock_t);
3515 #endif
3516  __kmp_indirect_lock_size[locktag_drdpa] = sizeof(kmp_drdpa_lock_t);
3517 #if KMP_USE_TSX
3518  __kmp_indirect_lock_size[locktag_rtm] = sizeof(kmp_queuing_lock_t);
3519 #endif
3520  __kmp_indirect_lock_size[locktag_nested_tas] = sizeof(kmp_tas_lock_t);
3521 #if KMP_USE_FUTEX
3522  __kmp_indirect_lock_size[locktag_nested_futex] = sizeof(kmp_futex_lock_t);
3523 #endif
3524  __kmp_indirect_lock_size[locktag_nested_ticket] = sizeof(kmp_ticket_lock_t);
3525  __kmp_indirect_lock_size[locktag_nested_queuing] = sizeof(kmp_queuing_lock_t);
3526  __kmp_indirect_lock_size[locktag_nested_drdpa] = sizeof(kmp_drdpa_lock_t);
3527 
3528  // Initialize lock accessor/modifier
3529 #define fill_jumps(table, expand, sep) { \
3530  table[locktag##sep##ticket] = expand(ticket); \
3531  table[locktag##sep##queuing] = expand(queuing); \
3532  table[locktag##sep##drdpa] = expand(drdpa); \
3533 }
3534 
3535 #if KMP_USE_ADAPTIVE_LOCKS
3536 # define fill_table(table, expand) { \
3537  fill_jumps(table, expand, _); \
3538  table[locktag_adaptive] = expand(queuing); \
3539  fill_jumps(table, expand, _nested_); \
3540 }
3541 #else
3542 # define fill_table(table, expand) { \
3543  fill_jumps(table, expand, _); \
3544  fill_jumps(table, expand, _nested_); \
3545 }
3546 #endif // KMP_USE_ADAPTIVE_LOCKS
3547 
3548 #define expand(l) (void (*)(kmp_user_lock_p, const ident_t *))__kmp_set_##l##_lock_location
3549  fill_table(__kmp_indirect_set_location, expand);
3550 #undef expand
3551 #define expand(l) (void (*)(kmp_user_lock_p, kmp_lock_flags_t))__kmp_set_##l##_lock_flags
3552  fill_table(__kmp_indirect_set_flags, expand);
3553 #undef expand
3554 #define expand(l) (const ident_t * (*)(kmp_user_lock_p))__kmp_get_##l##_lock_location
3555  fill_table(__kmp_indirect_get_location, expand);
3556 #undef expand
3557 #define expand(l) (kmp_lock_flags_t (*)(kmp_user_lock_p))__kmp_get_##l##_lock_flags
3558  fill_table(__kmp_indirect_get_flags, expand);
3559 #undef expand
3560 
3561  __kmp_init_user_locks = TRUE;
3562 }
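// The function above illustrates a general pattern: pick between the checked
// and unchecked jump tables once, at initialization, so the per-call dispatch
// path carries no extra branches. The disabled sketch below reduces that
// pattern to a standalone example; the toy_* names are illustrative only and
// are not part of the KMP API.
#if 0
#include <stdio.h>

typedef void (*toy_set_fn)(int *lock, int gtid);

static void toy_set_plain(int *lock, int gtid)   { *lock = gtid + 1; }
static void toy_set_checked(int *lock, int gtid) {
    if (*lock == gtid + 1)
        fprintf(stderr, "thread %d already owns this lock\n", gtid);
    *lock = gtid + 1;
}

// Analogous to __kmp_direct_set / __kmp_indirect_set above.
static toy_set_fn toy_set;

static void toy_init_locks(int consistency_check)
{
    // Selected once; every subsequent toy_set(...) call dispatches directly.
    toy_set = consistency_check ? toy_set_checked : toy_set_plain;
}
#endif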
3563 
3564 // Clean up the lock table.
3565 void
3566 __kmp_cleanup_indirect_user_locks()
3567 {
3568  kmp_lock_index_t i;
3569  int k;
3570 
3571  // Clean up locks in the pools first (they were already destroyed before going into the pools).
3572  for (k = 0; k < KMP_NUM_I_LOCKS; ++k) {
3573  kmp_indirect_lock_t *l = __kmp_indirect_lock_pool[k];
3574  while (l != NULL) {
3575  kmp_indirect_lock_t *ll = l;
3576  l = (kmp_indirect_lock_t *)l->lock->pool.next;
3577  KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: freeing %p from pool\n", ll));
3578  __kmp_free(ll->lock);
3579  ll->lock = NULL;
3580  }
3581  __kmp_indirect_lock_pool[k] = NULL;
3582  }
3583  // Clean up the remaining undestroyed locks.
3584  for (i = 0; i < __kmp_i_lock_table.next; i++) {
3585  kmp_indirect_lock_t *l = KMP_GET_I_LOCK(i);
3586  if (l->lock != NULL) {
3587  // Locks not destroyed explicitly need to be destroyed here.
3588  KMP_I_LOCK_FUNC(l, destroy)(l->lock);
3589  KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: destroy/freeing %p from table\n", l));
3590  __kmp_free(l->lock);
3591  }
3592  }
3593  // Free the table
3594  for (i = 0; i < __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK; i++)
3595  __kmp_free(__kmp_i_lock_table.table[i]);
3596  __kmp_free(__kmp_i_lock_table.table);
3597 
3598  __kmp_init_user_locks = FALSE;
3599 }
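// The index table cleaned up above is chunked: a small array of chunk
// pointers, each chunk holding KMP_I_LOCK_CHUNK lock slots. The disabled
// sketch below shows how such a layout maps a flat index to a slot and how it
// is torn down; CHUNK and the toy_* names are illustrative stand-ins, not KMP
// definitions.
#if 0
#include <stdlib.h>

#define CHUNK 1024

typedef struct { void *payload; } toy_ilock_t;

typedef struct {
    toy_ilock_t **chunks;  // array of chunk pointers
    size_t size;           // total capacity, a multiple of CHUNK
    size_t next;           // next unused flat index
} toy_ilock_table_t;

static toy_ilock_t *
toy_get_ilock(toy_ilock_table_t *t, size_t idx)
{
    // Flat index -> chunk row, then offset within that chunk.
    return t->chunks[idx / CHUNK] + (idx % CHUNK);
}

static void
toy_ilock_table_cleanup(toy_ilock_table_t *t)
{
    // Free each chunk first, then the array of chunk pointers, mirroring the
    // order used in __kmp_cleanup_indirect_user_locks() above.
    for (size_t i = 0; i < t->size / CHUNK; ++i)
        free(t->chunks[i]);
    free(t->chunks);
}
#endif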
3600 
3601 enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
3602 int __kmp_num_locks_in_block = 1; // FIXME - tune this value
3603 
3604 #else // KMP_USE_DYNAMIC_LOCK
3605 
3606 /* ------------------------------------------------------------------------ */
3607 /* user locks
3608  *
3609  * They are implemented as a table of function pointers which are set to the
3610  * lock functions of the appropriate kind, once that has been determined.
3611  */
3612 
3613 enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
3614 
3615 size_t __kmp_base_user_lock_size = 0;
3616 size_t __kmp_user_lock_size = 0;
3617 
3618 kmp_int32 ( *__kmp_get_user_lock_owner_ )( kmp_user_lock_p lck ) = NULL;
3619 int ( *__kmp_acquire_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3620 
3621 int ( *__kmp_test_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3622 int ( *__kmp_release_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3623 void ( *__kmp_init_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3624 void ( *__kmp_destroy_user_lock_ )( kmp_user_lock_p lck ) = NULL;
3625 void ( *__kmp_destroy_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3626 int ( *__kmp_acquire_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3627 
3628 int ( *__kmp_test_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3629 int ( *__kmp_release_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3630 void ( *__kmp_init_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3631 void ( *__kmp_destroy_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3632 
3633 int ( *__kmp_is_user_lock_initialized_ )( kmp_user_lock_p lck ) = NULL;
3634 const ident_t * ( *__kmp_get_user_lock_location_ )( kmp_user_lock_p lck ) = NULL;
3635 void ( *__kmp_set_user_lock_location_ )( kmp_user_lock_p lck, const ident_t *loc ) = NULL;
3636 kmp_lock_flags_t ( *__kmp_get_user_lock_flags_ )( kmp_user_lock_p lck ) = NULL;
3637 void ( *__kmp_set_user_lock_flags_ )( kmp_user_lock_p lck, kmp_lock_flags_t flags ) = NULL;
3638 
3639 void __kmp_set_user_lock_vptrs( kmp_lock_kind_t user_lock_kind )
3640 {
3641  switch ( user_lock_kind ) {
3642  case lk_default:
3643  default:
3644  KMP_ASSERT( 0 );
3645 
3646  case lk_tas: {
3647  __kmp_base_user_lock_size = sizeof( kmp_base_tas_lock_t );
3648  __kmp_user_lock_size = sizeof( kmp_tas_lock_t );
3649 
3650  __kmp_get_user_lock_owner_ =
3651  ( kmp_int32 ( * )( kmp_user_lock_p ) )
3652  ( &__kmp_get_tas_lock_owner );
3653 
3654  if ( __kmp_env_consistency_check ) {
3655  KMP_BIND_USER_LOCK_WITH_CHECKS(tas);
3656  KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas);
3657  }
3658  else {
3659  KMP_BIND_USER_LOCK(tas);
3660  KMP_BIND_NESTED_USER_LOCK(tas);
3661  }
3662 
3663  __kmp_destroy_user_lock_ =
3664  ( void ( * )( kmp_user_lock_p ) )
3665  ( &__kmp_destroy_tas_lock );
3666 
3667  __kmp_is_user_lock_initialized_ =
3668  ( int ( * )( kmp_user_lock_p ) ) NULL;
3669 
3670  __kmp_get_user_lock_location_ =
3671  ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;
3672 
3673  __kmp_set_user_lock_location_ =
3674  ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;
3675 
3676  __kmp_get_user_lock_flags_ =
3677  ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;
3678 
3679  __kmp_set_user_lock_flags_ =
3680  ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
3681  }
3682  break;
3683 
3684 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
3685 
3686  case lk_futex: {
3687  __kmp_base_user_lock_size = sizeof( kmp_base_futex_lock_t );
3688  __kmp_user_lock_size = sizeof( kmp_futex_lock_t );
3689 
3690  __kmp_get_user_lock_owner_ =
3691  ( kmp_int32 ( * )( kmp_user_lock_p ) )
3692  ( &__kmp_get_futex_lock_owner );
3693 
3694  if ( __kmp_env_consistency_check ) {
3695  KMP_BIND_USER_LOCK_WITH_CHECKS(futex);
3696  KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex);
3697  }
3698  else {
3699  KMP_BIND_USER_LOCK(futex);
3700  KMP_BIND_NESTED_USER_LOCK(futex);
3701  }
3702 
3703  __kmp_destroy_user_lock_ =
3704  ( void ( * )( kmp_user_lock_p ) )
3705  ( &__kmp_destroy_futex_lock );
3706 
3707  __kmp_is_user_lock_initialized_ =
3708  ( int ( * )( kmp_user_lock_p ) ) NULL;
3709 
3710  __kmp_get_user_lock_location_ =
3711  ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;
3712 
3713  __kmp_set_user_lock_location_ =
3714  ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;
3715 
3716  __kmp_get_user_lock_flags_ =
3717  ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;
3718 
3719  __kmp_set_user_lock_flags_ =
3720  ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
3721  }
3722  break;
3723 
3724 #endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
3725 
3726  case lk_ticket: {
3727  __kmp_base_user_lock_size = sizeof( kmp_base_ticket_lock_t );
3728  __kmp_user_lock_size = sizeof( kmp_ticket_lock_t );
3729 
3730  __kmp_get_user_lock_owner_ =
3731  ( kmp_int32 ( * )( kmp_user_lock_p ) )
3732  ( &__kmp_get_ticket_lock_owner );
3733 
3734  if ( __kmp_env_consistency_check ) {
3735  KMP_BIND_USER_LOCK_WITH_CHECKS(ticket);
3736  KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(ticket);
3737  }
3738  else {
3739  KMP_BIND_USER_LOCK(ticket);
3740  KMP_BIND_NESTED_USER_LOCK(ticket);
3741  }
3742 
3743  __kmp_destroy_user_lock_ =
3744  ( void ( * )( kmp_user_lock_p ) )
3745  ( &__kmp_destroy_ticket_lock );
3746 
3747  __kmp_is_user_lock_initialized_ =
3748  ( int ( * )( kmp_user_lock_p ) )
3749  ( &__kmp_is_ticket_lock_initialized );
3750 
3751  __kmp_get_user_lock_location_ =
3752  ( const ident_t * ( * )( kmp_user_lock_p ) )
3753  ( &__kmp_get_ticket_lock_location );
3754 
3755  __kmp_set_user_lock_location_ =
3756  ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3757  ( &__kmp_set_ticket_lock_location );
3758 
3759  __kmp_get_user_lock_flags_ =
3760  ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3761  ( &__kmp_get_ticket_lock_flags );
3762 
3763  __kmp_set_user_lock_flags_ =
3764  ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3765  ( &__kmp_set_ticket_lock_flags );
3766  }
3767  break;
3768 
3769  case lk_queuing: {
3770  __kmp_base_user_lock_size = sizeof( kmp_base_queuing_lock_t );
3771  __kmp_user_lock_size = sizeof( kmp_queuing_lock_t );
3772 
3773  __kmp_get_user_lock_owner_ =
3774  ( kmp_int32 ( * )( kmp_user_lock_p ) )
3775  ( &__kmp_get_queuing_lock_owner );
3776 
3777  if ( __kmp_env_consistency_check ) {
3778  KMP_BIND_USER_LOCK_WITH_CHECKS(queuing);
3779  KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(queuing);
3780  }
3781  else {
3782  KMP_BIND_USER_LOCK(queuing);
3783  KMP_BIND_NESTED_USER_LOCK(queuing);
3784  }
3785 
3786  __kmp_destroy_user_lock_ =
3787  ( void ( * )( kmp_user_lock_p ) )
3788  ( &__kmp_destroy_queuing_lock );
3789 
3790  __kmp_is_user_lock_initialized_ =
3791  ( int ( * )( kmp_user_lock_p ) )
3792  ( &__kmp_is_queuing_lock_initialized );
3793 
3794  __kmp_get_user_lock_location_ =
3795  ( const ident_t * ( * )( kmp_user_lock_p ) )
3796  ( &__kmp_get_queuing_lock_location );
3797 
3798  __kmp_set_user_lock_location_ =
3799  ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3800  ( &__kmp_set_queuing_lock_location );
3801 
3802  __kmp_get_user_lock_flags_ =
3803  ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3804  ( &__kmp_get_queuing_lock_flags );
3805 
3806  __kmp_set_user_lock_flags_ =
3807  ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3808  ( &__kmp_set_queuing_lock_flags );
3809  }
3810  break;
3811 
3812 #if KMP_USE_ADAPTIVE_LOCKS
3813  case lk_adaptive: {
3814  __kmp_base_user_lock_size = sizeof( kmp_base_adaptive_lock_t );
3815  __kmp_user_lock_size = sizeof( kmp_adaptive_lock_t );
3816 
3817  __kmp_get_user_lock_owner_ =
3818  ( kmp_int32 ( * )( kmp_user_lock_p ) )
3819  ( &__kmp_get_queuing_lock_owner );
3820 
3821  if ( __kmp_env_consistency_check ) {
3822  KMP_BIND_USER_LOCK_WITH_CHECKS(adaptive);
3823  }
3824  else {
3825  KMP_BIND_USER_LOCK(adaptive);
3826  }
3827 
3828  __kmp_destroy_user_lock_ =
3829  ( void ( * )( kmp_user_lock_p ) )
3830  ( &__kmp_destroy_adaptive_lock );
3831 
3832  __kmp_is_user_lock_initialized_ =
3833  ( int ( * )( kmp_user_lock_p ) )
3834  ( &__kmp_is_queuing_lock_initialized );
3835 
3836  __kmp_get_user_lock_location_ =
3837  ( const ident_t * ( * )( kmp_user_lock_p ) )
3838  ( &__kmp_get_queuing_lock_location );
3839 
3840  __kmp_set_user_lock_location_ =
3841  ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3842  ( &__kmp_set_queuing_lock_location );
3843 
3844  __kmp_get_user_lock_flags_ =
3845  ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3846  ( &__kmp_get_queuing_lock_flags );
3847 
3848  __kmp_set_user_lock_flags_ =
3849  ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3850  ( &__kmp_set_queuing_lock_flags );
3851 
3852  }
3853  break;
3854 #endif // KMP_USE_ADAPTIVE_LOCKS
3855 
3856  case lk_drdpa: {
3857  __kmp_base_user_lock_size = sizeof( kmp_base_drdpa_lock_t );
3858  __kmp_user_lock_size = sizeof( kmp_drdpa_lock_t );
3859 
3860  __kmp_get_user_lock_owner_ =
3861  ( kmp_int32 ( * )( kmp_user_lock_p ) )
3862  ( &__kmp_get_drdpa_lock_owner );
3863 
3864  if ( __kmp_env_consistency_check ) {
3865  KMP_BIND_USER_LOCK_WITH_CHECKS(drdpa);
3866  KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(drdpa);
3867  }
3868  else {
3869  KMP_BIND_USER_LOCK(drdpa);
3870  KMP_BIND_NESTED_USER_LOCK(drdpa);
3871  }
3872 
3873  __kmp_destroy_user_lock_ =
3874  ( void ( * )( kmp_user_lock_p ) )
3875  ( &__kmp_destroy_drdpa_lock );
3876 
3877  __kmp_is_user_lock_initialized_ =
3878  ( int ( * )( kmp_user_lock_p ) )
3879  ( &__kmp_is_drdpa_lock_initialized );
3880 
3881  __kmp_get_user_lock_location_ =
3882  ( const ident_t * ( * )( kmp_user_lock_p ) )
3883  ( &__kmp_get_drdpa_lock_location );
3884 
3885  __kmp_set_user_lock_location_ =
3886  ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3887  ( &__kmp_set_drdpa_lock_location );
3888 
3889  __kmp_get_user_lock_flags_ =
3890  ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3891  ( &__kmp_get_drdpa_lock_flags );
3892 
3893  __kmp_set_user_lock_flags_ =
3894  ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3895  ( &__kmp_set_drdpa_lock_flags );
3896  }
3897  break;
3898  }
3899 }
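// Several of the accessor pointers bound above are deliberately left NULL for
// the simpler lock kinds (e.g. "tas" and "futex" have no location/flags
// accessors), so callers must use a guarded indirect call, as IS_CRITICAL()
// does later in this file. The disabled sketch below isolates that idiom; the
// toy_* names are illustrative only.
#if 0
typedef struct toy_lock toy_lock;
typedef const char *(*toy_get_loc_fn)(toy_lock *);

// Stays NULL when the active lock kind provides no location accessor.
static toy_get_loc_fn toy_get_location = NULL;

static const char *
toy_lock_location(toy_lock *lck)
{
    // Only dispatch through the pointer if the accessor was actually bound.
    return (toy_get_location != NULL) ? toy_get_location(lck) : NULL;
}
#endif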
3900 
3901 
3902 // ----------------------------------------------------------------------------
3903 // User lock table & lock allocation
3904 
3905 kmp_lock_table_t __kmp_user_lock_table = { 1, 0, NULL };
3906 kmp_user_lock_p __kmp_lock_pool = NULL;
3907 
3908 // Lock block-allocation support.
3909 kmp_block_of_locks* __kmp_lock_blocks = NULL;
3910 int __kmp_num_locks_in_block = 1; // FIXME - tune this value
3911 
3912 static kmp_lock_index_t
3913 __kmp_lock_table_insert( kmp_user_lock_p lck )
3914 {
3915  // Assume that kmp_global_lock is held upon entry/exit.
3916  kmp_lock_index_t index;
3917  if ( __kmp_user_lock_table.used >= __kmp_user_lock_table.allocated ) {
3918  kmp_lock_index_t size;
3919  kmp_user_lock_p *table;
3920  // Reallocate lock table.
3921  if ( __kmp_user_lock_table.allocated == 0 ) {
3922  size = 1024;
3923  }
3924  else {
3925  size = __kmp_user_lock_table.allocated * 2;
3926  }
3927  table = (kmp_user_lock_p *)__kmp_allocate( sizeof( kmp_user_lock_p ) * size );
3928  KMP_MEMCPY( table + 1, __kmp_user_lock_table.table + 1, sizeof( kmp_user_lock_p ) * ( __kmp_user_lock_table.used - 1 ) );
3929  table[ 0 ] = (kmp_user_lock_p)__kmp_user_lock_table.table;
3930  // We cannot free the previous table now, since it may be in use by other
3931  // threads. So save the pointer to the previous table in the first element of the
3932  // new table. All the tables are organized into a list, which can be freed when
3933  // the library shuts down.
3934  __kmp_user_lock_table.table = table;
3935  __kmp_user_lock_table.allocated = size;
3936  }
3937  KMP_DEBUG_ASSERT( __kmp_user_lock_table.used < __kmp_user_lock_table.allocated );
3938  index = __kmp_user_lock_table.used;
3939  __kmp_user_lock_table.table[ index ] = lck;
3940  ++ __kmp_user_lock_table.used;
3941  return index;
3942 }
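// The reallocation above grows the table by doubling and stashes the previous
// table in slot [0] of the new one, so stale pointers held by other threads
// stay valid until shutdown walks the chain. A disabled standalone sketch of
// the same scheme follows; the toy_* names are illustrative stand-ins for the
// KMP structures.
#if 0
#include <stdlib.h>
#include <string.h>

typedef struct {
    void **table;      // slot [0] chains to the previous (smaller) table
    size_t allocated;  // capacity of the current table
    size_t used;       // next free slot; slot 0 is reserved for the chain
} toy_table_t;

static size_t
toy_table_insert(toy_table_t *t, void *item)
{
    if (t->used >= t->allocated) {
        size_t size = t->allocated ? t->allocated * 2 : 1024;
        void **table = (void **)malloc(size * sizeof(void *));
        // Copy the live entries [1, used) and keep the old table reachable
        // through slot [0] instead of freeing it.
        if (t->used > 1)
            memcpy(table + 1, t->table + 1, (t->used - 1) * sizeof(void *));
        table[0] = (void *)t->table;
        t->table = table;
        t->allocated = size;
    }
    t->table[t->used] = item;
    return t->used++;
}
#endif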
3943 
3944 static kmp_user_lock_p
3945 __kmp_lock_block_allocate()
3946 {
3947  // Assume that kmp_global_lock is held upon entry/exit.
3948  static int last_index = 0;
3949  if ( ( last_index >= __kmp_num_locks_in_block )
3950  || ( __kmp_lock_blocks == NULL ) ) {
3951  // Restart the index.
3952  last_index = 0;
3953  // Need to allocate a new block.
3954  KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
3955  size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block;
3956  char* buffer = (char*)__kmp_allocate( space_for_locks + sizeof( kmp_block_of_locks ) );
3957  // Set up the new block.
3958  kmp_block_of_locks *new_block = (kmp_block_of_locks *)(& buffer[space_for_locks]);
3959  new_block->next_block = __kmp_lock_blocks;
3960  new_block->locks = (void *)buffer;
3961  // Publish the new block.
3962  KMP_MB();
3963  __kmp_lock_blocks = new_block;
3964  }
3965  kmp_user_lock_p ret = (kmp_user_lock_p)(& ( ( (char *)( __kmp_lock_blocks->locks ) )
3966  [ last_index * __kmp_user_lock_size ] ) );
3967  last_index++;
3968  return ret;
3969 }
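// Each block above is a single allocation laid out as N lock slots followed by
// the kmp_block_of_locks descriptor, which is why freeing block->locks during
// cleanup releases the descriptor as well. The disabled sketch below shows
// that layout in isolation; the toy_* names and parameters are illustrative
// assumptions.
#if 0
#include <stdlib.h>

typedef struct toy_block {
    struct toy_block *next_block;
    void *locks;
} toy_block_t;

static toy_block_t *
toy_block_allocate(size_t lock_size, size_t locks_per_block)
{
    size_t space_for_locks = lock_size * locks_per_block;
    // One buffer: [ lock 0 | lock 1 | ... | lock N-1 | toy_block_t header ]
    char *buffer = (char *)malloc(space_for_locks + sizeof(toy_block_t));
    toy_block_t *block = (toy_block_t *)(buffer + space_for_locks);
    block->locks = (void *)buffer;
    block->next_block = NULL;   // the caller links the block into its list
    return block;
}
#endif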
3970 
3971 //
3972 // Get memory for a lock. It may be freshly allocated memory or reused memory
3973 // from the lock pool.
3974 //
3975 kmp_user_lock_p
3976 __kmp_user_lock_allocate( void **user_lock, kmp_int32 gtid,
3977  kmp_lock_flags_t flags )
3978 {
3979  kmp_user_lock_p lck;
3980  kmp_lock_index_t index;
3981  KMP_DEBUG_ASSERT( user_lock );
3982 
3983  __kmp_acquire_lock( &__kmp_global_lock, gtid );
3984 
3985  if ( __kmp_lock_pool == NULL ) {
3986  // Lock pool is empty. Allocate new memory.
3987  if ( __kmp_num_locks_in_block <= 1 ) { // Tune this cutoff point.
3988  lck = (kmp_user_lock_p) __kmp_allocate( __kmp_user_lock_size );
3989  }
3990  else {
3991  lck = __kmp_lock_block_allocate();
3992  }
3993 
3994  // Insert lock in the table so that it can be freed in __kmp_cleanup,
3995  // and debugger has info on all allocated locks.
3996  index = __kmp_lock_table_insert( lck );
3997  }
3998  else {
3999  // Pick up lock from pool.
4000  lck = __kmp_lock_pool;
4001  index = __kmp_lock_pool->pool.index;
4002  __kmp_lock_pool = __kmp_lock_pool->pool.next;
4003  }
4004 
4005  //
4006  // We could potentially differentiate between nested and regular locks
4007  // here, and do the lock table lookup for regular locks only.
4008  //
4009  if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
4010  * ( (kmp_lock_index_t *) user_lock ) = index;
4011  }
4012  else {
4013  * ( (kmp_user_lock_p *) user_lock ) = lck;
4014  }
4015 
4016  // Mark the lock if it is a critical section lock.
4017  __kmp_set_user_lock_flags( lck, flags );
4018 
4019  __kmp_release_lock( & __kmp_global_lock, gtid ); // AC: TODO: move this line up
4020 
4021  return lck;
4022 }
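// The handle the user sees is either a table index (when the user-visible lock
// type is narrower than a pointer, per OMP_LOCK_T_SIZE) or the lock pointer
// itself. The disabled sketch below shows the two encodings next to their
// decode step; TOY_LOCK_T_SIZE and the toy_* names are illustrative
// assumptions, not KMP definitions.
#if 0
#include <stdint.h>

#define TOY_LOCK_T_SIZE sizeof(uint32_t)    // pretend the user lock type is 4 bytes

typedef struct toy_lock toy_lock;
extern toy_lock *toy_lock_table[];          // analogous to the user lock table

static void
toy_store_handle(void **user_lock, toy_lock *lck, uint32_t index)
{
    if (TOY_LOCK_T_SIZE < sizeof(void *))
        *(uint32_t *)user_lock = index;     // narrow handle: store the index
    else
        *(toy_lock **)user_lock = lck;      // wide handle: store the pointer
}

static toy_lock *
toy_load_handle(void **user_lock)
{
    if (TOY_LOCK_T_SIZE < sizeof(void *))
        return toy_lock_table[*(uint32_t *)user_lock];
    return *(toy_lock **)user_lock;
}
#endif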
4023 
4024 // Return the lock's memory to the pool for reuse.
4025 void
4026 __kmp_user_lock_free( void **user_lock, kmp_int32 gtid, kmp_user_lock_p lck )
4027 {
4028  KMP_DEBUG_ASSERT( user_lock != NULL );
4029  KMP_DEBUG_ASSERT( lck != NULL );
4030 
4031  __kmp_acquire_lock( & __kmp_global_lock, gtid );
4032 
4033  lck->pool.next = __kmp_lock_pool;
4034  __kmp_lock_pool = lck;
4035  if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
4036  kmp_lock_index_t index = * ( (kmp_lock_index_t *) user_lock );
4037  KMP_DEBUG_ASSERT( 0 < index && index <= __kmp_user_lock_table.used );
4038  lck->pool.index = index;
4039  }
4040 
4041  __kmp_release_lock( & __kmp_global_lock, gtid );
4042 }
4043 
4044 kmp_user_lock_p
4045 __kmp_lookup_user_lock( void **user_lock, char const *func )
4046 {
4047  kmp_user_lock_p lck = NULL;
4048 
4049  if ( __kmp_env_consistency_check ) {
4050  if ( user_lock == NULL ) {
4051  KMP_FATAL( LockIsUninitialized, func );
4052  }
4053  }
4054 
4055  if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
4056  kmp_lock_index_t index = *( (kmp_lock_index_t *)user_lock );
4057  if ( __kmp_env_consistency_check ) {
4058  if ( ! ( 0 < index && index < __kmp_user_lock_table.used ) ) {
4059  KMP_FATAL( LockIsUninitialized, func );
4060  }
4061  }
4062  KMP_DEBUG_ASSERT( 0 < index && index < __kmp_user_lock_table.used );
4063  KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
4064  lck = __kmp_user_lock_table.table[index];
4065  }
4066  else {
4067  lck = *( (kmp_user_lock_p *)user_lock );
4068  }
4069 
4070  if ( __kmp_env_consistency_check ) {
4071  if ( lck == NULL ) {
4072  KMP_FATAL( LockIsUninitialized, func );
4073  }
4074  }
4075 
4076  return lck;
4077 }
4078 
4079 void
4080 __kmp_cleanup_user_locks( void )
4081 {
4082  //
4083  // Reset lock pool. Do not worry about locks in the pool -- we will free
4084  // them when iterating through the lock table (it includes all the locks,
4085  // dead or alive).
4086  //
4087  __kmp_lock_pool = NULL;
4088 
4089 #define IS_CRITICAL(lck) \
4090  ( ( __kmp_get_user_lock_flags_ != NULL ) && \
4091  ( ( *__kmp_get_user_lock_flags_ )( lck ) & kmp_lf_critical_section ) )
4092 
4093  //
4094  // Loop through lock table, free all locks.
4095  //
4096  // Do not free item [0], it is reserved for lock tables list.
4097  //
4098  // FIXME - we are iterating through a list of (pointers to) objects of
4099  // type union kmp_user_lock, but we have no way of knowing whether the
4100  // base type is currently "pool" or whatever the global user lock type
4101  // is.
4102  //
4103  // We are relying on the fact that for all of the user lock types
4104  // (except "tas"), the first field in the lock struct is the "initialized"
4105  // field, which is set to the address of the lock object itself when
4106  // the lock is initialized. When the union is of type "pool", the
4107  // first field is a pointer to the next object in the free list, which
4108  // will not be the same address as the object itself.
4109  //
4110  // This means that the check ( *__kmp_is_user_lock_initialized_ )( lck )
4111  // will fail for "pool" objects on the free list. This must happen as
4112  // the "location" field of real user locks overlaps the "index" field
4113  // of "pool" objects.
4114  //
4115  // It would be better to run through the free list, and remove all "pool"
4116  // objects from the lock table before executing this loop. However,
4117  // "pool" objects do not always have their index field set (only on
4118  // lin_32e), and I don't want to search the lock table for the address
4119  // of every "pool" object on the free list.
4120  //
4121  while ( __kmp_user_lock_table.used > 1 ) {
4122  const ident *loc;
4123 
4124  //
4125  // reduce __kmp_user_lock_table.used before freeing the lock,
4126  // so that the state of the lock table stays consistent
4127  //
4128  kmp_user_lock_p lck = __kmp_user_lock_table.table[
4129  --__kmp_user_lock_table.used ];
4130 
4131  if ( ( __kmp_is_user_lock_initialized_ != NULL ) &&
4132  ( *__kmp_is_user_lock_initialized_ )( lck ) ) {
4133  //
4134  // Issue a warning if: KMP_CONSISTENCY_CHECK is enabled AND the lock is
4135  // initialized AND it is NOT a critical section (the user is not
4136  // responsible for destroying critical sections) AND we know the source
4137  // location to report.
4138  //
4139  if ( __kmp_env_consistency_check && ( ! IS_CRITICAL( lck ) ) &&
4140  ( ( loc = __kmp_get_user_lock_location( lck ) ) != NULL ) &&
4141  ( loc->psource != NULL ) ) {
4142  kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 0 );
4143  KMP_WARNING( CnsLockNotDestroyed, str_loc.file, str_loc.line );
4144  __kmp_str_loc_free( &str_loc);
4145  }
4146 
4147 #ifdef KMP_DEBUG
4148  if ( IS_CRITICAL( lck ) ) {
4149  KA_TRACE( 20, ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n", lck, *(void**)lck ) );
4150  }
4151  else {
4152  KA_TRACE( 20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck, *(void**)lck ) );
4153  }
4154 #endif // KMP_DEBUG
4155 
4156  //
4157  // Cleanup internal lock dynamic resources
4158  // (for drdpa locks particularly).
4159  //
4160  __kmp_destroy_user_lock( lck );
4161  }
4162 
4163  //
4164  // Free the lock if block allocation of locks is not used.
4165  //
4166  if ( __kmp_lock_blocks == NULL ) {
4167  __kmp_free( lck );
4168  }
4169  }
4170 
4171 #undef IS_CRITICAL
4172 
4173  //
4174  // delete lock table(s).
4175  //
4176  kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table;
4177  __kmp_user_lock_table.table = NULL;
4178  __kmp_user_lock_table.allocated = 0;
4179 
4180  while ( table_ptr != NULL ) {
4181  //
4182  // In the first element we saved the pointer to the previous
4183  // (smaller) lock table.
4184  //
4185  kmp_user_lock_p *next = (kmp_user_lock_p *)( table_ptr[ 0 ] );
4186  __kmp_free( table_ptr );
4187  table_ptr = next;
4188  }
4189 
4190  //
4191  // Free buffers allocated for blocks of locks.
4192  //
4193  kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks;
4194  __kmp_lock_blocks = NULL;
4195 
4196  while ( block_ptr != NULL ) {
4197  kmp_block_of_locks_t *next = block_ptr->next_block;
4198  __kmp_free( block_ptr->locks );
4199  //
4200  // *block_ptr itself was allocated at the end of the locks vector.
4201  //
4202  block_ptr = next;
4203  }
4204 
4205  TCW_4(__kmp_init_user_locks, FALSE);
4206 }
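// The cleanup loop above relies on the convention described in its comment:
// for every user lock type except "tas", the first word of a live lock is an
// "initialized" field that points at the lock itself, while a pooled entry
// reuses that word as its free-list "next" pointer. The disabled sketch below
// isolates that self-pointer test; the toy_* names are illustrative only.
#if 0
typedef struct toy_lock {
    struct toy_lock *initialized;  // set to the lock's own address on init
    /* ... other lock fields ... */
} toy_lock_t;

static void
toy_init_lock(toy_lock_t *lck)
{
    lck->initialized = lck;
}

static int
toy_is_initialized(toy_lock_t *lck)
{
    // A pooled entry stores its free-list "next" pointer here, which will not
    // equal the entry's own address, so the test fails for pool items.
    return lck->initialized == lck;
}
#endif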
4207 
4208 #endif // KMP_USE_DYNAMIC_LOCK