1 /*
2  * kmp_lock.cpp -- lock-related functions
3  */
4 
5 
6 //===----------------------------------------------------------------------===//
7 //
8 // The LLVM Compiler Infrastructure
9 //
10 // This file is dual licensed under the MIT and the University of Illinois Open
11 // Source Licenses. See LICENSE.txt for details.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 
16 #include <stddef.h>
17 #include <atomic>
18 
19 #include "kmp.h"
20 #include "kmp_itt.h"
21 #include "kmp_i18n.h"
22 #include "kmp_lock.h"
23 #include "kmp_io.h"
24 
25 #include "tsan_annotations.h"
26 
27 #if KMP_USE_FUTEX
28 # include <unistd.h>
29 # include <sys/syscall.h>
30 // We should really include <futex.h>, but that causes compatibility problems on different
31 // Linux* OS distributions that either require that you include (or break when you try to include)
32 // <pci/types.h>.
33 // Since all we need is the two macros below (which are part of the kernel ABI, so can't change)
34 // we just define the constants here and don't include <futex.h>
35 # ifndef FUTEX_WAIT
36 # define FUTEX_WAIT 0
37 # endif
38 # ifndef FUTEX_WAKE
39 # define FUTEX_WAKE 1
40 # endif
41 #endif
42 
43 /* Implement spin locks for internal library use. */
44 /* The algorithm implemented is Lamport's bakery lock [1974]. */
45 
46 void
47 __kmp_validate_locks( void )
48 {
49  int i;
50  kmp_uint32 x, y;
51 
    /* Check to make sure unsigned arithmetic wraps properly */
53  x = ~((kmp_uint32) 0) - 2;
54  y = x - 2;
55 
56  for (i = 0; i < 8; ++i, ++x, ++y) {
57  kmp_uint32 z = (x - y);
58  KMP_ASSERT( z == 2 );
59  }
60 
61  KMP_ASSERT( offsetof( kmp_base_queuing_lock, tail_id ) % 8 == 0 );
62 }
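
/* Editor's illustration (not part of the runtime, guarded out of the build):
   the check above relies on unsigned subtraction being well defined modulo
   2^32, so (x - y) stays equal to 2 even while x wraps past UINT32_MAX back
   to 0.  A minimal standalone sketch of the same property: */
#if 0   // illustration only
#include <cassert>
#include <cstdint>

int main() {
    uint32_t x = UINT32_MAX - 2;   // same starting point as __kmp_validate_locks
    uint32_t y = x - 2;
    for (int i = 0; i < 8; ++i, ++x, ++y) {
        // x passes through UINT32_MAX and wraps to 0, but the modular
        // difference is unchanged.
        assert(static_cast<uint32_t>(x - y) == 2u);
    }
    return 0;
}
#endif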
63 
64 
65 /* ------------------------------------------------------------------------ */
66 /* test and set locks */
67 
68 //
69 // For the non-nested locks, we can only assume that the first 4 bytes were
70 // allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel
71 // compiler only allocates a 4 byte pointer on IA-32 architecture. On
72 // Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated.
73 //
74 // gcc reserves >= 8 bytes for nested locks, so we can assume that the
75 // entire 8 bytes were allocated for nested locks on all 64-bit platforms.
76 //
77 
78 static kmp_int32
79 __kmp_get_tas_lock_owner( kmp_tas_lock_t *lck )
80 {
81  return KMP_LOCK_STRIP(TCR_4( lck->lk.poll )) - 1;
82 }
83 
84 static inline bool
85 __kmp_is_tas_lock_nestable( kmp_tas_lock_t *lck )
86 {
87  return lck->lk.depth_locked != -1;
88 }
89 
90 __forceinline static int
91 __kmp_acquire_tas_lock_timed_template( kmp_tas_lock_t *lck, kmp_int32 gtid )
92 {
93  KMP_MB();
94 
95 #ifdef USE_LOCK_PROFILE
96  kmp_uint32 curr = KMP_LOCK_STRIP( TCR_4( lck->lk.poll ) );
97  if ( ( curr != 0 ) && ( curr != gtid + 1 ) )
98  __kmp_printf( "LOCK CONTENTION: %p\n", lck );
99  /* else __kmp_printf( "." );*/
100 #endif /* USE_LOCK_PROFILE */
101 
102  if ( ( lck->lk.poll == KMP_LOCK_FREE(tas) )
103  && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas) ) ) {
104  KMP_FSYNC_ACQUIRED(lck);
105  return KMP_LOCK_ACQUIRED_FIRST;
106  }
107 
108  kmp_uint32 spins;
109  KMP_FSYNC_PREPARE( lck );
110  KMP_INIT_YIELD( spins );
111  if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
112  __kmp_xproc ) ) {
113  KMP_YIELD( TRUE );
114  }
115  else {
116  KMP_YIELD_SPIN( spins );
117  }
118 
119  kmp_backoff_t backoff = __kmp_spin_backoff_params;
120  while ( ( lck->lk.poll != KMP_LOCK_FREE(tas) ) ||
121  ( ! KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas) ) ) ) {
122 
123  __kmp_spin_backoff(&backoff);
124  if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
125  __kmp_xproc ) ) {
126  KMP_YIELD( TRUE );
127  }
128  else {
129  KMP_YIELD_SPIN( spins );
130  }
131  }
132  KMP_FSYNC_ACQUIRED( lck );
133  return KMP_LOCK_ACQUIRED_FIRST;
134 }
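
/* Editor's sketch (not part of the runtime, guarded out of the build): the
   shape of the test-and-set acquire above, reduced to std::atomic.  The lock
   word is 0 when free and owner_id+1 when held; acquisition is a CAS from 0
   to owner_id+1, with a yield-based backoff while the word is observed
   non-zero.  All names below are hypothetical. */
#if 0   // illustration only
#include <atomic>
#include <thread>

struct toy_tas_lock {
    std::atomic<int> poll{0};   // 0 == free, otherwise owner_id + 1

    void acquire(int owner_id) {
        int free_val = 0;
        // Fast path: uncontended CAS with acquire ordering on success.
        if (poll.compare_exchange_strong(free_val, owner_id + 1,
                                         std::memory_order_acquire))
            return;
        // Slow path: spin, yielding between attempts (stands in for
        // KMP_YIELD / __kmp_spin_backoff).
        for (;;) {
            while (poll.load(std::memory_order_relaxed) != 0)
                std::this_thread::yield();
            free_val = 0;
            if (poll.compare_exchange_weak(free_val, owner_id + 1,
                                           std::memory_order_acquire))
                return;
        }
    }

    void release() {
        // Mirrors KMP_ST_REL32: a plain store with release ordering.
        poll.store(0, std::memory_order_release);
    }
};
#endif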
135 
136 int
137 __kmp_acquire_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
138 {
139  int retval = __kmp_acquire_tas_lock_timed_template( lck, gtid );
140  ANNOTATE_TAS_ACQUIRED(lck);
141  return retval;
142 }
143 
144 static int
145 __kmp_acquire_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
146 {
147  char const * const func = "omp_set_lock";
148  if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
149  && __kmp_is_tas_lock_nestable( lck ) ) {
150  KMP_FATAL( LockNestableUsedAsSimple, func );
151  }
152  if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) == gtid ) ) {
153  KMP_FATAL( LockIsAlreadyOwned, func );
154  }
155  return __kmp_acquire_tas_lock( lck, gtid );
156 }
157 
158 int
159 __kmp_test_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
160 {
161  if ( ( lck->lk.poll == KMP_LOCK_FREE(tas) )
162  && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas) ) ) {
163  KMP_FSYNC_ACQUIRED( lck );
164  return TRUE;
165  }
166  return FALSE;
167 }
168 
169 static int
170 __kmp_test_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
171 {
172  char const * const func = "omp_test_lock";
173  if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
174  && __kmp_is_tas_lock_nestable( lck ) ) {
175  KMP_FATAL( LockNestableUsedAsSimple, func );
176  }
177  return __kmp_test_tas_lock( lck, gtid );
178 }
179 
180 int
181 __kmp_release_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
182 {
183  KMP_MB(); /* Flush all pending memory write invalidates. */
184 
185  KMP_FSYNC_RELEASING(lck);
186  ANNOTATE_TAS_RELEASED(lck);
187  KMP_ST_REL32( &(lck->lk.poll), KMP_LOCK_FREE(tas) );
188  KMP_MB(); /* Flush all pending memory write invalidates. */
189 
190  KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
191  __kmp_xproc ) );
192  return KMP_LOCK_RELEASED;
193 }
194 
195 static int
196 __kmp_release_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
197 {
198  char const * const func = "omp_unset_lock";
199  KMP_MB(); /* in case another processor initialized lock */
200  if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
201  && __kmp_is_tas_lock_nestable( lck ) ) {
202  KMP_FATAL( LockNestableUsedAsSimple, func );
203  }
204  if ( __kmp_get_tas_lock_owner( lck ) == -1 ) {
205  KMP_FATAL( LockUnsettingFree, func );
206  }
207  if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) >= 0 )
208  && ( __kmp_get_tas_lock_owner( lck ) != gtid ) ) {
209  KMP_FATAL( LockUnsettingSetByAnother, func );
210  }
211  return __kmp_release_tas_lock( lck, gtid );
212 }
213 
214 void
215 __kmp_init_tas_lock( kmp_tas_lock_t * lck )
216 {
217  TCW_4( lck->lk.poll, KMP_LOCK_FREE(tas) );
218 }
219 
220 static void
221 __kmp_init_tas_lock_with_checks( kmp_tas_lock_t * lck )
222 {
223  __kmp_init_tas_lock( lck );
224 }
225 
226 void
227 __kmp_destroy_tas_lock( kmp_tas_lock_t *lck )
228 {
229  lck->lk.poll = 0;
230 }
231 
232 static void
233 __kmp_destroy_tas_lock_with_checks( kmp_tas_lock_t *lck )
234 {
235  char const * const func = "omp_destroy_lock";
236  if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
237  && __kmp_is_tas_lock_nestable( lck ) ) {
238  KMP_FATAL( LockNestableUsedAsSimple, func );
239  }
240  if ( __kmp_get_tas_lock_owner( lck ) != -1 ) {
241  KMP_FATAL( LockStillOwned, func );
242  }
243  __kmp_destroy_tas_lock( lck );
244 }
245 
246 
247 //
248 // nested test and set locks
249 //
250 
251 int
252 __kmp_acquire_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
253 {
254  KMP_DEBUG_ASSERT( gtid >= 0 );
255 
256  if ( __kmp_get_tas_lock_owner( lck ) == gtid ) {
257  lck->lk.depth_locked += 1;
258  return KMP_LOCK_ACQUIRED_NEXT;
259  }
260  else {
261  __kmp_acquire_tas_lock_timed_template( lck, gtid );
262  ANNOTATE_TAS_ACQUIRED(lck);
263  lck->lk.depth_locked = 1;
264  return KMP_LOCK_ACQUIRED_FIRST;
265  }
266 }
267 
268 static int
269 __kmp_acquire_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
270 {
271  char const * const func = "omp_set_nest_lock";
272  if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
273  KMP_FATAL( LockSimpleUsedAsNestable, func );
274  }
275  return __kmp_acquire_nested_tas_lock( lck, gtid );
276 }
277 
278 int
279 __kmp_test_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
280 {
281  int retval;
282 
283  KMP_DEBUG_ASSERT( gtid >= 0 );
284 
285  if ( __kmp_get_tas_lock_owner( lck ) == gtid ) {
286  retval = ++lck->lk.depth_locked;
287  }
288  else if ( !__kmp_test_tas_lock( lck, gtid ) ) {
289  retval = 0;
290  }
291  else {
292  KMP_MB();
293  retval = lck->lk.depth_locked = 1;
294  }
295  return retval;
296 }
297 
298 static int
299 __kmp_test_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
300 {
301  char const * const func = "omp_test_nest_lock";
302  if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
303  KMP_FATAL( LockSimpleUsedAsNestable, func );
304  }
305  return __kmp_test_nested_tas_lock( lck, gtid );
306 }
307 
308 int
309 __kmp_release_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
310 {
311  KMP_DEBUG_ASSERT( gtid >= 0 );
312 
313  KMP_MB();
314  if ( --(lck->lk.depth_locked) == 0 ) {
315  __kmp_release_tas_lock( lck, gtid );
316  return KMP_LOCK_RELEASED;
317  }
318  return KMP_LOCK_STILL_HELD;
319 }
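
/* Editor's sketch (not part of the runtime, guarded out of the build): the
   nesting scheme shared by all of the *_nested_* entry points in this file.
   Re-entrancy is layered on top of a plain lock with an owner id and a depth
   counter; only the current owner ever touches the depth, so the counter
   itself needs no atomics (the owner id is read by non-owners, hence the
   relaxed atomic).  All names below are hypothetical. */
#if 0   // illustration only
#include <atomic>
#include <mutex>

struct toy_nested_lock {
    std::mutex inner;               // any non-reentrant lock
    std::atomic<int> owner{-1};     // gtid of the holder, -1 when unowned
    int depth = 0;                  // only the owner reads/writes this

    // Returns the nesting depth after the call (1 on first acquisition).
    int acquire(int gtid) {
        if (owner.load(std::memory_order_relaxed) == gtid)
            return ++depth;         // re-acquisition: just bump the depth
        inner.lock();
        owner.store(gtid, std::memory_order_relaxed);
        depth = 1;
        return 1;
    }

    // Returns true when the outermost hold is released (KMP_LOCK_RELEASED),
    // false while the lock is still held (KMP_LOCK_STILL_HELD).
    bool release(int gtid) {
        (void)gtid;                 // validated by the *_with_checks variants
        if (--depth == 0) {
            owner.store(-1, std::memory_order_relaxed);
            inner.unlock();
            return true;
        }
        return false;
    }
};
#endif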
320 
321 static int
322 __kmp_release_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
323 {
324  char const * const func = "omp_unset_nest_lock";
325  KMP_MB(); /* in case another processor initialized lock */
326  if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
327  KMP_FATAL( LockSimpleUsedAsNestable, func );
328  }
329  if ( __kmp_get_tas_lock_owner( lck ) == -1 ) {
330  KMP_FATAL( LockUnsettingFree, func );
331  }
332  if ( __kmp_get_tas_lock_owner( lck ) != gtid ) {
333  KMP_FATAL( LockUnsettingSetByAnother, func );
334  }
335  return __kmp_release_nested_tas_lock( lck, gtid );
336 }
337 
338 void
339 __kmp_init_nested_tas_lock( kmp_tas_lock_t * lck )
340 {
341  __kmp_init_tas_lock( lck );
342  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
343 }
344 
345 static void
346 __kmp_init_nested_tas_lock_with_checks( kmp_tas_lock_t * lck )
347 {
348  __kmp_init_nested_tas_lock( lck );
349 }
350 
351 void
352 __kmp_destroy_nested_tas_lock( kmp_tas_lock_t *lck )
353 {
354  __kmp_destroy_tas_lock( lck );
355  lck->lk.depth_locked = 0;
356 }
357 
358 static void
359 __kmp_destroy_nested_tas_lock_with_checks( kmp_tas_lock_t *lck )
360 {
361  char const * const func = "omp_destroy_nest_lock";
362  if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
363  KMP_FATAL( LockSimpleUsedAsNestable, func );
364  }
365  if ( __kmp_get_tas_lock_owner( lck ) != -1 ) {
366  KMP_FATAL( LockStillOwned, func );
367  }
368  __kmp_destroy_nested_tas_lock( lck );
369 }
370 
371 
372 #if KMP_USE_FUTEX
373 
374 /* ------------------------------------------------------------------------ */
375 /* futex locks */
376 
377 // futex locks are really just test and set locks, with a different method
378 // of handling contention. They take the same amount of space as test and
379 // set locks, and are allocated the same way (i.e. use the area allocated by
380 // the compiler for non-nested locks / allocate nested locks on the heap).
381 
382 static kmp_int32
383 __kmp_get_futex_lock_owner( kmp_futex_lock_t *lck )
384 {
385  return KMP_LOCK_STRIP(( TCR_4( lck->lk.poll ) >> 1 )) - 1;
386 }
387 
388 static inline bool
389 __kmp_is_futex_lock_nestable( kmp_futex_lock_t *lck )
390 {
391  return lck->lk.depth_locked != -1;
392 }
393 
394 __forceinline static int
395 __kmp_acquire_futex_lock_timed_template( kmp_futex_lock_t *lck, kmp_int32 gtid )
396 {
397  kmp_int32 gtid_code = ( gtid + 1 ) << 1;
398 
399  KMP_MB();
400 
401 #ifdef USE_LOCK_PROFILE
402  kmp_uint32 curr = KMP_LOCK_STRIP( TCR_4( lck->lk.poll ) );
403  if ( ( curr != 0 ) && ( curr != gtid_code ) )
404  __kmp_printf( "LOCK CONTENTION: %p\n", lck );
405  /* else __kmp_printf( "." );*/
406 #endif /* USE_LOCK_PROFILE */
407 
408  KMP_FSYNC_PREPARE( lck );
409  KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n",
410  lck, lck->lk.poll, gtid ) );
411 
412  kmp_int32 poll_val;
413 
414  while ( ( poll_val = KMP_COMPARE_AND_STORE_RET32( & ( lck->lk.poll ), KMP_LOCK_FREE(futex),
415  KMP_LOCK_BUSY(gtid_code, futex) ) ) != KMP_LOCK_FREE(futex) ) {
416 
417  kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;
418  KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n",
419  lck, gtid, poll_val, cond ) );
420 
421  //
422  // NOTE: if you try to use the following condition for this branch
423  //
424  // if ( poll_val & 1 == 0 )
425  //
426  // Then the 12.0 compiler has a bug where the following block will
427  // always be skipped, regardless of the value of the LSB of poll_val.
428  //
429  if ( ! cond ) {
430  //
431  // Try to set the lsb in the poll to indicate to the owner
432  // thread that they need to wake this thread up.
433  //
434  if ( ! KMP_COMPARE_AND_STORE_REL32( & ( lck->lk.poll ), poll_val, poll_val | KMP_LOCK_BUSY(1, futex) ) ) {
435  KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n",
436  lck, lck->lk.poll, gtid ) );
437  continue;
438  }
439  poll_val |= KMP_LOCK_BUSY(1, futex);
440 
441  KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n",
442  lck, lck->lk.poll, gtid ) );
443  }
444 
445  KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n",
446  lck, gtid, poll_val ) );
447 
448  kmp_int32 rc;
449  if ( ( rc = syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAIT,
450  poll_val, NULL, NULL, 0 ) ) != 0 ) {
451  KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) failed (rc=%d errno=%d)\n",
452  lck, gtid, poll_val, rc, errno ) );
453  continue;
454  }
455 
456  KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n",
457  lck, gtid, poll_val ) );
458  //
459  // This thread has now done a successful futex wait call and was
460  // entered on the OS futex queue. We must now perform a futex
461  // wake call when releasing the lock, as we have no idea how many
462  // other threads are in the queue.
463  //
464  gtid_code |= 1;
465  }
466 
467  KMP_FSYNC_ACQUIRED( lck );
468  KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n",
469  lck, lck->lk.poll, gtid ) );
470  return KMP_LOCK_ACQUIRED_FIRST;
471 }
472 
473 int
474 __kmp_acquire_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
475 {
476  int retval = __kmp_acquire_futex_lock_timed_template( lck, gtid );
477  ANNOTATE_FUTEX_ACQUIRED(lck);
478  return retval;
479 }
480 
481 static int
482 __kmp_acquire_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
483 {
484  char const * const func = "omp_set_lock";
485  if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
486  && __kmp_is_futex_lock_nestable( lck ) ) {
487  KMP_FATAL( LockNestableUsedAsSimple, func );
488  }
489  if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) == gtid ) ) {
490  KMP_FATAL( LockIsAlreadyOwned, func );
491  }
492  return __kmp_acquire_futex_lock( lck, gtid );
493 }
494 
495 int
496 __kmp_test_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
497 {
498  if ( KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(futex), KMP_LOCK_BUSY((gtid+1) << 1, futex) ) ) {
499  KMP_FSYNC_ACQUIRED( lck );
500  return TRUE;
501  }
502  return FALSE;
503 }
504 
505 static int
506 __kmp_test_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
507 {
508  char const * const func = "omp_test_lock";
509  if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
510  && __kmp_is_futex_lock_nestable( lck ) ) {
511  KMP_FATAL( LockNestableUsedAsSimple, func );
512  }
513  return __kmp_test_futex_lock( lck, gtid );
514 }
515 
516 int
517 __kmp_release_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
518 {
519  KMP_MB(); /* Flush all pending memory write invalidates. */
520 
521  KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n",
522  lck, lck->lk.poll, gtid ) );
523 
524  KMP_FSYNC_RELEASING(lck);
525  ANNOTATE_FUTEX_RELEASED(lck);
526 
527  kmp_int32 poll_val = KMP_XCHG_FIXED32( & ( lck->lk.poll ), KMP_LOCK_FREE(futex) );
528 
529  KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n",
530  lck, gtid, poll_val ) );
531 
532  if ( KMP_LOCK_STRIP(poll_val) & 1 ) {
533  KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n",
534  lck, gtid ) );
535  syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAKE, KMP_LOCK_BUSY(1, futex), NULL, NULL, 0 );
536  }
537 
538  KMP_MB(); /* Flush all pending memory write invalidates. */
539 
540  KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n",
541  lck, lck->lk.poll, gtid ) );
542 
543  KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
544  __kmp_xproc ) );
545  return KMP_LOCK_RELEASED;
546 }
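
/* Editor's sketch (Linux-only, not part of the runtime, guarded out of the
   build): the futex protocol used above, reduced to its core.  The lock word
   is 0 when free and ((owner+1) << 1) when held; bit 0 marks "a waiter may be
   sleeping", so release only issues FUTEX_WAKE when that bit was set.  All
   names are hypothetical and error handling is omitted. */
#if 0   // illustration only
#include <atomic>
#include <unistd.h>
#include <sys/syscall.h>
#ifndef FUTEX_WAIT
#define FUTEX_WAIT 0
#endif
#ifndef FUTEX_WAKE
#define FUTEX_WAKE 1
#endif

struct toy_futex_lock {
    std::atomic<int> word{0};   // 0 free, (owner+1)<<1 held, |1 if contended

    long futex_call(int op, int val) {
        return syscall(__NR_futex, &word, op, val, NULL, NULL, 0);
    }

    void lock(int owner) {
        int locked = (owner + 1) << 1;     // even value identifying the owner
        int expected = 0;
        if (word.compare_exchange_strong(expected, locked,
                                         std::memory_order_acquire))
            return;                        // uncontended fast path
        // Slow path: from here on we acquire with bit 0 already set, so a
        // later unlock() always issues the wake (a conservative version of
        // the "gtid_code |= 1" step in the code above).
        locked |= 1;
        for (;;) {
            int observed = word.load(std::memory_order_relaxed);
            if (observed == 0) {
                if (word.compare_exchange_strong(observed, locked,
                                                 std::memory_order_acquire))
                    return;
                continue;
            }
            // Make sure the "waiters present" bit is set before sleeping.
            if ((observed & 1) == 0 &&
                !word.compare_exchange_strong(observed, observed | 1,
                                              std::memory_order_relaxed))
                continue;
            // Sleep until the word changes from the value we just saw; the
            // kernel returns immediately if it already changed.
            futex_call(FUTEX_WAIT, observed | 1);
        }
    }

    void unlock() {
        // Swap the word back to "free"; wake one sleeper only if somebody
        // set the contended bit while we held the lock.
        int old = word.exchange(0, std::memory_order_release);
        if (old & 1)
            futex_call(FUTEX_WAKE, 1);
    }
};
#endif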
547 
548 static int
549 __kmp_release_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
550 {
551  char const * const func = "omp_unset_lock";
552  KMP_MB(); /* in case another processor initialized lock */
553  if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
554  && __kmp_is_futex_lock_nestable( lck ) ) {
555  KMP_FATAL( LockNestableUsedAsSimple, func );
556  }
557  if ( __kmp_get_futex_lock_owner( lck ) == -1 ) {
558  KMP_FATAL( LockUnsettingFree, func );
559  }
560  if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) >= 0 )
561  && ( __kmp_get_futex_lock_owner( lck ) != gtid ) ) {
562  KMP_FATAL( LockUnsettingSetByAnother, func );
563  }
564  return __kmp_release_futex_lock( lck, gtid );
565 }
566 
567 void
568 __kmp_init_futex_lock( kmp_futex_lock_t * lck )
569 {
570  TCW_4( lck->lk.poll, KMP_LOCK_FREE(futex) );
571 }
572 
573 static void
574 __kmp_init_futex_lock_with_checks( kmp_futex_lock_t * lck )
575 {
576  __kmp_init_futex_lock( lck );
577 }
578 
579 void
580 __kmp_destroy_futex_lock( kmp_futex_lock_t *lck )
581 {
582  lck->lk.poll = 0;
583 }
584 
585 static void
586 __kmp_destroy_futex_lock_with_checks( kmp_futex_lock_t *lck )
587 {
588  char const * const func = "omp_destroy_lock";
589  if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
590  && __kmp_is_futex_lock_nestable( lck ) ) {
591  KMP_FATAL( LockNestableUsedAsSimple, func );
592  }
593  if ( __kmp_get_futex_lock_owner( lck ) != -1 ) {
594  KMP_FATAL( LockStillOwned, func );
595  }
596  __kmp_destroy_futex_lock( lck );
597 }
598 
599 
600 //
601 // nested futex locks
602 //
603 
604 int
605 __kmp_acquire_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
606 {
607  KMP_DEBUG_ASSERT( gtid >= 0 );
608 
609  if ( __kmp_get_futex_lock_owner( lck ) == gtid ) {
610  lck->lk.depth_locked += 1;
611  return KMP_LOCK_ACQUIRED_NEXT;
612  }
613  else {
614  __kmp_acquire_futex_lock_timed_template( lck, gtid );
615  ANNOTATE_FUTEX_ACQUIRED(lck);
616  lck->lk.depth_locked = 1;
617  return KMP_LOCK_ACQUIRED_FIRST;
618  }
619 }
620 
621 static int
622 __kmp_acquire_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
623 {
624  char const * const func = "omp_set_nest_lock";
625  if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
626  KMP_FATAL( LockSimpleUsedAsNestable, func );
627  }
628  return __kmp_acquire_nested_futex_lock( lck, gtid );
629 }
630 
631 int
632 __kmp_test_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
633 {
634  int retval;
635 
636  KMP_DEBUG_ASSERT( gtid >= 0 );
637 
638  if ( __kmp_get_futex_lock_owner( lck ) == gtid ) {
639  retval = ++lck->lk.depth_locked;
640  }
641  else if ( !__kmp_test_futex_lock( lck, gtid ) ) {
642  retval = 0;
643  }
644  else {
645  KMP_MB();
646  retval = lck->lk.depth_locked = 1;
647  }
648  return retval;
649 }
650 
651 static int
652 __kmp_test_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
653 {
654  char const * const func = "omp_test_nest_lock";
655  if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
656  KMP_FATAL( LockSimpleUsedAsNestable, func );
657  }
658  return __kmp_test_nested_futex_lock( lck, gtid );
659 }
660 
661 int
662 __kmp_release_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
663 {
664  KMP_DEBUG_ASSERT( gtid >= 0 );
665 
666  KMP_MB();
667  if ( --(lck->lk.depth_locked) == 0 ) {
668  __kmp_release_futex_lock( lck, gtid );
669  return KMP_LOCK_RELEASED;
670  }
671  return KMP_LOCK_STILL_HELD;
672 }
673 
674 static int
675 __kmp_release_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
676 {
677  char const * const func = "omp_unset_nest_lock";
678  KMP_MB(); /* in case another processor initialized lock */
679  if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
680  KMP_FATAL( LockSimpleUsedAsNestable, func );
681  }
682  if ( __kmp_get_futex_lock_owner( lck ) == -1 ) {
683  KMP_FATAL( LockUnsettingFree, func );
684  }
685  if ( __kmp_get_futex_lock_owner( lck ) != gtid ) {
686  KMP_FATAL( LockUnsettingSetByAnother, func );
687  }
688  return __kmp_release_nested_futex_lock( lck, gtid );
689 }
690 
691 void
692 __kmp_init_nested_futex_lock( kmp_futex_lock_t * lck )
693 {
694  __kmp_init_futex_lock( lck );
695  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
696 }
697 
698 static void
699 __kmp_init_nested_futex_lock_with_checks( kmp_futex_lock_t * lck )
700 {
701  __kmp_init_nested_futex_lock( lck );
702 }
703 
704 void
705 __kmp_destroy_nested_futex_lock( kmp_futex_lock_t *lck )
706 {
707  __kmp_destroy_futex_lock( lck );
708  lck->lk.depth_locked = 0;
709 }
710 
711 static void
712 __kmp_destroy_nested_futex_lock_with_checks( kmp_futex_lock_t *lck )
713 {
714  char const * const func = "omp_destroy_nest_lock";
715  if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
716  KMP_FATAL( LockSimpleUsedAsNestable, func );
717  }
718  if ( __kmp_get_futex_lock_owner( lck ) != -1 ) {
719  KMP_FATAL( LockStillOwned, func );
720  }
721  __kmp_destroy_nested_futex_lock( lck );
722 }
723 
724 #endif // KMP_USE_FUTEX
725 
726 
727 /* ------------------------------------------------------------------------ */
728 /* ticket (bakery) locks */
729 
730 static kmp_int32
731 __kmp_get_ticket_lock_owner( kmp_ticket_lock_t *lck )
732 {
733  return std::atomic_load_explicit( &lck->lk.owner_id, std::memory_order_relaxed ) - 1;
734 }
735 
736 static inline bool
737 __kmp_is_ticket_lock_nestable( kmp_ticket_lock_t *lck )
738 {
739  return std::atomic_load_explicit( &lck->lk.depth_locked, std::memory_order_relaxed ) != -1;
740 }
741 
742 static kmp_uint32
743 __kmp_bakery_check( void *now_serving, kmp_uint32 my_ticket )
744 {
745  return std::atomic_load_explicit( (std::atomic<unsigned> *)now_serving, std::memory_order_acquire ) == my_ticket;
746 }
747 
748 __forceinline static int
749 __kmp_acquire_ticket_lock_timed_template( kmp_ticket_lock_t *lck, kmp_int32 gtid )
750 {
751  kmp_uint32 my_ticket = std::atomic_fetch_add_explicit( &lck->lk.next_ticket, 1U, std::memory_order_relaxed );
752 
753 #ifdef USE_LOCK_PROFILE
754  if ( std::atomic_load_explicit( &lck->lk.now_serving, std::memory_order_relaxed ) != my_ticket )
755  __kmp_printf( "LOCK CONTENTION: %p\n", lck );
756  /* else __kmp_printf( "." );*/
757 #endif /* USE_LOCK_PROFILE */
758 
759  if ( std::atomic_load_explicit( &lck->lk.now_serving, std::memory_order_acquire ) == my_ticket ) {
760  return KMP_LOCK_ACQUIRED_FIRST;
761  }
762  KMP_WAIT_YIELD_PTR( &lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck );
763  return KMP_LOCK_ACQUIRED_FIRST;
764 }
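
/* Editor's sketch (not part of the runtime, guarded out of the build): the
   ticket (bakery) protocol used above, in isolation.  fetch_add on
   next_ticket hands out tickets in FIFO order; each waiter spins until
   now_serving reaches its own ticket; release simply bumps now_serving.
   Unsigned wraparound keeps the comparison valid (see __kmp_validate_locks).
   Names are hypothetical. */
#if 0   // illustration only
#include <atomic>
#include <cstdint>
#include <thread>

struct toy_ticket_lock {
    std::atomic<uint32_t> next_ticket{0};
    std::atomic<uint32_t> now_serving{0};

    void lock() {
        uint32_t my_ticket =
            next_ticket.fetch_add(1, std::memory_order_relaxed);
        while (now_serving.load(std::memory_order_acquire) != my_ticket)
            std::this_thread::yield();   // stands in for KMP_WAIT_YIELD_PTR
    }

    void unlock() {
        // Only the holder writes now_serving, so a fetch_add with release
        // ordering (as in __kmp_release_ticket_lock) is sufficient.
        now_serving.fetch_add(1, std::memory_order_release);
    }
};
#endif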
765 
766 int
767 __kmp_acquire_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
768 {
769  int retval = __kmp_acquire_ticket_lock_timed_template( lck, gtid );
770  ANNOTATE_TICKET_ACQUIRED(lck);
771  return retval;
772 }
773 
774 static int
775 __kmp_acquire_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
776 {
777  char const * const func = "omp_set_lock";
778 
779  if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) {
780  KMP_FATAL( LockIsUninitialized, func );
781  }
782  if ( lck->lk.self != lck ) {
783  KMP_FATAL( LockIsUninitialized, func );
784  }
785  if ( __kmp_is_ticket_lock_nestable( lck ) ) {
786  KMP_FATAL( LockNestableUsedAsSimple, func );
787  }
788  if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) == gtid ) ) {
789  KMP_FATAL( LockIsAlreadyOwned, func );
790  }
791 
792  __kmp_acquire_ticket_lock( lck, gtid );
793 
794  std::atomic_store_explicit( &lck->lk.owner_id, gtid + 1, std::memory_order_relaxed );
795  return KMP_LOCK_ACQUIRED_FIRST;
796 }
797 
798 int
799 __kmp_test_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
800 {
801  kmp_uint32 my_ticket = std::atomic_load_explicit( &lck->lk.next_ticket, std::memory_order_relaxed );
802 
803  if ( std::atomic_load_explicit( &lck->lk.now_serving, std::memory_order_relaxed ) == my_ticket ) {
804  kmp_uint32 next_ticket = my_ticket + 1;
805  if ( std::atomic_compare_exchange_strong_explicit( &lck->lk.next_ticket,
806  &my_ticket, next_ticket, std::memory_order_acquire, std::memory_order_acquire )) {
807  return TRUE;
808  }
809  }
810  return FALSE;
811 }
812 
813 static int
814 __kmp_test_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
815 {
816  char const * const func = "omp_test_lock";
817 
818  if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) {
819  KMP_FATAL( LockIsUninitialized, func );
820  }
821  if ( lck->lk.self != lck ) {
822  KMP_FATAL( LockIsUninitialized, func );
823  }
824  if ( __kmp_is_ticket_lock_nestable( lck ) ) {
825  KMP_FATAL( LockNestableUsedAsSimple, func );
826  }
827 
828  int retval = __kmp_test_ticket_lock( lck, gtid );
829 
830  if ( retval ) {
831  std::atomic_store_explicit( &lck->lk.owner_id, gtid + 1, std::memory_order_relaxed );
832  }
833  return retval;
834 }
835 
836 int
837 __kmp_release_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
838 {
839  kmp_uint32 distance = std::atomic_load_explicit( &lck->lk.next_ticket, std::memory_order_relaxed ) - std::atomic_load_explicit( &lck->lk.now_serving, std::memory_order_relaxed );
840 
841  ANNOTATE_TICKET_RELEASED(lck);
842  std::atomic_fetch_add_explicit( &lck->lk.now_serving, 1U, std::memory_order_release );
843 
844  KMP_YIELD( distance
845  > (kmp_uint32) (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) );
846  return KMP_LOCK_RELEASED;
847 }
848 
849 static int
850 __kmp_release_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
851 {
852  char const * const func = "omp_unset_lock";
853 
854  if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) {
855  KMP_FATAL( LockIsUninitialized, func );
856  }
857  if ( lck->lk.self != lck ) {
858  KMP_FATAL( LockIsUninitialized, func );
859  }
860  if ( __kmp_is_ticket_lock_nestable( lck ) ) {
861  KMP_FATAL( LockNestableUsedAsSimple, func );
862  }
863  if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) {
864  KMP_FATAL( LockUnsettingFree, func );
865  }
866  if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) >= 0 )
867  && ( __kmp_get_ticket_lock_owner( lck ) != gtid ) ) {
868  KMP_FATAL( LockUnsettingSetByAnother, func );
869  }
870  std::atomic_store_explicit( &lck->lk.owner_id, 0, std::memory_order_relaxed );
871  return __kmp_release_ticket_lock( lck, gtid );
872 }
873 
874 void
875 __kmp_init_ticket_lock( kmp_ticket_lock_t * lck )
876 {
877  lck->lk.location = NULL;
878  lck->lk.self = lck;
879  std::atomic_store_explicit( &lck->lk.next_ticket, 0U, std::memory_order_relaxed );
880  std::atomic_store_explicit( &lck->lk.now_serving, 0U, std::memory_order_relaxed );
881  std::atomic_store_explicit( &lck->lk.owner_id, 0, std::memory_order_relaxed ); // no thread owns the lock.
882  std::atomic_store_explicit( &lck->lk.depth_locked, -1, std::memory_order_relaxed ); // -1 => not a nested lock.
883  std::atomic_store_explicit( &lck->lk.initialized, true, std::memory_order_release );
884 }
885 
886 static void
887 __kmp_init_ticket_lock_with_checks( kmp_ticket_lock_t * lck )
888 {
889  __kmp_init_ticket_lock( lck );
890 }
891 
892 void
893 __kmp_destroy_ticket_lock( kmp_ticket_lock_t *lck )
894 {
895  std::atomic_store_explicit( &lck->lk.initialized, false, std::memory_order_release );
896  lck->lk.self = NULL;
897  lck->lk.location = NULL;
898  std::atomic_store_explicit( &lck->lk.next_ticket, 0U, std::memory_order_relaxed );
899  std::atomic_store_explicit( &lck->lk.now_serving, 0U, std::memory_order_relaxed );
900  std::atomic_store_explicit( &lck->lk.owner_id, 0, std::memory_order_relaxed );
901  std::atomic_store_explicit( &lck->lk.depth_locked, -1, std::memory_order_relaxed );
902 }
903 
904 static void
905 __kmp_destroy_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
906 {
907  char const * const func = "omp_destroy_lock";
908 
909  if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) {
910  KMP_FATAL( LockIsUninitialized, func );
911  }
912  if ( lck->lk.self != lck ) {
913  KMP_FATAL( LockIsUninitialized, func );
914  }
915  if ( __kmp_is_ticket_lock_nestable( lck ) ) {
916  KMP_FATAL( LockNestableUsedAsSimple, func );
917  }
918  if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) {
919  KMP_FATAL( LockStillOwned, func );
920  }
921  __kmp_destroy_ticket_lock( lck );
922 }
923 
924 
925 //
926 // nested ticket locks
927 //
928 
929 int
930 __kmp_acquire_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
931 {
932  KMP_DEBUG_ASSERT( gtid >= 0 );
933 
934  if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) {
935  std::atomic_fetch_add_explicit( &lck->lk.depth_locked, 1, std::memory_order_relaxed );
936  return KMP_LOCK_ACQUIRED_NEXT;
937  }
938  else {
939  __kmp_acquire_ticket_lock_timed_template( lck, gtid );
940  ANNOTATE_TICKET_ACQUIRED(lck);
941  std::atomic_store_explicit( &lck->lk.depth_locked, 1, std::memory_order_relaxed );
942  std::atomic_store_explicit( &lck->lk.owner_id, gtid + 1, std::memory_order_relaxed );
943  return KMP_LOCK_ACQUIRED_FIRST;
944  }
945 }
946 
947 static int
948 __kmp_acquire_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
949 {
950  char const * const func = "omp_set_nest_lock";
951 
952  if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) {
953  KMP_FATAL( LockIsUninitialized, func );
954  }
955  if ( lck->lk.self != lck ) {
956  KMP_FATAL( LockIsUninitialized, func );
957  }
958  if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
959  KMP_FATAL( LockSimpleUsedAsNestable, func );
960  }
961  return __kmp_acquire_nested_ticket_lock( lck, gtid );
962 }
963 
964 int
965 __kmp_test_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
966 {
967  int retval;
968 
969  KMP_DEBUG_ASSERT( gtid >= 0 );
970 
971  if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) {
972  retval = std::atomic_fetch_add_explicit( &lck->lk.depth_locked, 1, std::memory_order_relaxed ) + 1;
973  }
974  else if ( !__kmp_test_ticket_lock( lck, gtid ) ) {
975  retval = 0;
976  }
977  else {
978  std::atomic_store_explicit( &lck->lk.depth_locked, 1, std::memory_order_relaxed );
979  std::atomic_store_explicit( &lck->lk.owner_id, gtid + 1, std::memory_order_relaxed );
980  retval = 1;
981  }
982  return retval;
983 }
984 
985 static int
986 __kmp_test_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck,
987  kmp_int32 gtid )
988 {
989  char const * const func = "omp_test_nest_lock";
990 
991  if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) {
992  KMP_FATAL( LockIsUninitialized, func );
993  }
994  if ( lck->lk.self != lck ) {
995  KMP_FATAL( LockIsUninitialized, func );
996  }
997  if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
998  KMP_FATAL( LockSimpleUsedAsNestable, func );
999  }
1000  return __kmp_test_nested_ticket_lock( lck, gtid );
1001 }
1002 
1003 int
1004 __kmp_release_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
1005 {
1006  KMP_DEBUG_ASSERT( gtid >= 0 );
1007 
1008  if ( ( std::atomic_fetch_add_explicit( &lck->lk.depth_locked, -1, std::memory_order_relaxed ) - 1 ) == 0 ) {
1009  std::atomic_store_explicit( &lck->lk.owner_id, 0, std::memory_order_relaxed );
1010  __kmp_release_ticket_lock( lck, gtid );
1011  return KMP_LOCK_RELEASED;
1012  }
1013  return KMP_LOCK_STILL_HELD;
1014 }
1015 
1016 static int
1017 __kmp_release_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
1018 {
1019  char const * const func = "omp_unset_nest_lock";
1020 
1021  if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) {
1022  KMP_FATAL( LockIsUninitialized, func );
1023  }
1024  if ( lck->lk.self != lck ) {
1025  KMP_FATAL( LockIsUninitialized, func );
1026  }
1027  if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
1028  KMP_FATAL( LockSimpleUsedAsNestable, func );
1029  }
1030  if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) {
1031  KMP_FATAL( LockUnsettingFree, func );
1032  }
1033  if ( __kmp_get_ticket_lock_owner( lck ) != gtid ) {
1034  KMP_FATAL( LockUnsettingSetByAnother, func );
1035  }
1036  return __kmp_release_nested_ticket_lock( lck, gtid );
1037 }
1038 
1039 void
1040 __kmp_init_nested_ticket_lock( kmp_ticket_lock_t * lck )
1041 {
1042  __kmp_init_ticket_lock( lck );
1043  std::atomic_store_explicit( &lck->lk.depth_locked, 0, std::memory_order_relaxed ); // >= 0 for nestable locks, -1 for simple locks
1044 }
1045 
1046 static void
1047 __kmp_init_nested_ticket_lock_with_checks( kmp_ticket_lock_t * lck )
1048 {
1049  __kmp_init_nested_ticket_lock( lck );
1050 }
1051 
1052 void
1053 __kmp_destroy_nested_ticket_lock( kmp_ticket_lock_t *lck )
1054 {
1055  __kmp_destroy_ticket_lock( lck );
1056  std::atomic_store_explicit( &lck->lk.depth_locked, 0, std::memory_order_relaxed );
1057 }
1058 
1059 static void
1060 __kmp_destroy_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
1061 {
1062  char const * const func = "omp_destroy_nest_lock";
1063 
1064  if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) {
1065  KMP_FATAL( LockIsUninitialized, func );
1066  }
1067  if ( lck->lk.self != lck ) {
1068  KMP_FATAL( LockIsUninitialized, func );
1069  }
1070  if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
1071  KMP_FATAL( LockSimpleUsedAsNestable, func );
1072  }
1073  if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) {
1074  KMP_FATAL( LockStillOwned, func );
1075  }
1076  __kmp_destroy_nested_ticket_lock( lck );
1077 }
1078 
1079 
1080 //
1081 // access functions to fields which don't exist for all lock kinds.
1082 //
1083 
1084 static int
1085 __kmp_is_ticket_lock_initialized( kmp_ticket_lock_t *lck )
1086 {
1087  return std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) && ( lck->lk.self == lck);
1088 }
1089 
1090 static const ident_t *
1091 __kmp_get_ticket_lock_location( kmp_ticket_lock_t *lck )
1092 {
1093  return lck->lk.location;
1094 }
1095 
1096 static void
1097 __kmp_set_ticket_lock_location( kmp_ticket_lock_t *lck, const ident_t *loc )
1098 {
1099  lck->lk.location = loc;
1100 }
1101 
1102 static kmp_lock_flags_t
1103 __kmp_get_ticket_lock_flags( kmp_ticket_lock_t *lck )
1104 {
1105  return lck->lk.flags;
1106 }
1107 
1108 static void
1109 __kmp_set_ticket_lock_flags( kmp_ticket_lock_t *lck, kmp_lock_flags_t flags )
1110 {
1111  lck->lk.flags = flags;
1112 }
1113 
1114 /* ------------------------------------------------------------------------ */
1115 /* queuing locks */
1116 
/*
 * First the states
 *    (head,tail) =              0, 0  means lock is unheld, nobody on queue
 *                   UINT_MAX or -1, 0  means lock is held, nobody on queue
 *                                h, h  means lock is held or about to transition, 1 element on queue
 *                                h, t  h <> t, means lock is held or about to transition, >1 elements on queue
 *
 * Now the transitions
 *    Acquire(0,0)  = -1 ,0
 *    Release(0,0)  = Error
 *    Acquire(-1,0) =  h ,h    h > 0
 *    Release(-1,0) =  0 ,0
 *    Acquire(h,h)  =  h ,t    h > 0, t > 0, h <> t
 *    Release(h,h)  = -1 ,0    h > 0
 *    Acquire(h,t)  =  h ,t'   h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t'
 *    Release(h,t)  =  h',t    h > 0, t > 0, h <> t, h <> h', h' maybe = t
 *
 * And pictorially
 *
 *
 *          +-----+
 *          | 0, 0|------- release -------> Error
 *          +-----+
 *            |  ^
 *     acquire| |release
 *            | |
 *            | |
 *            v  |
 *          +-----+
 *          |-1, 0|
 *          +-----+
 *            |  ^
 *     acquire| |release
 *            | |
 *            | |
 *            v  |
 *          +-----+
 *          | h, h|
 *          +-----+
 *            |  ^
 *     acquire| |release
 *            | |
 *            | |
 *            v  |
 *          +-----+
 *          | h, t|----- acquire, release loopback ---+
 *          +-----+                                   |
 *              ^                                     |
 *              |                                     |
 *              +-------------------------------------+
 *
 */
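
/* Editor's sketch (not part of the runtime, guarded out of the build): the
   transitions above that must change head and tail together --
   (-1,0)->(tid,tid) and (h,h)->(-1,0) -- are done as a single 64-bit CAS over
   the two adjacent 32-bit fields (KMP_PACK_64); the 8-byte alignment assert
   in __kmp_validate_locks exists to make that safe.  The reduced illustration
   below packs the pair into one atomic 64-bit word; the field order and
   endianness handled by the real KMP_PACK_64 are glossed over, and all names
   are hypothetical. */
#if 0   // illustration only
#include <atomic>
#include <cstdint>

struct toy_head_tail {
    std::atomic<uint64_t> packed{0};   // both ids in one CAS-able word

    static uint64_t pack(int32_t head, int32_t tail) {
        return (uint64_t)(uint32_t)head | ((uint64_t)(uint32_t)tail << 32);
    }

    // try (0,0) -> (-1,0): take the lock when the queue is empty
    // (the real code does this as a 32-bit CAS on head alone).
    bool try_grab() {
        uint64_t expected = pack(0, 0);
        return packed.compare_exchange_strong(expected, pack(-1, 0),
                                              std::memory_order_acquire);
    }

    // try (-1,0) -> (tid,tid): become the single queued waiter
    bool try_enqueue_first(int32_t tid) {
        uint64_t expected = pack(-1, 0);
        return packed.compare_exchange_strong(expected, pack(tid, tid),
                                              std::memory_order_acquire);
    }

    // try (h,h) -> (-1,0): the releaser dequeues the only waiter
    bool try_dequeue_last(int32_t h) {
        uint64_t expected = pack(h, h);
        return packed.compare_exchange_strong(expected, pack(-1, 0),
                                              std::memory_order_release);
    }
};
#endif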
1169 
1170 #ifdef DEBUG_QUEUING_LOCKS
1171 
1172 /* Stuff for circular trace buffer */
1173 #define TRACE_BUF_ELE 1024
static char traces[TRACE_BUF_ELE][128] = { 0 };
1175 static int tc = 0;
1176 #define TRACE_LOCK(X,Y) KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s\n", X, Y );
1177 #define TRACE_LOCK_T(X,Y,Z) KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s%d\n", X,Y,Z );
1178 #define TRACE_LOCK_HT(X,Y,Z,Q) KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s %d,%d\n", X, Y, Z, Q );
1179 
1180 static void
1181 __kmp_dump_queuing_lock( kmp_info_t *this_thr, kmp_int32 gtid,
1182  kmp_queuing_lock_t *lck, kmp_int32 head_id, kmp_int32 tail_id )
1183 {
1184  kmp_int32 t, i;
1185 
1186  __kmp_printf_no_lock( "\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! \n" );
1187 
1188  i = tc % TRACE_BUF_ELE;
1189  __kmp_printf_no_lock( "%s\n", traces[i] );
1190  i = (i+1) % TRACE_BUF_ELE;
1191  while ( i != (tc % TRACE_BUF_ELE) ) {
1192  __kmp_printf_no_lock( "%s", traces[i] );
1193  i = (i+1) % TRACE_BUF_ELE;
1194  }
1195  __kmp_printf_no_lock( "\n" );
1196 
1197  __kmp_printf_no_lock(
1198  "\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, next_wait:%d, head_id:%d, tail_id:%d\n",
1199  gtid+1, this_thr->th.th_spin_here, this_thr->th.th_next_waiting,
1200  head_id, tail_id );
1201 
1202  __kmp_printf_no_lock( "\t\thead: %d ", lck->lk.head_id );
1203 
1204  if ( lck->lk.head_id >= 1 ) {
1205  t = __kmp_threads[lck->lk.head_id-1]->th.th_next_waiting;
1206  while (t > 0) {
1207  __kmp_printf_no_lock( "-> %d ", t );
1208  t = __kmp_threads[t-1]->th.th_next_waiting;
1209  }
1210  }
1211  __kmp_printf_no_lock( "; tail: %d ", lck->lk.tail_id );
1212  __kmp_printf_no_lock( "\n\n" );
1213 }
1214 
1215 #endif /* DEBUG_QUEUING_LOCKS */
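
/* Editor's note with a reduced sketch (not part of the runtime, guarded out
   of the build): the trace macros above keep the most recent TRACE_BUF_ELE
   messages by writing to slot tc % TRACE_BUF_ELE while tc keeps growing, so
   the dump routine can start at the oldest surviving slot and walk forward.
   The same indexing in isolation, with hypothetical names: */
#if 0   // illustration only
#include <cstdio>

enum { RING_SIZE = 8 };
static char ring[RING_SIZE][128];
static int write_count = 0;

static void ring_put(const char *msg) {
    std::snprintf(ring[write_count++ % RING_SIZE], sizeof ring[0], "%s", msg);
}

static void ring_dump(void) {
    // Once the ring has wrapped, the oldest entry lives at
    // write_count % RING_SIZE; walking RING_SIZE slots from there prints
    // oldest to newest.
    for (int i = 0; i < RING_SIZE; ++i)
        std::printf("%s\n", ring[(write_count + i) % RING_SIZE]);
}
#endif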
1216 
1217 static kmp_int32
1218 __kmp_get_queuing_lock_owner( kmp_queuing_lock_t *lck )
1219 {
1220  return TCR_4( lck->lk.owner_id ) - 1;
1221 }
1222 
1223 static inline bool
1224 __kmp_is_queuing_lock_nestable( kmp_queuing_lock_t *lck )
1225 {
1226  return lck->lk.depth_locked != -1;
1227 }
1228 
/* Acquire a lock using the queuing lock implementation */
1230 template <bool takeTime>
1231 /* [TLW] The unused template above is left behind because of what BEB believes is a
1232  potential compiler problem with __forceinline. */
1233 __forceinline static int
1234 __kmp_acquire_queuing_lock_timed_template( kmp_queuing_lock_t *lck,
1235  kmp_int32 gtid )
1236 {
1237  register kmp_info_t *this_thr = __kmp_thread_from_gtid( gtid );
1238  volatile kmp_int32 *head_id_p = & lck->lk.head_id;
1239  volatile kmp_int32 *tail_id_p = & lck->lk.tail_id;
1240  volatile kmp_uint32 *spin_here_p;
1241  kmp_int32 need_mf = 1;
1242 
1243 #if OMPT_SUPPORT
1244  ompt_state_t prev_state = ompt_state_undefined;
1245 #endif
1246 
1247  KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid ));
1248 
1249  KMP_FSYNC_PREPARE( lck );
1250  KMP_DEBUG_ASSERT( this_thr != NULL );
1251  spin_here_p = & this_thr->th.th_spin_here;
1252 
1253 #ifdef DEBUG_QUEUING_LOCKS
1254  TRACE_LOCK( gtid+1, "acq ent" );
1255  if ( *spin_here_p )
1256  __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1257  if ( this_thr->th.th_next_waiting != 0 )
1258  __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1259 #endif
1260  KMP_DEBUG_ASSERT( !*spin_here_p );
1261  KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
1262 
1263 
1264  /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to head_id_p
1265  that may follow, not just in execution order, but also in visibility order. This way,
1266  when a releasing thread observes the changes to the queue by this thread, it can
1267  rightly assume that spin_here_p has already been set to TRUE, so that when it sets
1268  spin_here_p to FALSE, it is not premature. If the releasing thread sets spin_here_p
1269  to FALSE before this thread sets it to TRUE, this thread will hang.
1270  */
1271  *spin_here_p = TRUE; /* before enqueuing to prevent race */
1272 
1273  while( 1 ) {
1274  kmp_int32 enqueued;
1275  kmp_int32 head;
1276  kmp_int32 tail;
1277 
1278  head = *head_id_p;
1279 
1280  switch ( head ) {
1281 
1282  case -1:
1283  {
1284 #ifdef DEBUG_QUEUING_LOCKS
1285  tail = *tail_id_p;
1286  TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
1287 #endif
1288  tail = 0; /* to make sure next link asynchronously read is not set accidentally;
1289  this assignment prevents us from entering the if ( t > 0 )
1290  condition in the enqueued case below, which is not necessary for
1291  this state transition */
1292 
1293  need_mf = 0;
1294  /* try (-1,0)->(tid,tid) */
1295  enqueued = KMP_COMPARE_AND_STORE_ACQ64( (volatile kmp_int64 *) tail_id_p,
1296  KMP_PACK_64( -1, 0 ),
1297  KMP_PACK_64( gtid+1, gtid+1 ) );
1298 #ifdef DEBUG_QUEUING_LOCKS
1299  if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (-1,0)->(tid,tid)" );
1300 #endif
1301  }
1302  break;
1303 
1304  default:
1305  {
1306  tail = *tail_id_p;
1307  KMP_DEBUG_ASSERT( tail != gtid + 1 );
1308 
1309 #ifdef DEBUG_QUEUING_LOCKS
1310  TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
1311 #endif
1312 
1313  if ( tail == 0 ) {
1314  enqueued = FALSE;
1315  }
1316  else {
1317  need_mf = 0;
1318  /* try (h,t) or (h,h)->(h,tid) */
1319  enqueued = KMP_COMPARE_AND_STORE_ACQ32( tail_id_p, tail, gtid+1 );
1320 
1321 #ifdef DEBUG_QUEUING_LOCKS
1322  if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (h,t)->(h,tid)" );
1323 #endif
1324  }
1325  }
1326  break;
1327 
1328  case 0: /* empty queue */
1329  {
1330  kmp_int32 grabbed_lock;
1331 
1332 #ifdef DEBUG_QUEUING_LOCKS
1333  tail = *tail_id_p;
1334  TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
1335 #endif
1336  /* try (0,0)->(-1,0) */
1337 
1338  /* only legal transition out of head = 0 is head = -1 with no change to tail */
1339  grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 );
1340 
1341  if ( grabbed_lock ) {
1342 
1343  *spin_here_p = FALSE;
1344 
1345  KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n",
1346  lck, gtid ));
1347 #ifdef DEBUG_QUEUING_LOCKS
1348  TRACE_LOCK_HT( gtid+1, "acq exit: ", head, 0 );
1349 #endif
1350 
1351 #if OMPT_SUPPORT
1352  if (ompt_enabled && prev_state != ompt_state_undefined) {
1353  /* change the state before clearing wait_id */
1354  this_thr->th.ompt_thread_info.state = prev_state;
1355  this_thr->th.ompt_thread_info.wait_id = 0;
1356  }
1357 #endif
1358 
1359  KMP_FSYNC_ACQUIRED( lck );
1360  return KMP_LOCK_ACQUIRED_FIRST; /* lock holder cannot be on queue */
1361  }
1362  enqueued = FALSE;
1363  }
1364  break;
1365  }
1366 
1367 #if OMPT_SUPPORT
1368  if (ompt_enabled && prev_state == ompt_state_undefined) {
1369  /* this thread will spin; set wait_id before entering wait state */
1370  prev_state = this_thr->th.ompt_thread_info.state;
1371  this_thr->th.ompt_thread_info.wait_id = (uint64_t) lck;
1372  this_thr->th.ompt_thread_info.state = ompt_state_wait_lock;
1373  }
1374 #endif
1375 
1376  if ( enqueued ) {
1377  if ( tail > 0 ) {
1378  kmp_info_t *tail_thr = __kmp_thread_from_gtid( tail - 1 );
1379  KMP_ASSERT( tail_thr != NULL );
1380  tail_thr->th.th_next_waiting = gtid+1;
1381  /* corresponding wait for this write in release code */
1382  }
1383  KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n", lck, gtid ));
1384 
1385 
1386  /* ToDo: May want to consider using __kmp_wait_sleep or something that sleeps for
1387  * throughput only here.
1388  */
1389  KMP_MB();
1390  KMP_WAIT_YIELD(spin_here_p, FALSE, KMP_EQ, lck);
1391 
1392 #ifdef DEBUG_QUEUING_LOCKS
1393  TRACE_LOCK( gtid+1, "acq spin" );
1394 
1395  if ( this_thr->th.th_next_waiting != 0 )
1396  __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1397 #endif
1398  KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
1399  KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after waiting on queue\n",
1400  lck, gtid ));
1401 
1402 #ifdef DEBUG_QUEUING_LOCKS
1403  TRACE_LOCK( gtid+1, "acq exit 2" );
1404 #endif
1405 
1406 #if OMPT_SUPPORT
1407  /* change the state before clearing wait_id */
1408  this_thr->th.ompt_thread_info.state = prev_state;
1409  this_thr->th.ompt_thread_info.wait_id = 0;
1410 #endif
1411 
1412  /* got lock, we were dequeued by the thread that released lock */
1413  return KMP_LOCK_ACQUIRED_FIRST;
1414  }
1415 
1416  /* Yield if number of threads > number of logical processors */
1417  /* ToDo: Not sure why this should only be in oversubscription case,
1418  maybe should be traditional YIELD_INIT/YIELD_WHEN loop */
1419  KMP_YIELD( TCR_4( __kmp_nth ) > (__kmp_avail_proc ? __kmp_avail_proc :
1420  __kmp_xproc ) );
1421 #ifdef DEBUG_QUEUING_LOCKS
1422  TRACE_LOCK( gtid+1, "acq retry" );
1423 #endif
1424 
1425  }
1426  KMP_ASSERT2( 0, "should not get here" );
1427  return KMP_LOCK_ACQUIRED_FIRST;
1428 }
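
/* Editor's sketch (not part of the runtime, guarded out of the build): the
   acquire/release pair above is an MCS-style queue lock, except that waiters
   are named by gtid and linked through th_next_waiting / th_spin_here in the
   thread table instead of explicit queue nodes.  A classic MCS lock with
   per-waiter nodes shows the same idea: each thread spins only on its own
   flag, and the releaser wakes exactly its successor.  Note how the flag is
   set before the node is published, matching the st.rel comment above.
   Names are hypothetical. */
#if 0   // illustration only
#include <atomic>
#include <thread>

struct mcs_node {
    std::atomic<mcs_node *> next{nullptr};
    std::atomic<bool> spin_here{false};
};

struct toy_mcs_lock {
    std::atomic<mcs_node *> tail{nullptr};

    void lock(mcs_node *me) {
        me->next.store(nullptr, std::memory_order_relaxed);
        me->spin_here.store(true, std::memory_order_relaxed);  // before enqueue
        mcs_node *prev = tail.exchange(me, std::memory_order_acq_rel);
        if (prev == nullptr)
            return;                          // queue was empty: lock is ours
        prev->next.store(me, std::memory_order_release);  // link behind prev
        while (me->spin_here.load(std::memory_order_acquire))
            std::this_thread::yield();       // local spinning only
    }

    void unlock(mcs_node *me) {
        mcs_node *succ = me->next.load(std::memory_order_acquire);
        if (succ == nullptr) {
            // No visible successor: try to swing tail back to empty.
            mcs_node *expected = me;
            if (tail.compare_exchange_strong(expected, nullptr,
                                             std::memory_order_acq_rel))
                return;
            // A successor is mid-enqueue; wait for it to finish linking
            // (the analogue of KMP_WAIT_YIELD on th_next_waiting above).
            while ((succ = me->next.load(std::memory_order_acquire)) == nullptr)
                std::this_thread::yield();
        }
        succ->spin_here.store(false, std::memory_order_release);  // hand off
    }
};
#endif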
1429 
1430 int
1431 __kmp_acquire_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1432 {
1433  KMP_DEBUG_ASSERT( gtid >= 0 );
1434 
1435  int retval = __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid );
1436  ANNOTATE_QUEUING_ACQUIRED(lck);
1437  return retval;
1438 }
1439 
1440 static int
1441 __kmp_acquire_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1442  kmp_int32 gtid )
1443 {
1444  char const * const func = "omp_set_lock";
1445  if ( lck->lk.initialized != lck ) {
1446  KMP_FATAL( LockIsUninitialized, func );
1447  }
1448  if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1449  KMP_FATAL( LockNestableUsedAsSimple, func );
1450  }
1451  if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1452  KMP_FATAL( LockIsAlreadyOwned, func );
1453  }
1454 
1455  __kmp_acquire_queuing_lock( lck, gtid );
1456 
1457  lck->lk.owner_id = gtid + 1;
1458  return KMP_LOCK_ACQUIRED_FIRST;
1459 }
1460 
1461 int
1462 __kmp_test_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1463 {
1464  volatile kmp_int32 *head_id_p = & lck->lk.head_id;
1465  kmp_int32 head;
1466 #ifdef KMP_DEBUG
1467  kmp_info_t *this_thr;
1468 #endif
1469 
1470  KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid ));
1471  KMP_DEBUG_ASSERT( gtid >= 0 );
1472 #ifdef KMP_DEBUG
1473  this_thr = __kmp_thread_from_gtid( gtid );
1474  KMP_DEBUG_ASSERT( this_thr != NULL );
1475  KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
1476 #endif
1477 
1478  head = *head_id_p;
1479 
1480  if ( head == 0 ) { /* nobody on queue, nobody holding */
1481 
1482  /* try (0,0)->(-1,0) */
1483 
1484  if ( KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 ) ) {
1485  KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid ));
1486  KMP_FSYNC_ACQUIRED(lck);
1487  ANNOTATE_QUEUING_ACQUIRED(lck);
1488  return TRUE;
1489  }
1490  }
1491 
1492  KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid ));
1493  return FALSE;
1494 }
1495 
1496 static int
1497 __kmp_test_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1498 {
1499  char const * const func = "omp_test_lock";
1500  if ( lck->lk.initialized != lck ) {
1501  KMP_FATAL( LockIsUninitialized, func );
1502  }
1503  if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1504  KMP_FATAL( LockNestableUsedAsSimple, func );
1505  }
1506 
1507  int retval = __kmp_test_queuing_lock( lck, gtid );
1508 
1509  if ( retval ) {
1510  lck->lk.owner_id = gtid + 1;
1511  }
1512  return retval;
1513 }
1514 
1515 int
1516 __kmp_release_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1517 {
1518  register kmp_info_t *this_thr;
1519  volatile kmp_int32 *head_id_p = & lck->lk.head_id;
1520  volatile kmp_int32 *tail_id_p = & lck->lk.tail_id;
1521 
1522  KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid ));
1523  KMP_DEBUG_ASSERT( gtid >= 0 );
1524  this_thr = __kmp_thread_from_gtid( gtid );
1525  KMP_DEBUG_ASSERT( this_thr != NULL );
1526 #ifdef DEBUG_QUEUING_LOCKS
1527  TRACE_LOCK( gtid+1, "rel ent" );
1528 
1529  if ( this_thr->th.th_spin_here )
1530  __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1531  if ( this_thr->th.th_next_waiting != 0 )
1532  __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1533 #endif
1534  KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
1535  KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
1536 
1537  KMP_FSYNC_RELEASING(lck);
1538  ANNOTATE_QUEUING_RELEASED(lck);
1539 
1540  while( 1 ) {
1541  kmp_int32 dequeued;
1542  kmp_int32 head;
1543  kmp_int32 tail;
1544 
1545  head = *head_id_p;
1546 
1547 #ifdef DEBUG_QUEUING_LOCKS
1548  tail = *tail_id_p;
1549  TRACE_LOCK_HT( gtid+1, "rel read: ", head, tail );
1550  if ( head == 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1551 #endif
1552  KMP_DEBUG_ASSERT( head != 0 ); /* holding the lock, head must be -1 or queue head */
1553 
1554  if ( head == -1 ) { /* nobody on queue */
1555 
1556  /* try (-1,0)->(0,0) */
1557  if ( KMP_COMPARE_AND_STORE_REL32( head_id_p, -1, 0 ) ) {
1558  KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n",
1559  lck, gtid ));
1560 #ifdef DEBUG_QUEUING_LOCKS
1561  TRACE_LOCK_HT( gtid+1, "rel exit: ", 0, 0 );
1562 #endif
1563 
1564 #if OMPT_SUPPORT
1565  /* nothing to do - no other thread is trying to shift blame */
1566 #endif
1567 
1568  return KMP_LOCK_RELEASED;
1569  }
1570  dequeued = FALSE;
1571 
1572  }
1573  else {
1574 
1575  tail = *tail_id_p;
1576  if ( head == tail ) { /* only one thread on the queue */
1577 
1578 #ifdef DEBUG_QUEUING_LOCKS
1579  if ( head <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1580 #endif
1581  KMP_DEBUG_ASSERT( head > 0 );
1582 
1583  /* try (h,h)->(-1,0) */
1584  dequeued = KMP_COMPARE_AND_STORE_REL64( (kmp_int64 *) tail_id_p,
1585  KMP_PACK_64( head, head ), KMP_PACK_64( -1, 0 ) );
1586 #ifdef DEBUG_QUEUING_LOCKS
1587  TRACE_LOCK( gtid+1, "rel deq: (h,h)->(-1,0)" );
1588 #endif
1589 
1590  }
1591  else {
1592  volatile kmp_int32 *waiting_id_p;
1593  kmp_info_t *head_thr = __kmp_thread_from_gtid( head - 1 );
1594  KMP_DEBUG_ASSERT( head_thr != NULL );
1595  waiting_id_p = & head_thr->th.th_next_waiting;
1596 
1597  /* Does this require synchronous reads? */
1598 #ifdef DEBUG_QUEUING_LOCKS
1599  if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1600 #endif
1601  KMP_DEBUG_ASSERT( head > 0 && tail > 0 );
1602 
1603  /* try (h,t)->(h',t) or (t,t) */
1604 
1605  KMP_MB();
1606  /* make sure enqueuing thread has time to update next waiting thread field */
1607  *head_id_p = KMP_WAIT_YIELD((volatile kmp_uint32*)waiting_id_p, 0, KMP_NEQ, NULL);
1608 #ifdef DEBUG_QUEUING_LOCKS
1609  TRACE_LOCK( gtid+1, "rel deq: (h,t)->(h',t)" );
1610 #endif
1611  dequeued = TRUE;
1612  }
1613  }
1614 
1615  if ( dequeued ) {
1616  kmp_info_t *head_thr = __kmp_thread_from_gtid( head - 1 );
1617  KMP_DEBUG_ASSERT( head_thr != NULL );
1618 
1619  /* Does this require synchronous reads? */
1620 #ifdef DEBUG_QUEUING_LOCKS
1621  if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1622 #endif
1623  KMP_DEBUG_ASSERT( head > 0 && tail > 0 );
1624 
1625  /* For clean code only.
1626  * Thread not released until next statement prevents race with acquire code.
1627  */
1628  head_thr->th.th_next_waiting = 0;
1629 #ifdef DEBUG_QUEUING_LOCKS
1630  TRACE_LOCK_T( gtid+1, "rel nw=0 for t=", head );
1631 #endif
1632 
1633  KMP_MB();
1634  /* reset spin value */
1635  head_thr->th.th_spin_here = FALSE;
1636 
1637  KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after dequeuing\n",
1638  lck, gtid ));
1639 #ifdef DEBUG_QUEUING_LOCKS
1640  TRACE_LOCK( gtid+1, "rel exit 2" );
1641 #endif
1642  return KMP_LOCK_RELEASED;
1643  }
1644  /* KMP_CPU_PAUSE( ); don't want to make releasing thread hold up acquiring threads */
1645 
1646 #ifdef DEBUG_QUEUING_LOCKS
1647  TRACE_LOCK( gtid+1, "rel retry" );
1648 #endif
1649 
1650  } /* while */
1651  KMP_ASSERT2( 0, "should not get here" );
1652  return KMP_LOCK_RELEASED;
1653 }
1654 
1655 static int
1656 __kmp_release_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1657  kmp_int32 gtid )
1658 {
1659  char const * const func = "omp_unset_lock";
1660  KMP_MB(); /* in case another processor initialized lock */
1661  if ( lck->lk.initialized != lck ) {
1662  KMP_FATAL( LockIsUninitialized, func );
1663  }
1664  if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1665  KMP_FATAL( LockNestableUsedAsSimple, func );
1666  }
1667  if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
1668  KMP_FATAL( LockUnsettingFree, func );
1669  }
1670  if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
1671  KMP_FATAL( LockUnsettingSetByAnother, func );
1672  }
1673  lck->lk.owner_id = 0;
1674  return __kmp_release_queuing_lock( lck, gtid );
1675 }
1676 
1677 void
1678 __kmp_init_queuing_lock( kmp_queuing_lock_t *lck )
1679 {
1680  lck->lk.location = NULL;
1681  lck->lk.head_id = 0;
1682  lck->lk.tail_id = 0;
1683  lck->lk.next_ticket = 0;
1684  lck->lk.now_serving = 0;
1685  lck->lk.owner_id = 0; // no thread owns the lock.
1686  lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
1687  lck->lk.initialized = lck;
1688 
1689  KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck));
1690 }
1691 
1692 static void
1693 __kmp_init_queuing_lock_with_checks( kmp_queuing_lock_t * lck )
1694 {
1695  __kmp_init_queuing_lock( lck );
1696 }
1697 
1698 void
1699 __kmp_destroy_queuing_lock( kmp_queuing_lock_t *lck )
1700 {
1701  lck->lk.initialized = NULL;
1702  lck->lk.location = NULL;
1703  lck->lk.head_id = 0;
1704  lck->lk.tail_id = 0;
1705  lck->lk.next_ticket = 0;
1706  lck->lk.now_serving = 0;
1707  lck->lk.owner_id = 0;
1708  lck->lk.depth_locked = -1;
1709 }
1710 
1711 static void
1712 __kmp_destroy_queuing_lock_with_checks( kmp_queuing_lock_t *lck )
1713 {
1714  char const * const func = "omp_destroy_lock";
1715  if ( lck->lk.initialized != lck ) {
1716  KMP_FATAL( LockIsUninitialized, func );
1717  }
1718  if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1719  KMP_FATAL( LockNestableUsedAsSimple, func );
1720  }
1721  if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
1722  KMP_FATAL( LockStillOwned, func );
1723  }
1724  __kmp_destroy_queuing_lock( lck );
1725 }
1726 
1727 
1728 //
1729 // nested queuing locks
1730 //
1731 
1732 int
1733 __kmp_acquire_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1734 {
1735  KMP_DEBUG_ASSERT( gtid >= 0 );
1736 
1737  if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1738  lck->lk.depth_locked += 1;
1739  return KMP_LOCK_ACQUIRED_NEXT;
1740  }
1741  else {
1742  __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid );
1743  ANNOTATE_QUEUING_ACQUIRED(lck);
1744  KMP_MB();
1745  lck->lk.depth_locked = 1;
1746  KMP_MB();
1747  lck->lk.owner_id = gtid + 1;
1748  return KMP_LOCK_ACQUIRED_FIRST;
1749  }
1750 }
1751 
1752 static int
1753 __kmp_acquire_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1754 {
1755  char const * const func = "omp_set_nest_lock";
1756  if ( lck->lk.initialized != lck ) {
1757  KMP_FATAL( LockIsUninitialized, func );
1758  }
1759  if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1760  KMP_FATAL( LockSimpleUsedAsNestable, func );
1761  }
1762  return __kmp_acquire_nested_queuing_lock( lck, gtid );
1763 }
1764 
1765 int
1766 __kmp_test_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1767 {
1768  int retval;
1769 
1770  KMP_DEBUG_ASSERT( gtid >= 0 );
1771 
1772  if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1773  retval = ++lck->lk.depth_locked;
1774  }
1775  else if ( !__kmp_test_queuing_lock( lck, gtid ) ) {
1776  retval = 0;
1777  }
1778  else {
1779  KMP_MB();
1780  retval = lck->lk.depth_locked = 1;
1781  KMP_MB();
1782  lck->lk.owner_id = gtid + 1;
1783  }
1784  return retval;
1785 }
1786 
1787 static int
1788 __kmp_test_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1789  kmp_int32 gtid )
1790 {
1791  char const * const func = "omp_test_nest_lock";
1792  if ( lck->lk.initialized != lck ) {
1793  KMP_FATAL( LockIsUninitialized, func );
1794  }
1795  if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1796  KMP_FATAL( LockSimpleUsedAsNestable, func );
1797  }
1798  return __kmp_test_nested_queuing_lock( lck, gtid );
1799 }
1800 
1801 int
1802 __kmp_release_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1803 {
1804  KMP_DEBUG_ASSERT( gtid >= 0 );
1805 
1806  KMP_MB();
1807  if ( --(lck->lk.depth_locked) == 0 ) {
1808  KMP_MB();
1809  lck->lk.owner_id = 0;
1810  __kmp_release_queuing_lock( lck, gtid );
1811  return KMP_LOCK_RELEASED;
1812  }
1813  return KMP_LOCK_STILL_HELD;
1814 }
1815 
1816 static int
1817 __kmp_release_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1818 {
1819  char const * const func = "omp_unset_nest_lock";
1820  KMP_MB(); /* in case another processor initialized lock */
1821  if ( lck->lk.initialized != lck ) {
1822  KMP_FATAL( LockIsUninitialized, func );
1823  }
1824  if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1825  KMP_FATAL( LockSimpleUsedAsNestable, func );
1826  }
1827  if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
1828  KMP_FATAL( LockUnsettingFree, func );
1829  }
1830  if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
1831  KMP_FATAL( LockUnsettingSetByAnother, func );
1832  }
1833  return __kmp_release_nested_queuing_lock( lck, gtid );
1834 }
1835 
1836 void
1837 __kmp_init_nested_queuing_lock( kmp_queuing_lock_t * lck )
1838 {
1839  __kmp_init_queuing_lock( lck );
1840  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
1841 }
1842 
1843 static void
1844 __kmp_init_nested_queuing_lock_with_checks( kmp_queuing_lock_t * lck )
1845 {
1846  __kmp_init_nested_queuing_lock( lck );
1847 }
1848 
1849 void
1850 __kmp_destroy_nested_queuing_lock( kmp_queuing_lock_t *lck )
1851 {
1852  __kmp_destroy_queuing_lock( lck );
1853  lck->lk.depth_locked = 0;
1854 }
1855 
1856 static void
1857 __kmp_destroy_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck )
1858 {
1859  char const * const func = "omp_destroy_nest_lock";
1860  if ( lck->lk.initialized != lck ) {
1861  KMP_FATAL( LockIsUninitialized, func );
1862  }
1863  if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1864  KMP_FATAL( LockSimpleUsedAsNestable, func );
1865  }
1866  if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
1867  KMP_FATAL( LockStillOwned, func );
1868  }
1869  __kmp_destroy_nested_queuing_lock( lck );
1870 }
1871 
1872 
1873 //
1874 // access functions to fields which don't exist for all lock kinds.
1875 //
1876 
1877 static int
1878 __kmp_is_queuing_lock_initialized( kmp_queuing_lock_t *lck )
1879 {
1880  return lck == lck->lk.initialized;
1881 }
1882 
1883 static const ident_t *
1884 __kmp_get_queuing_lock_location( kmp_queuing_lock_t *lck )
1885 {
1886  return lck->lk.location;
1887 }
1888 
1889 static void
1890 __kmp_set_queuing_lock_location( kmp_queuing_lock_t *lck, const ident_t *loc )
1891 {
1892  lck->lk.location = loc;
1893 }
1894 
1895 static kmp_lock_flags_t
1896 __kmp_get_queuing_lock_flags( kmp_queuing_lock_t *lck )
1897 {
1898  return lck->lk.flags;
1899 }
1900 
1901 static void
1902 __kmp_set_queuing_lock_flags( kmp_queuing_lock_t *lck, kmp_lock_flags_t flags )
1903 {
1904  lck->lk.flags = flags;
1905 }
1906 
1907 #if KMP_USE_ADAPTIVE_LOCKS
1908 
1909 /*
1910  RTM Adaptive locks
1911 */
1912 
1913 #if KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300
1914 
1915 #include <immintrin.h>
1916 #define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
1917 
1918 #else
1919 
1920 // Values from the status register after failed speculation.
1921 #define _XBEGIN_STARTED (~0u)
1922 #define _XABORT_EXPLICIT (1 << 0)
1923 #define _XABORT_RETRY (1 << 1)
1924 #define _XABORT_CONFLICT (1 << 2)
1925 #define _XABORT_CAPACITY (1 << 3)
1926 #define _XABORT_DEBUG (1 << 4)
1927 #define _XABORT_NESTED (1 << 5)
1928 #define _XABORT_CODE(x) ((unsigned char)(((x) >> 24) & 0xFF))
1929 
1930 // Aborts for which it's worth trying again immediately
1931 #define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
1932 
1933 #define STRINGIZE_INTERNAL(arg) #arg
1934 #define STRINGIZE(arg) STRINGIZE_INTERNAL(arg)
1935 
1936 // Access to RTM instructions
1937 
1938 /*
1939  A version of XBegin which returns -1 on speculation, and the value of EAX on an abort.
1940  This is the same definition as the compiler intrinsic that will be supported at some point.
1941 */
1942 static __inline int _xbegin()
1943 {
1944  int res = -1;
1945 
1946 #if KMP_OS_WINDOWS
1947 #if KMP_ARCH_X86_64
1948  _asm {
1949  _emit 0xC7
1950  _emit 0xF8
1951  _emit 2
1952  _emit 0
1953  _emit 0
1954  _emit 0
1955  jmp L2
1956  mov res, eax
1957  L2:
1958  }
1959 #else /* IA32 */
1960  _asm {
1961  _emit 0xC7
1962  _emit 0xF8
1963  _emit 2
1964  _emit 0
1965  _emit 0
1966  _emit 0
1967  jmp L2
1968  mov res, eax
1969  L2:
1970  }
1971 #endif // KMP_ARCH_X86_64
1972 #else
1973  /* Note that %eax must be noted as killed (clobbered), because
1974  * the XSR is returned in %eax(%rax) on abort. Other register
1975  * values are restored, so don't need to be killed.
1976  *
1977  * We must also mark 'res' as an input and an output, since otherwise
1978  * 'res=-1' may be dropped as being dead, whereas we do need the
1979  * assignment on the successful (i.e., non-abort) path.
1980  */
1981  __asm__ volatile ("1: .byte 0xC7; .byte 0xF8;\n"
1982  " .long 1f-1b-6\n"
1983  " jmp 2f\n"
1984  "1: movl %%eax,%0\n"
1985  "2:"
1986  :"+r"(res)::"memory","%eax");
1987 #endif // KMP_OS_WINDOWS
1988  return res;
1989 }
1990 
1991 /*
1992  Transaction end
1993 */
1994 static __inline void _xend()
1995 {
1996 #if KMP_OS_WINDOWS
1997  __asm {
1998  _emit 0x0f
1999  _emit 0x01
2000  _emit 0xd5
2001  }
2002 #else
2003  __asm__ volatile (".byte 0x0f; .byte 0x01; .byte 0xd5" :::"memory");
2004 #endif
2005 }
2006 
2007 /*
 2008  This is a macro; the argument must be a single byte constant which
2009  can be evaluated by the inline assembler, since it is emitted as a
2010  byte into the assembly code.
2011 */
2012 #if KMP_OS_WINDOWS
2013 #define _xabort(ARG) \
2014  _asm _emit 0xc6 \
2015  _asm _emit 0xf8 \
2016  _asm _emit ARG
2017 #else
2018 #define _xabort(ARG) \
2019  __asm__ volatile (".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG) :::"memory");
2020 #endif
2021 
2022 #endif // KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300
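// A minimal usage sketch of how these primitives are combined later in this
// file (illustrative only; the queuing lock calls stand in for whatever
// fallback the caller uses on a hard abort):
#if 0
static int example_rtm_critical_section(kmp_queuing_lock_t *lck, kmp_int32 gtid)
{
    kmp_uint32 status = _xbegin();
    if (status == _XBEGIN_STARTED) {
        // ... transactional work; nothing is written to the lock word ...
        _xend();                                // commit the transaction
        return 1;
    }
    if (status & SOFT_ABORT_MASK)               // transient abort: retrying may help
        return 0;
    __kmp_acquire_queuing_lock(lck, gtid);      // hard abort: take the real lock
    // ... work ...
    __kmp_release_queuing_lock(lck, gtid);
    return 1;
}
#endif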
2023 
2024 //
2025 // Statistics are collected for testing purposes
2026 //
2027 #if KMP_DEBUG_ADAPTIVE_LOCKS
2028 
2029 // We accumulate speculative lock statistics when the lock is destroyed.
2030 // We keep locks that haven't been destroyed in the liveLocks list
2031 // so that we can grab their statistics too.
2032 static kmp_adaptive_lock_statistics_t destroyedStats;
2033 
2034 // To hold the list of live locks.
2035 static kmp_adaptive_lock_info_t liveLocks;
2036 
2037 // A lock so we can safely update the list of locks.
2038 static kmp_bootstrap_lock_t chain_lock;
2039 
2040 // Initialize the list of stats.
2041 void
2042 __kmp_init_speculative_stats()
2043 {
2044  kmp_adaptive_lock_info_t *lck = &liveLocks;
2045 
2046  memset( ( void * ) & ( lck->stats ), 0, sizeof( lck->stats ) );
2047  lck->stats.next = lck;
2048  lck->stats.prev = lck;
2049 
2050  KMP_ASSERT( lck->stats.next->stats.prev == lck );
2051  KMP_ASSERT( lck->stats.prev->stats.next == lck );
2052 
2053  __kmp_init_bootstrap_lock( &chain_lock );
2054 
2055 }
2056 
2057 // Insert the lock into the circular list
2058 static void
2059 __kmp_remember_lock( kmp_adaptive_lock_info_t * lck )
2060 {
2061  __kmp_acquire_bootstrap_lock( &chain_lock );
2062 
2063  lck->stats.next = liveLocks.stats.next;
2064  lck->stats.prev = &liveLocks;
2065 
2066  liveLocks.stats.next = lck;
2067  lck->stats.next->stats.prev = lck;
2068 
2069  KMP_ASSERT( lck->stats.next->stats.prev == lck );
2070  KMP_ASSERT( lck->stats.prev->stats.next == lck );
2071 
2072  __kmp_release_bootstrap_lock( &chain_lock );
2073 }
2074 
2075 static void
2076 __kmp_forget_lock( kmp_adaptive_lock_info_t * lck )
2077 {
2078  KMP_ASSERT( lck->stats.next->stats.prev == lck );
2079  KMP_ASSERT( lck->stats.prev->stats.next == lck );
2080 
2081  kmp_adaptive_lock_info_t * n = lck->stats.next;
2082  kmp_adaptive_lock_info_t * p = lck->stats.prev;
2083 
2084  n->stats.prev = p;
2085  p->stats.next = n;
2086 }
2087 
2088 static void
2089 __kmp_zero_speculative_stats( kmp_adaptive_lock_info_t * lck )
2090 {
2091  memset( ( void * )&lck->stats, 0, sizeof( lck->stats ) );
2092  __kmp_remember_lock( lck );
2093 }
2094 
2095 static void
2096 __kmp_add_stats( kmp_adaptive_lock_statistics_t * t, kmp_adaptive_lock_info_t * lck )
2097 {
2098  kmp_adaptive_lock_statistics_t volatile *s = &lck->stats;
2099 
2100  t->nonSpeculativeAcquireAttempts += lck->acquire_attempts;
2101  t->successfulSpeculations += s->successfulSpeculations;
2102  t->hardFailedSpeculations += s->hardFailedSpeculations;
2103  t->softFailedSpeculations += s->softFailedSpeculations;
2104  t->nonSpeculativeAcquires += s->nonSpeculativeAcquires;
2105  t->lemmingYields += s->lemmingYields;
2106 }
2107 
2108 static void
2109 __kmp_accumulate_speculative_stats( kmp_adaptive_lock_info_t * lck)
2110 {
2111  kmp_adaptive_lock_statistics_t *t = &destroyedStats;
2112 
2113  __kmp_acquire_bootstrap_lock( &chain_lock );
2114 
2115  __kmp_add_stats( &destroyedStats, lck );
2116  __kmp_forget_lock( lck );
2117 
2118  __kmp_release_bootstrap_lock( &chain_lock );
2119 }
2120 
2121 static float
2122 percent (kmp_uint32 count, kmp_uint32 total)
2123 {
2124  return (total == 0) ? 0.0: (100.0 * count)/total;
2125 }
2126 
2127 static
2128 FILE * __kmp_open_stats_file()
2129 {
2130  if (strcmp (__kmp_speculative_statsfile, "-") == 0)
2131  return stdout;
2132 
2133  size_t buffLen = KMP_STRLEN( __kmp_speculative_statsfile ) + 20;
2134  char buffer[buffLen];
2135  KMP_SNPRINTF (&buffer[0], buffLen, __kmp_speculative_statsfile,
2136  (kmp_int32)getpid());
2137  FILE * result = fopen(&buffer[0], "w");
2138 
2139  // Maybe we should issue a warning here...
2140  return result ? result : stdout;
2141 }
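// For example (a sketch; the file name pattern is an assumption): a statsfile
// setting that contains a printf-style "%d" is expanded with the pid by the
// KMP_SNPRINTF call above.
#if 0
static void example_statsfile_name(void)
{
    char buf[64];
    // assumed setting value: "adaptive-stats.%d.log"
    KMP_SNPRINTF(buf, sizeof(buf), "adaptive-stats.%d.log", (kmp_int32)getpid());
    // buf now names a per-process file such as "adaptive-stats.12345.log"
}
#endif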
2142 
2143 void
2144 __kmp_print_speculative_stats()
2145 {
2146  if (__kmp_user_lock_kind != lk_adaptive)
2147  return;
2148 
2149  FILE * statsFile = __kmp_open_stats_file();
2150 
2151  kmp_adaptive_lock_statistics_t total = destroyedStats;
2152  kmp_adaptive_lock_info_t *lck;
2153 
2154  for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) {
2155  __kmp_add_stats( &total, lck );
2156  }
2157  kmp_adaptive_lock_statistics_t *t = &total;
2158  kmp_uint32 totalSections = t->nonSpeculativeAcquires + t->successfulSpeculations;
2159  kmp_uint32 totalSpeculations = t->successfulSpeculations + t->hardFailedSpeculations +
2160  t->softFailedSpeculations;
2161 
2162  fprintf ( statsFile, "Speculative lock statistics (all approximate!)\n");
2163  fprintf ( statsFile, " Lock parameters: \n"
2164  " max_soft_retries : %10d\n"
2165  " max_badness : %10d\n",
2166  __kmp_adaptive_backoff_params.max_soft_retries,
2167  __kmp_adaptive_backoff_params.max_badness);
2168  fprintf( statsFile, " Non-speculative acquire attempts : %10d\n", t->nonSpeculativeAcquireAttempts );
2169  fprintf( statsFile, " Total critical sections : %10d\n", totalSections );
2170  fprintf( statsFile, " Successful speculations : %10d (%5.1f%%)\n",
2171  t->successfulSpeculations, percent( t->successfulSpeculations, totalSections ) );
2172  fprintf( statsFile, " Non-speculative acquires : %10d (%5.1f%%)\n",
2173  t->nonSpeculativeAcquires, percent( t->nonSpeculativeAcquires, totalSections ) );
2174  fprintf( statsFile, " Lemming yields : %10d\n\n", t->lemmingYields );
2175 
2176  fprintf( statsFile, " Speculative acquire attempts : %10d\n", totalSpeculations );
2177  fprintf( statsFile, " Successes : %10d (%5.1f%%)\n",
2178  t->successfulSpeculations, percent( t->successfulSpeculations, totalSpeculations ) );
2179  fprintf( statsFile, " Soft failures : %10d (%5.1f%%)\n",
2180  t->softFailedSpeculations, percent( t->softFailedSpeculations, totalSpeculations ) );
2181  fprintf( statsFile, " Hard failures : %10d (%5.1f%%)\n",
2182  t->hardFailedSpeculations, percent( t->hardFailedSpeculations, totalSpeculations ) );
2183 
2184  if (statsFile != stdout)
2185  fclose( statsFile );
2186 }
2187 
2188 # define KMP_INC_STAT(lck,stat) ( lck->lk.adaptive.stats.stat++ )
2189 #else
2190 # define KMP_INC_STAT(lck,stat)
2191 
2192 #endif // KMP_DEBUG_ADAPTIVE_LOCKS
2193 
2194 static inline bool
2195 __kmp_is_unlocked_queuing_lock( kmp_queuing_lock_t *lck )
2196 {
2197  // It is enough to check that the head_id is zero.
2198  // We don't also need to check the tail.
2199  bool res = lck->lk.head_id == 0;
2200 
2201  // We need a fence here, since we must ensure that no memory operations
2202  // from later in this thread float above that read.
2203 #if KMP_COMPILER_ICC
2204  _mm_mfence();
2205 #else
2206  __sync_synchronize();
2207 #endif
2208 
2209  return res;
2210 }
2211 
2212 // Functions for manipulating the badness
2213 static __inline void
2214 __kmp_update_badness_after_success( kmp_adaptive_lock_t *lck )
2215 {
2216  // Reset the badness to zero so we eagerly try to speculate again
2217  lck->lk.adaptive.badness = 0;
2218  KMP_INC_STAT(lck,successfulSpeculations);
2219 }
2220 
2221 // Create a bit mask with one more set bit.
2222 static __inline void
2223 __kmp_step_badness( kmp_adaptive_lock_t *lck )
2224 {
2225  kmp_uint32 newBadness = ( lck->lk.adaptive.badness << 1 ) | 1;
2226  if ( newBadness > lck->lk.adaptive.max_badness) {
2227  return;
2228  } else {
2229  lck->lk.adaptive.badness = newBadness;
2230  }
2231 }
2232 
2233 // Check whether speculation should be attempted.
2234 static __inline int
2235 __kmp_should_speculate( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2236 {
2237  kmp_uint32 badness = lck->lk.adaptive.badness;
2238  kmp_uint32 attempts= lck->lk.adaptive.acquire_attempts;
2239  int res = (attempts & badness) == 0;
2240  return res;
2241 }
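// A worked sketch of the gating above (illustrative only): badness is a mask
// of low set bits (0, 1, 3, 7, ...) grown by __kmp_step_badness, so with
// badness == 3 the lock only speculates when the low two bits of
// acquire_attempts are zero, i.e. on every fourth acquire attempt.
#if 0
static int example_should_speculate(kmp_uint32 attempts, kmp_uint32 badness)
{
    return (attempts & badness) == 0;   // same test as __kmp_should_speculate
}
// example_should_speculate(8, 3) == 1, example_should_speculate(9, 3) == 0
#endif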
2242 
2243 // Attempt to acquire only the speculative lock.
2244 // Does not back off to the non-speculative lock.
2245 //
2246 static int
2247 __kmp_test_adaptive_lock_only( kmp_adaptive_lock_t * lck, kmp_int32 gtid )
2248 {
2249  int retries = lck->lk.adaptive.max_soft_retries;
2250 
 2251  // We don't explicitly count the start of speculation; rather, we record
 2252  // the results (success, hard fail, soft fail). The sum of all of those
 2253  // is the total number of times we started speculation, since all
 2254  // speculations must end in one of those ways.
2255  do
2256  {
2257  kmp_uint32 status = _xbegin();
2258  // Switch this in to disable actual speculation but exercise
2259  // at least some of the rest of the code. Useful for debugging...
2260  // kmp_uint32 status = _XABORT_NESTED;
2261 
2262  if (status == _XBEGIN_STARTED )
2263  { /* We have successfully started speculation
2264  * Check that no-one acquired the lock for real between when we last looked
2265  * and now. This also gets the lock cache line into our read-set,
2266  * which we need so that we'll abort if anyone later claims it for real.
2267  */
2268  if (! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
2269  {
2270  // Lock is now visibly acquired, so someone beat us to it.
2271  // Abort the transaction so we'll restart from _xbegin with the
2272  // failure status.
2273  _xabort(0x01);
2274  KMP_ASSERT2( 0, "should not get here" );
2275  }
2276  return 1; // Lock has been acquired (speculatively)
2277  } else {
2278  // We have aborted, update the statistics
2279  if ( status & SOFT_ABORT_MASK)
2280  {
2281  KMP_INC_STAT(lck,softFailedSpeculations);
2282  // and loop round to retry.
2283  }
2284  else
2285  {
2286  KMP_INC_STAT(lck,hardFailedSpeculations);
2287  // Give up if we had a hard failure.
2288  break;
2289  }
2290  }
2291  } while( retries-- ); // Loop while we have retries, and didn't fail hard.
2292 
2293  // Either we had a hard failure or we didn't succeed softly after
2294  // the full set of attempts, so back off the badness.
2295  __kmp_step_badness( lck );
2296  return 0;
2297 }
2298 
2299 // Attempt to acquire the speculative lock, or back off to the non-speculative one
2300 // if the speculative lock cannot be acquired.
2301 // We can succeed speculatively, non-speculatively, or fail.
2302 static int
2303 __kmp_test_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2304 {
2305  // First try to acquire the lock speculatively
2306  if ( __kmp_should_speculate( lck, gtid ) && __kmp_test_adaptive_lock_only( lck, gtid ) )
2307  return 1;
2308 
2309  // Speculative acquisition failed, so try to acquire it non-speculatively.
2310  // Count the non-speculative acquire attempt
2311  lck->lk.adaptive.acquire_attempts++;
2312 
2313  // Use base, non-speculative lock.
2314  if ( __kmp_test_queuing_lock( GET_QLK_PTR(lck), gtid ) )
2315  {
2316  KMP_INC_STAT(lck,nonSpeculativeAcquires);
2317  return 1; // Lock is acquired (non-speculatively)
2318  }
2319  else
2320  {
2321  return 0; // Failed to acquire the lock, it's already visibly locked.
2322  }
2323 }
2324 
2325 static int
2326 __kmp_test_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2327 {
2328  char const * const func = "omp_test_lock";
2329  if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2330  KMP_FATAL( LockIsUninitialized, func );
2331  }
2332 
2333  int retval = __kmp_test_adaptive_lock( lck, gtid );
2334 
2335  if ( retval ) {
2336  lck->lk.qlk.owner_id = gtid + 1;
2337  }
2338  return retval;
2339 }
2340 
2341 // Block until we can acquire a speculative, adaptive lock.
2342 // We check whether we should be trying to speculate.
2343 // If we should be, we check the real lock to see if it is free,
2344 // and, if not, pause without attempting to acquire it until it is.
2345 // Then we try the speculative acquire.
2346 // This means that although we suffer from lemmings a little (because
2347 // we can't acquire the lock speculatively until the queue of waiting
2348 // threads has cleared), we don't get into a
2349 // state where we can never acquire the lock speculatively (because we
2350 // force the queue to clear by preventing new arrivals from entering the
2351 // queue).
2352 // This does mean that when we're trying to break lemmings, the lock
2353 // is no longer fair. However, OpenMP makes no guarantee that its
2354 // locks are fair, so this isn't a real problem.
2355 static void
2356 __kmp_acquire_adaptive_lock( kmp_adaptive_lock_t * lck, kmp_int32 gtid )
2357 {
2358  if ( __kmp_should_speculate( lck, gtid ) )
2359  {
2360  if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
2361  {
2362  if ( __kmp_test_adaptive_lock_only( lck , gtid ) )
2363  return;
2364  // We tried speculation and failed, so give up.
2365  }
2366  else
2367  {
 2368  // We can't try speculation until the lock is free, so we
 2369  // pause here (without suspending on the queuing lock)
 2370  // to allow it to drain, then try again.
 2371  // All other threads will also see the same result from
 2372  // __kmp_should_speculate, so they will be doing the same if they
 2373  // try to claim the lock from now on.
2374  while ( ! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
2375  {
2376  KMP_INC_STAT(lck,lemmingYields);
2377  __kmp_yield (TRUE);
2378  }
2379 
2380  if ( __kmp_test_adaptive_lock_only( lck, gtid ) )
2381  return;
2382  }
2383  }
2384 
2385  // Speculative acquisition failed, so acquire it non-speculatively.
2386  // Count the non-speculative acquire attempt
2387  lck->lk.adaptive.acquire_attempts++;
2388 
2389  __kmp_acquire_queuing_lock_timed_template<FALSE>( GET_QLK_PTR(lck), gtid );
2390  // We have acquired the base lock, so count that.
2391  KMP_INC_STAT(lck,nonSpeculativeAcquires );
2392  ANNOTATE_QUEUING_ACQUIRED(lck);
2393 }
2394 
2395 static void
2396 __kmp_acquire_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2397 {
2398  char const * const func = "omp_set_lock";
2399  if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2400  KMP_FATAL( LockIsUninitialized, func );
2401  }
2402  if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == gtid ) {
2403  KMP_FATAL( LockIsAlreadyOwned, func );
2404  }
2405 
2406  __kmp_acquire_adaptive_lock( lck, gtid );
2407 
2408  lck->lk.qlk.owner_id = gtid + 1;
2409 }
2410 
2411 static int
2412 __kmp_release_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2413 {
2414  if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) )
2415  { // If the lock doesn't look claimed we must be speculating.
2416  // (Or the user's code is buggy and they're releasing without locking;
2417  // if we had XTEST we'd be able to check that case...)
2418  _xend(); // Exit speculation
2419  __kmp_update_badness_after_success( lck );
2420  }
2421  else
2422  { // Since the lock *is* visibly locked we're not speculating,
2423  // so should use the underlying lock's release scheme.
2424  __kmp_release_queuing_lock( GET_QLK_PTR(lck), gtid );
2425  }
2426  return KMP_LOCK_RELEASED;
2427 }
2428 
2429 static int
2430 __kmp_release_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid )
2431 {
2432  char const * const func = "omp_unset_lock";
2433  KMP_MB(); /* in case another processor initialized lock */
2434  if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2435  KMP_FATAL( LockIsUninitialized, func );
2436  }
2437  if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == -1 ) {
2438  KMP_FATAL( LockUnsettingFree, func );
2439  }
2440  if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != gtid ) {
2441  KMP_FATAL( LockUnsettingSetByAnother, func );
2442  }
2443  lck->lk.qlk.owner_id = 0;
2444  __kmp_release_adaptive_lock( lck, gtid );
2445  return KMP_LOCK_RELEASED;
2446 }
2447 
2448 static void
2449 __kmp_init_adaptive_lock( kmp_adaptive_lock_t *lck )
2450 {
2451  __kmp_init_queuing_lock( GET_QLK_PTR(lck) );
2452  lck->lk.adaptive.badness = 0;
2453  lck->lk.adaptive.acquire_attempts = 0; //nonSpeculativeAcquireAttempts = 0;
2454  lck->lk.adaptive.max_soft_retries = __kmp_adaptive_backoff_params.max_soft_retries;
2455  lck->lk.adaptive.max_badness = __kmp_adaptive_backoff_params.max_badness;
2456 #if KMP_DEBUG_ADAPTIVE_LOCKS
2457  __kmp_zero_speculative_stats( &lck->lk.adaptive );
2458 #endif
2459  KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck));
2460 }
2461 
2462 static void
2463 __kmp_init_adaptive_lock_with_checks( kmp_adaptive_lock_t * lck )
2464 {
2465  __kmp_init_adaptive_lock( lck );
2466 }
2467 
2468 static void
2469 __kmp_destroy_adaptive_lock( kmp_adaptive_lock_t *lck )
2470 {
2471 #if KMP_DEBUG_ADAPTIVE_LOCKS
2472  __kmp_accumulate_speculative_stats( &lck->lk.adaptive );
2473 #endif
2474  __kmp_destroy_queuing_lock (GET_QLK_PTR(lck));
2475  // Nothing needed for the speculative part.
2476 }
2477 
2478 static void
2479 __kmp_destroy_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck )
2480 {
2481  char const * const func = "omp_destroy_lock";
2482  if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) {
2483  KMP_FATAL( LockIsUninitialized, func );
2484  }
2485  if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != -1 ) {
2486  KMP_FATAL( LockStillOwned, func );
2487  }
2488  __kmp_destroy_adaptive_lock( lck );
2489 }
2490 
2491 
2492 #endif // KMP_USE_ADAPTIVE_LOCKS
2493 
2494 
2495 /* ------------------------------------------------------------------------ */
2496 /* DRDPA ticket locks */
2497 /* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */
2498 
2499 static kmp_int32
2500 __kmp_get_drdpa_lock_owner( kmp_drdpa_lock_t *lck )
2501 {
2502  return TCR_4( lck->lk.owner_id ) - 1;
2503 }
2504 
2505 static inline bool
2506 __kmp_is_drdpa_lock_nestable( kmp_drdpa_lock_t *lck )
2507 {
2508  return lck->lk.depth_locked != -1;
2509 }
2510 
2511 __forceinline static int
2512 __kmp_acquire_drdpa_lock_timed_template( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2513 {
2514  kmp_uint64 ticket = KMP_TEST_THEN_INC64((kmp_int64 *)&lck->lk.next_ticket);
2515  kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load
2516  volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2517  = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2518  TCR_PTR(lck->lk.polls); // volatile load
2519 
2520 #ifdef USE_LOCK_PROFILE
2521  if (TCR_8(polls[ticket & mask].poll) != ticket)
2522  __kmp_printf("LOCK CONTENTION: %p\n", lck);
2523  /* else __kmp_printf( "." );*/
2524 #endif /* USE_LOCK_PROFILE */
2525 
2526  //
2527  // Now spin-wait, but reload the polls pointer and mask, in case the
2528  // polling area has been reconfigured. Unless it is reconfigured, the
2529  // reloads stay in L1 cache and are cheap.
2530  //
2531  // Keep this code in sync with KMP_WAIT_YIELD, in kmp_dispatch.cpp !!!
2532  //
2533  // The current implementation of KMP_WAIT_YIELD doesn't allow for mask
2534  // and poll to be re-read every spin iteration.
2535  //
2536  kmp_uint32 spins;
2537 
2538  KMP_FSYNC_PREPARE(lck);
2539  KMP_INIT_YIELD(spins);
2540  while (TCR_8(polls[ticket & mask].poll) < ticket) { // volatile load
2541  // If we are oversubscribed,
2542  // or have waited a bit (and KMP_LIBRARY=turnaround), then yield.
2543  // CPU Pause is in the macros for yield.
2544  //
2545  KMP_YIELD(TCR_4(__kmp_nth)
2546  > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
2547  KMP_YIELD_SPIN(spins);
2548 
2549  // Re-read the mask and the poll pointer from the lock structure.
2550  //
2551  // Make certain that "mask" is read before "polls" !!!
2552  //
 2553  // If another thread reconfigures the polling area and updates
 2554  // its values, and we get the new value of mask and the old polls
2555  // pointer, we could access memory beyond the end of the old polling
2556  // area.
2557  //
2558  mask = TCR_8(lck->lk.mask); // volatile load
2559  polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2560  TCR_PTR(lck->lk.polls); // volatile load
2561  }
2562 
2563  //
2564  // Critical section starts here
2565  //
2566  KMP_FSYNC_ACQUIRED(lck);
2567  KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n",
2568  ticket, lck));
2569  lck->lk.now_serving = ticket; // non-volatile store
2570 
2571  //
2572  // Deallocate a garbage polling area if we know that we are the last
2573  // thread that could possibly access it.
2574  //
2575  // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup
2576  // ticket.
2577  //
2578  if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) {
2579  __kmp_free((void *)lck->lk.old_polls);
2580  lck->lk.old_polls = NULL;
2581  lck->lk.cleanup_ticket = 0;
2582  }
2583 
2584  //
2585  // Check to see if we should reconfigure the polling area.
2586  // If there is still a garbage polling area to be deallocated from a
2587  // previous reconfiguration, let a later thread reconfigure it.
2588  //
2589  if (lck->lk.old_polls == NULL) {
2590  bool reconfigure = false;
2591  volatile struct kmp_base_drdpa_lock::kmp_lock_poll *old_polls = polls;
2592  kmp_uint32 num_polls = TCR_4(lck->lk.num_polls);
2593 
2594  if (TCR_4(__kmp_nth)
2595  > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
2596  //
2597  // We are in oversubscription mode. Contract the polling area
2598  // down to a single location, if that hasn't been done already.
2599  //
2600  if (num_polls > 1) {
2601  reconfigure = true;
2602  num_polls = TCR_4(lck->lk.num_polls);
2603  mask = 0;
2604  num_polls = 1;
2605  polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2606  __kmp_allocate(num_polls * sizeof(*polls));
2607  polls[0].poll = ticket;
2608  }
2609  }
2610  else {
2611  //
2612  // We are in under/fully subscribed mode. Check the number of
2613  // threads waiting on the lock. The size of the polling area
2614  // should be at least the number of threads waiting.
2615  //
2616  kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1;
2617  if (num_waiting > num_polls) {
2618  kmp_uint32 old_num_polls = num_polls;
2619  reconfigure = true;
2620  do {
2621  mask = (mask << 1) | 1;
2622  num_polls *= 2;
2623  } while (num_polls <= num_waiting);
2624 
2625  //
2626  // Allocate the new polling area, and copy the relevant portion
2627  // of the old polling area to the new area. __kmp_allocate()
2628  // zeroes the memory it allocates, and most of the old area is
2629  // just zero padding, so we only copy the release counters.
2630  //
2631  polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2632  __kmp_allocate(num_polls * sizeof(*polls));
2633  kmp_uint32 i;
2634  for (i = 0; i < old_num_polls; i++) {
2635  polls[i].poll = old_polls[i].poll;
2636  }
2637  }
2638  }
2639 
2640  if (reconfigure) {
2641  //
2642  // Now write the updated fields back to the lock structure.
2643  //
2644  // Make certain that "polls" is written before "mask" !!!
2645  //
2646  // If another thread picks up the new value of mask and the old
 2647  // polls pointer, it could access memory beyond the end of the
2648  // old polling area.
2649  //
2650  // On x86, we need memory fences.
2651  //
2652  KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring lock %p to %d polls\n",
2653  ticket, lck, num_polls));
2654 
2655  lck->lk.old_polls = old_polls; // non-volatile store
2656  lck->lk.polls = polls; // volatile store
2657 
2658  KMP_MB();
2659 
2660  lck->lk.num_polls = num_polls; // non-volatile store
2661  lck->lk.mask = mask; // volatile store
2662 
2663  KMP_MB();
2664 
2665  //
2666  // Only after the new polling area and mask have been flushed
2667  // to main memory can we update the cleanup ticket field.
2668  //
2669  // volatile load / non-volatile store
2670  //
2671  lck->lk.cleanup_ticket = TCR_8(lck->lk.next_ticket);
2672  }
2673  }
2674  return KMP_LOCK_ACQUIRED_FIRST;
2675 }
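// A worked sketch of the ticket/poll indexing used above (illustrative only):
// with num_polls == 4 the mask is 3, so the holder of ticket t spins on
// polls[t & 3], and the release of ticket t-1 stores t into that same slot.
#if 0
static void example_drdpa_indexing(void)
{
    kmp_uint64 mask   = 3;               // num_polls == 4
    kmp_uint64 ticket = 6;               // this thread's ticket
    kmp_uint64 slot   = ticket & mask;   // == 2: the poll location we spin on
    (void)slot;   // we proceed once polls[2].poll reaches 6, stored by ticket 5's owner
}
#endif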
2676 
2677 int
2678 __kmp_acquire_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2679 {
2680  int retval = __kmp_acquire_drdpa_lock_timed_template( lck, gtid );
2681  ANNOTATE_DRDPA_ACQUIRED(lck);
2682  return retval;
2683 }
2684 
2685 static int
2686 __kmp_acquire_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2687 {
2688  char const * const func = "omp_set_lock";
2689  if ( lck->lk.initialized != lck ) {
2690  KMP_FATAL( LockIsUninitialized, func );
2691  }
2692  if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2693  KMP_FATAL( LockNestableUsedAsSimple, func );
2694  }
2695  if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) ) {
2696  KMP_FATAL( LockIsAlreadyOwned, func );
2697  }
2698 
2699  __kmp_acquire_drdpa_lock( lck, gtid );
2700 
2701  lck->lk.owner_id = gtid + 1;
2702  return KMP_LOCK_ACQUIRED_FIRST;
2703 }
2704 
2705 int
2706 __kmp_test_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2707 {
2708  //
2709  // First get a ticket, then read the polls pointer and the mask.
2710  // The polls pointer must be read before the mask!!! (See above)
2711  //
2712  kmp_uint64 ticket = TCR_8(lck->lk.next_ticket); // volatile load
2713  volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2714  = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2715  TCR_PTR(lck->lk.polls); // volatile load
2716  kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load
2717  if (TCR_8(polls[ticket & mask].poll) == ticket) {
2718  kmp_uint64 next_ticket = ticket + 1;
2719  if (KMP_COMPARE_AND_STORE_ACQ64((kmp_int64 *)&lck->lk.next_ticket,
2720  ticket, next_ticket)) {
2721  KMP_FSYNC_ACQUIRED(lck);
2722  KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n",
2723  ticket, lck));
2724  lck->lk.now_serving = ticket; // non-volatile store
2725 
2726  //
2727  // Since no threads are waiting, there is no possibility that
2728  // we would want to reconfigure the polling area. We might
2729  // have the cleanup ticket value (which says that it is now
2730  // safe to deallocate old_polls), but we'll let a later thread
2731  // which calls __kmp_acquire_lock do that - this routine
2732  // isn't supposed to block, and we would risk blocks if we
2733  // called __kmp_free() to do the deallocation.
2734  //
2735  return TRUE;
2736  }
2737  }
2738  return FALSE;
2739 }
2740 
2741 static int
2742 __kmp_test_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2743 {
2744  char const * const func = "omp_test_lock";
2745  if ( lck->lk.initialized != lck ) {
2746  KMP_FATAL( LockIsUninitialized, func );
2747  }
2748  if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2749  KMP_FATAL( LockNestableUsedAsSimple, func );
2750  }
2751 
2752  int retval = __kmp_test_drdpa_lock( lck, gtid );
2753 
2754  if ( retval ) {
2755  lck->lk.owner_id = gtid + 1;
2756  }
2757  return retval;
2758 }
2759 
2760 int
2761 __kmp_release_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2762 {
2763  //
2764  // Read the ticket value from the lock data struct, then the polls
2765  // pointer and the mask. The polls pointer must be read before the
2766  // mask!!! (See above)
2767  //
2768  kmp_uint64 ticket = lck->lk.now_serving + 1; // non-volatile load
2769  volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2770  = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2771  TCR_PTR(lck->lk.polls); // volatile load
2772  kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load
2773  KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n",
2774  ticket - 1, lck));
2775  KMP_FSYNC_RELEASING(lck);
2776  ANNOTATE_DRDPA_RELEASED(lck);
2777  KMP_ST_REL64(&(polls[ticket & mask].poll), ticket); // volatile store
2778  return KMP_LOCK_RELEASED;
2779 }
2780 
2781 static int
2782 __kmp_release_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2783 {
2784  char const * const func = "omp_unset_lock";
2785  KMP_MB(); /* in case another processor initialized lock */
2786  if ( lck->lk.initialized != lck ) {
2787  KMP_FATAL( LockIsUninitialized, func );
2788  }
2789  if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2790  KMP_FATAL( LockNestableUsedAsSimple, func );
2791  }
2792  if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) {
2793  KMP_FATAL( LockUnsettingFree, func );
2794  }
2795  if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) >= 0 )
2796  && ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) ) {
2797  KMP_FATAL( LockUnsettingSetByAnother, func );
2798  }
2799  lck->lk.owner_id = 0;
2800  return __kmp_release_drdpa_lock( lck, gtid );
2801 }
2802 
2803 void
2804 __kmp_init_drdpa_lock( kmp_drdpa_lock_t *lck )
2805 {
2806  lck->lk.location = NULL;
2807  lck->lk.mask = 0;
2808  lck->lk.num_polls = 1;
2809  lck->lk.polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2810  __kmp_allocate(lck->lk.num_polls * sizeof(*(lck->lk.polls)));
2811  lck->lk.cleanup_ticket = 0;
2812  lck->lk.old_polls = NULL;
2813  lck->lk.next_ticket = 0;
2814  lck->lk.now_serving = 0;
2815  lck->lk.owner_id = 0; // no thread owns the lock.
2816  lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
2817  lck->lk.initialized = lck;
2818 
2819  KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck));
2820 }
2821 
2822 static void
2823 __kmp_init_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck )
2824 {
2825  __kmp_init_drdpa_lock( lck );
2826 }
2827 
2828 void
2829 __kmp_destroy_drdpa_lock( kmp_drdpa_lock_t *lck )
2830 {
2831  lck->lk.initialized = NULL;
2832  lck->lk.location = NULL;
2833  if (lck->lk.polls != NULL) {
2834  __kmp_free((void *)lck->lk.polls);
2835  lck->lk.polls = NULL;
2836  }
2837  if (lck->lk.old_polls != NULL) {
2838  __kmp_free((void *)lck->lk.old_polls);
2839  lck->lk.old_polls = NULL;
2840  }
2841  lck->lk.mask = 0;
2842  lck->lk.num_polls = 0;
2843  lck->lk.cleanup_ticket = 0;
2844  lck->lk.next_ticket = 0;
2845  lck->lk.now_serving = 0;
2846  lck->lk.owner_id = 0;
2847  lck->lk.depth_locked = -1;
2848 }
2849 
2850 static void
2851 __kmp_destroy_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck )
2852 {
2853  char const * const func = "omp_destroy_lock";
2854  if ( lck->lk.initialized != lck ) {
2855  KMP_FATAL( LockIsUninitialized, func );
2856  }
2857  if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2858  KMP_FATAL( LockNestableUsedAsSimple, func );
2859  }
2860  if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) {
2861  KMP_FATAL( LockStillOwned, func );
2862  }
2863  __kmp_destroy_drdpa_lock( lck );
2864 }
2865 
2866 
2867 //
2868 // nested drdpa ticket locks
2869 //
2870 
2871 int
2872 __kmp_acquire_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2873 {
2874  KMP_DEBUG_ASSERT( gtid >= 0 );
2875 
2876  if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) {
2877  lck->lk.depth_locked += 1;
2878  return KMP_LOCK_ACQUIRED_NEXT;
2879  }
2880  else {
2881  __kmp_acquire_drdpa_lock_timed_template( lck, gtid );
2882  ANNOTATE_DRDPA_ACQUIRED(lck);
2883  KMP_MB();
2884  lck->lk.depth_locked = 1;
2885  KMP_MB();
2886  lck->lk.owner_id = gtid + 1;
2887  return KMP_LOCK_ACQUIRED_FIRST;
2888  }
2889 }
2890 
2891 static void
2892 __kmp_acquire_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2893 {
2894  char const * const func = "omp_set_nest_lock";
2895  if ( lck->lk.initialized != lck ) {
2896  KMP_FATAL( LockIsUninitialized, func );
2897  }
2898  if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2899  KMP_FATAL( LockSimpleUsedAsNestable, func );
2900  }
2901  __kmp_acquire_nested_drdpa_lock( lck, gtid );
2902 }
2903 
2904 int
2905 __kmp_test_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2906 {
2907  int retval;
2908 
2909  KMP_DEBUG_ASSERT( gtid >= 0 );
2910 
2911  if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) {
2912  retval = ++lck->lk.depth_locked;
2913  }
2914  else if ( !__kmp_test_drdpa_lock( lck, gtid ) ) {
2915  retval = 0;
2916  }
2917  else {
2918  KMP_MB();
2919  retval = lck->lk.depth_locked = 1;
2920  KMP_MB();
2921  lck->lk.owner_id = gtid + 1;
2922  }
2923  return retval;
2924 }
2925 
2926 static int
2927 __kmp_test_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2928 {
2929  char const * const func = "omp_test_nest_lock";
2930  if ( lck->lk.initialized != lck ) {
2931  KMP_FATAL( LockIsUninitialized, func );
2932  }
2933  if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2934  KMP_FATAL( LockSimpleUsedAsNestable, func );
2935  }
2936  return __kmp_test_nested_drdpa_lock( lck, gtid );
2937 }
2938 
2939 int
2940 __kmp_release_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2941 {
2942  KMP_DEBUG_ASSERT( gtid >= 0 );
2943 
2944  KMP_MB();
2945  if ( --(lck->lk.depth_locked) == 0 ) {
2946  KMP_MB();
2947  lck->lk.owner_id = 0;
2948  __kmp_release_drdpa_lock( lck, gtid );
2949  return KMP_LOCK_RELEASED;
2950  }
2951  return KMP_LOCK_STILL_HELD;
2952 }
2953 
2954 static int
2955 __kmp_release_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2956 {
2957  char const * const func = "omp_unset_nest_lock";
2958  KMP_MB(); /* in case another processor initialized lock */
2959  if ( lck->lk.initialized != lck ) {
2960  KMP_FATAL( LockIsUninitialized, func );
2961  }
2962  if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2963  KMP_FATAL( LockSimpleUsedAsNestable, func );
2964  }
2965  if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) {
2966  KMP_FATAL( LockUnsettingFree, func );
2967  }
2968  if ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) {
2969  KMP_FATAL( LockUnsettingSetByAnother, func );
2970  }
2971  return __kmp_release_nested_drdpa_lock( lck, gtid );
2972 }
2973 
2974 void
2975 __kmp_init_nested_drdpa_lock( kmp_drdpa_lock_t * lck )
2976 {
2977  __kmp_init_drdpa_lock( lck );
2978  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
2979 }
2980 
2981 static void
2982 __kmp_init_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck )
2983 {
2984  __kmp_init_nested_drdpa_lock( lck );
2985 }
2986 
2987 void
2988 __kmp_destroy_nested_drdpa_lock( kmp_drdpa_lock_t *lck )
2989 {
2990  __kmp_destroy_drdpa_lock( lck );
2991  lck->lk.depth_locked = 0;
2992 }
2993 
2994 static void
2995 __kmp_destroy_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck )
2996 {
2997  char const * const func = "omp_destroy_nest_lock";
2998  if ( lck->lk.initialized != lck ) {
2999  KMP_FATAL( LockIsUninitialized, func );
3000  }
3001  if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
3002  KMP_FATAL( LockSimpleUsedAsNestable, func );
3003  }
3004  if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) {
3005  KMP_FATAL( LockStillOwned, func );
3006  }
3007  __kmp_destroy_nested_drdpa_lock( lck );
3008 }
3009 
3010 
3011 //
3012 // access functions to fields which don't exist for all lock kinds.
3013 //
3014 
3015 static int
3016 __kmp_is_drdpa_lock_initialized( kmp_drdpa_lock_t *lck )
3017 {
3018  return lck == lck->lk.initialized;
3019 }
3020 
3021 static const ident_t *
3022 __kmp_get_drdpa_lock_location( kmp_drdpa_lock_t *lck )
3023 {
3024  return lck->lk.location;
3025 }
3026 
3027 static void
3028 __kmp_set_drdpa_lock_location( kmp_drdpa_lock_t *lck, const ident_t *loc )
3029 {
3030  lck->lk.location = loc;
3031 }
3032 
3033 static kmp_lock_flags_t
3034 __kmp_get_drdpa_lock_flags( kmp_drdpa_lock_t *lck )
3035 {
3036  return lck->lk.flags;
3037 }
3038 
3039 static void
3040 __kmp_set_drdpa_lock_flags( kmp_drdpa_lock_t *lck, kmp_lock_flags_t flags )
3041 {
3042  lck->lk.flags = flags;
3043 }
3044 
3045 // Time stamp counter
3046 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
3047 # define __kmp_tsc() __kmp_hardware_timestamp()
3048 // Runtime's default backoff parameters
3049 kmp_backoff_t __kmp_spin_backoff_params = { 1, 4096, 100 };
3050 #else
3051 // Use nanoseconds for other platforms
3052 extern kmp_uint64 __kmp_now_nsec();
3053 kmp_backoff_t __kmp_spin_backoff_params = { 1, 256, 100 };
3054 # define __kmp_tsc() __kmp_now_nsec()
3055 #endif
3056 
3057 // A useful predicate for dealing with timestamps that may wrap.
3058 // Is a before b?
3059 // Since the timestamps may wrap, this is asking whether it's
3060 // shorter to go clockwise from a to b around the clock-face, or anti-clockwise.
3061 // Times where going clockwise is a shorter distance than going anti-clockwise
3062 // are in the future, others are in the past.
3063 // e.g., a = MAX-1, b = MAX+1 (= 0): a > b is true, but that does not mean a has
3064 // reached b; the signed difference (signed(a) = -2, signed(b) = 0) captures the actual ordering.
3065 static inline bool before(kmp_uint64 a, kmp_uint64 b)
3066 {
3067  return ((kmp_int64)b - (kmp_int64)a) > 0;
3068 }
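// A small sketch of the wrap-around behaviour (illustrative only):
#if 0
static void example_before(void)
{
    kmp_uint64 max = ~(kmp_uint64)0;
    // a sits just below the wrap point, b just after it:
    KMP_DEBUG_ASSERT(before(max - 1, (kmp_uint64)1));    // b is "later" than a
    KMP_DEBUG_ASSERT(!before((kmp_uint64)1, max - 1));   // and not vice versa
}
#endif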
3069 
3070 // Truncated binary exponential backoff function
3071 void
3072 __kmp_spin_backoff(kmp_backoff_t *boff)
3073 {
 3074  // We could flatten this loop, but making it a nested loop gives better results.
3075  kmp_uint32 i;
3076  for (i = boff->step; i > 0; i--) {
3077  kmp_uint64 goal = __kmp_tsc() + boff->min_tick;
3078  do {
3079  KMP_CPU_PAUSE();
3080  } while (before(__kmp_tsc(), goal));
3081  }
3082  boff->step = (boff->step<<1 | 1) & (boff->max_backoff-1);
3083 }
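// The step update above walks the truncated sequence 1, 3, 7, 15, ..., masked
// by (max_backoff - 1), where max_backoff defaults to a power of two (4096 or
// 256 above). A sketch of the progression (illustrative only):
#if 0
static void example_backoff_steps(void)
{
    kmp_uint32 step = 1, max_backoff = 4096;
    for (int i = 0; i < 5; ++i)
        step = (step << 1 | 1) & (max_backoff - 1);   // 3, 7, 15, 31, 63
}
#endif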
3084 
3085 #if KMP_USE_DYNAMIC_LOCK
3086 
3087 // Direct lock initializer: it simply writes a tag into the low 8 bits of the lock word.
3088 static void __kmp_init_direct_lock(kmp_dyna_lock_t *lck, kmp_dyna_lockseq_t seq)
3089 {
3090  TCW_4(*lck, KMP_GET_D_TAG(seq));
3091  KA_TRACE(20, ("__kmp_init_direct_lock: initialized direct lock with type#%d\n", seq));
3092 }
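// A sketch of the low-bit convention this relies on (illustrative only,
// assuming KMP_GET_D_TAG(seq) expands to ((seq) << 1 | 1)): direct lock words
// are odd, while indirect lock words hold an even table index (see
// __kmp_allocate_indirect_lock below), so the low bit alone tells them apart.
#if 0
static int example_is_direct_lock_word(kmp_uint32 lock_word)
{
    return (lock_word & 1) != 0;   // odd: direct tag; even: indirect index
}
#endif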
3093 
3094 #if KMP_USE_TSX
3095 
3096 // HLE lock functions - imported from the testbed runtime.
3097 #define HLE_ACQUIRE ".byte 0xf2;"
3098 #define HLE_RELEASE ".byte 0xf3;"
3099 
3100 static inline kmp_uint32
3101 swap4(kmp_uint32 volatile *p, kmp_uint32 v)
3102 {
3103  __asm__ volatile(HLE_ACQUIRE "xchg %1,%0"
3104  : "+r"(v), "+m"(*p)
3105  :
3106  : "memory");
3107  return v;
3108 }
3109 
3110 static void
3111 __kmp_destroy_hle_lock(kmp_dyna_lock_t *lck)
3112 {
3113  TCW_4(*lck, 0);
3114 }
3115 
3116 static void
3117 __kmp_acquire_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3118 {
3119  // Use gtid for KMP_LOCK_BUSY if necessary
3120  if (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)) {
3121  int delay = 1;
3122  do {
3123  while (*(kmp_uint32 volatile *)lck != KMP_LOCK_FREE(hle)) {
3124  for (int i = delay; i != 0; --i)
3125  KMP_CPU_PAUSE();
3126  delay = ((delay << 1) | 1) & 7;
3127  }
3128  } while (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle));
3129  }
3130 }
3131 
3132 static void
3133 __kmp_acquire_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3134 {
3135  __kmp_acquire_hle_lock(lck, gtid); // TODO: add checks
3136 }
3137 
3138 static int
3139 __kmp_release_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3140 {
3141  __asm__ volatile(HLE_RELEASE "movl %1,%0"
3142  : "=m"(*lck)
3143  : "r"(KMP_LOCK_FREE(hle))
3144  : "memory");
3145  return KMP_LOCK_RELEASED;
3146 }
3147 
3148 static int
3149 __kmp_release_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3150 {
3151  return __kmp_release_hle_lock(lck, gtid); // TODO: add checks
3152 }
3153 
3154 static int
3155 __kmp_test_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3156 {
3157  return swap4(lck, KMP_LOCK_BUSY(1, hle)) == KMP_LOCK_FREE(hle);
3158 }
3159 
3160 static int
3161 __kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid)
3162 {
3163  return __kmp_test_hle_lock(lck, gtid); // TODO: add checks
3164 }
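// Note on the prefixes used above: 0xf2 and 0xf3 are the XACQUIRE and XRELEASE
// prefixes, so the xchg in swap4() and the store in __kmp_release_hle_lock()
// elide the lock on HLE-capable hardware and execute as ordinary instructions
// otherwise. A usage sketch (illustrative only):
#if 0
static void example_hle_usage(kmp_dyna_lock_t *lck, kmp_int32 gtid)
{
    __kmp_acquire_hle_lock(lck, gtid);   // XACQUIRE-prefixed xchg
    // ... critical section, executed transactionally when elided ...
    __kmp_release_hle_lock(lck, gtid);   // XRELEASE-prefixed store
}
#endif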
3165 
3166 static void
3167 __kmp_init_rtm_lock(kmp_queuing_lock_t *lck)
3168 {
3169  __kmp_init_queuing_lock(lck);
3170 }
3171 
3172 static void
3173 __kmp_destroy_rtm_lock(kmp_queuing_lock_t *lck)
3174 {
3175  __kmp_destroy_queuing_lock(lck);
3176 }
3177 
3178 static void
3179 __kmp_acquire_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid)
3180 {
3181  unsigned retries=3, status;
3182  do {
3183  status = _xbegin();
3184  if (status == _XBEGIN_STARTED) {
3185  if (__kmp_is_unlocked_queuing_lock(lck))
3186  return;
3187  _xabort(0xff);
3188  }
3189  if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) {
3190  // Wait until lock becomes free
3191  while (! __kmp_is_unlocked_queuing_lock(lck))
3192  __kmp_yield(TRUE);
3193  }
3194  else if (!(status & _XABORT_RETRY))
3195  break;
3196  } while (retries--);
3197 
3198  // Fall-back non-speculative lock (xchg)
3199  __kmp_acquire_queuing_lock(lck, gtid);
3200 }
3201 
3202 static void
3203 __kmp_acquire_rtm_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid)
3204 {
3205  __kmp_acquire_rtm_lock(lck, gtid);
3206 }
3207 
3208 static int
3209 __kmp_release_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid)
3210 {
3211  if (__kmp_is_unlocked_queuing_lock(lck)) {
3212  // Releasing from speculation
3213  _xend();
3214  }
3215  else {
3216  // Releasing from a real lock
3217  __kmp_release_queuing_lock(lck, gtid);
3218  }
3219  return KMP_LOCK_RELEASED;
3220 }
3221 
3222 static int
3223 __kmp_release_rtm_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid)
3224 {
3225  return __kmp_release_rtm_lock(lck, gtid);
3226 }
3227 
3228 static int
3229 __kmp_test_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid)
3230 {
3231  unsigned retries=3, status;
3232  do {
3233  status = _xbegin();
3234  if (status == _XBEGIN_STARTED && __kmp_is_unlocked_queuing_lock(lck)) {
3235  return 1;
3236  }
3237  if (!(status & _XABORT_RETRY))
3238  break;
3239  } while (retries--);
3240 
3241  return (__kmp_is_unlocked_queuing_lock(lck))? 1: 0;
3242 }
3243 
3244 static int
3245 __kmp_test_rtm_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid)
3246 {
3247  return __kmp_test_rtm_lock(lck, gtid);
3248 }
3249 
3250 #endif // KMP_USE_TSX
3251 
3252 // Entry functions for indirect locks (first element of direct lock jump tables).
3253 static void __kmp_init_indirect_lock(kmp_dyna_lock_t * l, kmp_dyna_lockseq_t tag);
3254 static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t * lock);
3255 static void __kmp_set_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
3256 static int __kmp_unset_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
3257 static int __kmp_test_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32);
3258 static void __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32);
3259 static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32);
3260 static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32);
3261 
3262 //
3263 // Jump tables for the indirect lock functions.
3264 // Only fill in the odd entries; that avoids the need to shift out the low bit.
3265 //
3266 
3267 // init functions
3268 #define expand(l, op) 0,__kmp_init_direct_lock,
3269 void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t)
3270  = { __kmp_init_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, init) };
3271 #undef expand
3272 
3273 // destroy functions
3274 #define expand(l, op) 0,(void (*)(kmp_dyna_lock_t *))__kmp_##op##_##l##_lock,
3275 void (*__kmp_direct_destroy[])(kmp_dyna_lock_t *)
3276  = { __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy) };
3277 #undef expand
3278 
3279 // set/acquire functions
3280 #define expand(l, op) 0,(void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
3281 static void (*direct_set[])(kmp_dyna_lock_t *, kmp_int32)
3282  = { __kmp_set_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, acquire) };
3283 #undef expand
3284 #define expand(l, op) 0,(void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
3285 static void (*direct_set_check[])(kmp_dyna_lock_t *, kmp_int32)
3286  = { __kmp_set_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, acquire) };
3287 #undef expand
3288 
3289 // unset/release and test functions
3290 #define expand(l, op) 0,(int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
3291 static int (*direct_unset[])(kmp_dyna_lock_t *, kmp_int32)
3292  = { __kmp_unset_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, release) };
3293 static int (*direct_test[])(kmp_dyna_lock_t *, kmp_int32)
3294  = { __kmp_test_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, test) };
3295 #undef expand
3296 #define expand(l, op) 0,(int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
3297 static int (*direct_unset_check[])(kmp_dyna_lock_t *, kmp_int32)
3298  = { __kmp_unset_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, release) };
3299 static int (*direct_test_check[])(kmp_dyna_lock_t *, kmp_int32)
3300  = { __kmp_test_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, test) };
3301 #undef expand
3302 
3303 // Exposes only one set of jump tables (*lock or *lock_with_checks).
3304 void (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32) = 0;
3305 int (*(*__kmp_direct_unset))(kmp_dyna_lock_t *, kmp_int32) = 0;
3306 int (*(*__kmp_direct_test))(kmp_dyna_lock_t *, kmp_int32) = 0;
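// Dispatch sketch (illustrative only; the real indexing is done by the
// dispatch macros in kmp_lock.h): an odd tag taken from a direct lock word
// selects that lock kind's entry, while entry 0 routes to the indirect
// functions declared above.
#if 0
static void example_direct_dispatch(kmp_dyna_lock_t *lck, kmp_int32 gtid,
                                    kmp_uint32 tag)   // 0 or an odd direct tag
{
    __kmp_direct_set[tag](lck, gtid);   // hypothetical direct use of the table
}
#endif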
3307 
3308 //
3309 // Jump tables for the indirect lock functions.
3310 //
3311 #define expand(l, op) (void (*)(kmp_user_lock_p))__kmp_##op##_##l##_##lock,
3312 void (*__kmp_indirect_init[])(kmp_user_lock_p) = { KMP_FOREACH_I_LOCK(expand, init) };
3313 void (*__kmp_indirect_destroy[])(kmp_user_lock_p) = { KMP_FOREACH_I_LOCK(expand, destroy) };
3314 #undef expand
3315 
3316 // set/acquire functions
3317 #define expand(l, op) (void (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock,
3318 static void (*indirect_set[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, acquire) };
3319 #undef expand
3320 #define expand(l, op) (void (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock_with_checks,
3321 static void (*indirect_set_check[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, acquire) };
3322 #undef expand
3323 
3324 // unset/release and test functions
3325 #define expand(l, op) (int (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock,
3326 static int (*indirect_unset[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, release) };
3327 static int (*indirect_test[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, test) };
3328 #undef expand
3329 #define expand(l, op) (int (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock_with_checks,
3330 static int (*indirect_unset_check[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, release) };
3331 static int (*indirect_test_check[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, test) };
3332 #undef expand
3333 
3334 // Exposes only one set of jump tables (*lock or *lock_with_checks).
3335 void (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32) = 0;
3336 int (*(*__kmp_indirect_unset))(kmp_user_lock_p, kmp_int32) = 0;
3337 int (*(*__kmp_indirect_test))(kmp_user_lock_p, kmp_int32) = 0;
3338 
3339 // Lock index table.
3340 kmp_indirect_lock_table_t __kmp_i_lock_table;
3341 
3342 // Size of indirect locks.
3343 static kmp_uint32 __kmp_indirect_lock_size[KMP_NUM_I_LOCKS] = { 0 };
3344 
3345 // Jump tables for lock accessor/modifier.
3346 void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p, const ident_t *) = { 0 };
3347 void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p, kmp_lock_flags_t) = { 0 };
3348 const ident_t * (*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p) = { 0 };
3349 kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p) = { 0 };
3350 
3351 // Use different lock pools for different lock types.
3352 static kmp_indirect_lock_t * __kmp_indirect_lock_pool[KMP_NUM_I_LOCKS] = { 0 };
3353 
3354 // User lock allocator for dynamically dispatched indirect locks.
3355 // Every entry of the indirect lock table holds the address and type of the allocated indrect lock
3356 // (kmp_indirect_lock_t), and the size of the table doubles when it is full. A destroyed indirect lock
3357 // object is returned to the reusable pool of locks, unique to each lock type.
3358 kmp_indirect_lock_t *
3359 __kmp_allocate_indirect_lock(void **user_lock, kmp_int32 gtid, kmp_indirect_locktag_t tag)
3360 {
3361  kmp_indirect_lock_t *lck;
3362  kmp_lock_index_t idx;
3363 
3364  __kmp_acquire_lock(&__kmp_global_lock, gtid);
3365 
3366  if (__kmp_indirect_lock_pool[tag] != NULL) {
3367  // Reuse the allocated and destroyed lock object
3368  lck = __kmp_indirect_lock_pool[tag];
3369  if (OMP_LOCK_T_SIZE < sizeof(void *))
3370  idx = lck->lock->pool.index;
3371  __kmp_indirect_lock_pool[tag] = (kmp_indirect_lock_t *)lck->lock->pool.next;
3372  KA_TRACE(20, ("__kmp_allocate_indirect_lock: reusing an existing lock %p\n", lck));
3373  } else {
3374  idx = __kmp_i_lock_table.next;
3375  // Check capacity and double the size if it is full
3376  if (idx == __kmp_i_lock_table.size) {
3377  // Double up the space for block pointers
3378  int row = __kmp_i_lock_table.size/KMP_I_LOCK_CHUNK;
3379  kmp_indirect_lock_t **old_table = __kmp_i_lock_table.table;
3380  __kmp_i_lock_table.table = (kmp_indirect_lock_t **)__kmp_allocate(2*row*sizeof(kmp_indirect_lock_t *));
3381  KMP_MEMCPY(__kmp_i_lock_table.table, old_table, row*sizeof(kmp_indirect_lock_t *));
3382  __kmp_free(old_table);
3383  // Allocate new objects in the new blocks
3384  for (int i = row; i < 2*row; ++i)
3385  *(__kmp_i_lock_table.table + i) = (kmp_indirect_lock_t *)
3386  __kmp_allocate(KMP_I_LOCK_CHUNK*sizeof(kmp_indirect_lock_t));
3387  __kmp_i_lock_table.size = 2*idx;
3388  }
3389  __kmp_i_lock_table.next++;
3390  lck = KMP_GET_I_LOCK(idx);
3391  // Allocate a new base lock object
3392  lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]);
3393  KA_TRACE(20, ("__kmp_allocate_indirect_lock: allocated a new lock %p\n", lck));
3394  }
3395 
3396  __kmp_release_lock(&__kmp_global_lock, gtid);
3397 
3398  lck->type = tag;
3399 
3400  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3401  *((kmp_lock_index_t *)user_lock) = idx << 1; // indirect lock word must be even.
3402  } else {
3403  *((kmp_indirect_lock_t **)user_lock) = lck;
3404  }
3405 
3406  return lck;
3407 }
3408 
3409 // User lock lookup for dynamically dispatched locks.
3410 static __forceinline
3411 kmp_indirect_lock_t *
3412 __kmp_lookup_indirect_lock(void **user_lock, const char *func)
3413 {
3414  if (__kmp_env_consistency_check) {
3415  kmp_indirect_lock_t *lck = NULL;
3416  if (user_lock == NULL) {
3417  KMP_FATAL(LockIsUninitialized, func);
3418  }
3419  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3420  kmp_lock_index_t idx = KMP_EXTRACT_I_INDEX(user_lock);
3421  if (idx >= __kmp_i_lock_table.size) {
3422  KMP_FATAL(LockIsUninitialized, func);
3423  }
3424  lck = KMP_GET_I_LOCK(idx);
3425  } else {
3426  lck = *((kmp_indirect_lock_t **)user_lock);
3427  }
3428  if (lck == NULL) {
3429  KMP_FATAL(LockIsUninitialized, func);
3430  }
3431  return lck;
3432  } else {
3433  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3434  return KMP_GET_I_LOCK(KMP_EXTRACT_I_INDEX(user_lock));
3435  } else {
3436  return *((kmp_indirect_lock_t **)user_lock);
3437  }
3438  }
3439 }
3440 
3441 static void
3442 __kmp_init_indirect_lock(kmp_dyna_lock_t * lock, kmp_dyna_lockseq_t seq)
3443 {
3444 #if KMP_USE_ADAPTIVE_LOCKS
3445  if (seq == lockseq_adaptive && !__kmp_cpuinfo.rtm) {
3446  KMP_WARNING(AdaptiveNotSupported, "kmp_lockseq_t", "adaptive");
3447  seq = lockseq_queuing;
3448  }
3449 #endif
3450 #if KMP_USE_TSX
3451  if (seq == lockseq_rtm && !__kmp_cpuinfo.rtm) {
3452  seq = lockseq_queuing;
3453  }
3454 #endif
3455  kmp_indirect_locktag_t tag = KMP_GET_I_TAG(seq);
3456  kmp_indirect_lock_t *l = __kmp_allocate_indirect_lock((void **)lock, __kmp_entry_gtid(), tag);
3457  KMP_I_LOCK_FUNC(l, init)(l->lock);
3458  KA_TRACE(20, ("__kmp_init_indirect_lock: initialized indirect lock with type#%d\n", seq));
3459 }
3460 
3461 static void
3462 __kmp_destroy_indirect_lock(kmp_dyna_lock_t * lock)
3463 {
3464  kmp_uint32 gtid = __kmp_entry_gtid();
3465  kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_destroy_lock");
3466  KMP_I_LOCK_FUNC(l, destroy)(l->lock);
3467  kmp_indirect_locktag_t tag = l->type;
3468 
3469  __kmp_acquire_lock(&__kmp_global_lock, gtid);
3470 
3471  // Use the base lock's space to keep the pool chain.
3472  l->lock->pool.next = (kmp_user_lock_p)__kmp_indirect_lock_pool[tag];
3473  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3474  l->lock->pool.index = KMP_EXTRACT_I_INDEX(lock);
3475  }
3476  __kmp_indirect_lock_pool[tag] = l;
3477 
3478  __kmp_release_lock(&__kmp_global_lock, gtid);
3479 }
3480 
3481 static void
3482 __kmp_set_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3483 {
3484  kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3485  KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
3486 }
3487 
3488 static int
3489 __kmp_unset_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3490 {
3491  kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3492  return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid);
3493 }
3494 
3495 static int
3496 __kmp_test_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3497 {
3498  kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3499  return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid);
3500 }
3501 
3502 static void
3503 __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3504 {
3505  kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_set_lock");
3506  KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
3507 }
3508 
3509 static int
3510 __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3511 {
3512  kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_unset_lock");
3513  return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid);
3514 }
3515 
3516 static int
3517 __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid)
3518 {
3519  kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_test_lock");
3520  return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid);
3521 }
3522 
3523 kmp_dyna_lockseq_t __kmp_user_lock_seq = lockseq_queuing;
3524 
3525 // This is used only in kmp_error.cpp when consistency checking is on.
3526 kmp_int32
3527 __kmp_get_user_lock_owner(kmp_user_lock_p lck, kmp_uint32 seq)
3528 {
3529  switch (seq) {
3530  case lockseq_tas:
3531  case lockseq_nested_tas:
3532  return __kmp_get_tas_lock_owner((kmp_tas_lock_t *)lck);
3533 #if KMP_USE_FUTEX
3534  case lockseq_futex:
3535  case lockseq_nested_futex:
3536  return __kmp_get_futex_lock_owner((kmp_futex_lock_t *)lck);
3537 #endif
3538  case lockseq_ticket:
3539  case lockseq_nested_ticket:
3540  return __kmp_get_ticket_lock_owner((kmp_ticket_lock_t *)lck);
3541  case lockseq_queuing:
3542  case lockseq_nested_queuing:
3543 #if KMP_USE_ADAPTIVE_LOCKS
3544  case lockseq_adaptive:
3545 #endif
3546  return __kmp_get_queuing_lock_owner((kmp_queuing_lock_t *)lck);
3547  case lockseq_drdpa:
3548  case lockseq_nested_drdpa:
3549  return __kmp_get_drdpa_lock_owner((kmp_drdpa_lock_t *)lck);
3550  default:
3551  return 0;
3552  }
3553 }
3554 
3555 // Initializes data for dynamic user locks.
3556 void
3557 __kmp_init_dynamic_user_locks()
3558 {
3559  // Initialize jump table for the lock functions
3560  if (__kmp_env_consistency_check) {
3561  __kmp_direct_set = direct_set_check;
3562  __kmp_direct_unset = direct_unset_check;
3563  __kmp_direct_test = direct_test_check;
3564  __kmp_indirect_set = indirect_set_check;
3565  __kmp_indirect_unset = indirect_unset_check;
3566  __kmp_indirect_test = indirect_test_check;
3567  }
3568  else {
3569  __kmp_direct_set = direct_set;
3570  __kmp_direct_unset = direct_unset;
3571  __kmp_direct_test = direct_test;
3572  __kmp_indirect_set = indirect_set;
3573  __kmp_indirect_unset = indirect_unset;
3574  __kmp_indirect_test = indirect_test;
3575  }
3576  // If the user locks have already been initialized, then return.
3577  // Allow the switch between different KMP_CONSISTENCY_CHECK values,
3578  // but do not allocate new lock tables if they have already been
3579  // allocated.
3580  if (__kmp_init_user_locks)
3581  return;
3582 
3583  // Initialize lock index table
3584  __kmp_i_lock_table.size = KMP_I_LOCK_CHUNK;
3585  __kmp_i_lock_table.table = (kmp_indirect_lock_t **)__kmp_allocate(sizeof(kmp_indirect_lock_t *));
3586  *(__kmp_i_lock_table.table) = (kmp_indirect_lock_t *)
3587  __kmp_allocate(KMP_I_LOCK_CHUNK*sizeof(kmp_indirect_lock_t));
3588  __kmp_i_lock_table.next = 0;
3589 
3590  // Indirect lock size
3591  __kmp_indirect_lock_size[locktag_ticket] = sizeof(kmp_ticket_lock_t);
3592  __kmp_indirect_lock_size[locktag_queuing] = sizeof(kmp_queuing_lock_t);
3593 #if KMP_USE_ADAPTIVE_LOCKS
3594  __kmp_indirect_lock_size[locktag_adaptive] = sizeof(kmp_adaptive_lock_t);
3595 #endif
3596  __kmp_indirect_lock_size[locktag_drdpa] = sizeof(kmp_drdpa_lock_t);
3597 #if KMP_USE_TSX
3598  __kmp_indirect_lock_size[locktag_rtm] = sizeof(kmp_queuing_lock_t);
3599 #endif
3600  __kmp_indirect_lock_size[locktag_nested_tas] = sizeof(kmp_tas_lock_t);
3601 #if KMP_USE_FUTEX
3602  __kmp_indirect_lock_size[locktag_nested_futex] = sizeof(kmp_futex_lock_t);
3603 #endif
3604  __kmp_indirect_lock_size[locktag_nested_ticket] = sizeof(kmp_ticket_lock_t);
3605  __kmp_indirect_lock_size[locktag_nested_queuing] = sizeof(kmp_queuing_lock_t);
3606  __kmp_indirect_lock_size[locktag_nested_drdpa] = sizeof(kmp_drdpa_lock_t);
3607 
3608  // Initialize lock accessor/modifier
3609 #define fill_jumps(table, expand, sep) { \
3610  table[locktag##sep##ticket] = expand(ticket); \
3611  table[locktag##sep##queuing] = expand(queuing); \
3612  table[locktag##sep##drdpa] = expand(drdpa); \
3613 }
3614 
3615 #if KMP_USE_ADAPTIVE_LOCKS
3616 # define fill_table(table, expand) { \
3617  fill_jumps(table, expand, _); \
3618  table[locktag_adaptive] = expand(queuing); \
3619  fill_jumps(table, expand, _nested_); \
3620 }
3621 #else
3622 # define fill_table(table, expand) { \
3623  fill_jumps(table, expand, _); \
3624  fill_jumps(table, expand, _nested_); \
3625 }
3626 #endif // KMP_USE_ADAPTIVE_LOCKS
3627 
3628 #define expand(l) (void (*)(kmp_user_lock_p, const ident_t *))__kmp_set_##l##_lock_location
3629  fill_table(__kmp_indirect_set_location, expand);
3630 #undef expand
3631 #define expand(l) (void (*)(kmp_user_lock_p, kmp_lock_flags_t))__kmp_set_##l##_lock_flags
3632  fill_table(__kmp_indirect_set_flags, expand);
3633 #undef expand
3634 #define expand(l) (const ident_t * (*)(kmp_user_lock_p))__kmp_get_##l##_lock_location
3635  fill_table(__kmp_indirect_get_location, expand);
3636 #undef expand
3637 #define expand(l) (kmp_lock_flags_t (*)(kmp_user_lock_p))__kmp_get_##l##_lock_flags
3638  fill_table(__kmp_indirect_get_flags, expand);
3639 #undef expand
3640 
3641  __kmp_init_user_locks = TRUE;
3642 }
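// ---------------------------------------------------------------------------
// Editor's illustrative sketch (not part of the runtime): the expand/fill_table
// macros above populate function-pointer tables by token-pasting the lock kind
// into a per-kind symbol name. A reduced standalone version of the same
// pattern, with hypothetical greet_* functions standing in for the lock
// accessors:
#include <stdio.h>

static void greet_ticket(void)  { puts("ticket");  }
static void greet_queuing(void) { puts("queuing"); }
static void greet_drdpa(void)   { puts("drdpa");   }

enum { tag_ticket, tag_queuing, tag_drdpa, tag_count };
static void (*greet_table[tag_count])(void);

#define FILL_GREETERS(table, expand) {    \
    table[tag_ticket]  = expand(ticket);  \
    table[tag_queuing] = expand(queuing); \
    table[tag_drdpa]   = expand(drdpa);   \
}

void init_greeters(void)
{
#define expand(kind) greet_##kind       /* pastes the per-kind function name */
    FILL_GREETERS(greet_table, expand);
#undef expand
}
// ---------------------------------------------------------------------------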
3643 
3644 // Clean up the lock table.
3645 void
3646 __kmp_cleanup_indirect_user_locks()
3647 {
3648  kmp_lock_index_t i;
3649  int k;
3650 
3651  // Clean up locks in the pools first (they were already destroyed before going into the pools).
3652  for (k = 0; k < KMP_NUM_I_LOCKS; ++k) {
3653  kmp_indirect_lock_t *l = __kmp_indirect_lock_pool[k];
3654  while (l != NULL) {
3655  kmp_indirect_lock_t *ll = l;
3656  l = (kmp_indirect_lock_t *)l->lock->pool.next;
3657  KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: freeing %p from pool\n", ll));
3658  __kmp_free(ll->lock);
3659  ll->lock = NULL;
3660  }
3661  __kmp_indirect_lock_pool[k] = NULL;
3662  }
3663  // Clean up the remaining undestroyed locks.
3664  for (i = 0; i < __kmp_i_lock_table.next; i++) {
3665  kmp_indirect_lock_t *l = KMP_GET_I_LOCK(i);
3666  if (l->lock != NULL) {
3667  // Locks not destroyed explicitly need to be destroyed here.
3668  KMP_I_LOCK_FUNC(l, destroy)(l->lock);
3669  KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: destroy/freeing %p from table\n", l));
3670  __kmp_free(l->lock);
3671  }
3672  }
3673  // Free the table
3674  for (i = 0; i < __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK; i++)
3675  __kmp_free(__kmp_i_lock_table.table[i]);
3676  __kmp_free(__kmp_i_lock_table.table);
3677 
3678  __kmp_init_user_locks = FALSE;
3679 }
3680 
3681 enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
3682 int __kmp_num_locks_in_block = 1; // FIXME - tune this value
3683 
3684 #else // KMP_USE_DYNAMIC_LOCK
3685 
3686 /* ------------------------------------------------------------------------ */
3687 /* user locks
3688  *
3689  * They are implemented as a table of function pointers which are set to the
3690  * lock functions of the appropriate kind, once that has been determined.
3691  */
3692 
3693 enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
3694 
3695 size_t __kmp_base_user_lock_size = 0;
3696 size_t __kmp_user_lock_size = 0;
3697 
3698 kmp_int32 ( *__kmp_get_user_lock_owner_ )( kmp_user_lock_p lck ) = NULL;
3699 int ( *__kmp_acquire_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3700 
3701 int ( *__kmp_test_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3702 int ( *__kmp_release_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3703 void ( *__kmp_init_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3704 void ( *__kmp_destroy_user_lock_ )( kmp_user_lock_p lck ) = NULL;
3705 void ( *__kmp_destroy_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3706 int ( *__kmp_acquire_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3707 
3708 int ( *__kmp_test_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3709 int ( *__kmp_release_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3710 void ( *__kmp_init_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3711 void ( *__kmp_destroy_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3712 
3713 int ( *__kmp_is_user_lock_initialized_ )( kmp_user_lock_p lck ) = NULL;
3714 const ident_t * ( *__kmp_get_user_lock_location_ )( kmp_user_lock_p lck ) = NULL;
3715 void ( *__kmp_set_user_lock_location_ )( kmp_user_lock_p lck, const ident_t *loc ) = NULL;
3716 kmp_lock_flags_t ( *__kmp_get_user_lock_flags_ )( kmp_user_lock_p lck ) = NULL;
3717 void ( *__kmp_set_user_lock_flags_ )( kmp_user_lock_p lck, kmp_lock_flags_t flags ) = NULL;
3718 
3719 void __kmp_set_user_lock_vptrs( kmp_lock_kind_t user_lock_kind )
3720 {
3721  switch ( user_lock_kind ) {
3722  case lk_default:
3723  default:
3724  KMP_ASSERT( 0 );
3725 
3726  case lk_tas: {
3727  __kmp_base_user_lock_size = sizeof( kmp_base_tas_lock_t );
3728  __kmp_user_lock_size = sizeof( kmp_tas_lock_t );
3729 
3730  __kmp_get_user_lock_owner_ =
3731  ( kmp_int32 ( * )( kmp_user_lock_p ) )
3732  ( &__kmp_get_tas_lock_owner );
3733 
3734  if ( __kmp_env_consistency_check ) {
3735  KMP_BIND_USER_LOCK_WITH_CHECKS(tas);
3736  KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas);
3737  }
3738  else {
3739  KMP_BIND_USER_LOCK(tas);
3740  KMP_BIND_NESTED_USER_LOCK(tas);
3741  }
3742 
3743  __kmp_destroy_user_lock_ =
3744  ( void ( * )( kmp_user_lock_p ) )
3745  ( &__kmp_destroy_tas_lock );
3746 
3747  __kmp_is_user_lock_initialized_ =
3748  ( int ( * )( kmp_user_lock_p ) ) NULL;
3749 
3750  __kmp_get_user_lock_location_ =
3751  ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;
3752 
3753  __kmp_set_user_lock_location_ =
3754  ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;
3755 
3756  __kmp_get_user_lock_flags_ =
3757  ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;
3758 
3759  __kmp_set_user_lock_flags_ =
3760  ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
3761  }
3762  break;
3763 
3764 #if KMP_USE_FUTEX
3765 
3766  case lk_futex: {
3767  __kmp_base_user_lock_size = sizeof( kmp_base_futex_lock_t );
3768  __kmp_user_lock_size = sizeof( kmp_futex_lock_t );
3769 
3770  __kmp_get_user_lock_owner_ =
3771  ( kmp_int32 ( * )( kmp_user_lock_p ) )
3772  ( &__kmp_get_futex_lock_owner );
3773 
3774  if ( __kmp_env_consistency_check ) {
3775  KMP_BIND_USER_LOCK_WITH_CHECKS(futex);
3776  KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex);
3777  }
3778  else {
3779  KMP_BIND_USER_LOCK(futex);
3780  KMP_BIND_NESTED_USER_LOCK(futex);
3781  }
3782 
3783  __kmp_destroy_user_lock_ =
3784  ( void ( * )( kmp_user_lock_p ) )
3785  ( &__kmp_destroy_futex_lock );
3786 
3787  __kmp_is_user_lock_initialized_ =
3788  ( int ( * )( kmp_user_lock_p ) ) NULL;
3789 
3790  __kmp_get_user_lock_location_ =
3791  ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;
3792 
3793  __kmp_set_user_lock_location_ =
3794  ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;
3795 
3796  __kmp_get_user_lock_flags_ =
3797  ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;
3798 
3799  __kmp_set_user_lock_flags_ =
3800  ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
3801  }
3802  break;
3803 
3804 #endif // KMP_USE_FUTEX
3805 
3806  case lk_ticket: {
3807  __kmp_base_user_lock_size = sizeof( kmp_base_ticket_lock_t );
3808  __kmp_user_lock_size = sizeof( kmp_ticket_lock_t );
3809 
3810  __kmp_get_user_lock_owner_ =
3811  ( kmp_int32 ( * )( kmp_user_lock_p ) )
3812  ( &__kmp_get_ticket_lock_owner );
3813 
3814  if ( __kmp_env_consistency_check ) {
3815  KMP_BIND_USER_LOCK_WITH_CHECKS(ticket);
3816  KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(ticket);
3817  }
3818  else {
3819  KMP_BIND_USER_LOCK(ticket);
3820  KMP_BIND_NESTED_USER_LOCK(ticket);
3821  }
3822 
3823  __kmp_destroy_user_lock_ =
3824  ( void ( * )( kmp_user_lock_p ) )
3825  ( &__kmp_destroy_ticket_lock );
3826 
3827  __kmp_is_user_lock_initialized_ =
3828  ( int ( * )( kmp_user_lock_p ) )
3829  ( &__kmp_is_ticket_lock_initialized );
3830 
3831  __kmp_get_user_lock_location_ =
3832  ( const ident_t * ( * )( kmp_user_lock_p ) )
3833  ( &__kmp_get_ticket_lock_location );
3834 
3835  __kmp_set_user_lock_location_ =
3836  ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3837  ( &__kmp_set_ticket_lock_location );
3838 
3839  __kmp_get_user_lock_flags_ =
3840  ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3841  ( &__kmp_get_ticket_lock_flags );
3842 
3843  __kmp_set_user_lock_flags_ =
3844  ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3845  ( &__kmp_set_ticket_lock_flags );
3846  }
3847  break;
3848 
3849  case lk_queuing: {
3850  __kmp_base_user_lock_size = sizeof( kmp_base_queuing_lock_t );
3851  __kmp_user_lock_size = sizeof( kmp_queuing_lock_t );
3852 
3853  __kmp_get_user_lock_owner_ =
3854  ( kmp_int32 ( * )( kmp_user_lock_p ) )
3855  ( &__kmp_get_queuing_lock_owner );
3856 
3857  if ( __kmp_env_consistency_check ) {
3858  KMP_BIND_USER_LOCK_WITH_CHECKS(queuing);
3859  KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(queuing);
3860  }
3861  else {
3862  KMP_BIND_USER_LOCK(queuing);
3863  KMP_BIND_NESTED_USER_LOCK(queuing);
3864  }
3865 
3866  __kmp_destroy_user_lock_ =
3867  ( void ( * )( kmp_user_lock_p ) )
3868  ( &__kmp_destroy_queuing_lock );
3869 
3870  __kmp_is_user_lock_initialized_ =
3871  ( int ( * )( kmp_user_lock_p ) )
3872  ( &__kmp_is_queuing_lock_initialized );
3873 
3874  __kmp_get_user_lock_location_ =
3875  ( const ident_t * ( * )( kmp_user_lock_p ) )
3876  ( &__kmp_get_queuing_lock_location );
3877 
3878  __kmp_set_user_lock_location_ =
3879  ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3880  ( &__kmp_set_queuing_lock_location );
3881 
3882  __kmp_get_user_lock_flags_ =
3883  ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3884  ( &__kmp_get_queuing_lock_flags );
3885 
3886  __kmp_set_user_lock_flags_ =
3887  ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3888  ( &__kmp_set_queuing_lock_flags );
3889  }
3890  break;
3891 
3892 #if KMP_USE_ADAPTIVE_LOCKS
3893  case lk_adaptive: {
3894  __kmp_base_user_lock_size = sizeof( kmp_base_adaptive_lock_t );
3895  __kmp_user_lock_size = sizeof( kmp_adaptive_lock_t );
3896 
3897  __kmp_get_user_lock_owner_ =
3898  ( kmp_int32 ( * )( kmp_user_lock_p ) )
3899  ( &__kmp_get_queuing_lock_owner );
3900 
3901  if ( __kmp_env_consistency_check ) {
3902  KMP_BIND_USER_LOCK_WITH_CHECKS(adaptive);
3903  }
3904  else {
3905  KMP_BIND_USER_LOCK(adaptive);
3906  }
3907 
3908  __kmp_destroy_user_lock_ =
3909  ( void ( * )( kmp_user_lock_p ) )
3910  ( &__kmp_destroy_adaptive_lock );
3911 
3912  __kmp_is_user_lock_initialized_ =
3913  ( int ( * )( kmp_user_lock_p ) )
3914  ( &__kmp_is_queuing_lock_initialized );
3915 
3916  __kmp_get_user_lock_location_ =
3917  ( const ident_t * ( * )( kmp_user_lock_p ) )
3918  ( &__kmp_get_queuing_lock_location );
3919 
3920  __kmp_set_user_lock_location_ =
3921  ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3922  ( &__kmp_set_queuing_lock_location );
3923 
3924  __kmp_get_user_lock_flags_ =
3925  ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3926  ( &__kmp_get_queuing_lock_flags );
3927 
3928  __kmp_set_user_lock_flags_ =
3929  ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3930  ( &__kmp_set_queuing_lock_flags );
3931 
3932  }
3933  break;
3934 #endif // KMP_USE_ADAPTIVE_LOCKS
3935 
3936  case lk_drdpa: {
3937  __kmp_base_user_lock_size = sizeof( kmp_base_drdpa_lock_t );
3938  __kmp_user_lock_size = sizeof( kmp_drdpa_lock_t );
3939 
3940  __kmp_get_user_lock_owner_ =
3941  ( kmp_int32 ( * )( kmp_user_lock_p ) )
3942  ( &__kmp_get_drdpa_lock_owner );
3943 
3944  if ( __kmp_env_consistency_check ) {
3945  KMP_BIND_USER_LOCK_WITH_CHECKS(drdpa);
3946  KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(drdpa);
3947  }
3948  else {
3949  KMP_BIND_USER_LOCK(drdpa);
3950  KMP_BIND_NESTED_USER_LOCK(drdpa);
3951  }
3952 
3953  __kmp_destroy_user_lock_ =
3954  ( void ( * )( kmp_user_lock_p ) )
3955  ( &__kmp_destroy_drdpa_lock );
3956 
3957  __kmp_is_user_lock_initialized_ =
3958  ( int ( * )( kmp_user_lock_p ) )
3959  ( &__kmp_is_drdpa_lock_initialized );
3960 
3961  __kmp_get_user_lock_location_ =
3962  ( const ident_t * ( * )( kmp_user_lock_p ) )
3963  ( &__kmp_get_drdpa_lock_location );
3964 
3965  __kmp_set_user_lock_location_ =
3966  ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3967  ( &__kmp_set_drdpa_lock_location );
3968 
3969  __kmp_get_user_lock_flags_ =
3970  ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3971  ( &__kmp_get_drdpa_lock_flags );
3972 
3973  __kmp_set_user_lock_flags_ =
3974  ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3975  ( &__kmp_set_drdpa_lock_flags );
3976  }
3977  break;
3978  }
3979 }
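// ---------------------------------------------------------------------------
// Editor's illustrative sketch (not part of the runtime): the function above
// binds a family of global function pointers to the implementation of one lock
// kind, chosen at runtime; every later user lock operation dispatches through
// them. Reduced standalone analogue with hypothetical names and two fake kinds:
#include <stddef.h>

typedef struct dispatch_lock dispatch_lock_t;   // opaque in this sketch

static void tas_init(dispatch_lock_t *l)       { (void)l; /* ... */ }
static void tas_acquire(dispatch_lock_t *l)    { (void)l; /* ... */ }
static void ticket_init(dispatch_lock_t *l)    { (void)l; /* ... */ }
static void ticket_acquire(dispatch_lock_t *l) { (void)l; /* ... */ }

// Bound once when the lock kind is decided, then used everywhere.
static void (*lock_init_)(dispatch_lock_t *)    = NULL;
static void (*lock_acquire_)(dispatch_lock_t *) = NULL;

enum dispatch_lock_kind { dlk_tas, dlk_ticket };

void set_lock_vptrs(enum dispatch_lock_kind kind)
{
    switch (kind) {
    case dlk_tas:
        lock_init_    = tas_init;
        lock_acquire_ = tas_acquire;
        break;
    case dlk_ticket:
        lock_init_    = ticket_init;
        lock_acquire_ = ticket_acquire;
        break;
    }
}
// ---------------------------------------------------------------------------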
3980 
3981 
3982 // ----------------------------------------------------------------------------
3983 // User lock table & lock allocation
3984 
3985 kmp_lock_table_t __kmp_user_lock_table = { 1, 0, NULL };
3986 kmp_user_lock_p __kmp_lock_pool = NULL;
3987 
3988 // Lock block-allocation support.
3989 kmp_block_of_locks* __kmp_lock_blocks = NULL;
3990 int __kmp_num_locks_in_block = 1; // FIXME - tune this value
3991 
3992 static kmp_lock_index_t
3993 __kmp_lock_table_insert( kmp_user_lock_p lck )
3994 {
3995  // Assume that __kmp_global_lock is held upon entry/exit.
3996  kmp_lock_index_t index;
3997  if ( __kmp_user_lock_table.used >= __kmp_user_lock_table.allocated ) {
3998  kmp_lock_index_t size;
3999  kmp_user_lock_p *table;
4000  // Reallocate lock table.
4001  if ( __kmp_user_lock_table.allocated == 0 ) {
4002  size = 1024;
4003  }
4004  else {
4005  size = __kmp_user_lock_table.allocated * 2;
4006  }
4007  table = (kmp_user_lock_p *)__kmp_allocate( sizeof( kmp_user_lock_p ) * size );
4008  KMP_MEMCPY( table + 1, __kmp_user_lock_table.table + 1, sizeof( kmp_user_lock_p ) * ( __kmp_user_lock_table.used - 1 ) );
4009  table[ 0 ] = (kmp_user_lock_p)__kmp_user_lock_table.table;
4010  // We cannot free the previous table now, since it may be in use by other
4011  // threads. So save the pointer to the previous table in the first element of the
4012  // new table. All the tables will be organized into a list, and can be freed when
4013  // the library is shutting down.
4014  __kmp_user_lock_table.table = table;
4015  __kmp_user_lock_table.allocated = size;
4016  }
4017  KMP_DEBUG_ASSERT( __kmp_user_lock_table.used < __kmp_user_lock_table.allocated );
4018  index = __kmp_user_lock_table.used;
4019  __kmp_user_lock_table.table[ index ] = lck;
4020  ++ __kmp_user_lock_table.used;
4021  return index;
4022 }
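// ---------------------------------------------------------------------------
// Editor's illustrative sketch (not part of the runtime): the insert routine
// above never frees an old table immediately, because other threads may still
// be reading it; instead the old table pointer is parked in element [0] of the
// new table, forming a chain that is freed at shutdown. Standalone analogue
// with hypothetical names (slot 0 is reserved, so `used` is initialized to 1):
#include <stdlib.h>
#include <string.h>

typedef struct {
    void **table;      // slot [0] links to the previous, smaller table
    size_t allocated;  // capacity, in slots
    size_t used;       // next free slot; initialize to 1 (slot 0 is the link)
} chain_table_t;

size_t chain_table_insert(chain_table_t *t, void *item)
{
    if (t->used >= t->allocated) {
        size_t size = t->allocated ? 2 * t->allocated : 1024;
        void **grown = (void **)malloc(size * sizeof(void *));
        if (t->used > 1)
            memcpy(grown + 1, t->table + 1, (t->used - 1) * sizeof(void *));
        grown[0] = (void *)t->table;       // keep the old table reachable
        t->table = grown;
        t->allocated = size;
    }
    size_t index = t->used++;
    t->table[index] = item;
    return index;
}

void chain_table_cleanup(chain_table_t *t)
{
    void **p = t->table;
    t->table = NULL;                       // back to the initial {NULL, 0, 1} state
    t->allocated = 0;
    t->used = 1;
    while (p != NULL) {                    // walk and free the chain of tables
        void **prev = (void **)p[0];
        free(p);
        p = prev;
    }
}
// ---------------------------------------------------------------------------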
4023 
4024 static kmp_user_lock_p
4025 __kmp_lock_block_allocate()
4026 {
4027  // Assume that __kmp_global_lock is held upon entry/exit.
4028  static int last_index = 0;
4029  if ( ( last_index >= __kmp_num_locks_in_block )
4030  || ( __kmp_lock_blocks == NULL ) ) {
4031  // Restart the index.
4032  last_index = 0;
4033  // Need to allocate a new block.
4034  KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
4035  size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block;
4036  char* buffer = (char*)__kmp_allocate( space_for_locks + sizeof( kmp_block_of_locks ) );
4037  // Set up the new block.
4038  kmp_block_of_locks *new_block = (kmp_block_of_locks *)(& buffer[space_for_locks]);
4039  new_block->next_block = __kmp_lock_blocks;
4040  new_block->locks = (void *)buffer;
4041  // Publish the new block.
4042  KMP_MB();
4043  __kmp_lock_blocks = new_block;
4044  }
4045  kmp_user_lock_p ret = (kmp_user_lock_p)(& ( ( (char *)( __kmp_lock_blocks->locks ) )
4046  [ last_index * __kmp_user_lock_size ] ) );
4047  last_index++;
4048  return ret;
4049 }
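// ---------------------------------------------------------------------------
// Editor's illustrative sketch (not part of the runtime): the block allocator
// above carves individual lock objects out of one large buffer and places the
// block descriptor immediately after the lock space. Standalone analogue with
// hypothetical demo_* names and a fixed example lock size:
#include <stdlib.h>

typedef struct demo_block {
    struct demo_block *next_block;   // blocks form a singly linked list
    void *locks;                     // start of the buffer holding the locks
} demo_block_t;

static demo_block_t *demo_blocks = NULL;   // list head of all blocks
static size_t demo_lock_size = 64;         // bytes per lock object (example value)
static int demo_locks_per_block = 8;
static int demo_last_index = 0;            // next unused slot in current block

void *demo_block_allocate(void)
{
    if (demo_last_index >= demo_locks_per_block || demo_blocks == NULL) {
        demo_last_index = 0;
        size_t space_for_locks = demo_lock_size * demo_locks_per_block;
        // One allocation holds the lock space followed by the descriptor.
        char *buffer = (char *)malloc(space_for_locks + sizeof(demo_block_t));
        demo_block_t *nb = (demo_block_t *)(buffer + space_for_locks);
        nb->next_block = demo_blocks;
        nb->locks = buffer;
        demo_blocks = nb;                  // publish the new block
    }
    void *ret = (char *)demo_blocks->locks + demo_last_index * demo_lock_size;
    demo_last_index++;
    return ret;
}
// ---------------------------------------------------------------------------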
4050 
4051 //
4052 // Get memory for a lock. It may be freshly allocated memory or reused memory
4053 // from lock pool.
4054 //
4055 kmp_user_lock_p
4056 __kmp_user_lock_allocate( void **user_lock, kmp_int32 gtid,
4057  kmp_lock_flags_t flags )
4058 {
4059  kmp_user_lock_p lck;
4060  kmp_lock_index_t index;
4061  KMP_DEBUG_ASSERT( user_lock );
4062 
4063  __kmp_acquire_lock( &__kmp_global_lock, gtid );
4064 
4065  if ( __kmp_lock_pool == NULL ) {
4066  // Lock pool is empty. Allocate new memory.
4067 
4068  // ANNOTATION: Found no good way to express the synchronisation
4069  // between allocation and usage, so ignore the allocation
4070  ANNOTATE_IGNORE_WRITES_BEGIN();
4071  if ( __kmp_num_locks_in_block <= 1 ) { // Tune this cutoff point.
4072  lck = (kmp_user_lock_p) __kmp_allocate( __kmp_user_lock_size );
4073  }
4074  else {
4075  lck = __kmp_lock_block_allocate();
4076  }
4077  ANNOTATE_IGNORE_WRITES_END();
4078 
4079  // Insert lock in the table so that it can be freed in __kmp_cleanup,
4080  // and debugger has info on all allocated locks.
4081  index = __kmp_lock_table_insert( lck );
4082  }
4083  else {
4084  // Pick up lock from pool.
4085  lck = __kmp_lock_pool;
4086  index = __kmp_lock_pool->pool.index;
4087  __kmp_lock_pool = __kmp_lock_pool->pool.next;
4088  }
4089 
4090  //
4091  // We could potentially differentiate between nested and regular locks
4092  // here, and do the lock table lookup for regular locks only.
4093  //
4094  if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
4095  * ( (kmp_lock_index_t *) user_lock ) = index;
4096  }
4097  else {
4098  * ( (kmp_user_lock_p *) user_lock ) = lck;
4099  }
4100 
4101  // Mark the lock if it is a critical section lock.
4102  __kmp_set_user_lock_flags( lck, flags );
4103 
4104  __kmp_release_lock( & __kmp_global_lock, gtid ); // AC: TODO: move this line upper
4105 
4106  return lck;
4107 }
4108 
4109 // Put lock's memory to pool for reusing.
4110 void
4111 __kmp_user_lock_free( void **user_lock, kmp_int32 gtid, kmp_user_lock_p lck )
4112 {
4113  KMP_DEBUG_ASSERT( user_lock != NULL );
4114  KMP_DEBUG_ASSERT( lck != NULL );
4115 
4116  __kmp_acquire_lock( & __kmp_global_lock, gtid );
4117 
4118  lck->pool.next = __kmp_lock_pool;
4119  __kmp_lock_pool = lck;
4120  if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
4121  kmp_lock_index_t index = * ( (kmp_lock_index_t *) user_lock );
4122  KMP_DEBUG_ASSERT( 0 < index && index <= __kmp_user_lock_table.used );
4123  lck->pool.index = index;
4124  }
4125 
4126  __kmp_release_lock( & __kmp_global_lock, gtid );
4127 }
4128 
4129 kmp_user_lock_p
4130 __kmp_lookup_user_lock( void **user_lock, char const *func )
4131 {
4132  kmp_user_lock_p lck = NULL;
4133 
4134  if ( __kmp_env_consistency_check ) {
4135  if ( user_lock == NULL ) {
4136  KMP_FATAL( LockIsUninitialized, func );
4137  }
4138  }
4139 
4140  if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
4141  kmp_lock_index_t index = *( (kmp_lock_index_t *)user_lock );
4142  if ( __kmp_env_consistency_check ) {
4143  if ( ! ( 0 < index && index < __kmp_user_lock_table.used ) ) {
4144  KMP_FATAL( LockIsUninitialized, func );
4145  }
4146  }
4147  KMP_DEBUG_ASSERT( 0 < index && index < __kmp_user_lock_table.used );
4148  KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
4149  lck = __kmp_user_lock_table.table[index];
4150  }
4151  else {
4152  lck = *( (kmp_user_lock_p *)user_lock );
4153  }
4154 
4155  if ( __kmp_env_consistency_check ) {
4156  if ( lck == NULL ) {
4157  KMP_FATAL( LockIsUninitialized, func );
4158  }
4159  }
4160 
4161  return lck;
4162 }
4163 
4164 void
4165 __kmp_cleanup_user_locks( void )
4166 {
4167  //
4168  // Reset lock pool. Do not worry about locks in the pool -- we will free
4169  // them when iterating through the lock table (it includes all the locks,
4170  // dead or alive).
4171  //
4172  __kmp_lock_pool = NULL;
4173 
4174 #define IS_CRITICAL(lck) \
4175  ( ( __kmp_get_user_lock_flags_ != NULL ) && \
4176  ( ( *__kmp_get_user_lock_flags_ )( lck ) & kmp_lf_critical_section ) )
4177 
4178  //
4179  // Loop through lock table, free all locks.
4180  //
4181  // Do not free item [0], it is reserved for lock tables list.
4182  //
4183  // FIXME - we are iterating through a list of (pointers to) objects of
4184  // type union kmp_user_lock, but we have no way of knowing whether the
4185  // base type is currently "pool" or whatever the global user lock type
4186  // is.
4187  //
4188  // We are relying on the fact that for all of the user lock types
4189  // (except "tas"), the first field in the lock struct is the "initialized"
4190  // field, which is set to the address of the lock object itself when
4191  // the lock is initialized. When the union is of type "pool", the
4192  // first field is a pointer to the next object in the free list, which
4193  // will not be the same address as the object itself.
4194  //
4195  // This means that the check ( *__kmp_is_user_lock_initialized_ )( lck )
4196  // will fail for "pool" objects on the free list. This must happen as
4197  // the "location" field of real user locks overlaps the "index" field
4198  // of "pool" objects.
4199  //
4200  // It would be better to run through the free list, and remove all "pool"
4201  // objects from the lock table before executing this loop. However,
4202  // "pool" objects do not always have their index field set (only on
4203  // lin_32e), and I don't want to search the lock table for the address
4204  // of every "pool" object on the free list.
4205  //
4206  while ( __kmp_user_lock_table.used > 1 ) {
4207  const ident *loc;
4208 
4209  //
4210  // reduce __kmp_user_lock_table.used before freeing the lock,
4211  // so that the state of the locks stays consistent
4212  //
4213  kmp_user_lock_p lck = __kmp_user_lock_table.table[
4214  --__kmp_user_lock_table.used ];
4215 
4216  if ( ( __kmp_is_user_lock_initialized_ != NULL ) &&
4217  ( *__kmp_is_user_lock_initialized_ )( lck ) ) {
4218  //
4219  // Issue a warning if: KMP_CONSISTENCY_CHECK AND lock is
4220  // initialized AND it is NOT a critical section (user is not
4221  // responsible for destroying criticals) AND we know source
4222  // location to report.
4223  //
4224  if ( __kmp_env_consistency_check && ( ! IS_CRITICAL( lck ) ) &&
4225  ( ( loc = __kmp_get_user_lock_location( lck ) ) != NULL ) &&
4226  ( loc->psource != NULL ) ) {
4227  kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 0 );
4228  KMP_WARNING( CnsLockNotDestroyed, str_loc.file, str_loc.line );
4229  __kmp_str_loc_free( &str_loc);
4230  }
4231 
4232 #ifdef KMP_DEBUG
4233  if ( IS_CRITICAL( lck ) ) {
4234  KA_TRACE( 20, ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n", lck, *(void**)lck ) );
4235  }
4236  else {
4237  KA_TRACE( 20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck, *(void**)lck ) );
4238  }
4239 #endif // KMP_DEBUG
4240 
4241  //
4242  // Cleanup internal lock dynamic resources
4243  // (for drdpa locks particularly).
4244  //
4245  __kmp_destroy_user_lock( lck );
4246  }
4247 
4248  //
4249  // Free the lock if block allocation of locks is not used.
4250  //
4251  if ( __kmp_lock_blocks == NULL ) {
4252  __kmp_free( lck );
4253  }
4254  }
4255 
4256 #undef IS_CRITICAL
4257 
4258  //
4259  // delete lock table(s).
4260  //
4261  kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table;
4262  __kmp_user_lock_table.table = NULL;
4263  __kmp_user_lock_table.allocated = 0;
4264 
4265  while ( table_ptr != NULL ) {
4266  //
4267  // In the first element we saved the pointer to the previous
4268  // (smaller) lock table.
4269  //
4270  kmp_user_lock_p *next = (kmp_user_lock_p *)( table_ptr[ 0 ] );
4271  __kmp_free( table_ptr );
4272  table_ptr = next;
4273  }
4274 
4275  //
4276  // Free buffers allocated for blocks of locks.
4277  //
4278  kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks;
4279  __kmp_lock_blocks = NULL;
4280 
4281  while ( block_ptr != NULL ) {
4282  kmp_block_of_locks_t *next = block_ptr->next_block;
4283  __kmp_free( block_ptr->locks );
4284  //
4285  // *block_ptr itself was allocated at the end of the locks vector.
4286  //
4287  block_ptr = next;
4288  }
4289 
4290  TCW_4(__kmp_init_user_locks, FALSE);
4291 }
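// ---------------------------------------------------------------------------
// Editor's illustrative sketch (not part of the runtime): the cleanup loop
// above relies on the convention described in the comment block inside it --
// for the non-TAS lock kinds, an initialized lock stores its own address in
// its first field, while an entry sitting on the free pool stores a free-list
// pointer there, which never equals the entry's own address. A minimal
// analogue with hypothetical names:
#include <stdbool.h>
#include <stddef.h>

typedef union demo_user_lock {
    struct { union demo_user_lock *initialized; /* ...real lock state... */ } lk;
    struct { union demo_user_lock *next; size_t index; } pool;
} demo_user_lock_t;

void demo_init_lock(demo_user_lock_t *l)
{
    l->lk.initialized = l;            // self-pointer marks "initialized"
}

bool demo_is_initialized(demo_user_lock_t *l)
{
    return l->lk.initialized == l;    // false for entries parked on the pool
}
// ---------------------------------------------------------------------------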
4292 
4293 #endif // KMP_USE_DYNAMIC_LOCK