LLVM OpenMP* Runtime Library
kmp_lock.cpp
1/*
2 * kmp_lock.cpp -- lock-related functions
3 */
4
5//===----------------------------------------------------------------------===//
6//
7// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8// See https://llvm.org/LICENSE.txt for license information.
9// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10//
11//===----------------------------------------------------------------------===//
12
13#include <stddef.h>
14#include <atomic>
15
16#include "kmp.h"
17#include "kmp_i18n.h"
18#include "kmp_io.h"
19#include "kmp_itt.h"
20#include "kmp_lock.h"
21#include "kmp_wait_release.h"
22#include "kmp_wrapper_getpid.h"
23
24#if KMP_USE_FUTEX
25#include <sys/syscall.h>
26#include <unistd.h>
27// We should really include <futex.h>, but that causes compatibility problems on
28// different Linux* OS distributions that either require that you include (or
29// break when you try to include) <pci/types.h>. Since all we need is the two
30// macros below (which are part of the kernel ABI, so can't change) we just
31// define the constants here and don't include <futex.h>
32#ifndef FUTEX_WAIT
33#define FUTEX_WAIT 0
34#endif
35#ifndef FUTEX_WAKE
36#define FUTEX_WAKE 1
37#endif
38#endif
39
40/* Implement spin locks for internal library use. */
41/* The algorithm implemented is Lamport's bakery lock [1974]. */
42
43void __kmp_validate_locks(void) {
44 int i;
45 kmp_uint32 x, y;
46
47 /* Check to make sure unsigned arithmetic wraps properly */
48 x = ~((kmp_uint32)0) - 2;
49 y = x - 2;
50
51 for (i = 0; i < 8; ++i, ++x, ++y) {
52 kmp_uint32 z = (x - y);
53 KMP_ASSERT(z == 2);
54 }
55
56 KMP_ASSERT(offsetof(kmp_base_queuing_lock, tail_id) % 8 == 0);
57}
58
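/* A concrete instance of the wraparound property verified above, which the
   ticket arithmetic later in this file relies on: with x = 0xFFFFFFFD and
   y = 0xFFFFFFFB, three increments wrap x around to 0 while y becomes
   0xFFFFFFFE, yet the unsigned difference x - y is still 2. */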
59/* ------------------------------------------------------------------------ */
60/* test and set locks */
61
62// For the non-nested locks, we can only assume that the first 4 bytes were
63// allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel
64// compiler only allocates a 4 byte pointer on IA-32 architecture. On
65// Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated.
66//
67// gcc reserves >= 8 bytes for nested locks, so we can assume that the
68// entire 8 bytes were allocated for nested locks on all 64-bit platforms.
69
70static kmp_int32 __kmp_get_tas_lock_owner(kmp_tas_lock_t *lck) {
71 return KMP_LOCK_STRIP(KMP_ATOMIC_LD_RLX(&lck->lk.poll)) - 1;
72}
73
74static inline bool __kmp_is_tas_lock_nestable(kmp_tas_lock_t *lck) {
75 return lck->lk.depth_locked != -1;
76}
77
78__forceinline static int
79__kmp_acquire_tas_lock_timed_template(kmp_tas_lock_t *lck, kmp_int32 gtid) {
80 KMP_MB();
81
82#ifdef USE_LOCK_PROFILE
83 kmp_uint32 curr = KMP_LOCK_STRIP(lck->lk.poll);
84 if ((curr != 0) && (curr != gtid + 1))
85 __kmp_printf("LOCK CONTENTION: %p\n", lck);
86/* else __kmp_printf( "." );*/
87#endif /* USE_LOCK_PROFILE */
88
89 kmp_int32 tas_free = KMP_LOCK_FREE(tas);
90 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);
91
92 if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == tas_free &&
93 __kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy)) {
94 KMP_FSYNC_ACQUIRED(lck);
95 return KMP_LOCK_ACQUIRED_FIRST;
96 }
97
98 kmp_uint32 spins;
99 KMP_FSYNC_PREPARE(lck);
100 KMP_INIT_YIELD(spins);
101 kmp_backoff_t backoff = __kmp_spin_backoff_params;
102 do {
103 __kmp_spin_backoff(&backoff);
104 KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
105 } while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != tas_free ||
106 !__kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy));
107 KMP_FSYNC_ACQUIRED(lck);
108 return KMP_LOCK_ACQUIRED_FIRST;
109}
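/* The loop above is a test-and-test-and-set lock with bounded exponential
   backoff. A minimal, self-contained sketch of the same idea in plain C++
   (illustrative only; the struct, the backoff constants, and the yield call
   are stand-ins for the runtime's KMP_* primitives, not the actual
   implementation):

     #include <atomic>
     #include <thread>

     struct tts_lock {
       std::atomic<int> poll{0}; // 0 == free, otherwise owner id + 1

       void acquire(int id) {
         int backoff = 1;
         for (;;) {
           int expected = 0;
           // Only attempt the CAS when the lock looks free (test, then TAS).
           if (poll.load(std::memory_order_relaxed) == 0 &&
               poll.compare_exchange_weak(expected, id + 1,
                                          std::memory_order_acquire,
                                          std::memory_order_relaxed))
             return;
           for (int i = 0; i < backoff; ++i)
             std::this_thread::yield(); // stand-in for the pause/backoff loop
           if (backoff < 1024)
             backoff *= 2;
         }
       }

       void release() { poll.store(0, std::memory_order_release); }
     };
*/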
110
111int __kmp_acquire_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
112 int retval = __kmp_acquire_tas_lock_timed_template(lck, gtid);
113 return retval;
114}
115
116static int __kmp_acquire_tas_lock_with_checks(kmp_tas_lock_t *lck,
117 kmp_int32 gtid) {
118 char const *const func = "omp_set_lock";
119 if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
120 __kmp_is_tas_lock_nestable(lck)) {
121 KMP_FATAL(LockNestableUsedAsSimple, func);
122 }
123 if ((gtid >= 0) && (__kmp_get_tas_lock_owner(lck) == gtid)) {
124 KMP_FATAL(LockIsAlreadyOwned, func);
125 }
126 return __kmp_acquire_tas_lock(lck, gtid);
127}
128
129int __kmp_test_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
130 kmp_int32 tas_free = KMP_LOCK_FREE(tas);
131 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);
132 if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == tas_free &&
133 __kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy)) {
134 KMP_FSYNC_ACQUIRED(lck);
135 return TRUE;
136 }
137 return FALSE;
138}
139
140static int __kmp_test_tas_lock_with_checks(kmp_tas_lock_t *lck,
141 kmp_int32 gtid) {
142 char const *const func = "omp_test_lock";
143 if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
144 __kmp_is_tas_lock_nestable(lck)) {
145 KMP_FATAL(LockNestableUsedAsSimple, func);
146 }
147 return __kmp_test_tas_lock(lck, gtid);
148}
149
150int __kmp_release_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
151 KMP_MB(); /* Flush all pending memory write invalidates. */
152
153 KMP_FSYNC_RELEASING(lck);
154 KMP_ATOMIC_ST_REL(&lck->lk.poll, KMP_LOCK_FREE(tas));
155 KMP_MB(); /* Flush all pending memory write invalidates. */
156
157 KMP_YIELD_OVERSUB();
158 return KMP_LOCK_RELEASED;
159}
160
161static int __kmp_release_tas_lock_with_checks(kmp_tas_lock_t *lck,
162 kmp_int32 gtid) {
163 char const *const func = "omp_unset_lock";
164 KMP_MB(); /* in case another processor initialized lock */
165 if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
166 __kmp_is_tas_lock_nestable(lck)) {
167 KMP_FATAL(LockNestableUsedAsSimple, func);
168 }
169 if (__kmp_get_tas_lock_owner(lck) == -1) {
170 KMP_FATAL(LockUnsettingFree, func);
171 }
172 if ((gtid >= 0) && (__kmp_get_tas_lock_owner(lck) >= 0) &&
173 (__kmp_get_tas_lock_owner(lck) != gtid)) {
174 KMP_FATAL(LockUnsettingSetByAnother, func);
175 }
176 return __kmp_release_tas_lock(lck, gtid);
177}
178
179void __kmp_init_tas_lock(kmp_tas_lock_t *lck) {
180 lck->lk.poll = KMP_LOCK_FREE(tas);
181}
182
183void __kmp_destroy_tas_lock(kmp_tas_lock_t *lck) { lck->lk.poll = 0; }
184
185static void __kmp_destroy_tas_lock_with_checks(kmp_tas_lock_t *lck) {
186 char const *const func = "omp_destroy_lock";
187 if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
188 __kmp_is_tas_lock_nestable(lck)) {
189 KMP_FATAL(LockNestableUsedAsSimple, func);
190 }
191 if (__kmp_get_tas_lock_owner(lck) != -1) {
192 KMP_FATAL(LockStillOwned, func);
193 }
194 __kmp_destroy_tas_lock(lck);
195}
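/* The *_with_checks variants above exist so that common user errors fail with
   a diagnostic instead of silently misbehaving. A minimal sketch of the kind
   of misuse they catch (illustrative; whether the checked entry points are
   actually selected depends on the lock kind and on how the runtime's
   consistency checking is configured):

     #include <omp.h>

     int main(void) {
       omp_lock_t l;
       omp_init_lock(&l);
       omp_unset_lock(&l); // never acquired: checked path reports LockUnsettingFree
       omp_destroy_lock(&l);
       return 0;
     }

   Similarly, re-acquiring a simple lock from the owning thread trips
   LockIsAlreadyOwned, and destroying a lock that is still held trips
   LockStillOwned. */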
196
197// nested test and set locks
198
199int __kmp_acquire_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
200 KMP_DEBUG_ASSERT(gtid >= 0);
201
202 if (__kmp_get_tas_lock_owner(lck) == gtid) {
203 lck->lk.depth_locked += 1;
204 return KMP_LOCK_ACQUIRED_NEXT;
205 } else {
206 __kmp_acquire_tas_lock_timed_template(lck, gtid);
207 lck->lk.depth_locked = 1;
208 return KMP_LOCK_ACQUIRED_FIRST;
209 }
210}
211
212static int __kmp_acquire_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,
213 kmp_int32 gtid) {
214 char const *const func = "omp_set_nest_lock";
215 if (!__kmp_is_tas_lock_nestable(lck)) {
216 KMP_FATAL(LockSimpleUsedAsNestable, func);
217 }
218 return __kmp_acquire_nested_tas_lock(lck, gtid);
219}
220
221int __kmp_test_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
222 int retval;
223
224 KMP_DEBUG_ASSERT(gtid >= 0);
225
226 if (__kmp_get_tas_lock_owner(lck) == gtid) {
227 retval = ++lck->lk.depth_locked;
228 } else if (!__kmp_test_tas_lock(lck, gtid)) {
229 retval = 0;
230 } else {
231 KMP_MB();
232 retval = lck->lk.depth_locked = 1;
233 }
234 return retval;
235}
236
237static int __kmp_test_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,
238 kmp_int32 gtid) {
239 char const *const func = "omp_test_nest_lock";
240 if (!__kmp_is_tas_lock_nestable(lck)) {
241 KMP_FATAL(LockSimpleUsedAsNestable, func);
242 }
243 return __kmp_test_nested_tas_lock(lck, gtid);
244}
245
246int __kmp_release_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
247 KMP_DEBUG_ASSERT(gtid >= 0);
248
249 KMP_MB();
250 if (--(lck->lk.depth_locked) == 0) {
251 __kmp_release_tas_lock(lck, gtid);
252 return KMP_LOCK_RELEASED;
253 }
254 return KMP_LOCK_STILL_HELD;
255}
256
257static int __kmp_release_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,
258 kmp_int32 gtid) {
259 char const *const func = "omp_unset_nest_lock";
260 KMP_MB(); /* in case another processor initialized lock */
261 if (!__kmp_is_tas_lock_nestable(lck)) {
262 KMP_FATAL(LockSimpleUsedAsNestable, func);
263 }
264 if (__kmp_get_tas_lock_owner(lck) == -1) {
265 KMP_FATAL(LockUnsettingFree, func);
266 }
267 if (__kmp_get_tas_lock_owner(lck) != gtid) {
268 KMP_FATAL(LockUnsettingSetByAnother, func);
269 }
270 return __kmp_release_nested_tas_lock(lck, gtid);
271}
272
273void __kmp_init_nested_tas_lock(kmp_tas_lock_t *lck) {
274 __kmp_init_tas_lock(lck);
275 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
276}
277
278void __kmp_destroy_nested_tas_lock(kmp_tas_lock_t *lck) {
279 __kmp_destroy_tas_lock(lck);
280 lck->lk.depth_locked = 0;
281}
282
283static void __kmp_destroy_nested_tas_lock_with_checks(kmp_tas_lock_t *lck) {
284 char const *const func = "omp_destroy_nest_lock";
285 if (!__kmp_is_tas_lock_nestable(lck)) {
286 KMP_FATAL(LockSimpleUsedAsNestable, func);
287 }
288 if (__kmp_get_tas_lock_owner(lck) != -1) {
289 KMP_FATAL(LockStillOwned, func);
290 }
291 __kmp_destroy_nested_tas_lock(lck);
292}
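/* A usage sketch of the nesting semantics implemented above, written against
   the public OpenMP lock API (which ultimately drives the depth_locked and
   owner bookkeeping in these functions); minimal and single-threaded, for
   illustration only:

     #include <omp.h>
     #include <stdio.h>

     int main(void) {
       omp_nest_lock_t l;
       omp_init_nest_lock(&l);
       omp_set_nest_lock(&l);                  // depth 1, owned by this thread
       omp_set_nest_lock(&l);                  // same owner: depth 2, no blocking
       printf("%d\n", omp_test_nest_lock(&l)); // owner test succeeds: prints 3
       omp_unset_nest_lock(&l);                // depth 2
       omp_unset_nest_lock(&l);                // depth 1
       omp_unset_nest_lock(&l);                // depth 0, lock actually released
       omp_destroy_nest_lock(&l);
       return 0;
     }
*/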
293
294#if KMP_USE_FUTEX
295
296/* ------------------------------------------------------------------------ */
297/* futex locks */
298
299// futex locks are really just test and set locks, with a different method
300// of handling contention. They take the same amount of space as test and
301// set locks, and are allocated the same way (i.e. use the area allocated by
302// the compiler for non-nested locks / allocate nested locks on the heap).
303
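/* The core futex idea used below, reduced to a minimal sketch (Linux-only,
   illustrative; the 0/1/2 state encoding here is simplified relative to the
   gtid-based encoding the runtime stores in lk.poll, where bit 0 is the
   "waiters present" flag):

     #include <atomic>
     #include <linux/futex.h>
     #include <sys/syscall.h>
     #include <unistd.h>

     static void futex_wait(std::atomic<int> *addr, int expected) {
       syscall(SYS_futex, addr, FUTEX_WAIT, expected, nullptr, nullptr, 0);
     }
     static void futex_wake(std::atomic<int> *addr, int n) {
       syscall(SYS_futex, addr, FUTEX_WAKE, n, nullptr, nullptr, 0);
     }

     struct futex_lock {
       std::atomic<int> state{0}; // 0 free, 1 held, 2 held with waiters

       void acquire() {
         int c = 0;
         if (state.compare_exchange_strong(c, 1, std::memory_order_acquire))
           return; // uncontended fast path, no syscall
         if (c != 2)
           c = state.exchange(2, std::memory_order_acquire);
         while (c != 0) {
           futex_wait(&state, 2); // sleep while the lock looks contended
           c = state.exchange(2, std::memory_order_acquire);
         }
       }

       void release() {
         if (state.exchange(0, std::memory_order_release) == 2)
           futex_wake(&state, 1); // only pay the syscall if someone may wait
       }
     };
*/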
304static kmp_int32 __kmp_get_futex_lock_owner(kmp_futex_lock_t *lck) {
305 return KMP_LOCK_STRIP((TCR_4(lck->lk.poll) >> 1)) - 1;
306}
307
308static inline bool __kmp_is_futex_lock_nestable(kmp_futex_lock_t *lck) {
309 return lck->lk.depth_locked != -1;
310}
311
312__forceinline static int
313__kmp_acquire_futex_lock_timed_template(kmp_futex_lock_t *lck, kmp_int32 gtid) {
314 kmp_int32 gtid_code = (gtid + 1) << 1;
315
316 KMP_MB();
317
318#ifdef USE_LOCK_PROFILE
319 kmp_uint32 curr = KMP_LOCK_STRIP(TCR_4(lck->lk.poll));
320 if ((curr != 0) && (curr != gtid_code))
321 __kmp_printf("LOCK CONTENTION: %p\n", lck);
322/* else __kmp_printf( "." );*/
323#endif /* USE_LOCK_PROFILE */
324
325 KMP_FSYNC_PREPARE(lck);
326 KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n",
327 lck, lck->lk.poll, gtid));
328
329 kmp_int32 poll_val;
330
331 while ((poll_val = KMP_COMPARE_AND_STORE_RET32(
332 &(lck->lk.poll), KMP_LOCK_FREE(futex),
333 KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) {
334
335 kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;
336 KA_TRACE(
337 1000,
338 ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n",
339 lck, gtid, poll_val, cond));
340
341 // NOTE: do not rewrite the condition for this branch as
342 //
343 // if ( poll_val & 1 == 0 )
344 //
345 // Because == binds more tightly than &, that parses as poll_val & (1 == 0),
346 // which is always 0, so the block would be skipped regardless of the LSB.
347 if (!cond) {
348 // Try to set the lsb in the poll to indicate to the owner
349 // thread that they need to wake this thread up.
350 if (!KMP_COMPARE_AND_STORE_REL32(&(lck->lk.poll), poll_val,
351 poll_val | KMP_LOCK_BUSY(1, futex))) {
352 KA_TRACE(
353 1000,
354 ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n",
355 lck, lck->lk.poll, gtid));
356 continue;
357 }
358 poll_val |= KMP_LOCK_BUSY(1, futex);
359
360 KA_TRACE(1000,
361 ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n", lck,
362 lck->lk.poll, gtid));
363 }
364
365 KA_TRACE(
366 1000,
367 ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n",
368 lck, gtid, poll_val));
369
370 long rc;
371 if ((rc = syscall(__NR_futex, &(lck->lk.poll), FUTEX_WAIT, poll_val, NULL,
372 NULL, 0)) != 0) {
373 KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) "
374 "failed (rc=%ld errno=%d)\n",
375 lck, gtid, poll_val, rc, errno));
376 continue;
377 }
378
379 KA_TRACE(1000,
380 ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n",
381 lck, gtid, poll_val));
382 // This thread has now done a successful futex wait call and was entered on
383 // the OS futex queue. We must now perform a futex wake call when releasing
384 // the lock, as we have no idea how many other threads are in the queue.
385 gtid_code |= 1;
386 }
387
388 KMP_FSYNC_ACQUIRED(lck);
389 KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck,
390 lck->lk.poll, gtid));
391 return KMP_LOCK_ACQUIRED_FIRST;
392}
393
394int __kmp_acquire_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
395 int retval = __kmp_acquire_futex_lock_timed_template(lck, gtid);
396 return retval;
397}
398
399static int __kmp_acquire_futex_lock_with_checks(kmp_futex_lock_t *lck,
400 kmp_int32 gtid) {
401 char const *const func = "omp_set_lock";
402 if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
403 __kmp_is_futex_lock_nestable(lck)) {
404 KMP_FATAL(LockNestableUsedAsSimple, func);
405 }
406 if ((gtid >= 0) && (__kmp_get_futex_lock_owner(lck) == gtid)) {
407 KMP_FATAL(LockIsAlreadyOwned, func);
408 }
409 return __kmp_acquire_futex_lock(lck, gtid);
410}
411
412int __kmp_test_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
413 if (KMP_COMPARE_AND_STORE_ACQ32(&(lck->lk.poll), KMP_LOCK_FREE(futex),
414 KMP_LOCK_BUSY((gtid + 1) << 1, futex))) {
415 KMP_FSYNC_ACQUIRED(lck);
416 return TRUE;
417 }
418 return FALSE;
419}
420
421static int __kmp_test_futex_lock_with_checks(kmp_futex_lock_t *lck,
422 kmp_int32 gtid) {
423 char const *const func = "omp_test_lock";
424 if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
425 __kmp_is_futex_lock_nestable(lck)) {
426 KMP_FATAL(LockNestableUsedAsSimple, func);
427 }
428 return __kmp_test_futex_lock(lck, gtid);
429}
430
431int __kmp_release_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
432 KMP_MB(); /* Flush all pending memory write invalidates. */
433
434 KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n",
435 lck, lck->lk.poll, gtid));
436
437 KMP_FSYNC_RELEASING(lck);
438
439 kmp_int32 poll_val = KMP_XCHG_FIXED32(&(lck->lk.poll), KMP_LOCK_FREE(futex));
440
441 KA_TRACE(1000,
442 ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n",
443 lck, gtid, poll_val));
444
445 if (KMP_LOCK_STRIP(poll_val) & 1) {
446 KA_TRACE(1000,
447 ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n",
448 lck, gtid));
449 syscall(__NR_futex, &(lck->lk.poll), FUTEX_WAKE, KMP_LOCK_BUSY(1, futex),
450 NULL, NULL, 0);
451 }
452
453 KMP_MB(); /* Flush all pending memory write invalidates. */
454
455 KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck,
456 lck->lk.poll, gtid));
457
458 KMP_YIELD_OVERSUB();
459 return KMP_LOCK_RELEASED;
460}
461
462static int __kmp_release_futex_lock_with_checks(kmp_futex_lock_t *lck,
463 kmp_int32 gtid) {
464 char const *const func = "omp_unset_lock";
465 KMP_MB(); /* in case another processor initialized lock */
466 if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
467 __kmp_is_futex_lock_nestable(lck)) {
468 KMP_FATAL(LockNestableUsedAsSimple, func);
469 }
470 if (__kmp_get_futex_lock_owner(lck) == -1) {
471 KMP_FATAL(LockUnsettingFree, func);
472 }
473 if ((gtid >= 0) && (__kmp_get_futex_lock_owner(lck) >= 0) &&
474 (__kmp_get_futex_lock_owner(lck) != gtid)) {
475 KMP_FATAL(LockUnsettingSetByAnother, func);
476 }
477 return __kmp_release_futex_lock(lck, gtid);
478}
479
480void __kmp_init_futex_lock(kmp_futex_lock_t *lck) {
481 TCW_4(lck->lk.poll, KMP_LOCK_FREE(futex));
482}
483
484void __kmp_destroy_futex_lock(kmp_futex_lock_t *lck) { lck->lk.poll = 0; }
485
486static void __kmp_destroy_futex_lock_with_checks(kmp_futex_lock_t *lck) {
487 char const *const func = "omp_destroy_lock";
488 if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
489 __kmp_is_futex_lock_nestable(lck)) {
490 KMP_FATAL(LockNestableUsedAsSimple, func);
491 }
492 if (__kmp_get_futex_lock_owner(lck) != -1) {
493 KMP_FATAL(LockStillOwned, func);
494 }
495 __kmp_destroy_futex_lock(lck);
496}
497
498// nested futex locks
499
500int __kmp_acquire_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
501 KMP_DEBUG_ASSERT(gtid >= 0);
502
503 if (__kmp_get_futex_lock_owner(lck) == gtid) {
504 lck->lk.depth_locked += 1;
505 return KMP_LOCK_ACQUIRED_NEXT;
506 } else {
507 __kmp_acquire_futex_lock_timed_template(lck, gtid);
508 lck->lk.depth_locked = 1;
509 return KMP_LOCK_ACQUIRED_FIRST;
510 }
511}
512
513static int __kmp_acquire_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
514 kmp_int32 gtid) {
515 char const *const func = "omp_set_nest_lock";
516 if (!__kmp_is_futex_lock_nestable(lck)) {
517 KMP_FATAL(LockSimpleUsedAsNestable, func);
518 }
519 return __kmp_acquire_nested_futex_lock(lck, gtid);
520}
521
522int __kmp_test_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
523 int retval;
524
525 KMP_DEBUG_ASSERT(gtid >= 0);
526
527 if (__kmp_get_futex_lock_owner(lck) == gtid) {
528 retval = ++lck->lk.depth_locked;
529 } else if (!__kmp_test_futex_lock(lck, gtid)) {
530 retval = 0;
531 } else {
532 KMP_MB();
533 retval = lck->lk.depth_locked = 1;
534 }
535 return retval;
536}
537
538static int __kmp_test_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
539 kmp_int32 gtid) {
540 char const *const func = "omp_test_nest_lock";
541 if (!__kmp_is_futex_lock_nestable(lck)) {
542 KMP_FATAL(LockSimpleUsedAsNestable, func);
543 }
544 return __kmp_test_nested_futex_lock(lck, gtid);
545}
546
547int __kmp_release_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
548 KMP_DEBUG_ASSERT(gtid >= 0);
549
550 KMP_MB();
551 if (--(lck->lk.depth_locked) == 0) {
552 __kmp_release_futex_lock(lck, gtid);
553 return KMP_LOCK_RELEASED;
554 }
555 return KMP_LOCK_STILL_HELD;
556}
557
558static int __kmp_release_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
559 kmp_int32 gtid) {
560 char const *const func = "omp_unset_nest_lock";
561 KMP_MB(); /* in case another processor initialized lock */
562 if (!__kmp_is_futex_lock_nestable(lck)) {
563 KMP_FATAL(LockSimpleUsedAsNestable, func);
564 }
565 if (__kmp_get_futex_lock_owner(lck) == -1) {
566 KMP_FATAL(LockUnsettingFree, func);
567 }
568 if (__kmp_get_futex_lock_owner(lck) != gtid) {
569 KMP_FATAL(LockUnsettingSetByAnother, func);
570 }
571 return __kmp_release_nested_futex_lock(lck, gtid);
572}
573
574void __kmp_init_nested_futex_lock(kmp_futex_lock_t *lck) {
575 __kmp_init_futex_lock(lck);
576 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
577}
578
579void __kmp_destroy_nested_futex_lock(kmp_futex_lock_t *lck) {
580 __kmp_destroy_futex_lock(lck);
581 lck->lk.depth_locked = 0;
582}
583
584static void __kmp_destroy_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) {
585 char const *const func = "omp_destroy_nest_lock";
586 if (!__kmp_is_futex_lock_nestable(lck)) {
587 KMP_FATAL(LockSimpleUsedAsNestable, func);
588 }
589 if (__kmp_get_futex_lock_owner(lck) != -1) {
590 KMP_FATAL(LockStillOwned, func);
591 }
592 __kmp_destroy_nested_futex_lock(lck);
593}
594
595#endif // KMP_USE_FUTEX
596
597/* ------------------------------------------------------------------------ */
598/* ticket (bakery) locks */
599
600static kmp_int32 __kmp_get_ticket_lock_owner(kmp_ticket_lock_t *lck) {
601 return std::atomic_load_explicit(&lck->lk.owner_id,
602 std::memory_order_relaxed) -
603 1;
604}
605
606static inline bool __kmp_is_ticket_lock_nestable(kmp_ticket_lock_t *lck) {
607 return std::atomic_load_explicit(&lck->lk.depth_locked,
608 std::memory_order_relaxed) != -1;
609}
610
611static kmp_uint32 __kmp_bakery_check(void *now_serving, kmp_uint32 my_ticket) {
612 return std::atomic_load_explicit((std::atomic<unsigned> *)now_serving,
613 std::memory_order_acquire) == my_ticket;
614}
615
616__forceinline static int
617__kmp_acquire_ticket_lock_timed_template(kmp_ticket_lock_t *lck,
618 kmp_int32 gtid) {
619 kmp_uint32 my_ticket = std::atomic_fetch_add_explicit(
620 &lck->lk.next_ticket, 1U, std::memory_order_relaxed);
621
622#ifdef USE_LOCK_PROFILE
623 if (std::atomic_load_explicit(&lck->lk.now_serving,
624 std::memory_order_relaxed) != my_ticket)
625 __kmp_printf("LOCK CONTENTION: %p\n", lck);
626/* else __kmp_printf( "." );*/
627#endif /* USE_LOCK_PROFILE */
628
629 if (std::atomic_load_explicit(&lck->lk.now_serving,
630 std::memory_order_acquire) == my_ticket) {
631 return KMP_LOCK_ACQUIRED_FIRST;
632 }
633 KMP_WAIT_PTR(&lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck);
634 return KMP_LOCK_ACQUIRED_FIRST;
635}
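/* A minimal std::atomic sketch of the ticket protocol implemented by the
   functions in this section (illustrative only; the runtime additionally
   tracks owner_id and depth_locked and waits via KMP_WAIT_PTR / KMP_YIELD
   rather than a bare yield loop):

     #include <atomic>
     #include <thread>

     struct ticket_lock {
       std::atomic<unsigned> next_ticket{0};
       std::atomic<unsigned> now_serving{0};

       void acquire() {
         // Take a ticket; unsigned wraparound keeps the comparison correct.
         unsigned mine = next_ticket.fetch_add(1, std::memory_order_relaxed);
         while (now_serving.load(std::memory_order_acquire) != mine)
           std::this_thread::yield();
       }

       bool try_acquire() {
         unsigned mine = next_ticket.load(std::memory_order_relaxed);
         return now_serving.load(std::memory_order_relaxed) == mine &&
                next_ticket.compare_exchange_strong(mine, mine + 1,
                                                    std::memory_order_acquire,
                                                    std::memory_order_relaxed);
       }

       void release() { now_serving.fetch_add(1, std::memory_order_release); }
     };
*/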
636
637int __kmp_acquire_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
638 int retval = __kmp_acquire_ticket_lock_timed_template(lck, gtid);
639 return retval;
640}
641
642static int __kmp_acquire_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
643 kmp_int32 gtid) {
644 char const *const func = "omp_set_lock";
645
646 if (!std::atomic_load_explicit(&lck->lk.initialized,
647 std::memory_order_relaxed)) {
648 KMP_FATAL(LockIsUninitialized, func);
649 }
650 if (lck->lk.self != lck) {
651 KMP_FATAL(LockIsUninitialized, func);
652 }
653 if (__kmp_is_ticket_lock_nestable(lck)) {
654 KMP_FATAL(LockNestableUsedAsSimple, func);
655 }
656 if ((gtid >= 0) && (__kmp_get_ticket_lock_owner(lck) == gtid)) {
657 KMP_FATAL(LockIsAlreadyOwned, func);
658 }
659
660 __kmp_acquire_ticket_lock(lck, gtid);
661
662 std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
663 std::memory_order_relaxed);
664 return KMP_LOCK_ACQUIRED_FIRST;
665}
666
667int __kmp_test_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
668 kmp_uint32 my_ticket = std::atomic_load_explicit(&lck->lk.next_ticket,
669 std::memory_order_relaxed);
670
671 if (std::atomic_load_explicit(&lck->lk.now_serving,
672 std::memory_order_relaxed) == my_ticket) {
673 kmp_uint32 next_ticket = my_ticket + 1;
674 if (std::atomic_compare_exchange_strong_explicit(
675 &lck->lk.next_ticket, &my_ticket, next_ticket,
676 std::memory_order_acquire, std::memory_order_acquire)) {
677 return TRUE;
678 }
679 }
680 return FALSE;
681}
682
683static int __kmp_test_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
684 kmp_int32 gtid) {
685 char const *const func = "omp_test_lock";
686
687 if (!std::atomic_load_explicit(&lck->lk.initialized,
688 std::memory_order_relaxed)) {
689 KMP_FATAL(LockIsUninitialized, func);
690 }
691 if (lck->lk.self != lck) {
692 KMP_FATAL(LockIsUninitialized, func);
693 }
694 if (__kmp_is_ticket_lock_nestable(lck)) {
695 KMP_FATAL(LockNestableUsedAsSimple, func);
696 }
697
698 int retval = __kmp_test_ticket_lock(lck, gtid);
699
700 if (retval) {
701 std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
702 std::memory_order_relaxed);
703 }
704 return retval;
705}
706
707int __kmp_release_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
708 kmp_uint32 distance = std::atomic_load_explicit(&lck->lk.next_ticket,
709 std::memory_order_relaxed) -
710 std::atomic_load_explicit(&lck->lk.now_serving,
711 std::memory_order_relaxed);
712
713 std::atomic_fetch_add_explicit(&lck->lk.now_serving, 1U,
714 std::memory_order_release);
715
716 KMP_YIELD(distance >
717 (kmp_uint32)(__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
718 return KMP_LOCK_RELEASED;
719}
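/* Worked example for the release path above: "distance" counts the releasing
   thread plus every thread still waiting on a ticket. With next_ticket == 37
   and now_serving == 30 the distance is 7; if that exceeds the number of
   available processors, more threads are spinning than can run, so the
   releaser yields to give a waiter a chance to be scheduled. Unsigned
   wraparound keeps the subtraction correct even after the counters overflow. */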
720
721static int __kmp_release_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
722 kmp_int32 gtid) {
723 char const *const func = "omp_unset_lock";
724
725 if (!std::atomic_load_explicit(&lck->lk.initialized,
726 std::memory_order_relaxed)) {
727 KMP_FATAL(LockIsUninitialized, func);
728 }
729 if (lck->lk.self != lck) {
730 KMP_FATAL(LockIsUninitialized, func);
731 }
732 if (__kmp_is_ticket_lock_nestable(lck)) {
733 KMP_FATAL(LockNestableUsedAsSimple, func);
734 }
735 if (__kmp_get_ticket_lock_owner(lck) == -1) {
736 KMP_FATAL(LockUnsettingFree, func);
737 }
738 if ((gtid >= 0) && (__kmp_get_ticket_lock_owner(lck) >= 0) &&
739 (__kmp_get_ticket_lock_owner(lck) != gtid)) {
740 KMP_FATAL(LockUnsettingSetByAnother, func);
741 }
742 std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
743 return __kmp_release_ticket_lock(lck, gtid);
744}
745
746void __kmp_init_ticket_lock(kmp_ticket_lock_t *lck) {
747 lck->lk.location = NULL;
748 lck->lk.self = lck;
749 std::atomic_store_explicit(&lck->lk.next_ticket, 0U,
750 std::memory_order_relaxed);
751 std::atomic_store_explicit(&lck->lk.now_serving, 0U,
752 std::memory_order_relaxed);
753 std::atomic_store_explicit(
754 &lck->lk.owner_id, 0,
755 std::memory_order_relaxed); // no thread owns the lock.
756 std::atomic_store_explicit(
757 &lck->lk.depth_locked, -1,
758 std::memory_order_relaxed); // -1 => not a nested lock.
759 std::atomic_store_explicit(&lck->lk.initialized, true,
760 std::memory_order_release);
761}
762
763void __kmp_destroy_ticket_lock(kmp_ticket_lock_t *lck) {
764 std::atomic_store_explicit(&lck->lk.initialized, false,
765 std::memory_order_release);
766 lck->lk.self = NULL;
767 lck->lk.location = NULL;
768 std::atomic_store_explicit(&lck->lk.next_ticket, 0U,
769 std::memory_order_relaxed);
770 std::atomic_store_explicit(&lck->lk.now_serving, 0U,
771 std::memory_order_relaxed);
772 std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
773 std::atomic_store_explicit(&lck->lk.depth_locked, -1,
774 std::memory_order_relaxed);
775}
776
777static void __kmp_destroy_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
778 char const *const func = "omp_destroy_lock";
779
780 if (!std::atomic_load_explicit(&lck->lk.initialized,
781 std::memory_order_relaxed)) {
782 KMP_FATAL(LockIsUninitialized, func);
783 }
784 if (lck->lk.self != lck) {
785 KMP_FATAL(LockIsUninitialized, func);
786 }
787 if (__kmp_is_ticket_lock_nestable(lck)) {
788 KMP_FATAL(LockNestableUsedAsSimple, func);
789 }
790 if (__kmp_get_ticket_lock_owner(lck) != -1) {
791 KMP_FATAL(LockStillOwned, func);
792 }
793 __kmp_destroy_ticket_lock(lck);
794}
795
796// nested ticket locks
797
798int __kmp_acquire_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
799 KMP_DEBUG_ASSERT(gtid >= 0);
800
801 if (__kmp_get_ticket_lock_owner(lck) == gtid) {
802 std::atomic_fetch_add_explicit(&lck->lk.depth_locked, 1,
803 std::memory_order_relaxed);
804 return KMP_LOCK_ACQUIRED_NEXT;
805 } else {
806 __kmp_acquire_ticket_lock_timed_template(lck, gtid);
807 std::atomic_store_explicit(&lck->lk.depth_locked, 1,
808 std::memory_order_relaxed);
809 std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
810 std::memory_order_relaxed);
811 return KMP_LOCK_ACQUIRED_FIRST;
812 }
813}
814
815static int __kmp_acquire_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
816 kmp_int32 gtid) {
817 char const *const func = "omp_set_nest_lock";
818
819 if (!std::atomic_load_explicit(&lck->lk.initialized,
820 std::memory_order_relaxed)) {
821 KMP_FATAL(LockIsUninitialized, func);
822 }
823 if (lck->lk.self != lck) {
824 KMP_FATAL(LockIsUninitialized, func);
825 }
826 if (!__kmp_is_ticket_lock_nestable(lck)) {
827 KMP_FATAL(LockSimpleUsedAsNestable, func);
828 }
829 return __kmp_acquire_nested_ticket_lock(lck, gtid);
830}
831
832int __kmp_test_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
833 int retval;
834
835 KMP_DEBUG_ASSERT(gtid >= 0);
836
837 if (__kmp_get_ticket_lock_owner(lck) == gtid) {
838 retval = std::atomic_fetch_add_explicit(&lck->lk.depth_locked, 1,
839 std::memory_order_relaxed) +
840 1;
841 } else if (!__kmp_test_ticket_lock(lck, gtid)) {
842 retval = 0;
843 } else {
844 std::atomic_store_explicit(&lck->lk.depth_locked, 1,
845 std::memory_order_relaxed);
846 std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
847 std::memory_order_relaxed);
848 retval = 1;
849 }
850 return retval;
851}
852
853static int __kmp_test_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
854 kmp_int32 gtid) {
855 char const *const func = "omp_test_nest_lock";
856
857 if (!std::atomic_load_explicit(&lck->lk.initialized,
858 std::memory_order_relaxed)) {
859 KMP_FATAL(LockIsUninitialized, func);
860 }
861 if (lck->lk.self != lck) {
862 KMP_FATAL(LockIsUninitialized, func);
863 }
864 if (!__kmp_is_ticket_lock_nestable(lck)) {
865 KMP_FATAL(LockSimpleUsedAsNestable, func);
866 }
867 return __kmp_test_nested_ticket_lock(lck, gtid);
868}
869
870int __kmp_release_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
871 KMP_DEBUG_ASSERT(gtid >= 0);
872
873 if ((std::atomic_fetch_add_explicit(&lck->lk.depth_locked, -1,
874 std::memory_order_relaxed) -
875 1) == 0) {
876 std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
877 __kmp_release_ticket_lock(lck, gtid);
878 return KMP_LOCK_RELEASED;
879 }
880 return KMP_LOCK_STILL_HELD;
881}
882
883static int __kmp_release_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
884 kmp_int32 gtid) {
885 char const *const func = "omp_unset_nest_lock";
886
887 if (!std::atomic_load_explicit(&lck->lk.initialized,
888 std::memory_order_relaxed)) {
889 KMP_FATAL(LockIsUninitialized, func);
890 }
891 if (lck->lk.self != lck) {
892 KMP_FATAL(LockIsUninitialized, func);
893 }
894 if (!__kmp_is_ticket_lock_nestable(lck)) {
895 KMP_FATAL(LockSimpleUsedAsNestable, func);
896 }
897 if (__kmp_get_ticket_lock_owner(lck) == -1) {
898 KMP_FATAL(LockUnsettingFree, func);
899 }
900 if (__kmp_get_ticket_lock_owner(lck) != gtid) {
901 KMP_FATAL(LockUnsettingSetByAnother, func);
902 }
903 return __kmp_release_nested_ticket_lock(lck, gtid);
904}
905
906void __kmp_init_nested_ticket_lock(kmp_ticket_lock_t *lck) {
907 __kmp_init_ticket_lock(lck);
908 std::atomic_store_explicit(&lck->lk.depth_locked, 0,
909 std::memory_order_relaxed);
910 // >= 0 for nestable locks, -1 for simple locks
911}
912
913void __kmp_destroy_nested_ticket_lock(kmp_ticket_lock_t *lck) {
914 __kmp_destroy_ticket_lock(lck);
915 std::atomic_store_explicit(&lck->lk.depth_locked, 0,
916 std::memory_order_relaxed);
917}
918
919static void
920__kmp_destroy_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
921 char const *const func = "omp_destroy_nest_lock";
922
923 if (!std::atomic_load_explicit(&lck->lk.initialized,
924 std::memory_order_relaxed)) {
925 KMP_FATAL(LockIsUninitialized, func);
926 }
927 if (lck->lk.self != lck) {
928 KMP_FATAL(LockIsUninitialized, func);
929 }
930 if (!__kmp_is_ticket_lock_nestable(lck)) {
931 KMP_FATAL(LockSimpleUsedAsNestable, func);
932 }
933 if (__kmp_get_ticket_lock_owner(lck) != -1) {
934 KMP_FATAL(LockStillOwned, func);
935 }
936 __kmp_destroy_nested_ticket_lock(lck);
937}
938
939// Access functions for fields that don't exist in all lock kinds.
940
941static const ident_t *__kmp_get_ticket_lock_location(kmp_ticket_lock_t *lck) {
942 return lck->lk.location;
943}
944
945static void __kmp_set_ticket_lock_location(kmp_ticket_lock_t *lck,
946 const ident_t *loc) {
947 lck->lk.location = loc;
948}
949
950static kmp_lock_flags_t __kmp_get_ticket_lock_flags(kmp_ticket_lock_t *lck) {
951 return lck->lk.flags;
952}
953
954static void __kmp_set_ticket_lock_flags(kmp_ticket_lock_t *lck,
955 kmp_lock_flags_t flags) {
956 lck->lk.flags = flags;
957}
958
959/* ------------------------------------------------------------------------ */
960/* queuing locks */
961
 962/* First the states
 963   (head,tail) =              0, 0  means lock is unheld, nobody on queue
 964                 UINT_MAX or -1, 0  means lock is held, nobody on queue
 965                              h, h  means lock held or about to transition,
 966                                    1 element on queue
 967                              h, t  h <> t, means lock is held or about to
 968                                    transition, >1 elements on queue
 969
 970   Now the transitions
 971      Acquire(0,0)  = -1 ,0
 972      Release(0,0)  = Error
 973      Acquire(-1,0) =  h ,h    h > 0
 974      Release(-1,0) =  0 ,0
 975      Acquire(h,h)  =  h ,t    h > 0, t > 0, h <> t
 976      Release(h,h)  = -1 ,0    h > 0
 977      Acquire(h,t)  =  h ,t'   h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t'
 978      Release(h,t)  =  h',t    h > 0, t > 0, h <> t, h <> h', h' maybe = t
 979
 980   And pictorially
 981
 982           +-----+
 983           | 0, 0|------- release -------> Error
 984           +-----+
 985             |  ^
 986      acquire|  |release
 987             |  |
 988             |  |
 989             v  |
 990           +-----+
 991           |-1, 0|
 992           +-----+
 993             |  ^
 994      acquire|  |release
 995             |  |
 996             |  |
 997             v  |
 998           +-----+
 999           | h, h|
1000           +-----+
1001             |  ^
1002      acquire|  |release
1003             |  |
1004             |  |
1005             v  |
1006           +-----+
1007           | h, t|----- acquire, release loopback ---+
1008           +-----+                                   |
1009              ^                                      |
1010              |                                      |
1011              +--------------------------------------+
1012 */
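/* A concrete walk-through of the transitions above. The ids stored in
   (head, tail) are gtid+1 of *waiting* threads; the lock holder itself is
   never on the queue (head == -1 encodes "held, queue empty"):

     start:                        ( 0, 0)  lock free, queue empty
     A acquires:       ( 0, 0) -> (-1, 0)   A holds the lock, nobody queued
     B (gtid 6) waits: (-1, 0) -> ( 7, 7)   queue = [B]
     C (gtid 8) waits: ( 7, 7) -> ( 7, 9)   queue = [B, C]
     A releases:       ( 7, 9) -> ( 9, 9)   B is dequeued, woken, now holds
     B releases:       ( 9, 9) -> (-1, 0)   C is dequeued, woken, now holds
     C releases:       (-1, 0) -> ( 0, 0)   lock free again
*/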
1013
1014#ifdef DEBUG_QUEUING_LOCKS
1015
1016/* Stuff for circular trace buffer */
1017#define TRACE_BUF_ELE 1024
1018static char traces[TRACE_BUF_ELE][128] = {0};
1019static int tc = 0;
1020#define TRACE_LOCK(X, Y) \
1021 KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s\n", X, Y);
1022#define TRACE_LOCK_T(X, Y, Z) \
1023 KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s%d\n", X, Y, Z);
1024#define TRACE_LOCK_HT(X, Y, Z, Q) \
1025 KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s %d,%d\n", X, Y, \
1026 Z, Q);
1027
1028static void __kmp_dump_queuing_lock(kmp_info_t *this_thr, kmp_int32 gtid,
1029 kmp_queuing_lock_t *lck, kmp_int32 head_id,
1030 kmp_int32 tail_id) {
1031 kmp_int32 t, i;
1032
1033 __kmp_printf_no_lock("\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! \n");
1034
1035 i = tc % TRACE_BUF_ELE;
1036 __kmp_printf_no_lock("%s\n", traces[i]);
1037 i = (i + 1) % TRACE_BUF_ELE;
1038 while (i != (tc % TRACE_BUF_ELE)) {
1039 __kmp_printf_no_lock("%s", traces[i]);
1040 i = (i + 1) % TRACE_BUF_ELE;
1041 }
1042 __kmp_printf_no_lock("\n");
1043
1044 __kmp_printf_no_lock("\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, "
1045 "next_wait:%d, head_id:%d, tail_id:%d\n",
1046 gtid + 1, this_thr->th.th_spin_here,
1047 this_thr->th.th_next_waiting, head_id, tail_id);
1048
1049 __kmp_printf_no_lock("\t\thead: %d ", lck->lk.head_id);
1050
1051 if (lck->lk.head_id >= 1) {
1052 t = __kmp_threads[lck->lk.head_id - 1]->th.th_next_waiting;
1053 while (t > 0) {
1054 __kmp_printf_no_lock("-> %d ", t);
1055 t = __kmp_threads[t - 1]->th.th_next_waiting;
1056 }
1057 }
1058 __kmp_printf_no_lock("; tail: %d ", lck->lk.tail_id);
1059 __kmp_printf_no_lock("\n\n");
1060}
1061
1062#endif /* DEBUG_QUEUING_LOCKS */
1063
1064static kmp_int32 __kmp_get_queuing_lock_owner(kmp_queuing_lock_t *lck) {
1065 return TCR_4(lck->lk.owner_id) - 1;
1066}
1067
1068static inline bool __kmp_is_queuing_lock_nestable(kmp_queuing_lock_t *lck) {
1069 return lck->lk.depth_locked != -1;
1070}
1071
1072/* Acquire a lock using the queuing lock implementation */
1073template <bool takeTime>
1074/* [TLW] The unused template above is left behind because of what BEB believes
1075 is a potential compiler problem with __forceinline. */
1076__forceinline static int
1077__kmp_acquire_queuing_lock_timed_template(kmp_queuing_lock_t *lck,
1078 kmp_int32 gtid) {
1079 kmp_info_t *this_thr = __kmp_thread_from_gtid(gtid);
1080 volatile kmp_int32 *head_id_p = &lck->lk.head_id;
1081 volatile kmp_int32 *tail_id_p = &lck->lk.tail_id;
1082 volatile kmp_uint32 *spin_here_p;
1083
1084#if OMPT_SUPPORT
1085 ompt_state_t prev_state = ompt_state_undefined;
1086#endif
1087
1088 KA_TRACE(1000,
1089 ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid));
1090
1091 KMP_FSYNC_PREPARE(lck);
1092 KMP_DEBUG_ASSERT(this_thr != NULL);
1093 spin_here_p = &this_thr->th.th_spin_here;
1094
1095#ifdef DEBUG_QUEUING_LOCKS
1096 TRACE_LOCK(gtid + 1, "acq ent");
1097 if (*spin_here_p)
1098 __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1099 if (this_thr->th.th_next_waiting != 0)
1100 __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1101#endif
1102 KMP_DEBUG_ASSERT(!*spin_here_p);
1103 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
1104
1105 /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to
1106 head_id_p that may follow, not just in execution order, but also in
1107 visibility order. This way, when a releasing thread observes the changes to
1108 the queue by this thread, it can rightly assume that spin_here_p has
1109 already been set to TRUE, so that when it sets spin_here_p to FALSE, it is
1110 not premature. If the releasing thread sets spin_here_p to FALSE before
1111 this thread sets it to TRUE, this thread will hang. */
1112 *spin_here_p = TRUE; /* before enqueuing to prevent race */
1113
1114 while (1) {
1115 kmp_int32 enqueued;
1116 kmp_int32 head;
1117 kmp_int32 tail;
1118
1119 head = *head_id_p;
1120
1121 switch (head) {
1122
1123 case -1: {
1124#ifdef DEBUG_QUEUING_LOCKS
1125 tail = *tail_id_p;
1126 TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
1127#endif
1128 tail = 0; /* to make sure the next link, read asynchronously, is not set
1129 accidentally; this assignment prevents us from entering the
1130 if ( tail > 0 ) condition in the enqueued case below, which is not
1131 necessary for this state transition */
1132
1133 /* try (-1,0)->(tid,tid) */
1134 enqueued = KMP_COMPARE_AND_STORE_ACQ64((volatile kmp_int64 *)tail_id_p,
1135 KMP_PACK_64(-1, 0),
1136 KMP_PACK_64(gtid + 1, gtid + 1));
1137#ifdef DEBUG_QUEUING_LOCKS
1138 if (enqueued)
1139 TRACE_LOCK(gtid + 1, "acq enq: (-1,0)->(tid,tid)");
1140#endif
1141 } break;
1142
1143 default: {
1144 tail = *tail_id_p;
1145 KMP_DEBUG_ASSERT(tail != gtid + 1);
1146
1147#ifdef DEBUG_QUEUING_LOCKS
1148 TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
1149#endif
1150
1151 if (tail == 0) {
1152 enqueued = FALSE;
1153 } else {
1154 /* try (h,t) or (h,h)->(h,tid) */
1155 enqueued = KMP_COMPARE_AND_STORE_ACQ32(tail_id_p, tail, gtid + 1);
1156
1157#ifdef DEBUG_QUEUING_LOCKS
1158 if (enqueued)
1159 TRACE_LOCK(gtid + 1, "acq enq: (h,t)->(h,tid)");
1160#endif
1161 }
1162 } break;
1163
1164 case 0: /* empty queue */
1165 {
1166 kmp_int32 grabbed_lock;
1167
1168#ifdef DEBUG_QUEUING_LOCKS
1169 tail = *tail_id_p;
1170 TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
1171#endif
1172 /* try (0,0)->(-1,0) */
1173
1174 /* only legal transition out of head = 0 is head = -1 with no change to
1175 * tail */
1176 grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32(head_id_p, 0, -1);
1177
1178 if (grabbed_lock) {
1179
1180 *spin_here_p = FALSE;
1181
1182 KA_TRACE(
1183 1000,
1184 ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n",
1185 lck, gtid));
1186#ifdef DEBUG_QUEUING_LOCKS
1187 TRACE_LOCK_HT(gtid + 1, "acq exit: ", head, 0);
1188#endif
1189
1190#if OMPT_SUPPORT
1191 if (ompt_enabled.enabled && prev_state != ompt_state_undefined) {
1192 /* change the state before clearing wait_id */
1193 this_thr->th.ompt_thread_info.state = prev_state;
1194 this_thr->th.ompt_thread_info.wait_id = 0;
1195 }
1196#endif
1197
1198 KMP_FSYNC_ACQUIRED(lck);
1199 return KMP_LOCK_ACQUIRED_FIRST; /* lock holder cannot be on queue */
1200 }
1201 enqueued = FALSE;
1202 } break;
1203 }
1204
1205#if OMPT_SUPPORT
1206 if (ompt_enabled.enabled && prev_state == ompt_state_undefined) {
1207 /* this thread will spin; set wait_id before entering wait state */
1208 prev_state = this_thr->th.ompt_thread_info.state;
1209 this_thr->th.ompt_thread_info.wait_id = (uint64_t)lck;
1210 this_thr->th.ompt_thread_info.state = ompt_state_wait_lock;
1211 }
1212#endif
1213
1214 if (enqueued) {
1215 if (tail > 0) {
1216 kmp_info_t *tail_thr = __kmp_thread_from_gtid(tail - 1);
1217 KMP_ASSERT(tail_thr != NULL);
1218 tail_thr->th.th_next_waiting = gtid + 1;
1219 /* corresponding wait for this write in release code */
1220 }
1221 KA_TRACE(1000,
1222 ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n",
1223 lck, gtid));
1224
1225 KMP_MB();
1226 // ToDo: Use __kmp_wait_sleep or similar when blocktime != inf
1227 KMP_WAIT(spin_here_p, FALSE, KMP_EQ, lck);
1228 // Synchronize writes to both runtime thread structures
1229 // and writes in user code.
1230 KMP_MB();
1231
1232#ifdef DEBUG_QUEUING_LOCKS
1233 TRACE_LOCK(gtid + 1, "acq spin");
1234
1235 if (this_thr->th.th_next_waiting != 0)
1236 __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1237#endif
1238 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
1239 KA_TRACE(1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after "
1240 "waiting on queue\n",
1241 lck, gtid));
1242
1243#ifdef DEBUG_QUEUING_LOCKS
1244 TRACE_LOCK(gtid + 1, "acq exit 2");
1245#endif
1246
1247#if OMPT_SUPPORT
1248 /* change the state before clearing wait_id */
1249 this_thr->th.ompt_thread_info.state = prev_state;
1250 this_thr->th.ompt_thread_info.wait_id = 0;
1251#endif
1252
1253 /* got lock, we were dequeued by the thread that released lock */
1254 return KMP_LOCK_ACQUIRED_FIRST;
1255 }
1256
1257 /* Yield if number of threads > number of logical processors */
1258 /* ToDo: Not sure why this should only be in oversubscription case,
1259 maybe should be traditional YIELD_INIT/YIELD_WHEN loop */
1260 KMP_YIELD_OVERSUB();
1261
1262#ifdef DEBUG_QUEUING_LOCKS
1263 TRACE_LOCK(gtid + 1, "acq retry");
1264#endif
1265 }
1266 KMP_ASSERT2(0, "should not get here");
1267 return KMP_LOCK_ACQUIRED_FIRST;
1268}
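/* The "(-1,0) -> (tid,tid)" step above updates head and tail as one 64-bit
   word so no thread can observe a half-updated queue. A reduced sketch of that
   idea with both ids kept in a single std::atomic (illustrative only; the
   runtime instead overlays a 64-bit CAS on the adjacent 32-bit head_id and
   tail_id fields via KMP_PACK_64, whose layout is target dependent):

     #include <atomic>
     #include <cstdint>

     static std::uint64_t pack(std::int32_t head, std::int32_t tail) {
       return (static_cast<std::uint64_t>(static_cast<std::uint32_t>(head)) << 32) |
              static_cast<std::uint32_t>(tail);
     }

     struct queue_word {
       // head in the high half, tail in the low half
       std::atomic<std::uint64_t> ends{pack(0, 0)};

       // Claim a spot when the lock is held with an empty queue:
       // (-1,0) -> (tid,tid) as a single atomic compare-and-swap.
       bool enqueue_first_waiter(std::int32_t tid_plus_1) {
         std::uint64_t expected = pack(-1, 0);
         return ends.compare_exchange_strong(expected,
                                             pack(tid_plus_1, tid_plus_1),
                                             std::memory_order_acquire,
                                             std::memory_order_relaxed);
       }
     };
*/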
1269
1270int __kmp_acquire_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1271 KMP_DEBUG_ASSERT(gtid >= 0);
1272
1273 int retval = __kmp_acquire_queuing_lock_timed_template<false>(lck, gtid);
1274 return retval;
1275}
1276
1277static int __kmp_acquire_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1278 kmp_int32 gtid) {
1279 char const *const func = "omp_set_lock";
1280 if (lck->lk.initialized != lck) {
1281 KMP_FATAL(LockIsUninitialized, func);
1282 }
1283 if (__kmp_is_queuing_lock_nestable(lck)) {
1284 KMP_FATAL(LockNestableUsedAsSimple, func);
1285 }
1286 if (__kmp_get_queuing_lock_owner(lck) == gtid) {
1287 KMP_FATAL(LockIsAlreadyOwned, func);
1288 }
1289
1290 __kmp_acquire_queuing_lock(lck, gtid);
1291
1292 lck->lk.owner_id = gtid + 1;
1293 return KMP_LOCK_ACQUIRED_FIRST;
1294}
1295
1296int __kmp_test_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1297 volatile kmp_int32 *head_id_p = &lck->lk.head_id;
1298 kmp_int32 head;
1299#ifdef KMP_DEBUG
1300 kmp_info_t *this_thr;
1301#endif
1302
1303 KA_TRACE(1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid));
1304 KMP_DEBUG_ASSERT(gtid >= 0);
1305#ifdef KMP_DEBUG
1306 this_thr = __kmp_thread_from_gtid(gtid);
1307 KMP_DEBUG_ASSERT(this_thr != NULL);
1308 KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
1309#endif
1310
1311 head = *head_id_p;
1312
1313 if (head == 0) { /* nobody on queue, nobody holding */
1314 /* try (0,0)->(-1,0) */
1315 if (KMP_COMPARE_AND_STORE_ACQ32(head_id_p, 0, -1)) {
1316 KA_TRACE(1000,
1317 ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid));
1318 KMP_FSYNC_ACQUIRED(lck);
1319 return TRUE;
1320 }
1321 }
1322
1323 KA_TRACE(1000,
1324 ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid));
1325 return FALSE;
1326}
1327
1328static int __kmp_test_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1329 kmp_int32 gtid) {
1330 char const *const func = "omp_test_lock";
1331 if (lck->lk.initialized != lck) {
1332 KMP_FATAL(LockIsUninitialized, func);
1333 }
1334 if (__kmp_is_queuing_lock_nestable(lck)) {
1335 KMP_FATAL(LockNestableUsedAsSimple, func);
1336 }
1337
1338 int retval = __kmp_test_queuing_lock(lck, gtid);
1339
1340 if (retval) {
1341 lck->lk.owner_id = gtid + 1;
1342 }
1343 return retval;
1344}
1345
1346int __kmp_release_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1347 kmp_info_t *this_thr;
1348 volatile kmp_int32 *head_id_p = &lck->lk.head_id;
1349 volatile kmp_int32 *tail_id_p = &lck->lk.tail_id;
1350
1351 KA_TRACE(1000,
1352 ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid));
1353 KMP_DEBUG_ASSERT(gtid >= 0);
1354 this_thr = __kmp_thread_from_gtid(gtid);
1355 KMP_DEBUG_ASSERT(this_thr != NULL);
1356#ifdef DEBUG_QUEUING_LOCKS
1357 TRACE_LOCK(gtid + 1, "rel ent");
1358
1359 if (this_thr->th.th_spin_here)
1360 __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1361 if (this_thr->th.th_next_waiting != 0)
1362 __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1363#endif
1364 KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
1365 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
1366
1367 KMP_FSYNC_RELEASING(lck);
1368
1369 while (1) {
1370 kmp_int32 dequeued;
1371 kmp_int32 head;
1372 kmp_int32 tail;
1373
1374 head = *head_id_p;
1375
1376#ifdef DEBUG_QUEUING_LOCKS
1377 tail = *tail_id_p;
1378 TRACE_LOCK_HT(gtid + 1, "rel read: ", head, tail);
1379 if (head == 0)
1380 __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
1381#endif
1382 KMP_DEBUG_ASSERT(head !=
1383 0); /* holding the lock, head must be -1 or queue head */
1384
1385 if (head == -1) { /* nobody on queue */
1386 /* try (-1,0)->(0,0) */
1387 if (KMP_COMPARE_AND_STORE_REL32(head_id_p, -1, 0)) {
1388 KA_TRACE(
1389 1000,
1390 ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n",
1391 lck, gtid));
1392#ifdef DEBUG_QUEUING_LOCKS
1393 TRACE_LOCK_HT(gtid + 1, "rel exit: ", 0, 0);
1394#endif
1395
1396#if OMPT_SUPPORT
1397/* nothing to do - no other thread is trying to shift blame */
1398#endif
1399 return KMP_LOCK_RELEASED;
1400 }
1401 dequeued = FALSE;
1402 } else {
1403 KMP_MB();
1404 tail = *tail_id_p;
1405 if (head == tail) { /* only one thread on the queue */
1406#ifdef DEBUG_QUEUING_LOCKS
1407 if (head <= 0)
1408 __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
1409#endif
1410 KMP_DEBUG_ASSERT(head > 0);
1411
1412 /* try (h,h)->(-1,0) */
1413 dequeued = KMP_COMPARE_AND_STORE_REL64(
1414 RCAST(volatile kmp_int64 *, tail_id_p), KMP_PACK_64(head, head),
1415 KMP_PACK_64(-1, 0));
1416#ifdef DEBUG_QUEUING_LOCKS
1417 TRACE_LOCK(gtid + 1, "rel deq: (h,h)->(-1,0)");
1418#endif
1419
1420 } else {
1421 volatile kmp_int32 *waiting_id_p;
1422 kmp_info_t *head_thr = __kmp_thread_from_gtid(head - 1);
1423 KMP_DEBUG_ASSERT(head_thr != NULL);
1424 waiting_id_p = &head_thr->th.th_next_waiting;
1425
1426/* Does this require synchronous reads? */
1427#ifdef DEBUG_QUEUING_LOCKS
1428 if (head <= 0 || tail <= 0)
1429 __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
1430#endif
1431 KMP_DEBUG_ASSERT(head > 0 && tail > 0);
1432
1433 /* try (h,t)->(h',t) or (t,t) */
1434 KMP_MB();
1435 /* make sure enqueuing thread has time to update next waiting thread
1436 * field */
1437 *head_id_p =
1438 KMP_WAIT((volatile kmp_uint32 *)waiting_id_p, 0, KMP_NEQ, NULL);
1439#ifdef DEBUG_QUEUING_LOCKS
1440 TRACE_LOCK(gtid + 1, "rel deq: (h,t)->(h',t)");
1441#endif
1442 dequeued = TRUE;
1443 }
1444 }
1445
1446 if (dequeued) {
1447 kmp_info_t *head_thr = __kmp_thread_from_gtid(head - 1);
1448 KMP_DEBUG_ASSERT(head_thr != NULL);
1449
1450/* Does this require synchronous reads? */
1451#ifdef DEBUG_QUEUING_LOCKS
1452 if (head <= 0 || tail <= 0)
1453 __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
1454#endif
1455 KMP_DEBUG_ASSERT(head > 0 && tail > 0);
1456
1457 /* For clean code only. The waiting thread is not released until the next
1458 statement, which prevents a race with the acquire code. */
1459 head_thr->th.th_next_waiting = 0;
1460#ifdef DEBUG_QUEUING_LOCKS
1461 TRACE_LOCK_T(gtid + 1, "rel nw=0 for t=", head);
1462#endif
1463
1464 KMP_MB();
1465 /* reset spin value */
1466 head_thr->th.th_spin_here = FALSE;
1467
1468 KA_TRACE(1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after "
1469 "dequeuing\n",
1470 lck, gtid));
1471#ifdef DEBUG_QUEUING_LOCKS
1472 TRACE_LOCK(gtid + 1, "rel exit 2");
1473#endif
1474 return KMP_LOCK_RELEASED;
1475 }
1476 /* KMP_CPU_PAUSE(); don't want to make releasing thread hold up acquiring
1477 threads */
1478
1479#ifdef DEBUG_QUEUING_LOCKS
1480 TRACE_LOCK(gtid + 1, "rel retry");
1481#endif
1482
1483 } /* while */
1484 KMP_ASSERT2(0, "should not get here");
1485 return KMP_LOCK_RELEASED;
1486}
1487
1488static int __kmp_release_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1489 kmp_int32 gtid) {
1490 char const *const func = "omp_unset_lock";
1491 KMP_MB(); /* in case another processor initialized lock */
1492 if (lck->lk.initialized != lck) {
1493 KMP_FATAL(LockIsUninitialized, func);
1494 }
1495 if (__kmp_is_queuing_lock_nestable(lck)) {
1496 KMP_FATAL(LockNestableUsedAsSimple, func);
1497 }
1498 if (__kmp_get_queuing_lock_owner(lck) == -1) {
1499 KMP_FATAL(LockUnsettingFree, func);
1500 }
1501 if (__kmp_get_queuing_lock_owner(lck) != gtid) {
1502 KMP_FATAL(LockUnsettingSetByAnother, func);
1503 }
1504 lck->lk.owner_id = 0;
1505 return __kmp_release_queuing_lock(lck, gtid);
1506}
1507
1508void __kmp_init_queuing_lock(kmp_queuing_lock_t *lck) {
1509 lck->lk.location = NULL;
1510 lck->lk.head_id = 0;
1511 lck->lk.tail_id = 0;
1512 lck->lk.next_ticket = 0;
1513 lck->lk.now_serving = 0;
1514 lck->lk.owner_id = 0; // no thread owns the lock.
1515 lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
1516 lck->lk.initialized = lck;
1517
1518 KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck));
1519}
1520
1521void __kmp_destroy_queuing_lock(kmp_queuing_lock_t *lck) {
1522 lck->lk.initialized = NULL;
1523 lck->lk.location = NULL;
1524 lck->lk.head_id = 0;
1525 lck->lk.tail_id = 0;
1526 lck->lk.next_ticket = 0;
1527 lck->lk.now_serving = 0;
1528 lck->lk.owner_id = 0;
1529 lck->lk.depth_locked = -1;
1530}
1531
1532static void __kmp_destroy_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
1533 char const *const func = "omp_destroy_lock";
1534 if (lck->lk.initialized != lck) {
1535 KMP_FATAL(LockIsUninitialized, func);
1536 }
1537 if (__kmp_is_queuing_lock_nestable(lck)) {
1538 KMP_FATAL(LockNestableUsedAsSimple, func);
1539 }
1540 if (__kmp_get_queuing_lock_owner(lck) != -1) {
1541 KMP_FATAL(LockStillOwned, func);
1542 }
1543 __kmp_destroy_queuing_lock(lck);
1544}
1545
1546// nested queuing locks
1547
1548int __kmp_acquire_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1549 KMP_DEBUG_ASSERT(gtid >= 0);
1550
1551 if (__kmp_get_queuing_lock_owner(lck) == gtid) {
1552 lck->lk.depth_locked += 1;
1553 return KMP_LOCK_ACQUIRED_NEXT;
1554 } else {
1555 __kmp_acquire_queuing_lock_timed_template<false>(lck, gtid);
1556 KMP_MB();
1557 lck->lk.depth_locked = 1;
1558 KMP_MB();
1559 lck->lk.owner_id = gtid + 1;
1560 return KMP_LOCK_ACQUIRED_FIRST;
1561 }
1562}
1563
1564static int
1565__kmp_acquire_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1566 kmp_int32 gtid) {
1567 char const *const func = "omp_set_nest_lock";
1568 if (lck->lk.initialized != lck) {
1569 KMP_FATAL(LockIsUninitialized, func);
1570 }
1571 if (!__kmp_is_queuing_lock_nestable(lck)) {
1572 KMP_FATAL(LockSimpleUsedAsNestable, func);
1573 }
1574 return __kmp_acquire_nested_queuing_lock(lck, gtid);
1575}
1576
1577int __kmp_test_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1578 int retval;
1579
1580 KMP_DEBUG_ASSERT(gtid >= 0);
1581
1582 if (__kmp_get_queuing_lock_owner(lck) == gtid) {
1583 retval = ++lck->lk.depth_locked;
1584 } else if (!__kmp_test_queuing_lock(lck, gtid)) {
1585 retval = 0;
1586 } else {
1587 KMP_MB();
1588 retval = lck->lk.depth_locked = 1;
1589 KMP_MB();
1590 lck->lk.owner_id = gtid + 1;
1591 }
1592 return retval;
1593}
1594
1595static int __kmp_test_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1596 kmp_int32 gtid) {
1597 char const *const func = "omp_test_nest_lock";
1598 if (lck->lk.initialized != lck) {
1599 KMP_FATAL(LockIsUninitialized, func);
1600 }
1601 if (!__kmp_is_queuing_lock_nestable(lck)) {
1602 KMP_FATAL(LockSimpleUsedAsNestable, func);
1603 }
1604 return __kmp_test_nested_queuing_lock(lck, gtid);
1605}
1606
1607int __kmp_release_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1608 KMP_DEBUG_ASSERT(gtid >= 0);
1609
1610 KMP_MB();
1611 if (--(lck->lk.depth_locked) == 0) {
1612 KMP_MB();
1613 lck->lk.owner_id = 0;
1614 __kmp_release_queuing_lock(lck, gtid);
1615 return KMP_LOCK_RELEASED;
1616 }
1617 return KMP_LOCK_STILL_HELD;
1618}
1619
1620static int
1621__kmp_release_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1622 kmp_int32 gtid) {
1623 char const *const func = "omp_unset_nest_lock";
1624 KMP_MB(); /* in case another processor initialized lock */
1625 if (lck->lk.initialized != lck) {
1626 KMP_FATAL(LockIsUninitialized, func);
1627 }
1628 if (!__kmp_is_queuing_lock_nestable(lck)) {
1629 KMP_FATAL(LockSimpleUsedAsNestable, func);
1630 }
1631 if (__kmp_get_queuing_lock_owner(lck) == -1) {
1632 KMP_FATAL(LockUnsettingFree, func);
1633 }
1634 if (__kmp_get_queuing_lock_owner(lck) != gtid) {
1635 KMP_FATAL(LockUnsettingSetByAnother, func);
1636 }
1637 return __kmp_release_nested_queuing_lock(lck, gtid);
1638}
1639
1640void __kmp_init_nested_queuing_lock(kmp_queuing_lock_t *lck) {
1641 __kmp_init_queuing_lock(lck);
1642 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
1643}
1644
1645void __kmp_destroy_nested_queuing_lock(kmp_queuing_lock_t *lck) {
1646 __kmp_destroy_queuing_lock(lck);
1647 lck->lk.depth_locked = 0;
1648}
1649
1650static void
1651__kmp_destroy_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
1652 char const *const func = "omp_destroy_nest_lock";
1653 if (lck->lk.initialized != lck) {
1654 KMP_FATAL(LockIsUninitialized, func);
1655 }
1656 if (!__kmp_is_queuing_lock_nestable(lck)) {
1657 KMP_FATAL(LockSimpleUsedAsNestable, func);
1658 }
1659 if (__kmp_get_queuing_lock_owner(lck) != -1) {
1660 KMP_FATAL(LockStillOwned, func);
1661 }
1662 __kmp_destroy_nested_queuing_lock(lck);
1663}
1664
1665// Access functions for fields that don't exist in all lock kinds.
1666
1667static const ident_t *__kmp_get_queuing_lock_location(kmp_queuing_lock_t *lck) {
1668 return lck->lk.location;
1669}
1670
1671static void __kmp_set_queuing_lock_location(kmp_queuing_lock_t *lck,
1672 const ident_t *loc) {
1673 lck->lk.location = loc;
1674}
1675
1676static kmp_lock_flags_t __kmp_get_queuing_lock_flags(kmp_queuing_lock_t *lck) {
1677 return lck->lk.flags;
1678}
1679
1680static void __kmp_set_queuing_lock_flags(kmp_queuing_lock_t *lck,
1681 kmp_lock_flags_t flags) {
1682 lck->lk.flags = flags;
1683}
1684
1685#if KMP_USE_ADAPTIVE_LOCKS
1686
1687/* RTM Adaptive locks */
1688
1689#if KMP_HAVE_RTM_INTRINSICS
1690#include <immintrin.h>
1691#define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
1692
1693#else
1694
1695// Values from the status register after failed speculation.
1696#define _XBEGIN_STARTED (~0u)
1697#define _XABORT_EXPLICIT (1 << 0)
1698#define _XABORT_RETRY (1 << 1)
1699#define _XABORT_CONFLICT (1 << 2)
1700#define _XABORT_CAPACITY (1 << 3)
1701#define _XABORT_DEBUG (1 << 4)
1702#define _XABORT_NESTED (1 << 5)
1703#define _XABORT_CODE(x) ((unsigned char)(((x) >> 24) & 0xFF))
1704
1705// Aborts for which it's worth trying again immediately
1706#define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
1707
1708#define STRINGIZE_INTERNAL(arg) #arg
1709#define STRINGIZE(arg) STRINGIZE_INTERNAL(arg)
1710
1711// Access to RTM instructions
1712/* A version of _xbegin which returns -1 when speculation starts, and the
1713 value of EAX on an abort. This is the same definition as the compiler
1714 intrinsic that will be supported at some point. */
1715static __inline int _xbegin() {
1716 int res = -1;
1717
1718#if KMP_OS_WINDOWS
1719#if KMP_ARCH_X86_64
1720 _asm {
1721 _emit 0xC7
1722 _emit 0xF8
1723 _emit 2
1724 _emit 0
1725 _emit 0
1726 _emit 0
1727 jmp L2
1728 mov res, eax
1729 L2:
1730 }
1731#else /* IA32 */
1732 _asm {
1733 _emit 0xC7
1734 _emit 0xF8
1735 _emit 2
1736 _emit 0
1737 _emit 0
1738 _emit 0
1739 jmp L2
1740 mov res, eax
1741 L2:
1742 }
1743#endif // KMP_ARCH_X86_64
1744#else
1745 /* Note that %eax must be noted as killed (clobbered), because the XSR is
1746 returned in %eax(%rax) on abort. Other register values are restored, so
1747 don't need to be killed.
1748
1749 We must also mark 'res' as an input and an output, since otherwise
1750 'res=-1' may be dropped as being dead, whereas we do need the assignment on
1751 the successful (i.e., non-abort) path. */
1752 __asm__ volatile("1: .byte 0xC7; .byte 0xF8;\n"
1753 " .long 1f-1b-6\n"
1754 " jmp 2f\n"
1755 "1: movl %%eax,%0\n"
1756 "2:"
1757 : "+r"(res)::"memory", "%eax");
1758#endif // KMP_OS_WINDOWS
1759 return res;
1760}
1761
1762/* Transaction end */
1763static __inline void _xend() {
1764#if KMP_OS_WINDOWS
1765 __asm {
1766 _emit 0x0f
1767 _emit 0x01
1768 _emit 0xd5
1769 }
1770#else
1771 __asm__ volatile(".byte 0x0f; .byte 0x01; .byte 0xd5" ::: "memory");
1772#endif
1773}
1774
1775/* This is a macro; the argument must be a single-byte constant that can be
1776 evaluated by the inline assembler, since it is emitted as a byte into the
1777 assembly code. */
1778// clang-format off
1779#if KMP_OS_WINDOWS
1780#define _xabort(ARG) _asm _emit 0xc6 _asm _emit 0xf8 _asm _emit ARG
1781#else
1782#define _xabort(ARG) \
1783 __asm__ volatile(".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG):::"memory");
1784#endif
1785// clang-format on
1786#endif // KMP_HAVE_RTM_INTRINSICS
1787
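// Illustrative sketch, kept out of the build with #if 0: the typical
// begin/abort/commit pattern for the _xbegin/_xend/_xabort helpers defined
// (or included) above. 'fallback_flag' and the toy_* functions are
// hypothetical stand-ins for the real queuing lock that the adaptive lock
// falls back to; they are not part of this runtime.
#if 0
static volatile int fallback_flag = 0; // hypothetical fall-back lock word

static void toy_lock(void) {
  unsigned status = _xbegin();
  if (status == _XBEGIN_STARTED) {
    if (fallback_flag != 0)
      _xabort(0x01); // someone holds the real lock: abort the transaction
    return;          // speculating: the critical section runs transactionally
  }
  // Aborted; 'status' holds the abort reason bits. Take the real lock.
  while (__sync_lock_test_and_set(&fallback_flag, 1))
    ;
}

static void toy_unlock(void) {
  if (fallback_flag == 0)
    _xend();                             // commit the speculative section
  else
    __sync_lock_release(&fallback_flag); // release the real lock
}
#endif
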
1788// Statistics are collected for testing purposes
1789#if KMP_DEBUG_ADAPTIVE_LOCKS
1790
1791// We accumulate speculative lock statistics when the lock is destroyed. We
1792// keep locks that haven't been destroyed in the liveLocks list so that we can
1793// grab their statistics too.
1794static kmp_adaptive_lock_statistics_t destroyedStats;
1795
1796// To hold the list of live locks.
1797static kmp_adaptive_lock_info_t liveLocks;
1798
1799// A lock so we can safely update the list of locks.
1800static kmp_bootstrap_lock_t chain_lock =
1801 KMP_BOOTSTRAP_LOCK_INITIALIZER(chain_lock);
1802
1803// Initialize the list of stats.
1804void __kmp_init_speculative_stats() {
1805 kmp_adaptive_lock_info_t *lck = &liveLocks;
1806
1807 memset(CCAST(kmp_adaptive_lock_statistics_t *, &(lck->stats)), 0,
1808 sizeof(lck->stats));
1809 lck->stats.next = lck;
1810 lck->stats.prev = lck;
1811
1812 KMP_ASSERT(lck->stats.next->stats.prev == lck);
1813 KMP_ASSERT(lck->stats.prev->stats.next == lck);
1814
1815 __kmp_init_bootstrap_lock(&chain_lock);
1816}
1817
1818// Insert the lock into the circular list
1819static void __kmp_remember_lock(kmp_adaptive_lock_info_t *lck) {
1820 __kmp_acquire_bootstrap_lock(&chain_lock);
1821
1822 lck->stats.next = liveLocks.stats.next;
1823 lck->stats.prev = &liveLocks;
1824
1825 liveLocks.stats.next = lck;
1826 lck->stats.next->stats.prev = lck;
1827
1828 KMP_ASSERT(lck->stats.next->stats.prev == lck);
1829 KMP_ASSERT(lck->stats.prev->stats.next == lck);
1830
1831 __kmp_release_bootstrap_lock(&chain_lock);
1832}
1833
1834static void __kmp_forget_lock(kmp_adaptive_lock_info_t *lck) {
1835 KMP_ASSERT(lck->stats.next->stats.prev == lck);
1836 KMP_ASSERT(lck->stats.prev->stats.next == lck);
1837
1838 kmp_adaptive_lock_info_t *n = lck->stats.next;
1839 kmp_adaptive_lock_info_t *p = lck->stats.prev;
1840
1841 n->stats.prev = p;
1842 p->stats.next = n;
1843}
1844
1845static void __kmp_zero_speculative_stats(kmp_adaptive_lock_info_t *lck) {
1846 memset(CCAST(kmp_adaptive_lock_statistics_t *, &lck->stats), 0,
1847 sizeof(lck->stats));
1848 __kmp_remember_lock(lck);
1849}
1850
1851static void __kmp_add_stats(kmp_adaptive_lock_statistics_t *t,
1852 kmp_adaptive_lock_info_t *lck) {
1853 kmp_adaptive_lock_statistics_t volatile *s = &lck->stats;
1854
1855 t->nonSpeculativeAcquireAttempts += lck->acquire_attempts;
1856 t->successfulSpeculations += s->successfulSpeculations;
1857 t->hardFailedSpeculations += s->hardFailedSpeculations;
1858 t->softFailedSpeculations += s->softFailedSpeculations;
1859 t->nonSpeculativeAcquires += s->nonSpeculativeAcquires;
1860 t->lemmingYields += s->lemmingYields;
1861}
1862
1863static void __kmp_accumulate_speculative_stats(kmp_adaptive_lock_info_t *lck) {
1864 __kmp_acquire_bootstrap_lock(&chain_lock);
1865
1866 __kmp_add_stats(&destroyedStats, lck);
1867 __kmp_forget_lock(lck);
1868
1869 __kmp_release_bootstrap_lock(&chain_lock);
1870}
1871
1872static float percent(kmp_uint32 count, kmp_uint32 total) {
1873 return (total == 0) ? 0.0 : (100.0 * count) / total;
1874}
1875
1876void __kmp_print_speculative_stats() {
1877 kmp_adaptive_lock_statistics_t total = destroyedStats;
1878 kmp_adaptive_lock_info_t *lck;
1879
1880 for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) {
1881 __kmp_add_stats(&total, lck);
1882 }
1883 kmp_adaptive_lock_statistics_t *t = &total;
1884 kmp_uint32 totalSections =
1885 t->nonSpeculativeAcquires + t->successfulSpeculations;
1886 kmp_uint32 totalSpeculations = t->successfulSpeculations +
1887 t->hardFailedSpeculations +
1888 t->softFailedSpeculations;
1889 if (totalSections <= 0)
1890 return;
1891
1892 kmp_safe_raii_file_t statsFile;
1893 if (strcmp(__kmp_speculative_statsfile, "-") == 0) {
1894 statsFile.set_stdout();
1895 } else {
1896 size_t buffLen = KMP_STRLEN(__kmp_speculative_statsfile) + 20;
1897 char buffer[buffLen];
1898 KMP_SNPRINTF(&buffer[0], buffLen, __kmp_speculative_statsfile,
1899 (kmp_int32)getpid());
1900 statsFile.open(buffer, "w");
1901 }
1902
1903 fprintf(statsFile, "Speculative lock statistics (all approximate!)\n");
1904 fprintf(statsFile,
1905 " Lock parameters: \n"
1906 " max_soft_retries : %10d\n"
1907 " max_badness : %10d\n",
1908 __kmp_adaptive_backoff_params.max_soft_retries,
1909 __kmp_adaptive_backoff_params.max_badness);
1910 fprintf(statsFile, " Non-speculative acquire attempts : %10d\n",
1911 t->nonSpeculativeAcquireAttempts);
1912 fprintf(statsFile, " Total critical sections : %10d\n",
1913 totalSections);
1914 fprintf(statsFile, " Successful speculations : %10d (%5.1f%%)\n",
1915 t->successfulSpeculations,
1916 percent(t->successfulSpeculations, totalSections));
1917 fprintf(statsFile, " Non-speculative acquires : %10d (%5.1f%%)\n",
1918 t->nonSpeculativeAcquires,
1919 percent(t->nonSpeculativeAcquires, totalSections));
1920 fprintf(statsFile, " Lemming yields : %10d\n\n",
1921 t->lemmingYields);
1922
1923 fprintf(statsFile, " Speculative acquire attempts : %10d\n",
1924 totalSpeculations);
1925 fprintf(statsFile, " Successes : %10d (%5.1f%%)\n",
1926 t->successfulSpeculations,
1927 percent(t->successfulSpeculations, totalSpeculations));
1928 fprintf(statsFile, " Soft failures : %10d (%5.1f%%)\n",
1929 t->softFailedSpeculations,
1930 percent(t->softFailedSpeculations, totalSpeculations));
1931 fprintf(statsFile, " Hard failures : %10d (%5.1f%%)\n",
1932 t->hardFailedSpeculations,
1933 percent(t->hardFailedSpeculations, totalSpeculations));
1934}
1935
1936#define KMP_INC_STAT(lck, stat) (lck->lk.adaptive.stats.stat++)
1937#else
1938#define KMP_INC_STAT(lck, stat)
1939
1940#endif // KMP_DEBUG_ADAPTIVE_LOCKS
1941
1942static inline bool __kmp_is_unlocked_queuing_lock(kmp_queuing_lock_t *lck) {
1943 // It is enough to check that the head_id is zero.
1944 // We don't also need to check the tail.
1945 bool res = lck->lk.head_id == 0;
1946
1947// We need a fence here, since we must ensure that no memory operations
1948// from later in this thread float above that read.
1949#if KMP_COMPILER_ICC
1950 _mm_mfence();
1951#else
1952 __sync_synchronize();
1953#endif
1954
1955 return res;
1956}
1957
1958// Functions for manipulating the badness
1959static __inline void
1960__kmp_update_badness_after_success(kmp_adaptive_lock_t *lck) {
1961 // Reset the badness to zero so we eagerly try to speculate again
1962 lck->lk.adaptive.badness = 0;
1963 KMP_INC_STAT(lck, successfulSpeculations);
1964}
1965
1966// Create a bit mask with one more set bit.
1967static __inline void __kmp_step_badness(kmp_adaptive_lock_t *lck) {
1968 kmp_uint32 newBadness = (lck->lk.adaptive.badness << 1) | 1;
1969 if (newBadness > lck->lk.adaptive.max_badness) {
1970 return;
1971 } else {
1972 lck->lk.adaptive.badness = newBadness;
1973 }
1974}
1975
1976// Check whether speculation should be attempted.
1977KMP_ATTRIBUTE_TARGET_RTM
1978static __inline int __kmp_should_speculate(kmp_adaptive_lock_t *lck,
1979 kmp_int32 gtid) {
1980 kmp_uint32 badness = lck->lk.adaptive.badness;
1981 kmp_uint32 attempts = lck->lk.adaptive.acquire_attempts;
1982 int res = (attempts & badness) == 0;
1983 return res;
1984}
1985
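// Illustrative sketch, kept out of the build with #if 0: the gating rule used
// by __kmp_should_speculate above. After repeated speculation failures the
// badness word has its k low bits set (0, 1, 3, 7, ...), so
// (attempts & badness) == 0 lets speculation be retried only on every
// 2^k-th acquire attempt.
#if 0
#include <stdint.h>
#include <stdio.h>

int main(void) {
  const uint32_t badness = 7; // three failures so far: low three bits set
  for (uint32_t attempts = 1; attempts <= 16; ++attempts)
    printf("attempt %2u -> %s\n", attempts,
           (attempts & badness) == 0 ? "try speculation" : "skip speculation");
  // Only attempts 8 and 16 pass the filter, i.e. one attempt in eight.
  return 0;
}
#endif
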
1986// Attempt to acquire only the speculative lock.
1987// Does not back off to the non-speculative lock.
1988KMP_ATTRIBUTE_TARGET_RTM
1989static int __kmp_test_adaptive_lock_only(kmp_adaptive_lock_t *lck,
1990 kmp_int32 gtid) {
1991 int retries = lck->lk.adaptive.max_soft_retries;
1992
1993 // We don't explicitly count the start of speculation, rather we record the
1994 // results (success, hard fail, soft fail). The sum of all of those is the
1995 // total number of times we started speculation since all speculations must
1996 // end one of those ways.
1997 do {
1998 kmp_uint32 status = _xbegin();
1999 // Switch this in to disable actual speculation but exercise at least some
2000 // of the rest of the code. Useful for debugging...
2001 // kmp_uint32 status = _XABORT_NESTED;
2002
2003 if (status == _XBEGIN_STARTED) {
2004 /* We have successfully started speculation. Check that no-one acquired
2005 the lock for real between when we last looked and now. This also gets
2006 the lock cache line into our read-set, which we need so that we'll
2007 abort if anyone later claims it for real. */
2008 if (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
2009 // Lock is now visibly acquired, so someone beat us to it. Abort the
2010 // transaction so we'll restart from _xbegin with the failure status.
2011 _xabort(0x01);
2012 KMP_ASSERT2(0, "should not get here");
2013 }
2014 return 1; // Lock has been acquired (speculatively)
2015 } else {
2016 // We have aborted, update the statistics
2017 if (status & SOFT_ABORT_MASK) {
2018 KMP_INC_STAT(lck, softFailedSpeculations);
2019 // and loop round to retry.
2020 } else {
2021 KMP_INC_STAT(lck, hardFailedSpeculations);
2022 // Give up if we had a hard failure.
2023 break;
2024 }
2025 }
2026 } while (retries--); // Loop while we have retries, and didn't fail hard.
2027
2028 // Either we had a hard failure or we didn't succeed softly after
2029 // the full set of attempts, so back off the badness.
2030 __kmp_step_badness(lck);
2031 return 0;
2032}
2033
2034// Attempt to acquire the speculative lock, or back off to the non-speculative
2035// one if the speculative lock cannot be acquired.
2036// We can succeed speculatively, non-speculatively, or fail.
2037static int __kmp_test_adaptive_lock(kmp_adaptive_lock_t *lck, kmp_int32 gtid) {
2038 // First try to acquire the lock speculatively
2039 if (__kmp_should_speculate(lck, gtid) &&
2040 __kmp_test_adaptive_lock_only(lck, gtid))
2041 return 1;
2042
2043 // Speculative acquisition failed, so try to acquire it non-speculatively.
2044 // Count the non-speculative acquire attempt
2045 lck->lk.adaptive.acquire_attempts++;
2046
2047 // Use base, non-speculative lock.
2048 if (__kmp_test_queuing_lock(GET_QLK_PTR(lck), gtid)) {
2049 KMP_INC_STAT(lck, nonSpeculativeAcquires);
2050 return 1; // Lock is acquired (non-speculatively)
2051 } else {
2052 return 0; // Failed to acquire the lock, it's already visibly locked.
2053 }
2054}
2055
2056static int __kmp_test_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
2057 kmp_int32 gtid) {
2058 char const *const func = "omp_test_lock";
2059 if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2060 KMP_FATAL(LockIsUninitialized, func);
2061 }
2062
2063 int retval = __kmp_test_adaptive_lock(lck, gtid);
2064
2065 if (retval) {
2066 lck->lk.qlk.owner_id = gtid + 1;
2067 }
2068 return retval;
2069}
2070
2071// Block until we can acquire a speculative, adaptive lock. We check whether we
2072// should be trying to speculate. If we should be, we check the real lock to see
2073// if it is free, and, if not, pause without attempting to acquire it until it
2074// is. Then we try the speculative acquire. This means that although we suffer
2075// from lemmings a little (because we can't acquire the lock speculatively
2076// until the queue of waiting threads has cleared), we don't get into a state
2077// where we can never acquire the lock speculatively (because we force the queue
2078// to clear by preventing new arrivals from entering the queue). This does mean
2079// that when we're trying to break lemmings, the lock is no longer fair. However
2080// OpenMP makes no guarantee that its locks are fair, so this isn't a real
2081// problem.
2082static void __kmp_acquire_adaptive_lock(kmp_adaptive_lock_t *lck,
2083 kmp_int32 gtid) {
2084 if (__kmp_should_speculate(lck, gtid)) {
2085 if (__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
2086 if (__kmp_test_adaptive_lock_only(lck, gtid))
2087 return;
2088 // We tried speculation and failed, so give up.
2089 } else {
2090 // We can't try speculation until the lock is free, so we pause here
2091 // (without suspending on the queuing lock) to allow it to drain, then
2092 // try again. All other threads will also see the same result for
2093 // shouldSpeculate, so will be doing the same if they try to claim the
2094 // lock from now on.
2095 while (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
2096 KMP_INC_STAT(lck, lemmingYields);
2097 KMP_YIELD(TRUE);
2098 }
2099
2100 if (__kmp_test_adaptive_lock_only(lck, gtid))
2101 return;
2102 }
2103 }
2104
2105 // Speculative acquisition failed, so acquire it non-speculatively.
2106 // Count the non-speculative acquire attempt
2107 lck->lk.adaptive.acquire_attempts++;
2108
2109 __kmp_acquire_queuing_lock_timed_template<FALSE>(GET_QLK_PTR(lck), gtid);
2110 // We have acquired the base lock, so count that.
2111 KMP_INC_STAT(lck, nonSpeculativeAcquires);
2112}
2113
2114static void __kmp_acquire_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
2115 kmp_int32 gtid) {
2116 char const *const func = "omp_set_lock";
2117 if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2118 KMP_FATAL(LockIsUninitialized, func);
2119 }
2120 if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) == gtid) {
2121 KMP_FATAL(LockIsAlreadyOwned, func);
2122 }
2123
2124 __kmp_acquire_adaptive_lock(lck, gtid);
2125
2126 lck->lk.qlk.owner_id = gtid + 1;
2127}
2128
2129KMP_ATTRIBUTE_TARGET_RTM
2130static int __kmp_release_adaptive_lock(kmp_adaptive_lock_t *lck,
2131 kmp_int32 gtid) {
2132 if (__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(
2133 lck))) { // If the lock doesn't look claimed we must be speculating.
2134 // (Or the user's code is buggy and they're releasing without locking;
2135 // if we had XTEST we'd be able to check that case...)
2136 _xend(); // Exit speculation
2137 __kmp_update_badness_after_success(lck);
2138 } else { // Since the lock *is* visibly locked we're not speculating,
2139 // so should use the underlying lock's release scheme.
2140 __kmp_release_queuing_lock(GET_QLK_PTR(lck), gtid);
2141 }
2142 return KMP_LOCK_RELEASED;
2143}
2144
2145static int __kmp_release_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
2146 kmp_int32 gtid) {
2147 char const *const func = "omp_unset_lock";
2148 KMP_MB(); /* in case another processor initialized lock */
2149 if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2150 KMP_FATAL(LockIsUninitialized, func);
2151 }
2152 if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) == -1) {
2153 KMP_FATAL(LockUnsettingFree, func);
2154 }
2155 if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) != gtid) {
2156 KMP_FATAL(LockUnsettingSetByAnother, func);
2157 }
2158 lck->lk.qlk.owner_id = 0;
2159 __kmp_release_adaptive_lock(lck, gtid);
2160 return KMP_LOCK_RELEASED;
2161}
2162
2163static void __kmp_init_adaptive_lock(kmp_adaptive_lock_t *lck) {
2164 __kmp_init_queuing_lock(GET_QLK_PTR(lck));
2165 lck->lk.adaptive.badness = 0;
2166 lck->lk.adaptive.acquire_attempts = 0; // nonSpeculativeAcquireAttempts = 0;
2167 lck->lk.adaptive.max_soft_retries =
2168 __kmp_adaptive_backoff_params.max_soft_retries;
2169 lck->lk.adaptive.max_badness = __kmp_adaptive_backoff_params.max_badness;
2170#if KMP_DEBUG_ADAPTIVE_LOCKS
2171 __kmp_zero_speculative_stats(&lck->lk.adaptive);
2172#endif
2173 KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck));
2174}
2175
2176static void __kmp_destroy_adaptive_lock(kmp_adaptive_lock_t *lck) {
2177#if KMP_DEBUG_ADAPTIVE_LOCKS
2178 __kmp_accumulate_speculative_stats(&lck->lk.adaptive);
2179#endif
2180 __kmp_destroy_queuing_lock(GET_QLK_PTR(lck));
2181 // Nothing needed for the speculative part.
2182}
2183
2184static void __kmp_destroy_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck) {
2185 char const *const func = "omp_destroy_lock";
2186 if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2187 KMP_FATAL(LockIsUninitialized, func);
2188 }
2189 if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) != -1) {
2190 KMP_FATAL(LockStillOwned, func);
2191 }
2192 __kmp_destroy_adaptive_lock(lck);
2193}
2194
2195#endif // KMP_USE_ADAPTIVE_LOCKS
2196
2197/* ------------------------------------------------------------------------ */
2198/* DRDPA ticket locks */
2199/* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */
2200
2201static kmp_int32 __kmp_get_drdpa_lock_owner(kmp_drdpa_lock_t *lck) {
2202 return lck->lk.owner_id - 1;
2203}
2204
2205static inline bool __kmp_is_drdpa_lock_nestable(kmp_drdpa_lock_t *lck) {
2206 return lck->lk.depth_locked != -1;
2207}
2208
2209__forceinline static int
2210__kmp_acquire_drdpa_lock_timed_template(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2211 kmp_uint64 ticket = KMP_ATOMIC_INC(&lck->lk.next_ticket);
2212 kmp_uint64 mask = lck->lk.mask; // atomic load
2213 std::atomic<kmp_uint64> *polls = lck->lk.polls;
2214
2215#ifdef USE_LOCK_PROFILE
2216 if (polls[ticket & mask] != ticket)
2217 __kmp_printf("LOCK CONTENTION: %p\n", lck);
2218/* else __kmp_printf( "." );*/
2219#endif /* USE_LOCK_PROFILE */
2220
2221 // Now spin-wait, but reload the polls pointer and mask, in case the
2222 // polling area has been reconfigured. Unless it is reconfigured, the
2223 // reloads stay in L1 cache and are cheap.
2224 //
2225 // Keep this code in sync with KMP_WAIT, in kmp_dispatch.cpp !!!
2226 // The current implementation of KMP_WAIT doesn't allow for mask
2227 // and poll to be re-read every spin iteration.
2228 kmp_uint32 spins;
2229 KMP_FSYNC_PREPARE(lck);
2230 KMP_INIT_YIELD(spins);
2231 while (polls[ticket & mask] < ticket) { // atomic load
2232 KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
2233 // Re-read the mask and the poll pointer from the lock structure.
2234 //
2235 // Make certain that "mask" is read before "polls" !!!
2236 //
2237 // If another thread reconfigures the polling area and updates these
2238 // values, and we get the new value of mask and the old polls pointer, we
2239 // could access memory beyond the end of the old polling area.
2240 mask = lck->lk.mask; // atomic load
2241 polls = lck->lk.polls; // atomic load
2242 }
2243
2244 // Critical section starts here
2245 KMP_FSYNC_ACQUIRED(lck);
2246 KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n",
2247 ticket, lck));
2248 lck->lk.now_serving = ticket; // non-volatile store
2249
2250 // Deallocate a garbage polling area if we know that we are the last
2251 // thread that could possibly access it.
2252 //
2253 // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup
2254 // ticket.
2255 if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) {
2256 __kmp_free(lck->lk.old_polls);
2257 lck->lk.old_polls = NULL;
2258 lck->lk.cleanup_ticket = 0;
2259 }
2260
2261 // Check to see if we should reconfigure the polling area.
2262 // If there is still a garbage polling area to be deallocated from a
2263 // previous reconfiguration, let a later thread reconfigure it.
2264 if (lck->lk.old_polls == NULL) {
2265 bool reconfigure = false;
2266 std::atomic<kmp_uint64> *old_polls = polls;
2267 kmp_uint32 num_polls = TCR_4(lck->lk.num_polls);
2268
2269 if (TCR_4(__kmp_nth) >
2270 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
2271 // We are in oversubscription mode. Contract the polling area
2272 // down to a single location, if that hasn't been done already.
2273 if (num_polls > 1) {
2274 reconfigure = true;
2275 num_polls = TCR_4(lck->lk.num_polls);
2276 mask = 0;
2277 num_polls = 1;
2278 polls = (std::atomic<kmp_uint64> *)__kmp_allocate(num_polls *
2279 sizeof(*polls));
2280 polls[0] = ticket;
2281 }
2282 } else {
2283 // We are in under/fully subscribed mode. Check the number of
2284 // threads waiting on the lock. The size of the polling area
2285 // should be at least the number of threads waiting.
2286 kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1;
2287 if (num_waiting > num_polls) {
2288 kmp_uint32 old_num_polls = num_polls;
2289 reconfigure = true;
2290 do {
2291 mask = (mask << 1) | 1;
2292 num_polls *= 2;
2293 } while (num_polls <= num_waiting);
2294
2295 // Allocate the new polling area, and copy the relevant portion
2296 // of the old polling area to the new area. __kmp_allocate()
2297 // zeroes the memory it allocates, and most of the old area is
2298 // just zero padding, so we only copy the release counters.
2299 polls = (std::atomic<kmp_uint64> *)__kmp_allocate(num_polls *
2300 sizeof(*polls));
2301 kmp_uint32 i;
2302 for (i = 0; i < old_num_polls; i++) {
2303 polls[i].store(old_polls[i]);
2304 }
2305 }
2306 }
2307
2308 if (reconfigure) {
2309 // Now write the updated fields back to the lock structure.
2310 //
2311 // Make certain that "polls" is written before "mask" !!!
2312 //
2313 // If another thread picks up the new value of mask and the old polls
2314 // pointer, it could access memory beyond the end of the old polling
2315 // area.
2316 //
2317 // On x86, we need memory fences.
2318 KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring "
2319 "lock %p to %d polls\n",
2320 ticket, lck, num_polls));
2321
2322 lck->lk.old_polls = old_polls;
2323 lck->lk.polls = polls; // atomic store
2324
2325 KMP_MB();
2326
2327 lck->lk.num_polls = num_polls;
2328 lck->lk.mask = mask; // atomic store
2329
2330 KMP_MB();
2331
2332 // Only after the new polling area and mask have been flushed
2333 // to main memory can we update the cleanup ticket field.
2334 //
2335 // volatile load / non-volatile store
2336 lck->lk.cleanup_ticket = lck->lk.next_ticket;
2337 }
2338 }
2339 return KMP_LOCK_ACQUIRED_FIRST;
2340}
2341
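// Illustrative sketch, kept out of the build with #if 0: the core DRDPA idea
// with a fixed-size polling area and no reconfiguration or cleanup. Each
// waiter spins on its own slot, polls[ticket % TOY_POLLS], so a release only
// touches the cache line the next waiter is polling. The toy_* names are
// hypothetical and only mirror the fields used above.
#if 0
#include <atomic>
#include <cstdint>

enum { TOY_POLLS = 8 }; // power of two, fixed for the sketch

struct toy_drdpa_lock {
  std::atomic<std::uint64_t> next_ticket;
  std::atomic<std::uint64_t> polls[TOY_POLLS]; // polls[i] = ticket now served
};

static void toy_init(toy_drdpa_lock *l) {
  l->next_ticket.store(0);
  for (int i = 0; i < TOY_POLLS; ++i)
    l->polls[i].store(0);
}

static std::uint64_t toy_acquire(toy_drdpa_lock *l) {
  std::uint64_t ticket = l->next_ticket.fetch_add(1);
  while (l->polls[ticket % TOY_POLLS].load() < ticket)
    ; // spin on a private slot until our ticket is served
  return ticket;
}

static void toy_release(toy_drdpa_lock *l, std::uint64_t ticket) {
  // Publish the next ticket in that ticket's slot; the real lock keeps the
  // current ticket in lk.now_serving instead of passing it around.
  l->polls[(ticket + 1) % TOY_POLLS].store(ticket + 1);
}
#endif
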
2342int __kmp_acquire_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2343 int retval = __kmp_acquire_drdpa_lock_timed_template(lck, gtid);
2344 return retval;
2345}
2346
2347static int __kmp_acquire_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2348 kmp_int32 gtid) {
2349 char const *const func = "omp_set_lock";
2350 if (lck->lk.initialized != lck) {
2351 KMP_FATAL(LockIsUninitialized, func);
2352 }
2353 if (__kmp_is_drdpa_lock_nestable(lck)) {
2354 KMP_FATAL(LockNestableUsedAsSimple, func);
2355 }
2356 if ((gtid >= 0) && (__kmp_get_drdpa_lock_owner(lck) == gtid)) {
2357 KMP_FATAL(LockIsAlreadyOwned, func);
2358 }
2359
2360 __kmp_acquire_drdpa_lock(lck, gtid);
2361
2362 lck->lk.owner_id = gtid + 1;
2363 return KMP_LOCK_ACQUIRED_FIRST;
2364}
2365
2366int __kmp_test_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2367 // First get a ticket, then read the polls pointer and the mask.
2368 // The polls pointer must be read before the mask!!! (See above)
2369 kmp_uint64 ticket = lck->lk.next_ticket; // atomic load
2370 std::atomic<kmp_uint64> *polls = lck->lk.polls;
2371 kmp_uint64 mask = lck->lk.mask; // atomic load
2372 if (polls[ticket & mask] == ticket) {
2373 kmp_uint64 next_ticket = ticket + 1;
2374 if (__kmp_atomic_compare_store_acq(&lck->lk.next_ticket, ticket,
2375 next_ticket)) {
2376 KMP_FSYNC_ACQUIRED(lck);
2377 KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n",
2378 ticket, lck));
2379 lck->lk.now_serving = ticket; // non-volatile store
2380
2381 // Since no threads are waiting, there is no possibility that we would
2382 // want to reconfigure the polling area. We might have the cleanup ticket
2383 // value (which says that it is now safe to deallocate old_polls), but
2384 // we'll let a later thread which calls __kmp_acquire_lock do that - this
2385 // routine isn't supposed to block, and we would risk blocks if we called
2386 // __kmp_free() to do the deallocation.
2387 return TRUE;
2388 }
2389 }
2390 return FALSE;
2391}
2392
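// User-level view of this non-blocking path (a standalone example, not built
// here): omp_test_lock() tries to acquire without blocking and returns
// nonzero on success, which is the TRUE/FALSE result computed above. It only
// routes to this drdpa code when that lock kind is selected at runtime.
#if 0
#include <omp.h>
#include <stdio.h>

int main(void) {
  omp_lock_t l;
  omp_init_lock(&l);
  if (omp_test_lock(&l)) { // acquired without blocking
    printf("got the lock\n");
    omp_unset_lock(&l);
  } else {
    printf("lock was busy\n");
  }
  omp_destroy_lock(&l);
  return 0;
}
#endif
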
2393static int __kmp_test_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2394 kmp_int32 gtid) {
2395 char const *const func = "omp_test_lock";
2396 if (lck->lk.initialized != lck) {
2397 KMP_FATAL(LockIsUninitialized, func);
2398 }
2399 if (__kmp_is_drdpa_lock_nestable(lck)) {
2400 KMP_FATAL(LockNestableUsedAsSimple, func);
2401 }
2402
2403 int retval = __kmp_test_drdpa_lock(lck, gtid);
2404
2405 if (retval) {
2406 lck->lk.owner_id = gtid + 1;
2407 }
2408 return retval;
2409}
2410
2411int __kmp_release_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2412 // Read the ticket value from the lock data struct, then the polls pointer and
2413 // the mask. The polls pointer must be read before the mask!!! (See above)
2414 kmp_uint64 ticket = lck->lk.now_serving + 1; // non-atomic load
2415 std::atomic<kmp_uint64> *polls = lck->lk.polls; // atomic load
2416 kmp_uint64 mask = lck->lk.mask; // atomic load
2417 KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n",
2418 ticket - 1, lck));
2419 KMP_FSYNC_RELEASING(lck);
2420 polls[ticket & mask] = ticket; // atomic store
2421 return KMP_LOCK_RELEASED;
2422}
2423
2424static int __kmp_release_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2425 kmp_int32 gtid) {
2426 char const *const func = "omp_unset_lock";
2427 KMP_MB(); /* in case another processor initialized lock */
2428 if (lck->lk.initialized != lck) {
2429 KMP_FATAL(LockIsUninitialized, func);
2430 }
2431 if (__kmp_is_drdpa_lock_nestable(lck)) {
2432 KMP_FATAL(LockNestableUsedAsSimple, func);
2433 }
2434 if (__kmp_get_drdpa_lock_owner(lck) == -1) {
2435 KMP_FATAL(LockUnsettingFree, func);
2436 }
2437 if ((gtid >= 0) && (__kmp_get_drdpa_lock_owner(lck) >= 0) &&
2438 (__kmp_get_drdpa_lock_owner(lck) != gtid)) {
2439 KMP_FATAL(LockUnsettingSetByAnother, func);
2440 }
2441 lck->lk.owner_id = 0;
2442 return __kmp_release_drdpa_lock(lck, gtid);
2443}
2444
2445void __kmp_init_drdpa_lock(kmp_drdpa_lock_t *lck) {
2446 lck->lk.location = NULL;
2447 lck->lk.mask = 0;
2448 lck->lk.num_polls = 1;
2449 lck->lk.polls = (std::atomic<kmp_uint64> *)__kmp_allocate(
2450 lck->lk.num_polls * sizeof(*(lck->lk.polls)));
2451 lck->lk.cleanup_ticket = 0;
2452 lck->lk.old_polls = NULL;
2453 lck->lk.next_ticket = 0;
2454 lck->lk.now_serving = 0;
2455 lck->lk.owner_id = 0; // no thread owns the lock.
2456 lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
2457 lck->lk.initialized = lck;
2458
2459 KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck));
2460}
2461
2462void __kmp_destroy_drdpa_lock(kmp_drdpa_lock_t *lck) {
2463 lck->lk.initialized = NULL;
2464 lck->lk.location = NULL;
2465 if (lck->lk.polls.load() != NULL) {
2466 __kmp_free(lck->lk.polls.load());
2467 lck->lk.polls = NULL;
2468 }
2469 if (lck->lk.old_polls != NULL) {
2470 __kmp_free(lck->lk.old_polls);
2471 lck->lk.old_polls = NULL;
2472 }
2473 lck->lk.mask = 0;
2474 lck->lk.num_polls = 0;
2475 lck->lk.cleanup_ticket = 0;
2476 lck->lk.next_ticket = 0;
2477 lck->lk.now_serving = 0;
2478 lck->lk.owner_id = 0;
2479 lck->lk.depth_locked = -1;
2480}
2481
2482static void __kmp_destroy_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
2483 char const *const func = "omp_destroy_lock";
2484 if (lck->lk.initialized != lck) {
2485 KMP_FATAL(LockIsUninitialized, func);
2486 }
2487 if (__kmp_is_drdpa_lock_nestable(lck)) {
2488 KMP_FATAL(LockNestableUsedAsSimple, func);
2489 }
2490 if (__kmp_get_drdpa_lock_owner(lck) != -1) {
2491 KMP_FATAL(LockStillOwned, func);
2492 }
2493 __kmp_destroy_drdpa_lock(lck);
2494}
2495
2496// nested drdpa ticket locks
2497
2498int __kmp_acquire_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2499 KMP_DEBUG_ASSERT(gtid >= 0);
2500
2501 if (__kmp_get_drdpa_lock_owner(lck) == gtid) {
2502 lck->lk.depth_locked += 1;
2503 return KMP_LOCK_ACQUIRED_NEXT;
2504 } else {
2505 __kmp_acquire_drdpa_lock_timed_template(lck, gtid);
2506 KMP_MB();
2507 lck->lk.depth_locked = 1;
2508 KMP_MB();
2509 lck->lk.owner_id = gtid + 1;
2510 return KMP_LOCK_ACQUIRED_FIRST;
2511 }
2512}
2513
2514static void __kmp_acquire_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2515 kmp_int32 gtid) {
2516 char const *const func = "omp_set_nest_lock";
2517 if (lck->lk.initialized != lck) {
2518 KMP_FATAL(LockIsUninitialized, func);
2519 }
2520 if (!__kmp_is_drdpa_lock_nestable(lck)) {
2521 KMP_FATAL(LockSimpleUsedAsNestable, func);
2522 }
2523 __kmp_acquire_nested_drdpa_lock(lck, gtid);
2524}
2525
2526int __kmp_test_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2527 int retval;
2528
2529 KMP_DEBUG_ASSERT(gtid >= 0);
2530
2531 if (__kmp_get_drdpa_lock_owner(lck) == gtid) {
2532 retval = ++lck->lk.depth_locked;
2533 } else if (!__kmp_test_drdpa_lock(lck, gtid)) {
2534 retval = 0;
2535 } else {
2536 KMP_MB();
2537 retval = lck->lk.depth_locked = 1;
2538 KMP_MB();
2539 lck->lk.owner_id = gtid + 1;
2540 }
2541 return retval;
2542}
2543
2544static int __kmp_test_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2545 kmp_int32 gtid) {
2546 char const *const func = "omp_test_nest_lock";
2547 if (lck->lk.initialized != lck) {
2548 KMP_FATAL(LockIsUninitialized, func);
2549 }
2550 if (!__kmp_is_drdpa_lock_nestable(lck)) {
2551 KMP_FATAL(LockSimpleUsedAsNestable, func);
2552 }
2553 return __kmp_test_nested_drdpa_lock(lck, gtid);
2554}
2555
2556int __kmp_release_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2557 KMP_DEBUG_ASSERT(gtid >= 0);
2558
2559 KMP_MB();
2560 if (--(lck->lk.depth_locked) == 0) {
2561 KMP_MB();
2562 lck->lk.owner_id = 0;
2563 __kmp_release_drdpa_lock(lck, gtid);
2564 return KMP_LOCK_RELEASED;
2565 }
2566 return KMP_LOCK_STILL_HELD;
2567}
2568
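// User-level view of the nested acquire/release pair above (a standalone
// example, not built here). A thread that already owns an omp_nest_lock_t
// only bumps the depth count; the lock is released for real when the count
// drops back to zero (KMP_LOCK_STILL_HELD vs. KMP_LOCK_RELEASED above).
// Which internal lock kind backs omp_nest_lock_t depends on the runtime
// configuration.
#if 0
#include <omp.h>
#include <stdio.h>

int main(void) {
  omp_nest_lock_t l;
  omp_init_nest_lock(&l);

  omp_set_nest_lock(&l);   // depth 1: real acquisition
  omp_set_nest_lock(&l);   // depth 2: same owner, just counts up
  printf("still holding the lock\n");
  omp_unset_nest_lock(&l); // depth 1: still held
  omp_unset_nest_lock(&l); // depth 0: actually released

  omp_destroy_nest_lock(&l);
  return 0;
}
#endif
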
2569static int __kmp_release_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2570 kmp_int32 gtid) {
2571 char const *const func = "omp_unset_nest_lock";
2572 KMP_MB(); /* in case another processor initialized lock */
2573 if (lck->lk.initialized != lck) {
2574 KMP_FATAL(LockIsUninitialized, func);
2575 }
2576 if (!__kmp_is_drdpa_lock_nestable(lck)) {
2577 KMP_FATAL(LockSimpleUsedAsNestable, func);
2578 }
2579 if (__kmp_get_drdpa_lock_owner(lck) == -1) {
2580 KMP_FATAL(LockUnsettingFree, func);
2581 }
2582 if (__kmp_get_drdpa_lock_owner(lck) != gtid) {
2583 KMP_FATAL(LockUnsettingSetByAnother, func);
2584 }
2585 return __kmp_release_nested_drdpa_lock(lck, gtid);
2586}
2587
2588void __kmp_init_nested_drdpa_lock(kmp_drdpa_lock_t *lck) {
2589 __kmp_init_drdpa_lock(lck);
2590 lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
2591}
2592
2593void __kmp_destroy_nested_drdpa_lock(kmp_drdpa_lock_t *lck) {
2594 __kmp_destroy_drdpa_lock(lck);
2595 lck->lk.depth_locked = 0;
2596}
2597
2598static void __kmp_destroy_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
2599 char const *const func = "omp_destroy_nest_lock";
2600 if (lck->lk.initialized != lck) {
2601 KMP_FATAL(LockIsUninitialized, func);
2602 }
2603 if (!__kmp_is_drdpa_lock_nestable(lck)) {
2604 KMP_FATAL(LockSimpleUsedAsNestable, func);
2605 }
2606 if (__kmp_get_drdpa_lock_owner(lck) != -1) {
2607 KMP_FATAL(LockStillOwned, func);
2608 }
2609 __kmp_destroy_nested_drdpa_lock(lck);
2610}
2611
2612// access functions to fields which don't exist for all lock kinds.
2613
2614static const ident_t *__kmp_get_drdpa_lock_location(kmp_drdpa_lock_t *lck) {
2615 return lck->lk.location;
2616}
2617
2618static void __kmp_set_drdpa_lock_location(kmp_drdpa_lock_t *lck,
2619 const ident_t *loc) {
2620 lck->lk.location = loc;
2621}
2622
2623static kmp_lock_flags_t __kmp_get_drdpa_lock_flags(kmp_drdpa_lock_t *lck) {
2624 return lck->lk.flags;
2625}
2626
2627static void __kmp_set_drdpa_lock_flags(kmp_drdpa_lock_t *lck,
2628 kmp_lock_flags_t flags) {
2629 lck->lk.flags = flags;
2630}
2631
2632// Time stamp counter
2633#if KMP_ARCH_X86 || KMP_ARCH_X86_64
2634#define __kmp_tsc() __kmp_hardware_timestamp()
2635// Runtime's default backoff parameters
2636kmp_backoff_t __kmp_spin_backoff_params = {1, 4096, 100};
2637#else
2638// Use nanoseconds for other platforms
2639extern kmp_uint64 __kmp_now_nsec();
2640kmp_backoff_t __kmp_spin_backoff_params = {1, 256, 100};
2641#define __kmp_tsc() __kmp_now_nsec()
2642#endif
2643
2644// A useful predicate for dealing with timestamps that may wrap.
2645// Is a before b? Since the timestamps may wrap, this is asking whether it's
2646// shorter to go clockwise from a to b around the clock-face, or anti-clockwise.
2647// Times where going clockwise is less distance than going anti-clockwise
2648// are in the future, others are in the past. E.g. with a = MAX-1, b = MAX+1 (=0),
2649// the unsigned comparison a > b is true even though a has not yet reached b; the
2650// signed difference (signed(a) = -2, signed(b) = 0) captures the real ordering.
2651static inline bool before(kmp_uint64 a, kmp_uint64 b) {
2652 return ((kmp_int64)b - (kmp_int64)a) > 0;
2653}
2654
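// A couple of worked cases for the predicate above (kept out of the build
// with #if 0): the signed subtraction handles timestamps that wrap around
// the 64-bit counter.
#if 0
#include <assert.h>
#include <stdint.h>

int main(void) {
  uint64_t max = UINT64_MAX;
  assert(before(100, 200));    // plain ordering: 100 is before 200
  assert(!before(200, 100));
  assert(before(max - 1, 1));  // wrapped: MAX-1 is before 1, even though
  assert(!before(1, max - 1)); // MAX-1 > 1 as an unsigned comparison
  return 0;
}
#endif
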
2655// Truncated binary exponential backoff function
2656void __kmp_spin_backoff(kmp_backoff_t *boff) {
2657 // We could flatten this loop, but making it a nested loop gives better results.
2658 kmp_uint32 i;
2659 for (i = boff->step; i > 0; i--) {
2660 kmp_uint64 goal = __kmp_tsc() + boff->min_tick;
2661 do {
2662 KMP_CPU_PAUSE();
2663 } while (before(__kmp_tsc(), goal));
2664 }
2665 boff->step = (boff->step << 1 | 1) & (boff->max_backoff - 1);
2666}
2667
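// Illustrative sketch (not built): how the step recurrence above evolves,
// assuming the default initializer is {step, max_backoff, min_tick}. Starting
// from step = 1 with max_backoff = 4096 (a power of two), each call doubles
// the number of min_tick-long pause windows and saturates at max_backoff - 1:
// 1, 3, 7, 15, ..., 4095, 4095, ...
#if 0
#include <stdint.h>
#include <stdio.h>

int main(void) {
  uint32_t step = 1, max_backoff = 4096;
  for (int call = 0; call < 16; ++call) {
    printf("call %2d pauses for %u windows\n", call, step);
    step = (step << 1 | 1) & (max_backoff - 1); // same update as above
  }
  return 0;
}
#endif
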
2668#if KMP_USE_DYNAMIC_LOCK
2669
2670// Direct lock initializer. It simply writes a tag to the low 8 bits of the
2671// lock word.
2672static void __kmp_init_direct_lock(kmp_dyna_lock_t *lck,
2673 kmp_dyna_lockseq_t seq) {
2674 TCW_4(*lck, KMP_GET_D_TAG(seq));
2675 KA_TRACE(
2676 20,
2677 ("__kmp_init_direct_lock: initialized direct lock with type#%d\n", seq));
2678}
2679
2680#if KMP_USE_TSX
2681
2682// HLE lock functions - imported from the testbed runtime.
2683#define HLE_ACQUIRE ".byte 0xf2;"
2684#define HLE_RELEASE ".byte 0xf3;"
2685
2686static inline kmp_uint32 swap4(kmp_uint32 volatile *p, kmp_uint32 v) {
2687 __asm__ volatile(HLE_ACQUIRE "xchg %1,%0" : "+r"(v), "+m"(*p) : : "memory");
2688 return v;
2689}
2690
2691static void __kmp_destroy_hle_lock(kmp_dyna_lock_t *lck) { TCW_4(*lck, 0); }
2692
2693static void __kmp_destroy_hle_lock_with_checks(kmp_dyna_lock_t *lck) {
2694 TCW_4(*lck, 0);
2695}
2696
2697static void __kmp_acquire_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) {
2698 // Use gtid for KMP_LOCK_BUSY if necessary
2699 if (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)) {
2700 int delay = 1;
2701 do {
2702 while (*(kmp_uint32 volatile *)lck != KMP_LOCK_FREE(hle)) {
2703 for (int i = delay; i != 0; --i)
2704 KMP_CPU_PAUSE();
2705 delay = ((delay << 1) | 1) & 7;
2706 }
2707 } while (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle));
2708 }
2709}
2710
2711static void __kmp_acquire_hle_lock_with_checks(kmp_dyna_lock_t *lck,
2712 kmp_int32 gtid) {
2713 __kmp_acquire_hle_lock(lck, gtid); // TODO: add checks
2714}
2715
2716static int __kmp_release_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) {
2717 __asm__ volatile(HLE_RELEASE "movl %1,%0"
2718 : "=m"(*lck)
2719 : "r"(KMP_LOCK_FREE(hle))
2720 : "memory");
2721 return KMP_LOCK_RELEASED;
2722}
2723
2724static int __kmp_release_hle_lock_with_checks(kmp_dyna_lock_t *lck,
2725 kmp_int32 gtid) {
2726 return __kmp_release_hle_lock(lck, gtid); // TODO: add checks
2727}
2728
2729static int __kmp_test_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) {
2730 return swap4(lck, KMP_LOCK_BUSY(1, hle)) == KMP_LOCK_FREE(hle);
2731}
2732
2733static int __kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck,
2734 kmp_int32 gtid) {
2735 return __kmp_test_hle_lock(lck, gtid); // TODO: add checks
2736}
2737
2738static void __kmp_init_rtm_queuing_lock(kmp_queuing_lock_t *lck) {
2739 __kmp_init_queuing_lock(lck);
2740}
2741
2742static void __kmp_destroy_rtm_queuing_lock(kmp_queuing_lock_t *lck) {
2743 __kmp_destroy_queuing_lock(lck);
2744}
2745
2746static void
2747__kmp_destroy_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
2748 __kmp_destroy_queuing_lock_with_checks(lck);
2749}
2750
2751KMP_ATTRIBUTE_TARGET_RTM
2752static void __kmp_acquire_rtm_queuing_lock(kmp_queuing_lock_t *lck,
2753 kmp_int32 gtid) {
2754 unsigned retries = 3, status;
2755 do {
2756 status = _xbegin();
2757 if (status == _XBEGIN_STARTED) {
2758 if (__kmp_is_unlocked_queuing_lock(lck))
2759 return;
2760 _xabort(0xff);
2761 }
2762 if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) {
2763 // Wait until lock becomes free
2764 while (!__kmp_is_unlocked_queuing_lock(lck)) {
2765 KMP_YIELD(TRUE);
2766 }
2767 } else if (!(status & _XABORT_RETRY))
2768 break;
2769 } while (retries--);
2770
2771 // Fall-back non-speculative lock (xchg)
2772 __kmp_acquire_queuing_lock(lck, gtid);
2773}
2774
2775static void __kmp_acquire_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
2776 kmp_int32 gtid) {
2777 __kmp_acquire_rtm_queuing_lock(lck, gtid);
2778}
2779
2780KMP_ATTRIBUTE_TARGET_RTM
2781static int __kmp_release_rtm_queuing_lock(kmp_queuing_lock_t *lck,
2782 kmp_int32 gtid) {
2783 if (__kmp_is_unlocked_queuing_lock(lck)) {
2784 // Releasing from speculation
2785 _xend();
2786 } else {
2787 // Releasing from a real lock
2788 __kmp_release_queuing_lock(lck, gtid);
2789 }
2790 return KMP_LOCK_RELEASED;
2791}
2792
2793static int __kmp_release_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
2794 kmp_int32 gtid) {
2795 return __kmp_release_rtm_queuing_lock(lck, gtid);
2796}
2797
2798KMP_ATTRIBUTE_TARGET_RTM
2799static int __kmp_test_rtm_queuing_lock(kmp_queuing_lock_t *lck,
2800 kmp_int32 gtid) {
2801 unsigned retries = 3, status;
2802 do {
2803 status = _xbegin();
2804 if (status == _XBEGIN_STARTED && __kmp_is_unlocked_queuing_lock(lck)) {
2805 return 1;
2806 }
2807 if (!(status & _XABORT_RETRY))
2808 break;
2809 } while (retries--);
2810
2811 return __kmp_test_queuing_lock(lck, gtid);
2812}
2813
2814static int __kmp_test_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
2815 kmp_int32 gtid) {
2816 return __kmp_test_rtm_queuing_lock(lck, gtid);
2817}
2818
2819// Reuse kmp_tas_lock_t for the TSX lock, which uses RTM with a fall-back spin lock.
2820typedef kmp_tas_lock_t kmp_rtm_spin_lock_t;
2821
2822static void __kmp_destroy_rtm_spin_lock(kmp_rtm_spin_lock_t *lck) {
2823 KMP_ATOMIC_ST_REL(&lck->lk.poll, 0);
2824}
2825
2826static void __kmp_destroy_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck) {
2827 __kmp_destroy_rtm_spin_lock(lck);
2828}
2829
2830KMP_ATTRIBUTE_TARGET_RTM
2831static int __kmp_acquire_rtm_spin_lock(kmp_rtm_spin_lock_t *lck,
2832 kmp_int32 gtid) {
2833 unsigned retries = 3, status;
2834 kmp_int32 lock_free = KMP_LOCK_FREE(rtm_spin);
2835 kmp_int32 lock_busy = KMP_LOCK_BUSY(1, rtm_spin);
2836 do {
2837 status = _xbegin();
2838 if (status == _XBEGIN_STARTED) {
2839 if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == lock_free)
2840 return KMP_LOCK_ACQUIRED_FIRST;
2841 _xabort(0xff);
2842 }
2843 if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) {
2844 // Wait until lock becomes free
2845 while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != lock_free) {
2846 KMP_YIELD(TRUE);
2847 }
2848 } else if (!(status & _XABORT_RETRY))
2849 break;
2850 } while (retries--);
2851
2852 // Fall-back spin lock
2853 KMP_FSYNC_PREPARE(lck);
2854 kmp_backoff_t backoff = __kmp_spin_backoff_params;
2855 while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != lock_free ||
2856 !__kmp_atomic_compare_store_acq(&lck->lk.poll, lock_free, lock_busy)) {
2857 __kmp_spin_backoff(&backoff);
2858 }
2859 KMP_FSYNC_ACQUIRED(lck);
2860 return KMP_LOCK_ACQUIRED_FIRST;
2861}
2862
2863static int __kmp_acquire_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck,
2864 kmp_int32 gtid) {
2865 return __kmp_acquire_rtm_spin_lock(lck, gtid);
2866}
2867
2868KMP_ATTRIBUTE_TARGET_RTM
2869static int __kmp_release_rtm_spin_lock(kmp_rtm_spin_lock_t *lck,
2870 kmp_int32 gtid) {
2871 if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == KMP_LOCK_FREE(rtm_spin)) {
2872 // Releasing from speculation
2873 _xend();
2874 } else {
2875 // Releasing from a real lock
2876 KMP_FSYNC_RELEASING(lck);
2877 KMP_ATOMIC_ST_REL(&lck->lk.poll, KMP_LOCK_FREE(rtm_spin));
2878 }
2879 return KMP_LOCK_RELEASED;
2880}
2881
2882static int __kmp_release_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck,
2883 kmp_int32 gtid) {
2884 return __kmp_release_rtm_spin_lock(lck, gtid);
2885}
2886
2887KMP_ATTRIBUTE_TARGET_RTM
2888static int __kmp_test_rtm_spin_lock(kmp_rtm_spin_lock_t *lck, kmp_int32 gtid) {
2889 unsigned retries = 3, status;
2890 kmp_int32 lock_free = KMP_LOCK_FREE(rtm_spin);
2891 kmp_int32 lock_busy = KMP_LOCK_BUSY(1, rtm_spin);
2892 do {
2893 status = _xbegin();
2894 if (status == _XBEGIN_STARTED &&
2895 KMP_ATOMIC_LD_RLX(&lck->lk.poll) == lock_free) {
2896 return TRUE;
2897 }
2898 if (!(status & _XABORT_RETRY))
2899 break;
2900 } while (retries--);
2901
2902 if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == lock_free &&
2903 __kmp_atomic_compare_store_acq(&lck->lk.poll, lock_free, lock_busy)) {
2904 KMP_FSYNC_ACQUIRED(lck);
2905 return TRUE;
2906 }
2907 return FALSE;
2908}
2909
2910static int __kmp_test_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck,
2911 kmp_int32 gtid) {
2912 return __kmp_test_rtm_spin_lock(lck, gtid);
2913}
2914
2915#endif // KMP_USE_TSX
2916
2917// Entry functions for indirect locks (first element of direct lock jump tables)
2918static void __kmp_init_indirect_lock(kmp_dyna_lock_t *l,
2919 kmp_dyna_lockseq_t tag);
2920static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock);
2921static int __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
2922static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
2923static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
2924static int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
2925 kmp_int32);
2926static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
2927 kmp_int32);
2928static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
2929 kmp_int32);
2930
2931// Lock function definitions for the union parameter type
2932#define KMP_FOREACH_LOCK_KIND(m, a) m(ticket, a) m(queuing, a) m(drdpa, a)
2933
2934#define expand1(lk, op) \
2935 static void __kmp_##op##_##lk##_##lock(kmp_user_lock_p lock) { \
2936 __kmp_##op##_##lk##_##lock(&lock->lk); \
2937 }
2938#define expand2(lk, op) \
2939 static int __kmp_##op##_##lk##_##lock(kmp_user_lock_p lock, \
2940 kmp_int32 gtid) { \
2941 return __kmp_##op##_##lk##_##lock(&lock->lk, gtid); \
2942 }
2943#define expand3(lk, op) \
2944 static void __kmp_set_##lk##_##lock_flags(kmp_user_lock_p lock, \
2945 kmp_lock_flags_t flags) { \
2946 __kmp_set_##lk##_lock_flags(&lock->lk, flags); \
2947 }
2948#define expand4(lk, op) \
2949 static void __kmp_set_##lk##_##lock_location(kmp_user_lock_p lock, \
2950 const ident_t *loc) { \
2951 __kmp_set_##lk##_lock_location(&lock->lk, loc); \
2952 }
2953
2954KMP_FOREACH_LOCK_KIND(expand1, init)
2955KMP_FOREACH_LOCK_KIND(expand1, init_nested)
2956KMP_FOREACH_LOCK_KIND(expand1, destroy)
2957KMP_FOREACH_LOCK_KIND(expand1, destroy_nested)
2958KMP_FOREACH_LOCK_KIND(expand2, acquire)
2959KMP_FOREACH_LOCK_KIND(expand2, acquire_nested)
2960KMP_FOREACH_LOCK_KIND(expand2, release)
2961KMP_FOREACH_LOCK_KIND(expand2, release_nested)
2962KMP_FOREACH_LOCK_KIND(expand2, test)
2963KMP_FOREACH_LOCK_KIND(expand2, test_nested)
2964KMP_FOREACH_LOCK_KIND(expand3, )
2965KMP_FOREACH_LOCK_KIND(expand4, )
2966
2967#undef expand1
2968#undef expand2
2969#undef expand3
2970#undef expand4
2971
2972// Jump tables for the indirect lock functions
2973// Only fill in the odd entries; that avoids the need to shift out the low bit.
2974
2975// init functions
2976#define expand(l, op) 0, __kmp_init_direct_lock,
2977void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t) = {
2978 __kmp_init_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, init)};
2979#undef expand
2980
2981// destroy functions
2982#define expand(l, op) 0, (void (*)(kmp_dyna_lock_t *))__kmp_##op##_##l##_lock,
2983static void (*direct_destroy[])(kmp_dyna_lock_t *) = {
2984 __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy)};
2985#undef expand
2986#define expand(l, op) \
2987 0, (void (*)(kmp_dyna_lock_t *))__kmp_destroy_##l##_lock_with_checks,
2988static void (*direct_destroy_check[])(kmp_dyna_lock_t *) = {
2989 __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy)};
2990#undef expand
2991
2992// set/acquire functions
2993#define expand(l, op) \
2994 0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
2995static int (*direct_set[])(kmp_dyna_lock_t *, kmp_int32) = {
2996 __kmp_set_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, acquire)};
2997#undef expand
2998#define expand(l, op) \
2999 0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
3000static int (*direct_set_check[])(kmp_dyna_lock_t *, kmp_int32) = {
3001 __kmp_set_indirect_lock_with_checks, 0,
3002 KMP_FOREACH_D_LOCK(expand, acquire)};
3003#undef expand
3004
3005// unset/release and test functions
3006#define expand(l, op) \
3007 0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
3008static int (*direct_unset[])(kmp_dyna_lock_t *, kmp_int32) = {
3009 __kmp_unset_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, release)};
3010static int (*direct_test[])(kmp_dyna_lock_t *, kmp_int32) = {
3011 __kmp_test_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, test)};
3012#undef expand
3013#define expand(l, op) \
3014 0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
3015static int (*direct_unset_check[])(kmp_dyna_lock_t *, kmp_int32) = {
3016 __kmp_unset_indirect_lock_with_checks, 0,
3017 KMP_FOREACH_D_LOCK(expand, release)};
3018static int (*direct_test_check[])(kmp_dyna_lock_t *, kmp_int32) = {
3019 __kmp_test_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, test)};
3020#undef expand
3021
3022// Exposes only one set of jump tables (*lock or *lock_with_checks).
3023void (**__kmp_direct_destroy)(kmp_dyna_lock_t *) = 0;
3024int (**__kmp_direct_set)(kmp_dyna_lock_t *, kmp_int32) = 0;
3025int (**__kmp_direct_unset)(kmp_dyna_lock_t *, kmp_int32) = 0;
3026int (**__kmp_direct_test)(kmp_dyna_lock_t *, kmp_int32) = 0;
3027
3028// Jump tables for the indirect lock functions
3029#define expand(l, op) (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock,
3030void (*__kmp_indirect_init[])(kmp_user_lock_p) = {
3031 KMP_FOREACH_I_LOCK(expand, init)};
3032#undef expand
3033
3034#define expand(l, op) (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock,
3035static void (*indirect_destroy[])(kmp_user_lock_p) = {
3036 KMP_FOREACH_I_LOCK(expand, destroy)};
3037#undef expand
3038#define expand(l, op) \
3039 (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock_with_checks,
3040static void (*indirect_destroy_check[])(kmp_user_lock_p) = {
3041 KMP_FOREACH_I_LOCK(expand, destroy)};
3042#undef expand
3043
3044// set/acquire functions
3045#define expand(l, op) \
3046 (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock,
3047static int (*indirect_set[])(kmp_user_lock_p,
3048 kmp_int32) = {KMP_FOREACH_I_LOCK(expand, acquire)};
3049#undef expand
3050#define expand(l, op) \
3051 (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks,
3052static int (*indirect_set_check[])(kmp_user_lock_p, kmp_int32) = {
3053 KMP_FOREACH_I_LOCK(expand, acquire)};
3054#undef expand
3055
3056// unset/release and test functions
3057#define expand(l, op) \
3058 (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock,
3059static int (*indirect_unset[])(kmp_user_lock_p, kmp_int32) = {
3060 KMP_FOREACH_I_LOCK(expand, release)};
3061static int (*indirect_test[])(kmp_user_lock_p,
3062 kmp_int32) = {KMP_FOREACH_I_LOCK(expand, test)};
3063#undef expand
3064#define expand(l, op) \
3065 (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks,
3066static int (*indirect_unset_check[])(kmp_user_lock_p, kmp_int32) = {
3067 KMP_FOREACH_I_LOCK(expand, release)};
3068static int (*indirect_test_check[])(kmp_user_lock_p, kmp_int32) = {
3069 KMP_FOREACH_I_LOCK(expand, test)};
3070#undef expand
3071
3072// Exposes only one set of jump tables (*lock or *lock_with_checks).
3073void (**__kmp_indirect_destroy)(kmp_user_lock_p) = 0;
3074int (**__kmp_indirect_set)(kmp_user_lock_p, kmp_int32) = 0;
3075int (**__kmp_indirect_unset)(kmp_user_lock_p, kmp_int32) = 0;
3076int (**__kmp_indirect_test)(kmp_user_lock_p, kmp_int32) = 0;
3077
3078// Lock index table.
3079kmp_indirect_lock_table_t __kmp_i_lock_table;
3080
3081// Size of indirect locks.
3082static kmp_uint32 __kmp_indirect_lock_size[KMP_NUM_I_LOCKS] = {0};
3083
3084// Jump tables for lock accessor/modifier.
3085void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p,
3086 const ident_t *) = {0};
3087void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p,
3088 kmp_lock_flags_t) = {0};
3089const ident_t *(*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])(
3090 kmp_user_lock_p) = {0};
3091kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])(
3092 kmp_user_lock_p) = {0};
3093
3094// Use different lock pools for different lock types.
3095static kmp_indirect_lock_t *__kmp_indirect_lock_pool[KMP_NUM_I_LOCKS] = {0};
3096
3097// User lock allocator for dynamically dispatched indirect locks. Every entry of
3098// the indirect lock table holds the address and type of the allocated indirect
3099// lock (kmp_indirect_lock_t), and the size of the table doubles when it is
3100// full. A destroyed indirect lock object is returned to the reusable pool of
3101// locks, unique to each lock type.
3102kmp_indirect_lock_t *__kmp_allocate_indirect_lock(void **user_lock,
3103 kmp_int32 gtid,
3104 kmp_indirect_locktag_t tag) {
3105 kmp_indirect_lock_t *lck;
3106 kmp_lock_index_t idx;
3107
3108 __kmp_acquire_lock(&__kmp_global_lock, gtid);
3109
3110 if (__kmp_indirect_lock_pool[tag] != NULL) {
3111 // Reuse the allocated and destroyed lock object
3112 lck = __kmp_indirect_lock_pool[tag];
3113 if (OMP_LOCK_T_SIZE < sizeof(void *))
3114 idx = lck->lock->pool.index;
3115 __kmp_indirect_lock_pool[tag] = (kmp_indirect_lock_t *)lck->lock->pool.next;
3116 KA_TRACE(20, ("__kmp_allocate_indirect_lock: reusing an existing lock %p\n",
3117 lck));
3118 } else {
3119 idx = __kmp_i_lock_table.next;
3120 // Check capacity and double the size if it is full
3121 if (idx == __kmp_i_lock_table.size) {
3122 // Double up the space for block pointers
3123 int row = __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK;
3124 kmp_indirect_lock_t **new_table = (kmp_indirect_lock_t **)__kmp_allocate(
3125 2 * row * sizeof(kmp_indirect_lock_t *));
3126 KMP_MEMCPY(new_table, __kmp_i_lock_table.table,
3127 row * sizeof(kmp_indirect_lock_t *));
3128 kmp_indirect_lock_t **old_table = __kmp_i_lock_table.table;
3129 __kmp_i_lock_table.table = new_table;
3130 __kmp_free(old_table);
3131 // Allocate new objects in the new blocks
3132 for (int i = row; i < 2 * row; ++i)
3133 *(__kmp_i_lock_table.table + i) = (kmp_indirect_lock_t *)__kmp_allocate(
3134 KMP_I_LOCK_CHUNK * sizeof(kmp_indirect_lock_t));
3135 __kmp_i_lock_table.size = 2 * idx;
3136 }
3137 __kmp_i_lock_table.next++;
3138 lck = KMP_GET_I_LOCK(idx);
3139 // Allocate a new base lock object
3140 lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]);
3141 KA_TRACE(20,
3142 ("__kmp_allocate_indirect_lock: allocated a new lock %p\n", lck));
3143 }
3144
3145 __kmp_release_lock(&__kmp_global_lock, gtid);
3146
3147 lck->type = tag;
3148
3149 if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3150 *((kmp_lock_index_t *)user_lock) = idx
3151 << 1; // indirect lock word must be even
3152 } else {
3153 *((kmp_indirect_lock_t **)user_lock) = lck;
3154 }
3155
3156 return lck;
3157}
3158
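// Illustrative sketch (not built): how the dynamic lock word separates the
// two families. Direct locks keep an odd tag in the word itself (see
// __kmp_init_direct_lock above), so the direct jump tables only need their
// odd entries, while indirect locks store (table index << 1), which is even,
// and entry 0 routes to the indirect functions. The encode/dispatch helpers
// below are hypothetical illustrations, not the runtime's KMP_GET_D_TAG /
// KMP_EXTRACT_I_INDEX macros.
#if 0
#include <stdint.h>
#include <stdio.h>

static uint32_t encode_direct(uint32_t seq) { return (seq << 1) | 1; } // odd
static uint32_t encode_indirect(uint32_t idx) { return idx << 1; }     // even

static void dispatch(uint32_t lock_word) {
  if (lock_word & 1)
    printf("0x%x: direct lock, jump-table slot %u\n", lock_word,
           lock_word & 0xFF);
  else
    printf("0x%x: indirect lock, table index %u\n", lock_word, lock_word >> 1);
}

int main(void) {
  dispatch(encode_direct(3));    // some direct lock sequence number
  dispatch(encode_indirect(42)); // 43rd entry of the indirect lock table
  return 0;
}
#endif
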
3159// User lock lookup for dynamically dispatched locks.
3160static __forceinline kmp_indirect_lock_t *
3161__kmp_lookup_indirect_lock(void **user_lock, const char *func) {
3162 if (__kmp_env_consistency_check) {
3163 kmp_indirect_lock_t *lck = NULL;
3164 if (user_lock == NULL) {
3165 KMP_FATAL(LockIsUninitialized, func);
3166 }
3167 if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3168 kmp_lock_index_t idx = KMP_EXTRACT_I_INDEX(user_lock);
3169 if (idx >= __kmp_i_lock_table.size) {
3170 KMP_FATAL(LockIsUninitialized, func);
3171 }
3172 lck = KMP_GET_I_LOCK(idx);
3173 } else {
3174 lck = *((kmp_indirect_lock_t **)user_lock);
3175 }
3176 if (lck == NULL) {
3177 KMP_FATAL(LockIsUninitialized, func);
3178 }
3179 return lck;
3180 } else {
3181 if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3182 return KMP_GET_I_LOCK(KMP_EXTRACT_I_INDEX(user_lock));
3183 } else {
3184 return *((kmp_indirect_lock_t **)user_lock);
3185 }
3186 }
3187}
3188
3189static void __kmp_init_indirect_lock(kmp_dyna_lock_t *lock,
3190 kmp_dyna_lockseq_t seq) {
3191#if KMP_USE_ADAPTIVE_LOCKS
3192 if (seq == lockseq_adaptive && !__kmp_cpuinfo.rtm) {
3193 KMP_WARNING(AdaptiveNotSupported, "kmp_lockseq_t", "adaptive");
3194 seq = lockseq_queuing;
3195 }
3196#endif
3197#if KMP_USE_TSX
3198 if (seq == lockseq_rtm_queuing && !__kmp_cpuinfo.rtm) {
3199 seq = lockseq_queuing;
3200 }
3201#endif
3202 kmp_indirect_locktag_t tag = KMP_GET_I_TAG(seq);
3203 kmp_indirect_lock_t *l =
3204 __kmp_allocate_indirect_lock((void **)lock, __kmp_entry_gtid(), tag);
3205 KMP_I_LOCK_FUNC(l, init)(l->lock);
3206 KA_TRACE(
3207 20, ("__kmp_init_indirect_lock: initialized indirect lock with type#%d\n",
3208 seq));
3209}
3210
3211static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock) {
3212 kmp_uint32 gtid = __kmp_entry_gtid();
3213 kmp_indirect_lock_t *l =
3214 __kmp_lookup_indirect_lock((void **)lock, "omp_destroy_lock");
3215 KMP_I_LOCK_FUNC(l, destroy)(l->lock);
3216 kmp_indirect_locktag_t tag = l->type;
3217
3218 __kmp_acquire_lock(&__kmp_global_lock, gtid);
3219
3220 // Use the base lock's space to keep the pool chain.
3221 l->lock->pool.next = (kmp_user_lock_p)__kmp_indirect_lock_pool[tag];
3222 if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3223 l->lock->pool.index = KMP_EXTRACT_I_INDEX(lock);
3224 }
3225 __kmp_indirect_lock_pool[tag] = l;
3226
3227 __kmp_release_lock(&__kmp_global_lock, gtid);
3228}
3229
3230static int __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {
3231 kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3232 return KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
3233}
3234
3235static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {
3236 kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3237 return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid);
3238}
3239
3240static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {
3241 kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3242 return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid);
3243}
3244
3245static int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
3246 kmp_int32 gtid) {
3247 kmp_indirect_lock_t *l =
3248 __kmp_lookup_indirect_lock((void **)lock, "omp_set_lock");
3249 return KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
3250}
3251
3252static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
3253 kmp_int32 gtid) {
3254 kmp_indirect_lock_t *l =
3255 __kmp_lookup_indirect_lock((void **)lock, "omp_unset_lock");
3256 return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid);
3257}
3258
3259static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
3260 kmp_int32 gtid) {
3261 kmp_indirect_lock_t *l =
3262 __kmp_lookup_indirect_lock((void **)lock, "omp_test_lock");
3263 return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid);
3264}
3265
3266kmp_dyna_lockseq_t __kmp_user_lock_seq = lockseq_queuing;
3267
3268// This is used only in kmp_error.cpp when consistency checking is on.
3269kmp_int32 __kmp_get_user_lock_owner(kmp_user_lock_p lck, kmp_uint32 seq) {
3270 switch (seq) {
3271 case lockseq_tas:
3272 case lockseq_nested_tas:
3273 return __kmp_get_tas_lock_owner((kmp_tas_lock_t *)lck);
3274#if KMP_USE_FUTEX
3275 case lockseq_futex:
3276 case lockseq_nested_futex:
3277 return __kmp_get_futex_lock_owner((kmp_futex_lock_t *)lck);
3278#endif
3279 case lockseq_ticket:
3280 case lockseq_nested_ticket:
3281 return __kmp_get_ticket_lock_owner((kmp_ticket_lock_t *)lck);
3282 case lockseq_queuing:
3283 case lockseq_nested_queuing:
3284#if KMP_USE_ADAPTIVE_LOCKS
3285 case lockseq_adaptive:
3286#endif
3287 return __kmp_get_queuing_lock_owner((kmp_queuing_lock_t *)lck);
3288 case lockseq_drdpa:
3289 case lockseq_nested_drdpa:
3290 return __kmp_get_drdpa_lock_owner((kmp_drdpa_lock_t *)lck);
3291 default:
3292 return 0;
3293 }
3294}
3295
3296// Initializes data for dynamic user locks.
3297void __kmp_init_dynamic_user_locks() {
3298 // Initialize jump table for the lock functions
3299 if (__kmp_env_consistency_check) {
3300 __kmp_direct_set = direct_set_check;
3301 __kmp_direct_unset = direct_unset_check;
3302 __kmp_direct_test = direct_test_check;
3303 __kmp_direct_destroy = direct_destroy_check;
3304 __kmp_indirect_set = indirect_set_check;
3305 __kmp_indirect_unset = indirect_unset_check;
3306 __kmp_indirect_test = indirect_test_check;
3307 __kmp_indirect_destroy = indirect_destroy_check;
3308 } else {
3309 __kmp_direct_set = direct_set;
3310 __kmp_direct_unset = direct_unset;
3311 __kmp_direct_test = direct_test;
3312 __kmp_direct_destroy = direct_destroy;
3313 __kmp_indirect_set = indirect_set;
3314 __kmp_indirect_unset = indirect_unset;
3315 __kmp_indirect_test = indirect_test;
3316 __kmp_indirect_destroy = indirect_destroy;
3317 }
3318 // If the user locks have already been initialized, then return. Allow the
3319 // switch between different KMP_CONSISTENCY_CHECK values, but do not allocate
3320 // new lock tables if they have already been allocated.
3321 if (__kmp_init_user_locks)
3322 return;
3323
3324 // Initialize lock index table
3325 __kmp_i_lock_table.size = KMP_I_LOCK_CHUNK;
3326 __kmp_i_lock_table.table =
3327 (kmp_indirect_lock_t **)__kmp_allocate(sizeof(kmp_indirect_lock_t *));
3328 *(__kmp_i_lock_table.table) = (kmp_indirect_lock_t *)__kmp_allocate(
3329 KMP_I_LOCK_CHUNK * sizeof(kmp_indirect_lock_t));
3330 __kmp_i_lock_table.next = 0;
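  // The indirect lock table is a growable array of chunk pointers: each chunk
  // holds KMP_I_LOCK_CHUNK kmp_indirect_lock_t entries, and `next` is the index
  // of the next free slot across all chunks. As a sketch of how an index is
  // resolved (the real KMP_GET_I_LOCK macro lives in kmp_lock.h; this is an
  // illustration, not necessarily its exact definition):
  //
  //   kmp_indirect_lock_t *chunk = __kmp_i_lock_table.table[idx / KMP_I_LOCK_CHUNK];
  //   kmp_indirect_lock_t *lck   = chunk + (idx % KMP_I_LOCK_CHUNK);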
3331
3332 // Indirect lock size
3333 __kmp_indirect_lock_size[locktag_ticket] = sizeof(kmp_ticket_lock_t);
3334 __kmp_indirect_lock_size[locktag_queuing] = sizeof(kmp_queuing_lock_t);
3335#if KMP_USE_ADAPTIVE_LOCKS
3336 __kmp_indirect_lock_size[locktag_adaptive] = sizeof(kmp_adaptive_lock_t);
3337#endif
3338 __kmp_indirect_lock_size[locktag_drdpa] = sizeof(kmp_drdpa_lock_t);
3339#if KMP_USE_TSX
3340 __kmp_indirect_lock_size[locktag_rtm_queuing] = sizeof(kmp_queuing_lock_t);
3341#endif
3342 __kmp_indirect_lock_size[locktag_nested_tas] = sizeof(kmp_tas_lock_t);
3343#if KMP_USE_FUTEX
3344 __kmp_indirect_lock_size[locktag_nested_futex] = sizeof(kmp_futex_lock_t);
3345#endif
3346 __kmp_indirect_lock_size[locktag_nested_ticket] = sizeof(kmp_ticket_lock_t);
3347 __kmp_indirect_lock_size[locktag_nested_queuing] = sizeof(kmp_queuing_lock_t);
3348 __kmp_indirect_lock_size[locktag_nested_drdpa] = sizeof(kmp_drdpa_lock_t);
3349
3350// Initialize lock accessor/modifier
3351#define fill_jumps(table, expand, sep) \
3352 { \
3353 table[locktag##sep##ticket] = expand(ticket); \
3354 table[locktag##sep##queuing] = expand(queuing); \
3355 table[locktag##sep##drdpa] = expand(drdpa); \
3356 }
3357
3358#if KMP_USE_ADAPTIVE_LOCKS
3359#define fill_table(table, expand) \
3360 { \
3361 fill_jumps(table, expand, _); \
3362 table[locktag_adaptive] = expand(queuing); \
3363 fill_jumps(table, expand, _nested_); \
3364 }
3365#else
3366#define fill_table(table, expand) \
3367 { \
3368 fill_jumps(table, expand, _); \
3369 fill_jumps(table, expand, _nested_); \
3370 }
3371#endif // KMP_USE_ADAPTIVE_LOCKS
3372
3373#define expand(l) \
3374 (void (*)(kmp_user_lock_p, const ident_t *)) __kmp_set_##l##_lock_location
3375 fill_table(__kmp_indirect_set_location, expand);
3376#undef expand
3377#define expand(l) \
3378 (void (*)(kmp_user_lock_p, kmp_lock_flags_t)) __kmp_set_##l##_lock_flags
3379 fill_table(__kmp_indirect_set_flags, expand);
3380#undef expand
3381#define expand(l) \
3382 (const ident_t *(*)(kmp_user_lock_p)) __kmp_get_##l##_lock_location
3383 fill_table(__kmp_indirect_get_location, expand);
3384#undef expand
3385#define expand(l) \
3386 (kmp_lock_flags_t(*)(kmp_user_lock_p)) __kmp_get_##l##_lock_flags
3387 fill_table(__kmp_indirect_get_flags, expand);
3388#undef expand
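  // For reference, after preprocessing, each fill_table() use above boils down
  // to assignments of the following form (ticket shown for the "set location"
  // table; queuing, drdpa, and the nested tags follow the same pattern):
  //
  //   __kmp_indirect_set_location[locktag_ticket] =
  //       (void (*)(kmp_user_lock_p, const ident_t *))__kmp_set_ticket_lock_location;
  //   __kmp_indirect_set_location[locktag_nested_ticket] =
  //       (void (*)(kmp_user_lock_p, const ident_t *))__kmp_set_ticket_lock_location;
  //
  // Note that the nested tags reuse the non-nested accessor/modifier functions.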
3389
3390 __kmp_init_user_locks = TRUE;
3391}
3392
3393// Clean up the lock table.
3394void __kmp_cleanup_indirect_user_locks() {
3395 kmp_lock_index_t i;
3396 int k;
3397
3398 // Clean up locks in the pools first (they were already destroyed before going
3399 // into the pools).
3400 for (k = 0; k < KMP_NUM_I_LOCKS; ++k) {
3401 kmp_indirect_lock_t *l = __kmp_indirect_lock_pool[k];
3402 while (l != NULL) {
3403 kmp_indirect_lock_t *ll = l;
3404 l = (kmp_indirect_lock_t *)l->lock->pool.next;
3405 KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: freeing %p from pool\n",
3406 ll));
3407 __kmp_free(ll->lock);
3408 ll->lock = NULL;
3409 }
3410 __kmp_indirect_lock_pool[k] = NULL;
3411 }
3412 // Clean up the remaining undestroyed locks.
3413 for (i = 0; i < __kmp_i_lock_table.next; i++) {
3414 kmp_indirect_lock_t *l = KMP_GET_I_LOCK(i);
3415 if (l->lock != NULL) {
3416 // Locks not destroyed explicitly need to be destroyed here.
3417 KMP_I_LOCK_FUNC(l, destroy)(l->lock);
3418 KA_TRACE(
3419 20,
3420 ("__kmp_cleanup_indirect_user_locks: destroy/freeing %p from table\n",
3421 l));
3422 __kmp_free(l->lock);
3423 }
3424 }
3425 // Free the table
3426 for (i = 0; i < __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK; i++)
3427 __kmp_free(__kmp_i_lock_table.table[i]);
3428 __kmp_free(__kmp_i_lock_table.table);
3429
3430 __kmp_init_user_locks = FALSE;
3431}
3432
3433enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
3434int __kmp_num_locks_in_block = 1; // FIXME - tune this value
3435
3436#else // KMP_USE_DYNAMIC_LOCK
3437
3438static void __kmp_init_tas_lock_with_checks(kmp_tas_lock_t *lck) {
3439 __kmp_init_tas_lock(lck);
3440}
3441
3442static void __kmp_init_nested_tas_lock_with_checks(kmp_tas_lock_t *lck) {
3443 __kmp_init_nested_tas_lock(lck);
3444}
3445
3446#if KMP_USE_FUTEX
3447static void __kmp_init_futex_lock_with_checks(kmp_futex_lock_t *lck) {
3448 __kmp_init_futex_lock(lck);
3449}
3450
3451static void __kmp_init_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) {
3452 __kmp_init_nested_futex_lock(lck);
3453}
3454#endif
3455
3456static int __kmp_is_ticket_lock_initialized(kmp_ticket_lock_t *lck) {
3457 return lck == lck->lk.self;
3458}
3459
3460static void __kmp_init_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
3461 __kmp_init_ticket_lock(lck);
3462}
3463
3464static void __kmp_init_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
3465 __kmp_init_nested_ticket_lock(lck);
3466}
3467
3468static int __kmp_is_queuing_lock_initialized(kmp_queuing_lock_t *lck) {
3469 return lck == lck->lk.initialized;
3470}
3471
3472static void __kmp_init_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
3473 __kmp_init_queuing_lock(lck);
3474}
3475
3476static void
3477__kmp_init_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
3478 __kmp_init_nested_queuing_lock(lck);
3479}
3480
3481#if KMP_USE_ADAPTIVE_LOCKS
3482static void __kmp_init_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck) {
3483 __kmp_init_adaptive_lock(lck);
3484}
3485#endif
3486
3487static int __kmp_is_drdpa_lock_initialized(kmp_drdpa_lock_t *lck) {
3488 return lck == lck->lk.initialized;
3489}
3490
3491static void __kmp_init_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
3492 __kmp_init_drdpa_lock(lck);
3493}
3494
3495static void __kmp_init_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
3496 __kmp_init_nested_drdpa_lock(lck);
3497}
3498
3499/* user locks
3500 * They are implemented as a table of function pointers which are set to the
3501 * lock functions of the appropriate kind, once that has been determined. */
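/* Illustrative sketch only (the helper below is hypothetical and not part of
 * this file): once __kmp_set_user_lock_vptrs() further down has bound the
 * pointers declared below, generic code can operate on any user lock without
 * knowing its concrete type by calling through the table, e.g.:
 *
 *   static inline int example_acquire(kmp_user_lock_p lck, kmp_int32 gtid) {
 *     KMP_DEBUG_ASSERT(__kmp_acquire_user_lock_with_checks_ != NULL);
 *     return (*__kmp_acquire_user_lock_with_checks_)(lck, gtid);
 *   }
 */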
3502
3503enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
3504
3505size_t __kmp_base_user_lock_size = 0;
3506size_t __kmp_user_lock_size = 0;
3507
3508kmp_int32 (*__kmp_get_user_lock_owner_)(kmp_user_lock_p lck) = NULL;
3509int (*__kmp_acquire_user_lock_with_checks_)(kmp_user_lock_p lck,
3510 kmp_int32 gtid) = NULL;
3511
3512int (*__kmp_test_user_lock_with_checks_)(kmp_user_lock_p lck,
3513 kmp_int32 gtid) = NULL;
3514int (*__kmp_release_user_lock_with_checks_)(kmp_user_lock_p lck,
3515 kmp_int32 gtid) = NULL;
3516void (*__kmp_init_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
3517void (*__kmp_destroy_user_lock_)(kmp_user_lock_p lck) = NULL;
3518void (*__kmp_destroy_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
3519int (*__kmp_acquire_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
3520 kmp_int32 gtid) = NULL;
3521
3522int (*__kmp_test_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
3523 kmp_int32 gtid) = NULL;
3524int (*__kmp_release_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
3525 kmp_int32 gtid) = NULL;
3526void (*__kmp_init_nested_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
3527void (*__kmp_destroy_nested_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
3528
3529int (*__kmp_is_user_lock_initialized_)(kmp_user_lock_p lck) = NULL;
3530const ident_t *(*__kmp_get_user_lock_location_)(kmp_user_lock_p lck) = NULL;
3531void (*__kmp_set_user_lock_location_)(kmp_user_lock_p lck,
3532 const ident_t *loc) = NULL;
3533kmp_lock_flags_t (*__kmp_get_user_lock_flags_)(kmp_user_lock_p lck) = NULL;
3534void (*__kmp_set_user_lock_flags_)(kmp_user_lock_p lck,
3535 kmp_lock_flags_t flags) = NULL;
3536
3537void __kmp_set_user_lock_vptrs(kmp_lock_kind_t user_lock_kind) {
3538 switch (user_lock_kind) {
3539 case lk_default:
3540 default:
3541 KMP_ASSERT(0);
3542
3543 case lk_tas: {
3544 __kmp_base_user_lock_size = sizeof(kmp_base_tas_lock_t);
3545 __kmp_user_lock_size = sizeof(kmp_tas_lock_t);
3546
3547 __kmp_get_user_lock_owner_ =
3548 (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_tas_lock_owner);
3549
3550 if (__kmp_env_consistency_check) {
3551 KMP_BIND_USER_LOCK_WITH_CHECKS(tas);
3552 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas);
3553 } else {
3554 KMP_BIND_USER_LOCK(tas);
3555 KMP_BIND_NESTED_USER_LOCK(tas);
3556 }
3557
3558 __kmp_destroy_user_lock_ =
3559 (void (*)(kmp_user_lock_p))(&__kmp_destroy_tas_lock);
3560
3561 __kmp_is_user_lock_initialized_ = (int (*)(kmp_user_lock_p))NULL;
3562
3563 __kmp_get_user_lock_location_ = (const ident_t *(*)(kmp_user_lock_p))NULL;
3564
3565 __kmp_set_user_lock_location_ =
3566 (void (*)(kmp_user_lock_p, const ident_t *))NULL;
3567
3568 __kmp_get_user_lock_flags_ = (kmp_lock_flags_t(*)(kmp_user_lock_p))NULL;
3569
3570 __kmp_set_user_lock_flags_ =
3571 (void (*)(kmp_user_lock_p, kmp_lock_flags_t))NULL;
3572 } break;
3573
3574#if KMP_USE_FUTEX
3575
3576 case lk_futex: {
3577 __kmp_base_user_lock_size = sizeof(kmp_base_futex_lock_t);
3578 __kmp_user_lock_size = sizeof(kmp_futex_lock_t);
3579
3580 __kmp_get_user_lock_owner_ =
3581 (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_futex_lock_owner);
3582
3583 if (__kmp_env_consistency_check) {
3584 KMP_BIND_USER_LOCK_WITH_CHECKS(futex);
3585 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex);
3586 } else {
3587 KMP_BIND_USER_LOCK(futex);
3588 KMP_BIND_NESTED_USER_LOCK(futex);
3589 }
3590
3591 __kmp_destroy_user_lock_ =
3592 (void (*)(kmp_user_lock_p))(&__kmp_destroy_futex_lock);
3593
3594 __kmp_is_user_lock_initialized_ = (int (*)(kmp_user_lock_p))NULL;
3595
3596 __kmp_get_user_lock_location_ = (const ident_t *(*)(kmp_user_lock_p))NULL;
3597
3598 __kmp_set_user_lock_location_ =
3599 (void (*)(kmp_user_lock_p, const ident_t *))NULL;
3600
3601 __kmp_get_user_lock_flags_ = (kmp_lock_flags_t(*)(kmp_user_lock_p))NULL;
3602
3603 __kmp_set_user_lock_flags_ =
3604 (void (*)(kmp_user_lock_p, kmp_lock_flags_t))NULL;
3605 } break;
3606
3607#endif // KMP_USE_FUTEX
3608
3609 case lk_ticket: {
3610 __kmp_base_user_lock_size = sizeof(kmp_base_ticket_lock_t);
3611 __kmp_user_lock_size = sizeof(kmp_ticket_lock_t);
3612
3613 __kmp_get_user_lock_owner_ =
3614 (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_owner);
3615
3616 if (__kmp_env_consistency_check) {
3617 KMP_BIND_USER_LOCK_WITH_CHECKS(ticket);
3618 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(ticket);
3619 } else {
3620 KMP_BIND_USER_LOCK(ticket);
3621 KMP_BIND_NESTED_USER_LOCK(ticket);
3622 }
3623
3624 __kmp_destroy_user_lock_ =
3625 (void (*)(kmp_user_lock_p))(&__kmp_destroy_ticket_lock);
3626
3627 __kmp_is_user_lock_initialized_ =
3628 (int (*)(kmp_user_lock_p))(&__kmp_is_ticket_lock_initialized);
3629
3630 __kmp_get_user_lock_location_ =
3631 (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_location);
3632
3633 __kmp_set_user_lock_location_ = (void (*)(
3634 kmp_user_lock_p, const ident_t *))(&__kmp_set_ticket_lock_location);
3635
3636 __kmp_get_user_lock_flags_ =
3637 (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_flags);
3638
3639 __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
3640 &__kmp_set_ticket_lock_flags);
3641 } break;
3642
3643 case lk_queuing: {
3644 __kmp_base_user_lock_size = sizeof(kmp_base_queuing_lock_t);
3645 __kmp_user_lock_size = sizeof(kmp_queuing_lock_t);
3646
3647 __kmp_get_user_lock_owner_ =
3648 (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_owner);
3649
3650 if (__kmp_env_consistency_check) {
3651 KMP_BIND_USER_LOCK_WITH_CHECKS(queuing);
3652 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(queuing);
3653 } else {
3654 KMP_BIND_USER_LOCK(queuing);
3655 KMP_BIND_NESTED_USER_LOCK(queuing);
3656 }
3657
3658 __kmp_destroy_user_lock_ =
3659 (void (*)(kmp_user_lock_p))(&__kmp_destroy_queuing_lock);
3660
3661 __kmp_is_user_lock_initialized_ =
3662 (int (*)(kmp_user_lock_p))(&__kmp_is_queuing_lock_initialized);
3663
3664 __kmp_get_user_lock_location_ =
3665 (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_location);
3666
3667 __kmp_set_user_lock_location_ = (void (*)(
3668 kmp_user_lock_p, const ident_t *))(&__kmp_set_queuing_lock_location);
3669
3670 __kmp_get_user_lock_flags_ =
3671 (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_flags);
3672
3673 __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
3674 &__kmp_set_queuing_lock_flags);
3675 } break;
3676
3677#if KMP_USE_ADAPTIVE_LOCKS
3678 case lk_adaptive: {
3679 __kmp_base_user_lock_size = sizeof(kmp_base_adaptive_lock_t);
3680 __kmp_user_lock_size = sizeof(kmp_adaptive_lock_t);
3681
3682 __kmp_get_user_lock_owner_ =
3683 (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_owner);
3684
3685 if (__kmp_env_consistency_check) {
3686 KMP_BIND_USER_LOCK_WITH_CHECKS(adaptive);
3687 } else {
3688 KMP_BIND_USER_LOCK(adaptive);
3689 }
3690
3691 __kmp_destroy_user_lock_ =
3692 (void (*)(kmp_user_lock_p))(&__kmp_destroy_adaptive_lock);
3693
3694 __kmp_is_user_lock_initialized_ =
3695 (int (*)(kmp_user_lock_p))(&__kmp_is_queuing_lock_initialized);
3696
3697 __kmp_get_user_lock_location_ =
3698 (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_location);
3699
3700 __kmp_set_user_lock_location_ = (void (*)(
3701 kmp_user_lock_p, const ident_t *))(&__kmp_set_queuing_lock_location);
3702
3703 __kmp_get_user_lock_flags_ =
3704 (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_flags);
3705
3706 __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
3707 &__kmp_set_queuing_lock_flags);
3708
3709 } break;
3710#endif // KMP_USE_ADAPTIVE_LOCKS
3711
3712 case lk_drdpa: {
3713 __kmp_base_user_lock_size = sizeof(kmp_base_drdpa_lock_t);
3714 __kmp_user_lock_size = sizeof(kmp_drdpa_lock_t);
3715
3716 __kmp_get_user_lock_owner_ =
3717 (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_owner);
3718
3719 if (__kmp_env_consistency_check) {
3720 KMP_BIND_USER_LOCK_WITH_CHECKS(drdpa);
3721 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(drdpa);
3722 } else {
3723 KMP_BIND_USER_LOCK(drdpa);
3724 KMP_BIND_NESTED_USER_LOCK(drdpa);
3725 }
3726
3727 __kmp_destroy_user_lock_ =
3728 (void (*)(kmp_user_lock_p))(&__kmp_destroy_drdpa_lock);
3729
3730 __kmp_is_user_lock_initialized_ =
3731 (int (*)(kmp_user_lock_p))(&__kmp_is_drdpa_lock_initialized);
3732
3733 __kmp_get_user_lock_location_ =
3734 (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_location);
3735
3736 __kmp_set_user_lock_location_ = (void (*)(
3737 kmp_user_lock_p, const ident_t *))(&__kmp_set_drdpa_lock_location);
3738
3739 __kmp_get_user_lock_flags_ =
3740 (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_flags);
3741
3742 __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
3743 &__kmp_set_drdpa_lock_flags);
3744 } break;
3745 }
3746}
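// A sketch of typical usage (hedged -- the real call sites live elsewhere in
// the runtime, where the user lock kind is selected): the kind is recorded
// first, then the vtable is bound once before any user locks are created:
//
//   __kmp_user_lock_kind = lk_queuing;
//   __kmp_set_user_lock_vptrs(lk_queuing);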
3747
3748// ----------------------------------------------------------------------------
3749// User lock table & lock allocation
3750
3751kmp_lock_table_t __kmp_user_lock_table = {1, 0, NULL};
3752kmp_user_lock_p __kmp_lock_pool = NULL;
3753
3754// Lock block-allocation support.
3755kmp_block_of_locks *__kmp_lock_blocks = NULL;
3756int __kmp_num_locks_in_block = 1; // FIXME - tune this value
3757
3758static kmp_lock_index_t __kmp_lock_table_insert(kmp_user_lock_p lck) {
3759 // Assume that kmp_global_lock is held upon entry/exit.
3760 kmp_lock_index_t index;
3761 if (__kmp_user_lock_table.used >= __kmp_user_lock_table.allocated) {
3762 kmp_lock_index_t size;
3763 kmp_user_lock_p *table;
3764 // Reallocate lock table.
3765 if (__kmp_user_lock_table.allocated == 0) {
3766 size = 1024;
3767 } else {
3768 size = __kmp_user_lock_table.allocated * 2;
3769 }
3770 table = (kmp_user_lock_p *)__kmp_allocate(sizeof(kmp_user_lock_p) * size);
3771 KMP_MEMCPY(table + 1, __kmp_user_lock_table.table + 1,
3772 sizeof(kmp_user_lock_p) * (__kmp_user_lock_table.used - 1));
3773 table[0] = (kmp_user_lock_p)__kmp_user_lock_table.table;
3774 // We cannot free the previous table now, since it may be in use by other
3775    // threads. So save the pointer to the previous table in the first
3776    // element of the new table. All the tables will be organized into a list,
3777    // and can be freed when the library shuts down.
3778 __kmp_user_lock_table.table = table;
3779 __kmp_user_lock_table.allocated = size;
3780 }
3781 KMP_DEBUG_ASSERT(__kmp_user_lock_table.used <
3782 __kmp_user_lock_table.allocated);
3783 index = __kmp_user_lock_table.used;
3784 __kmp_user_lock_table.table[index] = lck;
3785 ++__kmp_user_lock_table.used;
3786 return index;
3787}
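// Growth pattern of the table above: `allocated` goes 0 -> 1024 -> 2048 -> ...
// On each growth, element [0] of the new table stores a pointer to the
// previous (smaller) table (NULL for the very first table), which is why the
// copy above starts at element [1]. __kmp_cleanup_user_locks() below walks
// this chain to free every generation of the table at shutdown.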
3788
3789static kmp_user_lock_p __kmp_lock_block_allocate() {
3790 // Assume that kmp_global_lock is held upon entry/exit.
3791 static int last_index = 0;
3792 if ((last_index >= __kmp_num_locks_in_block) || (__kmp_lock_blocks == NULL)) {
3793 // Restart the index.
3794 last_index = 0;
3795 // Need to allocate a new block.
3796 KMP_DEBUG_ASSERT(__kmp_user_lock_size > 0);
3797 size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block;
3798 char *buffer =
3799 (char *)__kmp_allocate(space_for_locks + sizeof(kmp_block_of_locks));
3800 // Set up the new block.
3801 kmp_block_of_locks *new_block =
3802 (kmp_block_of_locks *)(&buffer[space_for_locks]);
3803 new_block->next_block = __kmp_lock_blocks;
3804 new_block->locks = (void *)buffer;
3805 // Publish the new block.
3806 KMP_MB();
3807 __kmp_lock_blocks = new_block;
3808 }
3809 kmp_user_lock_p ret = (kmp_user_lock_p)(&(
3810 ((char *)(__kmp_lock_blocks->locks))[last_index * __kmp_user_lock_size]));
3811 last_index++;
3812 return ret;
3813}
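// Memory layout of one block allocated above -- the locks come first and the
// kmp_block_of_locks header sits at the end of the same buffer, so freeing
// block_ptr->locks in __kmp_cleanup_user_locks() releases the header as well:
//
//   buffer: [ lock 0 | lock 1 | ... | lock N-1 | kmp_block_of_locks ]
//             ^ new_block->locks                 ^ new_block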
3814
3815// Get memory for a lock. It may be freshly allocated memory or reused memory
3816// from the lock pool.
3817kmp_user_lock_p __kmp_user_lock_allocate(void **user_lock, kmp_int32 gtid,
3818 kmp_lock_flags_t flags) {
3819 kmp_user_lock_p lck;
3820 kmp_lock_index_t index;
3821 KMP_DEBUG_ASSERT(user_lock);
3822
3823 __kmp_acquire_lock(&__kmp_global_lock, gtid);
3824
3825 if (__kmp_lock_pool == NULL) {
3826 // Lock pool is empty. Allocate new memory.
3827
3828 if (__kmp_num_locks_in_block <= 1) { // Tune this cutoff point.
3829 lck = (kmp_user_lock_p)__kmp_allocate(__kmp_user_lock_size);
3830 } else {
3831 lck = __kmp_lock_block_allocate();
3832 }
3833
3834    // Insert the lock into the table so that it can be freed in __kmp_cleanup,
3835    // and so the debugger has info on all allocated locks.
3836 index = __kmp_lock_table_insert(lck);
3837 } else {
3838 // Pick up lock from pool.
3839 lck = __kmp_lock_pool;
3840 index = __kmp_lock_pool->pool.index;
3841 __kmp_lock_pool = __kmp_lock_pool->pool.next;
3842 }
3843
3844 // We could potentially differentiate between nested and regular locks
3845 // here, and do the lock table lookup for regular locks only.
3846 if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3847 *((kmp_lock_index_t *)user_lock) = index;
3848 } else {
3849 *((kmp_user_lock_p *)user_lock) = lck;
3850 }
3851
3852  // Mark the lock if it is a critical section lock.
3853 __kmp_set_user_lock_flags(lck, flags);
3854
3855  __kmp_release_lock(&__kmp_global_lock, gtid); // AC: TODO move this line up
3856
3857 return lck;
3858}
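// What the caller ends up holding in *user_lock depends on the size of the
// OpenMP lock type: when omp_lock_t is smaller than a pointer, only the lock
// table index fits; otherwise the lock address is stored directly. The reverse
// mapping (see __kmp_lookup_user_lock() below) is, in sketch form:
//
//   if (OMP_LOCK_T_SIZE < sizeof(void *))
//     lck = __kmp_user_lock_table.table[*(kmp_lock_index_t *)user_lock];
//   else
//     lck = *(kmp_user_lock_p *)user_lock;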
3859
3860// Return the lock's memory to the pool for reuse.
3861void __kmp_user_lock_free(void **user_lock, kmp_int32 gtid,
3862 kmp_user_lock_p lck) {
3863 KMP_DEBUG_ASSERT(user_lock != NULL);
3864 KMP_DEBUG_ASSERT(lck != NULL);
3865
3866 __kmp_acquire_lock(&__kmp_global_lock, gtid);
3867
3868 lck->pool.next = __kmp_lock_pool;
3869 __kmp_lock_pool = lck;
3870 if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3871 kmp_lock_index_t index = *((kmp_lock_index_t *)user_lock);
3872 KMP_DEBUG_ASSERT(0 < index && index <= __kmp_user_lock_table.used);
3873 lck->pool.index = index;
3874 }
3875
3876 __kmp_release_lock(&__kmp_global_lock, gtid);
3877}
3878
3879kmp_user_lock_p __kmp_lookup_user_lock(void **user_lock, char const *func) {
3880 kmp_user_lock_p lck = NULL;
3881
3882 if (__kmp_env_consistency_check) {
3883 if (user_lock == NULL) {
3884 KMP_FATAL(LockIsUninitialized, func);
3885 }
3886 }
3887
3888 if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3889 kmp_lock_index_t index = *((kmp_lock_index_t *)user_lock);
3890 if (__kmp_env_consistency_check) {
3891 if (!(0 < index && index < __kmp_user_lock_table.used)) {
3892 KMP_FATAL(LockIsUninitialized, func);
3893 }
3894 }
3895 KMP_DEBUG_ASSERT(0 < index && index < __kmp_user_lock_table.used);
3896 KMP_DEBUG_ASSERT(__kmp_user_lock_size > 0);
3897 lck = __kmp_user_lock_table.table[index];
3898 } else {
3899 lck = *((kmp_user_lock_p *)user_lock);
3900 }
3901
3902 if (__kmp_env_consistency_check) {
3903 if (lck == NULL) {
3904 KMP_FATAL(LockIsUninitialized, func);
3905 }
3906 }
3907
3908 return lck;
3909}
3910
3911void __kmp_cleanup_user_locks(void) {
3912  // Reset the lock pool. Don't worry about locks in the pool -- we will free
3913  // them when iterating through the lock table (it includes all locks, dead or alive).
3914 __kmp_lock_pool = NULL;
3915
3916#define IS_CRITICAL(lck) \
3917 ((__kmp_get_user_lock_flags_ != NULL) && \
3918 ((*__kmp_get_user_lock_flags_)(lck)&kmp_lf_critical_section))
3919
3920 // Loop through lock table, free all locks.
3921  // Do not free item [0]; it is reserved for the lock tables list.
3922 //
3923 // FIXME - we are iterating through a list of (pointers to) objects of type
3924 // union kmp_user_lock, but we have no way of knowing whether the base type is
3925 // currently "pool" or whatever the global user lock type is.
3926 //
3927 // We are relying on the fact that for all of the user lock types
3928 // (except "tas"), the first field in the lock struct is the "initialized"
3929 // field, which is set to the address of the lock object itself when
3930 // the lock is initialized. When the union is of type "pool", the
3931 // first field is a pointer to the next object in the free list, which
3932 // will not be the same address as the object itself.
3933 //
3934 // This means that the check (*__kmp_is_user_lock_initialized_)(lck) will fail
3935 // for "pool" objects on the free list. This must happen as the "location"
3936 // field of real user locks overlaps the "index" field of "pool" objects.
3937 //
3938 // It would be better to run through the free list, and remove all "pool"
3939 // objects from the lock table before executing this loop. However,
3940 // "pool" objects do not always have their index field set (only on
3941 // lin_32e), and I don't want to search the lock table for the address
3942 // of every "pool" object on the free list.
3943 while (__kmp_user_lock_table.used > 1) {
3944 const ident *loc;
3945
3946    // Reduce __kmp_user_lock_table.used before freeing the lock, so that the
3947    // state of the locks stays consistent.
3948 kmp_user_lock_p lck =
3949 __kmp_user_lock_table.table[--__kmp_user_lock_table.used];
3950
3951 if ((__kmp_is_user_lock_initialized_ != NULL) &&
3952 (*__kmp_is_user_lock_initialized_)(lck)) {
3953 // Issue a warning if: KMP_CONSISTENCY_CHECK AND lock is initialized AND
3954 // it is NOT a critical section (user is not responsible for destroying
3955      // criticals) AND we know the source location to report.
3956 if (__kmp_env_consistency_check && (!IS_CRITICAL(lck)) &&
3957 ((loc = __kmp_get_user_lock_location(lck)) != NULL) &&
3958 (loc->psource != NULL)) {
3959 kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false);
3960 KMP_WARNING(CnsLockNotDestroyed, str_loc.file, str_loc.line);
3961 __kmp_str_loc_free(&str_loc);
3962 }
3963
3964#ifdef KMP_DEBUG
3965 if (IS_CRITICAL(lck)) {
3966 KA_TRACE(
3967 20,
3968 ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n",
3969 lck, *(void **)lck));
3970 } else {
3971 KA_TRACE(20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck,
3972 *(void **)lck));
3973 }
3974#endif // KMP_DEBUG
3975
3976 // Cleanup internal lock dynamic resources (for drdpa locks particularly).
3977 __kmp_destroy_user_lock(lck);
3978 }
3979
3980 // Free the lock if block allocation of locks is not used.
3981 if (__kmp_lock_blocks == NULL) {
3982 __kmp_free(lck);
3983 }
3984 }
3985
3986#undef IS_CRITICAL
3987
3988 // delete lock table(s).
3989 kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table;
3990 __kmp_user_lock_table.table = NULL;
3991 __kmp_user_lock_table.allocated = 0;
3992
3993 while (table_ptr != NULL) {
3994 // In the first element we saved the pointer to the previous
3995 // (smaller) lock table.
3996 kmp_user_lock_p *next = (kmp_user_lock_p *)(table_ptr[0]);
3997 __kmp_free(table_ptr);
3998 table_ptr = next;
3999 }
4000
4001 // Free buffers allocated for blocks of locks.
4002 kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks;
4003 __kmp_lock_blocks = NULL;
4004
4005 while (block_ptr != NULL) {
4006 kmp_block_of_locks_t *next = block_ptr->next_block;
4007 __kmp_free(block_ptr->locks);
4008 // *block_ptr itself was allocated at the end of the locks vector.
4009 block_ptr = next;
4010 }
4011
4012 TCW_4(__kmp_init_user_locks, FALSE);
4013}
4014
4015#endif // KMP_USE_DYNAMIC_LOCK