LLVM OpenMP* Runtime Library
kmp_alloc.cpp
1 /*
2  * kmp_alloc.cpp -- private/shared dynamic memory allocation and management
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // The LLVM Compiler Infrastructure
8 //
9 // This file is dual licensed under the MIT and the University of Illinois Open
10 // Source Licenses. See LICENSE.txt for details.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "kmp.h"
15 #include "kmp_io.h"
16 #include "kmp_wrapper_malloc.h"
17 
18 // Disable bget when it is not used
19 #if KMP_USE_BGET
20 
21 /* Thread private buffer management code */
22 
23 typedef int (*bget_compact_t)(size_t, int);
24 typedef void *(*bget_acquire_t)(size_t);
25 typedef void (*bget_release_t)(void *);
26 
27 /* NOTE: bufsize must be a signed datatype */
28 
29 #if KMP_OS_WINDOWS
30 #if KMP_ARCH_X86 || KMP_ARCH_ARM
31 typedef kmp_int32 bufsize;
32 #else
33 typedef kmp_int64 bufsize;
34 #endif
35 #else
36 typedef ssize_t bufsize;
37 #endif
38 
39 /* The three modes of operation are fifo search, lifo search, and best-fit. */
40 
41 typedef enum bget_mode {
42  bget_mode_fifo = 0,
43  bget_mode_lifo = 1,
44  bget_mode_best = 2
45 } bget_mode_t;
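/* Illustrative note: the mode controls how bget() walks a free-list bin.
   fifo follows the forward links, so the oldest freed buffer of a suitable
   size is reused first; lifo follows the backward links, so the most recently
   freed buffer is reused first; best scans the whole bin for the smallest
   buffer that still fits.  The calling thread's policy can be switched through
   kmpc_set_poolmode(), defined later in this file, e.g.

     kmpc_set_poolmode(bget_mode_best); // value 2; trades speed for packing

   (example only; after set_thr_data() the mode is bget_mode_fifo, the zero
   value left by the memset). */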
46 
47 static void bpool(kmp_info_t *th, void *buffer, bufsize len);
48 static void *bget(kmp_info_t *th, bufsize size);
49 static void *bgetz(kmp_info_t *th, bufsize size);
50 static void *bgetr(kmp_info_t *th, void *buffer, bufsize newsize);
51 static void brel(kmp_info_t *th, void *buf);
52 static void bectl(kmp_info_t *th, bget_compact_t compact,
53  bget_acquire_t acquire, bget_release_t release,
54  bufsize pool_incr);
55 
56 /* BGET CONFIGURATION */
57 /* Buffer allocation size quantum: all buffers allocated are a
58  multiple of this size. This MUST be a power of two. */
59 
60 /* On IA-32 architecture with Linux* OS, malloc() does not
61  ensure 16-byte alignment */
62 
63 #if KMP_ARCH_X86 || !KMP_HAVE_QUAD
64 
65 #define SizeQuant 8
66 #define AlignType double
67 
68 #else
69 
70 #define SizeQuant 16
71 #define AlignType _Quad
72 
73 #endif
74 
75 // Define this symbol to enable the bstats() function which calculates the
76 // total free space in the buffer pool, the largest available buffer, and the
77 // total space currently allocated.
78 #define BufStats 1
79 
80 #ifdef KMP_DEBUG
81 
82 // Define this symbol to enable the bpoold() function which dumps the buffers
83 // in a buffer pool.
84 #define BufDump 1
85 
86 // Define this symbol to enable the bpoolv() function for validating a buffer
87 // pool.
88 #define BufValid 1
89 
90 // Define this symbol to enable the bufdump() function which allows dumping the
91 // contents of an allocated or free buffer.
92 #define DumpData 1
93 
94 #ifdef NOT_USED_NOW
95 
96 // Wipe free buffers to a guaranteed pattern of garbage to trip up miscreants
97 // who attempt to use pointers into released buffers.
98 #define FreeWipe 1
99 
100 // Use a best fit algorithm when searching for space for an allocation request.
101 // This uses memory more efficiently, but allocation will be much slower.
102 #define BestFit 1
103 
104 #endif /* NOT_USED_NOW */
105 #endif /* KMP_DEBUG */
106 
107 static bufsize bget_bin_size[] = {
108  0,
109  // 1 << 6, /* .5 Cache line */
110  1 << 7, /* 1 Cache line, new */
111  1 << 8, /* 2 Cache lines */
112  1 << 9, /* 4 Cache lines, new */
113  1 << 10, /* 8 Cache lines */
114  1 << 11, /* 16 Cache lines, new */
115  1 << 12, 1 << 13, /* new */
116  1 << 14, 1 << 15, /* new */
117  1 << 16, 1 << 17, 1 << 18, 1 << 19, 1 << 20, /* 1MB */
118  1 << 21, /* 2MB */
119  1 << 22, /* 4MB */
120  1 << 23, /* 8MB */
121  1 << 24, /* 16MB */
122  1 << 25, /* 32MB */
123 };
124 
125 #define MAX_BGET_BINS (int)(sizeof(bget_bin_size) / sizeof(bufsize))
126 
127 struct bfhead;
128 
129 // Declare the interface, including the requested buffer size type, bufsize.
130 
131 /* Queue links */
132 typedef struct qlinks {
133  struct bfhead *flink; /* Forward link */
134  struct bfhead *blink; /* Backward link */
135 } qlinks_t;
136 
137 /* Header in allocated and free buffers */
138 typedef struct bhead2 {
139  kmp_info_t *bthr; /* The thread which owns the buffer pool */
140  bufsize prevfree; /* Relative link back to previous free buffer in memory or
141  0 if previous buffer is allocated. */
142  bufsize bsize; /* Buffer size: positive if free, negative if allocated. */
143 } bhead2_t;
144 
145 /* Make sure the bhead structure is a multiple of SizeQuant in size. */
146 typedef union bhead {
147  KMP_ALIGN(SizeQuant)
148  AlignType b_align;
149  char b_pad[sizeof(bhead2_t) + (SizeQuant - (sizeof(bhead2_t) % SizeQuant))];
150  bhead2_t bb;
151 } bhead_t;
152 #define BH(p) ((bhead_t *)(p))
153 
154 /* Header in directly allocated buffers (by acqfcn) */
155 typedef struct bdhead {
156  bufsize tsize; /* Total size, including overhead */
157  bhead_t bh; /* Common header */
158 } bdhead_t;
159 #define BDH(p) ((bdhead_t *)(p))
160 
161 /* Header in free buffers */
162 typedef struct bfhead {
163  bhead_t bh; /* Common allocated/free header */
164  qlinks_t ql; /* Links on free list */
165 } bfhead_t;
166 #define BFH(p) ((bfhead_t *)(p))
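/* Summary of the header conventions used throughout this file: every buffer
   carved from a pool begins with a bhead_t.
     bb.bsize > 0  -- the buffer is free; the qlinks_t that follows the header
                      (bfhead_t) chains it on one of the per-thread free lists.
     bb.bsize < 0  -- the buffer is allocated; -bsize is its total length,
                      header included.
     bb.bsize == 0 -- the buffer was obtained directly from acqfcn(); its real
                      length is kept in the bdhead_t that precedes the common
                      header, and it never appears on a free list.
   bb.prevfree holds the size of an immediately preceding free buffer, or 0
   when the preceding buffer is allocated (or there is none). */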
167 
168 typedef struct thr_data {
169  bfhead_t freelist[MAX_BGET_BINS];
170 #if BufStats
171  size_t totalloc; /* Total space currently allocated */
172  long numget, numrel; /* Number of bget() and brel() calls */
173  long numpblk; /* Number of pool blocks */
174  long numpget, numprel; /* Number of block gets and rels */
175  long numdget, numdrel; /* Number of direct gets and rels */
176 #endif /* BufStats */
177 
178  /* Automatic expansion block management functions */
179  bget_compact_t compfcn;
180  bget_acquire_t acqfcn;
181  bget_release_t relfcn;
182 
183  bget_mode_t mode; /* what allocation mode to use? */
184 
185  bufsize exp_incr; /* Expansion block size */
186  bufsize pool_len; /* 0: no bpool calls have been made
187  -1: not all pool blocks are the same size
188  >0: (common) block size for all bpool calls made so far
189  */
190  bfhead_t *last_pool; /* Last pool owned by this thread (delay deallocation) */
191 } thr_data_t;
192 
193 /* Minimum allocation quantum: */
194 #define QLSize (sizeof(qlinks_t))
195 #define SizeQ ((SizeQuant > QLSize) ? SizeQuant : QLSize)
196 #define MaxSize \
197  (bufsize)( \
198  ~(((bufsize)(1) << (sizeof(bufsize) * CHAR_BIT - 1)) | (SizeQuant - 1)))
199 // Maximum for the requested size.
200 
201 /* End sentinel: value placed in bsize field of dummy block delimiting
202  end of pool block. The most negative number which will fit in a
203  bufsize, defined in a way that the compiler will accept. */
204 
205 #define ESent \
206  ((bufsize)(-(((((bufsize)1) << ((int)sizeof(bufsize) * 8 - 2)) - 1) * 2) - 2))
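/* Note on the expression above: with n = 8 * sizeof(bufsize) bits,
   ESent = -((2^(n-2) - 1) * 2) - 2 = -(2^(n-1) - 2) - 2 = -2^(n-1), i.e. the
   most negative value a bufsize can hold (for a 64-bit bufsize, -2^63).  It is
   spelled this way because the literal cannot be written directly without
   overflowing. */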
207 
208 /* Thread Data management routines */
209 static int bget_get_bin(bufsize size) {
210  // binary chop bins
211  int lo = 0, hi = MAX_BGET_BINS - 1;
212 
213  KMP_DEBUG_ASSERT(size > 0);
214 
215  while ((hi - lo) > 1) {
216  int mid = (lo + hi) >> 1;
217  if (size < bget_bin_size[mid])
218  hi = mid - 1;
219  else
220  lo = mid;
221  }
222 
223  KMP_DEBUG_ASSERT((lo >= 0) && (lo < MAX_BGET_BINS));
224 
225  return lo;
226 }
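/* Worked example: the loop maintains the invariant bget_bin_size[lo] <= size,
   but because of the "hi = mid - 1" step it may stop one or more bins below
   the largest bin that satisfies it.  For instance, a 3000-byte request ends
   at bin 4 (1024 bytes) rather than bin 5 (2048 bytes).  This is harmless:
   free buffers are filed with the same function, so lookups stay consistent,
   and bget() scans from the returned bin upward through all larger bins. */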
227 
228 static void set_thr_data(kmp_info_t *th) {
229  int i;
230  thr_data_t *data;
231 
232  data = (thr_data_t *)((!th->th.th_local.bget_data)
233  ? __kmp_allocate(sizeof(*data))
234  : th->th.th_local.bget_data);
235 
236  memset(data, '\0', sizeof(*data));
237 
238  for (i = 0; i < MAX_BGET_BINS; ++i) {
239  data->freelist[i].ql.flink = &data->freelist[i];
240  data->freelist[i].ql.blink = &data->freelist[i];
241  }
242 
243  th->th.th_local.bget_data = data;
244  th->th.th_local.bget_list = 0;
245 #if !USE_CMP_XCHG_FOR_BGET
246 #ifdef USE_QUEUING_LOCK_FOR_BGET
247  __kmp_init_lock(&th->th.th_local.bget_lock);
248 #else
249  __kmp_init_bootstrap_lock(&th->th.th_local.bget_lock);
250 #endif /* USE_QUEUING_LOCK_FOR_BGET */
251 #endif /* ! USE_CMP_XCHG_FOR_BGET */
252 }
253 
254 static thr_data_t *get_thr_data(kmp_info_t *th) {
255  thr_data_t *data;
256 
257  data = (thr_data_t *)th->th.th_local.bget_data;
258 
259  KMP_DEBUG_ASSERT(data != 0);
260 
261  return data;
262 }
263 
264 /* Walk the free list and release the enqueued buffers */
265 static void __kmp_bget_dequeue(kmp_info_t *th) {
266  void *p = TCR_SYNC_PTR(th->th.th_local.bget_list);
267 
268  if (p != 0) {
269 #if USE_CMP_XCHG_FOR_BGET
270  {
271  volatile void *old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
272  while (!KMP_COMPARE_AND_STORE_PTR(&th->th.th_local.bget_list,
273  CCAST(void *, old_value), nullptr)) {
274  KMP_CPU_PAUSE();
275  old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
276  }
277  p = CCAST(void *, old_value);
278  }
279 #else /* ! USE_CMP_XCHG_FOR_BGET */
280 #ifdef USE_QUEUING_LOCK_FOR_BGET
281  __kmp_acquire_lock(&th->th.th_local.bget_lock, __kmp_gtid_from_thread(th));
282 #else
283  __kmp_acquire_bootstrap_lock(&th->th.th_local.bget_lock);
284 #endif /* USE_QUEUING_LOCK_FOR_BGET */
285 
286  p = (void *)th->th.th_local.bget_list;
287  th->th.th_local.bget_list = 0;
288 
289 #ifdef USE_QUEUING_LOCK_FOR_BGET
290  __kmp_release_lock(&th->th.th_local.bget_lock, __kmp_gtid_from_thread(th));
291 #else
292  __kmp_release_bootstrap_lock(&th->th.th_local.bget_lock);
293 #endif
294 #endif /* USE_CMP_XCHG_FOR_BGET */
295 
296  /* Check again to make sure the list is not empty */
297  while (p != 0) {
298  void *buf = p;
299  bfhead_t *b = BFH(((char *)p) - sizeof(bhead_t));
300 
301  KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
302  KMP_DEBUG_ASSERT(((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1) ==
303  (kmp_uintptr_t)th); // clear possible mark
304  KMP_DEBUG_ASSERT(b->ql.blink == 0);
305 
306  p = (void *)b->ql.flink;
307 
308  brel(th, buf);
309  }
310  }
311 }
312 
313 /* Chain together the free buffers by using the thread owner field */
314 static void __kmp_bget_enqueue(kmp_info_t *th, void *buf
315 #ifdef USE_QUEUING_LOCK_FOR_BGET
316  ,
317  kmp_int32 rel_gtid
318 #endif
319  ) {
320  bfhead_t *b = BFH(((char *)buf) - sizeof(bhead_t));
321 
322  KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
323  KMP_DEBUG_ASSERT(((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1) ==
324  (kmp_uintptr_t)th); // clear possible mark
325 
326  b->ql.blink = 0;
327 
328  KC_TRACE(10, ("__kmp_bget_enqueue: moving buffer to T#%d list\n",
329  __kmp_gtid_from_thread(th)));
330 
331 #if USE_CMP_XCHG_FOR_BGET
332  {
333  volatile void *old_value = TCR_PTR(th->th.th_local.bget_list);
334  /* the next pointer must be set before setting bget_list to buf to avoid
335  exposing a broken list to other threads, even for an instant. */
336  b->ql.flink = BFH(CCAST(void *, old_value));
337 
338  while (!KMP_COMPARE_AND_STORE_PTR(&th->th.th_local.bget_list,
339  CCAST(void *, old_value), buf)) {
340  KMP_CPU_PAUSE();
341  old_value = TCR_PTR(th->th.th_local.bget_list);
342  /* the next pointer must be set before setting bget_list to buf to avoid
343  exposing a broken list to other threads, even for an instant. */
344  b->ql.flink = BFH(CCAST(void *, old_value));
345  }
346  }
347 #else /* ! USE_CMP_XCHG_FOR_BGET */
348 #ifdef USE_QUEUING_LOCK_FOR_BGET
349  __kmp_acquire_lock(&th->th.th_local.bget_lock, rel_gtid);
350 #else
351  __kmp_acquire_bootstrap_lock(&th->th.th_local.bget_lock);
352 #endif
353 
354  b->ql.flink = BFH(th->th.th_local.bget_list);
355  th->th.th_local.bget_list = (void *)buf;
356 
357 #ifdef USE_QUEUING_LOCK_FOR_BGET
358  __kmp_release_lock(&th->th.th_local.bget_lock, rel_gtid);
359 #else
360  __kmp_release_bootstrap_lock(&th->th.th_local.bget_lock);
361 #endif
362 #endif /* USE_CMP_XCHG_FOR_BGET */
363 }
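/* Note on the cross-thread protocol implemented by the two routines above:
   when brel() sees that a buffer belongs to another thread's pool, it calls
   __kmp_bget_enqueue() to push the buffer onto the owner's th_local.bget_list
   (a lock-free LIFO chained through ql.flink).  The owning thread later drains
   that list with __kmp_bget_dequeue() -- called at the top of bget(), bpool()
   and the public entry points -- and performs the real brel() itself, so the
   free lists of a pool are only ever touched by the thread that owns them. */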
364 
365 /* insert buffer back onto a new freelist */
366 static void __kmp_bget_insert_into_freelist(thr_data_t *thr, bfhead_t *b) {
367  int bin;
368 
369  KMP_DEBUG_ASSERT(((size_t)b) % SizeQuant == 0);
370  KMP_DEBUG_ASSERT(b->bh.bb.bsize % SizeQuant == 0);
371 
372  bin = bget_get_bin(b->bh.bb.bsize);
373 
374  KMP_DEBUG_ASSERT(thr->freelist[bin].ql.blink->ql.flink ==
375  &thr->freelist[bin]);
376  KMP_DEBUG_ASSERT(thr->freelist[bin].ql.flink->ql.blink ==
377  &thr->freelist[bin]);
378 
379  b->ql.flink = &thr->freelist[bin];
380  b->ql.blink = thr->freelist[bin].ql.blink;
381 
382  thr->freelist[bin].ql.blink = b;
383  b->ql.blink->ql.flink = b;
384 }
385 
386 /* unlink the buffer from the old freelist */
387 static void __kmp_bget_remove_from_freelist(bfhead_t *b) {
388  KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
389  KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);
390 
391  b->ql.blink->ql.flink = b->ql.flink;
392  b->ql.flink->ql.blink = b->ql.blink;
393 }
394 
395 /* GET STATS -- check info on free list */
396 static void bcheck(kmp_info_t *th, bufsize *max_free, bufsize *total_free) {
397  thr_data_t *thr = get_thr_data(th);
398  int bin;
399 
400  *total_free = *max_free = 0;
401 
402  for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
403  bfhead_t *b, *best;
404 
405  best = &thr->freelist[bin];
406  b = best->ql.flink;
407 
408  while (b != &thr->freelist[bin]) {
409  *total_free += (b->bh.bb.bsize - sizeof(bhead_t));
410  if ((best == &thr->freelist[bin]) || (b->bh.bb.bsize < best->bh.bb.bsize))
411  best = b;
412 
413  /* Link to next buffer */
414  b = b->ql.flink;
415  }
416 
417  if (*max_free < best->bh.bb.bsize)
418  *max_free = best->bh.bb.bsize;
419  }
420 
421  if (*max_free > (bufsize)sizeof(bhead_t))
422  *max_free -= sizeof(bhead_t);
423 }
424 
425 /* BGET -- Allocate a buffer. */
426 static void *bget(kmp_info_t *th, bufsize requested_size) {
427  thr_data_t *thr = get_thr_data(th);
428  bufsize size = requested_size;
429  bfhead_t *b;
430  void *buf;
431  int compactseq = 0;
432  int use_blink = 0;
433  /* For BestFit */
434  bfhead_t *best;
435 
436  if (size < 0 || size + sizeof(bhead_t) > MaxSize) {
437  return NULL;
438  }
439 
440  __kmp_bget_dequeue(th); /* Release any queued buffers */
441 
442  if (size < (bufsize)SizeQ) { // Need at least room for the queue links.
443  size = SizeQ;
444  }
445 #if defined(SizeQuant) && (SizeQuant > 1)
446  size = (size + (SizeQuant - 1)) & (~(SizeQuant - 1));
447 #endif
448 
449  size += sizeof(bhead_t); // Add overhead in allocated buffer to size required.
450  KMP_DEBUG_ASSERT(size >= 0);
451  KMP_DEBUG_ASSERT(size % SizeQuant == 0);
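/* Worked example of the size adjustment above, assuming a 64-bit build where
   SizeQuant is 16 and qlinks_t is two 8-byte pointers (so SizeQ is 16):
   a 10-byte request is first raised to SizeQ = 16 (room for the free-list
   links), already a multiple of SizeQuant; a 100-byte request rounds up to
   112.  In both cases sizeof(bhead_t) is then added, so the space taken from
   the pool is the rounded size plus the common header. */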
452 
453  use_blink = (thr->mode == bget_mode_lifo);
454 
455  /* If a compact function was provided in the call to bectl(), wrap
456  a loop around the allocation process to allow compaction to
457  intervene in case we don't find a suitable buffer in the chain. */
458 
459  for (;;) {
460  int bin;
461 
462  for (bin = bget_get_bin(size); bin < MAX_BGET_BINS; ++bin) {
463  /* Link to next buffer */
464  b = (use_blink ? thr->freelist[bin].ql.blink
465  : thr->freelist[bin].ql.flink);
466 
467  if (thr->mode == bget_mode_best) {
468  best = &thr->freelist[bin];
469 
470  /* Scan the free list searching for the first buffer big enough
471  to hold the requested size buffer. */
472  while (b != &thr->freelist[bin]) {
473  if (b->bh.bb.bsize >= (bufsize)size) {
474  if ((best == &thr->freelist[bin]) ||
475  (b->bh.bb.bsize < best->bh.bb.bsize)) {
476  best = b;
477  }
478  }
479 
480  /* Link to next buffer */
481  b = (use_blink ? b->ql.blink : b->ql.flink);
482  }
483  b = best;
484  }
485 
486  while (b != &thr->freelist[bin]) {
487  if ((bufsize)b->bh.bb.bsize >= (bufsize)size) {
488 
489  // Buffer is big enough to satisfy the request. Allocate it to the
490  // caller. We must decide whether the buffer is large enough to split
491  // into the part given to the caller and a free buffer that remains
492  // on the free list, or whether the entire buffer should be removed
493  // from the free list and given to the caller in its entirety. We
494  // only split the buffer if enough room remains for a header plus the
495  // minimum quantum of allocation.
496  if ((b->bh.bb.bsize - (bufsize)size) >
497  (bufsize)(SizeQ + (sizeof(bhead_t)))) {
498  bhead_t *ba, *bn;
499 
500  ba = BH(((char *)b) + (b->bh.bb.bsize - (bufsize)size));
501  bn = BH(((char *)ba) + size);
502 
503  KMP_DEBUG_ASSERT(bn->bb.prevfree == b->bh.bb.bsize);
504 
505  /* Subtract size from length of free block. */
506  b->bh.bb.bsize -= (bufsize)size;
507 
508  /* Link allocated buffer to the previous free buffer. */
509  ba->bb.prevfree = b->bh.bb.bsize;
510 
511  /* Plug negative size into user buffer. */
512  ba->bb.bsize = -size;
513 
514  /* Mark this buffer as owned by this thread. */
515  TCW_PTR(ba->bb.bthr,
516  th); // not an allocated address (do not mark it)
517  /* Mark buffer after this one not preceded by free block. */
518  bn->bb.prevfree = 0;
519 
520  // unlink buffer from old freelist, and reinsert into new freelist
521  __kmp_bget_remove_from_freelist(b);
522  __kmp_bget_insert_into_freelist(thr, b);
523 #if BufStats
524  thr->totalloc += (size_t)size;
525  thr->numget++; /* Increment number of bget() calls */
526 #endif
527  buf = (void *)((((char *)ba) + sizeof(bhead_t)));
528  KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
529  return buf;
530  } else {
531  bhead_t *ba;
532 
533  ba = BH(((char *)b) + b->bh.bb.bsize);
534 
535  KMP_DEBUG_ASSERT(ba->bb.prevfree == b->bh.bb.bsize);
536 
537  /* The buffer isn't big enough to split. Give the whole
538  shebang to the caller and remove it from the free list. */
539 
540  __kmp_bget_remove_from_freelist(b);
541 #if BufStats
542  thr->totalloc += (size_t)b->bh.bb.bsize;
543  thr->numget++; /* Increment number of bget() calls */
544 #endif
545  /* Negate size to mark buffer allocated. */
546  b->bh.bb.bsize = -(b->bh.bb.bsize);
547 
548  /* Mark this buffer as owned by this thread. */
549  TCW_PTR(ba->bb.bthr, th); // not an allocated address (do not mark)
550  /* Zero the back pointer in the next buffer in memory
551  to indicate that this buffer is allocated. */
552  ba->bb.prevfree = 0;
553 
554  /* Give user buffer starting at queue links. */
555  buf = (void *)&(b->ql);
556  KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
557  return buf;
558  }
559  }
560 
561  /* Link to next buffer */
562  b = (use_blink ? b->ql.blink : b->ql.flink);
563  }
564  }
565 
566  /* We failed to find a buffer. If there's a compact function defined,
567  notify it of the size requested. If it returns TRUE, try the allocation
568  again. */
569 
570  if ((thr->compfcn == 0) || (!(*thr->compfcn)(size, ++compactseq))) {
571  break;
572  }
573  }
574 
575  /* No buffer available with requested size free. */
576 
577  /* Don't give up yet -- look in the reserve supply. */
578  if (thr->acqfcn != 0) {
579  if (size > (bufsize)(thr->exp_incr - sizeof(bhead_t))) {
580  /* Request is too large to fit in a single expansion block.
581  Try to satisfy it by a direct buffer acquisition. */
582  bdhead_t *bdh;
583 
584  size += sizeof(bdhead_t) - sizeof(bhead_t);
585 
586  KE_TRACE(10, ("%%%%%% MALLOC( %d )\n", (int)size));
587 
588  /* richryan */
589  bdh = BDH((*thr->acqfcn)((bufsize)size));
590  if (bdh != NULL) {
591 
592  // Mark the buffer special by setting size field of its header to zero.
593  bdh->bh.bb.bsize = 0;
594 
595  /* Mark this buffer as owned by this thread. */
596  TCW_PTR(bdh->bh.bb.bthr, th); // don't mark buffer as allocated,
597  // because direct buffer never goes to free list
598  bdh->bh.bb.prevfree = 0;
599  bdh->tsize = size;
600 #if BufStats
601  thr->totalloc += (size_t)size;
602  thr->numget++; /* Increment number of bget() calls */
603  thr->numdget++; /* Direct bget() call count */
604 #endif
605  buf = (void *)(bdh + 1);
606  KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
607  return buf;
608  }
609 
610  } else {
611 
612  /* Try to obtain a new expansion block */
613  void *newpool;
614 
615  KE_TRACE(10, ("%%%%%% MALLOCB( %d )\n", (int)thr->exp_incr));
616 
617  /* richryan */
618  newpool = (*thr->acqfcn)((bufsize)thr->exp_incr);
619  KMP_DEBUG_ASSERT(((size_t)newpool) % SizeQuant == 0);
620  if (newpool != NULL) {
621  bpool(th, newpool, thr->exp_incr);
622  buf = bget(
623  th, requested_size); /* This can't, I say, can't get into a loop. */
624  return buf;
625  }
626  }
627  }
628 
629  /* Still no buffer available */
630 
631  return NULL;
632 }
633 
634 /* BGETZ -- Allocate a buffer and clear its contents to zero. We clear
635  the entire contents of the buffer to zero, not just the
636  region requested by the caller. */
637 
638 static void *bgetz(kmp_info_t *th, bufsize size) {
639  char *buf = (char *)bget(th, size);
640 
641  if (buf != NULL) {
642  bhead_t *b;
643  bufsize rsize;
644 
645  b = BH(buf - sizeof(bhead_t));
646  rsize = -(b->bb.bsize);
647  if (rsize == 0) {
648  bdhead_t *bd;
649 
650  bd = BDH(buf - sizeof(bdhead_t));
651  rsize = bd->tsize - (bufsize)sizeof(bdhead_t);
652  } else {
653  rsize -= sizeof(bhead_t);
654  }
655 
656  KMP_DEBUG_ASSERT(rsize >= size);
657 
658  (void)memset(buf, 0, (bufsize)rsize);
659  }
660  return ((void *)buf);
661 }
662 
663 /* BGETR -- Reallocate a buffer. This is a minimal implementation,
664  simply in terms of brel() and bget(). It could be
665  enhanced to allow the buffer to grow into adjacent free
666  blocks and to avoid moving data unnecessarily. */
667 
668 static void *bgetr(kmp_info_t *th, void *buf, bufsize size) {
669  void *nbuf;
670  bufsize osize; /* Old size of buffer */
671  bhead_t *b;
672 
673  nbuf = bget(th, size);
674  if (nbuf == NULL) { /* Acquire new buffer */
675  return NULL;
676  }
677  if (buf == NULL) {
678  return nbuf;
679  }
680  b = BH(((char *)buf) - sizeof(bhead_t));
681  osize = -b->bb.bsize;
682  if (osize == 0) {
683  /* Buffer acquired directly through acqfcn. */
684  bdhead_t *bd;
685 
686  bd = BDH(((char *)buf) - sizeof(bdhead_t));
687  osize = bd->tsize - (bufsize)sizeof(bdhead_t);
688  } else {
689  osize -= sizeof(bhead_t);
690  }
691 
692  KMP_DEBUG_ASSERT(osize > 0);
693 
694  (void)KMP_MEMCPY((char *)nbuf, (char *)buf, /* Copy the data */
695  (size_t)((size < osize) ? size : osize));
696  brel(th, buf);
697 
698  return nbuf;
699 }
700 
701 /* BREL -- Release a buffer. */
702 static void brel(kmp_info_t *th, void *buf) {
703  thr_data_t *thr = get_thr_data(th);
704  bfhead_t *b, *bn;
705  kmp_info_t *bth;
706 
707  KMP_DEBUG_ASSERT(buf != NULL);
708  KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
709 
710  b = BFH(((char *)buf) - sizeof(bhead_t));
711 
712  if (b->bh.bb.bsize == 0) { /* Directly-acquired buffer? */
713  bdhead_t *bdh;
714 
715  bdh = BDH(((char *)buf) - sizeof(bdhead_t));
716  KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
717 #if BufStats
718  thr->totalloc -= (size_t)bdh->tsize;
719  thr->numdrel++; /* Number of direct releases */
720  thr->numrel++; /* Increment number of brel() calls */
721 #endif /* BufStats */
722 #ifdef FreeWipe
723  (void)memset((char *)buf, 0x55, (size_t)(bdh->tsize - sizeof(bdhead_t)));
724 #endif /* FreeWipe */
725 
726  KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)bdh));
727 
728  KMP_DEBUG_ASSERT(thr->relfcn != 0);
729  (*thr->relfcn)((void *)bdh); /* Release it directly. */
730  return;
731  }
732 
733  bth = (kmp_info_t *)((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) &
734  ~1); // clear possible mark before comparison
735  if (bth != th) {
736  /* Add this buffer to be released by the owning thread later */
737  __kmp_bget_enqueue(bth, buf
738 #ifdef USE_QUEUING_LOCK_FOR_BGET
739  ,
740  __kmp_gtid_from_thread(th)
741 #endif
742  );
743  return;
744  }
745 
746  /* Buffer size must be negative, indicating that the buffer is allocated. */
747  if (b->bh.bb.bsize >= 0) {
748  bn = NULL;
749  }
750  KMP_DEBUG_ASSERT(b->bh.bb.bsize < 0);
751 
752  /* Back pointer in next buffer must be zero, indicating the same thing: */
753 
754  KMP_DEBUG_ASSERT(BH((char *)b - b->bh.bb.bsize)->bb.prevfree == 0);
755 
756 #if BufStats
757  thr->numrel++; /* Increment number of brel() calls */
758  thr->totalloc += (size_t)b->bh.bb.bsize;
759 #endif
760 
761  /* If the back link is nonzero, the previous buffer is free. */
762 
763  if (b->bh.bb.prevfree != 0) {
764  /* The previous buffer is free. Consolidate this buffer with it by adding
765  the length of this buffer to the previous free buffer. Note that we
766  subtract the size in the buffer being released, since it's negative to
767  indicate that the buffer is allocated. */
768  bufsize size = b->bh.bb.bsize;
769 
770  /* Make the previous buffer the one we're working on. */
771  KMP_DEBUG_ASSERT(BH((char *)b - b->bh.bb.prevfree)->bb.bsize ==
772  b->bh.bb.prevfree);
773  b = BFH(((char *)b) - b->bh.bb.prevfree);
774  b->bh.bb.bsize -= size;
775 
776  /* unlink the buffer from the old freelist */
777  __kmp_bget_remove_from_freelist(b);
778  } else {
779  /* The previous buffer is allocated. Mark this buffer size as positive
780  (i.e. free) and fall through to place the buffer on the free list as an
781  isolated free block. */
782  b->bh.bb.bsize = -b->bh.bb.bsize;
783  }
784 
785  /* insert buffer back onto a new freelist */
786  __kmp_bget_insert_into_freelist(thr, b);
787 
788  /* Now we look at the next buffer in memory, located by advancing from
789  the start of this buffer by its size, to see if that buffer is
790  free. If it is, we combine this buffer with the next one in
791  memory, dechaining the second buffer from the free list. */
792  bn = BFH(((char *)b) + b->bh.bb.bsize);
793  if (bn->bh.bb.bsize > 0) {
794 
795  /* The buffer is free. Remove it from the free list and add
796  its size to that of our buffer. */
797  KMP_DEBUG_ASSERT(BH((char *)bn + bn->bh.bb.bsize)->bb.prevfree ==
798  bn->bh.bb.bsize);
799 
800  __kmp_bget_remove_from_freelist(bn);
801 
802  b->bh.bb.bsize += bn->bh.bb.bsize;
803 
804  /* unlink the buffer from the old freelist, and reinsert it into the new
805  * freelist */
806  __kmp_bget_remove_from_freelist(b);
807  __kmp_bget_insert_into_freelist(thr, b);
808 
809  /* Finally, advance to the buffer that follows the newly
810  consolidated free block. We must set its backpointer to the
811  head of the consolidated free block. We know the next block
812  must be an allocated block because the process of recombination
813  guarantees that two free blocks will never be contiguous in
814  memory. */
815  bn = BFH(((char *)b) + b->bh.bb.bsize);
816  }
817 #ifdef FreeWipe
818  (void)memset(((char *)b) + sizeof(bfhead_t), 0x55,
819  (size_t)(b->bh.bb.bsize - sizeof(bfhead_t)));
820 #endif
821  KMP_DEBUG_ASSERT(bn->bh.bb.bsize < 0);
822 
823  /* The next buffer is allocated. Set the backpointer in it to point
824  to this buffer; the previous free buffer in memory. */
825 
826  bn->bh.bb.prevfree = b->bh.bb.bsize;
827 
828  /* If a block-release function is defined, and this free buffer
829  constitutes the entire block, release it. Note that pool_len
830  is defined in such a way that the test will fail unless all
831  pool blocks are the same size. */
832  if (thr->relfcn != 0 &&
833  b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t))) {
834 #if BufStats
835  if (thr->numpblk !=
836  1) { /* Do not release the last buffer until finalization time */
837 #endif
838 
839  KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
840  KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.bsize == ESent);
841  KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.prevfree ==
842  b->bh.bb.bsize);
843 
844  /* Unlink the buffer from the free list */
845  __kmp_bget_remove_from_freelist(b);
846 
847  KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)b));
848 
849  (*thr->relfcn)(b);
850 #if BufStats
851  thr->numprel++; /* Nr of expansion block releases */
852  thr->numpblk--; /* Total number of blocks */
853  KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
854 
855  // avoid leaving stale last_pool pointer around if it is being dealloced
856  if (thr->last_pool == b)
857  thr->last_pool = 0;
858  } else {
859  thr->last_pool = b;
860  }
861 #endif /* BufStats */
862  }
863 }
864 
865 /* BECTL -- Establish automatic pool expansion control */
866 static void bectl(kmp_info_t *th, bget_compact_t compact,
867  bget_acquire_t acquire, bget_release_t release,
868  bufsize pool_incr) {
869  thr_data_t *thr = get_thr_data(th);
870 
871  thr->compfcn = compact;
872  thr->acqfcn = acquire;
873  thr->relfcn = release;
874  thr->exp_incr = pool_incr;
875 }
876 
877 /* BPOOL -- Add a region of memory to the buffer pool. */
878 static void bpool(kmp_info_t *th, void *buf, bufsize len) {
879  /* int bin = 0; */
880  thr_data_t *thr = get_thr_data(th);
881  bfhead_t *b = BFH(buf);
882  bhead_t *bn;
883 
884  __kmp_bget_dequeue(th); /* Release any queued buffers */
885 
886 #ifdef SizeQuant
887  len &= ~(SizeQuant - 1);
888 #endif
889  if (thr->pool_len == 0) {
890  thr->pool_len = len;
891  } else if (len != thr->pool_len) {
892  thr->pool_len = -1;
893  }
894 #if BufStats
895  thr->numpget++; /* Number of block acquisitions */
896  thr->numpblk++; /* Number of blocks total */
897  KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
898 #endif /* BufStats */
899 
900  /* Since the block is initially occupied by a single free buffer,
901  it had better not be (much) larger than the largest buffer
902  whose size we can store in bhead.bb.bsize. */
903  KMP_DEBUG_ASSERT(len - sizeof(bhead_t) <= -((bufsize)ESent + 1));
904 
905  /* Clear the backpointer at the start of the block to indicate that
906  there is no free block prior to this one. That blocks
907  recombination when the first block in memory is released. */
908  b->bh.bb.prevfree = 0;
909 
910  /* Create a dummy allocated buffer at the end of the pool. This dummy
911  buffer is seen when a buffer at the end of the pool is released and
912  blocks recombination of the last buffer with the dummy buffer at
913  the end. The length in the dummy buffer is set to the largest
914  negative number to denote the end of the pool for diagnostic
915  routines (this specific value is not counted on by the actual
916  allocation and release functions). */
917  len -= sizeof(bhead_t);
918  b->bh.bb.bsize = (bufsize)len;
919  /* Set the owner of this buffer */
920  TCW_PTR(b->bh.bb.bthr,
921  (kmp_info_t *)((kmp_uintptr_t)th |
922  1)); // mark the buffer as allocated address
923 
924  /* Chain the new block to the free list. */
925  __kmp_bget_insert_into_freelist(thr, b);
926 
927 #ifdef FreeWipe
928  (void)memset(((char *)b) + sizeof(bfhead_t), 0x55,
929  (size_t)(len - sizeof(bfhead_t)));
930 #endif
931  bn = BH(((char *)b) + len);
932  bn->bb.prevfree = (bufsize)len;
933  /* Definition of ESent assumes two's complement! */
934  KMP_DEBUG_ASSERT((~0) == -1 && (bn != 0));
935 
936  bn->bb.bsize = ESent;
937 }
938 
939 /* BFREED -- Dump the free lists for this thread. */
940 static void bfreed(kmp_info_t *th) {
941  int bin = 0, count = 0;
942  int gtid = __kmp_gtid_from_thread(th);
943  thr_data_t *thr = get_thr_data(th);
944 
945 #if BufStats
946  __kmp_printf_no_lock("__kmp_printpool: T#%d total=%" KMP_UINT64_SPEC
947  " get=%" KMP_INT64_SPEC " rel=%" KMP_INT64_SPEC
948  " pblk=%" KMP_INT64_SPEC " pget=%" KMP_INT64_SPEC
949  " prel=%" KMP_INT64_SPEC " dget=%" KMP_INT64_SPEC
950  " drel=%" KMP_INT64_SPEC "\n",
951  gtid, (kmp_uint64)thr->totalloc, (kmp_int64)thr->numget,
952  (kmp_int64)thr->numrel, (kmp_int64)thr->numpblk,
953  (kmp_int64)thr->numpget, (kmp_int64)thr->numprel,
954  (kmp_int64)thr->numdget, (kmp_int64)thr->numdrel);
955 #endif
956 
957  for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
958  bfhead_t *b;
959 
960  for (b = thr->freelist[bin].ql.flink; b != &thr->freelist[bin];
961  b = b->ql.flink) {
962  bufsize bs = b->bh.bb.bsize;
963 
964  KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
965  KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);
966  KMP_DEBUG_ASSERT(bs > 0);
967 
968  count += 1;
969 
970  __kmp_printf_no_lock(
971  "__kmp_printpool: T#%d Free block: 0x%p size %6ld bytes.\n", gtid, b,
972  (long)bs);
973 #ifdef FreeWipe
974  {
975  char *lerr = ((char *)b) + sizeof(bfhead_t);
976  if ((bs > sizeof(bfhead_t)) &&
977  ((*lerr != 0x55) ||
978  (memcmp(lerr, lerr + 1, (size_t)(bs - (sizeof(bfhead_t) + 1))) !=
979  0))) {
980  __kmp_printf_no_lock("__kmp_printpool: T#%d (Contents of above "
981  "free block have been overstored.)\n",
982  gtid);
983  }
984  }
985 #endif
986  }
987  }
988 
989  if (count == 0)
990  __kmp_printf_no_lock("__kmp_printpool: T#%d No free blocks\n", gtid);
991 }
992 
993 void __kmp_initialize_bget(kmp_info_t *th) {
994  KMP_DEBUG_ASSERT(SizeQuant >= sizeof(void *) && (th != 0));
995 
996  set_thr_data(th);
997 
998  bectl(th, (bget_compact_t)0, (bget_acquire_t)malloc, (bget_release_t)free,
999  (bufsize)__kmp_malloc_pool_incr);
1000 }
1001 
1002 void __kmp_finalize_bget(kmp_info_t *th) {
1003  thr_data_t *thr;
1004  bfhead_t *b;
1005 
1006  KMP_DEBUG_ASSERT(th != 0);
1007 
1008 #if BufStats
1009  thr = (thr_data_t *)th->th.th_local.bget_data;
1010  KMP_DEBUG_ASSERT(thr != NULL);
1011  b = thr->last_pool;
1012 
1013  /* If a block-release function is defined, and this free buffer constitutes
1014  the entire block, release it. Note that pool_len is defined in such a way
1015  that the test will fail unless all pool blocks are the same size. */
1016 
1017  // Deallocate the last pool if one exists because we no longer do it in brel()
1018  if (thr->relfcn != 0 && b != 0 && thr->numpblk != 0 &&
1019  b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t))) {
1020  KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
1021  KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.bsize == ESent);
1022  KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.prevfree ==
1023  b->bh.bb.bsize);
1024 
1025  /* Unlink the buffer from the free list */
1026  __kmp_bget_remove_from_freelist(b);
1027 
1028  KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)b));
1029 
1030  (*thr->relfcn)(b);
1031  thr->numprel++; /* Nr of expansion block releases */
1032  thr->numpblk--; /* Total number of blocks */
1033  KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
1034  }
1035 #endif /* BufStats */
1036 
1037  /* Deallocate bget_data */
1038  if (th->th.th_local.bget_data != NULL) {
1039  __kmp_free(th->th.th_local.bget_data);
1040  th->th.th_local.bget_data = NULL;
1041  }
1042 }
1043 
1044 void kmpc_set_poolsize(size_t size) {
1045  bectl(__kmp_get_thread(), (bget_compact_t)0, (bget_acquire_t)malloc,
1046  (bget_release_t)free, (bufsize)size);
1047 }
1048 
1049 size_t kmpc_get_poolsize(void) {
1050  thr_data_t *p;
1051 
1052  p = get_thr_data(__kmp_get_thread());
1053 
1054  return p->exp_incr;
1055 }
1056 
1057 void kmpc_set_poolmode(int mode) {
1058  thr_data_t *p;
1059 
1060  if (mode == bget_mode_fifo || mode == bget_mode_lifo ||
1061  mode == bget_mode_best) {
1062  p = get_thr_data(__kmp_get_thread());
1063  p->mode = (bget_mode_t)mode;
1064  }
1065 }
1066 
1067 int kmpc_get_poolmode(void) {
1068  thr_data_t *p;
1069 
1070  p = get_thr_data(__kmp_get_thread());
1071 
1072  return p->mode;
1073 }
1074 
1075 void kmpc_get_poolstat(size_t *maxmem, size_t *allmem) {
1076  kmp_info_t *th = __kmp_get_thread();
1077  bufsize a, b;
1078 
1079  __kmp_bget_dequeue(th); /* Release any queued buffers */
1080 
1081  bcheck(th, &a, &b);
1082 
1083  *maxmem = a;
1084  *allmem = b;
1085 }
1086 
1087 void kmpc_poolprint(void) {
1088  kmp_info_t *th = __kmp_get_thread();
1089 
1090  __kmp_bget_dequeue(th); /* Release any queued buffers */
1091 
1092  bfreed(th);
1093 }
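/* Illustrative sequence (not part of the runtime) showing how the exported
   pool-control entry points above fit together; it assumes the calling thread
   has already been registered with the library:

     kmpc_set_poolsize(256 * 1024);       // 256 KB expansion blocks
     kmpc_set_poolmode(bget_mode_best);   // best-fit search
     void *p = kmpc_malloc(1000);
     size_t maxmem, allmem;
     kmpc_get_poolstat(&maxmem, &allmem); // largest / total free space
     kmpc_poolprint();                    // dump this thread's free lists
     kmpc_free(p);
*/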
1094 
1095 #endif // #if KMP_USE_BGET
1096 
1097 void *kmpc_malloc(size_t size) {
1098  void *ptr;
1099  ptr = bget(__kmp_entry_thread(), (bufsize)(size + sizeof(ptr)));
1100  if (ptr != NULL) {
1101  // save allocated pointer just before one returned to user
1102  *(void **)ptr = ptr;
1103  ptr = (void **)ptr + 1;
1104  }
1105  return ptr;
1106 }
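/* Note on the layout used by kmpc_malloc()/kmpc_calloc()/kmpc_realloc():
   the request is enlarged by sizeof(void *) and the pointer returned by
   bget()/bgetz() is stored in the word immediately preceding the address
   handed back to the caller:

       bget() result:  [ saved pointer ][ user data ... ]
                                        ^-- returned to the caller

   kmpc_free() and kmpc_realloc() recover the original block with
   *((void **)ptr - 1). */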
1107 
1108 #define IS_POWER_OF_TWO(n) (((n) & ((n)-1)) == 0)
1109 
1110 void *kmpc_aligned_malloc(size_t size, size_t alignment) {
1111  void *ptr;
1112  void *ptr_allocated;
1113  KMP_DEBUG_ASSERT(alignment < 32 * 1024); // Alignment should not be too big
1114  if (!IS_POWER_OF_TWO(alignment)) {
1115  // AC: do we need to issue a warning here?
1116  errno = EINVAL;
1117  return NULL;
1118  }
1119  size = size + sizeof(void *) + alignment;
1120  ptr_allocated = bget(__kmp_entry_thread(), (bufsize)size);
1121  if (ptr_allocated != NULL) {
1122  // save allocated pointer just before one returned to user
1123  ptr = (void *)(((kmp_uintptr_t)ptr_allocated + sizeof(void *) + alignment) &
1124  ~(alignment - 1));
1125  *((void **)ptr - 1) = ptr_allocated;
1126  } else {
1127  ptr = NULL;
1128  }
1129  return ptr;
1130 }
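/* Worked example for kmpc_aligned_malloc() (illustrative, 64-bit build):
   with alignment = 64 the request grows by sizeof(void *) + 64 = 72 bytes.
   If bget() returns address A, the user pointer is (A + 8 + 64) & ~63, which
   is 64-byte aligned and lies at least sizeof(void *) bytes past A, so the
   word just before it can hold the original bget() pointer for kmpc_free(). */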
1131 
1132 void *kmpc_calloc(size_t nelem, size_t elsize) {
1133  void *ptr;
1134  ptr = bgetz(__kmp_entry_thread(), (bufsize)(nelem * elsize + sizeof(ptr)));
1135  if (ptr != NULL) {
1136  // save allocated pointer just before one returned to user
1137  *(void **)ptr = ptr;
1138  ptr = (void **)ptr + 1;
1139  }
1140  return ptr;
1141 }
1142 
1143 void *kmpc_realloc(void *ptr, size_t size) {
1144  void *result = NULL;
1145  if (ptr == NULL) {
1146  // If pointer is NULL, realloc behaves like malloc.
1147  result = bget(__kmp_entry_thread(), (bufsize)(size + sizeof(ptr)));
1148  // save allocated pointer just before one returned to user
1149  if (result != NULL) {
1150  *(void **)result = result;
1151  result = (void **)result + 1;
1152  }
1153  } else if (size == 0) {
1154  // If size is 0, realloc behaves like free.
1155  // The thread must be registered by the call to kmpc_malloc() or
1156  // kmpc_calloc() before.
1157  // So it should be safe to call __kmp_get_thread(), not
1158  // __kmp_entry_thread().
1159  KMP_ASSERT(*((void **)ptr - 1));
1160  brel(__kmp_get_thread(), *((void **)ptr - 1));
1161  } else {
1162  result = bgetr(__kmp_entry_thread(), *((void **)ptr - 1),
1163  (bufsize)(size + sizeof(ptr)));
1164  if (result != NULL) {
1165  *(void **)result = result;
1166  result = (void **)result + 1;
1167  }
1168  }
1169  return result;
1170 }
1171 
1172 // NOTE: the library must have already been initialized by a previous allocate
1173 void kmpc_free(void *ptr) {
1174  if (!__kmp_init_serial) {
1175  return;
1176  }
1177  if (ptr != NULL) {
1178  kmp_info_t *th = __kmp_get_thread();
1179  __kmp_bget_dequeue(th); /* Release any queued buffers */
1180  // extract allocated pointer and free it
1181  KMP_ASSERT(*((void **)ptr - 1));
1182  brel(th, *((void **)ptr - 1));
1183  }
1184 }
1185 
1186 void *___kmp_thread_malloc(kmp_info_t *th, size_t size KMP_SRC_LOC_DECL) {
1187  void *ptr;
1188  KE_TRACE(30, ("-> __kmp_thread_malloc( %p, %d ) called from %s:%d\n", th,
1189  (int)size KMP_SRC_LOC_PARM));
1190  ptr = bget(th, (bufsize)size);
1191  KE_TRACE(30, ("<- __kmp_thread_malloc() returns %p\n", ptr));
1192  return ptr;
1193 }
1194 
1195 void *___kmp_thread_calloc(kmp_info_t *th, size_t nelem,
1196  size_t elsize KMP_SRC_LOC_DECL) {
1197  void *ptr;
1198  KE_TRACE(30, ("-> __kmp_thread_calloc( %p, %d, %d ) called from %s:%d\n", th,
1199  (int)nelem, (int)elsize KMP_SRC_LOC_PARM));
1200  ptr = bgetz(th, (bufsize)(nelem * elsize));
1201  KE_TRACE(30, ("<- __kmp_thread_calloc() returns %p\n", ptr));
1202  return ptr;
1203 }
1204 
1205 void *___kmp_thread_realloc(kmp_info_t *th, void *ptr,
1206  size_t size KMP_SRC_LOC_DECL) {
1207  KE_TRACE(30, ("-> __kmp_thread_realloc( %p, %p, %d ) called from %s:%d\n", th,
1208  ptr, (int)size KMP_SRC_LOC_PARM));
1209  ptr = bgetr(th, ptr, (bufsize)size);
1210  KE_TRACE(30, ("<- __kmp_thread_realloc() returns %p\n", ptr));
1211  return ptr;
1212 }
1213 
1214 void ___kmp_thread_free(kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL) {
1215  KE_TRACE(30, ("-> __kmp_thread_free( %p, %p ) called from %s:%d\n", th,
1216  ptr KMP_SRC_LOC_PARM));
1217  if (ptr != NULL) {
1218  __kmp_bget_dequeue(th); /* Release any queued buffers */
1219  brel(th, ptr);
1220  }
1221  KE_TRACE(30, ("<- __kmp_thread_free()\n"));
1222 }
1223 
1224 #if OMP_50_ENABLED
1225 /* OMP 5.0 Memory Management support */
1226 static int (*p_hbw_check)(void);
1227 static void *(*p_hbw_malloc)(size_t);
1228 static void (*p_hbw_free)(void *);
1229 static int (*p_hbw_set_policy)(int);
1230 static const char *kmp_mk_lib_name;
1231 static void *h_memkind;
1232 
1233 void __kmp_init_memkind() {
1234 #if KMP_OS_UNIX && KMP_DYNAMIC_LIB
1235  kmp_mk_lib_name = "libmemkind.so";
1236  h_memkind = dlopen(kmp_mk_lib_name, RTLD_LAZY);
1237  if (h_memkind) {
1238  p_hbw_check = (int (*)())dlsym(h_memkind, "hbw_check_available");
1239  p_hbw_malloc = (void *(*)(size_t))dlsym(h_memkind, "hbw_malloc");
1240  p_hbw_free = (void (*)(void *))dlsym(h_memkind, "hbw_free");
1241  p_hbw_set_policy = (int (*)(int))dlsym(h_memkind, "hbw_set_policy");
1242  if (p_hbw_check && p_hbw_malloc && p_hbw_free && p_hbw_set_policy) {
1243  __kmp_memkind_available = 1;
1244  if (p_hbw_check() == 0) {
1245  p_hbw_set_policy(1); // return NULL if there is not enough memory
1246  __kmp_hbw_mem_available = 1; // found HBW memory available
1247  }
1248  return; // success - all symbols resolved
1249  }
1250  dlclose(h_memkind); // failure
1251  h_memkind = NULL;
1252  }
1253  p_hbw_check = NULL;
1254  p_hbw_malloc = NULL;
1255  p_hbw_free = NULL;
1256  p_hbw_set_policy = NULL;
1257 #else
1258  kmp_mk_lib_name = "";
1259  h_memkind = NULL;
1260  p_hbw_check = NULL;
1261  p_hbw_malloc = NULL;
1262  p_hbw_free = NULL;
1263  p_hbw_set_policy = NULL;
1264 #endif
1265 }
1266 
1267 void __kmp_fini_memkind() {
1268 #if KMP_OS_UNIX && KMP_DYNAMIC_LIB
1269  if (h_memkind) {
1270  dlclose(h_memkind);
1271  h_memkind = NULL;
1272  }
1273  p_hbw_check = NULL;
1274  p_hbw_malloc = NULL;
1275  p_hbw_free = NULL;
1276  p_hbw_set_policy = NULL;
1277 #endif
1278 }
1279 
1280 void __kmpc_set_default_allocator(int gtid, const omp_allocator_t *allocator) {
1281  if (allocator == OMP_NULL_ALLOCATOR)
1282  allocator = omp_default_mem_alloc;
1283  KMP_DEBUG_ASSERT(
1284  allocator == omp_default_mem_alloc ||
1285  allocator == omp_large_cap_mem_alloc ||
1286  allocator == omp_const_mem_alloc || allocator == omp_high_bw_mem_alloc ||
1287  allocator == omp_low_lat_mem_alloc || allocator == omp_cgroup_mem_alloc ||
1288  allocator == omp_pteam_mem_alloc || allocator == omp_thread_mem_alloc);
1289  __kmp_threads[gtid]->th.th_def_allocator = allocator;
1290 }
1291 const omp_allocator_t *__kmpc_get_default_allocator(int gtid) {
1292  return __kmp_threads[gtid]->th.th_def_allocator;
1293 }
1294 
1295 typedef struct kmp_mem_desc { // Memory block descriptor
1296  void *ptr_alloc; // Pointer returned by allocator
1297  size_t size_a; // Size of allocated memory block (initial+descriptor+align)
1298  void *ptr_align; // Pointer to aligned memory, returned
1299  const omp_allocator_t *allocator; // allocator
1300 } kmp_mem_desc_t;
1301 static int alignment = sizeof(void *); // let's align to pointer size
1302 
1303 void *__kmpc_alloc(int gtid, size_t size, const omp_allocator_t *allocator) {
1304  KMP_DEBUG_ASSERT(__kmp_init_serial);
1305  if (allocator == OMP_NULL_ALLOCATOR)
1306  allocator = __kmp_threads[gtid]->th.th_def_allocator;
1307 
1308  int sz_desc = sizeof(kmp_mem_desc_t);
1309  void *ptr = NULL;
1310  kmp_mem_desc_t desc;
1311  kmp_uintptr_t addr; // address returned by allocator
1312  kmp_uintptr_t addr_align; // address to return to caller
1313  kmp_uintptr_t addr_descr; // address of memory block descriptor
1314 
1315  KE_TRACE(25, ("__kmpc_alloc: T#%d (%d, %p)\n", gtid, (int)size, allocator));
1316 
1317  desc.size_a = size + sz_desc + alignment;
1318  if (allocator == omp_default_mem_alloc)
1319  ptr = __kmp_allocate(desc.size_a);
1320  if (allocator == omp_high_bw_mem_alloc && __kmp_hbw_mem_available) {
1321  KMP_DEBUG_ASSERT(p_hbw_malloc != NULL);
1322  ptr = p_hbw_malloc(desc.size_a);
1323  }
1324 
1325  KE_TRACE(10, ("__kmpc_alloc: T#%d %p=alloc(%d) hbw %d\n", gtid, ptr,
1326  desc.size_a, __kmp_hbw_mem_available));
1327  if (ptr == NULL)
1328  return NULL;
1329 
1330  addr = (kmp_uintptr_t)ptr;
1331  addr_align = (addr + sz_desc + alignment - 1) & ~(alignment - 1);
1332  addr_descr = addr_align - sz_desc;
1333 
1334  desc.ptr_alloc = ptr;
1335  desc.ptr_align = (void *)addr_align;
1336  desc.allocator = allocator;
1337  *((kmp_mem_desc_t *)addr_descr) = desc; // save descriptor contents
1338  KMP_MB();
1339 
1340  KE_TRACE(25, ("__kmpc_alloc returns %p, T#%d\n", desc.ptr_align, gtid));
1341  return desc.ptr_align;
1342 }
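/* Note: like ___kmp_allocate_align() later in this file, __kmpc_alloc() hides
   a kmp_mem_desc_t just before the pointer it returns so that __kmpc_free()
   can recover both the underlying allocation and the allocator that produced
   it.  Illustrative call sequence (these entry points are normally emitted by
   the compiler for the OpenMP 5.0 allocator API rather than called by hand):

     void *p = __kmpc_alloc(gtid, 1024, omp_high_bw_mem_alloc);
     // served by hbw_malloc() when libmemkind was loaded and HBW memory is
     // available; in this version any other request that is not
     // omp_default_mem_alloc simply returns NULL
     __kmpc_free(gtid, p, omp_high_bw_mem_alloc);
*/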
1343 
1344 void __kmpc_free(int gtid, void *ptr, const omp_allocator_t *allocator) {
1345  KE_TRACE(25, ("__kmpc_free: T#%d free(%p,%p)\n", gtid, ptr, allocator));
1346  if (ptr == NULL)
1347  return;
1348 
1349  kmp_mem_desc_t desc;
1350  kmp_uintptr_t addr_align; // address to return to caller
1351  kmp_uintptr_t addr_descr; // address of memory block descriptor
1352 
1353  addr_align = (kmp_uintptr_t)ptr;
1354  addr_descr = addr_align - sizeof(kmp_mem_desc_t);
1355  desc = *((kmp_mem_desc_t *)addr_descr); // read descriptor
1356 
1357  KMP_DEBUG_ASSERT(desc.ptr_align == ptr);
1358  if (allocator) {
1359  KMP_DEBUG_ASSERT(desc.allocator == allocator);
1360  } else {
1361  allocator = desc.allocator;
1362  }
1363  KMP_DEBUG_ASSERT(allocator);
1364 
1365  if (allocator == omp_default_mem_alloc)
1366  __kmp_free(desc.ptr_alloc);
1367  if (allocator == omp_high_bw_mem_alloc && __kmp_hbw_mem_available) {
1368  KMP_DEBUG_ASSERT(p_hbw_free != NULL);
1369  p_hbw_free(desc.ptr_alloc);
1370  }
1371  KE_TRACE(10, ("__kmpc_free: T#%d freed %p (%p)\n", gtid, desc.ptr_alloc,
1372  allocator));
1373 }
1374 
1375 #endif
1376 
1377 /* If LEAK_MEMORY is defined, __kmp_free() will *not* free memory. It causes
1378  memory leaks, but it may be useful for debugging memory corruptions, used
1379  freed pointers, etc. */
1380 /* #define LEAK_MEMORY */
1381 struct kmp_mem_descr { // Memory block descriptor.
1382  void *ptr_allocated; // Pointer returned by malloc(), subject for free().
1383  size_t size_allocated; // Size of allocated memory block.
1384  void *ptr_aligned; // Pointer to aligned memory, to be used by client code.
1385  size_t size_aligned; // Size of aligned memory block.
1386 };
1387 typedef struct kmp_mem_descr kmp_mem_descr_t;
1388 
1389 /* Allocate memory on requested boundary, fill allocated memory with 0x00.
1390  NULL is NEVER returned, __kmp_abort() is called in case of memory allocation
1391  error. Must use __kmp_free when freeing memory allocated by this routine! */
1392 static void *___kmp_allocate_align(size_t size,
1393  size_t alignment KMP_SRC_LOC_DECL) {
1394  /* __kmp_allocate() allocates (by call to malloc()) bigger memory block than
1395  requested to return properly aligned pointer. Original pointer returned
1396  by malloc() and size of allocated block is saved in descriptor just
1397  before the aligned pointer. This information used by __kmp_free() -- it
1398  has to pass to free() original pointer, not aligned one.
1399 
1400  +---------+------------+-----------------------------------+---------+
1401  | padding | descriptor |           aligned block           | padding |
1402  +---------+------------+-----------------------------------+---------+
1403  ^                      ^
1404  |                      |
1405  |                      +- Aligned pointer returned to caller
1406  +- Pointer returned by malloc()
1407 
1408  The aligned block is filled with zeros; the padding is filled with 0xEF. */
1409 
1410  kmp_mem_descr_t descr;
1411  kmp_uintptr_t addr_allocated; // Address returned by malloc().
1412  kmp_uintptr_t addr_aligned; // Aligned address to return to caller.
1413  kmp_uintptr_t addr_descr; // Address of memory block descriptor.
1414 
1415  KE_TRACE(25, ("-> ___kmp_allocate_align( %d, %d ) called from %s:%d\n",
1416  (int)size, (int)alignment KMP_SRC_LOC_PARM));
1417 
1418  KMP_DEBUG_ASSERT(alignment < 32 * 1024); // Alignment should not be too big
1419  KMP_DEBUG_ASSERT(sizeof(void *) <= sizeof(kmp_uintptr_t));
1420  // Make sure kmp_uintptr_t is enough to store addresses.
1421 
1422  descr.size_aligned = size;
1423  descr.size_allocated =
1424  descr.size_aligned + sizeof(kmp_mem_descr_t) + alignment;
1425 
1426 #if KMP_DEBUG
1427  descr.ptr_allocated = _malloc_src_loc(descr.size_allocated, _file_, _line_);
1428 #else
1429  descr.ptr_allocated = malloc_src_loc(descr.size_allocated KMP_SRC_LOC_PARM);
1430 #endif
1431  KE_TRACE(10, (" malloc( %d ) returned %p\n", (int)descr.size_allocated,
1432  descr.ptr_allocated));
1433  if (descr.ptr_allocated == NULL) {
1434  KMP_FATAL(OutOfHeapMemory);
1435  }
1436 
1437  addr_allocated = (kmp_uintptr_t)descr.ptr_allocated;
1438  addr_aligned =
1439  (addr_allocated + sizeof(kmp_mem_descr_t) + alignment) & ~(alignment - 1);
1440  addr_descr = addr_aligned - sizeof(kmp_mem_descr_t);
1441 
1442  descr.ptr_aligned = (void *)addr_aligned;
1443 
1444  KE_TRACE(26, (" ___kmp_allocate_align: "
1445  "ptr_allocated=%p, size_allocated=%d, "
1446  "ptr_aligned=%p, size_aligned=%d\n",
1447  descr.ptr_allocated, (int)descr.size_allocated,
1448  descr.ptr_aligned, (int)descr.size_aligned));
1449 
1450  KMP_DEBUG_ASSERT(addr_allocated <= addr_descr);
1451  KMP_DEBUG_ASSERT(addr_descr + sizeof(kmp_mem_descr_t) == addr_aligned);
1452  KMP_DEBUG_ASSERT(addr_aligned + descr.size_aligned <=
1453  addr_allocated + descr.size_allocated);
1454  KMP_DEBUG_ASSERT(addr_aligned % alignment == 0);
1455 #ifdef KMP_DEBUG
1456  memset(descr.ptr_allocated, 0xEF, descr.size_allocated);
1457 // Fill allocated memory block with 0xEF.
1458 #endif
1459  memset(descr.ptr_aligned, 0x00, descr.size_aligned);
1460  // Fill the aligned memory block (which is intended for use by the caller)
1461  // with 0x00.
1462  // Do not put this filling under a KMP_DEBUG condition! Many callers
1463  // expect zeroed memory.
1464  // (Padding bytes remain filled with 0xEF in the debug library.)
1465  *((kmp_mem_descr_t *)addr_descr) = descr;
1466 
1467  KMP_MB();
1468 
1469  KE_TRACE(25, ("<- ___kmp_allocate_align() returns %p\n", descr.ptr_aligned));
1470  return descr.ptr_aligned;
1471 } // func ___kmp_allocate_align
1472 
1473 /* Allocate memory on cache line boundary, fill allocated memory with 0x00.
1474  Do not call this func directly! Use __kmp_allocate macro instead.
1475  NULL is NEVER returned, __kmp_abort() is called in case of memory allocation
1476  error. Must use __kmp_free when freeing memory allocated by this routine! */
1477 void *___kmp_allocate(size_t size KMP_SRC_LOC_DECL) {
1478  void *ptr;
1479  KE_TRACE(25, ("-> __kmp_allocate( %d ) called from %s:%d\n",
1480  (int)size KMP_SRC_LOC_PARM));
1481  ptr = ___kmp_allocate_align(size, __kmp_align_alloc KMP_SRC_LOC_PARM);
1482  KE_TRACE(25, ("<- __kmp_allocate() returns %p\n", ptr));
1483  return ptr;
1484 } // func ___kmp_allocate
1485 
1486 /* Allocate memory on page boundary, fill allocated memory with 0x00.
1487  Do not call this func directly! Use __kmp_page_allocate macro instead.
1488  NULL is NEVER returned, __kmp_abort() is called in case of memory allocation
1489  error. Must use __kmp_free when freeing memory allocated by this routine! */
1490 void *___kmp_page_allocate(size_t size KMP_SRC_LOC_DECL) {
1491  int page_size = 8 * 1024;
1492  void *ptr;
1493 
1494  KE_TRACE(25, ("-> __kmp_page_allocate( %d ) called from %s:%d\n",
1495  (int)size KMP_SRC_LOC_PARM));
1496  ptr = ___kmp_allocate_align(size, page_size KMP_SRC_LOC_PARM);
1497  KE_TRACE(25, ("<- __kmp_page_allocate( %d ) returns %p\n", (int)size, ptr));
1498  return ptr;
1499 } // ___kmp_page_allocate
1500 
1501 /* Free memory allocated by __kmp_allocate() and __kmp_page_allocate().
1502  In debug mode, fill the memory block with 0xEF before call to free(). */
1503 void ___kmp_free(void *ptr KMP_SRC_LOC_DECL) {
1504  kmp_mem_descr_t descr;
1505  kmp_uintptr_t addr_allocated; // Address returned by malloc().
1506  kmp_uintptr_t addr_aligned; // Aligned address passed by caller.
1507 
1508  KE_TRACE(25,
1509  ("-> __kmp_free( %p ) called from %s:%d\n", ptr KMP_SRC_LOC_PARM));
1510  KMP_ASSERT(ptr != NULL);
1511 
1512  descr = *(kmp_mem_descr_t *)((kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t));
1513 
1514  KE_TRACE(26, (" __kmp_free: "
1515  "ptr_allocated=%p, size_allocated=%d, "
1516  "ptr_aligned=%p, size_aligned=%d\n",
1517  descr.ptr_allocated, (int)descr.size_allocated,
1518  descr.ptr_aligned, (int)descr.size_aligned));
1519 
1520  addr_allocated = (kmp_uintptr_t)descr.ptr_allocated;
1521  addr_aligned = (kmp_uintptr_t)descr.ptr_aligned;
1522 
1523  KMP_DEBUG_ASSERT(addr_aligned % CACHE_LINE == 0);
1524  KMP_DEBUG_ASSERT(descr.ptr_aligned == ptr);
1525  KMP_DEBUG_ASSERT(addr_allocated + sizeof(kmp_mem_descr_t) <= addr_aligned);
1526  KMP_DEBUG_ASSERT(descr.size_aligned < descr.size_allocated);
1527  KMP_DEBUG_ASSERT(addr_aligned + descr.size_aligned <=
1528  addr_allocated + descr.size_allocated);
1529 
1530 #ifdef KMP_DEBUG
1531  memset(descr.ptr_allocated, 0xEF, descr.size_allocated);
1532 // Fill memory block with 0xEF, it helps catch using freed memory.
1533 #endif
1534 
1535 #ifndef LEAK_MEMORY
1536  KE_TRACE(10, (" free( %p )\n", descr.ptr_allocated));
1537 #ifdef KMP_DEBUG
1538  _free_src_loc(descr.ptr_allocated, _file_, _line_);
1539 #else
1540  free_src_loc(descr.ptr_allocated KMP_SRC_LOC_PARM);
1541 #endif
1542 #endif
1543  KMP_MB();
1544  KE_TRACE(25, ("<- __kmp_free() returns\n"));
1545 } // func ___kmp_free
1546 
1547 #if USE_FAST_MEMORY == 3
1548 // Allocate fast memory by first scanning the thread's free lists
1549 // If a chunk the right size exists, grab it off the free list.
1550 // Otherwise allocate normally using kmp_thread_malloc.
1551 
1552 // AC: How to choose the limit? Just get 16 for now...
1553 #define KMP_FREE_LIST_LIMIT 16
1554 
1555 // Always use 128 bytes for determining buckets for caching memory blocks
1556 #define DCACHE_LINE 128
1557 
1558 void *___kmp_fast_allocate(kmp_info_t *this_thr, size_t size KMP_SRC_LOC_DECL) {
1559  void *ptr;
1560  int num_lines;
1561  int idx;
1562  int index;
1563  void *alloc_ptr;
1564  size_t alloc_size;
1565  kmp_mem_descr_t *descr;
1566 
1567  KE_TRACE(25, ("-> __kmp_fast_allocate( T#%d, %d ) called from %s:%d\n",
1568  __kmp_gtid_from_thread(this_thr), (int)size KMP_SRC_LOC_PARM));
1569 
1570  num_lines = (size + DCACHE_LINE - 1) / DCACHE_LINE;
1571  idx = num_lines - 1;
1572  KMP_DEBUG_ASSERT(idx >= 0);
1573  if (idx < 2) {
1574  index = 0; // idx is [ 0, 1 ], use first free list
1575  num_lines = 2; // 1, 2 cache lines or less than cache line
1576  } else if ((idx >>= 2) == 0) {
1577  index = 1; // idx is [ 2, 3 ], use second free list
1578  num_lines = 4; // 3, 4 cache lines
1579  } else if ((idx >>= 2) == 0) {
1580  index = 2; // idx is [ 4, 15 ], use third free list
1581  num_lines = 16; // 5, 6, ..., 16 cache lines
1582  } else if ((idx >>= 2) == 0) {
1583  index = 3; // idx is [ 16, 63 ], use fourth free list
1584  num_lines = 64; // 17, 18, ..., 64 cache lines
1585  } else {
1586  goto alloc_call; // 65 or more cache lines ( > 8KB ), don't use free lists
1587  }
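/* Worked example of the bucket mapping above: a 300-byte request needs
   (300 + 127) / 128 = 3 cache lines, so idx starts at 2; the first idx >>= 2
   yields 0, selecting free list index 1 and rounding the block up to 4 lines
   (512 bytes).  Anything above 64 lines (8 KB) bypasses the free lists and
   goes straight to bget() via alloc_call. */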
1588 
1589  ptr = this_thr->th.th_free_lists[index].th_free_list_self;
1590  if (ptr != NULL) {
1591  // pop the head of no-sync free list
1592  this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
1593  KMP_DEBUG_ASSERT(
1594  this_thr ==
1595  ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t)))
1596  ->ptr_aligned);
1597  goto end;
1598  }
1599  ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync);
1600  if (ptr != NULL) {
1601  // no-sync free list is empty, use sync free list (filled in by other
1602  // threads only)
1603  // pop the head of the sync free list, push NULL instead
1604  while (!KMP_COMPARE_AND_STORE_PTR(
1605  &this_thr->th.th_free_lists[index].th_free_list_sync, ptr, nullptr)) {
1606  KMP_CPU_PAUSE();
1607  ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync);
1608  }
1609  // push the rest of chain into no-sync free list (can be NULL if there was
1610  // the only block)
1611  this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
1612  KMP_DEBUG_ASSERT(
1613  this_thr ==
1614  ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t)))
1615  ->ptr_aligned);
1616  goto end;
1617  }
1618 
1619 alloc_call:
1620  // haven't found block in the free lists, thus allocate it
1621  size = num_lines * DCACHE_LINE;
1622 
1623  alloc_size = size + sizeof(kmp_mem_descr_t) + DCACHE_LINE;
1624  KE_TRACE(25, ("__kmp_fast_allocate: T#%d Calling __kmp_thread_malloc with "
1625  "alloc_size %d\n",
1626  __kmp_gtid_from_thread(this_thr), alloc_size));
1627  alloc_ptr = bget(this_thr, (bufsize)alloc_size);
1628 
1629  // align ptr to DCACHE_LINE
1630  ptr = (void *)((((kmp_uintptr_t)alloc_ptr) + sizeof(kmp_mem_descr_t) +
1631  DCACHE_LINE) &
1632  ~(DCACHE_LINE - 1));
1633  descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t));
1634 
1635  descr->ptr_allocated = alloc_ptr; // remember allocated pointer
1636  // we don't need size_allocated
1637  descr->ptr_aligned = (void *)this_thr; // remember allocating thread
1638  // (it is already saved in bget buffer,
1639  // but we may want to use another allocator in future)
1640  descr->size_aligned = size;
1641 
1642 end:
1643  KE_TRACE(25, ("<- __kmp_fast_allocate( T#%d ) returns %p\n",
1644  __kmp_gtid_from_thread(this_thr), ptr));
1645  return ptr;
1646 } // func __kmp_fast_allocate
1647 
1648 // Free fast memory and place it on the thread's free list if it is of
1649 // the correct size.
1650 void ___kmp_fast_free(kmp_info_t *this_thr, void *ptr KMP_SRC_LOC_DECL) {
1651  kmp_mem_descr_t *descr;
1652  kmp_info_t *alloc_thr;
1653  size_t size;
1654  size_t idx;
1655  int index;
1656 
1657  KE_TRACE(25, ("-> __kmp_fast_free( T#%d, %p ) called from %s:%d\n",
1658  __kmp_gtid_from_thread(this_thr), ptr KMP_SRC_LOC_PARM));
1659  KMP_ASSERT(ptr != NULL);
1660 
1661  descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t));
1662 
1663  KE_TRACE(26, (" __kmp_fast_free: size_aligned=%d\n",
1664  (int)descr->size_aligned));
1665 
1666  size = descr->size_aligned; // 2, 4, 16, 64, 65, 66, ... cache lines
1667 
1668  idx = DCACHE_LINE * 2; // 2 cache lines is minimal size of block
1669  if (idx == size) {
1670  index = 0; // 2 cache lines
1671  } else if ((idx <<= 1) == size) {
1672  index = 1; // 4 cache lines
1673  } else if ((idx <<= 2) == size) {
1674  index = 2; // 16 cache lines
1675  } else if ((idx <<= 2) == size) {
1676  index = 3; // 64 cache lines
1677  } else {
1678  KMP_DEBUG_ASSERT(size > DCACHE_LINE * 64);
1679  goto free_call; // 65 or more cache lines ( > 8KB )
1680  }
1681 
1682  alloc_thr = (kmp_info_t *)descr->ptr_aligned; // get thread owning the block
1683  if (alloc_thr == this_thr) {
1684  // push block to self no-sync free list, linking previous head (LIFO)
1685  *((void **)ptr) = this_thr->th.th_free_lists[index].th_free_list_self;
1686  this_thr->th.th_free_lists[index].th_free_list_self = ptr;
1687  } else {
1688  void *head = this_thr->th.th_free_lists[index].th_free_list_other;
1689  if (head == NULL) {
1690  // Create new free list
1691  this_thr->th.th_free_lists[index].th_free_list_other = ptr;
1692  *((void **)ptr) = NULL; // mark the tail of the list
1693  descr->size_allocated = (size_t)1; // head of the list keeps its length
1694  } else {
1695  // need to check existed "other" list's owner thread and size of queue
1696  kmp_mem_descr_t *dsc =
1697  (kmp_mem_descr_t *)((char *)head - sizeof(kmp_mem_descr_t));
1698  // allocating thread, same for all queue nodes
1699  kmp_info_t *q_th = (kmp_info_t *)(dsc->ptr_aligned);
1700  size_t q_sz =
1701  dsc->size_allocated + 1; // new size in case we add current task
1702  if (q_th == alloc_thr && q_sz <= KMP_FREE_LIST_LIMIT) {
1703  // we can add current task to "other" list, no sync needed
1704  *((void **)ptr) = head;
1705  descr->size_allocated = q_sz;
1706  this_thr->th.th_free_lists[index].th_free_list_other = ptr;
1707  } else {
1708  // either the queue's owning thread has changed or the size limit was exceeded:
1709  // return the old queue to the allocating thread (q_th) synchronously,
1710  // and start a new list for alloc_thr's blocks
1711  void *old_ptr;
1712  void *tail = head;
1713  void *next = *((void **)head);
1714  while (next != NULL) {
1715  KMP_DEBUG_ASSERT(
1716  // queue size should decrease by 1 each step through the list
1717  ((kmp_mem_descr_t *)((char *)next - sizeof(kmp_mem_descr_t)))
1718  ->size_allocated +
1719  1 ==
1720  ((kmp_mem_descr_t *)((char *)tail - sizeof(kmp_mem_descr_t)))
1721  ->size_allocated);
1722  tail = next; // remember tail node
1723  next = *((void **)next);
1724  }
1725  KMP_DEBUG_ASSERT(q_th != NULL);
1726  // push block to owner's sync free list
1727  old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync);
1728  /* the next pointer must be set before setting free_list to ptr to avoid
1729  exposing a broken list to other threads, even for an instant. */
1730  *((void **)tail) = old_ptr;
1731 
1732  while (!KMP_COMPARE_AND_STORE_PTR(
1733  &q_th->th.th_free_lists[index].th_free_list_sync, old_ptr, head)) {
1734  KMP_CPU_PAUSE();
1735  old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync);
1736  *((void **)tail) = old_ptr;
1737  }
1738 
1739  // start a new list of not-self blocks
1740  this_thr->th.th_free_lists[index].th_free_list_other = ptr;
1741  *((void **)ptr) = NULL;
1742  descr->size_allocated = (size_t)1; // head of queue keeps its length
1743  }
1744  }
1745  }
1746  goto end;
1747 
1748 free_call:
1749  KE_TRACE(25, ("__kmp_fast_free: T#%d Calling __kmp_thread_free for size %d\n",
1750  __kmp_gtid_from_thread(this_thr), size));
1751  __kmp_bget_dequeue(this_thr); /* Release any queued buffers */
1752  brel(this_thr, descr->ptr_allocated);
1753 
1754 end:
1755  KE_TRACE(25, ("<- __kmp_fast_free() returns\n"));
1756 
1757 } // func __kmp_fast_free
1758 
1759 // Initialize the thread free lists related to fast memory
1760 // Only do this when a thread is initially created.
1761 void __kmp_initialize_fast_memory(kmp_info_t *this_thr) {
1762  KE_TRACE(10, ("__kmp_initialize_fast_memory: Called from th %p\n", this_thr));
1763 
1764  memset(this_thr->th.th_free_lists, 0, NUM_LISTS * sizeof(kmp_free_list_t));
1765 }
1766 
1767 // Free the memory in the thread free lists related to fast memory
1768 // Only do this when a thread is being reaped (destroyed).
1769 void __kmp_free_fast_memory(kmp_info_t *th) {
1770  // Suppose we use BGET underlying allocator, walk through its structures...
1771  int bin;
1772  thr_data_t *thr = get_thr_data(th);
1773  void **lst = NULL;
1774 
1775  KE_TRACE(
1776  5, ("__kmp_free_fast_memory: Called T#%d\n", __kmp_gtid_from_thread(th)));
1777 
1778  __kmp_bget_dequeue(th); // Release any queued buffers
1779 
1780  // Dig through free lists and extract all allocated blocks
1781  for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
1782  bfhead_t *b = thr->freelist[bin].ql.flink;
1783  while (b != &thr->freelist[bin]) {
1784  if ((kmp_uintptr_t)b->bh.bb.bthr & 1) { // the buffer is allocated address
1785  *((void **)b) =
1786  lst; // link the list (override bthr, but keep flink yet)
1787  lst = (void **)b; // push b into lst
1788  }
1789  b = b->ql.flink; // get next buffer
1790  }
1791  }
1792  while (lst != NULL) {
1793  void *next = *lst;
1794  KE_TRACE(10, ("__kmp_free_fast_memory: freeing %p, next=%p th %p (%d)\n",
1795  lst, next, th, __kmp_gtid_from_thread(th)));
1796  (*thr->relfcn)(lst);
1797 #if BufStats
1798  // count blocks to prevent problems in __kmp_finalize_bget()
1799  thr->numprel++; /* Nr of expansion block releases */
1800  thr->numpblk--; /* Total number of blocks */
1801 #endif
1802  lst = (void **)next;
1803  }
1804 
1805  KE_TRACE(
1806  5, ("__kmp_free_fast_memory: Freed T#%d\n", __kmp_gtid_from_thread(th)));
1807 }
1808 
1809 #endif // USE_FAST_MEMORY