LLVM OpenMP* Runtime Library
kmp_alloc.cpp
/*
 * kmp_alloc.cpp -- private/shared dynamic memory allocation and management
 */

//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//

#include "kmp.h"
#include "kmp_io.h"
#include "kmp_wrapper_malloc.h"

// Disable bget when it is not used
#if KMP_USE_BGET

/* Thread private buffer management code */

typedef int (*bget_compact_t)(size_t, int);
typedef void *(*bget_acquire_t)(size_t);
typedef void (*bget_release_t)(void *);

/* NOTE: bufsize must be a signed datatype */

#if KMP_OS_WINDOWS
#if KMP_ARCH_X86 || KMP_ARCH_ARM
typedef kmp_int32 bufsize;
#else
typedef kmp_int64 bufsize;
#endif
#else
typedef ssize_t bufsize;
#endif

/* The three modes of operation are: FIFO search, LIFO search, and best fit */

typedef enum bget_mode {
  bget_mode_fifo = 0,
  bget_mode_lifo = 1,
  bget_mode_best = 2
} bget_mode_t;

static void bpool(kmp_info_t *th, void *buffer, bufsize len);
static void *bget(kmp_info_t *th, bufsize size);
static void *bgetz(kmp_info_t *th, bufsize size);
static void *bgetr(kmp_info_t *th, void *buffer, bufsize newsize);
static void brel(kmp_info_t *th, void *buf);
static void bectl(kmp_info_t *th, bget_compact_t compact,
                  bget_acquire_t acquire, bget_release_t release,
                  bufsize pool_incr);

#ifdef KMP_DEBUG
static void bstats(kmp_info_t *th, bufsize *curalloc, bufsize *totfree,
                   bufsize *maxfree, long *nget, long *nrel);
static void bstatse(kmp_info_t *th, bufsize *pool_incr, long *npool,
                    long *npget, long *nprel, long *ndget, long *ndrel);
static void bufdump(kmp_info_t *th, void *buf);
static void bpoold(kmp_info_t *th, void *pool, int dumpalloc, int dumpfree);
static int bpoolv(kmp_info_t *th, void *pool);
#endif

/* BGET CONFIGURATION */
/* Buffer allocation size quantum: all buffers allocated are a
   multiple of this size. This MUST be a power of two. */

/* On IA-32 architecture with Linux* OS, malloc() does not
   ensure 16-byte alignment */

#if KMP_ARCH_X86 || !KMP_HAVE_QUAD

#define SizeQuant 8
#define AlignType double

#else

#define SizeQuant 16
#define AlignType _Quad

#endif
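
/* A worked example of the quantum (illustrative, assuming SizeQuant == 8):
   bget() below rounds every request up with the power-of-two mask

     size = (size + (SizeQuant - 1)) & (~(SizeQuant - 1));

   so a 10-byte request becomes (10 + 7) & ~7 == 16 bytes before the bhead_t
   overhead is added. The mask only clears the low bits correctly because
   SizeQuant is a power of two. */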

// Define this symbol to enable the bstats() function which calculates the
// total free space in the buffer pool, the largest available buffer, and the
// total space currently allocated.
#define BufStats 1

#ifdef KMP_DEBUG

// Define this symbol to enable the bpoold() function which dumps the buffers
// in a buffer pool.
#define BufDump 1

// Define this symbol to enable the bpoolv() function for validating a buffer
// pool.
#define BufValid 1

// Define this symbol to enable the bufdump() function which allows dumping the
// contents of an allocated or free buffer.
#define DumpData 1

#ifdef NOT_USED_NOW

// Wipe free buffers to a guaranteed pattern of garbage to trip up miscreants
// who attempt to use pointers into released buffers.
#define FreeWipe 1

// Use a best fit algorithm when searching for space for an allocation request.
// This uses memory more efficiently, but allocation will be much slower.
#define BestFit 1

#endif /* NOT_USED_NOW */
#endif /* KMP_DEBUG */

static bufsize bget_bin_size[] = {
    0,
    // 1 << 6, /* .5 Cache line */
    1 << 7, /* 1 Cache line, new */
    1 << 8, /* 2 Cache lines */
    1 << 9, /* 4 Cache lines, new */
    1 << 10, /* 8 Cache lines */
    1 << 11, /* 16 Cache lines, new */
    1 << 12, 1 << 13, /* new */
    1 << 14, 1 << 15, /* new */
    1 << 16, 1 << 17, 1 << 18, 1 << 19, 1 << 20, /* 1MB */
    1 << 21, /* 2MB */
    1 << 22, /* 4MB */
    1 << 23, /* 8MB */
    1 << 24, /* 16MB */
    1 << 25, /* 32MB */
};

#define MAX_BGET_BINS (int)(sizeof(bget_bin_size) / sizeof(bufsize))
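
/* Illustrative note: bget_bin_size[] has 20 entries (the leading 0 plus the
   powers of two 1<<7 through 1<<25), so MAX_BGET_BINS evaluates to 20 and
   each thread keeps 20 free-list heads in its thr_data_t below. */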

struct bfhead;

// Declare the interface, including the requested buffer size type, bufsize.

/* Queue links */
typedef struct qlinks {
  struct bfhead *flink; /* Forward link */
  struct bfhead *blink; /* Backward link */
} qlinks_t;

/* Header in allocated and free buffers */
typedef struct bhead2 {
  kmp_info_t *bthr; /* The thread which owns the buffer pool */
  bufsize prevfree; /* Relative link back to previous free buffer in memory or
                       0 if previous buffer is allocated. */
  bufsize bsize; /* Buffer size: positive if free, negative if allocated. */
} bhead2_t;

/* Make sure the bhead structure is a multiple of SizeQuant in size. */
typedef union bhead {
  KMP_ALIGN(SizeQuant)
  AlignType b_align;
  char b_pad[sizeof(bhead2_t) + (SizeQuant - (sizeof(bhead2_t) % SizeQuant))];
  bhead2_t bb;
} bhead_t;
#define BH(p) ((bhead_t *)(p))

/* Header in directly allocated buffers (by acqfcn) */
typedef struct bdhead {
  bufsize tsize; /* Total size, including overhead */
  bhead_t bh; /* Common header */
} bdhead_t;
#define BDH(p) ((bdhead_t *)(p))

/* Header in free buffers */
typedef struct bfhead {
  bhead_t bh; /* Common allocated/free header */
  qlinks_t ql; /* Links on free list */
} bfhead_t;
#define BFH(p) ((bfhead_t *)(p))

typedef struct thr_data {
  bfhead_t freelist[MAX_BGET_BINS];
#if BufStats
  size_t totalloc; /* Total space currently allocated */
  long numget, numrel; /* Number of bget() and brel() calls */
  long numpblk; /* Number of pool blocks */
  long numpget, numprel; /* Number of block gets and rels */
  long numdget, numdrel; /* Number of direct gets and rels */
#endif /* BufStats */

  /* Automatic expansion block management functions */
  bget_compact_t compfcn;
  bget_acquire_t acqfcn;
  bget_release_t relfcn;

  bget_mode_t mode; /* what allocation mode to use? */

  bufsize exp_incr; /* Expansion block size */
  bufsize pool_len; /* 0: no bpool calls have been made
                       -1: not all pool blocks are the same size
                       >0: (common) block size for all bpool calls made so far
                    */
  bfhead_t *last_pool; /* Last pool owned by this thread (delay deallocation) */
} thr_data_t;

/* Minimum allocation quantum: */
#define QLSize (sizeof(qlinks_t))
#define SizeQ ((SizeQuant > QLSize) ? SizeQuant : QLSize)
#define MaxSize                                                                \
  (bufsize)(                                                                   \
      ~(((bufsize)(1) << (sizeof(bufsize) * CHAR_BIT - 1)) | (SizeQuant - 1)))
// Maximum for the requested size.

/* End sentinel: value placed in bsize field of dummy block delimiting
   end of pool block. The most negative number which will fit in a
   bufsize, defined in a way that the compiler will accept. */

#define ESent                                                                  \
  ((bufsize)(-(((((bufsize)1) << ((int)sizeof(bufsize) * 8 - 2)) - 1) * 2) - 2))

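/* Worked example (illustrative, for a 64-bit bufsize with SizeQuant == 8):
   MaxSize clears the sign bit and the low quantum bits, yielding
   0x7FFFFFFFFFFFFFF8 -- the largest positive multiple of SizeQuant. ESent
   evaluates to -(((2^62) - 1) * 2) - 2 == -2^63, the most negative kmp_int64,
   spelled this way so no intermediate constant overflows while the compiler
   evaluates it. */
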
/* Thread Data management routines */
static int bget_get_bin(bufsize size) {
  // binary chop bins
  int lo = 0, hi = MAX_BGET_BINS - 1;

  KMP_DEBUG_ASSERT(size > 0);

  while ((hi - lo) > 1) {
    int mid = (lo + hi) >> 1;
    if (size < bget_bin_size[mid])
      hi = mid - 1;
    else
      lo = mid;
  }

  KMP_DEBUG_ASSERT((lo >= 0) && (lo < MAX_BGET_BINS));

  return lo;
}
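
/* Example trace (illustrative): for size == 200 the chop above compares
   against 32768, 1024, 128 and 256 in turn and returns bin 1, the 128-byte
   bin, since 128 <= 200 < 256. Because the search uses "hi = mid - 1" it can
   occasionally settle on a smaller bin than the tightest one; this is
   harmless, since the same mapping is used both when inserting free buffers
   and when starting a search, and bget() scans from the chosen bin upward. */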

static void set_thr_data(kmp_info_t *th) {
  int i;
  thr_data_t *data;

  data = (thr_data_t *)((!th->th.th_local.bget_data)
                            ? __kmp_allocate(sizeof(*data))
                            : th->th.th_local.bget_data);

  memset(data, '\0', sizeof(*data));

  for (i = 0; i < MAX_BGET_BINS; ++i) {
    data->freelist[i].ql.flink = &data->freelist[i];
    data->freelist[i].ql.blink = &data->freelist[i];
  }

  th->th.th_local.bget_data = data;
  th->th.th_local.bget_list = 0;
#if !USE_CMP_XCHG_FOR_BGET
#ifdef USE_QUEUING_LOCK_FOR_BGET
  __kmp_init_lock(&th->th.th_local.bget_lock);
#else
  __kmp_init_bootstrap_lock(&th->th.th_local.bget_lock);
#endif /* USE_QUEUING_LOCK_FOR_BGET */
#endif /* ! USE_CMP_XCHG_FOR_BGET */
}

static thr_data_t *get_thr_data(kmp_info_t *th) {
  thr_data_t *data;

  data = (thr_data_t *)th->th.th_local.bget_data;

  KMP_DEBUG_ASSERT(data != 0);

  return data;
}

#ifdef KMP_DEBUG

static void __kmp_bget_validate_queue(kmp_info_t *th) {
  /* NOTE: assume that the global_lock is held */

  void *p = (void *)th->th.th_local.bget_list;

  while (p != 0) {
    bfhead_t *b = BFH(((char *)p) - sizeof(bhead_t));

    KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
    p = (void *)b->ql.flink;
  }
}

#endif

/* Walk the free list and release the enqueued buffers */
static void __kmp_bget_dequeue(kmp_info_t *th) {
  void *p = TCR_SYNC_PTR(th->th.th_local.bget_list);

  if (p != 0) {
#if USE_CMP_XCHG_FOR_BGET
    {
      volatile void *old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
      while (!KMP_COMPARE_AND_STORE_PTR(&th->th.th_local.bget_list,
                                        CCAST(void *, old_value), nullptr)) {
        KMP_CPU_PAUSE();
        old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
      }
      p = CCAST(void *, old_value);
    }
#else /* ! USE_CMP_XCHG_FOR_BGET */
#ifdef USE_QUEUING_LOCK_FOR_BGET
    __kmp_acquire_lock(&th->th.th_local.bget_lock, __kmp_gtid_from_thread(th));
#else
    __kmp_acquire_bootstrap_lock(&th->th.th_local.bget_lock);
#endif /* USE_QUEUING_LOCK_FOR_BGET */

    p = (void *)th->th.th_local.bget_list;
    th->th.th_local.bget_list = 0;

#ifdef USE_QUEUING_LOCK_FOR_BGET
    __kmp_release_lock(&th->th.th_local.bget_lock, __kmp_gtid_from_thread(th));
#else
    __kmp_release_bootstrap_lock(&th->th.th_local.bget_lock);
#endif
#endif /* USE_CMP_XCHG_FOR_BGET */

    /* Check again to make sure the list is not empty */
    while (p != 0) {
      void *buf = p;
      bfhead_t *b = BFH(((char *)p) - sizeof(bhead_t));

      KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
      KMP_DEBUG_ASSERT(((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1) ==
                       (kmp_uintptr_t)th); // clear possible mark
      KMP_DEBUG_ASSERT(b->ql.blink == 0);

      p = (void *)b->ql.flink;

      brel(th, buf);
    }
  }
}

/* Chain together the free buffers by using the thread owner field */
static void __kmp_bget_enqueue(kmp_info_t *th, void *buf
#ifdef USE_QUEUING_LOCK_FOR_BGET
                               ,
                               kmp_int32 rel_gtid
#endif
                               ) {
  bfhead_t *b = BFH(((char *)buf) - sizeof(bhead_t));

  KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
  KMP_DEBUG_ASSERT(((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1) ==
                   (kmp_uintptr_t)th); // clear possible mark

  b->ql.blink = 0;

  KC_TRACE(10, ("__kmp_bget_enqueue: moving buffer to T#%d list\n",
                __kmp_gtid_from_thread(th)));

#if USE_CMP_XCHG_FOR_BGET
  {
    volatile void *old_value = TCR_PTR(th->th.th_local.bget_list);
    /* the next pointer must be set before setting bget_list to buf to avoid
       exposing a broken list to other threads, even for an instant. */
    b->ql.flink = BFH(CCAST(void *, old_value));

    while (!KMP_COMPARE_AND_STORE_PTR(&th->th.th_local.bget_list,
                                      CCAST(void *, old_value), buf)) {
      KMP_CPU_PAUSE();
      old_value = TCR_PTR(th->th.th_local.bget_list);
      /* the next pointer must be set before setting bget_list to buf to avoid
         exposing a broken list to other threads, even for an instant. */
      b->ql.flink = BFH(CCAST(void *, old_value));
    }
  }
#else /* ! USE_CMP_XCHG_FOR_BGET */
#ifdef USE_QUEUING_LOCK_FOR_BGET
  __kmp_acquire_lock(&th->th.th_local.bget_lock, rel_gtid);
#else
  __kmp_acquire_bootstrap_lock(&th->th.th_local.bget_lock);
#endif

  b->ql.flink = BFH(th->th.th_local.bget_list);
  th->th.th_local.bget_list = (void *)buf;

#ifdef USE_QUEUING_LOCK_FOR_BGET
  __kmp_release_lock(&th->th.th_local.bget_lock, rel_gtid);
#else
  __kmp_release_bootstrap_lock(&th->th.th_local.bget_lock);
#endif
#endif /* USE_CMP_XCHG_FOR_BGET */
}
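
/* The enqueue above is the classic lock-free LIFO push. A minimal sketch of
   the same pattern in portable C++11 atomics (illustrative only; the runtime
   uses its own TCR_PTR/KMP_COMPARE_AND_STORE_PTR wrappers instead):

     #include <atomic>
     struct Node { Node *next; };
     std::atomic<Node *> head{nullptr};

     void push(Node *n) {
       Node *old = head.load(std::memory_order_relaxed);
       do {
         n->next = old; // link before publishing, so readers never see a gap
       } while (!head.compare_exchange_weak(old, n, std::memory_order_release,
                                            std::memory_order_relaxed));
     }
*/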

/* insert buffer back onto a new freelist */
static void __kmp_bget_insert_into_freelist(thr_data_t *thr, bfhead_t *b) {
  int bin;

  KMP_DEBUG_ASSERT(((size_t)b) % SizeQuant == 0);
  KMP_DEBUG_ASSERT(b->bh.bb.bsize % SizeQuant == 0);

  bin = bget_get_bin(b->bh.bb.bsize);

  KMP_DEBUG_ASSERT(thr->freelist[bin].ql.blink->ql.flink ==
                   &thr->freelist[bin]);
  KMP_DEBUG_ASSERT(thr->freelist[bin].ql.flink->ql.blink ==
                   &thr->freelist[bin]);

  b->ql.flink = &thr->freelist[bin];
  b->ql.blink = thr->freelist[bin].ql.blink;

  thr->freelist[bin].ql.blink = b;
  b->ql.blink->ql.flink = b;
}

/* unlink the buffer from the old freelist */
static void __kmp_bget_remove_from_freelist(bfhead_t *b) {
  KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
  KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);

  b->ql.blink->ql.flink = b->ql.flink;
  b->ql.flink->ql.blink = b->ql.blink;
}

/* GET STATS -- check info on free list */
static void bcheck(kmp_info_t *th, bufsize *max_free, bufsize *total_free) {
  thr_data_t *thr = get_thr_data(th);
  int bin;

  *total_free = *max_free = 0;

  for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
    bfhead_t *b, *best;

    best = &thr->freelist[bin];
    b = best->ql.flink;

    while (b != &thr->freelist[bin]) {
      *total_free += (b->bh.bb.bsize - sizeof(bhead_t));
      if ((best == &thr->freelist[bin]) || (b->bh.bb.bsize < best->bh.bb.bsize))
        best = b;

      /* Link to next buffer */
      b = b->ql.flink;
    }

    if (*max_free < best->bh.bb.bsize)
      *max_free = best->bh.bb.bsize;
  }

  if (*max_free > (bufsize)sizeof(bhead_t))
    *max_free -= sizeof(bhead_t);
}

/* BGET -- Allocate a buffer. */
static void *bget(kmp_info_t *th, bufsize requested_size) {
  thr_data_t *thr = get_thr_data(th);
  bufsize size = requested_size;
  bfhead_t *b;
  void *buf;
  int compactseq = 0;
  int use_blink = 0;
  /* For BestFit */
  bfhead_t *best;

  if (size < 0 || size + sizeof(bhead_t) > MaxSize) {
    return NULL;
  }

  __kmp_bget_dequeue(th); /* Release any queued buffers */

  if (size < (bufsize)SizeQ) { // Need at least room for the queue links.
    size = SizeQ;
  }
#if defined(SizeQuant) && (SizeQuant > 1)
  size = (size + (SizeQuant - 1)) & (~(SizeQuant - 1));
#endif

  size += sizeof(bhead_t); // Add overhead in allocated buffer to size required.
  KMP_DEBUG_ASSERT(size >= 0);
  KMP_DEBUG_ASSERT(size % SizeQuant == 0);

  use_blink = (thr->mode == bget_mode_lifo);

  /* If a compact function was provided in the call to bectl(), wrap
     a loop around the allocation process to allow compaction to
     intervene in case we don't find a suitable buffer in the chain. */

  for (;;) {
    int bin;

    for (bin = bget_get_bin(size); bin < MAX_BGET_BINS; ++bin) {
      /* Link to next buffer */
      b = (use_blink ? thr->freelist[bin].ql.blink
                     : thr->freelist[bin].ql.flink);

      if (thr->mode == bget_mode_best) {
        best = &thr->freelist[bin];

        /* Scan the free list searching for the first buffer big enough
           to hold the requested size buffer. */
        while (b != &thr->freelist[bin]) {
          if (b->bh.bb.bsize >= (bufsize)size) {
            if ((best == &thr->freelist[bin]) ||
                (b->bh.bb.bsize < best->bh.bb.bsize)) {
              best = b;
            }
          }

          /* Link to next buffer */
          b = (use_blink ? b->ql.blink : b->ql.flink);
        }
        b = best;
      }

      while (b != &thr->freelist[bin]) {
        if ((bufsize)b->bh.bb.bsize >= (bufsize)size) {

          // Buffer is big enough to satisfy the request. Allocate it to the
          // caller. We must decide whether the buffer is large enough to split
          // into the part given to the caller and a free buffer that remains
          // on the free list, or whether the entire buffer should be removed
          // from the free list and given to the caller in its entirety. We
          // only split the buffer if enough room remains for a header plus the
          // minimum quantum of allocation.
          if ((b->bh.bb.bsize - (bufsize)size) >
              (bufsize)(SizeQ + (sizeof(bhead_t)))) {
            bhead_t *ba, *bn;

            ba = BH(((char *)b) + (b->bh.bb.bsize - (bufsize)size));
            bn = BH(((char *)ba) + size);

            KMP_DEBUG_ASSERT(bn->bb.prevfree == b->bh.bb.bsize);

            /* Subtract size from length of free block. */
            b->bh.bb.bsize -= (bufsize)size;

            /* Link allocated buffer to the previous free buffer. */
            ba->bb.prevfree = b->bh.bb.bsize;

            /* Plug negative size into user buffer. */
            ba->bb.bsize = -size;

            /* Mark this buffer as owned by this thread. */
            TCW_PTR(ba->bb.bthr,
                    th); // not an allocated address (do not mark it)
            /* Mark buffer after this one not preceded by free block. */
            bn->bb.prevfree = 0;

            // unlink buffer from old freelist, and reinsert into new freelist
            __kmp_bget_remove_from_freelist(b);
            __kmp_bget_insert_into_freelist(thr, b);
#if BufStats
            thr->totalloc += (size_t)size;
            thr->numget++; /* Increment number of bget() calls */
#endif
            buf = (void *)((((char *)ba) + sizeof(bhead_t)));
            KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
            return buf;
          } else {
            bhead_t *ba;

            ba = BH(((char *)b) + b->bh.bb.bsize);

            KMP_DEBUG_ASSERT(ba->bb.prevfree == b->bh.bb.bsize);

            /* The buffer isn't big enough to split. Give the whole
               shebang to the caller and remove it from the free list. */

            __kmp_bget_remove_from_freelist(b);
#if BufStats
            thr->totalloc += (size_t)b->bh.bb.bsize;
            thr->numget++; /* Increment number of bget() calls */
#endif
            /* Negate size to mark buffer allocated. */
            b->bh.bb.bsize = -(b->bh.bb.bsize);

            /* Mark this buffer as owned by this thread. */
            TCW_PTR(ba->bb.bthr, th); // not an allocated address (do not mark)
            /* Zero the back pointer in the next buffer in memory
               to indicate that this buffer is allocated. */
            ba->bb.prevfree = 0;

            /* Give user buffer starting at queue links. */
            buf = (void *)&(b->ql);
            KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
            return buf;
          }
        }

        /* Link to next buffer */
        b = (use_blink ? b->ql.blink : b->ql.flink);
      }
    }

    /* We failed to find a buffer. If there's a compact function defined,
       notify it of the size requested. If it returns TRUE, try the allocation
       again. */

    if ((thr->compfcn == 0) || (!(*thr->compfcn)(size, ++compactseq))) {
      break;
    }
  }

  /* No buffer available with requested size free. */

  /* Don't give up yet -- look in the reserve supply. */
  if (thr->acqfcn != 0) {
    if (size > (bufsize)(thr->exp_incr - sizeof(bhead_t))) {
      /* Request is too large to fit in a single expansion block.
         Try to satisfy it by a direct buffer acquisition. */
      bdhead_t *bdh;

      size += sizeof(bdhead_t) - sizeof(bhead_t);

      KE_TRACE(10, ("%%%%%% MALLOC( %d )\n", (int)size));

      /* richryan */
      bdh = BDH((*thr->acqfcn)((bufsize)size));
      if (bdh != NULL) {

        // Mark the buffer special by setting size field of its header to zero.
        bdh->bh.bb.bsize = 0;

        /* Mark this buffer as owned by this thread. */
        TCW_PTR(bdh->bh.bb.bthr, th); // don't mark buffer as allocated,
        // because direct buffer never goes to free list
        bdh->bh.bb.prevfree = 0;
        bdh->tsize = size;
#if BufStats
        thr->totalloc += (size_t)size;
        thr->numget++; /* Increment number of bget() calls */
        thr->numdget++; /* Direct bget() call count */
#endif
        buf = (void *)(bdh + 1);
        KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
        return buf;
      }

    } else {

      /* Try to obtain a new expansion block */
      void *newpool;

      KE_TRACE(10, ("%%%%%% MALLOCB( %d )\n", (int)thr->exp_incr));

      /* richryan */
      newpool = (*thr->acqfcn)((bufsize)thr->exp_incr);
      KMP_DEBUG_ASSERT(((size_t)newpool) % SizeQuant == 0);
      if (newpool != NULL) {
        bpool(th, newpool, thr->exp_incr);
        buf = bget(
            th, requested_size); /* This can't, I say, can't get into a loop. */
        return buf;
      }
    }
  }

  /* Still no buffer available */

  return NULL;
}
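
/* Worked example of the split rule above (illustrative, assuming
   sizeof(bhead_t) == 16 and SizeQ == 16): a request for 256 bytes becomes
   size = 256 + 16 = 272 once the header is added. A 1024-byte free block
   satisfies it, and since 1024 - 272 == 752 > SizeQ + sizeof(bhead_t) == 32,
   the block is split: the low 752 bytes stay on the free list and the high
   272 bytes (header plus payload) are returned to the caller. */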

/* BGETZ -- Allocate a buffer and clear its contents to zero. We clear
   the entire contents of the buffer to zero, not just the
   region requested by the caller. */

static void *bgetz(kmp_info_t *th, bufsize size) {
  char *buf = (char *)bget(th, size);

  if (buf != NULL) {
    bhead_t *b;
    bufsize rsize;

    b = BH(buf - sizeof(bhead_t));
    rsize = -(b->bb.bsize);
    if (rsize == 0) {
      bdhead_t *bd;

      bd = BDH(buf - sizeof(bdhead_t));
      rsize = bd->tsize - (bufsize)sizeof(bdhead_t);
    } else {
      rsize -= sizeof(bhead_t);
    }

    KMP_DEBUG_ASSERT(rsize >= size);

    (void)memset(buf, 0, (bufsize)rsize);
  }
  return ((void *)buf);
}

/* BGETR -- Reallocate a buffer. This is a minimal implementation,
   simply in terms of brel() and bget(). It could be
   enhanced to allow the buffer to grow into adjacent free
   blocks and to avoid moving data unnecessarily. */

static void *bgetr(kmp_info_t *th, void *buf, bufsize size) {
  void *nbuf;
  bufsize osize; /* Old size of buffer */
  bhead_t *b;

  nbuf = bget(th, size);
  if (nbuf == NULL) { /* Acquire new buffer */
    return NULL;
  }
  if (buf == NULL) {
    return nbuf;
  }
  b = BH(((char *)buf) - sizeof(bhead_t));
  osize = -b->bb.bsize;
  if (osize == 0) {
    /* Buffer acquired directly through acqfcn. */
    bdhead_t *bd;

    bd = BDH(((char *)buf) - sizeof(bdhead_t));
    osize = bd->tsize - (bufsize)sizeof(bdhead_t);
  } else {
    osize -= sizeof(bhead_t);
  }

  KMP_DEBUG_ASSERT(osize > 0);

  (void)KMP_MEMCPY((char *)nbuf, (char *)buf, /* Copy the data */
                   (size_t)((size < osize) ? size : osize));
  brel(th, buf);

  return nbuf;
}

/* BREL -- Release a buffer. */
static void brel(kmp_info_t *th, void *buf) {
  thr_data_t *thr = get_thr_data(th);
  bfhead_t *b, *bn;
  kmp_info_t *bth;

  KMP_DEBUG_ASSERT(buf != NULL);
  KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);

  b = BFH(((char *)buf) - sizeof(bhead_t));

  if (b->bh.bb.bsize == 0) { /* Directly-acquired buffer? */
    bdhead_t *bdh;

    bdh = BDH(((char *)buf) - sizeof(bdhead_t));
    KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
#if BufStats
    thr->totalloc -= (size_t)bdh->tsize;
    thr->numdrel++; /* Number of direct releases */
    thr->numrel++; /* Increment number of brel() calls */
#endif /* BufStats */
#ifdef FreeWipe
    (void)memset((char *)buf, 0x55, (size_t)(bdh->tsize - sizeof(bdhead_t)));
#endif /* FreeWipe */

    KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)bdh));

    KMP_DEBUG_ASSERT(thr->relfcn != 0);
    (*thr->relfcn)((void *)bdh); /* Release it directly. */
    return;
  }

  bth = (kmp_info_t *)((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) &
                       ~1); // clear possible mark before comparison
  if (bth != th) {
    /* Add this buffer to be released by the owning thread later */
    __kmp_bget_enqueue(bth, buf
#ifdef USE_QUEUING_LOCK_FOR_BGET
                       ,
                       __kmp_gtid_from_thread(th)
#endif
                       );
    return;
  }

  /* Buffer size must be negative, indicating that the buffer is allocated. */
  if (b->bh.bb.bsize >= 0) {
    bn = NULL;
  }
  KMP_DEBUG_ASSERT(b->bh.bb.bsize < 0);

  /* Back pointer in next buffer must be zero, indicating the same thing: */

  KMP_DEBUG_ASSERT(BH((char *)b - b->bh.bb.bsize)->bb.prevfree == 0);

#if BufStats
  thr->numrel++; /* Increment number of brel() calls */
  thr->totalloc += (size_t)b->bh.bb.bsize;
#endif

  /* If the back link is nonzero, the previous buffer is free. */

  if (b->bh.bb.prevfree != 0) {
    /* The previous buffer is free. Consolidate this buffer with it by adding
       the length of this buffer to the previous free buffer. Note that we
       subtract the size in the buffer being released, since it's negative to
       indicate that the buffer is allocated. */
    bufsize size = b->bh.bb.bsize;

    /* Make the previous buffer the one we're working on. */
    KMP_DEBUG_ASSERT(BH((char *)b - b->bh.bb.prevfree)->bb.bsize ==
                     b->bh.bb.prevfree);
    b = BFH(((char *)b) - b->bh.bb.prevfree);
    b->bh.bb.bsize -= size;

    /* unlink the buffer from the old freelist */
    __kmp_bget_remove_from_freelist(b);
  } else {
    /* The previous buffer isn't free. Mark this buffer's size as positive
       (i.e. free) and fall through to place the buffer on the free list as an
       isolated free block. */
    b->bh.bb.bsize = -b->bh.bb.bsize;
  }

  /* insert buffer back onto a new freelist */
  __kmp_bget_insert_into_freelist(thr, b);

  /* Now we look at the next buffer in memory, located by advancing from
     the start of this buffer by its size, to see if that buffer is
     free. If it is, we combine this buffer with the next one in
     memory, dechaining the second buffer from the free list. */
  bn = BFH(((char *)b) + b->bh.bb.bsize);
  if (bn->bh.bb.bsize > 0) {

    /* The buffer is free. Remove it from the free list and add
       its size to that of our buffer. */
    KMP_DEBUG_ASSERT(BH((char *)bn + bn->bh.bb.bsize)->bb.prevfree ==
                     bn->bh.bb.bsize);

    __kmp_bget_remove_from_freelist(bn);

    b->bh.bb.bsize += bn->bh.bb.bsize;

    /* unlink the buffer from the old freelist, and reinsert it into the new
     * freelist */
    __kmp_bget_remove_from_freelist(b);
    __kmp_bget_insert_into_freelist(thr, b);

    /* Finally, advance to the buffer that follows the newly
       consolidated free block. We must set its backpointer to the
       head of the consolidated free block. We know the next block
       must be an allocated block because the process of recombination
       guarantees that two free blocks will never be contiguous in
       memory. */
    bn = BFH(((char *)b) + b->bh.bb.bsize);
  }
#ifdef FreeWipe
  (void)memset(((char *)b) + sizeof(bfhead_t), 0x55,
               (size_t)(b->bh.bb.bsize - sizeof(bfhead_t)));
#endif
  KMP_DEBUG_ASSERT(bn->bh.bb.bsize < 0);

  /* The next buffer is allocated. Set the backpointer in it to point
     to this buffer; the previous free buffer in memory. */

  bn->bh.bb.prevfree = b->bh.bb.bsize;

  /* If a block-release function is defined, and this free buffer
     constitutes the entire block, release it. Note that pool_len
     is defined in such a way that the test will fail unless all
     pool blocks are the same size. */
  if (thr->relfcn != 0 &&
      b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t))) {
#if BufStats
    if (thr->numpblk !=
        1) { /* Do not release the last buffer until finalization time */
#endif

      KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
      KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.bsize == ESent);
      KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.prevfree ==
                       b->bh.bb.bsize);

      /* Unlink the buffer from the free list */
      __kmp_bget_remove_from_freelist(b);

      KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)b));

      (*thr->relfcn)(b);
#if BufStats
      thr->numprel++; /* Nr of expansion block releases */
      thr->numpblk--; /* Total number of blocks */
      KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);

      // avoid leaving stale last_pool pointer around if it is being dealloced
      if (thr->last_pool == b)
        thr->last_pool = 0;
    } else {
      thr->last_pool = b;
    }
#endif /* BufStats */
  }
}

/* BECTL -- Establish automatic pool expansion control */
static void bectl(kmp_info_t *th, bget_compact_t compact,
                  bget_acquire_t acquire, bget_release_t release,
                  bufsize pool_incr) {
  thr_data_t *thr = get_thr_data(th);

  thr->compfcn = compact;
  thr->acqfcn = acquire;
  thr->relfcn = release;
  thr->exp_incr = pool_incr;
}

/* BPOOL -- Add a region of memory to the buffer pool. */
static void bpool(kmp_info_t *th, void *buf, bufsize len) {
  /* int bin = 0; */
  thr_data_t *thr = get_thr_data(th);
  bfhead_t *b = BFH(buf);
  bhead_t *bn;

  __kmp_bget_dequeue(th); /* Release any queued buffers */

#ifdef SizeQuant
  len &= ~(SizeQuant - 1);
#endif
  if (thr->pool_len == 0) {
    thr->pool_len = len;
  } else if (len != thr->pool_len) {
    thr->pool_len = -1;
  }
#if BufStats
  thr->numpget++; /* Number of block acquisitions */
  thr->numpblk++; /* Number of blocks total */
  KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
#endif /* BufStats */

  /* Since the block is initially occupied by a single free buffer,
     it had better not be (much) larger than the largest buffer
     whose size we can store in bhead.bb.bsize. */
  KMP_DEBUG_ASSERT(len - sizeof(bhead_t) <= -((bufsize)ESent + 1));

  /* Clear the backpointer at the start of the block to indicate that
     there is no free block prior to this one. That blocks
     recombination when the first block in memory is released. */
  b->bh.bb.prevfree = 0;

  /* Create a dummy allocated buffer at the end of the pool. This dummy
     buffer is seen when a buffer at the end of the pool is released and
     blocks recombination of the last buffer with the dummy buffer at
     the end. The length in the dummy buffer is set to the largest
     negative number to denote the end of the pool for diagnostic
     routines (this specific value is not counted on by the actual
     allocation and release functions). */
  len -= sizeof(bhead_t);
  b->bh.bb.bsize = (bufsize)len;
  /* Set the owner of this buffer */
  TCW_PTR(b->bh.bb.bthr,
          (kmp_info_t *)((kmp_uintptr_t)th |
                         1)); // mark the buffer as allocated address

  /* Chain the new block to the free list. */
  __kmp_bget_insert_into_freelist(thr, b);

#ifdef FreeWipe
  (void)memset(((char *)b) + sizeof(bfhead_t), 0x55,
               (size_t)(len - sizeof(bfhead_t)));
#endif
  bn = BH(((char *)b) + len);
  bn->bb.prevfree = (bufsize)len;
  /* Definition of ESent assumes two's complement! */
  KMP_DEBUG_ASSERT((~0) == -1 && (bn != 0));

  bn->bb.bsize = ESent;
}
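
/* Worked example (illustrative, assuming sizeof(bhead_t) == 16): bpool() on a
   4096-byte region rounds the length down to a SizeQuant multiple, turns the
   region into a single free buffer with bsize == 4096 - 16 == 4080, and
   writes the ESent sentinel header into the final 16 bytes so the last real
   buffer can never coalesce past the end of the pool. */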

/* BFREED -- Dump the free lists for this thread. */
static void bfreed(kmp_info_t *th) {
  int bin = 0, count = 0;
  int gtid = __kmp_gtid_from_thread(th);
  thr_data_t *thr = get_thr_data(th);

#if BufStats
  __kmp_printf_no_lock("__kmp_printpool: T#%d total=%" KMP_UINT64_SPEC
                       " get=%" KMP_INT64_SPEC " rel=%" KMP_INT64_SPEC
                       " pblk=%" KMP_INT64_SPEC " pget=%" KMP_INT64_SPEC
                       " prel=%" KMP_INT64_SPEC " dget=%" KMP_INT64_SPEC
                       " drel=%" KMP_INT64_SPEC "\n",
                       gtid, (kmp_uint64)thr->totalloc, (kmp_int64)thr->numget,
                       (kmp_int64)thr->numrel, (kmp_int64)thr->numpblk,
                       (kmp_int64)thr->numpget, (kmp_int64)thr->numprel,
                       (kmp_int64)thr->numdget, (kmp_int64)thr->numdrel);
#endif

  for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
    bfhead_t *b;

    for (b = thr->freelist[bin].ql.flink; b != &thr->freelist[bin];
         b = b->ql.flink) {
      bufsize bs = b->bh.bb.bsize;

      KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
      KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);
      KMP_DEBUG_ASSERT(bs > 0);

      count += 1;

      __kmp_printf_no_lock(
          "__kmp_printpool: T#%d Free block: 0x%p size %6ld bytes.\n", gtid, b,
          (long)bs);
#ifdef FreeWipe
      {
        char *lerr = ((char *)b) + sizeof(bfhead_t);
        if ((bs > sizeof(bfhead_t)) &&
            ((*lerr != 0x55) ||
             (memcmp(lerr, lerr + 1, (size_t)(bs - (sizeof(bfhead_t) + 1))) !=
              0))) {
          __kmp_printf_no_lock("__kmp_printpool: T#%d (Contents of above "
                               "free block have been overstored.)\n",
                               gtid);
        }
      }
#endif
    }
  }

  if (count == 0)
    __kmp_printf_no_lock("__kmp_printpool: T#%d No free blocks\n", gtid);
}

#ifdef KMP_DEBUG

#if BufStats

/* BSTATS -- Return buffer allocation free space statistics. */
static void bstats(kmp_info_t *th, bufsize *curalloc, bufsize *totfree,
                   bufsize *maxfree, long *nget, long *nrel) {
  int bin = 0;
  thr_data_t *thr = get_thr_data(th);

  *nget = thr->numget;
  *nrel = thr->numrel;
  *curalloc = (bufsize)thr->totalloc;
  *totfree = 0;
  *maxfree = -1;

  for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
    bfhead_t *b = thr->freelist[bin].ql.flink;

    while (b != &thr->freelist[bin]) {
      KMP_DEBUG_ASSERT(b->bh.bb.bsize > 0);
      *totfree += b->bh.bb.bsize;
      if (b->bh.bb.bsize > *maxfree) {
        *maxfree = b->bh.bb.bsize;
      }
      b = b->ql.flink; /* Link to next buffer */
    }
  }
}

/* BSTATSE -- Return extended statistics */
static void bstatse(kmp_info_t *th, bufsize *pool_incr, long *npool,
                    long *npget, long *nprel, long *ndget, long *ndrel) {
  thr_data_t *thr = get_thr_data(th);

  *pool_incr = (thr->pool_len < 0) ? -thr->exp_incr : thr->exp_incr;
  *npool = thr->numpblk;
  *npget = thr->numpget;
  *nprel = thr->numprel;
  *ndget = thr->numdget;
  *ndrel = thr->numdrel;
}

#endif /* BufStats */

/* BUFDUMP -- Dump the data in a buffer. This is called with the user
   data pointer, and backs up to the buffer header. It will
   dump either a free block or an allocated one. */
static void bufdump(kmp_info_t *th, void *buf) {
  bfhead_t *b;
  unsigned char *bdump;
  bufsize bdlen;

  b = BFH(((char *)buf) - sizeof(bhead_t));
  KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
  if (b->bh.bb.bsize < 0) {
    bdump = (unsigned char *)buf;
    bdlen = (-b->bh.bb.bsize) - (bufsize)sizeof(bhead_t);
  } else {
    bdump = (unsigned char *)(((char *)b) + sizeof(bfhead_t));
    bdlen = b->bh.bb.bsize - (bufsize)sizeof(bfhead_t);
  }

  while (bdlen > 0) {
    int i, dupes = 0;
    bufsize l = bdlen;
    char bhex[50], bascii[20];

    if (l > 16) {
      l = 16;
    }

    for (i = 0; i < l; i++) {
      (void)KMP_SNPRINTF(bhex + i * 3, sizeof(bhex) - i * 3, "%02X ", bdump[i]);
      if (bdump[i] > 0x20 && bdump[i] < 0x7F)
        bascii[i] = bdump[i];
      else
        bascii[i] = ' ';
    }
    bascii[i] = 0;
    (void)__kmp_printf_no_lock("%-48s %s\n", bhex, bascii);
    bdump += l;
    bdlen -= l;
    while ((bdlen > 16) &&
           (memcmp((char *)(bdump - 16), (char *)bdump, 16) == 0)) {
      dupes++;
      bdump += 16;
      bdlen -= 16;
    }
    if (dupes > 1) {
      (void)__kmp_printf_no_lock(
          "     (%d lines [%d bytes] identical to above line skipped)\n", dupes,
          dupes * 16);
    } else if (dupes == 1) {
      bdump -= 16;
      bdlen += 16;
    }
  }
}

/* BPOOLD -- Dump a buffer pool. The buffer headers are always listed.
   If DUMPALLOC is nonzero, the contents of allocated buffers
   are dumped. If DUMPFREE is nonzero, free blocks are
   dumped as well. If FreeWipe checking is enabled, free
   blocks which have been clobbered will always be dumped. */
static void bpoold(kmp_info_t *th, void *buf, int dumpalloc, int dumpfree) {
  bfhead_t *b = BFH((char *)buf - sizeof(bhead_t));

  while (b->bh.bb.bsize != ESent) {
    bufsize bs = b->bh.bb.bsize;

    if (bs < 0) {
      bs = -bs;
      (void)__kmp_printf_no_lock("Allocated buffer: size %6ld bytes.\n",
                                 (long)bs);
      if (dumpalloc) {
        bufdump(th, (void *)(((char *)b) + sizeof(bhead_t)));
      }
    } else {
      const char *lerr = "";

      KMP_DEBUG_ASSERT(bs > 0);
      if ((b->ql.blink->ql.flink != b) || (b->ql.flink->ql.blink != b)) {
        lerr = " (Bad free list links)";
      }
      (void)__kmp_printf_no_lock("Free block: size %6ld bytes.%s\n",
                                 (long)bs, lerr);
#ifdef FreeWipe
      lerr = ((char *)b) + sizeof(bfhead_t);
      if ((bs > sizeof(bfhead_t)) &&
          ((*lerr != 0x55) ||
           (memcmp(lerr, lerr + 1, (size_t)(bs - (sizeof(bfhead_t) + 1))) !=
            0))) {
        (void)__kmp_printf_no_lock(
            "(Contents of above free block have been overstored.)\n");
        bufdump(th, (void *)(((char *)b) + sizeof(bhead_t)));
      } else
#endif
          if (dumpfree) {
        bufdump(th, (void *)(((char *)b) + sizeof(bhead_t)));
      }
    }
    b = BFH(((char *)b) + bs);
  }
}

/* BPOOLV -- Validate a buffer pool. */
static int bpoolv(kmp_info_t *th, void *buf) {
  bfhead_t *b = BFH(buf);

  while (b->bh.bb.bsize != ESent) {
    bufsize bs = b->bh.bb.bsize;

    if (bs < 0) {
      bs = -bs;
    } else {
#ifdef FreeWipe
      const char *lerr = "";
#endif

      KMP_DEBUG_ASSERT(bs > 0);
      if (bs <= 0) {
        return 0;
      }
      if ((b->ql.blink->ql.flink != b) || (b->ql.flink->ql.blink != b)) {
        (void)__kmp_printf_no_lock(
            "Free block: size %6ld bytes. (Bad free list links)\n", (long)bs);
        KMP_DEBUG_ASSERT(0);
        return 0;
      }
#ifdef FreeWipe
      lerr = ((char *)b) + sizeof(bfhead_t);
      if ((bs > sizeof(bfhead_t)) &&
          ((*lerr != 0x55) ||
           (memcmp(lerr, lerr + 1, (size_t)(bs - (sizeof(bfhead_t) + 1))) !=
            0))) {
        (void)__kmp_printf_no_lock(
            "(Contents of above free block have been overstored.)\n");
        bufdump(th, (void *)(((char *)b) + sizeof(bhead_t)));
        KMP_DEBUG_ASSERT(0);
        return 0;
      }
#endif /* FreeWipe */
    }
    b = BFH(((char *)b) + bs);
  }
  return 1;
}

#endif /* KMP_DEBUG */

void __kmp_initialize_bget(kmp_info_t *th) {
  KMP_DEBUG_ASSERT(SizeQuant >= sizeof(void *) && (th != 0));

  set_thr_data(th);

  bectl(th, (bget_compact_t)0, (bget_acquire_t)malloc, (bget_release_t)free,
        (bufsize)__kmp_malloc_pool_incr);
}

void __kmp_finalize_bget(kmp_info_t *th) {
  thr_data_t *thr;
  bfhead_t *b;

  KMP_DEBUG_ASSERT(th != 0);

#if BufStats
  thr = (thr_data_t *)th->th.th_local.bget_data;
  KMP_DEBUG_ASSERT(thr != NULL);
  b = thr->last_pool;

  /* If a block-release function is defined, and this free buffer constitutes
     the entire block, release it. Note that pool_len is defined in such a way
     that the test will fail unless all pool blocks are the same size. */

  // Deallocate the last pool if one exists because we no longer do it in brel()
  if (thr->relfcn != 0 && b != 0 && thr->numpblk != 0 &&
      b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t))) {
    KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
    KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.bsize == ESent);
    KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.prevfree ==
                     b->bh.bb.bsize);

    /* Unlink the buffer from the free list */
    __kmp_bget_remove_from_freelist(b);

    KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)b));

    (*thr->relfcn)(b);
    thr->numprel++; /* Nr of expansion block releases */
    thr->numpblk--; /* Total number of blocks */
    KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
  }
#endif /* BufStats */

  /* Deallocate bget_data */
  if (th->th.th_local.bget_data != NULL) {
    __kmp_free(th->th.th_local.bget_data);
    th->th.th_local.bget_data = NULL;
  }
}

void kmpc_set_poolsize(size_t size) {
  bectl(__kmp_get_thread(), (bget_compact_t)0, (bget_acquire_t)malloc,
        (bget_release_t)free, (bufsize)size);
}

size_t kmpc_get_poolsize(void) {
  thr_data_t *p;

  p = get_thr_data(__kmp_get_thread());

  return p->exp_incr;
}

void kmpc_set_poolmode(int mode) {
  thr_data_t *p;

  if (mode == bget_mode_fifo || mode == bget_mode_lifo ||
      mode == bget_mode_best) {
    p = get_thr_data(__kmp_get_thread());
    p->mode = (bget_mode_t)mode;
  }
}

int kmpc_get_poolmode(void) {
  thr_data_t *p;

  p = get_thr_data(__kmp_get_thread());

  return p->mode;
}

void kmpc_get_poolstat(size_t *maxmem, size_t *allmem) {
  kmp_info_t *th = __kmp_get_thread();
  bufsize a, b;

  __kmp_bget_dequeue(th); /* Release any queued buffers */

  bcheck(th, &a, &b);

  *maxmem = a;
  *allmem = b;
}

void kmpc_poolprint(void) {
  kmp_info_t *th = __kmp_get_thread();

  __kmp_bget_dequeue(th); /* Release any queued buffers */

  bfreed(th);
}

#endif // #if KMP_USE_BGET

void *kmpc_malloc(size_t size) {
  void *ptr;
  ptr = bget(__kmp_entry_thread(), (bufsize)(size + sizeof(ptr)));
  if (ptr != NULL) {
    // save allocated pointer just before one returned to user
    *(void **)ptr = ptr;
    ptr = (void **)ptr + 1;
  }
  return ptr;
}
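
/* Layout sketch for the stash trick above (illustrative): if bget() returns
   p, the word at p records p itself and the caller sees p + sizeof(void *):

     p                      p + sizeof(void *)
     +----------------------+----------------------------+
     | saved pointer (== p) | user data (size bytes)     |
     +----------------------+----------------------------+

   kmpc_free() reads *((void **)ptr - 1) to recover the original pointer. */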

#define IS_POWER_OF_TWO(n) (((n) & ((n)-1)) == 0)

void *kmpc_aligned_malloc(size_t size, size_t alignment) {
  void *ptr;
  void *ptr_allocated;
  KMP_DEBUG_ASSERT(alignment < 32 * 1024); // Alignment should not be too big
  if (!IS_POWER_OF_TWO(alignment)) {
    // AC: do we need to issue a warning here?
    errno = EINVAL;
    return NULL;
  }
  size = size + sizeof(void *) + alignment;
  ptr_allocated = bget(__kmp_entry_thread(), (bufsize)size);
  if (ptr_allocated != NULL) {
    // save allocated pointer just before one returned to user
    ptr = (void *)(((kmp_uintptr_t)ptr_allocated + sizeof(void *) + alignment) &
                   ~(alignment - 1));
    *((void **)ptr - 1) = ptr_allocated;
  } else {
    ptr = NULL;
  }
  return ptr;
}
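
/* Worked example (illustrative): with alignment == 64, if bget() returns
   ptr_allocated == 0x1008, then

     ptr = (0x1008 + sizeof(void *) + 64) & ~63    // == 0x1040 on LP64

   and the original 0x1008 is stashed at 0x1038 (ptr - sizeof(void *)), which
   is why the function over-allocates by sizeof(void *) + alignment bytes. */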

void *kmpc_calloc(size_t nelem, size_t elsize) {
  void *ptr;
  ptr = bgetz(__kmp_entry_thread(), (bufsize)(nelem * elsize + sizeof(ptr)));
  if (ptr != NULL) {
    // save allocated pointer just before one returned to user
    *(void **)ptr = ptr;
    ptr = (void **)ptr + 1;
  }
  return ptr;
}

void *kmpc_realloc(void *ptr, size_t size) {
  void *result = NULL;
  if (ptr == NULL) {
    // If pointer is NULL, realloc behaves like malloc.
    result = bget(__kmp_entry_thread(), (bufsize)(size + sizeof(ptr)));
    // save allocated pointer just before one returned to user
    if (result != NULL) {
      *(void **)result = result;
      result = (void **)result + 1;
    }
  } else if (size == 0) {
    // If size is 0, realloc behaves like free.
    // The thread must have been registered by an earlier call to kmpc_malloc()
    // or kmpc_calloc(), so it is safe to call __kmp_get_thread() here rather
    // than __kmp_entry_thread().
    KMP_ASSERT(*((void **)ptr - 1));
    brel(__kmp_get_thread(), *((void **)ptr - 1));
  } else {
    result = bgetr(__kmp_entry_thread(), *((void **)ptr - 1),
                   (bufsize)(size + sizeof(ptr)));
    if (result != NULL) {
      *(void **)result = result;
      result = (void **)result + 1;
    }
  }
  return result;
}

// NOTE: the library must have already been initialized by a previous allocate
void kmpc_free(void *ptr) {
  if (!__kmp_init_serial) {
    return;
  }
  if (ptr != NULL) {
    kmp_info_t *th = __kmp_get_thread();
    __kmp_bget_dequeue(th); /* Release any queued buffers */
    // extract allocated pointer and free it
    KMP_ASSERT(*((void **)ptr - 1));
    brel(th, *((void **)ptr - 1));
  }
}

void *___kmp_thread_malloc(kmp_info_t *th, size_t size KMP_SRC_LOC_DECL) {
  void *ptr;
  KE_TRACE(30, ("-> __kmp_thread_malloc( %p, %d ) called from %s:%d\n", th,
                (int)size KMP_SRC_LOC_PARM));
  ptr = bget(th, (bufsize)size);
  KE_TRACE(30, ("<- __kmp_thread_malloc() returns %p\n", ptr));
  return ptr;
}

void *___kmp_thread_calloc(kmp_info_t *th, size_t nelem,
                           size_t elsize KMP_SRC_LOC_DECL) {
  void *ptr;
  KE_TRACE(30, ("-> __kmp_thread_calloc( %p, %d, %d ) called from %s:%d\n", th,
                (int)nelem, (int)elsize KMP_SRC_LOC_PARM));
  ptr = bgetz(th, (bufsize)(nelem * elsize));
  KE_TRACE(30, ("<- __kmp_thread_calloc() returns %p\n", ptr));
  return ptr;
}

void *___kmp_thread_realloc(kmp_info_t *th, void *ptr,
                            size_t size KMP_SRC_LOC_DECL) {
  KE_TRACE(30, ("-> __kmp_thread_realloc( %p, %p, %d ) called from %s:%d\n", th,
                ptr, (int)size KMP_SRC_LOC_PARM));
  ptr = bgetr(th, ptr, (bufsize)size);
  KE_TRACE(30, ("<- __kmp_thread_realloc() returns %p\n", ptr));
  return ptr;
}

void ___kmp_thread_free(kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL) {
  KE_TRACE(30, ("-> __kmp_thread_free( %p, %p ) called from %s:%d\n", th,
                ptr KMP_SRC_LOC_PARM));
  if (ptr != NULL) {
    __kmp_bget_dequeue(th); /* Release any queued buffers */
    brel(th, ptr);
  }
  KE_TRACE(30, ("<- __kmp_thread_free()\n"));
}

/* If LEAK_MEMORY is defined, __kmp_free() will *not* free memory. It causes
   memory leaks, but it may be useful for debugging memory corruptions, use of
   freed pointers, etc. */
/* #define LEAK_MEMORY */
struct kmp_mem_descr { // Memory block descriptor.
  void *ptr_allocated; // Pointer returned by malloc(), subject for free().
  size_t size_allocated; // Size of allocated memory block.
  void *ptr_aligned; // Pointer to aligned memory, to be used by client code.
  size_t size_aligned; // Size of aligned memory block.
};
typedef struct kmp_mem_descr kmp_mem_descr_t;

/* Allocate memory on requested boundary, fill allocated memory with 0x00.
   NULL is NEVER returned, __kmp_abort() is called in case of memory allocation
   error. Must use __kmp_free when freeing memory allocated by this routine! */
static void *___kmp_allocate_align(size_t size,
                                   size_t alignment KMP_SRC_LOC_DECL) {
  /* __kmp_allocate() allocates (by call to malloc()) bigger memory block than
     requested to return properly aligned pointer. Original pointer returned
     by malloc() and size of allocated block is saved in descriptor just
     before the aligned pointer. This information used by __kmp_free() -- it
     has to pass to free() original pointer, not aligned one.

     +---------+------------+-----------------------------+---------+
     | padding | descriptor | aligned block               | padding |
     +---------+------------+-----------------------------+---------+
     ^                      ^
     |                      |
     |                      +- Aligned pointer returned to caller
     +- Pointer returned by malloc()

     Aligned block is filled with zeros, paddings are filled with 0xEF. */

  kmp_mem_descr_t descr;
  kmp_uintptr_t addr_allocated; // Address returned by malloc().
  kmp_uintptr_t addr_aligned; // Aligned address to return to caller.
  kmp_uintptr_t addr_descr; // Address of memory block descriptor.

  KE_TRACE(25, ("-> ___kmp_allocate_align( %d, %d ) called from %s:%d\n",
                (int)size, (int)alignment KMP_SRC_LOC_PARM));

  KMP_DEBUG_ASSERT(alignment < 32 * 1024); // Alignment should not be too big.
  KMP_DEBUG_ASSERT(sizeof(void *) <= sizeof(kmp_uintptr_t));
  // Make sure kmp_uintptr_t is enough to store addresses.

  descr.size_aligned = size;
  descr.size_allocated =
      descr.size_aligned + sizeof(kmp_mem_descr_t) + alignment;

#if KMP_DEBUG
  descr.ptr_allocated = _malloc_src_loc(descr.size_allocated, _file_, _line_);
#else
  descr.ptr_allocated = malloc_src_loc(descr.size_allocated KMP_SRC_LOC_PARM);
#endif
  KE_TRACE(10, ("   malloc( %d ) returned %p\n", (int)descr.size_allocated,
                descr.ptr_allocated));
  if (descr.ptr_allocated == NULL) {
    KMP_FATAL(OutOfHeapMemory);
  }

  addr_allocated = (kmp_uintptr_t)descr.ptr_allocated;
  addr_aligned =
      (addr_allocated + sizeof(kmp_mem_descr_t) + alignment) & ~(alignment - 1);
  addr_descr = addr_aligned - sizeof(kmp_mem_descr_t);

  descr.ptr_aligned = (void *)addr_aligned;

  KE_TRACE(26, ("   ___kmp_allocate_align: "
                "ptr_allocated=%p, size_allocated=%d, "
                "ptr_aligned=%p, size_aligned=%d\n",
                descr.ptr_allocated, (int)descr.size_allocated,
                descr.ptr_aligned, (int)descr.size_aligned));

  KMP_DEBUG_ASSERT(addr_allocated <= addr_descr);
  KMP_DEBUG_ASSERT(addr_descr + sizeof(kmp_mem_descr_t) == addr_aligned);
  KMP_DEBUG_ASSERT(addr_aligned + descr.size_aligned <=
                   addr_allocated + descr.size_allocated);
  KMP_DEBUG_ASSERT(addr_aligned % alignment == 0);
#ifdef KMP_DEBUG
  memset(descr.ptr_allocated, 0xEF, descr.size_allocated);
  // Fill allocated memory block with 0xEF.
#endif
  memset(descr.ptr_aligned, 0x00, descr.size_aligned);
  // Fill the aligned memory block (which is intended for use by the caller)
  // with 0x00. Do not put this filling under a KMP_DEBUG condition! Many
  // callers expect zeroed memory. (Padding bytes remain filled with 0xEF in
  // the debugging library.)
  *((kmp_mem_descr_t *)addr_descr) = descr;

  KMP_MB();

  KE_TRACE(25, ("<- ___kmp_allocate_align() returns %p\n", descr.ptr_aligned));
  return descr.ptr_aligned;
} // func ___kmp_allocate_align
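
/* Worked example (illustrative, assuming sizeof(kmp_mem_descr_t) == 32 and
   alignment == 64): if malloc() returns 0x10008, then
   addr_aligned == (0x10008 + 32 + 64) & ~63 == 0x10040, and the descriptor is
   written at 0x10020, immediately below the aligned block, matching the
   diagram above. */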

/* Allocate memory on cache line boundary, fill allocated memory with 0x00.
   Do not call this func directly! Use __kmp_allocate macro instead.
   NULL is NEVER returned, __kmp_abort() is called in case of memory allocation
   error. Must use __kmp_free when freeing memory allocated by this routine! */
void *___kmp_allocate(size_t size KMP_SRC_LOC_DECL) {
  void *ptr;
  KE_TRACE(25, ("-> __kmp_allocate( %d ) called from %s:%d\n",
                (int)size KMP_SRC_LOC_PARM));
  ptr = ___kmp_allocate_align(size, __kmp_align_alloc KMP_SRC_LOC_PARM);
  KE_TRACE(25, ("<- __kmp_allocate() returns %p\n", ptr));
  return ptr;
} // func ___kmp_allocate

/* Allocate memory on page boundary, fill allocated memory with 0x00.
   Do not call this func directly! Use __kmp_page_allocate macro instead.
   NULL is NEVER returned, __kmp_abort() is called in case of memory allocation
   error. Must use __kmp_free when freeing memory allocated by this routine! */
void *___kmp_page_allocate(size_t size KMP_SRC_LOC_DECL) {
  int page_size = 8 * 1024;
  void *ptr;

  KE_TRACE(25, ("-> __kmp_page_allocate( %d ) called from %s:%d\n",
                (int)size KMP_SRC_LOC_PARM));
  ptr = ___kmp_allocate_align(size, page_size KMP_SRC_LOC_PARM);
  KE_TRACE(25, ("<- __kmp_page_allocate( %d ) returns %p\n", (int)size, ptr));
  return ptr;
} // ___kmp_page_allocate

/* Free memory allocated by __kmp_allocate() and __kmp_page_allocate().
   In debug mode, fill the memory block with 0xEF before call to free(). */
void ___kmp_free(void *ptr KMP_SRC_LOC_DECL) {
  kmp_mem_descr_t descr;
  kmp_uintptr_t addr_allocated; // Address returned by malloc().
  kmp_uintptr_t addr_aligned; // Aligned address passed by caller.

  KE_TRACE(25,
           ("-> __kmp_free( %p ) called from %s:%d\n", ptr KMP_SRC_LOC_PARM));
  KMP_ASSERT(ptr != NULL);

  descr = *(kmp_mem_descr_t *)((kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t));

  KE_TRACE(26, ("   __kmp_free: "
                "ptr_allocated=%p, size_allocated=%d, "
                "ptr_aligned=%p, size_aligned=%d\n",
                descr.ptr_allocated, (int)descr.size_allocated,
                descr.ptr_aligned, (int)descr.size_aligned));

  addr_allocated = (kmp_uintptr_t)descr.ptr_allocated;
  addr_aligned = (kmp_uintptr_t)descr.ptr_aligned;

  KMP_DEBUG_ASSERT(addr_aligned % CACHE_LINE == 0);
  KMP_DEBUG_ASSERT(descr.ptr_aligned == ptr);
  KMP_DEBUG_ASSERT(addr_allocated + sizeof(kmp_mem_descr_t) <= addr_aligned);
  KMP_DEBUG_ASSERT(descr.size_aligned < descr.size_allocated);
  KMP_DEBUG_ASSERT(addr_aligned + descr.size_aligned <=
                   addr_allocated + descr.size_allocated);

#ifdef KMP_DEBUG
  memset(descr.ptr_allocated, 0xEF, descr.size_allocated);
  // Fill memory block with 0xEF, it helps catch using freed memory.
#endif

#ifndef LEAK_MEMORY
  KE_TRACE(10, ("   free( %p )\n", descr.ptr_allocated));
#ifdef KMP_DEBUG
  _free_src_loc(descr.ptr_allocated, _file_, _line_);
#else
  free_src_loc(descr.ptr_allocated KMP_SRC_LOC_PARM);
#endif
#endif
  KMP_MB();
  KE_TRACE(25, ("<- __kmp_free() returns\n"));
} // func ___kmp_free

#if USE_FAST_MEMORY == 3
// Allocate fast memory by first scanning the thread's free lists
// If a chunk the right size exists, grab it off the free list.
// Otherwise allocate normally using kmp_thread_malloc.

// AC: How to choose the limit? Just get 16 for now...
#define KMP_FREE_LIST_LIMIT 16

// Always use 128 bytes for determining buckets for caching memory blocks
#define DCACHE_LINE 128
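
/* Worked example of the bucket mapping below (illustrative): a 300-byte
   request needs num_lines == (300 + 127) / 128 == 3 cache lines, so idx == 2
   selects index 1 (the 4-line bucket) and the block is rounded up to
   4 * DCACHE_LINE == 512 bytes; anything over 64 lines (8KB) bypasses the
   free lists entirely. */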
1622 
1623 void *___kmp_fast_allocate(kmp_info_t *this_thr, size_t size KMP_SRC_LOC_DECL) {
1624  void *ptr;
1625  int num_lines;
1626  int idx;
1627  int index;
1628  void *alloc_ptr;
1629  size_t alloc_size;
1630  kmp_mem_descr_t *descr;
1631 
1632  KE_TRACE(25, ("-> __kmp_fast_allocate( T#%d, %d ) called from %s:%d\n",
1633  __kmp_gtid_from_thread(this_thr), (int)size KMP_SRC_LOC_PARM));
1634 
1635  num_lines = (size + DCACHE_LINE - 1) / DCACHE_LINE;
1636  idx = num_lines - 1;
1637  KMP_DEBUG_ASSERT(idx >= 0);
1638  if (idx < 2) {
1639  index = 0; // idx is [ 0, 1 ], use first free list
1640  num_lines = 2; // 1, 2 cache lines or less than cache line
1641  } else if ((idx >>= 2) == 0) {
1642  index = 1; // idx is [ 2, 3 ], use second free list
1643  num_lines = 4; // 3, 4 cache lines
1644  } else if ((idx >>= 2) == 0) {
1645  index = 2; // idx is [ 4, 15 ], use third free list
1646  num_lines = 16; // 5, 6, ..., 16 cache lines
1647  } else if ((idx >>= 2) == 0) {
1648  index = 3; // idx is [ 16, 63 ], use fourth free list
1649  num_lines = 64; // 17, 18, ..., 64 cache lines
1650  } else {
1651  goto alloc_call; // 65 or more cache lines ( > 8KB ), don't use free lists
1652  }
1653 
1654  ptr = this_thr->th.th_free_lists[index].th_free_list_self;
1655  if (ptr != NULL) {
1656  // pop the head of no-sync free list
1657  this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
1658  KMP_DEBUG_ASSERT(
1659  this_thr ==
1660  ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t)))
1661  ->ptr_aligned);
1662  goto end;
1663  }
1664  ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync);
1665  if (ptr != NULL) {
1666  // no-sync free list is empty, use sync free list (filled in by other
1667  // threads only)
1668  // pop the head of the sync free list, push NULL instead
1669  while (!KMP_COMPARE_AND_STORE_PTR(
1670  &this_thr->th.th_free_lists[index].th_free_list_sync, ptr, nullptr)) {
1671  KMP_CPU_PAUSE();
1672  ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync);
1673  }
1674  // push the rest of the chain onto the no-sync free list (can be NULL if
1675  // there was only one block)
1676  this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
1677  KMP_DEBUG_ASSERT(
1678  this_thr ==
1679  ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t)))
1680  ->ptr_aligned);
1681  goto end;
1682  }
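  // The loop above atomically detaches the entire sync chain: CAS the head to
  // NULL and retry with a freshly read head on failure; whichever thread wins
  // the CAS owns every node. A minimal sketch of the same pattern with C11
  // atomics (hypothetical; the runtime uses its own TCR_SYNC_PTR and
  // KMP_COMPARE_AND_STORE_PTR macros):
#if 0
#include <stdatomic.h>
static void *detach_all(_Atomic(void *) *head) {
  void *old = atomic_load(head);
  while (old != NULL && !atomic_compare_exchange_weak(head, &old, NULL))
    ; // on failure, 'old' is refreshed with the current head
  return old; // the caller now owns the whole chain
}
#endif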
1683 
1684 alloc_call:
1685  // no block found in the free lists, so allocate a new one
1686  size = num_lines * DCACHE_LINE;
1687 
1688  alloc_size = size + sizeof(kmp_mem_descr_t) + DCACHE_LINE;
1689  KE_TRACE(25, ("__kmp_fast_allocate: T#%d calling bget with "
1690  "alloc_size %d\n",
1691  __kmp_gtid_from_thread(this_thr), (int)alloc_size));
1692  alloc_ptr = bget(this_thr, (bufsize)alloc_size);
1693 
1694  // align ptr to DCACHE_LINE
1695  ptr = (void *)((((kmp_uintptr_t)alloc_ptr) + sizeof(kmp_mem_descr_t) +
1696  DCACHE_LINE) &
1697  ~(DCACHE_LINE - 1));
1698  descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t));
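  // Worked example (assuming a 32-byte kmp_mem_descr_t on a 64-bit target):
  // alloc_ptr = 0x1008 -> 0x1008 + 32 + 128 = 0x10a8; masked with ~0x7f this
  // gives ptr = 0x1080, so the descriptor lands at 0x1060. Adding a full
  // DCACHE_LINE (not DCACHE_LINE - 1) before masking guarantees the rounded
  // ptr always leaves at least sizeof(kmp_mem_descr_t) bytes above alloc_ptr.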
1699 
1700  descr->ptr_allocated = alloc_ptr; // remember allocated pointer
1701  // size_allocated is not needed here
1702  descr->ptr_aligned = (void *)this_thr; // remember the allocating thread
1703  // (it is already saved in the bget buffer,
1704  // but we may want to use another allocator in the future)
1705  descr->size_aligned = size;
1706 
1707 end:
1708  KE_TRACE(25, ("<- __kmp_fast_allocate( T#%d ) returns %p\n",
1709  __kmp_gtid_from_thread(this_thr), ptr));
1710  return ptr;
1711 } // func __kmp_fast_allocate
1712 
1713 // Free fast memory and place it on the thread's free list if it is of
1714 // the correct size.
1715 void ___kmp_fast_free(kmp_info_t *this_thr, void *ptr KMP_SRC_LOC_DECL) {
1716  kmp_mem_descr_t *descr;
1717  kmp_info_t *alloc_thr;
1718  size_t size;
1719  size_t idx;
1720  int index;
1721 
1722  KE_TRACE(25, ("-> __kmp_fast_free( T#%d, %p ) called from %s:%d\n",
1723  __kmp_gtid_from_thread(this_thr), ptr KMP_SRC_LOC_PARM));
1724  KMP_ASSERT(ptr != NULL);
1725 
1726  descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t));
1727 
1728  KE_TRACE(26, (" __kmp_fast_free: size_aligned=%d\n",
1729  (int)descr->size_aligned));
1730 
1731  size = descr->size_aligned; // 2, 4, 16, 64, 65, 66, ... cache lines
1732 
1733  idx = DCACHE_LINE * 2; // 2 cache lines is the minimum block size
1734  if (idx == size) {
1735  index = 0; // 2 cache lines
1736  } else if ((idx <<= 1) == size) {
1737  index = 1; // 4 cache lines
1738  } else if ((idx <<= 2) == size) {
1739  index = 2; // 16 cache lines
1740  } else if ((idx <<= 2) == size) {
1741  index = 3; // 64 cache lines
1742  } else {
1743  KMP_DEBUG_ASSERT(size > DCACHE_LINE * 64);
1744  goto free_call; // 65 or more cache lines ( > 8KB )
1745  }
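  // Cached sizes are exact bucket sizes, so equality checks suffice here:
  // idx steps through 2, 4, 16, and 64 cache lines (256 B, 512 B, 2 KB, 8 KB).
  // E.g. a 2048-byte block (16 lines) matches on the third test, index 2.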
1746 
1747  alloc_thr = (kmp_info_t *)descr->ptr_aligned; // get thread owning the block
1748  if (alloc_thr == this_thr) {
1749  // push the block onto our own no-sync free list, linking in the previous
1750  // head (LIFO)
1750  *((void **)ptr) = this_thr->th.th_free_lists[index].th_free_list_self;
1751  this_thr->th.th_free_lists[index].th_free_list_self = ptr;
1752  } else {
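    // The block belongs to another thread. Rather than touching that thread's
    // sync list on every free, batch frees on a local "other" list and hand
    // the whole chain back at once, so the CAS cost is paid at most once per
    // KMP_FREE_LIST_LIMIT deferred blocks.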
1753  void *head = this_thr->th.th_free_lists[index].th_free_list_other;
1754  if (head == NULL) {
1755  // Create new free list
1756  this_thr->th.th_free_lists[index].th_free_list_other = ptr;
1757  *((void **)ptr) = NULL; // mark the tail of the list
1758  descr->size_allocated = (size_t)1; // head of the list keeps its length
1759  } else {
1760  // need to check the existing "other" list's owner thread and queue size
1761  kmp_mem_descr_t *dsc =
1762  (kmp_mem_descr_t *)((char *)head - sizeof(kmp_mem_descr_t));
1763  // allocating thread, same for all queue nodes
1764  kmp_info_t *q_th = (kmp_info_t *)(dsc->ptr_aligned);
1765  size_t q_sz =
1766  dsc->size_allocated + 1; // new size if we add the current block
1767  if (q_th == alloc_thr && q_sz <= KMP_FREE_LIST_LIMIT) {
1768  // we can add the current block to the "other" list, no sync needed
1769  *((void **)ptr) = head;
1770  descr->size_allocated = q_sz;
1771  this_thr->th.th_free_lists[index].th_free_list_other = ptr;
1772  } else {
1773  // either the queue's owning thread has changed or the size limit was
1774  // exceeded; return the old queue to its allocating thread (q_th)
1775  // synchronously, and start a new list for alloc_thr's blocks
1776  void *old_ptr;
1777  void *tail = head;
1778  void *next = *((void **)head);
1779  while (next != NULL) {
1780  KMP_DEBUG_ASSERT(
1781  // queue size should decrease by 1 each step through the list
1782  ((kmp_mem_descr_t *)((char *)next - sizeof(kmp_mem_descr_t)))
1783  ->size_allocated +
1784  1 ==
1785  ((kmp_mem_descr_t *)((char *)tail - sizeof(kmp_mem_descr_t)))
1786  ->size_allocated);
1787  tail = next; // remember tail node
1788  next = *((void **)next);
1789  }
1790  KMP_DEBUG_ASSERT(q_th != NULL);
1791  // push block to owner's sync free list
1792  old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync);
1793  /* the next pointer must be set before setting free_list to ptr to avoid
1794  exposing a broken list to other threads, even for an instant. */
1795  *((void **)tail) = old_ptr;
1796 
1797  while (!KMP_COMPARE_AND_STORE_PTR(
1798  &q_th->th.th_free_lists[index].th_free_list_sync, old_ptr, head)) {
1799  KMP_CPU_PAUSE();
1800  old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync);
1801  *((void **)tail) = old_ptr;
1802  }
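  // This is the mirror of the detach pattern in ___kmp_fast_allocate: point
  // the chain's tail at the current head before each CAS attempt so the
  // published list is never broken, even transiently. A minimal sketch with
  // C11 atomics (hypothetical; the runtime uses its own macros):
#if 0
static void push_chain(_Atomic(void *) *head, void *first, void **tail_link) {
  void *old = atomic_load(head);
  do {
    *tail_link = old; // tail->next = current head; chain stays well formed
  } while (!atomic_compare_exchange_weak(head, &old, first));
}
#endif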
1803 
1804  // start a new list of not-self blocks
1805  this_thr->th.th_free_lists[index].th_free_list_other = ptr;
1806  *((void **)ptr) = NULL;
1807  descr->size_allocated = (size_t)1; // head of queue keeps its length
1808  }
1809  }
1810  }
1811  goto end;
1812 
1813 free_call:
1814  KE_TRACE(25, ("__kmp_fast_free: T#%d calling brel for size %d\n",
1815  __kmp_gtid_from_thread(this_thr), (int)size));
1816  __kmp_bget_dequeue(this_thr); /* Release any queued buffers */
1817  brel(this_thr, descr->ptr_allocated);
1818 
1819 end:
1820  KE_TRACE(25, ("<- __kmp_fast_free() returns\n"));
1821 
1822 } // func __kmp_fast_free
1823 
1824 // Initialize the thread free lists related to fast memory
1825 // Only do this when a thread is initially created.
1826 void __kmp_initialize_fast_memory(kmp_info_t *this_thr) {
1827  KE_TRACE(10, ("__kmp_initialize_fast_memory: Called from th %p\n", this_thr));
1828 
1829  memset(this_thr->th.th_free_lists, 0, NUM_LISTS * sizeof(kmp_free_list_t));
1830 }
1831 
1832 // Free the memory in the thread free lists related to fast memory
1833 // Only do this when a thread is being reaped (destroyed).
1834 void __kmp_free_fast_memory(kmp_info_t *th) {
1835  // Assuming BGET is the underlying allocator, walk through its structures...
1836  int bin;
1837  thr_data_t *thr = get_thr_data(th);
1838  void **lst = NULL;
1839 
1840  KE_TRACE(
1841  5, ("__kmp_free_fast_memory: Called T#%d\n", __kmp_gtid_from_thread(th)));
1842 
1843  __kmp_bget_dequeue(th); // Release any queued buffers
1844 
1845  // Dig through free lists and extract all allocated blocks
1846  for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
1847  bfhead_t *b = thr->freelist[bin].ql.flink;
1848  while (b != &thr->freelist[bin]) {
1849  if ((kmp_uintptr_t)b->bh.bb.bthr & 1) { // low bit set: buffer is allocated
1850  *((void **)b) =
1851  lst; // link into the list (overwrites bthr, but keeps flink for now)
1852  lst = (void **)b; // push b onto lst
1853  }
1854  b = b->ql.flink; // get next buffer
1855  }
1856  }
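  // Release in a second pass: freeing a block while still walking the bin
  // would invalidate the flink pointers the traversal depends on.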
1857  while (lst != NULL) {
1858  void *next = *lst;
1859  KE_TRACE(10, ("__kmp_free_fast_memory: freeing %p, next=%p th %p (%d)\n",
1860  lst, next, th, __kmp_gtid_from_thread(th)));
1861  (*thr->relfcn)(lst);
1862 #if BufStats
1863  // count blocks to prevent problems in __kmp_finalize_bget()
1864  thr->numprel++; /* Nr of expansion block releases */
1865  thr->numpblk--; /* Total number of blocks */
1866 #endif
1867  lst = (void **)next;
1868  }
1869 
1870  KE_TRACE(
1871  5, ("__kmp_free_fast_memory: Freed T#%d\n", __kmp_gtid_from_thread(th)));
1872 }
1873 
1874 #endif // USE_FAST_MEMORY