/*
 * kmp_alloc.c -- private/shared dynamic memory allocation and management
 */


//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#include "kmp.h"
#include "kmp_wrapper_malloc.h"
#include "kmp_io.h"

// Disable bget when it is not used
#if KMP_USE_BGET

/* Thread private buffer management code */

typedef int   (*bget_compact_t)(size_t, int);
typedef void *(*bget_acquire_t)(size_t);
typedef void  (*bget_release_t)(void *);

/* NOTE: bufsize must be a signed datatype */

#if KMP_OS_WINDOWS
# if KMP_ARCH_X86 || KMP_ARCH_ARM
    typedef kmp_int32 bufsize;
# else
    typedef kmp_int64 bufsize;
# endif
#else
    typedef ssize_t bufsize;
#endif

/* The three modes of operation are fifo search, lifo search, and best-fit */

typedef enum bget_mode {
    bget_mode_fifo = 0,
    bget_mode_lifo = 1,
    bget_mode_best = 2
} bget_mode_t;


static void    bpool( kmp_info_t *th, void *buffer, bufsize len);
static void   *bget( kmp_info_t *th, bufsize size);
static void   *bgetz( kmp_info_t *th, bufsize size);
static void   *bgetr( kmp_info_t *th, void *buffer, bufsize newsize);
static void    brel( kmp_info_t *th, void *buf);
static void    bectl( kmp_info_t *th, bget_compact_t compact, bget_acquire_t acquire, bget_release_t release, bufsize pool_incr );

#ifdef KMP_DEBUG
static void    bstats( kmp_info_t *th, bufsize *curalloc, bufsize *totfree, bufsize *maxfree, long *nget, long *nrel);
static void    bstatse( kmp_info_t *th, bufsize *pool_incr, long *npool, long *npget, long *nprel, long *ndget, long *ndrel);
static void    bufdump( kmp_info_t *th, void *buf);
static void    bpoold( kmp_info_t *th, void *pool, int dumpalloc, int dumpfree);
static int     bpoolv( kmp_info_t *th, void *pool);
#endif

/* BGET CONFIGURATION */
                                /* Buffer allocation size quantum:
                                   all buffers allocated are a
                                   multiple of this size. This
                                   MUST be a power of two. */

                                /* On IA-32 architecture with Linux* OS,
                                   malloc() does not
                                   ensure 16 byte alignment */

#if KMP_ARCH_X86 || !KMP_HAVE_QUAD

#define SizeQuant   8
#define AlignType   double

#else

#define SizeQuant   16
#define AlignType   _Quad

#endif

#define BufStats    1           /* Define this symbol to enable the
                                   bstats() function which calculates
                                   the total free space in the buffer
                                   pool, the largest available
                                   buffer, and the total space
                                   currently allocated. */

#ifdef KMP_DEBUG

#define BufDump     1           /* Define this symbol to enable the
                                   bpoold() function which dumps the
                                   buffers in a buffer pool. */

#define BufValid    1           /* Define this symbol to enable the
                                   bpoolv() function for validating
                                   a buffer pool. */

#define DumpData    1           /* Define this symbol to enable the
                                   bufdump() function which allows
                                   dumping the contents of an allocated
                                   or free buffer. */
#ifdef NOT_USED_NOW

#define FreeWipe    1           /* Wipe free buffers to a guaranteed
                                   pattern of garbage to trip up
                                   miscreants who attempt to use
                                   pointers into released buffers. */

#define BestFit     1           /* Use a best fit algorithm when
                                   searching for space for an
                                   allocation request. This uses
                                   memory more efficiently, but
                                   allocation will be much slower. */
#endif /* NOT_USED_NOW */
#endif /* KMP_DEBUG */


static bufsize bget_bin_size[ ] = {
    0,
//  1 << 6,     /* .5 Cache line */
    1 << 7,     /* 1 Cache line, new */
    1 << 8,     /* 2 Cache lines */
    1 << 9,     /* 4 Cache lines, new */
    1 << 10,    /* 8 Cache lines */
    1 << 11,    /* 16 Cache lines, new */
    1 << 12,
    1 << 13,    /* new */
    1 << 14,
    1 << 15,    /* new */
    1 << 16,
    1 << 17,
    1 << 18,
    1 << 19,
    1 << 20,    /*  1MB */
    1 << 21,    /*  2MB */
    1 << 22,    /*  4MB */
    1 << 23,    /*  8MB */
    1 << 24,    /* 16MB */
    1 << 25,    /* 32MB */
};

#define MAX_BGET_BINS   (int)(sizeof(bget_bin_size) / sizeof(bufsize))

struct bfhead;

/* Declare the interface, including the requested buffer size type,
   bufsize. */

/* Queue links */

typedef struct qlinks {
    struct bfhead *flink;       /* Forward link */
    struct bfhead *blink;       /* Backward link */
} qlinks_t;

/* Header in allocated and free buffers */

typedef struct bhead2 {
    kmp_info_t *bthr;           /* The thread which owns the buffer pool */
    bufsize     prevfree;       /* Relative link back to previous
                                   free buffer in memory or 0 if
                                   previous buffer is allocated. */
    bufsize     bsize;          /* Buffer size: positive if free,
                                   negative if allocated. */
} bhead2_t;

/* Make sure the bhead structure is a multiple of SizeQuant in size. */

typedef union bhead {
    KMP_ALIGN( SizeQuant )
    AlignType b_align;
    char      b_pad[ sizeof(bhead2_t) + (SizeQuant - (sizeof(bhead2_t) % SizeQuant)) ];
    bhead2_t  bb;
} bhead_t;
#define BH(p)   ((bhead_t *) (p))

/* Header in directly allocated buffers (by acqfcn) */

typedef struct bdhead
{
    bufsize tsize;              /* Total size, including overhead */
    bhead_t bh;                 /* Common header */
} bdhead_t;
#define BDH(p)  ((bdhead_t *) (p))

/* Header in free buffers */

typedef struct bfhead {
    bhead_t  bh;                /* Common allocated/free header */
    qlinks_t ql;                /* Links on free list */
} bfhead_t;
#define BFH(p)  ((bfhead_t *) (p))

typedef struct thr_data {
    bfhead_t freelist[ MAX_BGET_BINS ];
#if BufStats
    size_t totalloc;            /* Total space currently allocated */
    long numget, numrel;        /* Number of bget() and brel() calls */
    long numpblk;               /* Number of pool blocks */
    long numpget, numprel;      /* Number of block gets and rels */
    long numdget, numdrel;      /* Number of direct gets and rels */
#endif /* BufStats */

    /* Automatic expansion block management functions */
    bget_compact_t compfcn;
    bget_acquire_t acqfcn;
    bget_release_t relfcn;

    bget_mode_t    mode;        /* what allocation mode to use? */

    bufsize exp_incr;           /* Expansion block size */
    bufsize pool_len;           /* 0: no bpool calls have been made
                                   -1: not all pool blocks are
                                       the same size
                                   >0: (common) block size for all
                                       bpool calls made so far
                                */
    bfhead_t * last_pool;       /* Last pool owned by this thread (delayed deallocation) */
} thr_data_t;

/* Minimum allocation quantum: */

#define QLSize  (sizeof(qlinks_t))
#define SizeQ   ((SizeQuant > QLSize) ? SizeQuant : QLSize)
#define MaxSize (bufsize)( ~ ( ( (bufsize)( 1 ) << ( sizeof( bufsize ) * CHAR_BIT - 1 ) ) | ( SizeQuant - 1 ) ) )
    // Maximum for the requested size.

/* End sentinel: value placed in bsize field of dummy block delimiting
   end of pool block. The most negative number which will fit in a
   bufsize, defined in a way that the compiler will accept. */

#define ESent   ((bufsize) (-(((((bufsize)1)<<((int)sizeof(bufsize)*8-2))-1)*2)-2))

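/* For illustration: with a 32-bit bufsize the expression expands to
       ESent = -(((1 << 30) - 1) * 2) - 2 = -(2^31 - 2) - 2 = -2^31,
   i.e. the most negative value, reached without overflowing during
   evaluation. MaxSize, in turn, clears the sign bit and the low
   (SizeQuant - 1) bits, giving the largest positive multiple of
   SizeQuant. */
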
/* ------------------------------------------------------------------------ */

/* Thread Data management routines */

static int
bget_get_bin( bufsize size )
{
    // binary chop bins
    int lo = 0, hi = MAX_BGET_BINS - 1;

    KMP_DEBUG_ASSERT( size > 0 );

    while ( (hi - lo) > 1 ) {
        int mid = (lo + hi) >> 1;
        if (size < bget_bin_size[ mid ])
            hi = mid - 1;
        else
            lo = mid;
    }

    KMP_DEBUG_ASSERT( (lo >= 0) && (lo < MAX_BGET_BINS) );

    return lo;
}
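
/* Example: with the bin table above (0, 128, 256, 512, ... bytes), a
   200-byte size maps to bin 1 (the 128-byte bin) and a 256-byte size to
   bin 2. The result only selects where scanning starts; bget() tries
   successively larger bins when the chosen bin has no fit. */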

static void
set_thr_data( kmp_info_t *th )
{
    int i;
    thr_data_t *data;

    data =
        (thr_data_t *)(
            ( ! th->th.th_local.bget_data ) ? __kmp_allocate( sizeof( *data ) ) : th->th.th_local.bget_data
        );

    memset( data, '\0', sizeof( *data ) );

    for (i = 0; i < MAX_BGET_BINS; ++i) {
        data->freelist[ i ].ql.flink = & data->freelist[ i ];
        data->freelist[ i ].ql.blink = & data->freelist[ i ];
    }

    th->th.th_local.bget_data = data;
    th->th.th_local.bget_list = 0;
#if ! USE_CMP_XCHG_FOR_BGET
#ifdef USE_QUEUING_LOCK_FOR_BGET
    __kmp_init_lock( & th->th.th_local.bget_lock );
#else
    __kmp_init_bootstrap_lock( & th->th.th_local.bget_lock );
#endif /* USE_QUEUING_LOCK_FOR_BGET */
#endif /* ! USE_CMP_XCHG_FOR_BGET */
}

static thr_data_t *
get_thr_data( kmp_info_t *th )
{
    thr_data_t *data;

    data = (thr_data_t *) th->th.th_local.bget_data;

    KMP_DEBUG_ASSERT( data != 0 );

    return data;
}


#ifdef KMP_DEBUG

static void
__kmp_bget_validate_queue( kmp_info_t *th )
{
    /* NOTE: assume that the global_lock is held */

    void *p = (void *) th->th.th_local.bget_list;

    while (p != 0) {
        bfhead_t *b = BFH(((char *) p) - sizeof(bhead_t));

        KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
        p = (void *) b->ql.flink;
    }
}

#endif

/* Walk the free list and release the enqueued buffers */

static void
__kmp_bget_dequeue( kmp_info_t *th )
{
    void *p = TCR_SYNC_PTR(th->th.th_local.bget_list);

    if (p != 0) {
        #if USE_CMP_XCHG_FOR_BGET
        {
            volatile void *old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
            while ( ! KMP_COMPARE_AND_STORE_PTR(
                        & th->th.th_local.bget_list, old_value, NULL ) )
            {
                KMP_CPU_PAUSE();
                old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
            }
            p = (void *) old_value;
        }
        #else /* ! USE_CMP_XCHG_FOR_BGET */
        #ifdef USE_QUEUING_LOCK_FOR_BGET
            __kmp_acquire_lock( & th->th.th_local.bget_lock,
                                __kmp_gtid_from_thread(th) );
        #else
            __kmp_acquire_bootstrap_lock( & th->th.th_local.bget_lock );
        #endif /* USE_QUEUING_LOCK_FOR_BGET */

            p = (void *) th->th.th_local.bget_list;
            th->th.th_local.bget_list = 0;

        #ifdef USE_QUEUING_LOCK_FOR_BGET
            __kmp_release_lock( & th->th.th_local.bget_lock,
                                __kmp_gtid_from_thread(th) );
        #else
            __kmp_release_bootstrap_lock( & th->th.th_local.bget_lock );
        #endif
        #endif /* USE_CMP_XCHG_FOR_BGET */

        /* Check again to make sure the list is not empty */

        while (p != 0) {
            void *buf = p;
            bfhead_t *b = BFH(((char *) p) - sizeof(bhead_t));

            KMP_DEBUG_ASSERT( b->bh.bb.bsize != 0 );
            KMP_DEBUG_ASSERT( ( (kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1 ) ==
                                (kmp_uintptr_t)th ); // clear possible mark
            KMP_DEBUG_ASSERT( b->ql.blink == 0 );

            p = (void *) b->ql.flink;

            brel( th, buf );
        }
    }
}

/* Chain together the free buffers by using the thread owner field */

static void
__kmp_bget_enqueue( kmp_info_t *th, void *buf
#ifdef USE_QUEUING_LOCK_FOR_BGET
                    , kmp_int32 rel_gtid
#endif
                  )
{
    bfhead_t *b = BFH(((char *) buf) - sizeof(bhead_t));

    KMP_DEBUG_ASSERT( b->bh.bb.bsize != 0 );
    KMP_DEBUG_ASSERT( ( (kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1 ) ==
                        (kmp_uintptr_t)th ); // clear possible mark

    b->ql.blink = 0;

    KC_TRACE( 10, ( "__kmp_bget_enqueue: moving buffer to T#%d list\n",
                    __kmp_gtid_from_thread( th ) ) );

#if USE_CMP_XCHG_FOR_BGET
    {
        volatile void *old_value = TCR_PTR(th->th.th_local.bget_list);
        /* the next pointer must be set before setting bget_list to buf to avoid
           exposing a broken list to other threads, even for an instant. */
        b->ql.flink = BFH( old_value );

        while ( ! KMP_COMPARE_AND_STORE_PTR(
                    & th->th.th_local.bget_list, old_value, buf ) )
        {
            KMP_CPU_PAUSE();
            old_value = TCR_PTR(th->th.th_local.bget_list);
            /* the next pointer must be set before setting bget_list to buf to avoid
               exposing a broken list to other threads, even for an instant. */
            b->ql.flink = BFH( old_value );
        }
    }
#else /* ! USE_CMP_XCHG_FOR_BGET */
# ifdef USE_QUEUING_LOCK_FOR_BGET
    __kmp_acquire_lock( & th->th.th_local.bget_lock, rel_gtid );
# else
    __kmp_acquire_bootstrap_lock( & th->th.th_local.bget_lock );
# endif

    b->ql.flink = BFH( th->th.th_local.bget_list );
    th->th.th_local.bget_list = (void *) buf;

# ifdef USE_QUEUING_LOCK_FOR_BGET
    __kmp_release_lock( & th->th.th_local.bget_lock, rel_gtid );
# else
    __kmp_release_bootstrap_lock( & th->th.th_local.bget_lock );
# endif
#endif /* USE_CMP_XCHG_FOR_BGET */
}
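
/* A minimal usage sketch (not compiled into the library): how the enqueue /
   dequeue pair handles a buffer freed by a thread other than its owner.
   The kmp_info_t pointers are assumed to come from the runtime. */
#if 0
static void
example_cross_thread_free( kmp_info_t *th_owner, kmp_info_t *th_other )
{
    void *p = bget( th_owner, (bufsize) 64 );   /* buffer owned by th_owner */

    /* Executed on th_other: the owner stored in the buffer header does not
       match the releasing thread, so brel() forwards the buffer to
       th_owner's bget_list via __kmp_bget_enqueue() instead of touching
       th_owner's free lists directly. */
    brel( th_other, p );

    /* th_owner reclaims queued buffers at its next allocation/release
       boundary; __kmp_bget_dequeue() walks the list and brel()s each one. */
    __kmp_bget_dequeue( th_owner );
}
#endif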

/* insert buffer back onto a new freelist */

static void
__kmp_bget_insert_into_freelist( thr_data_t *thr, bfhead_t *b )
{
    int bin;

    KMP_DEBUG_ASSERT( ((size_t)b ) % SizeQuant == 0 );
    KMP_DEBUG_ASSERT( b->bh.bb.bsize % SizeQuant == 0 );

    bin = bget_get_bin( b->bh.bb.bsize );

    KMP_DEBUG_ASSERT(thr->freelist[ bin ].ql.blink->ql.flink == &thr->freelist[ bin ]);
    KMP_DEBUG_ASSERT(thr->freelist[ bin ].ql.flink->ql.blink == &thr->freelist[ bin ]);

    b->ql.flink = &thr->freelist[ bin ];
    b->ql.blink = thr->freelist[ bin ].ql.blink;

    thr->freelist[ bin ].ql.blink = b;
    b->ql.blink->ql.flink = b;
}

/* unlink the buffer from the old freelist */

static void
__kmp_bget_remove_from_freelist( bfhead_t *b )
{
    KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
    KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);

    b->ql.blink->ql.flink = b->ql.flink;
    b->ql.flink->ql.blink = b->ql.blink;
}
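
/* Each freelist[ bin ] element serves as the sentinel of a circular doubly
   linked list: an empty bin has flink == blink == &freelist[ bin ], exactly
   as set_thr_data() initializes it, and the asserts above check that ring
   invariant before every unlink. */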

/* ------------------------------------------------------------------------ */

/* GET STATS -- check info on free list */

static void
bcheck( kmp_info_t *th, bufsize *max_free, bufsize *total_free )
{
    thr_data_t *thr = get_thr_data( th );
    int bin;

    *total_free = *max_free = 0;

    for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
        bfhead_t *b, *best;

        best = &thr->freelist[ bin ];
        b = best->ql.flink;

        while (b != &thr->freelist[ bin ]) {
            *total_free += (b->bh.bb.bsize - sizeof( bhead_t ));
            if ((best == &thr->freelist[ bin ]) || (b->bh.bb.bsize < best->bh.bb.bsize))
                best = b;

            /* Link to next buffer */
            b = b->ql.flink;
        }

        if (*max_free < best->bh.bb.bsize)
            *max_free = best->bh.bb.bsize;
    }

    if (*max_free > (bufsize)sizeof( bhead_t ))
        *max_free -= sizeof( bhead_t );
}

/* ------------------------------------------------------------------------ */

/* BGET -- Allocate a buffer. */

static void *
bget( kmp_info_t *th, bufsize requested_size )
{
    thr_data_t *thr = get_thr_data( th );
    bufsize size = requested_size;
    bfhead_t *b;
    void *buf;
    int compactseq = 0;
    int use_blink = 0;
    /* For BestFit */
    bfhead_t *best;

    if ( size < 0 || size + sizeof( bhead_t ) > MaxSize ) {
        return NULL;
    }; // if

    __kmp_bget_dequeue( th );   /* Release any queued buffers */

    if (size < (bufsize)SizeQ) {  /* Need at least room for the */
        size = SizeQ;             /*    queue links. */
    }
    #if defined( SizeQuant ) && ( SizeQuant > 1 )
        size = (size + (SizeQuant - 1)) & (~(SizeQuant - 1));
    #endif
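
    /* For example, with SizeQuant == 16 a 100-byte request becomes
       (100 + 15) & ~15 == 112 bytes; requests smaller than SizeQ were
       already raised to SizeQ above, so the queue links always fit once
       the buffer is freed. */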

    size += sizeof(bhead_t);    /* Add overhead in allocated buffer
                                   to size required. */
    KMP_DEBUG_ASSERT( size >= 0 );
    KMP_DEBUG_ASSERT( size % SizeQuant == 0 );

    use_blink = ( thr->mode == bget_mode_lifo );

    /* If a compact function was provided in the call to bectl(), wrap
       a loop around the allocation process to allow compaction to
       intervene in case we don't find a suitable buffer in the chain. */

    for (;;) {
        int bin;

        for (bin = bget_get_bin( size ); bin < MAX_BGET_BINS; ++bin) {
            /* Link to next buffer */
            b = ( use_blink ? thr->freelist[ bin ].ql.blink : thr->freelist[ bin ].ql.flink );

            if (thr->mode == bget_mode_best) {
                best = &thr->freelist[ bin ];

                /* Scan the free list searching for the first buffer big enough
                   to hold the requested size buffer. */

                while (b != &thr->freelist[ bin ]) {
                    if (b->bh.bb.bsize >= (bufsize) size) {
                        if ((best == &thr->freelist[ bin ]) || (b->bh.bb.bsize < best->bh.bb.bsize)) {
                            best = b;
                        }
                    }

                    /* Link to next buffer */
                    b = ( use_blink ? b->ql.blink : b->ql.flink );
                }
                b = best;
            }

            while (b != &thr->freelist[ bin ]) {
                if ((bufsize) b->bh.bb.bsize >= (bufsize) size) {

                    /* Buffer is big enough to satisfy the request. Allocate it
                       to the caller. We must decide whether the buffer is large
                       enough to split into the part given to the caller and a
                       free buffer that remains on the free list, or whether the
                       entire buffer should be removed from the free list and
                       given to the caller in its entirety. We only split the
                       buffer if enough room remains for a header plus the minimum
                       quantum of allocation. */
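
                    /* Worked example, assuming an LP64 build where
                       sizeof(bhead_t) == 32 and SizeQ == 16: carving a
                       112-byte request out of a 512-byte free buffer leaves
                       400 bytes, which exceeds SizeQ + sizeof(bhead_t) == 48,
                       so the buffer is split and the allocated part is taken
                       from its high-address end; a 144-byte free buffer
                       would instead be handed over whole. */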

                    if ((b->bh.bb.bsize - (bufsize) size) > (bufsize)(SizeQ + (sizeof(bhead_t)))) {
                        bhead_t *ba, *bn;

                        ba = BH(((char *) b) + (b->bh.bb.bsize - (bufsize) size));
                        bn = BH(((char *) ba) + size);

                        KMP_DEBUG_ASSERT(bn->bb.prevfree == b->bh.bb.bsize);

                        /* Subtract size from length of free block. */
                        b->bh.bb.bsize -= (bufsize) size;

                        /* Link allocated buffer to the previous free buffer. */
                        ba->bb.prevfree = b->bh.bb.bsize;

                        /* Plug negative size into user buffer. */
                        ba->bb.bsize = -size;

                        /* Mark this buffer as owned by this thread. */
                        TCW_PTR(ba->bb.bthr, th);   // not an allocated address (do not mark it)
                        /* Mark buffer after this one not preceded by free block. */
                        bn->bb.prevfree = 0;

                        /* unlink the buffer from the old freelist, and reinsert it into the new freelist */
                        __kmp_bget_remove_from_freelist( b );
                        __kmp_bget_insert_into_freelist( thr, b );
#if BufStats
                        thr->totalloc += (size_t) size;
                        thr->numget++;      /* Increment number of bget() calls */
#endif
                        buf = (void *) ((((char *) ba) + sizeof(bhead_t)));
                        KMP_DEBUG_ASSERT( ((size_t)buf) % SizeQuant == 0 );
                        return buf;
                    } else {
                        bhead_t *ba;

                        ba = BH(((char *) b) + b->bh.bb.bsize);

                        KMP_DEBUG_ASSERT(ba->bb.prevfree == b->bh.bb.bsize);

                        /* The buffer isn't big enough to split. Give the whole
                           shebang to the caller and remove it from the free list. */

                        __kmp_bget_remove_from_freelist( b );
#if BufStats
                        thr->totalloc += (size_t) b->bh.bb.bsize;
                        thr->numget++;      /* Increment number of bget() calls */
#endif
                        /* Negate size to mark buffer allocated. */
                        b->bh.bb.bsize = -(b->bh.bb.bsize);

                        /* Mark this buffer as owned by this thread. */
                        TCW_PTR(ba->bb.bthr, th);   // not an allocated address (do not mark it)
                        /* Zero the back pointer in the next buffer in memory
                           to indicate that this buffer is allocated. */
                        ba->bb.prevfree = 0;

                        /* Give user buffer starting at queue links. */
                        buf = (void *) &(b->ql);
                        KMP_DEBUG_ASSERT( ((size_t)buf) % SizeQuant == 0 );
                        return buf;
                    }
                }

                /* Link to next buffer */
                b = ( use_blink ? b->ql.blink : b->ql.flink );
            }
        }

        /* We failed to find a buffer. If there's a compact function
           defined, notify it of the size requested. If it returns
           TRUE, try the allocation again. */

        if ((thr->compfcn == 0) || (!(*thr->compfcn)(size, ++compactseq))) {
            break;
        }
    }

    /* No buffer available with requested size free. */

    /* Don't give up yet -- look in the reserve supply. */

    if (thr->acqfcn != 0) {
        if (size > (bufsize) (thr->exp_incr - sizeof(bhead_t))) {

            /* Request is too large to fit in a single expansion
               block. Try to satisfy it by a direct buffer acquisition. */

            bdhead_t *bdh;

            size += sizeof(bdhead_t) - sizeof(bhead_t);

            KE_TRACE( 10, ("%%%%%% MALLOC( %d )\n", (int) size ) );

            /* richryan */
            bdh = BDH((*thr->acqfcn)((bufsize) size));
            if (bdh != NULL) {

                /* Mark the buffer special by setting the size field
                   of its header to zero. */
                bdh->bh.bb.bsize = 0;

                /* Mark this buffer as owned by this thread. */
                TCW_PTR(bdh->bh.bb.bthr, th);   // don't mark buffer as allocated,
                                                // because direct buffer never goes to free list
                bdh->bh.bb.prevfree = 0;
                bdh->tsize = size;
#if BufStats
                thr->totalloc += (size_t) size;
                thr->numget++;      /* Increment number of bget() calls */
                thr->numdget++;     /* Direct bget() call count */
#endif
                buf = (void *) (bdh + 1);
                KMP_DEBUG_ASSERT( ((size_t)buf) % SizeQuant == 0 );
                return buf;
            }

        } else {

            /* Try to obtain a new expansion block */

            void *newpool;

            KE_TRACE( 10, ("%%%%%% MALLOCB( %d )\n", (int) thr->exp_incr ) );

            /* richryan */
            newpool = (*thr->acqfcn)((bufsize) thr->exp_incr);
            KMP_DEBUG_ASSERT( ((size_t)newpool) % SizeQuant == 0 );
            if (newpool != NULL) {
                bpool( th, newpool, thr->exp_incr);
                buf = bget( th, requested_size);  /* This can't, I say, can't get into a loop. */
                return buf;
            }
        }
    }

    /* Still no buffer available */

    return NULL;
}

/* BGETZ -- Allocate a buffer and clear its contents to zero. We clear
            the entire contents of the buffer to zero, not just the
            region requested by the caller. */

static void *
bgetz( kmp_info_t *th, bufsize size )
{
    char *buf = (char *) bget( th, size);

    if (buf != NULL) {
        bhead_t *b;
        bufsize rsize;

        b = BH(buf - sizeof(bhead_t));
        rsize = -(b->bb.bsize);
        if (rsize == 0) {
            bdhead_t *bd;

            bd = BDH(buf - sizeof(bdhead_t));
            rsize = bd->tsize - (bufsize) sizeof(bdhead_t);
        } else {
            rsize -= sizeof(bhead_t);
        }

        KMP_DEBUG_ASSERT(rsize >= size);

        (void) memset(buf, 0, (bufsize) rsize);
    }
    return ((void *) buf);
}

/* BGETR -- Reallocate a buffer. This is a minimal implementation,
            simply in terms of brel() and bget(). It could be
            enhanced to allow the buffer to grow into adjacent free
            blocks and to avoid moving data unnecessarily. */

static void *
bgetr( kmp_info_t *th, void *buf, bufsize size)
{
    void *nbuf;
    bufsize osize;              /* Old size of buffer */
    bhead_t *b;

    nbuf = bget( th, size );
    if ( nbuf == NULL ) {       /* Acquire new buffer */
        return NULL;
    }
    if ( buf == NULL ) {
        return nbuf;
    }
    b = BH(((char *) buf) - sizeof(bhead_t));
    osize = -b->bb.bsize;
    if (osize == 0) {
        /* Buffer acquired directly through acqfcn. */
        bdhead_t *bd;

        bd = BDH(((char *) buf) - sizeof(bdhead_t));
        osize = bd->tsize - (bufsize) sizeof(bdhead_t);
    } else {
        osize -= sizeof(bhead_t);
    };

    KMP_DEBUG_ASSERT(osize > 0);

    (void) KMP_MEMCPY((char *) nbuf, (char *) buf,  /* Copy the data */
                      (size_t) ((size < osize) ? size : osize));
    brel( th, buf );

    return nbuf;
}

/* BREL -- Release a buffer. */

static void
brel( kmp_info_t *th, void *buf )
{
    thr_data_t *thr = get_thr_data( th );
    bfhead_t *b, *bn;
    kmp_info_t *bth;

    KMP_DEBUG_ASSERT(buf != NULL);
    KMP_DEBUG_ASSERT( ((size_t)buf) % SizeQuant == 0 );

    b = BFH(((char *) buf) - sizeof(bhead_t));

    if (b->bh.bb.bsize == 0) {  /* Directly-acquired buffer? */
        bdhead_t *bdh;

        bdh = BDH(((char *) buf) - sizeof(bdhead_t));
        KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
#if BufStats
        thr->totalloc -= (size_t) bdh->tsize;
        thr->numdrel++;         /* Number of direct releases */
        thr->numrel++;          /* Increment number of brel() calls */
#endif /* BufStats */
#ifdef FreeWipe
        (void) memset((char *) buf, 0x55,
                      (size_t) (bdh->tsize - sizeof(bdhead_t)));
#endif /* FreeWipe */

        KE_TRACE( 10, ("%%%%%% FREE( %p )\n", (void *) bdh ) );

        KMP_DEBUG_ASSERT( thr->relfcn != 0 );
        (*thr->relfcn)((void *) bdh);   /* Release it directly. */
        return;
    }

    bth = (kmp_info_t *)( (kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1 ); // clear possible mark before comparison
    if ( bth != th ) {
        /* Add this buffer to be released by the owning thread later */
        __kmp_bget_enqueue( bth, buf
#ifdef USE_QUEUING_LOCK_FOR_BGET
                            , __kmp_gtid_from_thread( th )
#endif
                          );
        return;
    }

    /* Buffer size must be negative, indicating that the buffer is
       allocated. */

    if (b->bh.bb.bsize >= 0) {
        bn = NULL;
    }
    KMP_DEBUG_ASSERT(b->bh.bb.bsize < 0);

    /* Back pointer in next buffer must be zero, indicating the
       same thing: */

    KMP_DEBUG_ASSERT(BH((char *) b - b->bh.bb.bsize)->bb.prevfree == 0);

#if BufStats
    thr->numrel++;              /* Increment number of brel() calls */
    thr->totalloc += (size_t) b->bh.bb.bsize;
#endif

    /* If the back link is nonzero, the previous buffer is free. */

    if (b->bh.bb.prevfree != 0) {
        /* The previous buffer is free. Consolidate this buffer with it
           by adding the length of this buffer to the previous free
           buffer. Note that we subtract the size in the buffer being
           released, since it's negative to indicate that the buffer is
           allocated. */

        register bufsize size = b->bh.bb.bsize;

        /* Make the previous buffer the one we're working on. */
        KMP_DEBUG_ASSERT(BH((char *) b - b->bh.bb.prevfree)->bb.bsize == b->bh.bb.prevfree);
        b = BFH(((char *) b) - b->bh.bb.prevfree);
        b->bh.bb.bsize -= size;

        /* unlink the buffer from the old freelist */
        __kmp_bget_remove_from_freelist( b );
    }
    else {
        /* The previous buffer is allocated. Mark this buffer
           size as positive (i.e. free) and fall through to place
           the buffer on the free list as an isolated free block. */

        b->bh.bb.bsize = -b->bh.bb.bsize;
    }

    /* insert buffer back onto a new freelist */
    __kmp_bget_insert_into_freelist( thr, b );


    /* Now we look at the next buffer in memory, located by advancing from
       the start of this buffer by its size, to see if that buffer is
       free. If it is, we combine this buffer with the next one in
       memory, dechaining the second buffer from the free list. */

    bn = BFH(((char *) b) + b->bh.bb.bsize);
    if (bn->bh.bb.bsize > 0) {

        /* The buffer is free. Remove it from the free list and add
           its size to that of our buffer. */

        KMP_DEBUG_ASSERT(BH((char *) bn + bn->bh.bb.bsize)->bb.prevfree == bn->bh.bb.bsize);

        __kmp_bget_remove_from_freelist( bn );

        b->bh.bb.bsize += bn->bh.bb.bsize;

        /* unlink the buffer from the old freelist, and reinsert it into the new freelist */

        __kmp_bget_remove_from_freelist( b );
        __kmp_bget_insert_into_freelist( thr, b );

        /* Finally, advance to the buffer that follows the newly
           consolidated free block. We must set its backpointer to the
           head of the consolidated free block. We know the next block
           must be an allocated block because the process of recombination
           guarantees that two free blocks will never be contiguous in
           memory. */

        bn = BFH(((char *) b) + b->bh.bb.bsize);
    }
#ifdef FreeWipe
    (void) memset(((char *) b) + sizeof(bfhead_t), 0x55,
                  (size_t) (b->bh.bb.bsize - sizeof(bfhead_t)));
#endif
    KMP_DEBUG_ASSERT(bn->bh.bb.bsize < 0);

    /* The next buffer is allocated. Set the backpointer in it to point
       to this buffer; the previous free buffer in memory. */

    bn->bh.bb.prevfree = b->bh.bb.bsize;

    /* If a block-release function is defined, and this free buffer
       constitutes the entire block, release it. Note that pool_len
       is defined in such a way that the test will fail unless all
       pool blocks are the same size. */

    if (thr->relfcn != 0 &&
        b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t)))
    {
#if BufStats
        if (thr->numpblk != 1) {  /* Do not release the last buffer until finalization time */
#endif

            KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
            KMP_DEBUG_ASSERT(BH((char *) b + b->bh.bb.bsize)->bb.bsize == ESent);
            KMP_DEBUG_ASSERT(BH((char *) b + b->bh.bb.bsize)->bb.prevfree == b->bh.bb.bsize);

            /* Unlink the buffer from the free list */
            __kmp_bget_remove_from_freelist( b );

            KE_TRACE( 10, ("%%%%%% FREE( %p )\n", (void *) b ) );

            (*thr->relfcn)(b);
#if BufStats
            thr->numprel++;     /* Nr of expansion block releases */
            thr->numpblk--;     /* Total number of blocks */
            KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);

            /* avoid leaving stale last_pool pointer around if it is being dealloced */
            if (thr->last_pool == b) thr->last_pool = 0;
        }
        else {
            thr->last_pool = b;
        }
#endif /* BufStats */
    }
}

/* BECTL -- Establish automatic pool expansion control */

static void
bectl( kmp_info_t *th, bget_compact_t compact, bget_acquire_t acquire, bget_release_t release, bufsize pool_incr)
{
    thr_data_t *thr = get_thr_data( th );

    thr->compfcn = compact;
    thr->acqfcn = acquire;
    thr->relfcn = release;
    thr->exp_incr = pool_incr;
}

/* BPOOL -- Add a region of memory to the buffer pool. */

static void
bpool( kmp_info_t *th, void *buf, bufsize len)
{
/*  int bin = 0; */
    thr_data_t *thr = get_thr_data( th );
    bfhead_t *b = BFH(buf);
    bhead_t *bn;

    __kmp_bget_dequeue( th );   /* Release any queued buffers */

#ifdef SizeQuant
    len &= ~(SizeQuant - 1);
#endif
    if (thr->pool_len == 0) {
        thr->pool_len = len;
    } else if (len != thr->pool_len) {
        thr->pool_len = -1;
    }
#if BufStats
    thr->numpget++;             /* Number of block acquisitions */
    thr->numpblk++;             /* Number of blocks total */
    KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
#endif /* BufStats */

    /* Since the block is initially occupied by a single free buffer,
       it had better not be (much) larger than the largest buffer
       whose size we can store in bhead.bb.bsize. */

    KMP_DEBUG_ASSERT(len - sizeof(bhead_t) <= -((bufsize) ESent + 1));

    /* Clear the backpointer at the start of the block to indicate that
       there is no free block prior to this one. That blocks
       recombination when the first block in memory is released. */

    b->bh.bb.prevfree = 0;

    /* Create a dummy allocated buffer at the end of the pool. This dummy
       buffer is seen when a buffer at the end of the pool is released and
       blocks recombination of the last buffer with the dummy buffer at
       the end. The length in the dummy buffer is set to the largest
       negative number to denote the end of the pool for diagnostic
       routines (this specific value is not counted on by the actual
       allocation and release functions). */
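
    /* The resulting layout of a pool block of (quantum-truncated) length len:

           +---------------------------------------------+----------------+
           | free buffer                                 | dummy sentinel |
           | prevfree = 0                                | bsize = ESent  |
           | bsize = len - sizeof(bhead_t)               |                |
           +---------------------------------------------+----------------+
    */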

    len -= sizeof(bhead_t);
    b->bh.bb.bsize = (bufsize) len;
    /* Set the owner of this buffer */
    TCW_PTR( b->bh.bb.bthr, (kmp_info_t*)((kmp_uintptr_t)th | 1) ); // mark the buffer as allocated address

    /* Chain the new block to the free list. */
    __kmp_bget_insert_into_freelist( thr, b );

#ifdef FreeWipe
    (void) memset(((char *) b) + sizeof(bfhead_t), 0x55,
                  (size_t) (len - sizeof(bfhead_t)));
#endif
    bn = BH(((char *) b) + len);
    bn->bb.prevfree = (bufsize) len;
    /* Definition of ESent assumes two's complement! */
    KMP_DEBUG_ASSERT( (~0) == -1 && (bn != 0) );

    bn->bb.bsize = ESent;
}

/* ------------------------------------------------------------------------ */

/* BFREED -- Dump the free lists for this thread. */

static void
bfreed( kmp_info_t *th )
{
    int bin = 0, count = 0;
    int gtid = __kmp_gtid_from_thread( th );
    thr_data_t *thr = get_thr_data( th );

#if BufStats
    __kmp_printf_no_lock("__kmp_printpool: T#%d total=%" KMP_UINT64_SPEC " get=%" KMP_INT64_SPEC " rel=%" \
           KMP_INT64_SPEC " pblk=%" KMP_INT64_SPEC " pget=%" KMP_INT64_SPEC " prel=%" KMP_INT64_SPEC \
           " dget=%" KMP_INT64_SPEC " drel=%" KMP_INT64_SPEC "\n",
           gtid, (kmp_uint64) thr->totalloc,
           (kmp_int64) thr->numget,  (kmp_int64) thr->numrel,
           (kmp_int64) thr->numpblk,
           (kmp_int64) thr->numpget, (kmp_int64) thr->numprel,
           (kmp_int64) thr->numdget, (kmp_int64) thr->numdrel );
#endif

    for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
        bfhead_t *b;

        for (b = thr->freelist[ bin ].ql.flink; b != &thr->freelist[ bin ]; b = b->ql.flink) {
            bufsize bs = b->bh.bb.bsize;

            KMP_DEBUG_ASSERT( b->ql.blink->ql.flink == b );
            KMP_DEBUG_ASSERT( b->ql.flink->ql.blink == b );
            KMP_DEBUG_ASSERT( bs > 0 );

            count += 1;

            __kmp_printf_no_lock("__kmp_printpool: T#%d Free block: 0x%p size %6ld bytes.\n", gtid, b, (long) bs );
#ifdef FreeWipe
            {
                char *lerr = ((char *) b) + sizeof(bfhead_t);
                if ((bs > sizeof(bfhead_t)) && ((*lerr != 0x55) || (memcmp(lerr, lerr + 1, (size_t) (bs - (sizeof(bfhead_t) + 1))) != 0))) {
                    __kmp_printf_no_lock( "__kmp_printpool: T#%d (Contents of above free block have been overstored.)\n", gtid );
                }
            }
#endif
        }
    }

    if (count == 0)
        __kmp_printf_no_lock("__kmp_printpool: T#%d No free blocks\n", gtid );
}

/* ------------------------------------------------------------------------ */

#ifdef KMP_DEBUG

#if BufStats

/* BSTATS -- Return buffer allocation free space statistics. */

static void
bstats( kmp_info_t *th, bufsize *curalloc, bufsize *totfree, bufsize *maxfree, long *nget, long *nrel)
{
    int bin = 0;
    thr_data_t *thr = get_thr_data( th );

    *nget = thr->numget;
    *nrel = thr->numrel;
    *curalloc = (bufsize) thr->totalloc;
    *totfree = 0;
    *maxfree = -1;

    for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
        bfhead_t *b = thr->freelist[ bin ].ql.flink;

        while (b != &thr->freelist[ bin ]) {
            KMP_DEBUG_ASSERT(b->bh.bb.bsize > 0);
            *totfree += b->bh.bb.bsize;
            if (b->bh.bb.bsize > *maxfree) {
                *maxfree = b->bh.bb.bsize;
            }
            b = b->ql.flink;    /* Link to next buffer */
        }
    }
}

/* BSTATSE -- Return extended statistics */

static void
bstatse( kmp_info_t *th, bufsize *pool_incr, long *npool, long *npget, long *nprel, long *ndget, long *ndrel)
{
    thr_data_t *thr = get_thr_data( th );

    *pool_incr = (thr->pool_len < 0) ? -thr->exp_incr : thr->exp_incr;
    *npool = thr->numpblk;
    *npget = thr->numpget;
    *nprel = thr->numprel;
    *ndget = thr->numdget;
    *ndrel = thr->numdrel;
}

#endif /* BufStats */

/* BUFDUMP -- Dump the data in a buffer. This is called with the user
              data pointer, and backs up to the buffer header. It will
              dump either a free block or an allocated one. */

static void
bufdump( kmp_info_t *th, void *buf )
{
    bfhead_t *b;
    unsigned char *bdump;
    bufsize bdlen;

    b = BFH(((char *) buf) - sizeof(bhead_t));
    KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
    if (b->bh.bb.bsize < 0) {
        bdump = (unsigned char *) buf;
        bdlen = (-b->bh.bb.bsize) - (bufsize) sizeof(bhead_t);
    } else {
        bdump = (unsigned char *) (((char *) b) + sizeof(bfhead_t));
        bdlen = b->bh.bb.bsize - (bufsize) sizeof(bfhead_t);
    }

    while (bdlen > 0) {
        int i, dupes = 0;
        bufsize l = bdlen;
        char bhex[50], bascii[20];

        if (l > 16) {
            l = 16;
        }

        for (i = 0; i < l; i++) {
            (void) KMP_SNPRINTF(bhex + i * 3, sizeof(bhex) - i * 3, "%02X ", bdump[i]);
            if (bdump[i] > 0x20 && bdump[i] < 0x7F)
                bascii[ i ] = bdump[ i ];
            else
                bascii[ i ] = ' ';
        }
        bascii[i] = 0;
        (void) __kmp_printf_no_lock("%-48s %s\n", bhex, bascii);
        bdump += l;
        bdlen -= l;
        while ((bdlen > 16) && (memcmp((char *) (bdump - 16),
                                       (char *) bdump, 16) == 0)) {
            dupes++;
            bdump += 16;
            bdlen -= 16;
        }
        if (dupes > 1) {
            (void) __kmp_printf_no_lock(
                "     (%d lines [%d bytes] identical to above line skipped)\n",
                dupes, dupes * 16);
        } else if (dupes == 1) {
            bdump -= 16;
            bdlen += 16;
        }
    }
}

/* BPOOLD -- Dump a buffer pool. The buffer headers are always listed.
             If DUMPALLOC is nonzero, the contents of allocated buffers
             are dumped. If DUMPFREE is nonzero, free blocks are
             dumped as well. If FreeWipe checking is enabled, free
             blocks which have been clobbered will always be dumped. */

static void
bpoold( kmp_info_t *th, void *buf, int dumpalloc, int dumpfree)
{
    bfhead_t *b = BFH( (char*)buf - sizeof(bhead_t));

    while (b->bh.bb.bsize != ESent) {
        bufsize bs = b->bh.bb.bsize;

        if (bs < 0) {
            bs = -bs;
            (void) __kmp_printf_no_lock("Allocated buffer: size %6ld bytes.\n", (long) bs);
            if (dumpalloc) {
                bufdump( th, (void *) (((char *) b) + sizeof(bhead_t)));
            }
        } else {
            const char *lerr = "";

            KMP_DEBUG_ASSERT(bs > 0);
            if ((b->ql.blink->ql.flink != b) || (b->ql.flink->ql.blink != b)) {
                lerr = " (Bad free list links)";
            }
            (void) __kmp_printf_no_lock("Free block: size %6ld bytes.%s\n",
                                        (long) bs, lerr);
#ifdef FreeWipe
            lerr = ((char *) b) + sizeof(bfhead_t);
            if ((bs > sizeof(bfhead_t)) && ((*lerr != 0x55) ||
                (memcmp(lerr, lerr + 1,
                        (size_t) (bs - (sizeof(bfhead_t) + 1))) != 0))) {
                (void) __kmp_printf_no_lock(
                    "(Contents of above free block have been overstored.)\n");
                bufdump( th, (void *) (((char *) b) + sizeof(bhead_t)));
            } else
#endif
            if (dumpfree) {
                bufdump( th, (void *) (((char *) b) + sizeof(bhead_t)));
            }
        }
        b = BFH(((char *) b) + bs);
    }
}

/* BPOOLV -- Validate a buffer pool. */

static int
bpoolv( kmp_info_t *th, void *buf )
{
    bfhead_t *b = BFH(buf);

    while (b->bh.bb.bsize != ESent) {
        bufsize bs = b->bh.bb.bsize;

        if (bs < 0) {
            bs = -bs;
        } else {
#ifdef FreeWipe
            char *lerr = "";
#endif

            KMP_DEBUG_ASSERT(bs > 0);
            if (bs <= 0) {
                return 0;
            }
            if ((b->ql.blink->ql.flink != b) || (b->ql.flink->ql.blink != b)) {
                (void) __kmp_printf_no_lock("Free block: size %6ld bytes. (Bad free list links)\n",
                                            (long) bs);
                KMP_DEBUG_ASSERT(0);
                return 0;
            }
#ifdef FreeWipe
            lerr = ((char *) b) + sizeof(bfhead_t);
            if ((bs > sizeof(bfhead_t)) && ((*lerr != 0x55) ||
                (memcmp(lerr, lerr + 1,
                        (size_t) (bs - (sizeof(bfhead_t) + 1))) != 0))) {
                (void) __kmp_printf_no_lock(
                    "(Contents of above free block have been overstored.)\n");
                bufdump( th, (void *) (((char *) b) + sizeof(bhead_t)));
                KMP_DEBUG_ASSERT(0);
                return 0;
            }
#endif /* FreeWipe */
        }
        b = BFH(((char *) b) + bs);
    }
    return 1;
}

#endif /* KMP_DEBUG */

/* ------------------------------------------------------------------------ */

void
__kmp_initialize_bget( kmp_info_t *th )
{
    KMP_DEBUG_ASSERT( SizeQuant >= sizeof( void * ) && (th != 0) );

    set_thr_data( th );

    bectl( th, (bget_compact_t) 0, (bget_acquire_t) malloc, (bget_release_t) free,
           (bufsize) __kmp_malloc_pool_incr );
}

void
__kmp_finalize_bget( kmp_info_t *th )
{
    thr_data_t *thr;
    bfhead_t *b;

    KMP_DEBUG_ASSERT( th != 0 );

#if BufStats
    thr = (thr_data_t *) th->th.th_local.bget_data;
    KMP_DEBUG_ASSERT( thr != NULL );
    b = thr->last_pool;

    /* If a block-release function is defined, and this free buffer
       constitutes the entire block, release it. Note that pool_len
       is defined in such a way that the test will fail unless all
       pool blocks are the same size. */

    /* Deallocate the last pool if one exists because we no longer do it in brel() */
    if (thr->relfcn != 0 && b != 0 && thr->numpblk != 0 &&
        b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t)))
    {
        KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
        KMP_DEBUG_ASSERT(BH((char *) b + b->bh.bb.bsize)->bb.bsize == ESent);
        KMP_DEBUG_ASSERT(BH((char *) b + b->bh.bb.bsize)->bb.prevfree == b->bh.bb.bsize);

        /* Unlink the buffer from the free list */
        __kmp_bget_remove_from_freelist( b );

        KE_TRACE( 10, ("%%%%%% FREE( %p )\n", (void *) b ) );

        (*thr->relfcn)(b);
        thr->numprel++;         /* Nr of expansion block releases */
        thr->numpblk--;         /* Total number of blocks */
        KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
    }
#endif /* BufStats */

    /* Deallocate bget_data */
    if ( th->th.th_local.bget_data != NULL ) {
        __kmp_free( th->th.th_local.bget_data );
        th->th.th_local.bget_data = NULL;
    }; // if
}

void
kmpc_set_poolsize( size_t size )
{
    bectl( __kmp_get_thread(), (bget_compact_t) 0, (bget_acquire_t) malloc,
           (bget_release_t) free, (bufsize) size );
}

size_t
kmpc_get_poolsize( void )
{
    thr_data_t *p;

    p = get_thr_data( __kmp_get_thread() );

    return p->exp_incr;
}

void
kmpc_set_poolmode( int mode )
{
    thr_data_t *p;

    if (mode == bget_mode_fifo || mode == bget_mode_lifo || mode == bget_mode_best) {
        p = get_thr_data( __kmp_get_thread() );
        p->mode = (bget_mode_t) mode;
    }
}

int
kmpc_get_poolmode( void )
{
    thr_data_t *p;

    p = get_thr_data( __kmp_get_thread() );

    return p->mode;
}

void
kmpc_get_poolstat( size_t *maxmem, size_t *allmem )
{
    kmp_info_t *th = __kmp_get_thread();
    bufsize a, b;

    __kmp_bget_dequeue( th );   /* Release any queued buffers */

    bcheck( th, &a, &b );

    *maxmem = a;
    *allmem = b;
}

void
kmpc_poolprint( void )
{
    kmp_info_t *th = __kmp_get_thread();

    __kmp_bget_dequeue( th );   /* Release any queued buffers */

    bfreed( th );
}

#endif // #if KMP_USE_BGET

/* ------------------------------------------------------------------------ */

void *
kmpc_malloc( size_t size )
{
    void * ptr;
    ptr = bget( __kmp_entry_thread(), (bufsize) size );

    return ptr;
}

void *
kmpc_calloc( size_t nelem, size_t elsize )
{
    void * ptr;
    ptr = bgetz( __kmp_entry_thread(), (bufsize) (nelem * elsize) );

    return ptr;
}

void *
kmpc_realloc( void * ptr, size_t size )
{
    void * result = NULL;

    if ( ptr == NULL ) {
        // If pointer is NULL, realloc behaves like malloc.
        result = bget( __kmp_entry_thread(), (bufsize) size );
    } else if ( size == 0 ) {
        // If size is 0, realloc behaves like free.
        // The thread must already be registered by an earlier call to
        // kmpc_malloc() or kmpc_calloc(), so it is safe to call
        // __kmp_get_thread() rather than __kmp_entry_thread().
        brel( __kmp_get_thread(), ptr );
    } else {
        result = bgetr( __kmp_entry_thread(), ptr, (bufsize) size );
    }; // if

    return result;
}
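
/* A minimal usage sketch (not compiled into the library) of the kmpc_*
   entry points above; the allocation sizes are arbitrary. */
#if 0
void
example_kmpc_usage( void )
{
    char *p = (char *) kmpc_malloc( 64 );   /* thread-private allocation */
    p = (char *) kmpc_realloc( p, 128 );    /* bgetr(): new block, copy, brel */
    kmpc_free( p );                         /* returns block to owning thread */
}
#endif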

/* NOTE: the library must have already been initialized by a previous allocate */

void
kmpc_free( void * ptr )
{
    if ( ! __kmp_init_serial ) {
        return;
    }; // if
    if ( ptr != NULL ) {
        kmp_info_t *th = __kmp_get_thread();
        __kmp_bget_dequeue( th );   /* Release any queued buffers */
        brel( th, ptr );
    };
}


/* ------------------------------------------------------------------------ */

void *
___kmp_thread_malloc( kmp_info_t *th, size_t size KMP_SRC_LOC_DECL )
{
    void * ptr;
    KE_TRACE( 30, (
        "-> __kmp_thread_malloc( %p, %d ) called from %s:%d\n",
        th,
        (int) size
        KMP_SRC_LOC_PARM
    ) );
    ptr = bget( th, (bufsize) size );
    KE_TRACE( 30, ( "<- __kmp_thread_malloc() returns %p\n", ptr ) );
    return ptr;
}

void *
___kmp_thread_calloc( kmp_info_t *th, size_t nelem, size_t elsize KMP_SRC_LOC_DECL )
{
    void * ptr;
    KE_TRACE( 30, (
        "-> __kmp_thread_calloc( %p, %d, %d ) called from %s:%d\n",
        th,
        (int) nelem,
        (int) elsize
        KMP_SRC_LOC_PARM
    ) );
    ptr = bgetz( th, (bufsize) (nelem * elsize) );
    KE_TRACE( 30, ( "<- __kmp_thread_calloc() returns %p\n", ptr ) );
    return ptr;
}

void *
___kmp_thread_realloc( kmp_info_t *th, void *ptr, size_t size KMP_SRC_LOC_DECL )
{
    KE_TRACE( 30, (
        "-> __kmp_thread_realloc( %p, %p, %d ) called from %s:%d\n",
        th,
        ptr,
        (int) size
        KMP_SRC_LOC_PARM
    ) );
    ptr = bgetr( th, ptr, (bufsize) size );
    KE_TRACE( 30, ( "<- __kmp_thread_realloc() returns %p\n", ptr ) );
    return ptr;
}

void
___kmp_thread_free( kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL )
{
    KE_TRACE( 30, (
        "-> __kmp_thread_free( %p, %p ) called from %s:%d\n",
        th,
        ptr
        KMP_SRC_LOC_PARM
    ) );
    if ( ptr != NULL ) {
        __kmp_bget_dequeue( th );   /* Release any queued buffers */
        brel( th, ptr );
    }
    KE_TRACE( 30, ( "<- __kmp_thread_free()\n" ) );
}

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */
/*
    If LEAK_MEMORY is defined, __kmp_free() will *not* free memory. It causes
    memory leaks, but it may be useful for debugging memory corruption and
    use of freed pointers, etc.
*/
/* #define LEAK_MEMORY */

struct kmp_mem_descr {      // Memory block descriptor.
    void * ptr_allocated;   // Pointer returned by malloc(), subject for free().
    size_t size_allocated;  // Size of allocated memory block.
    void * ptr_aligned;     // Pointer to aligned memory, to be used by client code.
    size_t size_aligned;    // Size of aligned memory block.
};
typedef struct kmp_mem_descr kmp_mem_descr_t;

/*
    Allocate memory on requested boundary, fill allocated memory with 0x00.
    NULL is NEVER returned, __kmp_abort() is called in case of memory allocation error.
    Must use __kmp_free when freeing memory allocated by this routine!
 */
static
void *
___kmp_allocate_align( size_t size, size_t alignment KMP_SRC_LOC_DECL )
{
    /*
        __kmp_allocate() allocates (by call to malloc()) a bigger memory block than
        requested in order to return a properly aligned pointer. The original pointer
        returned by malloc() and the size of the allocated block are saved in a
        descriptor just before the aligned pointer. This information is used by
        __kmp_free() -- it has to pass the original pointer, not the aligned one,
        to free().

            +---------+------------+-----------------------------------+---------+
            | padding | descriptor |          aligned block            | padding |
            +---------+------------+-----------------------------------+---------+
            ^                      ^
            |                      |
            |                      +- Aligned pointer returned to caller
            +- Pointer returned by malloc()

        The aligned block is filled with zeros; the paddings are filled with 0xEF.
    */
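
    /* Worked example (on an LP64 target, where sizeof( kmp_mem_descr_t ) == 32):
       for size == 64 and alignment == 64, malloc( 64 + 32 + 64 ) might return
       address 1000; then
           addr_aligned = (1000 + 32 + 64) & ~63 = 1088,
       the descriptor is written at 1088 - 32 = 1056, and 1088 is returned
       to the caller. */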
1585 
1586  kmp_mem_descr_t descr;
1587  kmp_uintptr_t addr_allocated; // Address returned by malloc().
1588  kmp_uintptr_t addr_aligned; // Aligned address to return to caller.
1589  kmp_uintptr_t addr_descr; // Address of memory block descriptor.
1590 
1591  KE_TRACE( 25, (
1592  "-> ___kmp_allocate_align( %d, %d ) called from %s:%d\n",
1593  (int) size,
1594  (int) alignment
1595  KMP_SRC_LOC_PARM
1596  ) );
1597 
1598  KMP_DEBUG_ASSERT( alignment < 32 * 1024 ); // Alignment should not be too
1599  KMP_DEBUG_ASSERT( sizeof( void * ) <= sizeof( kmp_uintptr_t ) );
1600  // Make sure kmp_uintptr_t is enough to store addresses.
1601 
1602  descr.size_aligned = size;
1603  descr.size_allocated = descr.size_aligned + sizeof( kmp_mem_descr_t ) + alignment;
1604 
1605  #if KMP_DEBUG
1606  descr.ptr_allocated = _malloc_src_loc( descr.size_allocated, _file_, _line_ );
1607  #else
1608  descr.ptr_allocated = malloc_src_loc( descr.size_allocated KMP_SRC_LOC_PARM );
1609  #endif
1610  KE_TRACE( 10, (
1611  " malloc( %d ) returned %p\n",
1612  (int) descr.size_allocated,
1613  descr.ptr_allocated
1614  ) );
1615  if ( descr.ptr_allocated == NULL ) {
1616  KMP_FATAL( OutOfHeapMemory );
1617  };
1618 
1619  addr_allocated = (kmp_uintptr_t) descr.ptr_allocated;
1620  addr_aligned =
1621  ( addr_allocated + sizeof( kmp_mem_descr_t ) + alignment )
1622  & ~ ( alignment - 1 );
1623  addr_descr = addr_aligned - sizeof( kmp_mem_descr_t );
1624 
1625  descr.ptr_aligned = (void *) addr_aligned;
1626 
1627  KE_TRACE( 26, (
1628  " ___kmp_allocate_align: "
1629  "ptr_allocated=%p, size_allocated=%d, "
1630  "ptr_aligned=%p, size_aligned=%d\n",
1631  descr.ptr_allocated,
1632  (int) descr.size_allocated,
1633  descr.ptr_aligned,
1634  (int) descr.size_aligned
1635  ) );
1636 
1637  KMP_DEBUG_ASSERT( addr_allocated <= addr_descr );
1638  KMP_DEBUG_ASSERT( addr_descr + sizeof( kmp_mem_descr_t ) == addr_aligned );
1639  KMP_DEBUG_ASSERT( addr_aligned + descr.size_aligned <= addr_allocated + descr.size_allocated );
1640  KMP_DEBUG_ASSERT( addr_aligned % alignment == 0 );
1641 
1642  #ifdef KMP_DEBUG
1643  memset( descr.ptr_allocated, 0xEF, descr.size_allocated );
1644  // Fill allocated memory block with 0xEF.
1645  #endif
1646  memset( descr.ptr_aligned, 0x00, descr.size_aligned );
1647  // Fill the aligned memory block (which is intended for using by caller) with 0x00. Do not
1648  // put this filling under KMP_DEBUG condition! Many callers expect zeroed memory. (Padding
1649  // bytes remain filled with 0xEF in debugging library.)
1650  * ( (kmp_mem_descr_t *) addr_descr ) = descr;
1651 
1652  KMP_MB();
1653 
1654  KE_TRACE( 25, ( "<- ___kmp_allocate_align() returns %p\n", descr.ptr_aligned ) );
1655  return descr.ptr_aligned;
1656 
1657 } // func ___kmp_allocate_align
1658 
1659 
1660 /*
1661  Allocate memory on cache line boundary, fill allocated memory with 0x00.
1662  Do not call this func directly! Use __kmp_allocate macro instead.
1663  NULL is NEVER returned, __kmp_abort() is called in case of memory allocation error.
1664  Must use __kmp_free when freeing memory allocated by this routine!
1665  */
1666 void *
1667 ___kmp_allocate( size_t size KMP_SRC_LOC_DECL )
1668 {
1669 
1670  void * ptr;
1671  KE_TRACE( 25, ( "-> __kmp_allocate( %d ) called from %s:%d\n", (int) size KMP_SRC_LOC_PARM ) );
1672  ptr = ___kmp_allocate_align( size, __kmp_align_alloc KMP_SRC_LOC_PARM );
1673  KE_TRACE( 25, ( "<- __kmp_allocate() returns %p\n", ptr ) );
1674  return ptr;
1675 
1676 } // func ___kmp_allocate
1677 
1678 #if (BUILD_MEMORY==FIRST_TOUCH)
1679 void *
1680 __kmp_ft_page_allocate(size_t size)
1681 {
1682  void *adr, *aadr;
1683 #if KMP_OS_LINUX
1684  /* TODO: Use this function to get page size everywhere */
1685  int page_size = getpagesize();
1686 #else
1687  /* TODO: Find windows function to get page size and use it everywhere */
1688  int page_size = PAGE_SIZE;
1689 #endif /* KMP_OS_LINUX */
1690 
1691  adr = (void *) __kmp_thread_malloc( __kmp_get_thread(),
1692  size + page_size + KMP_PTR_SKIP);
1693  if ( adr == 0 )
1694  KMP_FATAL( OutOfHeapMemory );
1695 
1696  /* check to see if adr is on a page boundary. */
1697  if ( ( (kmp_uintptr_t) adr & (page_size - 1)) == 0)
1698  /* nothing to do if adr is already on a page boundary. */
1699  aadr = adr;
1700  else
1701  /* else set aadr to the first page boundary in the allocated memory. */
1702  aadr = (void *) ( ( (kmp_uintptr_t) adr + page_size) & ~(page_size - 1) );
1703 
1704  /* the first touch by the owner thread. */
1705  *((void**)aadr) = adr;
1706 
1707  /* skip the memory space used for storing adr above. */
1708  return (void*)((char*)aadr + KMP_PTR_SKIP);
1709 }
1710 #endif
1711 
1712 /*
1713  Allocate memory on page boundary, fill allocated memory with 0x00.
1714  Does not call this func directly! Use __kmp_page_allocate macro instead.
1715  NULL is NEVER returned, __kmp_abort() is called in case of memory allocation error.
1716  Must use __kmp_free when freeing memory allocated by this routine!
1717  */
1718 void *
1719 ___kmp_page_allocate( size_t size KMP_SRC_LOC_DECL )
1720 {
1721  int page_size = 8 * 1024;
1722  void * ptr;
1723 
1724  KE_TRACE( 25, (
1725  "-> __kmp_page_allocate( %d ) called from %s:%d\n",
1726  (int) size
1727  KMP_SRC_LOC_PARM
1728  ) );
1729  ptr = ___kmp_allocate_align( size, page_size KMP_SRC_LOC_PARM );
1730  KE_TRACE( 25, ( "<- __kmp_page_allocate( %d ) returns %p\n", (int) size, ptr ) );
1731  return ptr;
1732 } // ___kmp_page_allocate
1733 
1734 /*
1735  Free memory allocated by __kmp_allocate() and __kmp_page_allocate().
1736  In debug mode, fill the memory block with 0xEF before call to free().
1737 */
1738 void
1739 ___kmp_free( void * ptr KMP_SRC_LOC_DECL )
1740 {
1741 
1742  kmp_mem_descr_t descr;
1743  kmp_uintptr_t addr_allocated; // Address returned by malloc().
1744  kmp_uintptr_t addr_aligned; // Aligned address passed by caller.
1745 
1746  KE_TRACE( 25, ( "-> __kmp_free( %p ) called from %s:%d\n", ptr KMP_SRC_LOC_PARM ) );
1747  KMP_ASSERT( ptr != NULL );
1748 
1749  descr = * ( kmp_mem_descr_t *) ( (kmp_uintptr_t) ptr - sizeof( kmp_mem_descr_t ) );
1750 
1751  KE_TRACE( 26, ( " __kmp_free: "
1752  "ptr_allocated=%p, size_allocated=%d, "
1753  "ptr_aligned=%p, size_aligned=%d\n",
1754  descr.ptr_allocated, (int) descr.size_allocated,
1755  descr.ptr_aligned, (int) descr.size_aligned ));
1756 
1757  addr_allocated = (kmp_uintptr_t) descr.ptr_allocated;
1758  addr_aligned = (kmp_uintptr_t) descr.ptr_aligned;
1759 
1760  KMP_DEBUG_ASSERT( addr_aligned % CACHE_LINE == 0 );
1761  KMP_DEBUG_ASSERT( descr.ptr_aligned == ptr );
1762  KMP_DEBUG_ASSERT( addr_allocated + sizeof( kmp_mem_descr_t ) <= addr_aligned );
1763  KMP_DEBUG_ASSERT( descr.size_aligned < descr.size_allocated );
1764  KMP_DEBUG_ASSERT( addr_aligned + descr.size_aligned <= addr_allocated + descr.size_allocated );
1765 
1766  #ifdef KMP_DEBUG
1767  memset( descr.ptr_allocated, 0xEF, descr.size_allocated );
1768  // Fill memory block with 0xEF, it helps catch using freed memory.
1769  #endif
1770 
1771  #ifndef LEAK_MEMORY
1772  KE_TRACE( 10, ( " free( %p )\n", descr.ptr_allocated ) );
1773  # ifdef KMP_DEBUG
1774  _free_src_loc( descr.ptr_allocated, _file_, _line_ );
1775  # else
1776  free_src_loc( descr.ptr_allocated KMP_SRC_LOC_PARM );
1777  # endif
1778  #endif
1779 
1780  KMP_MB();
1781 
1782  KE_TRACE( 25, ( "<- __kmp_free() returns\n" ) );
1783 
1784 } // func ___kmp_free
1785 
1786 /* ------------------------------------------------------------------------ */
1787 /* ------------------------------------------------------------------------ */
1788 
1789 #if USE_FAST_MEMORY == 3
1790 // Allocate fast memory by first scanning the thread's free lists
1791 // If a chunk the right size exists, grab it off the free list.
1792 // Otherwise allocate normally using kmp_thread_malloc.
1793 
1794 // AC: How to choose the limit? Just get 16 for now...
1795 #define KMP_FREE_LIST_LIMIT 16
1796 
1797 // Always use 128 bytes for determining buckets for caching memory blocks
1798 #define DCACHE_LINE 128
1799 
1800 void *
1801 ___kmp_fast_allocate( kmp_info_t *this_thr, size_t size KMP_SRC_LOC_DECL )
1802 {
1803  void * ptr;
1804  int num_lines;
1805  int idx;
1806  int index;
1807  void * alloc_ptr;
1808  size_t alloc_size;
1809  kmp_mem_descr_t * descr;
1810 
1811  KE_TRACE( 25, ( "-> __kmp_fast_allocate( T#%d, %d ) called from %s:%d\n",
1812  __kmp_gtid_from_thread(this_thr), (int) size KMP_SRC_LOC_PARM ) );
1813 
1814  num_lines = ( size + DCACHE_LINE - 1 ) / DCACHE_LINE;
1815  idx = num_lines - 1;
1816  KMP_DEBUG_ASSERT( idx >= 0 );
1817  if ( idx < 2 ) {
1818  index = 0; // idx is [ 0, 1 ], use first free list
1819  num_lines = 2; // 1 or 2 cache lines, or less than one cache line
1820  } else if ( ( idx >>= 2 ) == 0 ) {
1821  index = 1; // idx is [ 2, 3 ], use second free list
1822  num_lines = 4; // 3, 4 cache lines
1823  } else if ( ( idx >>= 2 ) == 0 ) {
1824  index = 2; // idx is [ 4, 15 ], use third free list
1825  num_lines = 16; // 5, 6, ..., 16 cache lines
1826  } else if ( ( idx >>= 2 ) == 0 ) {
1827  index = 3; // idx is [ 16, 63 ], use fourth free list
1828  num_lines = 64; // 17, 18, ..., 64 cache lines
1829  } else {
1830  goto alloc_call; // 65 or more cache lines ( > 8KB ), don't use free lists
1831  }
1832 
1833  ptr = this_thr->th.th_free_lists[index].th_free_list_self;
1834  if ( ptr != NULL ) {
1835  // pop the head of no-sync free list
1836  this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
1837  KMP_DEBUG_ASSERT( this_thr ==
1838  ((kmp_mem_descr_t *)( (kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t) ))->ptr_aligned );
1839  goto end;
1840  }
1841  ptr = TCR_SYNC_PTR( this_thr->th.th_free_lists[index].th_free_list_sync );
1842  if ( ptr != NULL ) {
1843  // no-sync free list is empty, use sync free list (filled in by other threads only)
1844  // pop the head of the sync free list, push NULL instead
1845  while ( ! KMP_COMPARE_AND_STORE_PTR(
1846  &this_thr->th.th_free_lists[index].th_free_list_sync, ptr, NULL ) )
1847  {
1848  KMP_CPU_PAUSE();
1849  ptr = TCR_SYNC_PTR( this_thr->th.th_free_lists[index].th_free_list_sync );
1850  }
1851  // push the rest of the chain onto the no-sync free list (can be NULL if that was the only block)
1852  this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
1853  KMP_DEBUG_ASSERT( this_thr ==
1854  ((kmp_mem_descr_t *)( (kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t) ))->ptr_aligned );
1855  goto end;
1856  }
1857 
1858  alloc_call:
1859  // no suitable block found on the free lists, so allocate one
1860  size = num_lines * DCACHE_LINE;
1861 
1862  alloc_size = size + sizeof( kmp_mem_descr_t ) + DCACHE_LINE;
1863  KE_TRACE( 25, ( "__kmp_fast_allocate: T#%d Calling __kmp_thread_malloc with alloc_size %d\n",
1864  __kmp_gtid_from_thread( this_thr ), (int) alloc_size ) );
1865  alloc_ptr = bget( this_thr, (bufsize) alloc_size );
1866 
1867  // align ptr to DCACHE_LINE
1868  ptr = (void *)(( ((kmp_uintptr_t)alloc_ptr) + sizeof(kmp_mem_descr_t) + DCACHE_LINE ) & ~( DCACHE_LINE - 1 ));
1869  descr = (kmp_mem_descr_t *)( ((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t) );
1870 
1871  descr->ptr_allocated = alloc_ptr; // remember allocated pointer
1872  // we don't need size_allocated
1873  descr->ptr_aligned = (void *)this_thr; // remember allocating thread
1874  // (it is already saved in bget buffer,
1875  // but we may want to use another allocator in the future)
1876  descr->size_aligned = size;
1877 
1878  end:
1879  KE_TRACE( 25, ( "<- __kmp_fast_allocate( T#%d ) returns %p\n",
1880  __kmp_gtid_from_thread( this_thr ), ptr ) );
1881  return ptr;
1882 } // func __kmp_fast_allocate
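/*
   The bucket mapping above, extracted as a pure function for illustration
   (hypothetical helper, not part of the runtime; DCACHE_LINE is the 128-byte
   granule defined earlier in this file):

       static int demo_free_list_index( size_t size, int * rounded_lines ) {
           int num_lines = (int)( ( size + DCACHE_LINE - 1 ) / DCACHE_LINE );
           int idx = num_lines - 1;
           if ( idx < 2 )            { *rounded_lines = 2;  return 0; } //  1..2 lines
           if ( ( idx >>= 2 ) == 0 ) { *rounded_lines = 4;  return 1; } //  3..4
           if ( ( idx >>= 2 ) == 0 ) { *rounded_lines = 16; return 2; } //  5..16
           if ( ( idx >>= 2 ) == 0 ) { *rounded_lines = 64; return 3; } // 17..64
           *rounded_lines = num_lines; return -1;  // > 64 lines: bypass free lists
       }

   For example, size == 3000 bytes gives 24 cache lines, which lands in the
   fourth bucket (index 3) and is rounded up to 64 lines, i.e. an 8 KB block.
*/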
1883 
1884 // Free fast memory and place it on the thread's free list if it is of
1885 // the correct size.
1886 void
1887 ___kmp_fast_free( kmp_info_t *this_thr, void * ptr KMP_SRC_LOC_DECL )
1888 {
1889  kmp_mem_descr_t * descr;
1890  kmp_info_t * alloc_thr;
1891  size_t size;
1892  size_t idx;
1893  int index;
1894 
1895  KE_TRACE( 25, ( "-> __kmp_fast_free( T#%d, %p ) called from %s:%d\n",
1896  __kmp_gtid_from_thread(this_thr), ptr KMP_SRC_LOC_PARM ) );
1897  KMP_ASSERT( ptr != NULL );
1898 
1899  descr = (kmp_mem_descr_t *)( ((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t) );
1900 
1901  KE_TRACE(26, (" __kmp_fast_free: size_aligned=%d\n",
1902  (int) descr->size_aligned ) );
1903 
1904  size = descr->size_aligned; // 2, 4, 16, 64, 65, 66, ... cache lines
1905 
1906  idx = DCACHE_LINE * 2; // the minimal block size is 2 cache lines
1907  if ( idx == size ) {
1908  index = 0; // 2 cache lines
1909  } else if ( ( idx <<= 1 ) == size ) {
1910  index = 1; // 4 cache lines
1911  } else if ( ( idx <<= 2 ) == size ) {
1912  index = 2; // 16 cache lines
1913  } else if ( ( idx <<= 2 ) == size ) {
1914  index = 3; // 64 cache lines
1915  } else {
1916  KMP_DEBUG_ASSERT( size > DCACHE_LINE * 64 );
1917  goto free_call; // 65 or more cache lines ( > 8KB )
1918  }
1919 
1920  alloc_thr = (kmp_info_t *)descr->ptr_aligned; // get thread owning the block
1921  if ( alloc_thr == this_thr ) {
1922  // push block to self no-sync free list, linking previous head (LIFO)
1923  *((void **)ptr) = this_thr->th.th_free_lists[index].th_free_list_self;
1924  this_thr->th.th_free_lists[index].th_free_list_self = ptr;
1925  } else {
1926  void * head = this_thr->th.th_free_lists[index].th_free_list_other;
1927  if ( head == NULL ) {
1928  // Create new free list
1929  this_thr->th.th_free_lists[index].th_free_list_other = ptr;
1930  *((void **)ptr) = NULL; // mark the tail of the list
1931  descr->size_allocated = (size_t)1; // head of the list keeps its length
1932  } else {
1933  // need to check the existing "other" list's owner thread and queue size
1934  kmp_mem_descr_t * dsc = (kmp_mem_descr_t *)( (char*)head - sizeof(kmp_mem_descr_t) );
1935  kmp_info_t * q_th = (kmp_info_t *)(dsc->ptr_aligned); // allocating thread, same for all queue nodes
1936  size_t q_sz = dsc->size_allocated + 1; // new queue size if we add the current block
1937  if ( q_th == alloc_thr && q_sz <= KMP_FREE_LIST_LIMIT ) {
1938  // we can add the current block to the "other" list, no sync needed
1939  *((void **)ptr) = head;
1940  descr->size_allocated = q_sz;
1941  this_thr->th.th_free_lists[index].th_free_list_other = ptr;
1942  } else {
1943  // either the queue's owner thread is changing or the size limit was exceeded;
1944  // return the old queue to the allocating thread (q_th) synchronously,
1945  // and start a new list for alloc_thr's blocks
1946  void * old_ptr;
1947  void * tail = head;
1948  void * next = *((void **)head);
1949  while ( next != NULL ) {
1950  KMP_DEBUG_ASSERT(
1951  // queue size should decrease by 1 each step through the list
1952  ((kmp_mem_descr_t*)((char*)next - sizeof(kmp_mem_descr_t)))->size_allocated + 1 ==
1953  ((kmp_mem_descr_t*)((char*)tail - sizeof(kmp_mem_descr_t)))->size_allocated );
1954  tail = next; // remember tail node
1955  next = *((void **)next);
1956  }
1957  KMP_DEBUG_ASSERT( q_th != NULL );
1958  // push block to owner's sync free list
1959  old_ptr = TCR_PTR( q_th->th.th_free_lists[index].th_free_list_sync );
1960  /* the next pointer must be set before setting free_list to ptr to avoid
1961  exposing a broken list to other threads, even for an instant. */
1962  *((void **)tail) = old_ptr;
1963 
1964  while ( ! KMP_COMPARE_AND_STORE_PTR(
1965  &q_th->th.th_free_lists[index].th_free_list_sync,
1966  old_ptr,
1967  head ) )
1968  {
1969  KMP_CPU_PAUSE();
1970  old_ptr = TCR_PTR( q_th->th.th_free_lists[index].th_free_list_sync );
1971  *((void **)tail) = old_ptr;
1972  }
1973 
1974  // start a new list of not-self blocks
1975  this_thr->th.th_free_lists[index].th_free_list_other = ptr;
1976  *((void **)ptr) = NULL;
1977  descr->size_allocated = (size_t)1; // head of queue keeps its length
1978  }
1979  }
1980  }
1981  goto end;
1982 
1983  free_call:
1984  KE_TRACE(25, ( "__kmp_fast_free: T#%d Calling __kmp_thread_free for size %d\n",
1985  __kmp_gtid_from_thread( this_thr ), (int) size ) );
1986  __kmp_bget_dequeue( this_thr ); /* Release any queued buffers */
1987  brel( this_thr, descr->ptr_allocated );
1988 
1989  end:
1990  KE_TRACE( 25, ( "<- __kmp_fast_free() returns\n" ) );
1991 
1992 } // func __kmp_fast_free
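/*
   The lock-free push onto th_free_list_sync above is a classic Treiber-stack
   update: link the node (or chain tail) to the observed head first, then
   publish it with a compare-and-swap, retrying on contention. A minimal
   sketch of the same pattern using C11 atomics (an assumption; this file
   uses the KMP_COMPARE_AND_STORE_PTR/TCR_PTR wrappers instead):

       #include <stdatomic.h>

       static void demo_sync_push( _Atomic( void * ) * top, void * node ) {
           void * old_head = atomic_load( top );
           do {
               *(void **)node = old_head;  // link before publishing, so a broken
                                           // list is never exposed, even briefly
           } while ( !atomic_compare_exchange_weak( top, &old_head, node ) );
       }
*/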
1993 
1994 
1995 // Initialize the thread free lists related to fast memory
1996 // Only do this when a thread is initially created.
1997 void
1998 __kmp_initialize_fast_memory( kmp_info_t *this_thr )
1999 {
2000  KE_TRACE(10, ( "__kmp_initialize_fast_memory: Called from th %p\n", this_thr ) );
2001 
2002  memset ( this_thr->th.th_free_lists, 0, NUM_LISTS * sizeof( kmp_free_list_t ) );
2003 }
2004 
2005 // Free the memory in the thread free lists related to fast memory
2006 // Only do this when a thread is being reaped (destroyed).
2007 void
2008 __kmp_free_fast_memory( kmp_info_t *th )
2009 {
2010  // Assuming BGET is the underlying allocator, walk through its structures...
2011  int bin;
2012  thr_data_t * thr = get_thr_data( th );
2013  void ** lst = NULL;
2014 
2015  KE_TRACE(5, ( "__kmp_free_fast_memory: Called T#%d\n",
2016  __kmp_gtid_from_thread( th ) ) );
2017 
2018  __kmp_bget_dequeue( th ); // Release any queued buffers
2019 
2020  // Dig through free lists and extract all allocated blocks
2021  for ( bin = 0; bin < MAX_BGET_BINS; ++bin ) {
2022  bfhead_t * b = thr->freelist[ bin ].ql.flink;
2023  while ( b != &thr->freelist[ bin ] ) {
2024  if ( (kmp_uintptr_t)b->bh.bb.bthr & 1 ) { // the low bit of bthr marks the buffer as an allocated address
2025  *((void**)b) = lst; // link into the list (overwrites bthr, but keeps flink intact)
2026  lst = (void**)b; // push b onto lst
2027  }
2028  b = b->ql.flink; // get next buffer
2029  }
2030  }
2031  while ( lst != NULL ) {
2032  void * next = *lst;
2033  KE_TRACE(10, ( "__kmp_free_fast_memory: freeing %p, next=%p th %p (%d)\n",
2034  lst, next, th, __kmp_gtid_from_thread( th ) ) );
2035  (*thr->relfcn)(lst);
2036  #if BufStats
2037  // count blocks to prevent problems in __kmp_finalize_bget()
2038  thr->numprel++; /* Nr of expansion block releases */
2039  thr->numpblk--; /* Total number of blocks */
2040  #endif
2041  lst = (void**)next;
2042  }
2043 
2044  KE_TRACE(5, ( "__kmp_free_fast_memory: Freed T#%d\n",
2045  __kmp_gtid_from_thread( th ) ) );
2046 }
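/*
   The collection loop above threads a temporary singly linked list through
   the blocks themselves: the first word of each block is overwritten to act
   as the "next" pointer, so no auxiliary memory is needed while gathering
   blocks to release. The same trick in isolation (hypothetical demo; each
   item must be at least sizeof(void *) bytes):

       static void demo_collect_and_free( void ** items, int n ) {
           void * lst = NULL;
           int i;
           for ( i = 0; i < n; ++i ) {     // push each item, reusing its storage
               *(void **)items[ i ] = lst;
               lst = items[ i ];
           }
           while ( lst != NULL ) {         // pop and release
               void * next = *(void **)lst;
               free( lst );
               lst = next;
           }
       }
*/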
2047 
2048 #endif // USE_FAST_MEMORY