LLVM OpenMP* Runtime Library
kmp_alloc.cpp
1 /*
2  * kmp_alloc.cpp -- private/shared dynamic memory allocation and management
3  */
4 
5 
6 //===----------------------------------------------------------------------===//
7 //
8 // The LLVM Compiler Infrastructure
9 //
10 // This file is dual licensed under the MIT and the University of Illinois Open
11 // Source Licenses. See LICENSE.txt for details.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 
16 #include "kmp.h"
17 #include "kmp_wrapper_malloc.h"
18 #include "kmp_io.h"
19 
20 // Disable bget when it is not used
21 #if KMP_USE_BGET
22 
23 /* Thread private buffer management code */
24 
25 typedef int (*bget_compact_t)(size_t, int);
26 typedef void *(*bget_acquire_t)(size_t);
27 typedef void (*bget_release_t)(void *);
28 
29 /* NOTE: bufsize must be a signed datatype */
30 
31 #if KMP_OS_WINDOWS
32 # if KMP_ARCH_X86 || KMP_ARCH_ARM
33  typedef kmp_int32 bufsize;
34 # else
35  typedef kmp_int64 bufsize;
36 # endif
37 #else
38  typedef ssize_t bufsize;
39 #endif
40 
41 /* The three modes of operation are fifo search, lifo search, and best-fit. */
42 
43 typedef enum bget_mode {
44  bget_mode_fifo = 0,
45  bget_mode_lifo = 1,
46  bget_mode_best = 2
47 } bget_mode_t;
48 
49 
50 static void bpool( kmp_info_t *th, void *buffer, bufsize len);
51 static void *bget( kmp_info_t *th, bufsize size);
52 static void *bgetz( kmp_info_t *th, bufsize size);
53 static void *bgetr( kmp_info_t *th, void *buffer, bufsize newsize);
54 static void brel( kmp_info_t *th, void *buf);
55 static void bectl( kmp_info_t *th, bget_compact_t compact, bget_acquire_t acquire, bget_release_t release, bufsize pool_incr );
56 
57 #ifdef KMP_DEBUG
58 static void bstats( kmp_info_t *th, bufsize *curalloc, bufsize *totfree, bufsize *maxfree, long *nget, long *nrel);
59 static void bstatse( kmp_info_t *th, bufsize *pool_incr, long *npool, long *npget, long *nprel, long *ndget, long *ndrel);
60 static void bufdump( kmp_info_t *th, void *buf);
61 static void bpoold( kmp_info_t *th, void *pool, int dumpalloc, int dumpfree);
62 static int bpoolv( kmp_info_t *th, void *pool);
63 #endif
64 
65 /* BGET CONFIGURATION */
66  /* Buffer allocation size quantum:
67  all buffers allocated are a
68  multiple of this size. This
69  MUST be a power of two. */
70 
71  /* On IA-32 architecture with Linux* OS,
72  malloc() does not
73  ensure 16-byte alignment */
74 
75 #if KMP_ARCH_X86 || !KMP_HAVE_QUAD
76 
77 #define SizeQuant 8
78 #define AlignType double
79 
80 #else
81 
82 #define SizeQuant 16
83 #define AlignType _Quad
84 
85 #endif
86 
87 #define BufStats 1 /* Define this symbol to enable the
88  bstats() function which calculates
89  the total free space in the buffer
90  pool, the largest available
91  buffer, and the total space
92  currently allocated. */
93 
94 #ifdef KMP_DEBUG
95 
96 #define BufDump 1 /* Define this symbol to enable the
97  bpoold() function which dumps the
98  buffers in a buffer pool. */
99 
100 #define BufValid 1 /* Define this symbol to enable the
101  bpoolv() function for validating
102  a buffer pool. */
103 
104 #define DumpData 1 /* Define this symbol to enable the
105  bufdump() function which allows
106  dumping the contents of an allocated
107  or free buffer. */
108 #ifdef NOT_USED_NOW
109 
110 #define FreeWipe 1 /* Wipe free buffers to a guaranteed
111  pattern of garbage to trip up
112  miscreants who attempt to use
113  pointers into released buffers. */
114 
115 #define BestFit 1 /* Use a best fit algorithm when
116  searching for space for an
117  allocation request. This uses
118  memory more efficiently, but
119  allocation will be much slower. */
120 #endif /* NOT_USED_NOW */
121 #endif /* KMP_DEBUG */
122 
123 
124 static bufsize bget_bin_size[ ] = {
125  0,
126 // 1 << 6, /* .5 Cache line */
127  1 << 7, /* 1 Cache line, new */
128  1 << 8, /* 2 Cache lines */
129  1 << 9, /* 4 Cache lines, new */
130  1 << 10, /* 8 Cache lines */
131  1 << 11, /* 16 Cache lines, new */
132  1 << 12,
133  1 << 13, /* new */
134  1 << 14,
135  1 << 15, /* new */
136  1 << 16,
137  1 << 17,
138  1 << 18,
139  1 << 19,
140  1 << 20, /* 1MB */
141  1 << 21, /* 2MB */
142  1 << 22, /* 4MB */
143  1 << 23, /* 8MB */
144  1 << 24, /* 16MB */
145  1 << 25, /* 32MB */
146 };
147 
148 #define MAX_BGET_BINS (int)(sizeof(bget_bin_size) / sizeof(bufsize))
149 
150 struct bfhead;
151 
152 /* Declare the interface, including the requested buffer size type,
153  bufsize. */
154 
155 /* Queue links */
156 
157 typedef struct qlinks {
158  struct bfhead *flink; /* Forward link */
159  struct bfhead *blink; /* Backward link */
160 } qlinks_t;
161 
162 /* Header in allocated and free buffers */
163 
164 typedef struct bhead2 {
165  kmp_info_t *bthr; /* The thread which owns the buffer pool */
166  bufsize prevfree; /* Relative link back to previous
167  free buffer in memory or 0 if
168  previous buffer is allocated. */
169  bufsize bsize; /* Buffer size: positive if free,
170  negative if allocated. */
171 } bhead2_t;
172 
173 /* Make sure the bhead structure is a multiple of SizeQuant in size. */
174 
175 typedef union bhead {
176  KMP_ALIGN( SizeQuant )
177  AlignType b_align;
178  char b_pad[ sizeof(bhead2_t) + (SizeQuant - (sizeof(bhead2_t) % SizeQuant)) ];
179  bhead2_t bb;
180 } bhead_t;
181 #define BH(p) ((bhead_t *) (p))
182 
183 /* Header in directly allocated buffers (by acqfcn) */
184 
185 typedef struct bdhead
186 {
187  bufsize tsize; /* Total size, including overhead */
188  bhead_t bh; /* Common header */
189 } bdhead_t;
190 #define BDH(p) ((bdhead_t *) (p))
191 
192 /* Header in free buffers */
193 
194 typedef struct bfhead {
195  bhead_t bh; /* Common allocated/free header */
196  qlinks_t ql; /* Links on free list */
197 } bfhead_t;
198 #define BFH(p) ((bfhead_t *) (p))
199 
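Taken together, these headers encode a block's state in the sign of bb.bsize: positive means free (a bfhead_t whose queue links are live), negative means allocated, and zero marks a buffer acquired directly through acqfcn and described by a bdhead_t instead. The prevfree field is the backward link used for coalescing: it holds the size of the preceding block when that block is free, and 0 when it is allocated. A minimal sketch of the convention, using a hypothetical block_state() helper over the same types (not part of the runtime):

    static const char *block_state(const bhead_t *h) {
        if (h->bb.bsize > 0) return "free";      /* bfhead_t, linked on a free list */
        if (h->bb.bsize < 0) return "allocated"; /* size is stored negated          */
        return "direct";                         /* acqfcn buffer, see bdhead_t     */
    }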
200 typedef struct thr_data {
201  bfhead_t freelist[ MAX_BGET_BINS ];
202 #if BufStats
203  size_t totalloc; /* Total space currently allocated */
204  long numget, numrel; /* Number of bget() and brel() calls */
205  long numpblk; /* Number of pool blocks */
206  long numpget, numprel; /* Number of block gets and rels */
207  long numdget, numdrel; /* Number of direct gets and rels */
208 #endif /* BufStats */
209 
210  /* Automatic expansion block management functions */
211  bget_compact_t compfcn;
212  bget_acquire_t acqfcn;
213  bget_release_t relfcn;
214 
215  bget_mode_t mode; /* what allocation mode to use? */
216 
217  bufsize exp_incr; /* Expansion block size */
218  bufsize pool_len; /* 0: no bpool calls have been made
219  -1: not all pool blocks are
220  the same size
221  >0: (common) block size for all
222  bpool calls made so far
223  */
224  bfhead_t * last_pool; /* Last pool owned by this thread (delayed deallocation) */
225 } thr_data_t;
226 
227 /* Minimum allocation quantum: */
228 
229 #define QLSize (sizeof(qlinks_t))
230 #define SizeQ ((SizeQuant > QLSize) ? SizeQuant : QLSize)
231 #define MaxSize (bufsize)( ~ ( ( (bufsize)( 1 ) << ( sizeof( bufsize ) * CHAR_BIT - 1 ) ) | ( SizeQuant - 1 ) ) )
232  // Maximum for the requested size.
233 
234 /* End sentinel: value placed in bsize field of dummy block delimiting
235  end of pool block. The most negative number which will fit in a
236  bufsize, defined in a way that the compiler will accept. */
237 
238 #define ESent ((bufsize) (-(((((bufsize)1)<<((int)sizeof(bufsize)*8-2))-1)*2)-2))
239 
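For a 64-bit bufsize the two limits above work out as follows: MaxSize clears the sign bit and the low SizeQuant-1 bits, i.e. it is the largest positive SizeQuant-aligned value, and ESent evaluates to the most negative representable number. A quick sketch of the arithmetic (a standalone translation unit assuming the non-Windows typedef and SizeQuant == 16):

    #include <cstdint>

    typedef int64_t bufsize;    /* mirrors the ssize_t case on LP64 targets */
    #define SizeQuant 16

    /* MaxSize: every bit set except the sign bit and the low alignment bits,
       i.e. the largest positive multiple of SizeQuant.                      */
    static_assert((INT64_MAX & ~(bufsize)(SizeQuant - 1)) == 0x7FFFFFFFFFFFFFF0LL,
                  "largest SizeQuant-aligned request");

    /* ESent: -((2^62 - 1) * 2) - 2 == -2^63, the most negative bufsize.     */
    static_assert(-((((bufsize)1 << 62) - 1) * 2) - 2 == INT64_MIN,
                  "end-of-pool sentinel");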
240 /* ------------------------------------------------------------------------ */
241 
242 /* Thread Data management routines */
243 
244 static int
245 bget_get_bin( bufsize size )
246 {
247  // binary chop bins
248  int lo = 0, hi = MAX_BGET_BINS - 1;
249 
250  KMP_DEBUG_ASSERT( size > 0 );
251 
252  while ( (hi - lo) > 1 ) {
253  int mid = (lo + hi) >> 1;
254  if (size < bget_bin_size[ mid ])
255  hi = mid - 1;
256  else
257  lo = mid;
258  }
259 
260  KMP_DEBUG_ASSERT( (lo >= 0) && (lo < MAX_BGET_BINS) );
261 
262  return lo;
263 }
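bget_get_bin() binary-chops over bget_bin_size[] and returns the bin where the free-list scan in bget() will start: roughly the largest threshold that does not exceed the request (the hi = mid - 1 step can land one bin low, which only lengthens the scan slightly). A standalone sketch with a few sample sizes, using a hypothetical demo_get_bin() that mirrors the table and the chop above:

    #include <stdio.h>

    static const long long bins[] = {
        0,       1 << 7,  1 << 8,  1 << 9,  1 << 10, 1 << 11, 1 << 12,
        1 << 13, 1 << 14, 1 << 15, 1 << 16, 1 << 17, 1 << 18, 1 << 19,
        1 << 20, 1 << 21, 1 << 22, 1 << 23, 1 << 24, 1 << 25 };
    #define NBINS ((int)(sizeof(bins) / sizeof(bins[0])))   /* 20 bins */

    static int demo_get_bin(long long size) {   /* same chop as bget_get_bin() */
        int lo = 0, hi = NBINS - 1;
        while (hi - lo > 1) {
            int mid = (lo + hi) >> 1;
            if (size < bins[mid]) hi = mid - 1; else lo = mid;
        }
        return lo;
    }

    int main(void) {
        long long sizes[] = { 64, 300, 4096, 200000 };
        for (int i = 0; i < 4; ++i)
            printf("size %6lld -> bin %2d (threshold %lld)\n",
                   sizes[i], demo_get_bin(sizes[i]), bins[demo_get_bin(sizes[i])]);
        return 0;   /* prints bins 0, 2, 6 and 11 respectively */
    }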
264 
265 static void
266 set_thr_data( kmp_info_t *th )
267 {
268  int i;
269  thr_data_t *data;
270 
271  data =
272  (thr_data_t *)(
273  ( ! th->th.th_local.bget_data ) ? __kmp_allocate( sizeof( *data ) ) : th->th.th_local.bget_data
274  );
275 
276  memset( data, '\0', sizeof( *data ) );
277 
278  for (i = 0; i < MAX_BGET_BINS; ++i) {
279  data->freelist[ i ].ql.flink = & data->freelist[ i ];
280  data->freelist[ i ].ql.blink = & data->freelist[ i ];
281  }
282 
283  th->th.th_local.bget_data = data;
284  th->th.th_local.bget_list = 0;
285 #if ! USE_CMP_XCHG_FOR_BGET
286 #ifdef USE_QUEUING_LOCK_FOR_BGET
287  __kmp_init_lock( & th->th.th_local.bget_lock );
288 #else
289  __kmp_init_bootstrap_lock( & th->th.th_local.bget_lock );
290 #endif /* USE_QUEUING_LOCK_FOR_BGET */
291 #endif /* ! USE_CMP_XCHG_FOR_BGET */
292 }
293 
294 static thr_data_t *
295 get_thr_data( kmp_info_t *th )
296 {
297  thr_data_t *data;
298 
299  data = (thr_data_t *) th->th.th_local.bget_data;
300 
301  KMP_DEBUG_ASSERT( data != 0 );
302 
303  return data;
304 }
305 
306 
307 #ifdef KMP_DEBUG
308 
309 static void
310 __kmp_bget_validate_queue( kmp_info_t *th )
311 {
312  /* NOTE: assume that the global_lock is held */
313 
314  void *p = (void *) th->th.th_local.bget_list;
315 
316  while (p != 0) {
317  bfhead_t *b = BFH(((char *) p) - sizeof(bhead_t));
318 
319  KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
320  p = (void *) b->ql.flink;
321  }
322 }
323 
324 #endif
325 
326 /* Walk the list of enqueued buffers and release them back to the free lists */
327 
328 static void
329 __kmp_bget_dequeue( kmp_info_t *th )
330 {
331  void *p = TCR_SYNC_PTR(th->th.th_local.bget_list);
332 
333  if (p != 0) {
334  #if USE_CMP_XCHG_FOR_BGET
335  {
336  volatile void *old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
337  while ( ! KMP_COMPARE_AND_STORE_PTR(
338  & th->th.th_local.bget_list, old_value, NULL ) )
339  {
340  KMP_CPU_PAUSE();
341  old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
342  }
343  p = (void *) old_value;
344  }
345  #else /* ! USE_CMP_XCHG_FOR_BGET */
346  #ifdef USE_QUEUING_LOCK_FOR_BGET
347  __kmp_acquire_lock( & th->th.th_local.bget_lock,
348  __kmp_gtid_from_thread(th) );
349  #else
350  __kmp_acquire_bootstrap_lock( & th->th.th_local.bget_lock );
351  #endif /* USE_QUEUING_LOCK_FOR_BGET */
352 
353  p = (void *) th->th.th_local.bget_list;
354  th->th.th_local.bget_list = 0;
355 
356  #ifdef USE_QUEUING_LOCK_FOR_BGET
357  __kmp_release_lock( & th->th.th_local.bget_lock,
358  __kmp_gtid_from_thread(th) );
359  #else
360  __kmp_release_bootstrap_lock( & th->th.th_local.bget_lock );
361  #endif
362  #endif /* USE_CMP_XCHG_FOR_BGET */
363 
364  /* Check again to make sure the list is not empty */
365 
366  while (p != 0) {
367  void *buf = p;
368  bfhead_t *b = BFH(((char *) p) - sizeof(bhead_t));
369 
370  KMP_DEBUG_ASSERT( b->bh.bb.bsize != 0 );
371  KMP_DEBUG_ASSERT( ( (kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1 ) ==
372  (kmp_uintptr_t)th ); // clear possible mark
373  KMP_DEBUG_ASSERT( b->ql.blink == 0 );
374 
375  p = (void *) b->ql.flink;
376 
377  brel( th, buf );
378  }
379  }
380 }
381 
382 /* Chain together the free buffers by using the thread owner field */
383 
384 static void
385 __kmp_bget_enqueue( kmp_info_t *th, void *buf
386 #ifdef USE_QUEUING_LOCK_FOR_BGET
387  , kmp_int32 rel_gtid
388 #endif
389  )
390 {
391  bfhead_t *b = BFH(((char *) buf) - sizeof(bhead_t));
392 
393  KMP_DEBUG_ASSERT( b->bh.bb.bsize != 0 );
394  KMP_DEBUG_ASSERT( ( (kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1 ) ==
395  (kmp_uintptr_t)th ); // clear possible mark
396 
397  b->ql.blink = 0;
398 
399  KC_TRACE( 10, ( "__kmp_bget_enqueue: moving buffer to T#%d list\n",
400  __kmp_gtid_from_thread( th ) ) );
401 
402 #if USE_CMP_XCHG_FOR_BGET
403  {
404  volatile void *old_value = TCR_PTR(th->th.th_local.bget_list);
405  /* the next pointer must be set before setting bget_list to buf to avoid
406  exposing a broken list to other threads, even for an instant. */
407  b->ql.flink = BFH( old_value );
408 
409  while ( ! KMP_COMPARE_AND_STORE_PTR(
410  & th->th.th_local.bget_list, old_value, buf ) )
411  {
412  KMP_CPU_PAUSE();
413  old_value = TCR_PTR(th->th.th_local.bget_list);
414  /* the next pointer must be set before setting bget_list to buf to avoid
415  exposing a broken list to other threads, even for an instant. */
416  b->ql.flink = BFH( old_value );
417  }
418  }
419 #else /* ! USE_CMP_XCHG_FOR_BGET */
420 # ifdef USE_QUEUING_LOCK_FOR_BGET
421  __kmp_acquire_lock( & th->th.th_local.bget_lock, rel_gtid );
422 # else
423  __kmp_acquire_bootstrap_lock( & th->th.th_local.bget_lock );
424  # endif
425 
426  b->ql.flink = BFH( th->th.th_local.bget_list );
427  th->th.th_local.bget_list = (void *) buf;
428 
429 # ifdef USE_QUEUING_LOCK_FOR_BGET
430  __kmp_release_lock( & th->th.th_local.bget_lock, rel_gtid );
431 # else
432  __kmp_release_bootstrap_lock( & th->th.th_local.bget_lock );
433 # endif
434 #endif /* USE_CMP_XCHG_FOR_BGET */
435 }
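The CAS loop above is the standard lock-free push onto a singly linked list: the new node's forward link is written first, then the list head is published with a compare-and-swap, retrying if another thread intervened; __kmp_bget_dequeue() later takes the whole chain in one step. A generic sketch of the same pattern in portable C++11 atomics (illustrative only; the runtime uses its own TCR_PTR/KMP_COMPARE_AND_STORE_PTR primitives):

    #include <atomic>

    struct node { node *next; };

    static std::atomic<node *> list_head{nullptr};

    void push(node *n) {                           /* like __kmp_bget_enqueue() */
        node *old = list_head.load(std::memory_order_relaxed);
        do {
            n->next = old;        /* set the link before publishing the node   */
        } while (!list_head.compare_exchange_weak(old, n,
                                                  std::memory_order_release,
                                                  std::memory_order_relaxed));
    }

    node *pop_all(void) {         /* __kmp_bget_dequeue() reaches the same
                                     effect with a CAS loop                    */
        return list_head.exchange(nullptr, std::memory_order_acquire);
    }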
436 
437 /* insert buffer back onto a new freelist */
438 
439 static void
440 __kmp_bget_insert_into_freelist( thr_data_t *thr, bfhead_t *b )
441 {
442  int bin;
443 
444  KMP_DEBUG_ASSERT( ((size_t)b ) % SizeQuant == 0 );
445  KMP_DEBUG_ASSERT( b->bh.bb.bsize % SizeQuant == 0 );
446 
447  bin = bget_get_bin( b->bh.bb.bsize );
448 
449  KMP_DEBUG_ASSERT(thr->freelist[ bin ].ql.blink->ql.flink == &thr->freelist[ bin ]);
450  KMP_DEBUG_ASSERT(thr->freelist[ bin ].ql.flink->ql.blink == &thr->freelist[ bin ]);
451 
452  b->ql.flink = &thr->freelist[ bin ];
453  b->ql.blink = thr->freelist[ bin ].ql.blink;
454 
455  thr->freelist[ bin ].ql.blink = b;
456  b->ql.blink->ql.flink = b;
457 }
458 
459 /* unlink the buffer from the old freelist */
460 
461 static void
462 __kmp_bget_remove_from_freelist( bfhead_t *b )
463 {
464  KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
465  KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);
466 
467  b->ql.blink->ql.flink = b->ql.flink;
468  b->ql.flink->ql.blink = b->ql.blink;
469 }
470 
471 /* ------------------------------------------------------------------------ */
472 
473 /* GET STATS -- check info on free list */
474 
475 static void
476 bcheck( kmp_info_t *th, bufsize *max_free, bufsize *total_free )
477 {
478  thr_data_t *thr = get_thr_data( th );
479  int bin;
480 
481  *total_free = *max_free = 0;
482 
483  for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
484  bfhead_t *b, *best;
485 
486  best = &thr->freelist[ bin ];
487  b = best->ql.flink;
488 
489  while (b != &thr->freelist[ bin ]) {
490  *total_free += (b->bh.bb.bsize - sizeof( bhead_t ));
491  if ((best == &thr->freelist[ bin ]) || (b->bh.bb.bsize < best->bh.bb.bsize))
492  best = b;
493 
494  /* Link to next buffer */
495  b = b->ql.flink;
496  }
497 
498  if (*max_free < best->bh.bb.bsize)
499  *max_free = best->bh.bb.bsize;
500  }
501 
502  if (*max_free > (bufsize)sizeof( bhead_t ))
503  *max_free -= sizeof( bhead_t );
504 }
505 
506 /* ------------------------------------------------------------------------ */
507 
508 /* BGET -- Allocate a buffer. */
509 
510 static void *
511 bget( kmp_info_t *th, bufsize requested_size )
512 {
513  thr_data_t *thr = get_thr_data( th );
514  bufsize size = requested_size;
515  bfhead_t *b;
516  void *buf;
517  int compactseq = 0;
518  int use_blink = 0;
519 /* For BestFit */
520  bfhead_t *best;
521 
522  if ( size < 0 || size + sizeof( bhead_t ) > MaxSize ) {
523  return NULL;
524  }; // if
525 
526  __kmp_bget_dequeue( th ); /* Release any queued buffers */
527 
528  if (size < (bufsize)SizeQ) { /* Need at least room for the */
529  size = SizeQ; /* queue links. */
530  }
531  #if defined( SizeQuant ) && ( SizeQuant > 1 )
532  size = (size + (SizeQuant - 1)) & (~(SizeQuant - 1));
533  #endif
534 
535  size += sizeof(bhead_t); /* Add overhead in allocated buffer
536  to size required. */
537  KMP_DEBUG_ASSERT( size >= 0 );
538  KMP_DEBUG_ASSERT( size % SizeQuant == 0 );
539 
540  use_blink = ( thr->mode == bget_mode_lifo );
541 
542  /* If a compact function was provided in the call to bectl(), wrap
543  a loop around the allocation process to allow compaction to
544  intervene in case we don't find a suitable buffer in the chain. */
545 
546  for (;;) {
547  int bin;
548 
549  for (bin = bget_get_bin( size ); bin < MAX_BGET_BINS; ++bin) {
550  /* Link to next buffer */
551  b = ( use_blink ? thr->freelist[ bin ].ql.blink : thr->freelist[ bin ].ql.flink );
552 
553  if (thr->mode == bget_mode_best) {
554  best = &thr->freelist[ bin ];
555 
556  /* Scan the free list searching for the first buffer big enough
557  to hold the requested size buffer. */
558 
559  while (b != &thr->freelist[ bin ]) {
560  if (b->bh.bb.bsize >= (bufsize) size) {
561  if ((best == &thr->freelist[ bin ]) || (b->bh.bb.bsize < best->bh.bb.bsize)) {
562  best = b;
563  }
564  }
565 
566  /* Link to next buffer */
567  b = ( use_blink ? b->ql.blink : b->ql.flink );
568  }
569  b = best;
570  }
571 
572  while (b != &thr->freelist[ bin ]) {
573  if ((bufsize) b->bh.bb.bsize >= (bufsize) size) {
574 
575  /* Buffer is big enough to satisfy the request. Allocate it
576  to the caller. We must decide whether the buffer is large
577  enough to split into the part given to the caller and a
578  free buffer that remains on the free list, or whether the
579  entire buffer should be removed from the free list and
580  given to the caller in its entirety. We only split the
581  buffer if enough room remains for a header plus the minimum
582  quantum of allocation. */
583 
584  if ((b->bh.bb.bsize - (bufsize) size) > (bufsize)(SizeQ + (sizeof(bhead_t)))) {
585  bhead_t *ba, *bn;
586 
587  ba = BH(((char *) b) + (b->bh.bb.bsize - (bufsize) size));
588  bn = BH(((char *) ba) + size);
589 
590  KMP_DEBUG_ASSERT(bn->bb.prevfree == b->bh.bb.bsize);
591 
592  /* Subtract size from length of free block. */
593  b->bh.bb.bsize -= (bufsize) size;
594 
595  /* Link allocated buffer to the previous free buffer. */
596  ba->bb.prevfree = b->bh.bb.bsize;
597 
598  /* Plug negative size into user buffer. */
599  ba->bb.bsize = -size;
600 
601  /* Mark this buffer as owned by this thread. */
602  TCW_PTR(ba->bb.bthr, th); // not an allocated address (do not mark it)
603  /* Mark buffer after this one not preceded by free block. */
604  bn->bb.prevfree = 0;
605 
606  /* unlink the buffer from the old freelist, and reinsert it into the new freelist */
607  __kmp_bget_remove_from_freelist( b );
608  __kmp_bget_insert_into_freelist( thr, b );
609 #if BufStats
610  thr->totalloc += (size_t) size;
611  thr->numget++; /* Increment number of bget() calls */
612 #endif
613  buf = (void *) ((((char *) ba) + sizeof(bhead_t)));
614  KMP_DEBUG_ASSERT( ((size_t)buf) % SizeQuant == 0 );
615  return buf;
616  } else {
617  bhead_t *ba;
618 
619  ba = BH(((char *) b) + b->bh.bb.bsize);
620 
621  KMP_DEBUG_ASSERT(ba->bb.prevfree == b->bh.bb.bsize);
622 
623  /* The buffer isn't big enough to split. Give the whole
624  shebang to the caller and remove it from the free list. */
625 
626  __kmp_bget_remove_from_freelist( b );
627 #if BufStats
628  thr->totalloc += (size_t) b->bh.bb.bsize;
629  thr->numget++; /* Increment number of bget() calls */
630 #endif
631  /* Negate size to mark buffer allocated. */
632  b->bh.bb.bsize = -(b->bh.bb.bsize);
633 
634  /* Mark this buffer as owned by this thread. */
635  TCW_PTR(ba->bb.bthr, th); // not an allocated address (do not mark it)
636  /* Zero the back pointer in the next buffer in memory
637  to indicate that this buffer is allocated. */
638  ba->bb.prevfree = 0;
639 
640  /* Give user buffer starting at queue links. */
641  buf = (void *) &(b->ql);
642  KMP_DEBUG_ASSERT( ((size_t)buf) % SizeQuant == 0 );
643  return buf;
644  }
645  }
646 
647  /* Link to next buffer */
648  b = ( use_blink ? b->ql.blink : b->ql.flink );
649  }
650  }
651 
652  /* We failed to find a buffer. If there's a compact function
653  defined, notify it of the size requested. If it returns
654  TRUE, try the allocation again. */
655 
656  if ((thr->compfcn == 0) || (!(*thr->compfcn)(size, ++compactseq))) {
657  break;
658  }
659  }
660 
661  /* No buffer available with requested size free. */
662 
663  /* Don't give up yet -- look in the reserve supply. */
664 
665  if (thr->acqfcn != 0) {
666  if (size > (bufsize) (thr->exp_incr - sizeof(bhead_t))) {
667 
668  /* Request is too large to fit in a single expansion
669  block. Try to satisfy it by a direct buffer acquisition. */
670 
671  bdhead_t *bdh;
672 
673  size += sizeof(bdhead_t) - sizeof(bhead_t);
674 
675  KE_TRACE( 10, ("%%%%%% MALLOC( %d )\n", (int) size ) );
676 
677  /* richryan */
678  bdh = BDH((*thr->acqfcn)((bufsize) size));
679  if (bdh != NULL) {
680 
681  /* Mark the buffer special by setting the size field
682  of its header to zero. */
683  bdh->bh.bb.bsize = 0;
684 
685  /* Mark this buffer as owned by this thread. */
686  TCW_PTR(bdh->bh.bb.bthr, th); // don't mark buffer as allocated,
687  // because direct buffer never goes to free list
688  bdh->bh.bb.prevfree = 0;
689  bdh->tsize = size;
690 #if BufStats
691  thr->totalloc += (size_t) size;
692  thr->numget++; /* Increment number of bget() calls */
693  thr->numdget++; /* Direct bget() call count */
694 #endif
695  buf = (void *) (bdh + 1);
696  KMP_DEBUG_ASSERT( ((size_t)buf) % SizeQuant == 0 );
697  return buf;
698  }
699 
700  } else {
701 
702  /* Try to obtain a new expansion block */
703 
704  void *newpool;
705 
706  KE_TRACE( 10, ("%%%%%% MALLOCB( %d )\n", (int) thr->exp_incr ) );
707 
708  /* richryan */
709  newpool = (*thr->acqfcn)((bufsize) thr->exp_incr);
710  KMP_DEBUG_ASSERT( ((size_t)newpool) % SizeQuant == 0 );
711  if (newpool != NULL) {
712  bpool( th, newpool, thr->exp_incr);
713  buf = bget( th, requested_size); /* This can't, I say, can't get into a loop. */
714  return buf;
715  }
716  }
717  }
718 
719  /* Still no buffer available */
720 
721  return NULL;
722 }
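Before the free lists are searched, the request is padded in three steps: raised to at least SizeQ so a freed block can hold its queue links, rounded up to a SizeQuant multiple, and increased by sizeof(bhead_t) of header overhead. A small worked sketch of that arithmetic (assuming SizeQuant == 16 and a 16-byte header; the real values depend on the target):

    #include <stdio.h>

    #define SizeQuant 16
    #define SizeQ     16   /* stand-in for max(SizeQuant, sizeof(qlinks_t))  */
    #define HDR       16   /* stand-in for sizeof(bhead_t)                   */

    static long padded(long req) {
        long size = req < SizeQ ? SizeQ : req;                /* room for links */
        size = (size + (SizeQuant - 1)) & ~(long)(SizeQuant - 1); /* round up   */
        return size + HDR;                                    /* add header     */
    }

    int main(void) {
        printf("request    1 -> block of %ld bytes\n", padded(1));    /*   32 */
        printf("request   24 -> block of %ld bytes\n", padded(24));   /*   48 */
        printf("request 4096 -> block of %ld bytes\n", padded(4096)); /* 4112 */
        return 0;
    }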
723 
724 /* BGETZ -- Allocate a buffer and clear its contents to zero. We clear
725  the entire contents of the buffer to zero, not just the
726  region requested by the caller. */
727 
728 static void *
729 bgetz( kmp_info_t *th, bufsize size )
730 {
731  char *buf = (char *) bget( th, size);
732 
733  if (buf != NULL) {
734  bhead_t *b;
735  bufsize rsize;
736 
737  b = BH(buf - sizeof(bhead_t));
738  rsize = -(b->bb.bsize);
739  if (rsize == 0) {
740  bdhead_t *bd;
741 
742  bd = BDH(buf - sizeof(bdhead_t));
743  rsize = bd->tsize - (bufsize) sizeof(bdhead_t);
744  } else {
745  rsize -= sizeof(bhead_t);
746  }
747 
748  KMP_DEBUG_ASSERT(rsize >= size);
749 
750  (void) memset(buf, 0, (bufsize) rsize);
751  }
752  return ((void *) buf);
753 }
754 
755 /* BGETR -- Reallocate a buffer. This is a minimal implementation,
756  simply in terms of brel() and bget(). It could be
757  enhanced to allow the buffer to grow into adjacent free
758  blocks and to avoid moving data unnecessarily. */
759 
760 static void *
761 bgetr( kmp_info_t *th, void *buf, bufsize size)
762 {
763  void *nbuf;
764  bufsize osize; /* Old size of buffer */
765  bhead_t *b;
766 
767  nbuf = bget( th, size );
768  if ( nbuf == NULL ) { /* Acquire new buffer */
769  return NULL;
770  }
771  if ( buf == NULL ) {
772  return nbuf;
773  }
774  b = BH(((char *) buf) - sizeof(bhead_t));
775  osize = -b->bb.bsize;
776  if (osize == 0) {
777  /* Buffer acquired directly through acqfcn. */
778  bdhead_t *bd;
779 
780  bd = BDH(((char *) buf) - sizeof(bdhead_t));
781  osize = bd->tsize - (bufsize) sizeof(bdhead_t);
782  } else {
783  osize -= sizeof(bhead_t);
784  };
785 
786  KMP_DEBUG_ASSERT(osize > 0);
787 
788  (void) KMP_MEMCPY((char *) nbuf, (char *) buf, /* Copy the data */
789  (size_t) ((size < osize) ? size : osize));
790  brel( th, buf );
791 
792  return nbuf;
793 }
794 
795 /* BREL -- Release a buffer. */
796 
797 static void
798 brel( kmp_info_t *th, void *buf )
799 {
800  thr_data_t *thr = get_thr_data( th );
801  bfhead_t *b, *bn;
802  kmp_info_t *bth;
803 
804  KMP_DEBUG_ASSERT(buf != NULL);
805  KMP_DEBUG_ASSERT( ((size_t)buf) % SizeQuant == 0 );
806 
807  b = BFH(((char *) buf) - sizeof(bhead_t));
808 
809  if (b->bh.bb.bsize == 0) { /* Directly-acquired buffer? */
810  bdhead_t *bdh;
811 
812  bdh = BDH(((char *) buf) - sizeof(bdhead_t));
813  KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
814 #if BufStats
815  thr->totalloc -= (size_t) bdh->tsize;
816  thr->numdrel++; /* Number of direct releases */
817  thr->numrel++; /* Increment number of brel() calls */
818 #endif /* BufStats */
819 #ifdef FreeWipe
820  (void) memset((char *) buf, 0x55,
821  (size_t) (bdh->tsize - sizeof(bdhead_t)));
822 #endif /* FreeWipe */
823 
824  KE_TRACE( 10, ("%%%%%% FREE( %p )\n", (void *) bdh ) );
825 
826  KMP_DEBUG_ASSERT( thr->relfcn != 0 );
827  (*thr->relfcn)((void *) bdh); /* Release it directly. */
828  return;
829  }
830 
831  bth = (kmp_info_t *)( (kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1 ); // clear possible mark before comparison
832  if ( bth != th ) {
833  /* Add this buffer to be released by the owning thread later */
834  __kmp_bget_enqueue( bth, buf
835 #ifdef USE_QUEUING_LOCK_FOR_BGET
836  , __kmp_gtid_from_thread( th )
837 #endif
838  );
839  return;
840  }
841 
842  /* Buffer size must be negative, indicating that the buffer is
843  allocated. */
844 
845  if (b->bh.bb.bsize >= 0) {
846  bn = NULL;
847  }
848  KMP_DEBUG_ASSERT(b->bh.bb.bsize < 0);
849 
850  /* Back pointer in next buffer must be zero, indicating the
851  same thing: */
852 
853  KMP_DEBUG_ASSERT(BH((char *) b - b->bh.bb.bsize)->bb.prevfree == 0);
854 
855 #if BufStats
856  thr->numrel++; /* Increment number of brel() calls */
857  thr->totalloc += (size_t) b->bh.bb.bsize;
858 #endif
859 
860  /* If the back link is nonzero, the previous buffer is free. */
861 
862  if (b->bh.bb.prevfree != 0) {
863  /* The previous buffer is free. Consolidate this buffer with it
864  by adding the length of this buffer to the previous free
865  buffer. Note that we subtract the size in the buffer being
866  released, since it's negative to indicate that the buffer is
867  allocated. */
868 
869  register bufsize size = b->bh.bb.bsize;
870 
871  /* Make the previous buffer the one we're working on. */
872  KMP_DEBUG_ASSERT(BH((char *) b - b->bh.bb.prevfree)->bb.bsize == b->bh.bb.prevfree);
873  b = BFH(((char *) b) - b->bh.bb.prevfree);
874  b->bh.bb.bsize -= size;
875 
876  /* unlink the buffer from the old freelist */
877  __kmp_bget_remove_from_freelist( b );
878  }
879  else {
880  /* The previous buffer is allocated. Mark this buffer's
881  size as positive (i.e. free) and fall through to place
882  the buffer on the free list as an isolated free block. */
883 
884  b->bh.bb.bsize = -b->bh.bb.bsize;
885  }
886 
887  /* insert buffer back onto a new freelist */
888  __kmp_bget_insert_into_freelist( thr, b );
889 
890 
891  /* Now we look at the next buffer in memory, located by advancing from
892  the start of this buffer by its size, to see if that buffer is
893  free. If it is, we combine this buffer with the next one in
894  memory, dechaining the second buffer from the free list. */
895 
896  bn = BFH(((char *) b) + b->bh.bb.bsize);
897  if (bn->bh.bb.bsize > 0) {
898 
899  /* The buffer is free. Remove it from the free list and add
900  its size to that of our buffer. */
901 
902  KMP_DEBUG_ASSERT(BH((char *) bn + bn->bh.bb.bsize)->bb.prevfree == bn->bh.bb.bsize);
903 
904  __kmp_bget_remove_from_freelist( bn );
905 
906  b->bh.bb.bsize += bn->bh.bb.bsize;
907 
908  /* unlink the buffer from the old freelist, and reinsert it into the new freelist */
909 
910  __kmp_bget_remove_from_freelist( b );
911  __kmp_bget_insert_into_freelist( thr, b );
912 
913  /* Finally, advance to the buffer that follows the newly
914  consolidated free block. We must set its backpointer to the
915  head of the consolidated free block. We know the next block
916  must be an allocated block because the process of recombination
917  guarantees that two free blocks will never be contiguous in
918  memory. */
919 
920  bn = BFH(((char *) b) + b->bh.bb.bsize);
921  }
922 #ifdef FreeWipe
923  (void) memset(((char *) b) + sizeof(bfhead_t), 0x55,
924  (size_t) (b->bh.bb.bsize - sizeof(bfhead_t)));
925 #endif
926  KMP_DEBUG_ASSERT(bn->bh.bb.bsize < 0);
927 
928  /* The next buffer is allocated. Set the backpointer in it to point
929  to this buffer; the previous free buffer in memory. */
930 
931  bn->bh.bb.prevfree = b->bh.bb.bsize;
932 
933  /* If a block-release function is defined, and this free buffer
934  constitutes the entire block, release it. Note that pool_len
935  is defined in such a way that the test will fail unless all
936  pool blocks are the same size. */
937 
938  if (thr->relfcn != 0 &&
939  b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t)))
940  {
941 #if BufStats
942  if (thr->numpblk != 1) { /* Do not release the last buffer until finalization time */
943 #endif
944 
945  KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
946  KMP_DEBUG_ASSERT(BH((char *) b + b->bh.bb.bsize)->bb.bsize == ESent);
947  KMP_DEBUG_ASSERT(BH((char *) b + b->bh.bb.bsize)->bb.prevfree == b->bh.bb.bsize);
948 
949  /* Unlink the buffer from the free list */
950  __kmp_bget_remove_from_freelist( b );
951 
952  KE_TRACE( 10, ("%%%%%% FREE( %p )\n", (void *) b ) );
953 
954  (*thr->relfcn)(b);
955 #if BufStats
956  thr->numprel++; /* Nr of expansion block releases */
957  thr->numpblk--; /* Total number of blocks */
958  KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
959 
960  /* avoid leaving stale last_pool pointer around if it is being dealloced */
961  if (thr->last_pool == b) thr->last_pool = 0;
962  }
963  else {
964  thr->last_pool = b;
965  }
966 #endif /* BufStats */
967  }
968 }
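The release path depends on two invariants: an allocated block stores its size negated, and the header that follows it in memory records, in prevfree, the size of a preceding free block (0 if the predecessor is allocated). That is what lets brel() merge backward in O(1), without searching any list. A schematic of the backward-merge step over a simplified header (illustrative types, not the runtime's):

    struct hdr {
        long prevfree;   /* size of the preceding free block, 0 if allocated */
        long bsize;      /* > 0: free,  < 0: allocated                       */
    };

    /* Step back by prevfree bytes and fold this block's (negated) size into
       the preceding free block, exactly as brel() does.                     */
    static struct hdr *merge_back(struct hdr *b) {
        if (b->prevfree != 0) {
            struct hdr *prev = (struct hdr *)((char *)b - b->prevfree);
            prev->bsize -= b->bsize;   /* bsize is negative, so prev grows    */
            return prev;
        }
        b->bsize = -b->bsize;          /* no free predecessor: just mark free */
        return b;
    }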
969 
970 /* BECTL -- Establish automatic pool expansion control */
971 
972 static void
973 bectl( kmp_info_t *th, bget_compact_t compact, bget_acquire_t acquire, bget_release_t release, bufsize pool_incr)
974 {
975  thr_data_t *thr = get_thr_data( th );
976 
977  thr->compfcn = compact;
978  thr->acqfcn = acquire;
979  thr->relfcn = release;
980  thr->exp_incr = pool_incr;
981 }
982 
983 /* BPOOL -- Add a region of memory to the buffer pool. */
984 
985 static void
986 bpool( kmp_info_t *th, void *buf, bufsize len)
987 {
988 /* int bin = 0; */
989  thr_data_t *thr = get_thr_data( th );
990  bfhead_t *b = BFH(buf);
991  bhead_t *bn;
992 
993  __kmp_bget_dequeue( th ); /* Release any queued buffers */
994 
995 #ifdef SizeQuant
996  len &= ~(SizeQuant - 1);
997 #endif
998  if (thr->pool_len == 0) {
999  thr->pool_len = len;
1000  } else if (len != thr->pool_len) {
1001  thr->pool_len = -1;
1002  }
1003 #if BufStats
1004  thr->numpget++; /* Number of block acquisitions */
1005  thr->numpblk++; /* Number of blocks total */
1006  KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
1007 #endif /* BufStats */
1008 
1009  /* Since the block is initially occupied by a single free buffer,
1010  it had better not be (much) larger than the largest buffer
1011  whose size we can store in bhead.bb.bsize. */
1012 
1013  KMP_DEBUG_ASSERT(len - sizeof(bhead_t) <= -((bufsize) ESent + 1));
1014 
1015  /* Clear the backpointer at the start of the block to indicate that
1016  there is no free block prior to this one. That blocks
1017  recombination when the first block in memory is released. */
1018 
1019  b->bh.bb.prevfree = 0;
1020 
1021  /* Create a dummy allocated buffer at the end of the pool. This dummy
1022  buffer is seen when a buffer at the end of the pool is released and
1023  blocks recombination of the last buffer with the dummy buffer at
1024  the end. The length in the dummy buffer is set to the largest
1025  negative number to denote the end of the pool for diagnostic
1026  routines (this specific value is not counted on by the actual
1027  allocation and release functions). */
1028 
1029  len -= sizeof(bhead_t);
1030  b->bh.bb.bsize = (bufsize) len;
1031  /* Set the owner of this buffer */
1032  TCW_PTR( b->bh.bb.bthr, (kmp_info_t*)((kmp_uintptr_t)th | 1) ); // mark the buffer as allocated address
1033 
1034  /* Chain the new block to the free list. */
1035  __kmp_bget_insert_into_freelist( thr, b );
1036 
1037 #ifdef FreeWipe
1038  (void) memset(((char *) b) + sizeof(bfhead_t), 0x55,
1039  (size_t) (len - sizeof(bfhead_t)));
1040 #endif
1041  bn = BH(((char *) b) + len);
1042  bn->bb.prevfree = (bufsize) len;
1043  /* Definition of ESent assumes two's complement! */
1044  KMP_DEBUG_ASSERT( (~0) == -1 && (bn != 0) );
1045 
1046  bn->bb.bsize = ESent;
1047 }
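After bpool() the new region is a single free buffer followed by a dummy allocated header whose bsize is ESent; the dummy's prevfree points back at the free buffer so a later release can find it, while ESent itself stops any merge past the end of the pool. Roughly, for a hypothetical 4096-byte pool with 16-byte headers:

    /*  +-- b (bfhead_t) -----------------------------+-- end sentinel -------+
        | prevfree = 0                                | prevfree = 4080       |
        | bsize    = 4080 (= 4096 - sizeof(bhead_t))  | bsize    = ESent      |
        +---------------------------------------------+-----------------------+  */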
1048 
1049 /* ------------------------------------------------------------------------ */
1050 
1051 /* BFREED -- Dump the free lists for this thread. */
1052 
1053 static void
1054 bfreed( kmp_info_t *th )
1055 {
1056  int bin = 0, count = 0;
1057  int gtid = __kmp_gtid_from_thread( th );
1058  thr_data_t *thr = get_thr_data( th );
1059 
1060 #if BufStats
1061  __kmp_printf_no_lock("__kmp_printpool: T#%d total=%" KMP_UINT64_SPEC " get=%" KMP_INT64_SPEC " rel=%" \
1062  KMP_INT64_SPEC " pblk=%" KMP_INT64_SPEC " pget=%" KMP_INT64_SPEC " prel=%" KMP_INT64_SPEC \
1063  " dget=%" KMP_INT64_SPEC " drel=%" KMP_INT64_SPEC "\n",
1064  gtid, (kmp_uint64) thr->totalloc,
1065  (kmp_int64) thr->numget, (kmp_int64) thr->numrel,
1066  (kmp_int64) thr->numpblk,
1067  (kmp_int64) thr->numpget, (kmp_int64) thr->numprel,
1068  (kmp_int64) thr->numdget, (kmp_int64) thr->numdrel );
1069 #endif
1070 
1071  for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
1072  bfhead_t *b;
1073 
1074  for (b = thr->freelist[ bin ].ql.flink; b != &thr->freelist[ bin ]; b = b->ql.flink) {
1075  bufsize bs = b->bh.bb.bsize;
1076 
1077  KMP_DEBUG_ASSERT( b->ql.blink->ql.flink == b );
1078  KMP_DEBUG_ASSERT( b->ql.flink->ql.blink == b );
1079  KMP_DEBUG_ASSERT( bs > 0 );
1080 
1081  count += 1;
1082 
1083  __kmp_printf_no_lock("__kmp_printpool: T#%d Free block: 0x%p size %6ld bytes.\n", gtid, b, (long) bs );
1084 #ifdef FreeWipe
1085  {
1086  char *lerr = ((char *) b) + sizeof(bfhead_t);
1087  if ((bs > sizeof(bfhead_t)) && ((*lerr != 0x55) || (memcmp(lerr, lerr + 1, (size_t) (bs - (sizeof(bfhead_t) + 1))) != 0))) {
1088  __kmp_printf_no_lock( "__kmp_printpool: T#%d (Contents of above free block have been overstored.)\n", gtid );
1089  }
1090  }
1091 #endif
1092  }
1093  }
1094 
1095  if (count == 0)
1096  __kmp_printf_no_lock("__kmp_printpool: T#%d No free blocks\n", gtid );
1097 }
1098 
1099 /* ------------------------------------------------------------------------ */
1100 
1101 #ifdef KMP_DEBUG
1102 
1103 #if BufStats
1104 
1105 /* BSTATS -- Return buffer allocation free space statistics. */
1106 
1107 static void
1108 bstats( kmp_info_t *th, bufsize *curalloc, bufsize *totfree, bufsize *maxfree, long *nget, long *nrel)
1109 {
1110  int bin = 0;
1111  thr_data_t *thr = get_thr_data( th );
1112 
1113  *nget = thr->numget;
1114  *nrel = thr->numrel;
1115  *curalloc = (bufsize) thr->totalloc;
1116  *totfree = 0;
1117  *maxfree = -1;
1118 
1119  for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
1120  bfhead_t *b = thr->freelist[ bin ].ql.flink;
1121 
1122  while (b != &thr->freelist[ bin ]) {
1123  KMP_DEBUG_ASSERT(b->bh.bb.bsize > 0);
1124  *totfree += b->bh.bb.bsize;
1125  if (b->bh.bb.bsize > *maxfree) {
1126  *maxfree = b->bh.bb.bsize;
1127  }
1128  b = b->ql.flink; /* Link to next buffer */
1129  }
1130  }
1131 }
1132 
1133 /* BSTATSE -- Return extended statistics */
1134 
1135 static void
1136 bstatse( kmp_info_t *th, bufsize *pool_incr, long *npool, long *npget, long *nprel, long *ndget, long *ndrel)
1137 {
1138  thr_data_t *thr = get_thr_data( th );
1139 
1140  *pool_incr = (thr->pool_len < 0) ? -thr->exp_incr : thr->exp_incr;
1141  *npool = thr->numpblk;
1142  *npget = thr->numpget;
1143  *nprel = thr->numprel;
1144  *ndget = thr->numdget;
1145  *ndrel = thr->numdrel;
1146 }
1147 
1148 #endif /* BufStats */
1149 
1150 /* BUFDUMP -- Dump the data in a buffer. This is called with the user
1151  data pointer, and backs up to the buffer header. It will
1152  dump either a free block or an allocated one. */
1153 
1154 static void
1155 bufdump( kmp_info_t *th, void *buf )
1156 {
1157  bfhead_t *b;
1158  unsigned char *bdump;
1159  bufsize bdlen;
1160 
1161  b = BFH(((char *) buf) - sizeof(bhead_t));
1162  KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
1163  if (b->bh.bb.bsize < 0) {
1164  bdump = (unsigned char *) buf;
1165  bdlen = (-b->bh.bb.bsize) - (bufsize) sizeof(bhead_t);
1166  } else {
1167  bdump = (unsigned char *) (((char *) b) + sizeof(bfhead_t));
1168  bdlen = b->bh.bb.bsize - (bufsize) sizeof(bfhead_t);
1169  }
1170 
1171  while (bdlen > 0) {
1172  int i, dupes = 0;
1173  bufsize l = bdlen;
1174  char bhex[50], bascii[20];
1175 
1176  if (l > 16) {
1177  l = 16;
1178  }
1179 
1180  for (i = 0; i < l; i++) {
1181  (void) KMP_SNPRINTF(bhex + i * 3, sizeof(bhex) - i * 3, "%02X ", bdump[i]);
1182  if (bdump[i] > 0x20 && bdump[i] < 0x7F)
1183  bascii[ i ] = bdump[ i ];
1184  else
1185  bascii[ i ] = ' ';
1186  }
1187  bascii[i] = 0;
1188  (void) __kmp_printf_no_lock("%-48s %s\n", bhex, bascii);
1189  bdump += l;
1190  bdlen -= l;
1191  while ((bdlen > 16) && (memcmp((char *) (bdump - 16),
1192  (char *) bdump, 16) == 0)) {
1193  dupes++;
1194  bdump += 16;
1195  bdlen -= 16;
1196  }
1197  if (dupes > 1) {
1198  (void) __kmp_printf_no_lock(
1199  " (%d lines [%d bytes] identical to above line skipped)\n",
1200  dupes, dupes * 16);
1201  } else if (dupes == 1) {
1202  bdump -= 16;
1203  bdlen += 16;
1204  }
1205  }
1206 }
1207 
1208 /* BPOOLD -- Dump a buffer pool. The buffer headers are always listed.
1209  If DUMPALLOC is nonzero, the contents of allocated buffers
1210  are dumped. If DUMPFREE is nonzero, free blocks are
1211  dumped as well. If FreeWipe checking is enabled, free
1212  blocks which have been clobbered will always be dumped. */
1213 
1214 static void
1215 bpoold( kmp_info_t *th, void *buf, int dumpalloc, int dumpfree)
1216 {
1217  bfhead_t *b = BFH( (char*)buf - sizeof(bhead_t));
1218 
1219  while (b->bh.bb.bsize != ESent) {
1220  bufsize bs = b->bh.bb.bsize;
1221 
1222  if (bs < 0) {
1223  bs = -bs;
1224  (void) __kmp_printf_no_lock("Allocated buffer: size %6ld bytes.\n", (long) bs);
1225  if (dumpalloc) {
1226  bufdump( th, (void *) (((char *) b) + sizeof(bhead_t)));
1227  }
1228  } else {
1229  const char *lerr = "";
1230 
1231  KMP_DEBUG_ASSERT(bs > 0);
1232  if ((b->ql.blink->ql.flink != b) || (b->ql.flink->ql.blink != b)) {
1233  lerr = " (Bad free list links)";
1234  }
1235  (void) __kmp_printf_no_lock("Free block: size %6ld bytes.%s\n",
1236  (long) bs, lerr);
1237 #ifdef FreeWipe
1238  lerr = ((char *) b) + sizeof(bfhead_t);
1239  if ((bs > sizeof(bfhead_t)) && ((*lerr != 0x55) ||
1240  (memcmp(lerr, lerr + 1,
1241  (size_t) (bs - (sizeof(bfhead_t) + 1))) != 0))) {
1242  (void) __kmp_printf_no_lock(
1243  "(Contents of above free block have been overstored.)\n");
1244  bufdump( th, (void *) (((char *) b) + sizeof(bhead_t)));
1245  } else
1246 #endif
1247  if (dumpfree) {
1248  bufdump( th, (void *) (((char *) b) + sizeof(bhead_t)));
1249  }
1250  }
1251  b = BFH(((char *) b) + bs);
1252  }
1253 }
1254 
1255 /* BPOOLV -- Validate a buffer pool. */
1256 
1257 static int
1258 bpoolv( kmp_info_t *th, void *buf )
1259 {
1260  bfhead_t *b = BFH(buf);
1261 
1262  while (b->bh.bb.bsize != ESent) {
1263  bufsize bs = b->bh.bb.bsize;
1264 
1265  if (bs < 0) {
1266  bs = -bs;
1267  } else {
1268 #ifdef FreeWipe
1269  char *lerr = "";
1270 #endif
1271 
1272  KMP_DEBUG_ASSERT(bs > 0);
1273  if (bs <= 0) {
1274  return 0;
1275  }
1276  if ((b->ql.blink->ql.flink != b) || (b->ql.flink->ql.blink != b)) {
1277  (void) __kmp_printf_no_lock("Free block: size %6ld bytes. (Bad free list links)\n",
1278  (long) bs);
1279  KMP_DEBUG_ASSERT(0);
1280  return 0;
1281  }
1282 #ifdef FreeWipe
1283  lerr = ((char *) b) + sizeof(bfhead_t);
1284  if ((bs > sizeof(bfhead_t)) && ((*lerr != 0x55) ||
1285  (memcmp(lerr, lerr + 1,
1286  (size_t) (bs - (sizeof(bfhead_t) + 1))) != 0))) {
1287  (void) __kmp_printf_no_lock(
1288  "(Contents of above free block have been overstored.)\n");
1289  bufdump( th, (void *) (((char *) b) + sizeof(bhead_t)));
1290  KMP_DEBUG_ASSERT(0);
1291  return 0;
1292  }
1293 #endif /* FreeWipe */
1294  }
1295  b = BFH(((char *) b) + bs);
1296  }
1297  return 1;
1298 }
1299 
1300 #endif /* KMP_DEBUG */
1301 
1302 /* ------------------------------------------------------------------------ */
1303 
1304 void
1305 __kmp_initialize_bget( kmp_info_t *th )
1306 {
1307  KMP_DEBUG_ASSERT( SizeQuant >= sizeof( void * ) && (th != 0) );
1308 
1309  set_thr_data( th );
1310 
1311  bectl( th, (bget_compact_t) 0, (bget_acquire_t) malloc, (bget_release_t) free,
1312  (bufsize) __kmp_malloc_pool_incr );
1313 }
1314 
1315 void
1316 __kmp_finalize_bget( kmp_info_t *th )
1317 {
1318  thr_data_t *thr;
1319  bfhead_t *b;
1320 
1321  KMP_DEBUG_ASSERT( th != 0 );
1322 
1323 #if BufStats
1324  thr = (thr_data_t *) th->th.th_local.bget_data;
1325  KMP_DEBUG_ASSERT( thr != NULL );
1326  b = thr->last_pool;
1327 
1328  /* If a block-release function is defined, and this free buffer
1329  constitutes the entire block, release it. Note that pool_len
1330  is defined in such a way that the test will fail unless all
1331  pool blocks are the same size. */
1332 
1333  /* Deallocate the last pool if one exists because we no longer do it in brel() */
1334  if (thr->relfcn != 0 && b != 0 && thr->numpblk != 0 &&
1335  b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t)))
1336  {
1337  KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
1338  KMP_DEBUG_ASSERT(BH((char *) b + b->bh.bb.bsize)->bb.bsize == ESent);
1339  KMP_DEBUG_ASSERT(BH((char *) b + b->bh.bb.bsize)->bb.prevfree == b->bh.bb.bsize);
1340 
1341  /* Unlink the buffer from the free list */
1342  __kmp_bget_remove_from_freelist( b );
1343 
1344  KE_TRACE( 10, ("%%%%%% FREE( %p )\n", (void *) b ) );
1345 
1346  (*thr->relfcn)(b);
1347  thr->numprel++; /* Nr of expansion block releases */
1348  thr->numpblk--; /* Total number of blocks */
1349  KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
1350  }
1351 #endif /* BufStats */
1352 
1353  /* Deallocate bget_data */
1354  if ( th->th.th_local.bget_data != NULL ) {
1355  __kmp_free( th->th.th_local.bget_data );
1356  th->th.th_local.bget_data = NULL;
1357  }; // if
1358 }
1359 
1360 void
1361 kmpc_set_poolsize( size_t size )
1362 {
1363  bectl( __kmp_get_thread(), (bget_compact_t) 0, (bget_acquire_t) malloc,
1364  (bget_release_t) free, (bufsize) size );
1365 }
1366 
1367 size_t
1368 kmpc_get_poolsize( void )
1369 {
1370  thr_data_t *p;
1371 
1372  p = get_thr_data( __kmp_get_thread() );
1373 
1374  return p->exp_incr;
1375 }
1376 
1377 void
1378 kmpc_set_poolmode( int mode )
1379 {
1380  thr_data_t *p;
1381 
1382  if (mode == bget_mode_fifo || mode == bget_mode_lifo || mode == bget_mode_best) {
1383  p = get_thr_data( __kmp_get_thread() );
1384  p->mode = (bget_mode_t) mode;
1385  }
1386 }
1387 
1388 int
1389 kmpc_get_poolmode( void )
1390 {
1391  thr_data_t *p;
1392 
1393  p = get_thr_data( __kmp_get_thread() );
1394 
1395  return p->mode;
1396 }
1397 
1398 void
1399 kmpc_get_poolstat( size_t *maxmem, size_t *allmem )
1400 {
1401  kmp_info_t *th = __kmp_get_thread();
1402  bufsize a, b;
1403 
1404  __kmp_bget_dequeue( th ); /* Release any queued buffers */
1405 
1406  bcheck( th, &a, &b );
1407 
1408  *maxmem = a;
1409  *allmem = b;
1410 }
1411 
1412 void
1413 kmpc_poolprint( void )
1414 {
1415  kmp_info_t *th = __kmp_get_thread();
1416 
1417  __kmp_bget_dequeue( th ); /* Release any queued buffers */
1418 
1419  bfreed( th );
1420 }
1421 
1422 #endif // #if KMP_USE_BGET
1423 
1424 /* ------------------------------------------------------------------------ */
1425 
1426 void *
1427 kmpc_malloc( size_t size )
1428 {
1429  void * ptr;
1430  ptr = bget( __kmp_entry_thread(), (bufsize)(size + sizeof(ptr)) );
1431  if( ptr != NULL ) {
1432  // save allocated pointer just before one returned to user
1433  *(void**)ptr = ptr;
1434  ptr = (void**)ptr + 1;
1435  }
1436  return ptr;
1437 }
1438 
1439 #define IS_POWER_OF_TWO(n) (((n)&((n)-1))==0)
1440 
1441 void *
1442 kmpc_aligned_malloc( size_t size, size_t alignment )
1443 {
1444  void * ptr;
1445  void * ptr_allocated;
1446  KMP_DEBUG_ASSERT( alignment < 32 * 1024 ); // Alignment should not be too big
1447  if( !IS_POWER_OF_TWO(alignment) ) {
1448  // AC: do we need to issue a warning here?
1449  errno = EINVAL;
1450  return NULL;
1451  }
1452  size = size + sizeof( void* ) + alignment;
1453  ptr_allocated = bget( __kmp_entry_thread(), (bufsize)size );
1454  if( ptr_allocated != NULL ) {
1455  // save allocated pointer just before one returned to user
1456  ptr = (void*)(((kmp_uintptr_t)ptr_allocated + sizeof( void* ) + alignment) & ~(alignment - 1));
1457  *((void**)ptr - 1) = ptr_allocated;
1458  } else {
1459  ptr = NULL;
1460  }
1461  return ptr;
1462 }
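The aligned variant over-allocates by sizeof(void *) + alignment, rounds the interior pointer up to the requested boundary, and stores the pointer actually returned by bget() in the word just below the aligned address so kmpc_free() can recover it. A worked instance of the arithmetic (hypothetical addresses, 64-bit pointers, alignment 64):

    /*  bget() returns ptr_allocated = 0x1008 for a 64-byte-aligned request:

          (0x1008 + sizeof(void *) + 64) & ~(64 - 1)
        = (0x1008 + 8 + 64)              & ~0x3F
        =  0x1050                        & ...FFC0     =  0x1040  -> caller

        *((void **)0x1040 - 1), i.e. the 8 bytes at 0x1038, hold 0x1008 so
        kmpc_free() can hand the original buffer back to brel().             */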
1463 
1464 void *
1465 kmpc_calloc( size_t nelem, size_t elsize )
1466 {
1467  void * ptr;
1468  ptr = bgetz( __kmp_entry_thread(), (bufsize) (nelem * elsize + sizeof(ptr)) );
1469  if( ptr != NULL ) {
1470  // save allocated pointer just before one returned to user
1471  *(void**)ptr = ptr;
1472  ptr = (void**)ptr + 1;
1473  }
1474  return ptr;
1475 }
1476 
1477 void *
1478 kmpc_realloc( void * ptr, size_t size )
1479 {
1480  void * result = NULL;
1481  if ( ptr == NULL ) {
1482  // If pointer is NULL, realloc behaves like malloc.
1483  result = bget( __kmp_entry_thread(), (bufsize)(size + sizeof(ptr)) );
1484  // save allocated pointer just before one returned to user
1485  if( result != NULL ) {
1486  *(void**)result = result;
1487  result = (void**)result + 1;
1488  }
1489  } else if ( size == 0 ) {
1490  // If size is 0, realloc behaves like free.
1491  // The thread must be registered by the call to kmpc_malloc() or kmpc_calloc() before.
1492  // So it should be safe to call __kmp_get_thread(), not __kmp_entry_thread().
1493  KMP_ASSERT(*((void**)ptr - 1));
1494  brel( __kmp_get_thread(), *((void**)ptr - 1) );
1495  } else {
1496  result = bgetr( __kmp_entry_thread(), *((void**)ptr - 1), (bufsize)(size + sizeof(ptr)) );
1497  if( result != NULL ) {
1498  *(void**)result = result;
1499  result = (void**)result + 1;
1500  }
1501  }; // if
1502  return result;
1503 }
1504 
1505 /* NOTE: the library must have already been initialized by a previous allocate */
1506 
1507 void
1508 kmpc_free( void * ptr )
1509 {
1510  if ( ! __kmp_init_serial ) {
1511  return;
1512  }; // if
1513  if ( ptr != NULL ) {
1514  kmp_info_t *th = __kmp_get_thread();
1515  __kmp_bget_dequeue( th ); /* Release any queued buffers */
1516  // extract allocated pointer and free it
1517  KMP_ASSERT(*((void**)ptr - 1));
1518  brel( th, *((void**)ptr - 1) );
1519  };
1520 }
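Taken together, kmpc_malloc()/kmpc_calloc()/kmpc_realloc()/kmpc_free() put a malloc-style interface over each thread's bget heap; the hidden pointer stored in front of every allocation is what lets kmpc_free() locate the underlying bget buffer regardless of which variant produced it. A minimal usage sketch (the prototypes are written out by hand here only for illustration; they are assumed to be visible to the caller):

    #include <stddef.h>

    extern "C" {                       /* illustrative declarations only      */
    void *kmpc_malloc(size_t size);
    void *kmpc_realloc(void *ptr, size_t size);
    void  kmpc_free(void *ptr);
    }

    static void demo(void) {
        int *v = (int *)kmpc_malloc(100 * sizeof(int));  /* thread-local heap */
        v = (int *)kmpc_realloc(v, 200 * sizeof(int));   /* may move the data */
        kmpc_free(v);   /* if freed by another thread, the buffer is queued
                           back to its owner via __kmp_bget_enqueue()         */
    }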
1521 
1522 
1523 /* ------------------------------------------------------------------------ */
1524 
1525 void *
1526 ___kmp_thread_malloc( kmp_info_t *th, size_t size KMP_SRC_LOC_DECL )
1527 {
1528  void * ptr;
1529  KE_TRACE( 30, (
1530  "-> __kmp_thread_malloc( %p, %d ) called from %s:%d\n",
1531  th,
1532  (int) size
1533  KMP_SRC_LOC_PARM
1534  ) );
1535  ptr = bget( th, (bufsize) size );
1536  KE_TRACE( 30, ( "<- __kmp_thread_malloc() returns %p\n", ptr ) );
1537  return ptr;
1538 }
1539 
1540 void *
1541 ___kmp_thread_calloc( kmp_info_t *th, size_t nelem, size_t elsize KMP_SRC_LOC_DECL )
1542 {
1543  void * ptr;
1544  KE_TRACE( 30, (
1545  "-> __kmp_thread_calloc( %p, %d, %d ) called from %s:%d\n",
1546  th,
1547  (int) nelem,
1548  (int) elsize
1549  KMP_SRC_LOC_PARM
1550  ) );
1551  ptr = bgetz( th, (bufsize) (nelem * elsize) );
1552  KE_TRACE( 30, ( "<- __kmp_thread_calloc() returns %p\n", ptr ) );
1553  return ptr;
1554 }
1555 
1556 void *
1557 ___kmp_thread_realloc( kmp_info_t *th, void *ptr, size_t size KMP_SRC_LOC_DECL )
1558 {
1559  KE_TRACE( 30, (
1560  "-> __kmp_thread_realloc( %p, %p, %d ) called from %s:%d\n",
1561  th,
1562  ptr,
1563  (int) size
1564  KMP_SRC_LOC_PARM
1565  ) );
1566  ptr = bgetr( th, ptr, (bufsize) size );
1567  KE_TRACE( 30, ( "<- __kmp_thread_realloc() returns %p\n", ptr ) );
1568  return ptr;
1569 }
1570 
1571 void
1572 ___kmp_thread_free( kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL )
1573 {
1574  KE_TRACE( 30, (
1575  "-> __kmp_thread_free( %p, %p ) called from %s:%d\n",
1576  th,
1577  ptr
1578  KMP_SRC_LOC_PARM
1579  ) );
1580  if ( ptr != NULL ) {
1581  __kmp_bget_dequeue( th ); /* Release any queued buffers */
1582  brel( th, ptr );
1583  }
1584  KE_TRACE( 30, ( "<- __kmp_thread_free()\n" ) );
1585 }
1586 
1587 /* ------------------------------------------------------------------------ */
1588 /* ------------------------------------------------------------------------ */
1589 /*
1590  If LEAK_MEMORY is defined, __kmp_free() will *not* free memory. It causes memory leaks, but it
1591  may be useful for debugging memory corruptions, use of freed pointers, etc.
1592 */
1593 /* #define LEAK_MEMORY */
1594 
1595 struct kmp_mem_descr { // Memory block descriptor.
1596  void * ptr_allocated; // Pointer returned by malloc(), subject for free().
1597  size_t size_allocated; // Size of allocated memory block.
1598  void * ptr_aligned; // Pointer to aligned memory, to be used by client code.
1599  size_t size_aligned; // Size of aligned memory block.
1600 };
1601 typedef struct kmp_mem_descr kmp_mem_descr_t;
1602 
1603 /*
1604  Allocate memory on requested boundary, fill allocated memory with 0x00.
1605  NULL is NEVER returned, __kmp_abort() is called in case of memory allocation error.
1606  Must use __kmp_free when freeing memory allocated by this routine!
1607  */
1608 static
1609 void *
1610 ___kmp_allocate_align( size_t size, size_t alignment KMP_SRC_LOC_DECL )
1611 {
1612  /*
1613  __kmp_allocate() allocates (by a call to malloc()) a bigger memory block than requested in
1614  order to return a properly aligned pointer. The original pointer returned by malloc() and the
1615  size of the allocated block are saved in a descriptor just before the aligned pointer. This
1616  information is used by __kmp_free() -- it has to pass the original pointer, not the aligned one, to free().
1617 
1618  +---------+------------+-----------------------------------+---------+
1619  | padding | descriptor |           aligned block           | padding |
1620  +---------+------------+-----------------------------------+---------+
1621  ^                      ^
1622  |                      |
1623  |                      +- Aligned pointer returned to caller
1624  +- Pointer returned by malloc()
1625 
1626  The aligned block is filled with zeros; the padding is filled with 0xEF.
1627  */
1628 
1629  kmp_mem_descr_t descr;
1630  kmp_uintptr_t addr_allocated; // Address returned by malloc().
1631  kmp_uintptr_t addr_aligned; // Aligned address to return to caller.
1632  kmp_uintptr_t addr_descr; // Address of memory block descriptor.
1633 
1634  KE_TRACE( 25, (
1635  "-> ___kmp_allocate_align( %d, %d ) called from %s:%d\n",
1636  (int) size,
1637  (int) alignment
1638  KMP_SRC_LOC_PARM
1639  ) );
1640 
1641  KMP_DEBUG_ASSERT( alignment < 32 * 1024 ); // Alignment should not be too big.
1642  KMP_DEBUG_ASSERT( sizeof( void * ) <= sizeof( kmp_uintptr_t ) );
1643  // Make sure kmp_uintptr_t is enough to store addresses.
1644 
1645  descr.size_aligned = size;
1646  descr.size_allocated = descr.size_aligned + sizeof( kmp_mem_descr_t ) + alignment;
1647 
1648 #if KMP_DEBUG
1649  descr.ptr_allocated = _malloc_src_loc( descr.size_allocated, _file_, _line_ );
1650 #else
1651  descr.ptr_allocated = malloc_src_loc( descr.size_allocated KMP_SRC_LOC_PARM );
1652 #endif
1653  KE_TRACE( 10, (
1654  " malloc( %d ) returned %p\n",
1655  (int) descr.size_allocated,
1656  descr.ptr_allocated
1657  ) );
1658  if ( descr.ptr_allocated == NULL ) {
1659  KMP_FATAL( OutOfHeapMemory );
1660  };
1661 
1662  addr_allocated = (kmp_uintptr_t) descr.ptr_allocated;
1663  addr_aligned =
1664  ( addr_allocated + sizeof( kmp_mem_descr_t ) + alignment )
1665  & ~ ( alignment - 1 );
1666  addr_descr = addr_aligned - sizeof( kmp_mem_descr_t );
1667 
1668  descr.ptr_aligned = (void *) addr_aligned;
1669 
1670  KE_TRACE( 26, (
1671  " ___kmp_allocate_align: "
1672  "ptr_allocated=%p, size_allocated=%d, "
1673  "ptr_aligned=%p, size_aligned=%d\n",
1674  descr.ptr_allocated,
1675  (int) descr.size_allocated,
1676  descr.ptr_aligned,
1677  (int) descr.size_aligned
1678  ) );
1679 
1680  KMP_DEBUG_ASSERT( addr_allocated <= addr_descr );
1681  KMP_DEBUG_ASSERT( addr_descr + sizeof( kmp_mem_descr_t ) == addr_aligned );
1682  KMP_DEBUG_ASSERT( addr_aligned + descr.size_aligned <= addr_allocated + descr.size_allocated );
1683  KMP_DEBUG_ASSERT( addr_aligned % alignment == 0 );
1684 #ifdef KMP_DEBUG
1685  memset( descr.ptr_allocated, 0xEF, descr.size_allocated );
1686  // Fill allocated memory block with 0xEF.
1687 #endif
1688  memset( descr.ptr_aligned, 0x00, descr.size_aligned );
1689  // Fill the aligned memory block (which is intended for using by caller) with 0x00. Do not
1690  // put this filling under KMP_DEBUG condition! Many callers expect zeroed memory. (Padding
1691  // bytes remain filled with 0xEF in debugging library.)
1692  * ( (kmp_mem_descr_t *) addr_descr ) = descr;
1693 
1694  KMP_MB();
1695 
1696  KE_TRACE( 25, ( "<- ___kmp_allocate_align() returns %p\n", descr.ptr_aligned ) );
1697  return descr.ptr_aligned;
1698 } // func ___kmp_allocate_align
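Concretely, for a 40-byte request with 64-byte alignment and a 32-byte kmp_mem_descr_t, the arithmetic above plays out as follows (the malloc() return value is hypothetical; sizes vary by target):

    /*  size_allocated = 40 + 32 + 64 = 136 bytes; say malloc() returns 0x7f10

          addr_aligned = (0x7f10 + 32 + 64) & ~(64 - 1)
                       = 0x7f70             & ~0x3F        = 0x7f40
          addr_descr   = 0x7f40 - 32                       = 0x7f20

        0x7f10 .. 0x7f1f   leading padding (0xEF in debug builds)
        0x7f20 .. 0x7f3f   kmp_mem_descr_t copy (ptr_allocated = 0x7f10, ...)
        0x7f40 .. 0x7f67   40-byte aligned block handed to the caller, zeroed
        0x7f68 .. 0x7f97   trailing padding                                   */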
1699 
1700 
1701 /*
1702  Allocate memory on cache line boundary, fill allocated memory with 0x00.
1703  Do not call this func directly! Use __kmp_allocate macro instead.
1704  NULL is NEVER returned, __kmp_abort() is called in case of memory allocation error.
1705  Must use __kmp_free when freeing memory allocated by this routine!
1706  */
1707 void *
1708 ___kmp_allocate( size_t size KMP_SRC_LOC_DECL )
1709 {
1710  void * ptr;
1711  KE_TRACE( 25, ( "-> __kmp_allocate( %d ) called from %s:%d\n", (int) size KMP_SRC_LOC_PARM ) );
1712  ptr = ___kmp_allocate_align( size, __kmp_align_alloc KMP_SRC_LOC_PARM );
1713  KE_TRACE( 25, ( "<- __kmp_allocate() returns %p\n", ptr ) );
1714  return ptr;
1715 } // func ___kmp_allocate
1716 
1717 #if (BUILD_MEMORY==FIRST_TOUCH)
1718 void *
1719 __kmp_ft_page_allocate(size_t size)
1720 {
1721  void *adr, *aadr;
1722 
1723  const int page_size = KMP_GET_PAGE_SIZE();
1724 
1725  adr = (void *) __kmp_thread_malloc( __kmp_get_thread(),
1726  size + page_size + KMP_PTR_SKIP);
1727  if ( adr == 0 )
1728  KMP_FATAL( OutOfHeapMemory );
1729 
1730  /* check to see if adr is on a page boundary. */
1731  if ( ( (kmp_uintptr_t) adr & (page_size - 1)) == 0)
1732  /* nothing to do if adr is already on a page boundary. */
1733  aadr = adr;
1734  else
1735  /* else set aadr to the first page boundary in the allocated memory. */
1736  aadr = (void *) ( ( (kmp_uintptr_t) adr + page_size) & ~(page_size - 1) );
1737 
1738  /* the first touch by the owner thread. */
1739  *((void**)aadr) = adr;
1740 
1741  /* skip the memory space used for storing adr above. */
1742  return (void*)((char*)aadr + KMP_PTR_SKIP);
1743 }
1744 #endif
1745 
1746 /*
1747  Allocate memory on page boundary, fill allocated memory with 0x00.
1748  Do not call this func directly! Use __kmp_page_allocate macro instead.
1749  NULL is NEVER returned, __kmp_abort() is called in case of memory allocation error.
1750  Must use __kmp_free when freeing memory allocated by this routine!
1751  */
1752 void *
1753 ___kmp_page_allocate( size_t size KMP_SRC_LOC_DECL )
1754 {
1755  int page_size = 8 * 1024;
1756  void * ptr;
1757 
1758  KE_TRACE( 25, (
1759  "-> __kmp_page_allocate( %d ) called from %s:%d\n",
1760  (int) size
1761  KMP_SRC_LOC_PARM
1762  ) );
1763  ptr = ___kmp_allocate_align( size, page_size KMP_SRC_LOC_PARM );
1764  KE_TRACE( 25, ( "<- __kmp_page_allocate( %d ) returns %p\n", (int) size, ptr ) );
1765  return ptr;
1766 } // ___kmp_page_allocate
1767 
1768 /*
1769  Free memory allocated by __kmp_allocate() and __kmp_page_allocate().
1770  In debug mode, fill the memory block with 0xEF before call to free().
1771 */
1772 void
1773 ___kmp_free( void * ptr KMP_SRC_LOC_DECL )
1774 {
1775  kmp_mem_descr_t descr;
1776  kmp_uintptr_t addr_allocated; // Address returned by malloc().
1777  kmp_uintptr_t addr_aligned; // Aligned address passed by caller.
1778 
1779  KE_TRACE( 25, ( "-> __kmp_free( %p ) called from %s:%d\n", ptr KMP_SRC_LOC_PARM ) );
1780  KMP_ASSERT( ptr != NULL );
1781 
1782  descr = * ( kmp_mem_descr_t *) ( (kmp_uintptr_t) ptr - sizeof( kmp_mem_descr_t ) );
1783 
1784  KE_TRACE( 26, ( " __kmp_free: "
1785  "ptr_allocated=%p, size_allocated=%d, "
1786  "ptr_aligned=%p, size_aligned=%d\n",
1787  descr.ptr_allocated, (int) descr.size_allocated,
1788  descr.ptr_aligned, (int) descr.size_aligned ));
1789 
1790  addr_allocated = (kmp_uintptr_t) descr.ptr_allocated;
1791  addr_aligned = (kmp_uintptr_t) descr.ptr_aligned;
1792 
1793  KMP_DEBUG_ASSERT( addr_aligned % CACHE_LINE == 0 );
1794  KMP_DEBUG_ASSERT( descr.ptr_aligned == ptr );
1795  KMP_DEBUG_ASSERT( addr_allocated + sizeof( kmp_mem_descr_t ) <= addr_aligned );
1796  KMP_DEBUG_ASSERT( descr.size_aligned < descr.size_allocated );
1797  KMP_DEBUG_ASSERT( addr_aligned + descr.size_aligned <= addr_allocated + descr.size_allocated );
1798 
1799  #ifdef KMP_DEBUG
1800  memset( descr.ptr_allocated, 0xEF, descr.size_allocated );
1801  // Fill memory block with 0xEF, it helps catch using freed memory.
1802  #endif
1803 
1804  #ifndef LEAK_MEMORY
1805  KE_TRACE( 10, ( " free( %p )\n", descr.ptr_allocated ) );
1806  # ifdef KMP_DEBUG
1807  _free_src_loc( descr.ptr_allocated, _file_, _line_ );
1808  # else
1809  free_src_loc( descr.ptr_allocated KMP_SRC_LOC_PARM );
1810  # endif
1811  #endif
1812  KMP_MB();
1813  KE_TRACE( 25, ( "<- __kmp_free() returns\n" ) );
1814 } // func ___kmp_free
1815 
1816 /* ------------------------------------------------------------------------ */
1817 /* ------------------------------------------------------------------------ */
1818 
1819 #if USE_FAST_MEMORY == 3
1820 // Allocate fast memory by first scanning the thread's free lists.
1821 // If a chunk of the right size exists, grab it off the free list.
1822 // Otherwise allocate normally using kmp_thread_malloc.
1823 
1824 // AC: How to choose the limit? Just get 16 for now...
1825 #define KMP_FREE_LIST_LIMIT 16
1826 
1827 // Always use 128 bytes for determining buckets for caching memory blocks
1828 #define DCACHE_LINE 128
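/*
  Bucket sizes implied by the index selection in ___kmp_fast_allocate() below
  (a summary, not additional behavior): free list 0 holds blocks of 2 cache
  lines (256 bytes), list 1 holds 4 lines (512 bytes), list 2 holds 16 lines
  (2 KB), and list 3 holds 64 lines (8 KB). Requests needing more than 64
  cache lines bypass the free lists and go straight to bget().
*/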
1829 
1830 void *
1831 ___kmp_fast_allocate( kmp_info_t *this_thr, size_t size KMP_SRC_LOC_DECL )
1832 {
1833  void * ptr;
1834  int num_lines;
1835  int idx;
1836  int index;
1837  void * alloc_ptr;
1838  size_t alloc_size;
1839  kmp_mem_descr_t * descr;
1840 
1841  KE_TRACE( 25, ( "-> __kmp_fast_allocate( T#%d, %d ) called from %s:%d\n",
1842  __kmp_gtid_from_thread(this_thr), (int) size KMP_SRC_LOC_PARM ) );
1843 
1844  num_lines = ( size + DCACHE_LINE - 1 ) / DCACHE_LINE;
1845  idx = num_lines - 1;
1846  KMP_DEBUG_ASSERT( idx >= 0 );
1847  if ( idx < 2 ) {
1848  index = 0; // idx is [ 0, 1 ], use first free list
1849  num_lines = 2; // 1, 2 cache lines or less than cache line
1850  } else if ( ( idx >>= 2 ) == 0 ) {
1851  index = 1; // idx is [ 2, 3 ], use second free list
1852  num_lines = 4; // 3, 4 cache lines
1853  } else if ( ( idx >>= 2 ) == 0 ) {
1854  index = 2; // idx is [ 4, 15 ], use third free list
1855  num_lines = 16; // 5, 6, ..., 16 cache lines
1856  } else if ( ( idx >>= 2 ) == 0 ) {
1857  index = 3; // idx is [ 16, 63 ], use fourth free list
1858  num_lines = 64; // 17, 18, ..., 64 cache lines
1859  } else {
1860  goto alloc_call; // 65 or more cache lines ( > 8KB ), don't use free lists
1861  }
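 /* Worked example, illustrative only: for size == 300 bytes,
    num_lines = ( 300 + 127 ) / 128 = 3, so idx = 2; the first shift
    ( idx >>= 2 ) yields 0, so index = 1 and the request is rounded up to
    num_lines = 4 cache lines ( 512 bytes ). */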
1862 
1863  ptr = this_thr->th.th_free_lists[index].th_free_list_self;
1864  if ( ptr != NULL ) {
1865  // pop the head of no-sync free list
1866  this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
1867  KMP_DEBUG_ASSERT( this_thr ==
1868  ((kmp_mem_descr_t *)( (kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t) ))->ptr_aligned );
1869  goto end;
1870  }
1871  ptr = TCR_SYNC_PTR( this_thr->th.th_free_lists[index].th_free_list_sync );
1872  if ( ptr != NULL ) {
1873  // no-sync free list is empty, use sync free list (filled in by other threads only)
1874  // pop the head of the sync free list, push NULL instead
1875  while ( ! KMP_COMPARE_AND_STORE_PTR(
1876  &this_thr->th.th_free_lists[index].th_free_list_sync, ptr, NULL ) )
1877  {
1878  KMP_CPU_PAUSE();
1879  ptr = TCR_SYNC_PTR( this_thr->th.th_free_lists[index].th_free_list_sync );
1880  }
1881  // push the rest of the chain onto the no-sync free list (can be NULL if that was the only block)
1882  this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
1883  KMP_DEBUG_ASSERT( this_thr ==
1884  ((kmp_mem_descr_t *)( (kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t) ))->ptr_aligned );
1885  goto end;
1886  }
1887 
1888  alloc_call:
1889  // no suitable block found in the free lists, so allocate a new one
1890  size = num_lines * DCACHE_LINE;
1891 
1892  alloc_size = size + sizeof( kmp_mem_descr_t ) + DCACHE_LINE;
1893  KE_TRACE( 25, ( "__kmp_fast_allocate: T#%d Calling __kmp_thread_malloc with alloc_size %d\n",
1894  __kmp_gtid_from_thread( this_thr ), (int) alloc_size ) );
1895  alloc_ptr = bget( this_thr, (bufsize) alloc_size );
1896 
1897  // align ptr to DCACHE_LINE
1898  ptr = (void *)(( ((kmp_uintptr_t)alloc_ptr) + sizeof(kmp_mem_descr_t) + DCACHE_LINE ) & ~( DCACHE_LINE - 1 ));
1899  descr = (kmp_mem_descr_t *)( ((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t) );
1900 
1901  descr->ptr_allocated = alloc_ptr; // remember allocated pointer
1902  // we don't need size_allocated
1903  descr->ptr_aligned = (void *)this_thr; // remember allocating thread
1904  // (it is already saved in the bget buffer,
1905  // but we may want to use another allocator in the future)
1906  descr->size_aligned = size;
1907 
1908  end:
1909  KE_TRACE( 25, ( "<- __kmp_fast_allocate( T#%d ) returns %p\n",
1910  __kmp_gtid_from_thread( this_thr ), ptr ) );
1911  return ptr;
1912 } // func __kmp_fast_allocate
1913 
1914 // Free fast memory and place it on the thread's free list if it is of
1915 // the correct size.
1916 void
1917 ___kmp_fast_free( kmp_info_t *this_thr, void * ptr KMP_SRC_LOC_DECL )
1918 {
1919  kmp_mem_descr_t * descr;
1920  kmp_info_t * alloc_thr;
1921  size_t size;
1922  size_t idx;
1923  int index;
1924 
1925  KE_TRACE( 25, ( "-> __kmp_fast_free( T#%d, %p ) called from %s:%d\n",
1926  __kmp_gtid_from_thread(this_thr), ptr KMP_SRC_LOC_PARM ) );
1927  KMP_ASSERT( ptr != NULL );
1928 
1929  descr = (kmp_mem_descr_t *)( ((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t) );
1930 
1931  KE_TRACE(26, (" __kmp_fast_free: size_aligned=%d\n",
1932  (int) descr->size_aligned ) );
1933 
1934  size = descr->size_aligned; // 2, 4, 16, 64, 65, 66, ... cache lines
1935 
1936  idx = DCACHE_LINE * 2; // 2 cache lines is the minimal block size
1937  if ( idx == size ) {
1938  index = 0; // 2 cache lines
1939  } else if ( ( idx <<= 1 ) == size ) {
1940  index = 1; // 4 cache lines
1941  } else if ( ( idx <<= 2 ) == size ) {
1942  index = 2; // 16 cache lines
1943  } else if ( ( idx <<= 2 ) == size ) {
1944  index = 3; // 64 cache lines
1945  } else {
1946  KMP_DEBUG_ASSERT( size > DCACHE_LINE * 64 );
1947  goto free_call; // 65 or more cache lines ( > 8KB )
1948  }
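 /* Worked example, illustrative only: a block handed out above as 4 cache
    lines has size_aligned == 512; idx starts at 256 and one left shift makes
    it equal to size, so the block goes back onto free list 1. */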
1949 
1950  alloc_thr = (kmp_info_t *)descr->ptr_aligned; // get thread owning the block
1951  if ( alloc_thr == this_thr ) {
1952  // push block to self no-sync free list, linking previous head (LIFO)
1953  *((void **)ptr) = this_thr->th.th_free_lists[index].th_free_list_self;
1954  this_thr->th.th_free_lists[index].th_free_list_self = ptr;
1955  } else {
1956  void * head = this_thr->th.th_free_lists[index].th_free_list_other;
1957  if ( head == NULL ) {
1958  // Create new free list
1959  this_thr->th.th_free_lists[index].th_free_list_other = ptr;
1960  *((void **)ptr) = NULL; // mark the tail of the list
1961  descr->size_allocated = (size_t)1; // head of the list keeps its length
1962  } else {
1963  // need to check the existing "other" list's owning thread and the queue size
1964  kmp_mem_descr_t * dsc = (kmp_mem_descr_t *)( (char*)head - sizeof(kmp_mem_descr_t) );
1965  kmp_info_t * q_th = (kmp_info_t *)(dsc->ptr_aligned); // allocating thread, same for all queue nodes
1966  size_t q_sz = dsc->size_allocated + 1; // new queue size if we add the current block
1967  if ( q_th == alloc_thr && q_sz <= KMP_FREE_LIST_LIMIT ) {
1968  // we can add the current block to the "other" list, no sync needed
1969  *((void **)ptr) = head;
1970  descr->size_allocated = q_sz;
1971  this_thr->th.th_free_lists[index].th_free_list_other = ptr;
1972  } else {
1973  // either the queue's owning thread is changing or the size limit was exceeded;
1974  // return the old queue to its allocating thread (q_th) synchronously,
1975  // and start a new list for alloc_thr's blocks
1976  void * old_ptr;
1977  void * tail = head;
1978  void * next = *((void **)head);
1979  while ( next != NULL ) {
1980  KMP_DEBUG_ASSERT(
1981  // queue size should decrease by 1 each step through the list
1982  ((kmp_mem_descr_t*)((char*)next - sizeof(kmp_mem_descr_t)))->size_allocated + 1 ==
1983  ((kmp_mem_descr_t*)((char*)tail - sizeof(kmp_mem_descr_t)))->size_allocated );
1984  tail = next; // remember tail node
1985  next = *((void **)next);
1986  }
1987  KMP_DEBUG_ASSERT( q_th != NULL );
1988  // push block to owner's sync free list
1989  old_ptr = TCR_PTR( q_th->th.th_free_lists[index].th_free_list_sync );
1990  /* the next pointer must be set before setting free_list to ptr to avoid
1991  exposing a broken list to other threads, even for an instant. */
1992  *((void **)tail) = old_ptr;
1993 
1994  while ( ! KMP_COMPARE_AND_STORE_PTR(
1995  &q_th->th.th_free_lists[index].th_free_list_sync,
1996  old_ptr,
1997  head ) )
1998  {
1999  KMP_CPU_PAUSE();
2000  old_ptr = TCR_PTR( q_th->th.th_free_lists[index].th_free_list_sync );
2001  *((void **)tail) = old_ptr;
2002  }
2003 
2004  // start a new list of not-self blocks
2005  this_thr->th.th_free_lists[index].th_free_list_other = ptr;
2006  *((void **)ptr) = NULL;
2007  descr->size_allocated = (size_t)1; // head of queue keeps its length
2008  }
2009  }
2010  }
2011  goto end;
2012 
2013  free_call:
2014  KE_TRACE(25, ( "__kmp_fast_free: T#%d Calling __kmp_thread_free for size %d\n",
2015  __kmp_gtid_from_thread( this_thr), (int) size ) );
2016  __kmp_bget_dequeue( this_thr ); /* Release any queued buffers */
2017  brel( this_thr, descr->ptr_allocated );
2018 
2019  end:
2020  KE_TRACE( 25, ( "<- __kmp_fast_free() returns\n" ) );
2021 
2022 } // func __kmp_fast_free
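/*
  Reading aid for the routine above, not additional behavior: a block freed by
  its allocating thread is pushed onto that thread's no-sync free list; a block
  freed by a different thread is parked on the freeing thread's "other" list;
  and when that list would exceed KMP_FREE_LIST_LIMIT entries or its owning
  thread changes, the accumulated chain is handed back to the owner's sync
  free list with the compare-and-swap loop above.
*/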
2023 
2024 
2025 // Initialize the thread free lists related to fast memory
2026 // Only do this when a thread is initially created.
2027 void
2028 __kmp_initialize_fast_memory( kmp_info_t *this_thr )
2029 {
2030  KE_TRACE(10, ( "__kmp_initialize_fast_memory: Called from th %p\n", this_thr ) );
2031 
2032  memset ( this_thr->th.th_free_lists, 0, NUM_LISTS * sizeof( kmp_free_list_t ) );
2033 }
2034 
2035 // Free the memory in the thread free lists related to fast memory
2036 // Only do this when a thread is being reaped (destroyed).
2037 void
2038 __kmp_free_fast_memory( kmp_info_t *th )
2039 {
2040  // Assuming BGET is the underlying allocator, walk through its structures...
2041  int bin;
2042  thr_data_t * thr = get_thr_data( th );
2043  void ** lst = NULL;
2044 
2045  KE_TRACE(5, ( "__kmp_free_fast_memory: Called T#%d\n",
2046  __kmp_gtid_from_thread( th ) ) );
2047 
2048  __kmp_bget_dequeue( th ); // Release any queued buffers
2049 
2050  // Dig through free lists and extract all allocated blocks
2051  for ( bin = 0; bin < MAX_BGET_BINS; ++bin ) {
2052  bfhead_t * b = thr->freelist[ bin ].ql.flink;
2053  while ( b != &thr->freelist[ bin ] ) {
2054  if ( (kmp_uintptr_t)b->bh.bb.bthr & 1 ) { // the buffer is an allocated address (low bit of bthr is set)
2055  *((void**)b) = lst; // link the list (overwrites bthr but keeps flink intact)
2056  lst = (void**)b; // push b onto lst
2057  }
2058  b = b->ql.flink; // get next buffer
2059  }
2060  }
2061  while ( lst != NULL ) {
2062  void * next = *lst;
2063  KE_TRACE(10, ( "__kmp_free_fast_memory: freeing %p, next=%p th %p (%d)\n",
2064  lst, next, th, __kmp_gtid_from_thread( th ) ) );
2065  (*thr->relfcn)(lst);
2066  #if BufStats
2067  // count blocks to prevent problems in __kmp_finalize_bget()
2068  thr->numprel++; /* Nr of expansion block releases */
2069  thr->numpblk--; /* Total number of blocks */
2070  #endif
2071  lst = (void**)next;
2072  }
2073 
2074  KE_TRACE(5, ( "__kmp_free_fast_memory: Freed T#%d\n",
2075  __kmp_gtid_from_thread( th ) ) );
2076 }
2077 
2078 #endif // USE_FAST_MEMORY