LLVM OpenMP* Runtime Library
kmp_alloc.c
1 /*
2  * kmp_alloc.c -- private/shared dynamic memory allocation and management
3  */
4 
5 
6 //===----------------------------------------------------------------------===//
7 //
8 // The LLVM Compiler Infrastructure
9 //
10 // This file is dual licensed under the MIT and the University of Illinois Open
11 // Source Licenses. See LICENSE.txt for details.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 
16 #include "kmp.h"
17 #include "kmp_wrapper_malloc.h"
18 #include "kmp_io.h"
19 
20 // Disable bget when it is not used
21 #if KMP_USE_BGET
22 
23 /* Thread private buffer management code */
24 
25 typedef int (*bget_compact_t)(size_t, int);
26 typedef void *(*bget_acquire_t)(size_t);
27 typedef void (*bget_release_t)(void *);
28 
29 /* NOTE: bufsize must be a signed datatype */
30 
31 #if KMP_OS_WINDOWS
32 # if KMP_ARCH_X86 || KMP_ARCH_ARM
33  typedef kmp_int32 bufsize;
34 # else
35  typedef kmp_int64 bufsize;
36 # endif
37 #else
38  typedef ssize_t bufsize;
39 #endif
40 
41 /* The three modes of operation are, fifo search, lifo search, and best-fit */
42 
43 typedef enum bget_mode {
44  bget_mode_fifo = 0,
45  bget_mode_lifo = 1,
46  bget_mode_best = 2
47 } bget_mode_t;
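/* The mode lives in thr_data_t::mode (below); set_thr_data() zero-fills that
   structure, so every thread starts out in bget_mode_fifo.  A minimal sketch
   of switching the calling thread to best-fit through the user-level entry
   point defined later in this file (illustration only, not part of the
   original flow):

       kmpc_set_poolmode( bget_mode_best );   // subsequent bget() calls scan for the smallest fit
*/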
48 
49 
50 static void bpool( kmp_info_t *th, void *buffer, bufsize len);
51 static void *bget( kmp_info_t *th, bufsize size);
52 static void *bgetz( kmp_info_t *th, bufsize size);
53 static void *bgetr( kmp_info_t *th, void *buffer, bufsize newsize);
54 static void brel( kmp_info_t *th, void *buf);
55 static void bectl( kmp_info_t *th, bget_compact_t compact, bget_acquire_t acquire, bget_release_t release, bufsize pool_incr );
56 
57 #ifdef KMP_DEBUG
58 static void bstats( kmp_info_t *th, bufsize *curalloc, bufsize *totfree, bufsize *maxfree, long *nget, long *nrel);
59 static void bstatse( kmp_info_t *th, bufsize *pool_incr, long *npool, long *npget, long *nprel, long *ndget, long *ndrel);
60 static void bufdump( kmp_info_t *th, void *buf);
61 static void bpoold( kmp_info_t *th, void *pool, int dumpalloc, int dumpfree);
62 static int bpoolv( kmp_info_t *th, void *pool);
63 #endif
64 
65 /* BGET CONFIGURATION */
66  /* Buffer allocation size quantum:
67  all buffers allocated are a
68  multiple of this size. This
69  MUST be a power of two. */
70 
71  /* On IA-32 architecture with Linux* OS,
72  malloc() does not
73  ensure 16-byte alignment */
74 
75 #if KMP_ARCH_X86 || !KMP_HAVE_QUAD
76 
77 #define SizeQuant 8
78 #define AlignType double
79 
80 #else
81 
82 #define SizeQuant 16
83 #define AlignType _Quad
84 
85 #endif
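/* Every request handed to bget() is rounded up to a multiple of SizeQuant
   with (size + (SizeQuant - 1)) & ~(SizeQuant - 1) (see bget() below).
   A few worked values, assuming the 16-byte quantum:

       (13 + 15) & ~15  ==  16
       (17 + 15) & ~15  ==  32
       (32 + 15) & ~15  ==  32

   With the 8-byte quantum the same 13-byte request also rounds to 16. */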
86 
87 #define BufStats 1 /* Define this symbol to enable the
88  bstats() function which calculates
89  the total free space in the buffer
90  pool, the largest available
91  buffer, and the total space
92  currently allocated. */
93 
94 #ifdef KMP_DEBUG
95 
96 #define BufDump 1 /* Define this symbol to enable the
97  bpoold() function which dumps the
98  buffers in a buffer pool. */
99 
100 #define BufValid 1 /* Define this symbol to enable the
101  bpoolv() function for validating
102  a buffer pool. */
103 
104 #define DumpData 1 /* Define this symbol to enable the
105  bufdump() function which allows
106  dumping the contents of an allocated
107  or free buffer. */
108 #ifdef NOT_USED_NOW
109 
110 #define FreeWipe 1 /* Wipe free buffers to a guaranteed
111  pattern of garbage to trip up
112  miscreants who attempt to use
113  pointers into released buffers. */
114 
115 #define BestFit 1 /* Use a best fit algorithm when
116  searching for space for an
117  allocation request. This uses
118  memory more efficiently, but
119  allocation will be much slower. */
120 #endif /* NOT_USED_NOW */
121 #endif /* KMP_DEBUG */
122 
123 
124 static bufsize bget_bin_size[ ] = {
125  0,
126 // 1 << 6, /* .5 Cache line */
127  1 << 7, /* 1 Cache line, new */
128  1 << 8, /* 2 Cache lines */
129  1 << 9, /* 4 Cache lines, new */
130  1 << 10, /* 8 Cache lines */
131  1 << 11, /* 16 Cache lines, new */
132  1 << 12,
133  1 << 13, /* new */
134  1 << 14,
135  1 << 15, /* new */
136  1 << 16,
137  1 << 17,
138  1 << 18,
139  1 << 19,
140  1 << 20, /* 1MB */
141  1 << 21, /* 2MB */
142  1 << 22, /* 4MB */
143  1 << 23, /* 8MB */
144  1 << 24, /* 16MB */
145  1 << 25, /* 32MB */
146 };
147 
148 #define MAX_BGET_BINS (int)(sizeof(bget_bin_size) / sizeof(bufsize))
149 
150 struct bfhead;
151 
152 /* Declare the interface, including the requested buffer size type,
153  bufsize. */
154 
155 /* Queue links */
156 
157 typedef struct qlinks {
158  struct bfhead *flink; /* Forward link */
159  struct bfhead *blink; /* Backward link */
160 } qlinks_t;
161 
162 /* Header in allocated and free buffers */
163 
164 typedef struct bhead2 {
165  kmp_info_t *bthr; /* The thread which owns the buffer pool */
166  bufsize prevfree; /* Relative link back to previous
167  free buffer in memory or 0 if
168  previous buffer is allocated. */
169  bufsize bsize; /* Buffer size: positive if free,
170  negative if allocated. */
171 } bhead2_t;
172 
173 /* Make sure the bhead structure is a multiple of SizeQuant in size. */
174 
175 typedef union bhead {
176  KMP_ALIGN( SizeQuant )
177  AlignType b_align;
178  char b_pad[ sizeof(bhead2_t) + (SizeQuant - (sizeof(bhead2_t) % SizeQuant)) ];
179  bhead2_t bb;
180 } bhead_t;
181 #define BH(p) ((bhead_t *) (p))
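/* Worked size, assuming 64-bit pointers and SizeQuant == 16: bhead2_t is
   8 + 8 + 8 = 24 bytes, so b_pad stretches the union to 24 + (16 - 24 % 16)
   = 32 bytes and sizeof(bhead_t) comes out as a SizeQuant multiple, which is
   what the split/coalesce arithmetic in bget()/brel() relies on. */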
182 
183 /* Header in directly allocated buffers (by acqfcn) */
184 
185 typedef struct bdhead
186 {
187  bufsize tsize; /* Total size, including overhead */
188  bhead_t bh; /* Common header */
189 } bdhead_t;
190 #define BDH(p) ((bdhead_t *) (p))
191 
192 /* Header in free buffers */
193 
194 typedef struct bfhead {
195  bhead_t bh; /* Common allocated/free header */
196  qlinks_t ql; /* Links on free list */
197 } bfhead_t;
198 #define BFH(p) ((bfhead_t *) (p))
199 
200 typedef struct thr_data {
201  bfhead_t freelist[ MAX_BGET_BINS ];
202 #if BufStats
203  size_t totalloc; /* Total space currently allocated */
204  long numget, numrel; /* Number of bget() and brel() calls */
205  long numpblk; /* Number of pool blocks */
206  long numpget, numprel; /* Number of block gets and rels */
207  long numdget, numdrel; /* Number of direct gets and rels */
208 #endif /* BufStats */
209 
210  /* Automatic expansion block management functions */
211  bget_compact_t compfcn;
212  bget_acquire_t acqfcn;
213  bget_release_t relfcn;
214 
215  bget_mode_t mode; /* what allocation mode to use? */
216 
217  bufsize exp_incr; /* Expansion block size */
218  bufsize pool_len; /* 0: no bpool calls have been made
219  -1: not all pool blocks are
220  the same size
221  >0: (common) block size for all
222  bpool calls made so far
223  */
224  bfhead_t * last_pool; /* Last pool owned by this thread (delayed deallocation) */
225 } thr_data_t;
226 
227 /* Minimum allocation quantum: */
228 
229 #define QLSize (sizeof(qlinks_t))
230 #define SizeQ ((SizeQuant > QLSize) ? SizeQuant : QLSize)
231 #define MaxSize (bufsize)( ~ ( ( (bufsize)( 1 ) << ( sizeof( bufsize ) * CHAR_BIT - 1 ) ) | ( SizeQuant - 1 ) ) )
232  // Maximum for the requested size.
233 
234 /* End sentinel: value placed in bsize field of dummy block delimiting
235  end of pool block. The most negative number which will fit in a
236  bufsize, defined in a way that the compiler will accept. */
237 
238 #define ESent ((bufsize) (-(((((bufsize)1)<<((int)sizeof(bufsize)*8-2))-1)*2)-2))
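/* Worked values, assuming a 64-bit bufsize and SizeQuant == 8:
   MaxSize == 0x7FFFFFFFFFFFFFF8 (the largest positive size, rounded down to
   the quantum) and ESent == -2^63 (the most negative value a bufsize can
   hold, stored by bpool() in the dummy header that terminates each pool). */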
239 
240 /* ------------------------------------------------------------------------ */
241 
242 /* Thread Data management routines */
243 
244 static int
245 bget_get_bin( bufsize size )
246 {
247  // binary chop bins
248  int lo = 0, hi = MAX_BGET_BINS - 1;
249 
250  KMP_DEBUG_ASSERT( size > 0 );
251 
252  while ( (hi - lo) > 1 ) {
253  int mid = (lo + hi) >> 1;
254  if (size < bget_bin_size[ mid ])
255  hi = mid - 1;
256  else
257  lo = mid;
258  }
259 
260  KMP_DEBUG_ASSERT( (lo >= 0) && (lo < MAX_BGET_BINS) );
261 
262  return lo;
263 }
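/* Worked examples with the bget_bin_size[] table above (MAX_BGET_BINS == 20):
   bget_get_bin( 128 ) == 1 and bget_get_bin( 256 ) == 2.  Free buffers are
   filed under the bin returned for their size, and bget() starts its search
   at the bin returned for the (rounded, header-inclusive) request size and
   walks toward the larger bins from there. */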
264 
265 static void
266 set_thr_data( kmp_info_t *th )
267 {
268  int i;
269  thr_data_t *data;
270 
271  data =
272  (thr_data_t *)(
273  ( ! th->th.th_local.bget_data ) ? __kmp_allocate( sizeof( *data ) ) : th->th.th_local.bget_data
274  );
275 
276  memset( data, '\0', sizeof( *data ) );
277 
278  for (i = 0; i < MAX_BGET_BINS; ++i) {
279  data->freelist[ i ].ql.flink = & data->freelist[ i ];
280  data->freelist[ i ].ql.blink = & data->freelist[ i ];
281  }
282 
283  th->th.th_local.bget_data = data;
284  th->th.th_local.bget_list = 0;
285 #if ! USE_CMP_XCHG_FOR_BGET
286 #ifdef USE_QUEUING_LOCK_FOR_BGET
287  __kmp_init_lock( & th->th.th_local.bget_lock );
288 #else
289  __kmp_init_bootstrap_lock( & th->th.th_local.bget_lock );
290 #endif /* USE_QUEUING_LOCK_FOR_BGET */
291 #endif /* ! USE_CMP_XCHG_FOR_BGET */
292 }
293 
294 static thr_data_t *
295 get_thr_data( kmp_info_t *th )
296 {
297  thr_data_t *data;
298 
299  data = (thr_data_t *) th->th.th_local.bget_data;
300 
301  KMP_DEBUG_ASSERT( data != 0 );
302 
303  return data;
304 }
305 
306 
307 #ifdef KMP_DEBUG
308 
309 static void
310 __kmp_bget_validate_queue( kmp_info_t *th )
311 {
312  /* NOTE: assume that the global_lock is held */
313 
314  void *p = (void *) th->th.th_local.bget_list;
315 
316  while (p != 0) {
317  bfhead_t *b = BFH(((char *) p) - sizeof(bhead_t));
318 
319  KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
320  p = (void *) b->ql.flink;
321  }
322 }
323 
324 #endif
325 
326 /* Walk the free list and release the enqueued buffers */
327 
328 static void
329 __kmp_bget_dequeue( kmp_info_t *th )
330 {
331  void *p = TCR_SYNC_PTR(th->th.th_local.bget_list);
332 
333  if (p != 0) {
334  #if USE_CMP_XCHG_FOR_BGET
335  {
336  volatile void *old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
337  while ( ! KMP_COMPARE_AND_STORE_PTR(
338  & th->th.th_local.bget_list, old_value, NULL ) )
339  {
340  KMP_CPU_PAUSE();
341  old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
342  }
343  p = (void *) old_value;
344  }
345  #else /* ! USE_CMP_XCHG_FOR_BGET */
346  #ifdef USE_QUEUING_LOCK_FOR_BGET
347  __kmp_acquire_lock( & th->th.th_local.bget_lock,
348  __kmp_gtid_from_thread(th) );
349  #else
350  __kmp_acquire_bootstrap_lock( & th->th.th_local.bget_lock );
351  #endif /* USE_QUEUING_LOCK_FOR_BGET */
352 
353  p = (void *) th->th.th_local.bget_list;
354  th->th.th_local.bget_list = 0;
355 
356  #ifdef USE_QUEUING_LOCK_FOR_BGET
357  __kmp_release_lock( & th->th.th_local.bget_lock,
358  __kmp_gtid_from_thread(th) );
359  #else
360  __kmp_release_bootstrap_lock( & th->th.th_local.bget_lock );
361  #endif
362  #endif /* USE_CMP_XCHG_FOR_BGET */
363 
364  /* Check again to make sure the list is not empty */
365 
366  while (p != 0) {
367  void *buf = p;
368  bfhead_t *b = BFH(((char *) p) - sizeof(bhead_t));
369 
370  KMP_DEBUG_ASSERT( b->bh.bb.bsize != 0 );
371  KMP_DEBUG_ASSERT( ( (kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1 ) ==
372  (kmp_uintptr_t)th ); // clear possible mark
373  KMP_DEBUG_ASSERT( b->ql.blink == 0 );
374 
375  p = (void *) b->ql.flink;
376 
377  brel( th, buf );
378  }
379  }
380 }
381 
382 /* Chain together the free buffers by using the thread owner field */
383 
384 static void
385 __kmp_bget_enqueue( kmp_info_t *th, void *buf
386 #ifdef USE_QUEUING_LOCK_FOR_BGET
387  , kmp_int32 rel_gtid
388 #endif
389  )
390 {
391  bfhead_t *b = BFH(((char *) buf) - sizeof(bhead_t));
392 
393  KMP_DEBUG_ASSERT( b->bh.bb.bsize != 0 );
394  KMP_DEBUG_ASSERT( ( (kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1 ) ==
395  (kmp_uintptr_t)th ); // clear possible mark
396 
397  b->ql.blink = 0;
398 
399  KC_TRACE( 10, ( "__kmp_bget_enqueue: moving buffer to T#%d list\n",
400  __kmp_gtid_from_thread( th ) ) );
401 
402 #if USE_CMP_XCHG_FOR_BGET
403  {
404  volatile void *old_value = TCR_PTR(th->th.th_local.bget_list);
405  /* the next pointer must be set before setting bget_list to buf to avoid
406  exposing a broken list to other threads, even for an instant. */
407  b->ql.flink = BFH( old_value );
408 
409  while ( ! KMP_COMPARE_AND_STORE_PTR(
410  & th->th.th_local.bget_list, old_value, buf ) )
411  {
412  KMP_CPU_PAUSE();
413  old_value = TCR_PTR(th->th.th_local.bget_list);
414  /* the next pointer must be set before setting bget_list to buf to avoid
415  exposing a broken list to other threads, even for an instant. */
416  b->ql.flink = BFH( old_value );
417  }
418  }
419 #else /* ! USE_CMP_XCHG_FOR_BGET */
420 # ifdef USE_QUEUING_LOCK_FOR_BGET
421  __kmp_acquire_lock( & th->th.th_local.bget_lock, rel_gtid );
422 # else
423  __kmp_acquire_bootstrap_lock( & th->th.th_local.bget_lock );
424  # endif
425 
426  b->ql.flink = BFH( th->th.th_local.bget_list );
427  th->th.th_local.bget_list = (void *) buf;
428 
429 # ifdef USE_QUEUING_LOCK_FOR_BGET
430  __kmp_release_lock( & th->th.th_local.bget_lock, rel_gtid );
431 # else
432  __kmp_release_bootstrap_lock( & th->th.th_local.bget_lock );
433 # endif
434 #endif /* USE_CMP_XCHG_FOR_BGET */
435 }
436 
437 /* insert buffer back onto a new freelist */
438 
439 static void
440 __kmp_bget_insert_into_freelist( thr_data_t *thr, bfhead_t *b )
441 {
442  int bin;
443 
444  KMP_DEBUG_ASSERT( ((size_t)b ) % SizeQuant == 0 );
445  KMP_DEBUG_ASSERT( b->bh.bb.bsize % SizeQuant == 0 );
446 
447  bin = bget_get_bin( b->bh.bb.bsize );
448 
449  KMP_DEBUG_ASSERT(thr->freelist[ bin ].ql.blink->ql.flink == &thr->freelist[ bin ]);
450  KMP_DEBUG_ASSERT(thr->freelist[ bin ].ql.flink->ql.blink == &thr->freelist[ bin ]);
451 
452  b->ql.flink = &thr->freelist[ bin ];
453  b->ql.blink = thr->freelist[ bin ].ql.blink;
454 
455  thr->freelist[ bin ].ql.blink = b;
456  b->ql.blink->ql.flink = b;
457 }
458 
459 /* unlink the buffer from the old freelist */
460 
461 static void
462 __kmp_bget_remove_from_freelist( bfhead_t *b )
463 {
464  KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
465  KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);
466 
467  b->ql.blink->ql.flink = b->ql.flink;
468  b->ql.flink->ql.blink = b->ql.blink;
469 }
470 
471 /* ------------------------------------------------------------------------ */
472 
473 /* GET STATS -- check info on free list */
474 
475 static void
476 bcheck( kmp_info_t *th, bufsize *max_free, bufsize *total_free )
477 {
478  thr_data_t *thr = get_thr_data( th );
479  int bin;
480 
481  *total_free = *max_free = 0;
482 
483  for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
484  bfhead_t *b, *best;
485 
486  best = &thr->freelist[ bin ];
487  b = best->ql.flink;
488 
489  while (b != &thr->freelist[ bin ]) {
490  *total_free += (b->bh.bb.bsize - sizeof( bhead_t ));
491  if ((best == &thr->freelist[ bin ]) || (b->bh.bb.bsize < best->bh.bb.bsize))
492  best = b;
493 
494  /* Link to next buffer */
495  b = b->ql.flink;
496  }
497 
498  if (*max_free < best->bh.bb.bsize)
499  *max_free = best->bh.bb.bsize;
500  }
501 
502  if (*max_free > (bufsize)sizeof( bhead_t ))
503  *max_free -= sizeof( bhead_t );
504 }
505 
506 /* ------------------------------------------------------------------------ */
507 
508 /* BGET -- Allocate a buffer. */
509 
510 static void *
511 bget( kmp_info_t *th, bufsize requested_size )
512 {
513  thr_data_t *thr = get_thr_data( th );
514  bufsize size = requested_size;
515  bfhead_t *b;
516  void *buf;
517  int compactseq = 0;
518  int use_blink = 0;
519 /* For BestFit */
520  bfhead_t *best;
521 
522  if ( size < 0 || size + sizeof( bhead_t ) > MaxSize ) {
523  return NULL;
524  }; // if
525 
526  __kmp_bget_dequeue( th ); /* Release any queued buffers */
527 
528  if (size < (bufsize)SizeQ) { /* Need at least room for the */
529  size = SizeQ; /* queue links. */
530  }
531  #if defined( SizeQuant ) && ( SizeQuant > 1 )
532  size = (size + (SizeQuant - 1)) & (~(SizeQuant - 1));
533  #endif
534 
535  size += sizeof(bhead_t); /* Add overhead in allocated buffer
536  to size required. */
537  KMP_DEBUG_ASSERT( size >= 0 );
538  KMP_DEBUG_ASSERT( size % SizeQuant == 0 );
539 
540  use_blink = ( thr->mode == bget_mode_lifo );
541 
542  /* If a compact function was provided in the call to bectl(), wrap
543  a loop around the allocation process to allow compaction to
544  intervene in case we don't find a suitable buffer in the chain. */
545 
546  for (;;) {
547  int bin;
548 
549  for (bin = bget_get_bin( size ); bin < MAX_BGET_BINS; ++bin) {
550  /* Link to next buffer */
551  b = ( use_blink ? thr->freelist[ bin ].ql.blink : thr->freelist[ bin ].ql.flink );
552 
553  if (thr->mode == bget_mode_best) {
554  best = &thr->freelist[ bin ];
555 
556  /* Scan the free list searching for the first buffer big enough
557  to hold the requested size buffer. */
558 
559  while (b != &thr->freelist[ bin ]) {
560  if (b->bh.bb.bsize >= (bufsize) size) {
561  if ((best == &thr->freelist[ bin ]) || (b->bh.bb.bsize < best->bh.bb.bsize)) {
562  best = b;
563  }
564  }
565 
566  /* Link to next buffer */
567  b = ( use_blink ? b->ql.blink : b->ql.flink );
568  }
569  b = best;
570  }
571 
572  while (b != &thr->freelist[ bin ]) {
573  if ((bufsize) b->bh.bb.bsize >= (bufsize) size) {
574 
575  /* Buffer is big enough to satisfy the request. Allocate it
576  to the caller. We must decide whether the buffer is large
577  enough to split into the part given to the caller and a
578  free buffer that remains on the free list, or whether the
579  entire buffer should be removed from the free list and
580  given to the caller in its entirety. We only split the
581  buffer if enough room remains for a header plus the minimum
582  quantum of allocation. */
583 
584  if ((b->bh.bb.bsize - (bufsize) size) > (bufsize)(SizeQ + (sizeof(bhead_t)))) {
585  bhead_t *ba, *bn;
586 
587  ba = BH(((char *) b) + (b->bh.bb.bsize - (bufsize) size));
588  bn = BH(((char *) ba) + size);
589 
590  KMP_DEBUG_ASSERT(bn->bb.prevfree == b->bh.bb.bsize);
591 
592  /* Subtract size from length of free block. */
593  b->bh.bb.bsize -= (bufsize) size;
594 
595  /* Link allocated buffer to the previous free buffer. */
596  ba->bb.prevfree = b->bh.bb.bsize;
597 
598  /* Plug negative size into user buffer. */
599  ba->bb.bsize = -size;
600 
601  /* Mark this buffer as owned by this thread. */
602  TCW_PTR(ba->bb.bthr, th); // not an allocated address (do not mark it)
603  /* Mark buffer after this one not preceded by free block. */
604  bn->bb.prevfree = 0;
605 
606  /* unlink the buffer from the old freelist, and reinsert it into the new freelist */
607  __kmp_bget_remove_from_freelist( b );
608  __kmp_bget_insert_into_freelist( thr, b );
609 #if BufStats
610  thr->totalloc += (size_t) size;
611  thr->numget++; /* Increment number of bget() calls */
612 #endif
613  buf = (void *) ((((char *) ba) + sizeof(bhead_t)));
614  KMP_DEBUG_ASSERT( ((size_t)buf) % SizeQuant == 0 );
615  return buf;
616  } else {
617  bhead_t *ba;
618 
619  ba = BH(((char *) b) + b->bh.bb.bsize);
620 
621  KMP_DEBUG_ASSERT(ba->bb.prevfree == b->bh.bb.bsize);
622 
623  /* The buffer isn't big enough to split. Give the whole
624  shebang to the caller and remove it from the free list. */
625 
626  __kmp_bget_remove_from_freelist( b );
627 #if BufStats
628  thr->totalloc += (size_t) b->bh.bb.bsize;
629  thr->numget++; /* Increment number of bget() calls */
630 #endif
631  /* Negate size to mark buffer allocated. */
632  b->bh.bb.bsize = -(b->bh.bb.bsize);
633 
634  /* Mark this buffer as owned by this thread. */
635  TCW_PTR(ba->bb.bthr, th); // not an allocated address (do not mark it)
636  /* Zero the back pointer in the next buffer in memory
637  to indicate that this buffer is allocated. */
638  ba->bb.prevfree = 0;
639 
640  /* Give user buffer starting at queue links. */
641  buf = (void *) &(b->ql);
642  KMP_DEBUG_ASSERT( ((size_t)buf) % SizeQuant == 0 );
643  return buf;
644  }
645  }
646 
647  /* Link to next buffer */
648  b = ( use_blink ? b->ql.blink : b->ql.flink );
649  }
650  }
651 
652  /* We failed to find a buffer. If there's a compact function
653  defined, notify it of the size requested. If it returns
654  TRUE, try the allocation again. */
655 
656  if ((thr->compfcn == 0) || (!(*thr->compfcn)(size, ++compactseq))) {
657  break;
658  }
659  }
660 
661  /* No buffer available with requested size free. */
662 
663  /* Don't give up yet -- look in the reserve supply. */
664 
665  if (thr->acqfcn != 0) {
666  if (size > (bufsize) (thr->exp_incr - sizeof(bhead_t))) {
667 
668  /* Request is too large to fit in a single expansion
669  block. Try to satisfy it by a direct buffer acquisition. */
670 
671  bdhead_t *bdh;
672 
673  size += sizeof(bdhead_t) - sizeof(bhead_t);
674 
675  KE_TRACE( 10, ("%%%%%% MALLOC( %d )\n", (int) size ) );
676 
677  /* richryan */
678  bdh = BDH((*thr->acqfcn)((bufsize) size));
679  if (bdh != NULL) {
680 
681  /* Mark the buffer special by setting the size field
682  of its header to zero. */
683  bdh->bh.bb.bsize = 0;
684 
685  /* Mark this buffer as owned by this thread. */
686  TCW_PTR(bdh->bh.bb.bthr, th); // don't mark buffer as allocated,
687  // because direct buffer never goes to free list
688  bdh->bh.bb.prevfree = 0;
689  bdh->tsize = size;
690 #if BufStats
691  thr->totalloc += (size_t) size;
692  thr->numget++; /* Increment number of bget() calls */
693  thr->numdget++; /* Direct bget() call count */
694 #endif
695  buf = (void *) (bdh + 1);
696  KMP_DEBUG_ASSERT( ((size_t)buf) % SizeQuant == 0 );
697  return buf;
698  }
699 
700  } else {
701 
702  /* Try to obtain a new expansion block */
703 
704  void *newpool;
705 
706  KE_TRACE( 10, ("%%%%%% MALLOCB( %d )\n", (int) thr->exp_incr ) );
707 
708  /* richryan */
709  newpool = (*thr->acqfcn)((bufsize) thr->exp_incr);
710  KMP_DEBUG_ASSERT( ((size_t)newpool) % SizeQuant == 0 );
711  if (newpool != NULL) {
712  bpool( th, newpool, thr->exp_incr);
713  buf = bget( th, requested_size); /* This can't, I say, can't get into a loop. */
714  return buf;
715  }
716  }
717  }
718 
719  /* Still no buffer available */
720 
721  return NULL;
722 }
723 
724 /* BGETZ -- Allocate a buffer and clear its contents to zero. We clear
725  the entire contents of the buffer to zero, not just the
726  region requested by the caller. */
727 
728 static void *
729 bgetz( kmp_info_t *th, bufsize size )
730 {
731  char *buf = (char *) bget( th, size);
732 
733  if (buf != NULL) {
734  bhead_t *b;
735  bufsize rsize;
736 
737  b = BH(buf - sizeof(bhead_t));
738  rsize = -(b->bb.bsize);
739  if (rsize == 0) {
740  bdhead_t *bd;
741 
742  bd = BDH(buf - sizeof(bdhead_t));
743  rsize = bd->tsize - (bufsize) sizeof(bdhead_t);
744  } else {
745  rsize -= sizeof(bhead_t);
746  }
747 
748  KMP_DEBUG_ASSERT(rsize >= size);
749 
750  (void) memset(buf, 0, (bufsize) rsize);
751  }
752  return ((void *) buf);
753 }
754 
755 /* BGETR -- Reallocate a buffer. This is a minimal implementation,
756  simply in terms of brel() and bget(). It could be
757  enhanced to allow the buffer to grow into adjacent free
758  blocks and to avoid moving data unnecessarily. */
759 
760 static void *
761 bgetr( kmp_info_t *th, void *buf, bufsize size)
762 {
763  void *nbuf;
764  bufsize osize; /* Old size of buffer */
765  bhead_t *b;
766 
767  nbuf = bget( th, size );
768  if ( nbuf == NULL ) { /* Acquire new buffer */
769  return NULL;
770  }
771  if ( buf == NULL ) {
772  return nbuf;
773  }
774  b = BH(((char *) buf) - sizeof(bhead_t));
775  osize = -b->bb.bsize;
776  if (osize == 0) {
777  /* Buffer acquired directly through acqfcn. */
778  bdhead_t *bd;
779 
780  bd = BDH(((char *) buf) - sizeof(bdhead_t));
781  osize = bd->tsize - (bufsize) sizeof(bdhead_t);
782  } else {
783  osize -= sizeof(bhead_t);
784  };
785 
786  KMP_DEBUG_ASSERT(osize > 0);
787 
788  (void) KMP_MEMCPY((char *) nbuf, (char *) buf, /* Copy the data */
789  (size_t) ((size < osize) ? size : osize));
790  brel( th, buf );
791 
792  return nbuf;
793 }
794 
795 /* BREL -- Release a buffer. */
796 
797 static void
798 brel( kmp_info_t *th, void *buf )
799 {
800  thr_data_t *thr = get_thr_data( th );
801  bfhead_t *b, *bn;
802  kmp_info_t *bth;
803 
804  KMP_DEBUG_ASSERT(buf != NULL);
805  KMP_DEBUG_ASSERT( ((size_t)buf) % SizeQuant == 0 );
806 
807  b = BFH(((char *) buf) - sizeof(bhead_t));
808 
809  if (b->bh.bb.bsize == 0) { /* Directly-acquired buffer? */
810  bdhead_t *bdh;
811 
812  bdh = BDH(((char *) buf) - sizeof(bdhead_t));
813  KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
814 #if BufStats
815  thr->totalloc -= (size_t) bdh->tsize;
816  thr->numdrel++; /* Number of direct releases */
817  thr->numrel++; /* Increment number of brel() calls */
818 #endif /* BufStats */
819 #ifdef FreeWipe
820  (void) memset((char *) buf, 0x55,
821  (size_t) (bdh->tsize - sizeof(bdhead_t)));
822 #endif /* FreeWipe */
823 
824  KE_TRACE( 10, ("%%%%%% FREE( %p )\n", (void *) bdh ) );
825 
826  KMP_DEBUG_ASSERT( thr->relfcn != 0 );
827  (*thr->relfcn)((void *) bdh); /* Release it directly. */
828  return;
829  }
830 
831  bth = (kmp_info_t *)( (kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1 ); // clear possible mark before comparison
832  if ( bth != th ) {
833  /* Add this buffer to be released by the owning thread later */
834  __kmp_bget_enqueue( bth, buf
835 #ifdef USE_QUEUING_LOCK_FOR_BGET
836  , __kmp_gtid_from_thread( th )
837 #endif
838  );
839  return;
840  }
841 
842  /* Buffer size must be negative, indicating that the buffer is
843  allocated. */
844 
845  if (b->bh.bb.bsize >= 0) {
846  bn = NULL;
847  }
848  KMP_DEBUG_ASSERT(b->bh.bb.bsize < 0);
849 
850  /* Back pointer in next buffer must be zero, indicating the
851  same thing: */
852 
853  KMP_DEBUG_ASSERT(BH((char *) b - b->bh.bb.bsize)->bb.prevfree == 0);
854 
855 #if BufStats
856  thr->numrel++; /* Increment number of brel() calls */
857  thr->totalloc += (size_t) b->bh.bb.bsize;
858 #endif
859 
860  /* If the back link is nonzero, the previous buffer is free. */
861 
862  if (b->bh.bb.prevfree != 0) {
863  /* The previous buffer is free. Consolidate this buffer with it
864  by adding the length of this buffer to the previous free
865  buffer. Note that we subtract the size in the buffer being
866  released, since it's negative to indicate that the buffer is
867  allocated. */
868 
869  register bufsize size = b->bh.bb.bsize;
870 
871  /* Make the previous buffer the one we're working on. */
872  KMP_DEBUG_ASSERT(BH((char *) b - b->bh.bb.prevfree)->bb.bsize == b->bh.bb.prevfree);
873  b = BFH(((char *) b) - b->bh.bb.prevfree);
874  b->bh.bb.bsize -= size;
875 
876  /* unlink the buffer from the old freelist */
877  __kmp_bget_remove_from_freelist( b );
878  }
879  else {
880  /* The previous buffer is allocated. Mark this buffer
881  size as positive (i.e. free) and fall through to place
882  the buffer on the free list as an isolated free block. */
883 
884  b->bh.bb.bsize = -b->bh.bb.bsize;
885  }
886 
887  /* insert buffer back onto a new freelist */
888  __kmp_bget_insert_into_freelist( thr, b );
889 
890 
891  /* Now we look at the next buffer in memory, located by advancing from
892  the start of this buffer by its size, to see if that buffer is
893  free. If it is, we combine this buffer with the next one in
894  memory, dechaining the second buffer from the free list. */
895 
896  bn = BFH(((char *) b) + b->bh.bb.bsize);
897  if (bn->bh.bb.bsize > 0) {
898 
899  /* The buffer is free. Remove it from the free list and add
900  its size to that of our buffer. */
901 
902  KMP_DEBUG_ASSERT(BH((char *) bn + bn->bh.bb.bsize)->bb.prevfree == bn->bh.bb.bsize);
903 
904  __kmp_bget_remove_from_freelist( bn );
905 
906  b->bh.bb.bsize += bn->bh.bb.bsize;
907 
908  /* unlink the buffer from the old freelist, and reinsert it into the new freelist */
909 
910  __kmp_bget_remove_from_freelist( b );
911  __kmp_bget_insert_into_freelist( thr, b );
912 
913  /* Finally, advance to the buffer that follows the newly
914  consolidated free block. We must set its backpointer to the
915  head of the consolidated free block. We know the next block
916  must be an allocated block because the process of recombination
917  guarantees that two free blocks will never be contiguous in
918  memory. */
919 
920  bn = BFH(((char *) b) + b->bh.bb.bsize);
921  }
922 #ifdef FreeWipe
923  (void) memset(((char *) b) + sizeof(bfhead_t), 0x55,
924  (size_t) (b->bh.bb.bsize - sizeof(bfhead_t)));
925 #endif
926  KMP_DEBUG_ASSERT(bn->bh.bb.bsize < 0);
927 
928  /* The next buffer is allocated. Set the backpointer in it to point
929  to this buffer, the previous free buffer in memory. */
930 
931  bn->bh.bb.prevfree = b->bh.bb.bsize;
932 
933  /* If a block-release function is defined, and this free buffer
934  constitutes the entire block, release it. Note that pool_len
935  is defined in such a way that the test will fail unless all
936  pool blocks are the same size. */
937 
938  if (thr->relfcn != 0 &&
939  b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t)))
940  {
941 #if BufStats
942  if (thr->numpblk != 1) { /* Do not release the last buffer until finalization time */
943 #endif
944 
945  KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
946  KMP_DEBUG_ASSERT(BH((char *) b + b->bh.bb.bsize)->bb.bsize == ESent);
947  KMP_DEBUG_ASSERT(BH((char *) b + b->bh.bb.bsize)->bb.prevfree == b->bh.bb.bsize);
948 
949  /* Unlink the buffer from the free list */
950  __kmp_bget_remove_from_freelist( b );
951 
952  KE_TRACE( 10, ("%%%%%% FREE( %p )\n", (void *) b ) );
953 
954  (*thr->relfcn)(b);
955 #if BufStats
956  thr->numprel++; /* Nr of expansion block releases */
957  thr->numpblk--; /* Total number of blocks */
958  KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
959 
960  /* avoid leaving stale last_pool pointer around if it is being deallocated */
961  if (thr->last_pool == b) thr->last_pool = 0;
962  }
963  else {
964  thr->last_pool = b;
965  }
966 #endif /* BufStats */
967  }
968 }
969 
970 /* BECTL -- Establish automatic pool expansion control */
971 
972 static void
973 bectl( kmp_info_t *th, bget_compact_t compact, bget_acquire_t acquire, bget_release_t release, bufsize pool_incr)
974 {
975  thr_data_t *thr = get_thr_data( th );
976 
977  thr->compfcn = compact;
978  thr->acqfcn = acquire;
979  thr->relfcn = release;
980  thr->exp_incr = pool_incr;
981 }
982 
983 /* BPOOL -- Add a region of memory to the buffer pool. */
984 
985 static void
986 bpool( kmp_info_t *th, void *buf, bufsize len)
987 {
988 /* int bin = 0; */
989  thr_data_t *thr = get_thr_data( th );
990  bfhead_t *b = BFH(buf);
991  bhead_t *bn;
992 
993  __kmp_bget_dequeue( th ); /* Release any queued buffers */
994 
995 #ifdef SizeQuant
996  len &= ~(SizeQuant - 1);
997 #endif
998  if (thr->pool_len == 0) {
999  thr->pool_len = len;
1000  } else if (len != thr->pool_len) {
1001  thr->pool_len = -1;
1002  }
1003 #if BufStats
1004  thr->numpget++; /* Number of block acquisitions */
1005  thr->numpblk++; /* Number of blocks total */
1006  KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
1007 #endif /* BufStats */
1008 
1009  /* Since the block is initially occupied by a single free buffer,
1010  it had better not be (much) larger than the largest buffer
1011  whose size we can store in bhead.bb.bsize. */
1012 
1013  KMP_DEBUG_ASSERT(len - sizeof(bhead_t) <= -((bufsize) ESent + 1));
1014 
1015  /* Clear the backpointer at the start of the block to indicate that
1016  there is no free block prior to this one. That blocks
1017  recombination when the first block in memory is released. */
1018 
1019  b->bh.bb.prevfree = 0;
1020 
1021  /* Create a dummy allocated buffer at the end of the pool. This dummy
1022  buffer is seen when a buffer at the end of the pool is released and
1023  blocks recombination of the last buffer with the dummy buffer at
1024  the end. The length in the dummy buffer is set to the largest
1025  negative number to denote the end of the pool for diagnostic
1026  routines (this specific value is not counted on by the actual
1027  allocation and release functions). */
1028 
1029  len -= sizeof(bhead_t);
1030  b->bh.bb.bsize = (bufsize) len;
1031  /* Set the owner of this buffer */
1032  TCW_PTR( b->bh.bb.bthr, (kmp_info_t*)((kmp_uintptr_t)th | 1) ); // mark the buffer as allocated address
1033 
1034  /* Chain the new block to the free list. */
1035  __kmp_bget_insert_into_freelist( thr, b );
1036 
1037 #ifdef FreeWipe
1038  (void) memset(((char *) b) + sizeof(bfhead_t), 0x55,
1039  (size_t) (len - sizeof(bfhead_t)));
1040 #endif
1041  bn = BH(((char *) b) + len);
1042  bn->bb.prevfree = (bufsize) len;
1043  /* Definition of ESent assumes two's complement! */
1044  KMP_DEBUG_ASSERT( (~0) == -1 && (bn != 0) );
1045 
1046  bn->bb.bsize = ESent;
1047 }
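/* Resulting layout of a pool block handed to bpool() (len already rounded
   down to a SizeQuant multiple):

   +--------------------------------------------+----------------------+
   | one free buffer                            | sentinel bhead_t     |
   | bsize = len - sizeof(bhead_t), prevfree = 0| bsize = ESent        |
   +--------------------------------------------+----------------------+

   The sentinel records the free buffer's size in its prevfree field (as any
   allocated header following a free buffer does), and its ESent size keeps
   the last real buffer from being coalesced past the end of the block. */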
1048 
1049 /* ------------------------------------------------------------------------ */
1050 
1051 /* BFREED -- Dump the free lists for this thread. */
1052 
1053 static void
1054 bfreed( kmp_info_t *th )
1055 {
1056  int bin = 0, count = 0;
1057  int gtid = __kmp_gtid_from_thread( th );
1058  thr_data_t *thr = get_thr_data( th );
1059 
1060 #if BufStats
1061  __kmp_printf_no_lock("__kmp_printpool: T#%d total=%" KMP_UINT64_SPEC " get=%" KMP_INT64_SPEC " rel=%" \
1062  KMP_INT64_SPEC " pblk=%" KMP_INT64_SPEC " pget=%" KMP_INT64_SPEC " prel=%" KMP_INT64_SPEC \
1063  " dget=%" KMP_INT64_SPEC " drel=%" KMP_INT64_SPEC "\n",
1064  gtid, (kmp_uint64) thr->totalloc,
1065  (kmp_int64) thr->numget, (kmp_int64) thr->numrel,
1066  (kmp_int64) thr->numpblk,
1067  (kmp_int64) thr->numpget, (kmp_int64) thr->numprel,
1068  (kmp_int64) thr->numdget, (kmp_int64) thr->numdrel );
1069 #endif
1070 
1071  for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
1072  bfhead_t *b;
1073 
1074  for (b = thr->freelist[ bin ].ql.flink; b != &thr->freelist[ bin ]; b = b->ql.flink) {
1075  bufsize bs = b->bh.bb.bsize;
1076 
1077  KMP_DEBUG_ASSERT( b->ql.blink->ql.flink == b );
1078  KMP_DEBUG_ASSERT( b->ql.flink->ql.blink == b );
1079  KMP_DEBUG_ASSERT( bs > 0 );
1080 
1081  count += 1;
1082 
1083  __kmp_printf_no_lock("__kmp_printpool: T#%d Free block: 0x%p size %6ld bytes.\n", gtid, b, (long) bs );
1084 #ifdef FreeWipe
1085  {
1086  char *lerr = ((char *) b) + sizeof(bfhead_t);
1087  if ((bs > sizeof(bfhead_t)) && ((*lerr != 0x55) || (memcmp(lerr, lerr + 1, (size_t) (bs - (sizeof(bfhead_t) + 1))) != 0))) {
1088  __kmp_printf_no_lock( "__kmp_printpool: T#%d (Contents of above free block have been overstored.)\n", gtid );
1089  }
1090  }
1091 #endif
1092  }
1093  }
1094 
1095  if (count == 0)
1096  __kmp_printf_no_lock("__kmp_printpool: T#%d No free blocks\n", gtid );
1097 }
1098 
1099 /* ------------------------------------------------------------------------ */
1100 
1101 #ifdef KMP_DEBUG
1102 
1103 #if BufStats
1104 
1105 /* BSTATS -- Return buffer allocation free space statistics. */
1106 
1107 static void
1108 bstats( kmp_info_t *th, bufsize *curalloc, bufsize *totfree, bufsize *maxfree, long *nget, long *nrel)
1109 {
1110  int bin = 0;
1111  thr_data_t *thr = get_thr_data( th );
1112 
1113  *nget = thr->numget;
1114  *nrel = thr->numrel;
1115  *curalloc = (bufsize) thr->totalloc;
1116  *totfree = 0;
1117  *maxfree = -1;
1118 
1119  for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
1120  bfhead_t *b = thr->freelist[ bin ].ql.flink;
1121 
1122  while (b != &thr->freelist[ bin ]) {
1123  KMP_DEBUG_ASSERT(b->bh.bb.bsize > 0);
1124  *totfree += b->bh.bb.bsize;
1125  if (b->bh.bb.bsize > *maxfree) {
1126  *maxfree = b->bh.bb.bsize;
1127  }
1128  b = b->ql.flink; /* Link to next buffer */
1129  }
1130  }
1131 }
1132 
1133 /* BSTATSE -- Return extended statistics */
1134 
1135 static void
1136 bstatse( kmp_info_t *th, bufsize *pool_incr, long *npool, long *npget, long *nprel, long *ndget, long *ndrel)
1137 {
1138  thr_data_t *thr = get_thr_data( th );
1139 
1140  *pool_incr = (thr->pool_len < 0) ? -thr->exp_incr : thr->exp_incr;
1141  *npool = thr->numpblk;
1142  *npget = thr->numpget;
1143  *nprel = thr->numprel;
1144  *ndget = thr->numdget;
1145  *ndrel = thr->numdrel;
1146 }
1147 
1148 #endif /* BufStats */
1149 
1150 /* BUFDUMP -- Dump the data in a buffer. This is called with the user
1151  data pointer, and backs up to the buffer header. It will
1152  dump either a free block or an allocated one. */
1153 
1154 static void
1155 bufdump( kmp_info_t *th, void *buf )
1156 {
1157  bfhead_t *b;
1158  unsigned char *bdump;
1159  bufsize bdlen;
1160 
1161  b = BFH(((char *) buf) - sizeof(bhead_t));
1162  KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
1163  if (b->bh.bb.bsize < 0) {
1164  bdump = (unsigned char *) buf;
1165  bdlen = (-b->bh.bb.bsize) - (bufsize) sizeof(bhead_t);
1166  } else {
1167  bdump = (unsigned char *) (((char *) b) + sizeof(bfhead_t));
1168  bdlen = b->bh.bb.bsize - (bufsize) sizeof(bfhead_t);
1169  }
1170 
1171  while (bdlen > 0) {
1172  int i, dupes = 0;
1173  bufsize l = bdlen;
1174  char bhex[50], bascii[20];
1175 
1176  if (l > 16) {
1177  l = 16;
1178  }
1179 
1180  for (i = 0; i < l; i++) {
1181  (void) KMP_SNPRINTF(bhex + i * 3, sizeof(bhex) - i * 3, "%02X ", bdump[i]);
1182  if (bdump[i] > 0x20 && bdump[i] < 0x7F)
1183  bascii[ i ] = bdump[ i ];
1184  else
1185  bascii[ i ] = ' ';
1186  }
1187  bascii[i] = 0;
1188  (void) __kmp_printf_no_lock("%-48s %s\n", bhex, bascii);
1189  bdump += l;
1190  bdlen -= l;
1191  while ((bdlen > 16) && (memcmp((char *) (bdump - 16),
1192  (char *) bdump, 16) == 0)) {
1193  dupes++;
1194  bdump += 16;
1195  bdlen -= 16;
1196  }
1197  if (dupes > 1) {
1198  (void) __kmp_printf_no_lock(
1199  " (%d lines [%d bytes] identical to above line skipped)\n",
1200  dupes, dupes * 16);
1201  } else if (dupes == 1) {
1202  bdump -= 16;
1203  bdlen += 16;
1204  }
1205  }
1206 }
1207 
1208 /* BPOOLD -- Dump a buffer pool. The buffer headers are always listed.
1209  If DUMPALLOC is nonzero, the contents of allocated buffers
1210  are dumped. If DUMPFREE is nonzero, free blocks are
1211  dumped as well. If FreeWipe checking is enabled, free
1212  blocks which have been clobbered will always be dumped. */
1213 
1214 static void
1215 bpoold( kmp_info_t *th, void *buf, int dumpalloc, int dumpfree)
1216 {
1217  bfhead_t *b = BFH( (char*)buf - sizeof(bhead_t));
1218 
1219  while (b->bh.bb.bsize != ESent) {
1220  bufsize bs = b->bh.bb.bsize;
1221 
1222  if (bs < 0) {
1223  bs = -bs;
1224  (void) __kmp_printf_no_lock("Allocated buffer: size %6ld bytes.\n", (long) bs);
1225  if (dumpalloc) {
1226  bufdump( th, (void *) (((char *) b) + sizeof(bhead_t)));
1227  }
1228  } else {
1229  const char *lerr = "";
1230 
1231  KMP_DEBUG_ASSERT(bs > 0);
1232  if ((b->ql.blink->ql.flink != b) || (b->ql.flink->ql.blink != b)) {
1233  lerr = " (Bad free list links)";
1234  }
1235  (void) __kmp_printf_no_lock("Free block: size %6ld bytes.%s\n",
1236  (long) bs, lerr);
1237 #ifdef FreeWipe
1238  lerr = ((char *) b) + sizeof(bfhead_t);
1239  if ((bs > sizeof(bfhead_t)) && ((*lerr != 0x55) ||
1240  (memcmp(lerr, lerr + 1,
1241  (size_t) (bs - (sizeof(bfhead_t) + 1))) != 0))) {
1242  (void) __kmp_printf_no_lock(
1243  "(Contents of above free block have been overstored.)\n");
1244  bufdump( th, (void *) (((char *) b) + sizeof(bhead_t)));
1245  } else
1246 #endif
1247  if (dumpfree) {
1248  bufdump( th, (void *) (((char *) b) + sizeof(bhead_t)));
1249  }
1250  }
1251  b = BFH(((char *) b) + bs);
1252  }
1253 }
1254 
1255 /* BPOOLV -- Validate a buffer pool. */
1256 
1257 static int
1258 bpoolv( kmp_info_t *th, void *buf )
1259 {
1260  bfhead_t *b = BFH(buf);
1261 
1262  while (b->bh.bb.bsize != ESent) {
1263  bufsize bs = b->bh.bb.bsize;
1264 
1265  if (bs < 0) {
1266  bs = -bs;
1267  } else {
1268 #ifdef FreeWipe
1269  char *lerr = "";
1270 #endif
1271 
1272  KMP_DEBUG_ASSERT(bs > 0);
1273  if (bs <= 0) {
1274  return 0;
1275  }
1276  if ((b->ql.blink->ql.flink != b) || (b->ql.flink->ql.blink != b)) {
1277  (void) __kmp_printf_no_lock("Free block: size %6ld bytes. (Bad free list links)\n",
1278  (long) bs);
1279  KMP_DEBUG_ASSERT(0);
1280  return 0;
1281  }
1282 #ifdef FreeWipe
1283  lerr = ((char *) b) + sizeof(bfhead_t);
1284  if ((bs > sizeof(bfhead_t)) && ((*lerr != 0x55) ||
1285  (memcmp(lerr, lerr + 1,
1286  (size_t) (bs - (sizeof(bfhead_t) + 1))) != 0))) {
1287  (void) __kmp_printf_no_lock(
1288  "(Contents of above free block have been overstored.)\n");
1289  bufdump( th, (void *) (((char *) b) + sizeof(bhead_t)));
1290  KMP_DEBUG_ASSERT(0);
1291  return 0;
1292  }
1293 #endif /* FreeWipe */
1294  }
1295  b = BFH(((char *) b) + bs);
1296  }
1297  return 1;
1298 }
1299 
1300 #endif /* KMP_DEBUG */
1301 
1302 /* ------------------------------------------------------------------------ */
1303 
1304 void
1305 __kmp_initialize_bget( kmp_info_t *th )
1306 {
1307  KMP_DEBUG_ASSERT( SizeQuant >= sizeof( void * ) && (th != 0) );
1308 
1309  set_thr_data( th );
1310 
1311  bectl( th, (bget_compact_t) 0, (bget_acquire_t) malloc, (bget_release_t) free,
1312  (bufsize) __kmp_malloc_pool_incr );
1313 }
1314 
1315 void
1316 __kmp_finalize_bget( kmp_info_t *th )
1317 {
1318  thr_data_t *thr;
1319  bfhead_t *b;
1320 
1321  KMP_DEBUG_ASSERT( th != 0 );
1322 
1323 #if BufStats
1324  thr = (thr_data_t *) th->th.th_local.bget_data;
1325  KMP_DEBUG_ASSERT( thr != NULL );
1326  b = thr->last_pool;
1327 
1328  /* If a block-release function is defined, and this free buffer
1329  constitutes the entire block, release it. Note that pool_len
1330  is defined in such a way that the test will fail unless all
1331  pool blocks are the same size. */
1332 
1333  /* Deallocate the last pool if one exists because we no longer do it in brel() */
1334  if (thr->relfcn != 0 && b != 0 && thr->numpblk != 0 &&
1335  b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t)))
1336  {
1337  KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
1338  KMP_DEBUG_ASSERT(BH((char *) b + b->bh.bb.bsize)->bb.bsize == ESent);
1339  KMP_DEBUG_ASSERT(BH((char *) b + b->bh.bb.bsize)->bb.prevfree == b->bh.bb.bsize);
1340 
1341  /* Unlink the buffer from the free list */
1342  __kmp_bget_remove_from_freelist( b );
1343 
1344  KE_TRACE( 10, ("%%%%%% FREE( %p )\n", (void *) b ) );
1345 
1346  (*thr->relfcn)(b);
1347  thr->numprel++; /* Nr of expansion block releases */
1348  thr->numpblk--; /* Total number of blocks */
1349  KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
1350  }
1351 #endif /* BufStats */
1352 
1353  /* Deallocate bget_data */
1354  if ( th->th.th_local.bget_data != NULL ) {
1355  __kmp_free( th->th.th_local.bget_data );
1356  th->th.th_local.bget_data = NULL;
1357  }; // if
1358 }
1359 
1360 void
1361 kmpc_set_poolsize( size_t size )
1362 {
1363  bectl( __kmp_get_thread(), (bget_compact_t) 0, (bget_acquire_t) malloc,
1364  (bget_release_t) free, (bufsize) size );
1365 }
1366 
1367 size_t
1368 kmpc_get_poolsize( void )
1369 {
1370  thr_data_t *p;
1371 
1372  p = get_thr_data( __kmp_get_thread() );
1373 
1374  return p->exp_incr;
1375 }
1376 
1377 void
1378 kmpc_set_poolmode( int mode )
1379 {
1380  thr_data_t *p;
1381 
1382  if (mode == bget_mode_fifo || mode == bget_mode_lifo || mode == bget_mode_best) {
1383  p = get_thr_data( __kmp_get_thread() );
1384  p->mode = (bget_mode_t) mode;
1385  }
1386 }
1387 
1388 int
1389 kmpc_get_poolmode( void )
1390 {
1391  thr_data_t *p;
1392 
1393  p = get_thr_data( __kmp_get_thread() );
1394 
1395  return p->mode;
1396 }
1397 
1398 void
1399 kmpc_get_poolstat( size_t *maxmem, size_t *allmem )
1400 {
1401  kmp_info_t *th = __kmp_get_thread();
1402  bufsize a, b;
1403 
1404  __kmp_bget_dequeue( th ); /* Release any queued buffers */
1405 
1406  bcheck( th, &a, &b );
1407 
1408  *maxmem = a;
1409  *allmem = b;
1410 }
1411 
1412 void
1413 kmpc_poolprint( void )
1414 {
1415  kmp_info_t *th = __kmp_get_thread();
1416 
1417  __kmp_bget_dequeue( th ); /* Release any queued buffers */
1418 
1419  bfreed( th );
1420 }
1421 
1422 #endif // #if KMP_USE_BGET
1423 
1424 /* ------------------------------------------------------------------------ */
1425 
1426 void *
1427 kmpc_malloc( size_t size )
1428 {
1429  void * ptr;
1430  ptr = bget( __kmp_entry_thread(), (bufsize)(size + sizeof(ptr)) );
1431  if( ptr != NULL ) {
1432  // save allocated pointer just before one returned to user
1433  *(void**)ptr = ptr;
1434  ptr = (void**)ptr + 1;
1435  }
1436  return ptr;
1437 }
1438 
1439 #define IS_POWER_OF_TWO(n) (((n)&((n)-1))==0)
1440 
1441 void *
1442 kmpc_aligned_malloc( size_t size, size_t alignment )
1443 {
1444  void * ptr;
1445  void * ptr_allocated;
1446  KMP_DEBUG_ASSERT( alignment < 32 * 1024 ); // Alignment should not be too big
1447  if( !IS_POWER_OF_TWO(alignment) ) {
1448  // AC: do we need to issue a warning here?
1449  errno = EINVAL;
1450  return NULL;
1451  }
1452  size = size + sizeof( void* ) + alignment;
1453  ptr_allocated = bget( __kmp_entry_thread(), (bufsize)size );
1454  if( ptr_allocated != NULL ) {
1455  // save allocated pointer just before one returned to user
1456  ptr = (void*)(((kmp_uintptr_t)ptr_allocated + sizeof( void* ) + alignment) & ~(alignment - 1));
1457  *((void**)ptr - 1) = ptr_allocated;
1458  } else {
1459  ptr = NULL;
1460  }
1461  return ptr;
1462 }
1463 
1464 void *
1465 kmpc_calloc( size_t nelem, size_t elsize )
1466 {
1467  void * ptr;
1468  ptr = bgetz( __kmp_entry_thread(), (bufsize) (nelem * elsize + sizeof(ptr)) );
1469  if( ptr != NULL ) {
1470  // save allocated pointer just before one returned to user
1471  *(void**)ptr = ptr;
1472  ptr = (void**)ptr + 1;
1473  }
1474  return ptr;
1475 }
1476 
1477 void *
1478 kmpc_realloc( void * ptr, size_t size )
1479 {
1480  void * result = NULL;
1481  if ( ptr == NULL ) {
1482  // If pointer is NULL, realloc behaves like malloc.
1483  result = bget( __kmp_entry_thread(), (bufsize)(size + sizeof(ptr)) );
1484  // save allocated pointer just before one returned to user
1485  if( result != NULL ) {
1486  *(void**)result = result;
1487  result = (void**)result + 1;
1488  }
1489  } else if ( size == 0 ) {
1490  // If size is 0, realloc behaves like free.
1491  // The thread must be registered by the call to kmpc_malloc() or kmpc_calloc() before.
1492  // So it should be safe to call __kmp_get_thread(), not __kmp_entry_thread().
1493  KMP_ASSERT(*((void**)ptr - 1));
1494  brel( __kmp_get_thread(), *((void**)ptr - 1) );
1495  } else {
1496  result = bgetr( __kmp_entry_thread(), *((void**)ptr - 1), (bufsize)(size + sizeof(ptr)) );
1497  if( result != NULL ) {
1498  *(void**)result = result;
1499  result = (void**)result + 1;
1500  }
1501  }; // if
1502  return result;
1503 }
1504 
1505 /* NOTE: the library must have already been initialized by a previous allocate */
1506 
1507 void
1508 kmpc_free( void * ptr )
1509 {
1510  if ( ! __kmp_init_serial ) {
1511  return;
1512  }; // if
1513  if ( ptr != NULL ) {
1514  kmp_info_t *th = __kmp_get_thread();
1515  __kmp_bget_dequeue( th ); /* Release any queued buffers */
1516  // extract allocated pointer and free it
1517  KMP_ASSERT(*((void**)ptr - 1));
1518  brel( th, *((void**)ptr - 1) );
1519  };
1520 }
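/* Minimal usage sketch for the kmpc_* entry points above (hypothetical caller
   code, not part of this file).  Each pointer is serviced from the calling
   thread's bget pool; the word stored just before the returned address is the
   raw bget() pointer that kmpc_realloc()/kmpc_free() recover. */
#if 0
    double *v = (double *) kmpc_malloc( 100 * sizeof( double ) );
    double *a = (double *) kmpc_aligned_malloc( 100 * sizeof( double ), 64 );
    v = (double *) kmpc_realloc( v, 200 * sizeof( double ) );
    kmpc_free( a );
    kmpc_free( v );
#endif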
1521 
1522 
1523 /* ------------------------------------------------------------------------ */
1524 
1525 void *
1526 ___kmp_thread_malloc( kmp_info_t *th, size_t size KMP_SRC_LOC_DECL )
1527 {
1528  void * ptr;
1529  KE_TRACE( 30, (
1530  "-> __kmp_thread_malloc( %p, %d ) called from %s:%d\n",
1531  th,
1532  (int) size
1533  KMP_SRC_LOC_PARM
1534  ) );
1535  ptr = bget( th, (bufsize) size );
1536  KE_TRACE( 30, ( "<- __kmp_thread_malloc() returns %p\n", ptr ) );
1537  return ptr;
1538 }
1539 
1540 void *
1541 ___kmp_thread_calloc( kmp_info_t *th, size_t nelem, size_t elsize KMP_SRC_LOC_DECL )
1542 {
1543  void * ptr;
1544  KE_TRACE( 30, (
1545  "-> __kmp_thread_calloc( %p, %d, %d ) called from %s:%d\n",
1546  th,
1547  (int) nelem,
1548  (int) elsize
1549  KMP_SRC_LOC_PARM
1550  ) );
1551  ptr = bgetz( th, (bufsize) (nelem * elsize) );
1552  KE_TRACE( 30, ( "<- __kmp_thread_calloc() returns %p\n", ptr ) );
1553  return ptr;
1554 }
1555 
1556 void *
1557 ___kmp_thread_realloc( kmp_info_t *th, void *ptr, size_t size KMP_SRC_LOC_DECL )
1558 {
1559  KE_TRACE( 30, (
1560  "-> __kmp_thread_realloc( %p, %p, %d ) called from %s:%d\n",
1561  th,
1562  ptr,
1563  (int) size
1564  KMP_SRC_LOC_PARM
1565  ) );
1566  ptr = bgetr( th, ptr, (bufsize) size );
1567  KE_TRACE( 30, ( "<- __kmp_thread_realloc() returns %p\n", ptr ) );
1568  return ptr;
1569 }
1570 
1571 void
1572 ___kmp_thread_free( kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL )
1573 {
1574  KE_TRACE( 30, (
1575  "-> __kmp_thread_free( %p, %p ) called from %s:%d\n",
1576  th,
1577  ptr
1578  KMP_SRC_LOC_PARM
1579  ) );
1580  if ( ptr != NULL ) {
1581  __kmp_bget_dequeue( th ); /* Release any queued buffers */
1582  brel( th, ptr );
1583  }
1584  KE_TRACE( 30, ( "<- __kmp_thread_free()\n" ) );
1585 }
1586 
1587 /* ------------------------------------------------------------------------ */
1588 /* ------------------------------------------------------------------------ */
1589 /*
1590  If LEAK_MEMORY is defined, __kmp_free() will *not* free memory. It causes memory leaks, but it
1591  may be useful for debugging memory corruptions, use of freed pointers, etc.
1592 */
1593 /* #define LEAK_MEMORY */
1594 
1595 struct kmp_mem_descr { // Memory block descriptor.
1596  void * ptr_allocated; // Pointer returned by malloc(), subject for free().
1597  size_t size_allocated; // Size of allocated memory block.
1598  void * ptr_aligned; // Pointer to aligned memory, to be used by client code.
1599  size_t size_aligned; // Size of aligned memory block.
1600 };
1601 typedef struct kmp_mem_descr kmp_mem_descr_t;
1602 
1603 /*
1604  Allocate memory on requested boundary, fill allocated memory with 0x00.
1605  NULL is NEVER returned, __kmp_abort() is called in case of memory allocation error.
1606  Must use __kmp_free when freeing memory allocated by this routine!
1607  */
1608 static
1609 void *
1610 ___kmp_allocate_align( size_t size, size_t alignment KMP_SRC_LOC_DECL )
1611 {
1612  /*
1613  __kmp_allocate() allocates (by a call to malloc()) a bigger memory block than requested in
1614  order to return a properly aligned pointer. The original pointer returned by malloc() and the
1615  size of the allocated block are saved in a descriptor just before the aligned pointer. This
1616  information is used by __kmp_free() -- it has to pass the original pointer, not the aligned one, to free().
1617 
1618  +---------+------------+-----------------------------------+---------+
1619  | padding | descriptor |           aligned block           | padding |
1620  +---------+------------+-----------------------------------+---------+
1621  ^                      ^
1622  |                      |
1623  |                      +- Aligned pointer returned to caller
1624  +- Pointer returned by malloc()
1625 
1626  Aligned block is filled with zeros, paddings are filled with 0xEF.
1627  */
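 /* Worked example, assuming 64-bit pointers (sizeof( kmp_mem_descr_t ) == 32)
    and alignment == 64: if malloc() returns 0x1008, then
    addr_aligned = ( 0x1008 + 32 + 64 ) & ~63 == 0x1040, the descriptor is
    written at 0x1040 - 32 == 0x1020, and 0x1040 is returned to the caller.
    ___kmp_free() later reads the descriptor at ptr - sizeof( kmp_mem_descr_t )
    to recover the original 0x1008 for free(). */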
1628 
1629  kmp_mem_descr_t descr;
1630  kmp_uintptr_t addr_allocated; // Address returned by malloc().
1631  kmp_uintptr_t addr_aligned; // Aligned address to return to caller.
1632  kmp_uintptr_t addr_descr; // Address of memory block descriptor.
1633 
1634  KE_TRACE( 25, (
1635  "-> ___kmp_allocate_align( %d, %d ) called from %s:%d\n",
1636  (int) size,
1637  (int) alignment
1638  KMP_SRC_LOC_PARM
1639  ) );
1640 
1641  KMP_DEBUG_ASSERT( alignment < 32 * 1024 ); // Alignment should not be too big
1642  KMP_DEBUG_ASSERT( sizeof( void * ) <= sizeof( kmp_uintptr_t ) );
1643  // Make sure kmp_uintptr_t is enough to store addresses.
1644 
1645  descr.size_aligned = size;
1646  descr.size_allocated = descr.size_aligned + sizeof( kmp_mem_descr_t ) + alignment;
1647 
1648 #if KMP_DEBUG
1649  descr.ptr_allocated = _malloc_src_loc( descr.size_allocated, _file_, _line_ );
1650 #else
1651  descr.ptr_allocated = malloc_src_loc( descr.size_allocated KMP_SRC_LOC_PARM );
1652 #endif
1653  KE_TRACE( 10, (
1654  " malloc( %d ) returned %p\n",
1655  (int) descr.size_allocated,
1656  descr.ptr_allocated
1657  ) );
1658  if ( descr.ptr_allocated == NULL ) {
1659  KMP_FATAL( OutOfHeapMemory );
1660  };
1661 
1662  addr_allocated = (kmp_uintptr_t) descr.ptr_allocated;
1663  addr_aligned =
1664  ( addr_allocated + sizeof( kmp_mem_descr_t ) + alignment )
1665  & ~ ( alignment - 1 );
1666  addr_descr = addr_aligned - sizeof( kmp_mem_descr_t );
1667 
1668  descr.ptr_aligned = (void *) addr_aligned;
1669 
1670  KE_TRACE( 26, (
1671  " ___kmp_allocate_align: "
1672  "ptr_allocated=%p, size_allocated=%d, "
1673  "ptr_aligned=%p, size_aligned=%d\n",
1674  descr.ptr_allocated,
1675  (int) descr.size_allocated,
1676  descr.ptr_aligned,
1677  (int) descr.size_aligned
1678  ) );
1679 
1680  KMP_DEBUG_ASSERT( addr_allocated <= addr_descr );
1681  KMP_DEBUG_ASSERT( addr_descr + sizeof( kmp_mem_descr_t ) == addr_aligned );
1682  KMP_DEBUG_ASSERT( addr_aligned + descr.size_aligned <= addr_allocated + descr.size_allocated );
1683  KMP_DEBUG_ASSERT( addr_aligned % alignment == 0 );
1684 #ifdef KMP_DEBUG
1685  memset( descr.ptr_allocated, 0xEF, descr.size_allocated );
1686  // Fill allocated memory block with 0xEF.
1687 #endif
1688  memset( descr.ptr_aligned, 0x00, descr.size_aligned );
1689  // Fill the aligned memory block (which is intended for use by the caller) with 0x00. Do not
1690  // put this filling under KMP_DEBUG condition! Many callers expect zeroed memory. (Padding
1691  // bytes remain filled with 0xEF in debugging library.)
1692  * ( (kmp_mem_descr_t *) addr_descr ) = descr;
1693 
1694  KMP_MB();
1695 
1696  KE_TRACE( 25, ( "<- ___kmp_allocate_align() returns %p\n", descr.ptr_aligned ) );
1697  return descr.ptr_aligned;
1698 } // func ___kmp_allocate_align
1699 
1700 
1701 /*
1702  Allocate memory on cache line boundary, fill allocated memory with 0x00.
1703  Do not call this func directly! Use __kmp_allocate macro instead.
1704  NULL is NEVER returned, __kmp_abort() is called in case of memory allocation error.
1705  Must use __kmp_free when freeing memory allocated by this routine!
1706  */
1707 void *
1708 ___kmp_allocate( size_t size KMP_SRC_LOC_DECL )
1709 {
1710  void * ptr;
1711  KE_TRACE( 25, ( "-> __kmp_allocate( %d ) called from %s:%d\n", (int) size KMP_SRC_LOC_PARM ) );
1712  ptr = ___kmp_allocate_align( size, __kmp_align_alloc KMP_SRC_LOC_PARM );
1713  KE_TRACE( 25, ( "<- __kmp_allocate() returns %p\n", ptr ) );
1714  return ptr;
1715 } // func ___kmp_allocate
1716 
1717 #if (BUILD_MEMORY==FIRST_TOUCH)
1718 void *
1719 __kmp_ft_page_allocate(size_t size)
1720 {
1721  void *adr, *aadr;
1722 #if KMP_OS_LINUX
1723  /* TODO: Use this function to get page size everywhere */
1724  int page_size = getpagesize();
1725 #else
1726  /* TODO: Find windows function to get page size and use it everywhere */
1727  int page_size = PAGE_SIZE;
1728 #endif /* KMP_OS_LINUX */
1729 
1730  adr = (void *) __kmp_thread_malloc( __kmp_get_thread(),
1731  size + page_size + KMP_PTR_SKIP);
1732  if ( adr == 0 )
1733  KMP_FATAL( OutOfHeapMemory );
1734 
1735  /* check to see if adr is on a page boundary. */
1736  if ( ( (kmp_uintptr_t) adr & (page_size - 1)) == 0)
1737  /* nothing to do if adr is already on a page boundary. */
1738  aadr = adr;
1739  else
1740  /* else set aadr to the first page boundary in the allocated memory. */
1741  aadr = (void *) ( ( (kmp_uintptr_t) adr + page_size) & ~(page_size - 1) );
1742 
1743  /* the first touch by the owner thread. */
1744  *((void**)aadr) = adr;
1745 
1746  /* skip the memory space used for storing adr above. */
1747  return (void*)((char*)aadr + KMP_PTR_SKIP);
1748 }
1749 #endif
1750 
1751 /*
1752  Allocate memory on page boundary, fill allocated memory with 0x00.
1753  Do not call this func directly! Use __kmp_page_allocate macro instead.
1754  NULL is NEVER returned, __kmp_abort() is called in case of memory allocation error.
1755  Must use __kmp_free when freeing memory allocated by this routine!
1756  */
1757 void *
1758 ___kmp_page_allocate( size_t size KMP_SRC_LOC_DECL )
1759 {
1760  int page_size = 8 * 1024;
1761  void * ptr;
1762 
1763  KE_TRACE( 25, (
1764  "-> __kmp_page_allocate( %d ) called from %s:%d\n",
1765  (int) size
1766  KMP_SRC_LOC_PARM
1767  ) );
1768  ptr = ___kmp_allocate_align( size, page_size KMP_SRC_LOC_PARM );
1769  KE_TRACE( 25, ( "<- __kmp_page_allocate( %d ) returns %p\n", (int) size, ptr ) );
1770  return ptr;
1771 } // ___kmp_page_allocate
1772 
1773 /*
1774  Free memory allocated by __kmp_allocate() and __kmp_page_allocate().
1775  In debug mode, fill the memory block with 0xEF before call to free().
1776 */
1777 void
1778 ___kmp_free( void * ptr KMP_SRC_LOC_DECL )
1779 {
1780  kmp_mem_descr_t descr;
1781  kmp_uintptr_t addr_allocated; // Address returned by malloc().
1782  kmp_uintptr_t addr_aligned; // Aligned address passed by caller.
1783 
1784  KE_TRACE( 25, ( "-> __kmp_free( %p ) called from %s:%d\n", ptr KMP_SRC_LOC_PARM ) );
1785  KMP_ASSERT( ptr != NULL );
1786 
1787  descr = * ( kmp_mem_descr_t *) ( (kmp_uintptr_t) ptr - sizeof( kmp_mem_descr_t ) );
1788 
1789  KE_TRACE( 26, ( " __kmp_free: "
1790  "ptr_allocated=%p, size_allocated=%d, "
1791  "ptr_aligned=%p, size_aligned=%d\n",
1792  descr.ptr_allocated, (int) descr.size_allocated,
1793  descr.ptr_aligned, (int) descr.size_aligned ));
1794 
1795  addr_allocated = (kmp_uintptr_t) descr.ptr_allocated;
1796  addr_aligned = (kmp_uintptr_t) descr.ptr_aligned;
1797 
1798  KMP_DEBUG_ASSERT( addr_aligned % CACHE_LINE == 0 );
1799  KMP_DEBUG_ASSERT( descr.ptr_aligned == ptr );
1800  KMP_DEBUG_ASSERT( addr_allocated + sizeof( kmp_mem_descr_t ) <= addr_aligned );
1801  KMP_DEBUG_ASSERT( descr.size_aligned < descr.size_allocated );
1802  KMP_DEBUG_ASSERT( addr_aligned + descr.size_aligned <= addr_allocated + descr.size_allocated );
1803 
1804  #ifdef KMP_DEBUG
1805  memset( descr.ptr_allocated, 0xEF, descr.size_allocated );
1806  // Fill the memory block with 0xEF; this helps catch accidental use of freed memory.
1807  #endif
1808 
1809  #ifndef LEAK_MEMORY
1810  KE_TRACE( 10, ( " free( %p )\n", descr.ptr_allocated ) );
1811  # ifdef KMP_DEBUG
1812  _free_src_loc( descr.ptr_allocated, _file_, _line_ );
1813  # else
1814  free_src_loc( descr.ptr_allocated KMP_SRC_LOC_PARM );
1815  # endif
1816  #endif
1817  KMP_MB();
1818  KE_TRACE( 25, ( "<- __kmp_free() returns\n" ) );
1819 } // func ___kmp_free
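/*
   Illustrative helper (not part of the runtime): for any pointer returned by
   __kmp_allocate() or __kmp_page_allocate(), the kmp_mem_descr_t bookkeeping
   record sits in the bytes immediately preceding it, which is exactly what
   ___kmp_free() above relies on. example_allocation_overhead is a
   hypothetical name.
 */
static size_t
example_allocation_overhead( void * ptr )
{
    kmp_mem_descr_t descr =
        * (kmp_mem_descr_t *) ( (kmp_uintptr_t) ptr - sizeof( kmp_mem_descr_t ) );
    // Overhead = bytes obtained from malloc() minus bytes usable by the caller.
    return descr.size_allocated - descr.size_aligned;
}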
1820 
1821 /* ------------------------------------------------------------------------ */
1822 /* ------------------------------------------------------------------------ */
1823 
1824 #if USE_FAST_MEMORY == 3
1825 // Allocate fast memory by first scanning the thread's free lists
1826 // If a chunk the right size exists, grab it off the free list.
1827 // Otherwise allocate normally using kmp_thread_malloc.
1828 
1829 // AC: How to choose the limit? Just use 16 for now...
1830 #define KMP_FREE_LIST_LIMIT 16
1831 
1832 // Always use 128 bytes for determining buckets for caching memory blocks
1833 #define DCACHE_LINE 128
1834 
1835 void *
1836 ___kmp_fast_allocate( kmp_info_t *this_thr, size_t size KMP_SRC_LOC_DECL )
1837 {
1838  void * ptr;
1839  int num_lines;
1840  int idx;
1841  int index;
1842  void * alloc_ptr;
1843  size_t alloc_size;
1844  kmp_mem_descr_t * descr;
1845 
1846  KE_TRACE( 25, ( "-> __kmp_fast_allocate( T#%d, %d ) called from %s:%d\n",
1847  __kmp_gtid_from_thread(this_thr), (int) size KMP_SRC_LOC_PARM ) );
1848 
1849  num_lines = ( size + DCACHE_LINE - 1 ) / DCACHE_LINE;
1850  idx = num_lines - 1;
1851  KMP_DEBUG_ASSERT( idx >= 0 );
1852  if ( idx < 2 ) {
1853  index = 0; // idx is [ 0, 1 ], use first free list
1854  num_lines = 2; // 1 or 2 cache lines, or less than one cache line
1855  } else if ( ( idx >>= 2 ) == 0 ) {
1856  index = 1; // idx is [ 2, 3 ], use second free list
1857  num_lines = 4; // 3, 4 cache lines
1858  } else if ( ( idx >>= 2 ) == 0 ) {
1859  index = 2; // idx is [ 4, 15 ], use third free list
1860  num_lines = 16; // 5, 6, ..., 16 cache lines
1861  } else if ( ( idx >>= 2 ) == 0 ) {
1862  index = 3; // idx is [ 16, 63 ], use fourth free list
1863  num_lines = 64; // 17, 18, ..., 64 cache lines
1864  } else {
1865  goto alloc_call; // 65 or more cache lines ( > 8KB ), don't use free lists
1866  }
1867 
1868  ptr = this_thr->th.th_free_lists[index].th_free_list_self;
1869  if ( ptr != NULL ) {
1870  // pop the head of no-sync free list
1871  this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
1872  KMP_DEBUG_ASSERT( this_thr ==
1873  ((kmp_mem_descr_t *)( (kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t) ))->ptr_aligned );
1874  goto end;
1875  }
1876  ptr = TCR_SYNC_PTR( this_thr->th.th_free_lists[index].th_free_list_sync );
1877  if ( ptr != NULL ) {
1878  // no-sync free list is empty, use sync free list (filled in by other threads only)
1879  // pop the head of the sync free list, push NULL instead
1880  while ( ! KMP_COMPARE_AND_STORE_PTR(
1881  &this_thr->th.th_free_lists[index].th_free_list_sync, ptr, NULL ) )
1882  {
1883  KMP_CPU_PAUSE();
1884  ptr = TCR_SYNC_PTR( this_thr->th.th_free_lists[index].th_free_list_sync );
1885  }
1886  // push the rest of the chain onto the no-sync free list (can be NULL if there was only one block)
1887  this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
1888  KMP_DEBUG_ASSERT( this_thr ==
1889  ((kmp_mem_descr_t *)( (kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t) ))->ptr_aligned );
1890  goto end;
1891  }
1892 
1893  alloc_call:
1894  // haven't found block in the free lists, thus allocate it
1895  size = num_lines * DCACHE_LINE;
1896 
1897  alloc_size = size + sizeof( kmp_mem_descr_t ) + DCACHE_LINE;
1898  KE_TRACE( 25, ( "__kmp_fast_allocate: T#%d Calling __kmp_thread_malloc with alloc_size %d\n",
1899  __kmp_gtid_from_thread( this_thr ), alloc_size ) );
1900  alloc_ptr = bget( this_thr, (bufsize) alloc_size );
1901 
1902  // align ptr to DCACHE_LINE
1903  ptr = (void *)(( ((kmp_uintptr_t)alloc_ptr) + sizeof(kmp_mem_descr_t) + DCACHE_LINE ) & ~( DCACHE_LINE - 1 ));
1904  descr = (kmp_mem_descr_t *)( ((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t) );
1905 
1906  descr->ptr_allocated = alloc_ptr; // remember allocated pointer
1907  // we don't need size_allocated
1908  descr->ptr_aligned = (void *)this_thr; // remember allocating thread
1909  // (it is already saved in bget buffer,
1910  // but we may want to use another allocator in future)
1911  descr->size_aligned = size;
1912 
1913  end:
1914  KE_TRACE( 25, ( "<- __kmp_fast_allocate( T#%d ) returns %p\n",
1915  __kmp_gtid_from_thread( this_thr ), ptr ) );
1916  return ptr;
1917 } // func __kmp_fast_allocate
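/*
   Standalone sketch of the bucket mapping used above (illustrative only;
   example_fast_bucket is a hypothetical helper). Requests are rounded up to
   2, 4, 16 or 64 cache lines of DCACHE_LINE (128) bytes and served from free
   lists 0..3; anything larger than 64 cache lines ( > 8KB ) bypasses the
   free lists:
       size <=  256 B  (  2 lines )  -> index 0
       size <=  512 B  (  4 lines )  -> index 1
       size <= 2048 B  ( 16 lines )  -> index 2
       size <= 8192 B  ( 64 lines )  -> index 3
 */
static int
example_fast_bucket( size_t size )
{
    int idx = (int)( ( size + DCACHE_LINE - 1 ) / DCACHE_LINE ) - 1;
    if ( idx < 2 )            return 0;   //  1.. 2 cache lines
    if ( ( idx >>= 2 ) == 0 ) return 1;   //  3.. 4 cache lines
    if ( ( idx >>= 2 ) == 0 ) return 2;   //  5..16 cache lines
    if ( ( idx >>= 2 ) == 0 ) return 3;   // 17..64 cache lines
    return -1;                            // 65+ cache lines: no free list
}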
1918 
1919 // Free fast memory and place it on the thread's free list if it is of
1920 // the correct size.
1921 void
1922 ___kmp_fast_free( kmp_info_t *this_thr, void * ptr KMP_SRC_LOC_DECL )
1923 {
1924  kmp_mem_descr_t * descr;
1925  kmp_info_t * alloc_thr;
1926  size_t size;
1927  size_t idx;
1928  int index;
1929 
1930  KE_TRACE( 25, ( "-> __kmp_fast_free( T#%d, %p ) called from %s:%d\n",
1931  __kmp_gtid_from_thread(this_thr), ptr KMP_SRC_LOC_PARM ) );
1932  KMP_ASSERT( ptr != NULL );
1933 
1934  descr = (kmp_mem_descr_t *)( ((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t) );
1935 
1936  KE_TRACE(26, (" __kmp_fast_free: size_aligned=%d\n",
1937  (int) descr->size_aligned ) );
1938 
1939  size = descr->size_aligned; // 2, 4, 16, 64, 65, 66, ... cache lines
1940 
1941  idx = DCACHE_LINE * 2; // 2 cache lines is the minimal block size
1942  if ( idx == size ) {
1943  index = 0; // 2 cache lines
1944  } else if ( ( idx <<= 1 ) == size ) {
1945  index = 1; // 4 cache lines
1946  } else if ( ( idx <<= 2 ) == size ) {
1947  index = 2; // 16 cache lines
1948  } else if ( ( idx <<= 2 ) == size ) {
1949  index = 3; // 64 cache lines
1950  } else {
1951  KMP_DEBUG_ASSERT( size > DCACHE_LINE * 64 );
1952  goto free_call; // 65 or more cache lines ( > 8KB )
1953  }
1954 
1955  alloc_thr = (kmp_info_t *)descr->ptr_aligned; // get thread owning the block
1956  if ( alloc_thr == this_thr ) {
1957  // push block to self no-sync free list, linking previous head (LIFO)
1958  *((void **)ptr) = this_thr->th.th_free_lists[index].th_free_list_self;
1959  this_thr->th.th_free_lists[index].th_free_list_self = ptr;
1960  } else {
1961  void * head = this_thr->th.th_free_lists[index].th_free_list_other;
1962  if ( head == NULL ) {
1963  // Create new free list
1964  this_thr->th.th_free_lists[index].th_free_list_other = ptr;
1965  *((void **)ptr) = NULL; // mark the tail of the list
1966  descr->size_allocated = (size_t)1; // head of the list keeps its length
1967  } else {
1968  // need to check the existing "other" list's owner thread and the size of the queue
1969  kmp_mem_descr_t * dsc = (kmp_mem_descr_t *)( (char*)head - sizeof(kmp_mem_descr_t) );
1970  kmp_info_t * q_th = (kmp_info_t *)(dsc->ptr_aligned); // allocating thread, same for all queue nodes
1971  size_t q_sz = dsc->size_allocated + 1; // new queue size if we add the current block
1972  if ( q_th == alloc_thr && q_sz <= KMP_FREE_LIST_LIMIT ) {
1973  // we can add the current block to the "other" list, no sync needed
1974  *((void **)ptr) = head;
1975  descr->size_allocated = q_sz;
1976  this_thr->th.th_free_lists[index].th_free_list_other = ptr;
1977  } else {
1978  // either the queue's owning thread has changed or the size limit was exceeded;
1979  // return the old queue to its allocating thread (q_th) synchronously,
1980  // and start a new list for alloc_thr's blocks
1981  void * old_ptr;
1982  void * tail = head;
1983  void * next = *((void **)head);
1984  while ( next != NULL ) {
1985  KMP_DEBUG_ASSERT(
1986  // queue size should decrease by 1 each step through the list
1987  ((kmp_mem_descr_t*)((char*)next - sizeof(kmp_mem_descr_t)))->size_allocated + 1 ==
1988  ((kmp_mem_descr_t*)((char*)tail - sizeof(kmp_mem_descr_t)))->size_allocated );
1989  tail = next; // remember tail node
1990  next = *((void **)next);
1991  }
1992  KMP_DEBUG_ASSERT( q_th != NULL );
1993  // push block to owner's sync free list
1994  old_ptr = TCR_PTR( q_th->th.th_free_lists[index].th_free_list_sync );
1995  /* the next pointer must be set before setting free_list to ptr to avoid
1996  exposing a broken list to other threads, even for an instant. */
1997  *((void **)tail) = old_ptr;
1998 
1999  while ( ! KMP_COMPARE_AND_STORE_PTR(
2000  &q_th->th.th_free_lists[index].th_free_list_sync,
2001  old_ptr,
2002  head ) )
2003  {
2004  KMP_CPU_PAUSE();
2005  old_ptr = TCR_PTR( q_th->th.th_free_lists[index].th_free_list_sync );
2006  *((void **)tail) = old_ptr;
2007  }
2008 
2009  // start a new list of not-self blocks
2010  this_thr->th.th_free_lists[index].th_free_list_other = ptr;
2011  *((void **)ptr) = NULL;
2012  descr->size_allocated = (size_t)1; // head of queue keeps its length
2013  }
2014  }
2015  }
2016  goto end;
2017 
2018  free_call:
2019  KE_TRACE(25, ( "__kmp_fast_free: T#%d Calling __kmp_thread_free for size %d\n",
2020  __kmp_gtid_from_thread( this_thr), size ) );
2021  __kmp_bget_dequeue( this_thr ); /* Release any queued buffers */
2022  brel( this_thr, descr->ptr_allocated );
2023 
2024  end:
2025  KE_TRACE( 25, ( "<- __kmp_fast_free() returns\n" ) );
2026 
2027 } // func __kmp_fast_free
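/*
   Standalone sketch of the lock-free chain push used above, written with C11
   <stdatomic.h> instead of the runtime's TCR_PTR / KMP_COMPARE_AND_STORE_PTR
   macros (illustrative only; the runtime itself does not assume C11 atomics,
   and example_sync_list_push is a hypothetical helper). As in the code above,
   the first word of each block is reused as the "next" link, and the tail is
   re-linked on every CAS retry so other threads never observe a broken list.
 */
#include <stdatomic.h>

static void
example_sync_list_push( _Atomic(void *) * list_head, void * chain_head, void * chain_tail )
{
    void * old_head = atomic_load( list_head );
    do {
        *((void **) chain_tail) = old_head;   // link donated chain in front of current head
    } while ( ! atomic_compare_exchange_weak( list_head, &old_head, chain_head ) );
}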
2028 
2029 
2030 // Initialize the thread free lists related to fast memory
2031 // Only do this when a thread is initially created.
2032 void
2033 __kmp_initialize_fast_memory( kmp_info_t *this_thr )
2034 {
2035  KE_TRACE(10, ( "__kmp_initialize_fast_memory: Called from th %p\n", this_thr ) );
2036 
2037  memset ( this_thr->th.th_free_lists, 0, NUM_LISTS * sizeof( kmp_free_list_t ) );
2038 }
2039 
2040 // Free the memory in the thread free lists related to fast memory
2041 // Only do this when a thread is being reaped (destroyed).
2042 void
2043 __kmp_free_fast_memory( kmp_info_t *th )
2044 {
2045  // Assuming BGET is the underlying allocator, walk through its structures...
2046  int bin;
2047  thr_data_t * thr = get_thr_data( th );
2048  void ** lst = NULL;
2049 
2050  KE_TRACE(5, ( "__kmp_free_fast_memory: Called T#%d\n",
2051  __kmp_gtid_from_thread( th ) ) );
2052 
2053  __kmp_bget_dequeue( th ); // Release any queued buffers
2054 
2055  // Dig through free lists and extract all allocated blocks
2056  for ( bin = 0; bin < MAX_BGET_BINS; ++bin ) {
2057  bfhead_t * b = thr->freelist[ bin ].ql.flink;
2058  while ( b != &thr->freelist[ bin ] ) {
2059  if ( (kmp_uintptr_t)b->bh.bb.bthr & 1 ) { // is the buffer an allocated block?
2060  *((void**)b) = lst; // link the list (overwrites bthr, but flink is still intact)
2061  lst = (void**)b; // push b into lst
2062  }
2063  b = b->ql.flink; // get next buffer
2064  }
2065  }
2066  while ( lst != NULL ) {
2067  void * next = *lst;
2068  KE_TRACE(10, ( "__kmp_free_fast_memory: freeing %p, next=%p th %p (%d)\n",
2069  lst, next, th, __kmp_gtid_from_thread( th ) ) );
2070  (*thr->relfcn)(lst);
2071  #if BufStats
2072  // count blocks to prevent problems in __kmp_finalize_bget()
2073  thr->numprel++; /* Nr of expansion block releases */
2074  thr->numpblk--; /* Total number of blocks */
2075  #endif
2076  lst = (void**)next;
2077  }
2078 
2079  KE_TRACE(5, ( "__kmp_free_fast_memory: Freed T#%d\n",
2080  __kmp_gtid_from_thread( th ) ) );
2081 }
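/*
   Illustrative sketch of the low-bit tagging tested in __kmp_free_fast_memory()
   above (example_* names are hypothetical, and the untagging helper is an
   assumption about how the tagged pointer would be decoded): because thread
   descriptors are at least 2-byte aligned, bit 0 of the stored thread pointer
   can be set to mark a buffer as allocated and masked off again to recover the
   real pointer.
 */
static void *
example_tag_allocated( kmp_info_t * th )
{
    return (void *) ( (kmp_uintptr_t) th | 1 );                          // mark as allocated
}

static kmp_info_t *
example_untag_thread( void * bthr )
{
    return (kmp_info_t *) ( (kmp_uintptr_t) bthr & ~(kmp_uintptr_t)1 );  // recover thread pointer
}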
2082 
2083 #endif // USE_FAST_MEMORY