16 #include "kmp_wrapper_malloc.h" 23 typedef int (*bget_compact_t)(size_t, int);
24 typedef void *(*bget_acquire_t)(size_t);
25 typedef void (*bget_release_t)(
void *);
30 #if KMP_ARCH_X86 || KMP_ARCH_ARM 31 typedef kmp_int32 bufsize;
33 typedef kmp_int64 bufsize;
36 typedef ssize_t bufsize;
41 typedef enum bget_mode {
47 static void bpool(kmp_info_t *th,
void *buffer, bufsize len);
48 static void *bget(kmp_info_t *th, bufsize size);
49 static void *bgetz(kmp_info_t *th, bufsize size);
50 static void *bgetr(kmp_info_t *th,
void *buffer, bufsize newsize);
51 static void brel(kmp_info_t *th,
void *buf);
52 static void bectl(kmp_info_t *th, bget_compact_t compact,
53 bget_acquire_t acquire, bget_release_t release,
57 static void bstats(kmp_info_t *th, bufsize *curalloc, bufsize *totfree,
58 bufsize *maxfree,
long *nget,
long *nrel);
59 static void bstatse(kmp_info_t *th, bufsize *pool_incr,
long *npool,
60 long *npget,
long *nprel,
long *ndget,
long *ndrel);
61 static void bufdump(kmp_info_t *th,
void *buf);
62 static void bpoold(kmp_info_t *th,
void *pool,
int dumpalloc,
int dumpfree);
63 static int bpoolv(kmp_info_t *th,
void *pool);
73 #if KMP_ARCH_X86 || !KMP_HAVE_QUAD 76 #define AlignType double 81 #define AlignType _Quad 117 static bufsize bget_bin_size[] = {
127 1 << 16, 1 << 17, 1 << 18, 1 << 19, 1 << 20,
135 #define MAX_BGET_BINS (int)(sizeof(bget_bin_size) / sizeof(bufsize)) 142 typedef struct qlinks {
143 struct bfhead *flink;
144 struct bfhead *blink;
148 typedef struct bhead2 {
156 typedef union bhead {
159 char b_pad[
sizeof(bhead2_t) + (SizeQuant - (
sizeof(bhead2_t) % SizeQuant))];
162 #define BH(p) ((bhead_t *)(p)) 165 typedef struct bdhead {
169 #define BDH(p) ((bdhead_t *)(p)) 172 typedef struct bfhead {
176 #define BFH(p) ((bfhead_t *)(p)) 178 typedef struct thr_data {
179 bfhead_t freelist[MAX_BGET_BINS];
184 long numpget, numprel;
185 long numdget, numdrel;
189 bget_compact_t compfcn;
190 bget_acquire_t acqfcn;
191 bget_release_t relfcn;
204 #define QLSize (sizeof(qlinks_t)) 205 #define SizeQ ((SizeQuant > QLSize) ? SizeQuant : QLSize) 208 ~(((bufsize)(1) << (sizeof(bufsize) * CHAR_BIT - 1)) | (SizeQuant - 1))) 216 ((bufsize)(-(((((bufsize)1) << ((int)sizeof(bufsize) * 8 - 2)) - 1) * 2) - 2)) 219 static int bget_get_bin(bufsize size) {
221 int lo = 0, hi = MAX_BGET_BINS - 1;
223 KMP_DEBUG_ASSERT(size > 0);
225 while ((hi - lo) > 1) {
226 int mid = (lo + hi) >> 1;
227 if (size < bget_bin_size[mid])
233 KMP_DEBUG_ASSERT((lo >= 0) && (lo < MAX_BGET_BINS));
// Prepare the allocator bookkeeping record kept in th->th.th_local.bget_data.
// An already-attached record is reused; otherwise one is obtained from
// __kmp_allocate(). The record is then zero-filled and each bin's free-list
// head is linked to itself.
// NOTE(review): the declarations of `data` and `i` and the closing braces are
// not visible in this view of the file.
238 static void set_thr_data(kmp_info_t *th) {
// Reuse the record attached to the thread, or allocate a fresh one.
242 data = (thr_data_t *)((!th->th.th_local.bget_data)
243 ? __kmp_allocate(
sizeof(*data))
244 : th->th.th_local.bget_data);
246 memset(data,
'\0',
sizeof(*data));
// A list head whose links point back at itself represents an empty bin; the
// list walkers in this file stop when they reach &freelist[bin] again.
248 for (i = 0; i < MAX_BGET_BINS; ++i) {
249 data->freelist[i].ql.flink = &data->freelist[i];
250 data->freelist[i].ql.blink = &data->freelist[i];
// Publish the record on the thread and start with no queued buffers.
253 th->th.th_local.bget_data = data;
254 th->th.th_local.bget_list = 0;
// The lock is only set up when the compare-and-exchange variant is disabled.
255 #if !USE_CMP_XCHG_FOR_BGET 256 #ifdef USE_QUEUING_LOCK_FOR_BGET 257 __kmp_init_lock(&th->th.th_local.bget_lock);
259 __kmp_init_bootstrap_lock(&th->th.th_local.bget_lock);
// Read the allocator record attached to `th` (th->th.th_local.bget_data) and
// debug-assert that it is non-null.
// NOTE(review): the declaration of `data` and the return statement are not
// visible here; presumably `data` is what gets returned -- confirm.
264 static thr_data_t *get_thr_data(kmp_info_t *th) {
267 data = (thr_data_t *)th->th.th_local.bget_data;
269 KMP_DEBUG_ASSERT(data != 0);
// Debug helper: starting from th->th.th_local.bget_list, inspect queued
// buffers and assert that each has a non-zero size field.
// NOTE(review): the loop header and closing braces are not visible in this
// view, so the exact termination condition cannot be stated here.
276 static void __kmp_bget_validate_queue(kmp_info_t *th) {
279 void *p = (
void *)th->th.th_local.bget_list;
// The block header is located sizeof(bhead_t) bytes before the queued pointer.
282 bfhead_t *b = BFH(((
char *)p) -
sizeof(bhead_t));
284 KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
// Queued buffers are chained through the header's forward link.
285 p = (
void *)b->ql.flink;
// Detach the chain of buffers queued on th->th.th_local.bget_list, leaving the
// list empty, and then step through the detached chain.
// Two detach strategies are compiled: a compare-and-store retry loop when
// USE_CMP_XCHG_FOR_BGET is set, otherwise a read-and-clear performed while
// holding th->th.th_local.bget_lock.
// NOTE(review): the statement that processes each detached buffer is not
// visible in this view (presumably it is handed to brel) -- confirm.
292 static void __kmp_bget_dequeue(kmp_info_t *th) {
293 void *p = TCR_SYNC_PTR(th->th.th_local.bget_list);
// Retry until the observed list head is swapped for nullptr in one step.
296 #if USE_CMP_XCHG_FOR_BGET 298 volatile void *old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
299 while (!KMP_COMPARE_AND_STORE_PTR(&th->th.th_local.bget_list,
300 CCAST(
void *, old_value),
nullptr)) {
302 old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
// The value that was successfully replaced is the chain to process.
304 p = CCAST(
void *, old_value);
307 #ifdef USE_QUEUING_LOCK_FOR_BGET 308 __kmp_acquire_lock(&th->th.th_local.bget_lock, __kmp_gtid_from_thread(th));
310 __kmp_acquire_bootstrap_lock(&th->th.th_local.bget_lock);
// Lock-based variant: take the chain and reset the list head under the lock.
313 p = (
void *)th->th.th_local.bget_list;
314 th->th.th_local.bget_list = 0;
316 #ifdef USE_QUEUING_LOCK_FOR_BGET
317 __kmp_release_lock(&th->th.th_local.bget_lock, __kmp_gtid_from_thread(th));
319 __kmp_release_bootstrap_lock(&th->th.th_local.bget_lock);
// Per-buffer step: locate the header in front of the queued pointer and
// sanity-check it before moving to the next buffer.
326 bfhead_t *b = BFH(((
char *)p) -
sizeof(bhead_t));
328 KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
329 KMP_DEBUG_ASSERT(((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1) ==
331 KMP_DEBUG_ASSERT(b->ql.blink == 0);
// Advance along the forward link.
333 p = (
void *)b->ql.flink;
// Push `buf` onto the front of th->th.th_local.bget_list. The buffer's header
// forward link is set to the previous list head, and the list head is then
// replaced by `buf`.
// With USE_CMP_XCHG_FOR_BGET the replacement is a compare-and-store retry
// loop; otherwise it is done while holding th->th.th_local.bget_lock.
// NOTE(review): the remainder of the parameter list (the lock variant uses a
// `rel_gtid` value) and some statements are not visible in this view.
341 static void __kmp_bget_enqueue(kmp_info_t *th,
void *buf
342 #ifdef USE_QUEUING_LOCK_FOR_BGET
347 bfhead_t *b = BFH(((
char *)buf) -
sizeof(bhead_t));
349 KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
350 KMP_DEBUG_ASSERT(((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1) ==
355 KC_TRACE(10, (
"__kmp_bget_enqueue: moving buffer to T#%d list\n",
356 __kmp_gtid_from_thread(th)));
358 #if USE_CMP_XCHG_FOR_BGET 360 volatile void *old_value = TCR_PTR(th->th.th_local.bget_list);
// Link to the head we observed before attempting the swap.
363 b->ql.flink = BFH(CCAST(
void *, old_value));
365 while (!KMP_COMPARE_AND_STORE_PTR(&th->th.th_local.bget_list,
366 CCAST(
void *, old_value), buf)) {
// The head changed underneath us: re-read it and re-link before retrying.
368 old_value = TCR_PTR(th->th.th_local.bget_list);
371 b->ql.flink = BFH(CCAST(
void *, old_value));
375 #ifdef USE_QUEUING_LOCK_FOR_BGET 376 __kmp_acquire_lock(&th->th.th_local.bget_lock, rel_gtid);
378 __kmp_acquire_bootstrap_lock(&th->th.th_local.bget_lock);
// Lock-based variant: link to the current head and install `buf` as the head.
381 b->ql.flink = BFH(th->th.th_local.bget_list);
382 th->th.th_local.bget_list = (
void *)buf;
384 #ifdef USE_QUEUING_LOCK_FOR_BGET 385 __kmp_release_lock(&th->th.th_local.bget_lock, rel_gtid);
387 __kmp_release_bootstrap_lock(&th->th.th_local.bget_lock);
// Link free block `b` into the free list of the bin that bget_get_bin()
// selects for the block's size. The block is placed immediately before the
// bin's list head, i.e. as the last element of that circular list.
// NOTE(review): the declaration of `bin` and the closing brace are not visible
// in this view.
393 static void __kmp_bget_insert_into_freelist(thr_data_t *thr, bfhead_t *b) {
// Both the block address and its size must be multiples of SizeQuant.
396 KMP_DEBUG_ASSERT(((
size_t)b) % SizeQuant == 0);
397 KMP_DEBUG_ASSERT(b->bh.bb.bsize % SizeQuant == 0);
399 bin = bget_get_bin(b->bh.bb.bsize);
// The bin's list must be consistently linked before it is modified.
401 KMP_DEBUG_ASSERT(thr->freelist[bin].ql.blink->ql.flink ==
402 &thr->freelist[bin]);
403 KMP_DEBUG_ASSERT(thr->freelist[bin].ql.flink->ql.blink ==
404 &thr->freelist[bin]);
// New block points forward to the head and backward to the old last element.
406 b->ql.flink = &thr->freelist[bin];
407 b->ql.blink = thr->freelist[bin].ql.blink;
// Head and old last element are then redirected to the new block.
409 thr->freelist[bin].ql.blink = b;
410 b->ql.blink->ql.flink = b;
// Unlink free block `b` from the doubly linked list it is currently on by
// making its neighbours point at each other. The links stored in `b` itself
// are left untouched by the visible statements.
414 static void __kmp_bget_remove_from_freelist(bfhead_t *b) {
// Both neighbours must currently point back at `b`.
415 KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
416 KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);
418 b->ql.blink->ql.flink = b->ql.flink;
419 b->ql.flink->ql.blink = b->ql.blink;
// Gather free-space figures for th's allocator record.
//   *total_free  receives the sum over free blocks of (block size - header).
//   *max_free    is raised to the size of each bin's `best` candidate and is
//                finally reduced by the header size when it exceeds it.
// NOTE(review): the local declarations, the statement that updates `best`,
// and the loop advance are not visible in this view; the visible comparison
// selects on `b->bh.bb.bsize < best->bh.bb.bsize` -- confirm that this is the
// intended candidate for a "max free" figure.
423 static void bcheck(kmp_info_t *th, bufsize *max_free, bufsize *total_free) {
424 thr_data_t *thr = get_thr_data(th);
427 *total_free = *max_free = 0;
429 for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
// `best` starts at the list head, which doubles as the "nothing found" marker.
432 best = &thr->freelist[bin];
435 while (b != &thr->freelist[bin]) {
436 *total_free += (b->bh.bb.bsize -
sizeof(bhead_t));
437 if ((best == &thr->freelist[bin]) || (b->bh.bb.bsize < best->bh.bb.bsize))
444 if (*max_free < best->bh.bb.bsize)
445 *max_free = best->bh.bb.bsize;
448 if (*max_free > (bufsize)
sizeof(bhead_t))
449 *max_free -=
sizeof(bhead_t);
453 static void *bget(kmp_info_t *th, bufsize requested_size) {
454 thr_data_t *thr = get_thr_data(th);
455 bufsize size = requested_size;
463 if (size < 0 || size +
sizeof(bhead_t) > MaxSize) {
467 __kmp_bget_dequeue(th);
469 if (size < (bufsize)SizeQ) {
472 #if defined(SizeQuant) && (SizeQuant > 1) 473 size = (size + (SizeQuant - 1)) & (~(SizeQuant - 1));
476 size +=
sizeof(bhead_t);
477 KMP_DEBUG_ASSERT(size >= 0);
478 KMP_DEBUG_ASSERT(size % SizeQuant == 0);
480 use_blink = (thr->mode == bget_mode_lifo);
489 for (bin = bget_get_bin(size); bin < MAX_BGET_BINS; ++bin) {
491 b = (use_blink ? thr->freelist[bin].ql.blink
492 : thr->freelist[bin].ql.flink);
494 if (thr->mode == bget_mode_best) {
495 best = &thr->freelist[bin];
499 while (b != &thr->freelist[bin]) {
500 if (b->bh.bb.bsize >= (bufsize)size) {
501 if ((best == &thr->freelist[bin]) ||
502 (b->bh.bb.bsize < best->bh.bb.bsize)) {
508 b = (use_blink ? b->ql.blink : b->ql.flink);
513 while (b != &thr->freelist[bin]) {
514 if ((bufsize)b->bh.bb.bsize >= (bufsize)size) {
523 if ((b->bh.bb.bsize - (bufsize)size) >
524 (bufsize)(SizeQ + (
sizeof(bhead_t)))) {
527 ba = BH(((
char *)b) + (b->bh.bb.bsize - (bufsize)size));
528 bn = BH(((
char *)ba) + size);
530 KMP_DEBUG_ASSERT(bn->bb.prevfree == b->bh.bb.bsize);
533 b->bh.bb.bsize -= (bufsize)size;
536 ba->bb.prevfree = b->bh.bb.bsize;
539 ba->bb.bsize = -size;
548 __kmp_bget_remove_from_freelist(b);
549 __kmp_bget_insert_into_freelist(thr, b);
551 thr->totalloc += (size_t)size;
554 buf = (
void *)((((
char *)ba) +
sizeof(bhead_t)));
555 KMP_DEBUG_ASSERT(((
size_t)buf) % SizeQuant == 0);
560 ba = BH(((
char *)b) + b->bh.bb.bsize);
562 KMP_DEBUG_ASSERT(ba->bb.prevfree == b->bh.bb.bsize);
567 __kmp_bget_remove_from_freelist(b);
569 thr->totalloc += (size_t)b->bh.bb.bsize;
573 b->bh.bb.bsize = -(b->bh.bb.bsize);
576 TCW_PTR(ba->bb.bthr, th);
582 buf = (
void *)&(b->ql);
583 KMP_DEBUG_ASSERT(((
size_t)buf) % SizeQuant == 0);
589 b = (use_blink ? b->ql.blink : b->ql.flink);
597 if ((thr->compfcn == 0) || (!(*thr->compfcn)(size, ++compactseq))) {
605 if (thr->acqfcn != 0) {
606 if (size > (bufsize)(thr->exp_incr -
sizeof(bhead_t))) {
611 size +=
sizeof(bdhead_t) -
sizeof(bhead_t);
613 KE_TRACE(10, (
"%%%%%% MALLOC( %d )\n", (
int)size));
616 bdh = BDH((*thr->acqfcn)((bufsize)size));
620 bdh->bh.bb.bsize = 0;
623 TCW_PTR(bdh->bh.bb.bthr, th);
625 bdh->bh.bb.prevfree = 0;
628 thr->totalloc += (size_t)size;
632 buf = (
void *)(bdh + 1);
633 KMP_DEBUG_ASSERT(((
size_t)buf) % SizeQuant == 0);
642 KE_TRACE(10, (
"%%%%%% MALLOCB( %d )\n", (
int)thr->exp_incr));
645 newpool = (*thr->acqfcn)((bufsize)thr->exp_incr);
646 KMP_DEBUG_ASSERT(((
size_t)newpool) % SizeQuant == 0);
647 if (newpool != NULL) {
648 bpool(th, newpool, thr->exp_incr);
665 static void *bgetz(kmp_info_t *th, bufsize size) {
666 char *buf = (
char *)bget(th, size);
672 b = BH(buf -
sizeof(bhead_t));
673 rsize = -(b->bb.bsize);
677 bd = BDH(buf -
sizeof(bdhead_t));
678 rsize = bd->tsize - (bufsize)
sizeof(bdhead_t);
680 rsize -=
sizeof(bhead_t);
683 KMP_DEBUG_ASSERT(rsize >= size);
685 (void)memset(buf, 0, (bufsize)rsize);
687 return ((
void *)buf);
695 static void *bgetr(kmp_info_t *th,
void *buf, bufsize size) {
700 nbuf = bget(th, size);
707 b = BH(((
char *)buf) -
sizeof(bhead_t));
708 osize = -b->bb.bsize;
713 bd = BDH(((
char *)buf) -
sizeof(bdhead_t));
714 osize = bd->tsize - (bufsize)
sizeof(bdhead_t);
716 osize -=
sizeof(bhead_t);
719 KMP_DEBUG_ASSERT(osize > 0);
721 (void)KMP_MEMCPY((
char *)nbuf, (
char *)buf,
722 (
size_t)((size < osize) ? size : osize));
729 static void brel(kmp_info_t *th,
void *buf) {
730 thr_data_t *thr = get_thr_data(th);
734 KMP_DEBUG_ASSERT(buf != NULL);
735 KMP_DEBUG_ASSERT(((
size_t)buf) % SizeQuant == 0);
737 b = BFH(((
char *)buf) -
sizeof(bhead_t));
739 if (b->bh.bb.bsize == 0) {
742 bdh = BDH(((
char *)buf) -
sizeof(bdhead_t));
743 KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
745 thr->totalloc -= (size_t)bdh->tsize;
750 (
void)memset((
char *)buf, 0x55, (
size_t)(bdh->tsize -
sizeof(bdhead_t)));
753 KE_TRACE(10, (
"%%%%%% FREE( %p )\n", (
void *)bdh));
755 KMP_DEBUG_ASSERT(thr->relfcn != 0);
756 (*thr->relfcn)((
void *)bdh);
760 bth = (kmp_info_t *)((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) &
764 __kmp_bget_enqueue(bth, buf
765 #ifdef USE_QUEUING_LOCK_FOR_BGET
767 __kmp_gtid_from_thread(th)
774 if (b->bh.bb.bsize >= 0) {
777 KMP_DEBUG_ASSERT(b->bh.bb.bsize < 0);
781 KMP_DEBUG_ASSERT(BH((
char *)b - b->bh.bb.bsize)->bb.prevfree == 0);
785 thr->totalloc += (size_t)b->bh.bb.bsize;
790 if (b->bh.bb.prevfree != 0) {
795 bufsize size = b->bh.bb.bsize;
798 KMP_DEBUG_ASSERT(BH((
char *)b - b->bh.bb.prevfree)->bb.bsize ==
800 b = BFH(((
char *)b) - b->bh.bb.prevfree);
801 b->bh.bb.bsize -= size;
804 __kmp_bget_remove_from_freelist(b);
809 b->bh.bb.bsize = -b->bh.bb.bsize;
813 __kmp_bget_insert_into_freelist(thr, b);
819 bn = BFH(((
char *)b) + b->bh.bb.bsize);
820 if (bn->bh.bb.bsize > 0) {
824 KMP_DEBUG_ASSERT(BH((
char *)bn + bn->bh.bb.bsize)->bb.prevfree ==
827 __kmp_bget_remove_from_freelist(bn);
829 b->bh.bb.bsize += bn->bh.bb.bsize;
833 __kmp_bget_remove_from_freelist(b);
834 __kmp_bget_insert_into_freelist(thr, b);
842 bn = BFH(((
char *)b) + b->bh.bb.bsize);
845 (void)memset(((
char *)b) +
sizeof(bfhead_t), 0x55,
846 (
size_t)(b->bh.bb.bsize -
sizeof(bfhead_t)));
848 KMP_DEBUG_ASSERT(bn->bh.bb.bsize < 0);
853 bn->bh.bb.prevfree = b->bh.bb.bsize;
859 if (thr->relfcn != 0 &&
860 b->bh.bb.bsize == (bufsize)(thr->pool_len -
sizeof(bhead_t))) {
866 KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
867 KMP_DEBUG_ASSERT(BH((
char *)b + b->bh.bb.bsize)->bb.bsize == ESent);
868 KMP_DEBUG_ASSERT(BH((
char *)b + b->bh.bb.bsize)->bb.prevfree ==
872 __kmp_bget_remove_from_freelist(b);
874 KE_TRACE(10, (
"%%%%%% FREE( %p )\n", (
void *)b));
880 KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
883 if (thr->last_pool == b)
// Record allocator control settings in th's allocator record: the compaction,
// acquisition and release callbacks, and the pool expansion increment.
// NOTE(review): the last parameter declaration (the source of `pool_incr`) and
// the closing brace are not visible in this view.
893 static void bectl(kmp_info_t *th, bget_compact_t compact,
894 bget_acquire_t acquire, bget_release_t release,
896 thr_data_t *thr = get_thr_data(th);
898 thr->compfcn = compact;
899 thr->acqfcn = acquire;
900 thr->relfcn = release;
901 thr->exp_incr = pool_incr;
905 static void bpool(kmp_info_t *th,
void *buf, bufsize len) {
907 thr_data_t *thr = get_thr_data(th);
908 bfhead_t *b = BFH(buf);
911 __kmp_bget_dequeue(th);
914 len &= ~(SizeQuant - 1);
916 if (thr->pool_len == 0) {
918 }
else if (len != thr->pool_len) {
924 KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
930 KMP_DEBUG_ASSERT(len -
sizeof(bhead_t) <= -((bufsize)ESent + 1));
935 b->bh.bb.prevfree = 0;
944 len -=
sizeof(bhead_t);
945 b->bh.bb.bsize = (bufsize)len;
947 TCW_PTR(b->bh.bb.bthr,
948 (kmp_info_t *)((kmp_uintptr_t)th |
952 __kmp_bget_insert_into_freelist(thr, b);
955 (void)memset(((
char *)b) +
sizeof(bfhead_t), 0x55,
956 (
size_t)(len -
sizeof(bfhead_t)));
958 bn = BH(((
char *)b) + len);
959 bn->bb.prevfree = (bufsize)len;
961 KMP_DEBUG_ASSERT((~0) == -1 && (bn != 0));
963 bn->bb.bsize = ESent;
967 static void bfreed(kmp_info_t *th) {
968 int bin = 0, count = 0;
969 int gtid = __kmp_gtid_from_thread(th);
970 thr_data_t *thr = get_thr_data(th);
973 __kmp_printf_no_lock(
"__kmp_printpool: T#%d total=%" KMP_UINT64_SPEC
974 " get=%" KMP_INT64_SPEC
" rel=%" KMP_INT64_SPEC
975 " pblk=%" KMP_INT64_SPEC
" pget=%" KMP_INT64_SPEC
976 " prel=%" KMP_INT64_SPEC
" dget=%" KMP_INT64_SPEC
977 " drel=%" KMP_INT64_SPEC
"\n",
978 gtid, (kmp_uint64)thr->totalloc, (kmp_int64)thr->numget,
979 (kmp_int64)thr->numrel, (kmp_int64)thr->numpblk,
980 (kmp_int64)thr->numpget, (kmp_int64)thr->numprel,
981 (kmp_int64)thr->numdget, (kmp_int64)thr->numdrel);
984 for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
987 for (b = thr->freelist[bin].ql.flink; b != &thr->freelist[bin];
989 bufsize bs = b->bh.bb.bsize;
991 KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
992 KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);
993 KMP_DEBUG_ASSERT(bs > 0);
997 __kmp_printf_no_lock(
998 "__kmp_printpool: T#%d Free block: 0x%p size %6ld bytes.\n", gtid, b,
1002 char *lerr = ((
char *)b) +
sizeof(bfhead_t);
1003 if ((bs >
sizeof(bfhead_t)) &&
1005 (memcmp(lerr, lerr + 1, (
size_t)(bs - (
sizeof(bfhead_t) + 1))) !=
1007 __kmp_printf_no_lock(
"__kmp_printpool: T#%d (Contents of above " 1008 "free block have been overstored.)\n",
1017 __kmp_printf_no_lock(
"__kmp_printpool: T#%d No free blocks\n", gtid);
// Report allocation statistics for th's allocator record.
//   *curalloc  <- thr->totalloc (cast to bufsize)
//   *nget      <- thr->numget
//   *nrel      <- thr->numrel
//   *totfree   accumulates the size of every block on every bin's free list
//   *maxfree   is raised to the largest free block size encountered
// NOTE(review): the initial values given to *totfree and *maxfree, the
// declaration of `bin`, and the loop advance are not visible in this view.
1025 static void bstats(kmp_info_t *th, bufsize *curalloc, bufsize *totfree,
1026 bufsize *maxfree,
long *nget,
long *nrel) {
1028 thr_data_t *thr = get_thr_data(th);
1030 *nget = thr->numget;
1031 *nrel = thr->numrel;
1032 *curalloc = (bufsize)thr->totalloc;
// Walk each bin's circular list until the list head is reached again.
1036 for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
1037 bfhead_t *b = thr->freelist[bin].ql.flink;
1039 while (b != &thr->freelist[bin]) {
// Blocks on a free list carry a positive size.
1040 KMP_DEBUG_ASSERT(b->bh.bb.bsize > 0);
1041 *totfree += b->bh.bb.bsize;
1042 if (b->bh.bb.bsize > *maxfree) {
1043 *maxfree = b->bh.bb.bsize;
// Report the extended (pool-level) counters from th's allocator record.
//   *pool_incr  <- thr->exp_incr, negated when thr->pool_len is negative
//   *npool      <- thr->numpblk
//   *npget      <- thr->numpget
//   *nprel      <- thr->numprel
//   *ndget      <- thr->numdget
//   *ndrel      <- thr->numdrel
1051 static void bstatse(kmp_info_t *th, bufsize *pool_incr,
long *npool,
1052 long *npget,
long *nprel,
long *ndget,
long *ndrel) {
1053 thr_data_t *thr = get_thr_data(th);
1055 *pool_incr = (thr->pool_len < 0) ? -thr->exp_incr : thr->exp_incr;
1056 *npool = thr->numpblk;
1057 *npget = thr->numpget;
1058 *nprel = thr->numprel;
1059 *ndget = thr->numdget;
1060 *ndrel = thr->numdrel;
1068 static void bufdump(kmp_info_t *th,
void *buf) {
1070 unsigned char *bdump;
1073 b = BFH(((
char *)buf) -
sizeof(bhead_t));
1074 KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
1075 if (b->bh.bb.bsize < 0) {
1076 bdump = (
unsigned char *)buf;
1077 bdlen = (-b->bh.bb.bsize) - (bufsize)
sizeof(bhead_t);
1079 bdump = (
unsigned char *)(((
char *)b) +
sizeof(bfhead_t));
1080 bdlen = b->bh.bb.bsize - (bufsize)
sizeof(bfhead_t);
1086 char bhex[50], bascii[20];
1092 for (i = 0; i < l; i++) {
1093 (void)KMP_SNPRINTF(bhex + i * 3,
sizeof(bhex) - i * 3,
"%02X ", bdump[i]);
1094 if (bdump[i] > 0x20 && bdump[i] < 0x7F)
1095 bascii[i] = bdump[i];
1100 (void)__kmp_printf_no_lock(
"%-48s %s\n", bhex, bascii);
1103 while ((bdlen > 16) &&
1104 (memcmp((
char *)(bdump - 16), (
char *)bdump, 16) == 0)) {
1110 (void)__kmp_printf_no_lock(
1111 " (%d lines [%d bytes] identical to above line skipped)\n", dupes,
1113 }
else if (dupes == 1) {
1125 static void bpoold(kmp_info_t *th,
void *buf,
int dumpalloc,
int dumpfree) {
1126 bfhead_t *b = BFH((
char *)buf -
sizeof(bhead_t));
1128 while (b->bh.bb.bsize != ESent) {
1129 bufsize bs = b->bh.bb.bsize;
1133 (void)__kmp_printf_no_lock(
"Allocated buffer: size %6ld bytes.\n",
1136 bufdump(th, (
void *)(((
char *)b) +
sizeof(bhead_t)));
1139 const char *lerr =
"";
1141 KMP_DEBUG_ASSERT(bs > 0);
1142 if ((b->ql.blink->ql.flink != b) || (b->ql.flink->ql.blink != b)) {
1143 lerr =
" (Bad free list links)";
1145 (void)__kmp_printf_no_lock(
"Free block: size %6ld bytes.%s\n",
1148 lerr = ((
char *)b) +
sizeof(bfhead_t);
1149 if ((bs >
sizeof(bfhead_t)) &&
1151 (memcmp(lerr, lerr + 1, (
size_t)(bs - (
sizeof(bfhead_t) + 1))) !=
1153 (void)__kmp_printf_no_lock(
1154 "(Contents of above free block have been overstored.)\n");
1155 bufdump(th, (
void *)(((
char *)b) +
sizeof(bhead_t)));
1159 bufdump(th, (
void *)(((
char *)b) +
sizeof(bhead_t)));
1162 b = BFH(((
char *)b) + bs);
1167 static int bpoolv(kmp_info_t *th,
void *buf) {
1168 bfhead_t *b = BFH(buf);
1170 while (b->bh.bb.bsize != ESent) {
1171 bufsize bs = b->bh.bb.bsize;
1180 KMP_DEBUG_ASSERT(bs > 0);
1184 if ((b->ql.blink->ql.flink != b) || (b->ql.flink->ql.blink != b)) {
1185 (void)__kmp_printf_no_lock(
1186 "Free block: size %6ld bytes. (Bad free list links)\n", (
long)bs);
1187 KMP_DEBUG_ASSERT(0);
1191 lerr = ((
char *)b) +
sizeof(bfhead_t);
1192 if ((bs >
sizeof(bfhead_t)) &&
1194 (memcmp(lerr, lerr + 1, (
size_t)(bs - (
sizeof(bfhead_t) + 1))) !=
1196 (void)__kmp_printf_no_lock(
1197 "(Contents of above free block have been overstored.)\n");
1198 bufdump(th, (
void *)(((
char *)b) +
sizeof(bhead_t)));
1199 KMP_DEBUG_ASSERT(0);
1204 b = BFH(((
char *)b) + bs);
// Configure the allocator for thread `th`: no compaction callback, malloc and
// free as the acquisition/release callbacks, and __kmp_malloc_pool_incr as the
// pool expansion increment.
// NOTE(review): statements between the assertion and the bectl() call are not
// visible in this view.
1211 void __kmp_initialize_bget(kmp_info_t *th) {
// The allocation quantum must be able to hold a pointer, and `th` must be set.
1212 KMP_DEBUG_ASSERT(SizeQuant >=
sizeof(
void *) && (th != 0));
1216 bectl(th, (bget_compact_t)0, (bget_acquire_t)malloc, (bget_release_t)free,
1217 (bufsize)__kmp_malloc_pool_incr);
// Tear down the allocator state of thread `th`. If a release callback is set,
// pool blocks are outstanding, and block `b` spans an entire pool
// (size == pool_len - sizeof(bhead_t)), that block is unlinked from its free
// list. Finally the allocator record itself is freed with __kmp_free() and
// the thread's pointer to it is cleared.
// NOTE(review): the declarations of `thr` and `b`, the assignment that
// selects `b`, and the statements that release the pool and update the pool
// counters are not visible in this view.
1220 void __kmp_finalize_bget(kmp_info_t *th) {
1224 KMP_DEBUG_ASSERT(th != 0);
1227 thr = (thr_data_t *)th->th.th_local.bget_data;
1228 KMP_DEBUG_ASSERT(thr != NULL);
1236 if (thr->relfcn != 0 && b != 0 && thr->numpblk != 0 &&
1237 b->bh.bb.bsize == (bufsize)(thr->pool_len -
sizeof(bhead_t))) {
// A whole-pool block has no free predecessor and is followed by the ESent
// end marker.
1238 KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
1239 KMP_DEBUG_ASSERT(BH((
char *)b + b->bh.bb.bsize)->bb.bsize == ESent);
1240 KMP_DEBUG_ASSERT(BH((
char *)b + b->bh.bb.bsize)->bb.prevfree ==
1244 __kmp_bget_remove_from_freelist(b);
1246 KE_TRACE(10, (
"%%%%%% FREE( %p )\n", (
void *)b));
// Pool accounting invariant: live pools == pools acquired - pools released.
1251 KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
// Release the bookkeeping record and clear the thread's reference to it.
1256 if (th->th.th_local.bget_data != NULL) {
1257 __kmp_free(th->th.th_local.bget_data);
1258 th->th.th_local.bget_data = NULL;
// Set the pool expansion increment of the current thread's allocator to
// `size`, re-installing malloc/free as the acquisition/release callbacks and
// leaving the compaction callback unset.
1262 void kmpc_set_poolsize(
size_t size) {
1263 bectl(__kmp_get_thread(), (bget_compact_t)0, (bget_acquire_t)malloc,
1264 (bget_release_t)free, (bufsize)size);
1267 size_t kmpc_get_poolsize(
void) {
1270 p = get_thr_data(__kmp_get_thread());
// Select the free-list search mode of the current thread's allocator.
// `mode` is applied only when it equals one of bget_mode_fifo, bget_mode_lifo
// or bget_mode_best; the visible code takes no action for any other value.
// NOTE(review): the declaration of `p` is not visible in this view.
1275 void kmpc_set_poolmode(
int mode) {
1278 if (mode == bget_mode_fifo || mode == bget_mode_lifo ||
1279 mode == bget_mode_best) {
1280 p = get_thr_data(__kmp_get_thread());
1281 p->mode = (bget_mode_t)mode;
1285 int kmpc_get_poolmode(
void) {
1288 p = get_thr_data(__kmp_get_thread());
1293 void kmpc_get_poolstat(
size_t *maxmem,
size_t *allmem) {
1294 kmp_info_t *th = __kmp_get_thread();
1297 __kmp_bget_dequeue(th);
1305 void kmpc_poolprint(
void) {
1306 kmp_info_t *th = __kmp_get_thread();
1308 __kmp_bget_dequeue(th);
1313 #endif // #if KMP_USE_BGET 1315 void *kmpc_malloc(
size_t size) {
// Allocate `size` bytes plus room for one pointer from the entry thread's
// allocator. The raw allocation address is stored in the first pointer-sized
// slot and `ptr` is advanced past that slot, so the raw address sits
// immediately before the adjusted pointer.
// NOTE(review): the declaration of `ptr`, the null check and the return
// statement are not visible in this view.
1317 ptr = bget(__kmp_entry_thread(), (bufsize)(size +
sizeof(ptr)));
1320 *(
void **)ptr = ptr;
1321 ptr = (
void **)ptr + 1;
1326 #define IS_POWER_OF_TWO(n) (((n) & ((n)-1)) == 0) 1328 void *kmpc_aligned_malloc(
size_t size,
size_t alignment) {
// Allocate `size` bytes from the entry thread's allocator with extra room for
// one pointer plus `alignment` bytes, then derive `ptr` from the raw address
// by adding that overhead and masking. The raw address is saved in the
// pointer-sized slot just before `ptr`.
// NOTE(review): the handling of a non-power-of-two alignment, the mask applied
// after the `&`, the declaration of `ptr`, and the return statement are not
// visible in this view.
1330 void *ptr_allocated;
1331 KMP_DEBUG_ASSERT(alignment < 32 * 1024);
1332 if (!IS_POWER_OF_TWO(alignment)) {
// Over-allocate so an aligned address with a preceding pointer slot fits.
1337 size = size +
sizeof(
void *) + alignment;
1338 ptr_allocated = bget(__kmp_entry_thread(), (bufsize)size);
1339 if (ptr_allocated != NULL) {
1341 ptr = (
void *)(((kmp_uintptr_t)ptr_allocated +
sizeof(
void *) + alignment) &
// Remember the raw allocation address just before the returned pointer.
1343 *((
void **)ptr - 1) = ptr_allocated;
// Allocate zero-filled storage for `nelem` elements of `elsize` bytes from the
// entry thread's allocator (via bgetz), with room for one extra pointer. The
// raw allocation address is stored in the first pointer-sized slot and `ptr`
// is advanced past it.
// NOTE(review): no overflow check on `nelem * elsize` is visible here; the
// declaration of `ptr`, the null check and the return statement are also not
// visible in this view.
1350 void *kmpc_calloc(
size_t nelem,
size_t elsize) {
1352 ptr = bgetz(__kmp_entry_thread(), (bufsize)(nelem * elsize +
sizeof(ptr)));
1355 *(
void **)ptr = ptr;
1356 ptr = (
void **)ptr + 1;
// Resize an allocation made by this family of functions. Three paths are
// visible:
//   * a fresh allocation via bget() of `size` plus one pointer slot,
//   * a `size == 0` path that releases the raw allocation recorded in the slot
//     just before `ptr`,
//   * a path that resizes via bgetr() using that recorded raw address.
// In the allocating paths the raw address is stored in the first pointer slot
// of the result and `result` is advanced past it.
// NOTE(review): the condition guarding the first path (presumably a null
// `ptr`), closing braces and the return statement are not visible in this
// view.
1361 void *kmpc_realloc(
void *ptr,
size_t size) {
1362 void *result = NULL;
1365 result = bget(__kmp_entry_thread(), (bufsize)(size +
sizeof(ptr)));
1367 if (result != NULL) {
1368 *(
void **)result = result;
1369 result = (
void **)result + 1;
1371 }
else if (size == 0) {
// The slot in front of `ptr` must hold the raw allocation address.
1377 KMP_ASSERT(*((
void **)ptr - 1));
1378 brel(__kmp_get_thread(), *((
void **)ptr - 1));
1380 result = bgetr(__kmp_entry_thread(), *((
void **)ptr - 1),
1381 (bufsize)(size +
sizeof(ptr)));
1382 if (result != NULL) {
1383 *(
void **)result = result;
1384 result = (
void **)result + 1;
// Release storage obtained from this family of allocation functions. The raw
// allocation address is read from the pointer-sized slot just before `ptr` and
// passed to brel() on the current thread, after that thread's queued buffers
// have been drained with __kmp_bget_dequeue().
// NOTE(review): the body of the `!__kmp_init_serial` branch and any null check
// on `ptr` are not visible in this view.
1391 void kmpc_free(
void *ptr) {
1392 if (!__kmp_init_serial) {
1396 kmp_info_t *th = __kmp_get_thread();
1397 __kmp_bget_dequeue(th);
// The slot in front of `ptr` must hold the raw allocation address.
1399 KMP_ASSERT(*((
void **)ptr - 1));
1400 brel(th, *((
void **)ptr - 1));
1404 void *___kmp_thread_malloc(kmp_info_t *th,
size_t size KMP_SRC_LOC_DECL) {
1406 KE_TRACE(30, (
"-> __kmp_thread_malloc( %p, %d ) called from %s:%d\n", th,
1407 (
int)size KMP_SRC_LOC_PARM));
1408 ptr = bget(th, (bufsize)size);
1409 KE_TRACE(30, (
"<- __kmp_thread_malloc() returns %p\n", ptr));
1413 void *___kmp_thread_calloc(kmp_info_t *th,
size_t nelem,
1414 size_t elsize KMP_SRC_LOC_DECL) {
1416 KE_TRACE(30, (
"-> __kmp_thread_calloc( %p, %d, %d ) called from %s:%d\n", th,
1417 (
int)nelem, (
int)elsize KMP_SRC_LOC_PARM));
1418 ptr = bgetz(th, (bufsize)(nelem * elsize));
1419 KE_TRACE(30, (
"<- __kmp_thread_calloc() returns %p\n", ptr));
1423 void *___kmp_thread_realloc(kmp_info_t *th,
void *ptr,
1424 size_t size KMP_SRC_LOC_DECL) {
1425 KE_TRACE(30, (
"-> __kmp_thread_realloc( %p, %p, %d ) called from %s:%d\n", th,
1426 ptr, (
int)size KMP_SRC_LOC_PARM));
1427 ptr = bgetr(th, ptr, (bufsize)size);
1428 KE_TRACE(30, (
"<- __kmp_thread_realloc() returns %p\n", ptr));
1432 void ___kmp_thread_free(kmp_info_t *th,
void *ptr KMP_SRC_LOC_DECL) {
1433 KE_TRACE(30, (
"-> __kmp_thread_free( %p, %p ) called from %s:%d\n", th,
1434 ptr KMP_SRC_LOC_PARM));
1436 __kmp_bget_dequeue(th);
1439 KE_TRACE(30, (
"<- __kmp_thread_free()\n"));
// Descriptor stored immediately in front of an aligned allocation (see its use
// in ___kmp_allocate_align and ___kmp_free).
// NOTE(review): those functions also access a `ptr_aligned` member whose
// declaration, like the struct's closing brace, is not visible in this view.
1446 struct kmp_mem_descr {
// Address returned by the underlying allocation call; this is what is freed.
1447 void *ptr_allocated;
// Total number of bytes requested from the underlying allocation call.
1448 size_t size_allocated;
// Number of bytes available to the caller at the aligned address.
1450 size_t size_aligned;
1452 typedef struct kmp_mem_descr kmp_mem_descr_t;
// Allocate `size` zero-filled bytes at an address that is a multiple of
// `alignment`, and return that address.
// The request is padded by sizeof(kmp_mem_descr_t) + alignment; the aligned
// address is computed inside the raw block, and a descriptor recording the raw
// pointer and sizes is written directly in front of the aligned address.
// A failed underlying allocation is reported through KMP_FATAL.
// NOTE(review): the preprocessor conditions selecting between the two
// allocation calls and around the 0xEF fill, and the left-hand side of the
// aligned-address computation, are not visible in this view.
1457 static void *___kmp_allocate_align(
size_t size,
1458 size_t alignment KMP_SRC_LOC_DECL) {
1475 kmp_mem_descr_t descr;
1476 kmp_uintptr_t addr_allocated;
1477 kmp_uintptr_t addr_aligned;
1478 kmp_uintptr_t addr_descr;
1480 KE_TRACE(25, (
"-> ___kmp_allocate_align( %d, %d ) called from %s:%d\n",
1481 (
int)size, (
int)alignment KMP_SRC_LOC_PARM));
1483 KMP_DEBUG_ASSERT(alignment < 32 * 1024);
1484 KMP_DEBUG_ASSERT(
sizeof(
void *) <=
sizeof(kmp_uintptr_t));
// Pad the request so that both the descriptor and the alignment slack fit.
1487 descr.size_aligned = size;
1488 descr.size_allocated =
1489 descr.size_aligned +
sizeof(kmp_mem_descr_t) + alignment;
1492 descr.ptr_allocated = _malloc_src_loc(descr.size_allocated, _file_, _line_);
1494 descr.ptr_allocated = malloc_src_loc(descr.size_allocated KMP_SRC_LOC_PARM);
1496 KE_TRACE(10, (
" malloc( %d ) returned %p\n", (
int)descr.size_allocated,
1497 descr.ptr_allocated));
1498 if (descr.ptr_allocated == NULL) {
1499 KMP_FATAL(OutOfHeapMemory);
1502 addr_allocated = (kmp_uintptr_t)descr.ptr_allocated;
// Masking with ~(alignment - 1) assumes `alignment` is a power of two.
1504 (addr_allocated +
sizeof(kmp_mem_descr_t) + alignment) & ~(alignment - 1);
// The descriptor lives directly below the aligned address.
1505 addr_descr = addr_aligned -
sizeof(kmp_mem_descr_t);
1507 descr.ptr_aligned = (
void *)addr_aligned;
1509 KE_TRACE(26, (
" ___kmp_allocate_align: " 1510 "ptr_allocated=%p, size_allocated=%d, " 1511 "ptr_aligned=%p, size_aligned=%d\n",
1512 descr.ptr_allocated, (
int)descr.size_allocated,
1513 descr.ptr_aligned, (
int)descr.size_aligned));
// Layout checks: descriptor and aligned region lie inside the raw block.
1515 KMP_DEBUG_ASSERT(addr_allocated <= addr_descr);
1516 KMP_DEBUG_ASSERT(addr_descr +
sizeof(kmp_mem_descr_t) == addr_aligned);
1517 KMP_DEBUG_ASSERT(addr_aligned + descr.size_aligned <=
1518 addr_allocated + descr.size_allocated);
1519 KMP_DEBUG_ASSERT(addr_aligned % alignment == 0);
// Fill the whole raw block with a recognisable pattern.
1521 memset(descr.ptr_allocated, 0xEF, descr.size_allocated);
// The caller-visible region is handed out zeroed.
1524 memset(descr.ptr_aligned, 0x00, descr.size_aligned);
// Store the descriptor where ___kmp_free() reads it back from.
1530 *((kmp_mem_descr_t *)addr_descr) = descr;
1534 KE_TRACE(25, (
"<- ___kmp_allocate_align() returns %p\n", descr.ptr_aligned));
1535 return descr.ptr_aligned;
// Allocate `size` bytes aligned to __kmp_align_alloc by delegating to
// ___kmp_allocate_align(), tracing entry and exit.
// NOTE(review): the declaration of `ptr` and the return statement are not
// visible in this view.
1542 void *___kmp_allocate(
size_t size KMP_SRC_LOC_DECL) {
1544 KE_TRACE(25, (
"-> __kmp_allocate( %d ) called from %s:%d\n",
1545 (
int)size KMP_SRC_LOC_PARM));
1546 ptr = ___kmp_allocate_align(size, __kmp_align_alloc KMP_SRC_LOC_PARM);
1547 KE_TRACE(25, (
"<- __kmp_allocate() returns %p\n", ptr));
// Allocate `size` bytes aligned to a fixed 8 KiB boundary by delegating to
// ___kmp_allocate_align(), tracing entry and exit.
// NOTE(review): the declaration of `ptr` and the return statement are not
// visible in this view.
1555 void *___kmp_page_allocate(
size_t size KMP_SRC_LOC_DECL) {
// Alignment used for page allocations, in bytes.
1556 int page_size = 8 * 1024;
1559 KE_TRACE(25, (
"-> __kmp_page_allocate( %d ) called from %s:%d\n",
1560 (
int)size KMP_SRC_LOC_PARM));
1561 ptr = ___kmp_allocate_align(size, page_size KMP_SRC_LOC_PARM);
1562 KE_TRACE(25, (
"<- __kmp_page_allocate( %d ) returns %p\n", (
int)size, ptr));
// Release a block returned by ___kmp_allocate_align() (and its wrappers).
// The descriptor stored directly in front of `ptr` is read back to recover the
// raw allocation pointer, which is what gets passed to the underlying free
// call. `ptr` must not be NULL.
// NOTE(review): the opening of the first trace call and the preprocessor
// conditions around the 0xEF fill and the two free calls are not visible in
// this view.
1568 void ___kmp_free(
void *ptr KMP_SRC_LOC_DECL) {
1569 kmp_mem_descr_t descr;
1570 kmp_uintptr_t addr_allocated;
1571 kmp_uintptr_t addr_aligned;
1574 (
"-> __kmp_free( %p ) called from %s:%d\n", ptr KMP_SRC_LOC_PARM));
1575 KMP_ASSERT(ptr != NULL);
// The descriptor sits sizeof(kmp_mem_descr_t) bytes below the aligned pointer.
1577 descr = *(kmp_mem_descr_t *)((kmp_uintptr_t)ptr -
sizeof(kmp_mem_descr_t));
1579 KE_TRACE(26, (
" __kmp_free: " 1580 "ptr_allocated=%p, size_allocated=%d, " 1581 "ptr_aligned=%p, size_aligned=%d\n",
1582 descr.ptr_allocated, (
int)descr.size_allocated,
1583 descr.ptr_aligned, (
int)descr.size_aligned));
1585 addr_allocated = (kmp_uintptr_t)descr.ptr_allocated;
1586 addr_aligned = (kmp_uintptr_t)descr.ptr_aligned;
// Consistency checks on the recovered descriptor.
1588 KMP_DEBUG_ASSERT(addr_aligned % CACHE_LINE == 0);
1589 KMP_DEBUG_ASSERT(descr.ptr_aligned == ptr);
1590 KMP_DEBUG_ASSERT(addr_allocated +
sizeof(kmp_mem_descr_t) <= addr_aligned);
1591 KMP_DEBUG_ASSERT(descr.size_aligned < descr.size_allocated);
1592 KMP_DEBUG_ASSERT(addr_aligned + descr.size_aligned <=
1593 addr_allocated + descr.size_allocated);
// Overwrite the whole raw block with a recognisable pattern before freeing.
1596 memset(descr.ptr_allocated, 0xEF, descr.size_allocated);
1601 KE_TRACE(10, (
" free( %p )\n", descr.ptr_allocated));
1603 _free_src_loc(descr.ptr_allocated, _file_, _line_);
1605 free_src_loc(descr.ptr_allocated KMP_SRC_LOC_PARM);
1609 KE_TRACE(25, (
"<- __kmp_free() returns\n"));
1612 #if USE_FAST_MEMORY == 3 1618 #define KMP_FREE_LIST_LIMIT 16 1621 #define DCACHE_LINE 128 1623 void *___kmp_fast_allocate(kmp_info_t *this_thr,
size_t size KMP_SRC_LOC_DECL) {
1630 kmp_mem_descr_t *descr;
1632 KE_TRACE(25, (
"-> __kmp_fast_allocate( T#%d, %d ) called from %s:%d\n",
1633 __kmp_gtid_from_thread(this_thr), (
int)size KMP_SRC_LOC_PARM));
1635 num_lines = (size + DCACHE_LINE - 1) / DCACHE_LINE;
1636 idx = num_lines - 1;
1637 KMP_DEBUG_ASSERT(idx >= 0);
1641 }
else if ((idx >>= 2) == 0) {
1644 }
else if ((idx >>= 2) == 0) {
1647 }
else if ((idx >>= 2) == 0) {
1654 ptr = this_thr->th.th_free_lists[index].th_free_list_self;
1657 this_thr->th.th_free_lists[index].th_free_list_self = *((
void **)ptr);
1660 ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr -
sizeof(kmp_mem_descr_t)))
1664 ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync);
1669 while (!KMP_COMPARE_AND_STORE_PTR(
1670 &this_thr->th.th_free_lists[index].th_free_list_sync, ptr,
nullptr)) {
1672 ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync);
1676 this_thr->th.th_free_lists[index].th_free_list_self = *((
void **)ptr);
1679 ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr -
sizeof(kmp_mem_descr_t)))
1686 size = num_lines * DCACHE_LINE;
1688 alloc_size = size +
sizeof(kmp_mem_descr_t) + DCACHE_LINE;
1689 KE_TRACE(25, (
"__kmp_fast_allocate: T#%d Calling __kmp_thread_malloc with " 1691 __kmp_gtid_from_thread(this_thr), alloc_size));
1692 alloc_ptr = bget(this_thr, (bufsize)alloc_size);
1695 ptr = (
void *)((((kmp_uintptr_t)alloc_ptr) +
sizeof(kmp_mem_descr_t) +
1697 ~(DCACHE_LINE - 1));
1698 descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) -
sizeof(kmp_mem_descr_t));
1700 descr->ptr_allocated = alloc_ptr;
1702 descr->ptr_aligned = (
void *)this_thr;
1705 descr->size_aligned = size;
1708 KE_TRACE(25, (
"<- __kmp_fast_allocate( T#%d ) returns %p\n",
1709 __kmp_gtid_from_thread(this_thr), ptr));
1715 void ___kmp_fast_free(kmp_info_t *this_thr,
void *ptr KMP_SRC_LOC_DECL) {
1716 kmp_mem_descr_t *descr;
1717 kmp_info_t *alloc_thr;
1722 KE_TRACE(25, (
"-> __kmp_fast_free( T#%d, %p ) called from %s:%d\n",
1723 __kmp_gtid_from_thread(this_thr), ptr KMP_SRC_LOC_PARM));
1724 KMP_ASSERT(ptr != NULL);
1726 descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) -
sizeof(kmp_mem_descr_t));
1728 KE_TRACE(26, (
" __kmp_fast_free: size_aligned=%d\n",
1729 (
int)descr->size_aligned));
1731 size = descr->size_aligned;
1733 idx = DCACHE_LINE * 2;
1736 }
else if ((idx <<= 1) == size) {
1738 }
else if ((idx <<= 2) == size) {
1740 }
else if ((idx <<= 2) == size) {
1743 KMP_DEBUG_ASSERT(size > DCACHE_LINE * 64);
1747 alloc_thr = (kmp_info_t *)descr->ptr_aligned;
1748 if (alloc_thr == this_thr) {
1750 *((
void **)ptr) = this_thr->th.th_free_lists[index].th_free_list_self;
1751 this_thr->th.th_free_lists[index].th_free_list_self = ptr;
1753 void *head = this_thr->th.th_free_lists[index].th_free_list_other;
1756 this_thr->th.th_free_lists[index].th_free_list_other = ptr;
1757 *((
void **)ptr) = NULL;
1758 descr->size_allocated = (size_t)1;
1761 kmp_mem_descr_t *dsc =
1762 (kmp_mem_descr_t *)((
char *)head -
sizeof(kmp_mem_descr_t));
1764 kmp_info_t *q_th = (kmp_info_t *)(dsc->ptr_aligned);
1766 dsc->size_allocated + 1;
1767 if (q_th == alloc_thr && q_sz <= KMP_FREE_LIST_LIMIT) {
1769 *((
void **)ptr) = head;
1770 descr->size_allocated = q_sz;
1771 this_thr->th.th_free_lists[index].th_free_list_other = ptr;
1778 void *next = *((
void **)head);
1779 while (next != NULL) {
1782 ((kmp_mem_descr_t *)((
char *)next -
sizeof(kmp_mem_descr_t)))
1785 ((kmp_mem_descr_t *)((
char *)tail -
sizeof(kmp_mem_descr_t)))
1788 next = *((
void **)next);
1790 KMP_DEBUG_ASSERT(q_th != NULL);
1792 old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync);
1795 *((
void **)tail) = old_ptr;
1797 while (!KMP_COMPARE_AND_STORE_PTR(
1798 &q_th->th.th_free_lists[index].th_free_list_sync, old_ptr, head)) {
1800 old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync);
1801 *((
void **)tail) = old_ptr;
1805 this_thr->th.th_free_lists[index].th_free_list_other = ptr;
1806 *((
void **)ptr) = NULL;
1807 descr->size_allocated = (size_t)1;
1814 KE_TRACE(25, (
"__kmp_fast_free: T#%d Calling __kmp_thread_free for size %d\n",
1815 __kmp_gtid_from_thread(this_thr), size));
1816 __kmp_bget_dequeue(this_thr);
1817 brel(this_thr, descr->ptr_allocated);
1820 KE_TRACE(25, (
"<- __kmp_fast_free() returns\n"));
// Reset the fast-memory free lists of `this_thr` by zero-filling all
// NUM_LISTS entries of this_thr->th.th_free_lists.
1826 void __kmp_initialize_fast_memory(kmp_info_t *this_thr) {
1827 KE_TRACE(10, (
"__kmp_initialize_fast_memory: Called from th %p\n", this_thr));
1829 memset(this_thr->th.th_free_lists, 0, NUM_LISTS *
sizeof(kmp_free_list_t));
// Release memory held by th's allocator: drain the thread's queued buffers,
// scan every bin's free list for blocks whose owner pointer has its low bit
// set, and hand each entry of the resulting `lst` chain to the release
// callback (thr->relfcn).
// NOTE(review): how `lst` and `next` are built, the loop advance, and the
// opening of the first and last trace calls are not visible in this view. The
// low bit of `bthr` is presumably the marker set when a pool is added
// (see bpool) -- confirm.
1834 void __kmp_free_fast_memory(kmp_info_t *th) {
1837 thr_data_t *thr = get_thr_data(th);
1841 5, (
"__kmp_free_fast_memory: Called T#%d\n", __kmp_gtid_from_thread(th)));
// Process buffers queued for this thread before scanning the free lists.
1843 __kmp_bget_dequeue(th);
1846 for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
1847 bfhead_t *b = thr->freelist[bin].ql.flink;
1848 while (b != &thr->freelist[bin]) {
1849 if ((kmp_uintptr_t)b->bh.bb.bthr & 1) {
// Release every collected entry through the configured callback.
1857 while (lst != NULL) {
1859 KE_TRACE(10, (
"__kmp_free_fast_memory: freeing %p, next=%p th %p (%d)\n",
1860 lst, next, th, __kmp_gtid_from_thread(th)));
1861 (*thr->relfcn)(lst);
1867 lst = (
void **)next;
1871 5, (
"__kmp_free_fast_memory: Freed T#%d\n", __kmp_gtid_from_thread(th)));
1874 #endif // USE_FAST_MEMORY