16 #include "kmp_wrapper_malloc.h" 23 typedef int (*bget_compact_t)(size_t, int);
24 typedef void *(*bget_acquire_t)(size_t);
25 typedef void (*bget_release_t)(
void *);
30 #if KMP_ARCH_X86 || KMP_ARCH_ARM 31 typedef kmp_int32 bufsize;
33 typedef kmp_int64 bufsize;
36 typedef ssize_t bufsize;
41 typedef enum bget_mode {
47 static void bpool(kmp_info_t *th,
void *buffer, bufsize len);
48 static void *bget(kmp_info_t *th, bufsize size);
49 static void *bgetz(kmp_info_t *th, bufsize size);
50 static void *bgetr(kmp_info_t *th,
void *buffer, bufsize newsize);
51 static void brel(kmp_info_t *th,
void *buf);
52 static void bectl(kmp_info_t *th, bget_compact_t compact,
53 bget_acquire_t acquire, bget_release_t release,
63 #if KMP_ARCH_X86 || !KMP_HAVE_QUAD 66 #define AlignType double 71 #define AlignType _Quad 107 static bufsize bget_bin_size[] = {
117 1 << 16, 1 << 17, 1 << 18, 1 << 19, 1 << 20,
125 #define MAX_BGET_BINS (int)(sizeof(bget_bin_size) / sizeof(bufsize)) 132 typedef struct qlinks {
133 struct bfhead *flink;
134 struct bfhead *blink;
138 typedef struct bhead2 {
146 typedef union bhead {
149 char b_pad[
sizeof(bhead2_t) + (SizeQuant - (
sizeof(bhead2_t) % SizeQuant))];
152 #define BH(p) ((bhead_t *)(p)) 155 typedef struct bdhead {
159 #define BDH(p) ((bdhead_t *)(p)) 162 typedef struct bfhead {
166 #define BFH(p) ((bfhead_t *)(p)) 168 typedef struct thr_data {
169 bfhead_t freelist[MAX_BGET_BINS];
174 long numpget, numprel;
175 long numdget, numdrel;
179 bget_compact_t compfcn;
180 bget_acquire_t acqfcn;
181 bget_release_t relfcn;
194 #define QLSize (sizeof(qlinks_t)) 195 #define SizeQ ((SizeQuant > QLSize) ? SizeQuant : QLSize) 198 ~(((bufsize)(1) << (sizeof(bufsize) * CHAR_BIT - 1)) | (SizeQuant - 1))) 206 ((bufsize)(-(((((bufsize)1) << ((int)sizeof(bufsize) * 8 - 2)) - 1) * 2) - 2)) 209 static int bget_get_bin(bufsize size) {
211 int lo = 0, hi = MAX_BGET_BINS - 1;
213 KMP_DEBUG_ASSERT(size > 0);
215 while ((hi - lo) > 1) {
216 int mid = (lo + hi) >> 1;
217 if (size < bget_bin_size[mid])
223 KMP_DEBUG_ASSERT((lo >= 0) && (lo < MAX_BGET_BINS));
228 static void set_thr_data(kmp_info_t *th) {
232 data = (thr_data_t *)((!th->th.th_local.bget_data)
233 ? __kmp_allocate(
sizeof(*data))
234 : th->th.th_local.bget_data);
236 memset(data,
'\0',
sizeof(*data));
238 for (i = 0; i < MAX_BGET_BINS; ++i) {
239 data->freelist[i].ql.flink = &data->freelist[i];
240 data->freelist[i].ql.blink = &data->freelist[i];
243 th->th.th_local.bget_data = data;
244 th->th.th_local.bget_list = 0;
245 #if !USE_CMP_XCHG_FOR_BGET 246 #ifdef USE_QUEUING_LOCK_FOR_BGET 247 __kmp_init_lock(&th->th.th_local.bget_lock);
249 __kmp_init_bootstrap_lock(&th->th.th_local.bget_lock);
254 static thr_data_t *get_thr_data(kmp_info_t *th) {
257 data = (thr_data_t *)th->th.th_local.bget_data;
259 KMP_DEBUG_ASSERT(data != 0);
265 static void __kmp_bget_dequeue(kmp_info_t *th) {
266 void *p = TCR_SYNC_PTR(th->th.th_local.bget_list);
269 #if USE_CMP_XCHG_FOR_BGET 271 volatile void *old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
272 while (!KMP_COMPARE_AND_STORE_PTR(&th->th.th_local.bget_list,
273 CCAST(
void *, old_value),
nullptr)) {
275 old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
277 p = CCAST(
void *, old_value);
280 #ifdef USE_QUEUING_LOCK_FOR_BGET 281 __kmp_acquire_lock(&th->th.th_local.bget_lock, __kmp_gtid_from_thread(th));
283 __kmp_acquire_bootstrap_lock(&th->th.th_local.bget_lock);
286 p = (
void *)th->th.th_local.bget_list;
287 th->th.th_local.bget_list = 0;
289 #ifdef USE_QUEUING_LOCK_FOR_BGET
290 __kmp_release_lock(&th->th.th_local.bget_lock, __kmp_gtid_from_thread(th));
292 __kmp_release_bootstrap_lock(&th->th.th_local.bget_lock);
299 bfhead_t *b = BFH(((
char *)p) -
sizeof(bhead_t));
301 KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
302 KMP_DEBUG_ASSERT(((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1) ==
304 KMP_DEBUG_ASSERT(b->ql.blink == 0);
306 p = (
void *)b->ql.flink;
314 static void __kmp_bget_enqueue(kmp_info_t *th,
void *buf
315 #ifdef USE_QUEUING_LOCK_FOR_BGET
320 bfhead_t *b = BFH(((
char *)buf) -
sizeof(bhead_t));
322 KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
323 KMP_DEBUG_ASSERT(((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1) ==
328 KC_TRACE(10, (
"__kmp_bget_enqueue: moving buffer to T#%d list\n",
329 __kmp_gtid_from_thread(th)));
331 #if USE_CMP_XCHG_FOR_BGET 333 volatile void *old_value = TCR_PTR(th->th.th_local.bget_list);
336 b->ql.flink = BFH(CCAST(
void *, old_value));
338 while (!KMP_COMPARE_AND_STORE_PTR(&th->th.th_local.bget_list,
339 CCAST(
void *, old_value), buf)) {
341 old_value = TCR_PTR(th->th.th_local.bget_list);
344 b->ql.flink = BFH(CCAST(
void *, old_value));
348 #ifdef USE_QUEUING_LOCK_FOR_BGET 349 __kmp_acquire_lock(&th->th.th_local.bget_lock, rel_gtid);
351 __kmp_acquire_bootstrap_lock(&th->th.th_local.bget_lock);
354 b->ql.flink = BFH(th->th.th_local.bget_list);
355 th->th.th_local.bget_list = (
void *)buf;
357 #ifdef USE_QUEUING_LOCK_FOR_BGET 358 __kmp_release_lock(&th->th.th_local.bget_lock, rel_gtid);
360 __kmp_release_bootstrap_lock(&th->th.th_local.bget_lock);
// Link free buffer `b` into the free-list bin that bget_get_bin() selects for
// its size. The buffer is placed immediately before the bin's head entry,
// i.e. it becomes the head's new `blink`.
// NOTE(review): lines of this function are missing from this extract (no
// declaration of `bin` and no closing brace are visible here).
366 static void __kmp_bget_insert_into_freelist(thr_data_t *thr, bfhead_t *b) {
// Both the buffer address and its recorded size must be multiples of SizeQuant.
369 KMP_DEBUG_ASSERT(((
size_t)b) % SizeQuant == 0);
370 KMP_DEBUG_ASSERT(b->bh.bb.bsize % SizeQuant == 0);
372 bin = bget_get_bin(b->bh.bb.bsize);
// The bin head's neighbours must point back at the head before we touch it.
374 KMP_DEBUG_ASSERT(thr->freelist[bin].ql.blink->ql.flink ==
375 &thr->freelist[bin]);
376 KMP_DEBUG_ASSERT(thr->freelist[bin].ql.flink->ql.blink ==
377 &thr->freelist[bin]);
// b goes between the head's current blink and the head itself.
379 b->ql.flink = &thr->freelist[bin];
380 b->ql.blink = thr->freelist[bin].ql.blink;
382 thr->freelist[bin].ql.blink = b;
383 b->ql.blink->ql.flink = b;
387 static void __kmp_bget_remove_from_freelist(bfhead_t *b) {
388 KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
389 KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);
391 b->ql.blink->ql.flink = b->ql.flink;
392 b->ql.flink->ql.blink = b->ql.blink;
396 static void bcheck(kmp_info_t *th, bufsize *max_free, bufsize *total_free) {
397 thr_data_t *thr = get_thr_data(th);
400 *total_free = *max_free = 0;
402 for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
405 best = &thr->freelist[bin];
408 while (b != &thr->freelist[bin]) {
409 *total_free += (b->bh.bb.bsize -
sizeof(bhead_t));
410 if ((best == &thr->freelist[bin]) || (b->bh.bb.bsize < best->bh.bb.bsize))
417 if (*max_free < best->bh.bb.bsize)
418 *max_free = best->bh.bb.bsize;
421 if (*max_free > (bufsize)
sizeof(bhead_t))
422 *max_free -=
sizeof(bhead_t);
426 static void *bget(kmp_info_t *th, bufsize requested_size) {
427 thr_data_t *thr = get_thr_data(th);
428 bufsize size = requested_size;
436 if (size < 0 || size +
sizeof(bhead_t) > MaxSize) {
440 __kmp_bget_dequeue(th);
442 if (size < (bufsize)SizeQ) {
445 #if defined(SizeQuant) && (SizeQuant > 1) 446 size = (size + (SizeQuant - 1)) & (~(SizeQuant - 1));
449 size +=
sizeof(bhead_t);
450 KMP_DEBUG_ASSERT(size >= 0);
451 KMP_DEBUG_ASSERT(size % SizeQuant == 0);
453 use_blink = (thr->mode == bget_mode_lifo);
462 for (bin = bget_get_bin(size); bin < MAX_BGET_BINS; ++bin) {
464 b = (use_blink ? thr->freelist[bin].ql.blink
465 : thr->freelist[bin].ql.flink);
467 if (thr->mode == bget_mode_best) {
468 best = &thr->freelist[bin];
472 while (b != &thr->freelist[bin]) {
473 if (b->bh.bb.bsize >= (bufsize)size) {
474 if ((best == &thr->freelist[bin]) ||
475 (b->bh.bb.bsize < best->bh.bb.bsize)) {
481 b = (use_blink ? b->ql.blink : b->ql.flink);
486 while (b != &thr->freelist[bin]) {
487 if ((bufsize)b->bh.bb.bsize >= (bufsize)size) {
496 if ((b->bh.bb.bsize - (bufsize)size) >
497 (bufsize)(SizeQ + (
sizeof(bhead_t)))) {
500 ba = BH(((
char *)b) + (b->bh.bb.bsize - (bufsize)size));
501 bn = BH(((
char *)ba) + size);
503 KMP_DEBUG_ASSERT(bn->bb.prevfree == b->bh.bb.bsize);
506 b->bh.bb.bsize -= (bufsize)size;
509 ba->bb.prevfree = b->bh.bb.bsize;
512 ba->bb.bsize = -size;
521 __kmp_bget_remove_from_freelist(b);
522 __kmp_bget_insert_into_freelist(thr, b);
524 thr->totalloc += (size_t)size;
527 buf = (
void *)((((
char *)ba) +
sizeof(bhead_t)));
528 KMP_DEBUG_ASSERT(((
size_t)buf) % SizeQuant == 0);
533 ba = BH(((
char *)b) + b->bh.bb.bsize);
535 KMP_DEBUG_ASSERT(ba->bb.prevfree == b->bh.bb.bsize);
540 __kmp_bget_remove_from_freelist(b);
542 thr->totalloc += (size_t)b->bh.bb.bsize;
546 b->bh.bb.bsize = -(b->bh.bb.bsize);
549 TCW_PTR(ba->bb.bthr, th);
555 buf = (
void *)&(b->ql);
556 KMP_DEBUG_ASSERT(((
size_t)buf) % SizeQuant == 0);
562 b = (use_blink ? b->ql.blink : b->ql.flink);
570 if ((thr->compfcn == 0) || (!(*thr->compfcn)(size, ++compactseq))) {
578 if (thr->acqfcn != 0) {
579 if (size > (bufsize)(thr->exp_incr -
sizeof(bhead_t))) {
584 size +=
sizeof(bdhead_t) -
sizeof(bhead_t);
586 KE_TRACE(10, (
"%%%%%% MALLOC( %d )\n", (
int)size));
589 bdh = BDH((*thr->acqfcn)((bufsize)size));
593 bdh->bh.bb.bsize = 0;
596 TCW_PTR(bdh->bh.bb.bthr, th);
598 bdh->bh.bb.prevfree = 0;
601 thr->totalloc += (size_t)size;
605 buf = (
void *)(bdh + 1);
606 KMP_DEBUG_ASSERT(((
size_t)buf) % SizeQuant == 0);
615 KE_TRACE(10, (
"%%%%%% MALLOCB( %d )\n", (
int)thr->exp_incr));
618 newpool = (*thr->acqfcn)((bufsize)thr->exp_incr);
619 KMP_DEBUG_ASSERT(((
size_t)newpool) % SizeQuant == 0);
620 if (newpool != NULL) {
621 bpool(th, newpool, thr->exp_incr);
// Allocate a buffer with bget() and zero-fill it before returning it.
// The number of bytes cleared is `rsize`, which is derived from the header
// stored in front of the buffer rather than from `size` itself; the assertion
// below requires rsize >= size.
// NOTE(review): the declarations of b, bd and rsize and the conditions that
// select between the two rsize computations are missing from this extract.
638 static void *bgetz(kmp_info_t *th, bufsize size) {
639 char *buf = (
char *)bget(th, size);
// Size taken from the bhead located just before the buffer (stored negated).
645 b = BH(buf -
sizeof(bhead_t));
646 rsize = -(b->bb.bsize);
// Size taken from a bdhead instead: its tsize minus the bdhead itself.
650 bd = BDH(buf -
sizeof(bdhead_t));
651 rsize = bd->tsize - (bufsize)
sizeof(bdhead_t);
653 rsize -=
sizeof(bhead_t);
656 KMP_DEBUG_ASSERT(rsize >= size);
658 (void)memset(buf, 0, (bufsize)rsize);
660 return ((
void *)buf);
668 static void *bgetr(kmp_info_t *th,
void *buf, bufsize size) {
673 nbuf = bget(th, size);
680 b = BH(((
char *)buf) -
sizeof(bhead_t));
681 osize = -b->bb.bsize;
686 bd = BDH(((
char *)buf) -
sizeof(bdhead_t));
687 osize = bd->tsize - (bufsize)
sizeof(bdhead_t);
689 osize -=
sizeof(bhead_t);
692 KMP_DEBUG_ASSERT(osize > 0);
694 (void)KMP_MEMCPY((
char *)nbuf, (
char *)buf,
695 (
size_t)((size < osize) ? size : osize));
702 static void brel(kmp_info_t *th,
void *buf) {
703 thr_data_t *thr = get_thr_data(th);
707 KMP_DEBUG_ASSERT(buf != NULL);
708 KMP_DEBUG_ASSERT(((
size_t)buf) % SizeQuant == 0);
710 b = BFH(((
char *)buf) -
sizeof(bhead_t));
712 if (b->bh.bb.bsize == 0) {
715 bdh = BDH(((
char *)buf) -
sizeof(bdhead_t));
716 KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
718 thr->totalloc -= (size_t)bdh->tsize;
723 (
void)memset((
char *)buf, 0x55, (
size_t)(bdh->tsize -
sizeof(bdhead_t)));
726 KE_TRACE(10, (
"%%%%%% FREE( %p )\n", (
void *)bdh));
728 KMP_DEBUG_ASSERT(thr->relfcn != 0);
729 (*thr->relfcn)((
void *)bdh);
733 bth = (kmp_info_t *)((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) &
737 __kmp_bget_enqueue(bth, buf
738 #ifdef USE_QUEUING_LOCK_FOR_BGET
740 __kmp_gtid_from_thread(th)
747 if (b->bh.bb.bsize >= 0) {
750 KMP_DEBUG_ASSERT(b->bh.bb.bsize < 0);
754 KMP_DEBUG_ASSERT(BH((
char *)b - b->bh.bb.bsize)->bb.prevfree == 0);
758 thr->totalloc += (size_t)b->bh.bb.bsize;
763 if (b->bh.bb.prevfree != 0) {
768 bufsize size = b->bh.bb.bsize;
771 KMP_DEBUG_ASSERT(BH((
char *)b - b->bh.bb.prevfree)->bb.bsize ==
773 b = BFH(((
char *)b) - b->bh.bb.prevfree);
774 b->bh.bb.bsize -= size;
777 __kmp_bget_remove_from_freelist(b);
782 b->bh.bb.bsize = -b->bh.bb.bsize;
786 __kmp_bget_insert_into_freelist(thr, b);
792 bn = BFH(((
char *)b) + b->bh.bb.bsize);
793 if (bn->bh.bb.bsize > 0) {
797 KMP_DEBUG_ASSERT(BH((
char *)bn + bn->bh.bb.bsize)->bb.prevfree ==
800 __kmp_bget_remove_from_freelist(bn);
802 b->bh.bb.bsize += bn->bh.bb.bsize;
806 __kmp_bget_remove_from_freelist(b);
807 __kmp_bget_insert_into_freelist(thr, b);
815 bn = BFH(((
char *)b) + b->bh.bb.bsize);
818 (void)memset(((
char *)b) +
sizeof(bfhead_t), 0x55,
819 (
size_t)(b->bh.bb.bsize -
sizeof(bfhead_t)));
821 KMP_DEBUG_ASSERT(bn->bh.bb.bsize < 0);
826 bn->bh.bb.prevfree = b->bh.bb.bsize;
832 if (thr->relfcn != 0 &&
833 b->bh.bb.bsize == (bufsize)(thr->pool_len -
sizeof(bhead_t))) {
839 KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
840 KMP_DEBUG_ASSERT(BH((
char *)b + b->bh.bb.bsize)->bb.bsize == ESent);
841 KMP_DEBUG_ASSERT(BH((
char *)b + b->bh.bb.bsize)->bb.prevfree ==
845 __kmp_bget_remove_from_freelist(b);
847 KE_TRACE(10, (
"%%%%%% FREE( %p )\n", (
void *)b));
853 KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
856 if (thr->last_pool == b)
// Record the allocator callbacks and the expansion increment in the
// thr_data_t that get_thr_data() returns for `th`:
//   compact -> compfcn, acquire -> acqfcn, release -> relfcn,
//   pool_incr -> exp_incr.
// NOTE(review): the line declaring the final parameter (`pool_incr`, used
// below) is missing from this extract.
866 static void bectl(kmp_info_t *th, bget_compact_t compact,
867 bget_acquire_t acquire, bget_release_t release,
869 thr_data_t *thr = get_thr_data(th);
871 thr->compfcn = compact;
872 thr->acqfcn = acquire;
873 thr->relfcn = release;
874 thr->exp_incr = pool_incr;
878 static void bpool(kmp_info_t *th,
void *buf, bufsize len) {
880 thr_data_t *thr = get_thr_data(th);
881 bfhead_t *b = BFH(buf);
884 __kmp_bget_dequeue(th);
887 len &= ~(SizeQuant - 1);
889 if (thr->pool_len == 0) {
891 }
else if (len != thr->pool_len) {
897 KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
903 KMP_DEBUG_ASSERT(len -
sizeof(bhead_t) <= -((bufsize)ESent + 1));
908 b->bh.bb.prevfree = 0;
917 len -=
sizeof(bhead_t);
918 b->bh.bb.bsize = (bufsize)len;
920 TCW_PTR(b->bh.bb.bthr,
921 (kmp_info_t *)((kmp_uintptr_t)th |
925 __kmp_bget_insert_into_freelist(thr, b);
928 (void)memset(((
char *)b) +
sizeof(bfhead_t), 0x55,
929 (
size_t)(len -
sizeof(bfhead_t)));
931 bn = BH(((
char *)b) + len);
932 bn->bb.prevfree = (bufsize)len;
934 KMP_DEBUG_ASSERT((~0) == -1 && (bn != 0));
936 bn->bb.bsize = ESent;
940 static void bfreed(kmp_info_t *th) {
941 int bin = 0, count = 0;
942 int gtid = __kmp_gtid_from_thread(th);
943 thr_data_t *thr = get_thr_data(th);
946 __kmp_printf_no_lock(
"__kmp_printpool: T#%d total=%" KMP_UINT64_SPEC
947 " get=%" KMP_INT64_SPEC
" rel=%" KMP_INT64_SPEC
948 " pblk=%" KMP_INT64_SPEC
" pget=%" KMP_INT64_SPEC
949 " prel=%" KMP_INT64_SPEC
" dget=%" KMP_INT64_SPEC
950 " drel=%" KMP_INT64_SPEC
"\n",
951 gtid, (kmp_uint64)thr->totalloc, (kmp_int64)thr->numget,
952 (kmp_int64)thr->numrel, (kmp_int64)thr->numpblk,
953 (kmp_int64)thr->numpget, (kmp_int64)thr->numprel,
954 (kmp_int64)thr->numdget, (kmp_int64)thr->numdrel);
957 for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
960 for (b = thr->freelist[bin].ql.flink; b != &thr->freelist[bin];
962 bufsize bs = b->bh.bb.bsize;
964 KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
965 KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);
966 KMP_DEBUG_ASSERT(bs > 0);
970 __kmp_printf_no_lock(
971 "__kmp_printpool: T#%d Free block: 0x%p size %6ld bytes.\n", gtid, b,
975 char *lerr = ((
char *)b) +
sizeof(bfhead_t);
976 if ((bs >
sizeof(bfhead_t)) &&
978 (memcmp(lerr, lerr + 1, (
size_t)(bs - (
sizeof(bfhead_t) + 1))) !=
980 __kmp_printf_no_lock(
"__kmp_printpool: T#%d (Contents of above " 981 "free block have been overstored.)\n",
990 __kmp_printf_no_lock(
"__kmp_printpool: T#%d No free blocks\n", gtid);
993 void __kmp_initialize_bget(kmp_info_t *th) {
994 KMP_DEBUG_ASSERT(SizeQuant >=
sizeof(
void *) && (th != 0));
998 bectl(th, (bget_compact_t)0, (bget_acquire_t)malloc, (bget_release_t)free,
999 (bufsize)__kmp_malloc_pool_incr);
1002 void __kmp_finalize_bget(kmp_info_t *th) {
1006 KMP_DEBUG_ASSERT(th != 0);
1009 thr = (thr_data_t *)th->th.th_local.bget_data;
1010 KMP_DEBUG_ASSERT(thr != NULL);
1018 if (thr->relfcn != 0 && b != 0 && thr->numpblk != 0 &&
1019 b->bh.bb.bsize == (bufsize)(thr->pool_len -
sizeof(bhead_t))) {
1020 KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
1021 KMP_DEBUG_ASSERT(BH((
char *)b + b->bh.bb.bsize)->bb.bsize == ESent);
1022 KMP_DEBUG_ASSERT(BH((
char *)b + b->bh.bb.bsize)->bb.prevfree ==
1026 __kmp_bget_remove_from_freelist(b);
1028 KE_TRACE(10, (
"%%%%%% FREE( %p )\n", (
void *)b));
1033 KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
1038 if (th->th.th_local.bget_data != NULL) {
1039 __kmp_free(th->th.th_local.bget_data);
1040 th->th.th_local.bget_data = NULL;
1044 void kmpc_set_poolsize(
size_t size) {
1045 bectl(__kmp_get_thread(), (bget_compact_t)0, (bget_acquire_t)malloc,
1046 (bget_release_t)free, (bufsize)size);
1049 size_t kmpc_get_poolsize(
void) {
1052 p = get_thr_data(__kmp_get_thread());
// Set the `mode` field of the calling thread's allocator data.
// The assignment sits under a check that `mode` equals one of
// bget_mode_fifo, bget_mode_lifo or bget_mode_best.
// NOTE(review): the declaration of `p` and the closing braces are missing
// from this extract.
1057 void kmpc_set_poolmode(
int mode) {
1060 if (mode == bget_mode_fifo || mode == bget_mode_lifo ||
1061 mode == bget_mode_best) {
1062 p = get_thr_data(__kmp_get_thread());
1063 p->mode = (bget_mode_t)mode;
1067 int kmpc_get_poolmode(
void) {
1070 p = get_thr_data(__kmp_get_thread());
1075 void kmpc_get_poolstat(
size_t *maxmem,
size_t *allmem) {
1076 kmp_info_t *th = __kmp_get_thread();
1079 __kmp_bget_dequeue(th);
1087 void kmpc_poolprint(
void) {
1088 kmp_info_t *th = __kmp_get_thread();
1090 __kmp_bget_dequeue(th);
1095 #endif // #if KMP_USE_BGET 1097 void *kmpc_malloc(
size_t size) {
1099 ptr = bget(__kmp_entry_thread(), (bufsize)(size +
sizeof(ptr)));
1102 *(
void **)ptr = ptr;
1103 ptr = (
void **)ptr + 1;
1108 #define IS_POWER_OF_TWO(n) (((n) & ((n)-1)) == 0) 1110 void *kmpc_aligned_malloc(
size_t size,
size_t alignment) {
1112 void *ptr_allocated;
1113 KMP_DEBUG_ASSERT(alignment < 32 * 1024);
1114 if (!IS_POWER_OF_TWO(alignment)) {
1119 size = size +
sizeof(
void *) + alignment;
1120 ptr_allocated = bget(__kmp_entry_thread(), (bufsize)size);
1121 if (ptr_allocated != NULL) {
1123 ptr = (
void *)(((kmp_uintptr_t)ptr_allocated +
sizeof(
void *) + alignment) &
1125 *((
void **)ptr - 1) = ptr_allocated;
// Zero-initialised allocation for the entry thread: asks bgetz() for
// nelem * elsize bytes plus one pointer-sized slot.
// The first slot of the block receives the block's own start address and the
// pointer is then advanced past that slot.
// NOTE(review): nelem * elsize + sizeof(ptr) is not checked for overflow
// before the cast to bufsize.
// NOTE(review): the declaration of `ptr`, any NULL check and the return
// statement are missing from this extract.
1132 void *kmpc_calloc(
size_t nelem,
size_t elsize) {
1134 ptr = bgetz(__kmp_entry_thread(), (bufsize)(nelem * elsize +
sizeof(ptr)));
// Stash the real block address in the hidden slot, then step over it.
1137 *(
void **)ptr = ptr;
1138 ptr = (
void **)ptr + 1;
1143 void *kmpc_realloc(
void *ptr,
size_t size) {
1144 void *result = NULL;
1147 result = bget(__kmp_entry_thread(), (bufsize)(size +
sizeof(ptr)));
1149 if (result != NULL) {
1150 *(
void **)result = result;
1151 result = (
void **)result + 1;
1153 }
else if (size == 0) {
1159 KMP_ASSERT(*((
void **)ptr - 1));
1160 brel(__kmp_get_thread(), *((
void **)ptr - 1));
1162 result = bgetr(__kmp_entry_thread(), *((
void **)ptr - 1),
1163 (bufsize)(size +
sizeof(ptr)));
1164 if (result != NULL) {
1165 *(
void **)result = result;
1166 result = (
void **)result + 1;
// Release a buffer through the calling thread's allocator.
// The address handed to brel() is read from the pointer-sized slot located
// immediately before `ptr`; KMP_ASSERT requires that slot to be non-null.
// __kmp_bget_dequeue(th) is called before the release.
// NOTE(review): the body of the `!__kmp_init_serial` branch, any NULL check
// on `ptr`, and the closing braces are missing from this extract.
1173 void kmpc_free(
void *ptr) {
1174 if (!__kmp_init_serial) {
1178 kmp_info_t *th = __kmp_get_thread();
1179 __kmp_bget_dequeue(th);
1181 KMP_ASSERT(*((
void **)ptr - 1));
1182 brel(th, *((
void **)ptr - 1));
// Allocate `size` bytes from thread `th`'s allocator via bget(), tracing the
// call and its result at KE_TRACE level 30.
// NOTE(review): the trace prints `size` through an (int) cast, so large
// sizes are truncated in the log only.
// NOTE(review): the declaration of `ptr` and the return statement are
// missing from this extract.
1186 void *___kmp_thread_malloc(kmp_info_t *th,
size_t size KMP_SRC_LOC_DECL) {
1188 KE_TRACE(30, (
"-> __kmp_thread_malloc( %p, %d ) called from %s:%d\n", th,
1189 (
int)size KMP_SRC_LOC_PARM));
1190 ptr = bget(th, (bufsize)size);
1191 KE_TRACE(30, (
"<- __kmp_thread_malloc() returns %p\n", ptr));
// Allocate nelem * elsize zeroed bytes from thread `th`'s allocator via
// bgetz(), tracing the call and its result at KE_TRACE level 30.
// NOTE(review): nelem * elsize is not checked for overflow before the cast
// to bufsize.
// NOTE(review): the declaration of `ptr` and the return statement are
// missing from this extract.
1195 void *___kmp_thread_calloc(kmp_info_t *th,
size_t nelem,
1196 size_t elsize KMP_SRC_LOC_DECL) {
1198 KE_TRACE(30, (
"-> __kmp_thread_calloc( %p, %d, %d ) called from %s:%d\n", th,
1199 (
int)nelem, (
int)elsize KMP_SRC_LOC_PARM));
1200 ptr = bgetz(th, (bufsize)(nelem * elsize));
1201 KE_TRACE(30, (
"<- __kmp_thread_calloc() returns %p\n", ptr));
// Resize buffer `ptr` to `size` bytes through bgetr() on thread `th`'s
// allocator; the result of bgetr() overwrites the local `ptr`.
// Entry and exit are traced at KE_TRACE level 30.
// NOTE(review): the return statement is missing from this extract.
1205 void *___kmp_thread_realloc(kmp_info_t *th,
void *ptr,
1206 size_t size KMP_SRC_LOC_DECL) {
1207 KE_TRACE(30, (
"-> __kmp_thread_realloc( %p, %p, %d ) called from %s:%d\n", th,
1208 ptr, (
int)size KMP_SRC_LOC_PARM));
1209 ptr = bgetr(th, ptr, (bufsize)size);
1210 KE_TRACE(30, (
"<- __kmp_thread_realloc() returns %p\n", ptr));
1214 void ___kmp_thread_free(kmp_info_t *th,
void *ptr KMP_SRC_LOC_DECL) {
1215 KE_TRACE(30, (
"-> __kmp_thread_free( %p, %p ) called from %s:%d\n", th,
1216 ptr KMP_SRC_LOC_PARM));
1218 __kmp_bget_dequeue(th);
1221 KE_TRACE(30, (
"<- __kmp_thread_free()\n"));
1226 static int (*p_hbw_check)(void);
1227 static void *(*p_hbw_malloc)(size_t);
1228 static void (*p_hbw_free)(
void *);
1229 static int (*p_hbw_set_policy)(int);
1230 static const char *kmp_mk_lib_name;
1231 static void *h_memkind;
1233 void __kmp_init_memkind() {
1234 #if KMP_OS_UNIX && KMP_DYNAMIC_LIB 1235 kmp_mk_lib_name =
"libmemkind.so";
1236 h_memkind = dlopen(kmp_mk_lib_name, RTLD_LAZY);
1238 p_hbw_check = (int (*)())dlsym(h_memkind,
"hbw_check_available");
1239 p_hbw_malloc = (
void *(*)(size_t))dlsym(h_memkind,
"hbw_malloc");
1240 p_hbw_free = (void (*)(
void *))dlsym(h_memkind,
"hbw_free");
1241 p_hbw_set_policy = (int (*)(int))dlsym(h_memkind,
"hbw_set_policy");
1242 if (p_hbw_check && p_hbw_malloc && p_hbw_free && p_hbw_set_policy) {
1243 __kmp_memkind_available = 1;
1244 if (p_hbw_check() == 0) {
1245 p_hbw_set_policy(1);
1246 __kmp_hbw_mem_available = 1;
1254 p_hbw_malloc = NULL;
1256 p_hbw_set_policy = NULL;
1258 kmp_mk_lib_name =
"";
1261 p_hbw_malloc = NULL;
1263 p_hbw_set_policy = NULL;
1267 void __kmp_fini_memkind() {
1268 #if KMP_OS_UNIX && KMP_DYNAMIC_LIB 1274 p_hbw_malloc = NULL;
1276 p_hbw_set_policy = NULL;
1280 void __kmpc_set_default_allocator(
int gtid,
const omp_allocator_t *allocator) {
1281 if (allocator == OMP_NULL_ALLOCATOR)
1282 allocator = omp_default_mem_alloc;
1284 allocator == omp_default_mem_alloc ||
1285 allocator == omp_large_cap_mem_alloc ||
1286 allocator == omp_const_mem_alloc || allocator == omp_high_bw_mem_alloc ||
1287 allocator == omp_low_lat_mem_alloc || allocator == omp_cgroup_mem_alloc ||
1288 allocator == omp_pteam_mem_alloc || allocator == omp_thread_mem_alloc);
1289 __kmp_threads[gtid]->th.th_def_allocator = allocator;
1291 const omp_allocator_t *__kmpc_get_default_allocator(
int gtid) {
1292 return __kmp_threads[gtid]->th.th_def_allocator;
1295 typedef struct kmp_mem_desc {
1299 const omp_allocator_t *allocator;
1301 static int alignment =
sizeof(
void *);
// Allocate `size` bytes with the given allocator and return a pointer aligned
// to the file-scope `alignment` value.
// A kmp_mem_desc_t recording the raw pointer, the aligned pointer and the
// allocator is written immediately below the returned address.
// OMP_NULL_ALLOCATOR is replaced by the thread's default allocator.
// NOTE(review): declarations of `ptr` and `addr`, any handling of a failed
// allocation, and closing braces are missing from this extract.
1303 void *__kmpc_alloc(
int gtid,
size_t size,
const omp_allocator_t *allocator) {
1304 KMP_DEBUG_ASSERT(__kmp_init_serial);
1305 if (allocator == OMP_NULL_ALLOCATOR)
1306 allocator = __kmp_threads[gtid]->th.th_def_allocator;
1308 int sz_desc =
sizeof(kmp_mem_desc_t);
1310 kmp_mem_desc_t desc;
1312 kmp_uintptr_t addr_align;
1313 kmp_uintptr_t addr_descr;
1315 KE_TRACE(25, (
"__kmpc_alloc: T#%d (%d, %p)\n", gtid, (
int)size, allocator));
// Over-allocate so that both the descriptor and the alignment padding fit.
1317 desc.size_a = size + sz_desc + alignment;
1318 if (allocator == omp_default_mem_alloc)
1319 ptr = __kmp_allocate(desc.size_a);
// High-bandwidth memory is used only when it was detected as available.
1320 if (allocator == omp_high_bw_mem_alloc && __kmp_hbw_mem_available) {
1321 KMP_DEBUG_ASSERT(p_hbw_malloc != NULL);
1322 ptr = p_hbw_malloc(desc.size_a);
// NOTE(review): desc.size_a is printed with %d; if that field is a size_t the
// format does not match -- confirm against the kmp_mem_desc definition.
1325 KE_TRACE(10, (
"__kmpc_alloc: T#%d %p=alloc(%d) hbw %d\n", gtid, ptr,
1326 desc.size_a, __kmp_hbw_mem_available));
// Round up past the descriptor to the next multiple of `alignment`; the
// descriptor then sits directly below the aligned address.
1330 addr = (kmp_uintptr_t)ptr;
1331 addr_align = (addr + sz_desc + alignment - 1) & ~(alignment - 1);
1332 addr_descr = addr_align - sz_desc;
1334 desc.ptr_alloc = ptr;
1335 desc.ptr_align = (
void *)addr_align;
1336 desc.allocator = allocator;
1337 *((kmp_mem_desc_t *)addr_descr) = desc;
1340 KE_TRACE(25, (
"__kmpc_alloc returns %p, T#%d\n", desc.ptr_align, gtid));
1341 return desc.ptr_align;
// Free memory obtained from __kmpc_alloc().
// The kmp_mem_desc_t stored immediately below `ptr` is read back; the
// allocator recorded there overrides the `allocator` argument, and the raw
// pointer recorded there (ptr_alloc) is what is actually released.
// NOTE(review): any early-return for a NULL `ptr`, the conditions around the
// two allocator assertions, and the tail of the final trace call are missing
// from this extract.
1344 void __kmpc_free(
int gtid,
void *ptr,
const omp_allocator_t *allocator) {
1345 KE_TRACE(25, (
"__kmpc_free: T#%d free(%p,%p)\n", gtid, ptr, allocator));
1349 kmp_mem_desc_t desc;
1350 kmp_uintptr_t addr_align;
1351 kmp_uintptr_t addr_descr;
// The descriptor occupies the sizeof(kmp_mem_desc_t) bytes just below ptr.
1353 addr_align = (kmp_uintptr_t)ptr;
1354 addr_descr = addr_align -
sizeof(kmp_mem_desc_t);
1355 desc = *((kmp_mem_desc_t *)addr_descr);
1357 KMP_DEBUG_ASSERT(desc.ptr_align == ptr);
1359 KMP_DEBUG_ASSERT(desc.allocator == allocator);
1361 allocator = desc.allocator;
1363 KMP_DEBUG_ASSERT(allocator);
// Release through the same back end that __kmpc_alloc() used.
1365 if (allocator == omp_default_mem_alloc)
1366 __kmp_free(desc.ptr_alloc);
1367 if (allocator == omp_high_bw_mem_alloc && __kmp_hbw_mem_available) {
1368 KMP_DEBUG_ASSERT(p_hbw_free != NULL);
1369 p_hbw_free(desc.ptr_alloc);
1371 KE_TRACE(10, (
"__kmpc_free: T#%d freed %p (%p)\n", gtid, desc.ptr_alloc,
1381 struct kmp_mem_descr {
1382 void *ptr_allocated;
1383 size_t size_allocated;
1385 size_t size_aligned;
1387 typedef struct kmp_mem_descr kmp_mem_descr_t;
// Allocate `size` bytes aligned to `alignment` and return the aligned
// pointer.
// The request is padded by sizeof(kmp_mem_descr_t) + alignment; a
// kmp_mem_descr_t describing the raw and aligned regions is stored directly
// below the returned address. The aligned region is zero-filled.
// A NULL result from the underlying malloc leads to KMP_FATAL(OutOfHeapMemory).
// NOTE(review): preprocessor conditionals and the `addr_aligned =` line head
// are missing from this extract; the two malloc calls and the 0xEF fill are
// presumably alternative or conditional builds -- confirm against the full
// file.
1392 static void *___kmp_allocate_align(
size_t size,
1393 size_t alignment KMP_SRC_LOC_DECL) {
1410 kmp_mem_descr_t descr;
1411 kmp_uintptr_t addr_allocated;
1412 kmp_uintptr_t addr_aligned;
1413 kmp_uintptr_t addr_descr;
1415 KE_TRACE(25, (
"-> ___kmp_allocate_align( %d, %d ) called from %s:%d\n",
1416 (
int)size, (
int)alignment KMP_SRC_LOC_PARM));
1418 KMP_DEBUG_ASSERT(alignment < 32 * 1024);
1419 KMP_DEBUG_ASSERT(
sizeof(
void *) <=
sizeof(kmp_uintptr_t));
// Pad the request so the descriptor and the alignment slack both fit.
1422 descr.size_aligned = size;
1423 descr.size_allocated =
1424 descr.size_aligned +
sizeof(kmp_mem_descr_t) + alignment;
1427 descr.ptr_allocated = _malloc_src_loc(descr.size_allocated, _file_, _line_);
1429 descr.ptr_allocated = malloc_src_loc(descr.size_allocated KMP_SRC_LOC_PARM);
1431 KE_TRACE(10, (
" malloc( %d ) returned %p\n", (
int)descr.size_allocated,
1432 descr.ptr_allocated));
1433 if (descr.ptr_allocated == NULL) {
1434 KMP_FATAL(OutOfHeapMemory);
// Skip past room for the descriptor, then mask down to an alignment boundary
// (the mask assumes `alignment` is a power of two).
1437 addr_allocated = (kmp_uintptr_t)descr.ptr_allocated;
1439 (addr_allocated +
sizeof(kmp_mem_descr_t) + alignment) & ~(alignment - 1);
1440 addr_descr = addr_aligned -
sizeof(kmp_mem_descr_t);
1442 descr.ptr_aligned = (
void *)addr_aligned;
1444 KE_TRACE(26, (
" ___kmp_allocate_align: " 1445 "ptr_allocated=%p, size_allocated=%d, " 1446 "ptr_aligned=%p, size_aligned=%d\n",
1447 descr.ptr_allocated, (
int)descr.size_allocated,
1448 descr.ptr_aligned, (
int)descr.size_aligned));
// Layout invariants: descriptor inside the raw block and adjacent to the
// aligned region, aligned region inside the raw block, address aligned.
1450 KMP_DEBUG_ASSERT(addr_allocated <= addr_descr);
1451 KMP_DEBUG_ASSERT(addr_descr +
sizeof(kmp_mem_descr_t) == addr_aligned);
1452 KMP_DEBUG_ASSERT(addr_aligned + descr.size_aligned <=
1453 addr_allocated + descr.size_allocated);
1454 KMP_DEBUG_ASSERT(addr_aligned % alignment == 0);
1456 memset(descr.ptr_allocated, 0xEF, descr.size_allocated);
1459 memset(descr.ptr_aligned, 0x00, descr.size_aligned);
// Store the descriptor last so the fills above cannot overwrite it.
1465 *((kmp_mem_descr_t *)addr_descr) = descr;
1469 KE_TRACE(25, (
"<- ___kmp_allocate_align() returns %p\n", descr.ptr_aligned));
1470 return descr.ptr_aligned;
// Allocate `size` bytes through ___kmp_allocate_align() using
// __kmp_align_alloc as the alignment; entry and exit are traced at level 25.
// NOTE(review): the declaration of `ptr` and the return statement are
// missing from this extract.
1477 void *___kmp_allocate(
size_t size KMP_SRC_LOC_DECL) {
1479 KE_TRACE(25, (
"-> __kmp_allocate( %d ) called from %s:%d\n",
1480 (
int)size KMP_SRC_LOC_PARM));
1481 ptr = ___kmp_allocate_align(size, __kmp_align_alloc KMP_SRC_LOC_PARM);
1482 KE_TRACE(25, (
"<- __kmp_allocate() returns %p\n", ptr));
// Allocate `size` bytes through ___kmp_allocate_align() with a fixed
// alignment of 8 * 1024 bytes (the local `page_size`); the value is a
// hard-coded constant, not queried from the system.
// NOTE(review): the declaration of `ptr` and the return statement are
// missing from this extract.
1490 void *___kmp_page_allocate(
size_t size KMP_SRC_LOC_DECL) {
1491 int page_size = 8 * 1024;
1494 KE_TRACE(25, (
"-> __kmp_page_allocate( %d ) called from %s:%d\n",
1495 (
int)size KMP_SRC_LOC_PARM));
1496 ptr = ___kmp_allocate_align(size, page_size KMP_SRC_LOC_PARM);
1497 KE_TRACE(25, (
"<- __kmp_page_allocate( %d ) returns %p\n", (
int)size, ptr));
// Free a block whose kmp_mem_descr_t is stored immediately below `ptr`.
// The descriptor is read back, checked against `ptr` with debug assertions,
// and its ptr_allocated field is what gets released.
// `ptr` must not be NULL (KMP_ASSERT).
// NOTE(review): the head of the first trace call and the preprocessor
// conditionals are missing from this extract; the two *free_src_loc calls
// and the 0xEF fill are presumably alternative or conditional builds --
// confirm against the full file.
1503 void ___kmp_free(
void *ptr KMP_SRC_LOC_DECL) {
1504 kmp_mem_descr_t descr;
1505 kmp_uintptr_t addr_allocated;
1506 kmp_uintptr_t addr_aligned;
1509 (
"-> __kmp_free( %p ) called from %s:%d\n", ptr KMP_SRC_LOC_PARM));
1510 KMP_ASSERT(ptr != NULL);
// Copy the descriptor out of the block before the block is touched.
1512 descr = *(kmp_mem_descr_t *)((kmp_uintptr_t)ptr -
sizeof(kmp_mem_descr_t));
1514 KE_TRACE(26, (
" __kmp_free: " 1515 "ptr_allocated=%p, size_allocated=%d, " 1516 "ptr_aligned=%p, size_aligned=%d\n",
1517 descr.ptr_allocated, (
int)descr.size_allocated,
1518 descr.ptr_aligned, (
int)descr.size_aligned));
1520 addr_allocated = (kmp_uintptr_t)descr.ptr_allocated;
1521 addr_aligned = (kmp_uintptr_t)descr.ptr_aligned;
// Consistency checks on the recovered descriptor.
1523 KMP_DEBUG_ASSERT(addr_aligned % CACHE_LINE == 0);
1524 KMP_DEBUG_ASSERT(descr.ptr_aligned == ptr);
1525 KMP_DEBUG_ASSERT(addr_allocated +
sizeof(kmp_mem_descr_t) <= addr_aligned);
1526 KMP_DEBUG_ASSERT(descr.size_aligned < descr.size_allocated);
1527 KMP_DEBUG_ASSERT(addr_aligned + descr.size_aligned <=
1528 addr_allocated + descr.size_allocated);
1531 memset(descr.ptr_allocated, 0xEF, descr.size_allocated);
1536 KE_TRACE(10, (
" free( %p )\n", descr.ptr_allocated));
1538 _free_src_loc(descr.ptr_allocated, _file_, _line_);
1540 free_src_loc(descr.ptr_allocated KMP_SRC_LOC_PARM);
1544 KE_TRACE(25, (
"<- __kmp_free() returns\n"));
1547 #if USE_FAST_MEMORY == 3 1553 #define KMP_FREE_LIST_LIMIT 16 1556 #define DCACHE_LINE 128 1558 void *___kmp_fast_allocate(kmp_info_t *this_thr,
size_t size KMP_SRC_LOC_DECL) {
1565 kmp_mem_descr_t *descr;
1567 KE_TRACE(25, (
"-> __kmp_fast_allocate( T#%d, %d ) called from %s:%d\n",
1568 __kmp_gtid_from_thread(this_thr), (
int)size KMP_SRC_LOC_PARM));
1570 num_lines = (size + DCACHE_LINE - 1) / DCACHE_LINE;
1571 idx = num_lines - 1;
1572 KMP_DEBUG_ASSERT(idx >= 0);
1576 }
else if ((idx >>= 2) == 0) {
1579 }
else if ((idx >>= 2) == 0) {
1582 }
else if ((idx >>= 2) == 0) {
1589 ptr = this_thr->th.th_free_lists[index].th_free_list_self;
1592 this_thr->th.th_free_lists[index].th_free_list_self = *((
void **)ptr);
1595 ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr -
sizeof(kmp_mem_descr_t)))
1599 ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync);
1604 while (!KMP_COMPARE_AND_STORE_PTR(
1605 &this_thr->th.th_free_lists[index].th_free_list_sync, ptr,
nullptr)) {
1607 ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync);
1611 this_thr->th.th_free_lists[index].th_free_list_self = *((
void **)ptr);
1614 ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr -
sizeof(kmp_mem_descr_t)))
1621 size = num_lines * DCACHE_LINE;
1623 alloc_size = size +
sizeof(kmp_mem_descr_t) + DCACHE_LINE;
1624 KE_TRACE(25, (
"__kmp_fast_allocate: T#%d Calling __kmp_thread_malloc with " 1626 __kmp_gtid_from_thread(this_thr), alloc_size));
1627 alloc_ptr = bget(this_thr, (bufsize)alloc_size);
1630 ptr = (
void *)((((kmp_uintptr_t)alloc_ptr) +
sizeof(kmp_mem_descr_t) +
1632 ~(DCACHE_LINE - 1));
1633 descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) -
sizeof(kmp_mem_descr_t));
1635 descr->ptr_allocated = alloc_ptr;
1637 descr->ptr_aligned = (
void *)this_thr;
1640 descr->size_aligned = size;
1643 KE_TRACE(25, (
"<- __kmp_fast_allocate( T#%d ) returns %p\n",
1644 __kmp_gtid_from_thread(this_thr), ptr));
1650 void ___kmp_fast_free(kmp_info_t *this_thr,
void *ptr KMP_SRC_LOC_DECL) {
1651 kmp_mem_descr_t *descr;
1652 kmp_info_t *alloc_thr;
1657 KE_TRACE(25, (
"-> __kmp_fast_free( T#%d, %p ) called from %s:%d\n",
1658 __kmp_gtid_from_thread(this_thr), ptr KMP_SRC_LOC_PARM));
1659 KMP_ASSERT(ptr != NULL);
1661 descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) -
sizeof(kmp_mem_descr_t));
1663 KE_TRACE(26, (
" __kmp_fast_free: size_aligned=%d\n",
1664 (
int)descr->size_aligned));
1666 size = descr->size_aligned;
1668 idx = DCACHE_LINE * 2;
1671 }
else if ((idx <<= 1) == size) {
1673 }
else if ((idx <<= 2) == size) {
1675 }
else if ((idx <<= 2) == size) {
1678 KMP_DEBUG_ASSERT(size > DCACHE_LINE * 64);
1682 alloc_thr = (kmp_info_t *)descr->ptr_aligned;
1683 if (alloc_thr == this_thr) {
1685 *((
void **)ptr) = this_thr->th.th_free_lists[index].th_free_list_self;
1686 this_thr->th.th_free_lists[index].th_free_list_self = ptr;
1688 void *head = this_thr->th.th_free_lists[index].th_free_list_other;
1691 this_thr->th.th_free_lists[index].th_free_list_other = ptr;
1692 *((
void **)ptr) = NULL;
1693 descr->size_allocated = (size_t)1;
1696 kmp_mem_descr_t *dsc =
1697 (kmp_mem_descr_t *)((
char *)head -
sizeof(kmp_mem_descr_t));
1699 kmp_info_t *q_th = (kmp_info_t *)(dsc->ptr_aligned);
1701 dsc->size_allocated + 1;
1702 if (q_th == alloc_thr && q_sz <= KMP_FREE_LIST_LIMIT) {
1704 *((
void **)ptr) = head;
1705 descr->size_allocated = q_sz;
1706 this_thr->th.th_free_lists[index].th_free_list_other = ptr;
1713 void *next = *((
void **)head);
1714 while (next != NULL) {
1717 ((kmp_mem_descr_t *)((
char *)next -
sizeof(kmp_mem_descr_t)))
1720 ((kmp_mem_descr_t *)((
char *)tail -
sizeof(kmp_mem_descr_t)))
1723 next = *((
void **)next);
1725 KMP_DEBUG_ASSERT(q_th != NULL);
1727 old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync);
1730 *((
void **)tail) = old_ptr;
1732 while (!KMP_COMPARE_AND_STORE_PTR(
1733 &q_th->th.th_free_lists[index].th_free_list_sync, old_ptr, head)) {
1735 old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync);
1736 *((
void **)tail) = old_ptr;
1740 this_thr->th.th_free_lists[index].th_free_list_other = ptr;
1741 *((
void **)ptr) = NULL;
1742 descr->size_allocated = (size_t)1;
1749 KE_TRACE(25, (
"__kmp_fast_free: T#%d Calling __kmp_thread_free for size %d\n",
1750 __kmp_gtid_from_thread(this_thr), size));
1751 __kmp_bget_dequeue(this_thr);
1752 brel(this_thr, descr->ptr_allocated);
1755 KE_TRACE(25, (
"<- __kmp_fast_free() returns\n"));
1761 void __kmp_initialize_fast_memory(kmp_info_t *this_thr) {
1762 KE_TRACE(10, (
"__kmp_initialize_fast_memory: Called from th %p\n", this_thr));
1764 memset(this_thr->th.th_free_lists, 0, NUM_LISTS *
sizeof(kmp_free_list_t));
1769 void __kmp_free_fast_memory(kmp_info_t *th) {
1772 thr_data_t *thr = get_thr_data(th);
1776 5, (
"__kmp_free_fast_memory: Called T#%d\n", __kmp_gtid_from_thread(th)));
1778 __kmp_bget_dequeue(th);
1781 for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
1782 bfhead_t *b = thr->freelist[bin].ql.flink;
1783 while (b != &thr->freelist[bin]) {
1784 if ((kmp_uintptr_t)b->bh.bb.bthr & 1) {
1792 while (lst != NULL) {
1794 KE_TRACE(10, (
"__kmp_free_fast_memory: freeing %p, next=%p th %p (%d)\n",
1795 lst, next, th, __kmp_gtid_from_thread(th)));
1796 (*thr->relfcn)(lst);
1802 lst = (
void **)next;
1806 5, (
"__kmp_free_fast_memory: Freed T#%d\n", __kmp_gtid_from_thread(th)));
1809 #endif // USE_FAST_MEMORY