LLVM OpenMP* Runtime Library
kmp_threadprivate.cpp
/*
 * kmp_threadprivate.cpp -- OpenMP threadprivate support library
 */

//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//

#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"

#define USE_CHECKS_COMMON

#define KMP_INLINE_SUBR 1

void kmp_threadprivate_insert_private_data(int gtid, void *pc_addr,
                                           void *data_addr, size_t pc_size);
struct private_common *kmp_threadprivate_insert(int gtid, void *pc_addr,
                                                void *data_addr,
                                                size_t pc_size);

struct shared_table __kmp_threadprivate_d_table;

static
#ifdef KMP_INLINE_SUBR
    __forceinline
#endif
    struct private_common *
    __kmp_threadprivate_find_task_common(struct common_table *tbl, int gtid,
                                         void *pc_addr)

{
  struct private_common *tn;

#ifdef KMP_TASK_COMMON_DEBUG
  KC_TRACE(10, ("__kmp_threadprivate_find_task_common: thread#%d, called with "
                "address %p\n",
                gtid, pc_addr));
  dump_list();
#endif

  for (tn = tbl->data[KMP_HASH(pc_addr)]; tn; tn = tn->next) {
    if (tn->gbl_addr == pc_addr) {
#ifdef KMP_TASK_COMMON_DEBUG
      KC_TRACE(10, ("__kmp_threadprivate_find_task_common: thread#%d, found "
                    "node %p on list\n",
                    gtid, pc_addr));
#endif
      return tn;
    }
  }
  return 0;
}

static
#ifdef KMP_INLINE_SUBR
    __forceinline
#endif
    struct shared_common *
    __kmp_find_shared_task_common(struct shared_table *tbl, int gtid,
                                  void *pc_addr) {
  struct shared_common *tn;

  for (tn = tbl->data[KMP_HASH(pc_addr)]; tn; tn = tn->next) {
    if (tn->gbl_addr == pc_addr) {
#ifdef KMP_TASK_COMMON_DEBUG
      KC_TRACE(
          10,
          ("__kmp_find_shared_task_common: thread#%d, found node %p on list\n",
           gtid, pc_addr));
#endif
      return tn;
    }
  }
  return 0;
}
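
// Both lookups above follow the same pattern: hash the address of the original
// global, then walk that bucket's collision chain comparing gbl_addr. A
// minimal stand-alone sketch of that pattern (hypothetical names and hash
// function, kept out of the build with #if 0):
#if 0
#include <cstddef>

struct node {
  void *gbl_addr; // address of the original (serial) variable
  node *next;     // collision chain
};

enum { TABLE_SIZE = 997 };

static std::size_t hash_addr(void *p) {
  return (reinterpret_cast<std::size_t>(p) >> 4) % TABLE_SIZE;
}

static node *find(node *const table[TABLE_SIZE], void *gbl_addr) {
  for (node *tn = table[hash_addr(gbl_addr)]; tn; tn = tn->next)
    if (tn->gbl_addr == gbl_addr)
      return tn;
  return nullptr; // this address has not been registered yet
}
#endif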

// Create a template for the data initialized storage. Either the template is
// NULL indicating zero fill, or the template is a copy of the original data.
static struct private_data *__kmp_init_common_data(void *pc_addr,
                                                   size_t pc_size) {
  struct private_data *d;
  size_t i;
  char *p;

  d = (struct private_data *)__kmp_allocate(sizeof(struct private_data));
  /*
    d->data = 0; // AC: commented out because __kmp_allocate zeroes the
    memory
    d->next = 0;
  */
  d->size = pc_size;
  d->more = 1;

  p = (char *)pc_addr;

  for (i = pc_size; i > 0; --i) {
    if (*p++ != '\0') {
      d->data = __kmp_allocate(pc_size);
      KMP_MEMCPY(d->data, pc_addr, pc_size);
      break;
    }
  }

  return d;
}

// Initialize the data area from the template.
static void __kmp_copy_common_data(void *pc_addr, struct private_data *d) {
  char *addr = (char *)pc_addr;
  int i, offset;

  for (offset = 0; d != 0; d = d->next) {
    for (i = d->more; i > 0; --i) {
      if (d->data == 0)
        memset(&addr[offset], '\0', d->size);
      else
        KMP_MEMCPY(&addr[offset], d->data, d->size);
      offset += d->size;
    }
  }
}
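
// A stand-alone sketch of the template mechanism above (hypothetical type and
// function names, kept out of the build with #if 0): a chunk whose data
// pointer is NULL is replayed as a zero fill, otherwise its bytes are copied,
// and each chunk is repeated `more` times, mirroring __kmp_copy_common_data.
#if 0
#include <cstddef>
#include <cstring>

struct tmpl_chunk {
  void *data;       // NULL means "zero fill"; otherwise bytes captured from
                    // the original variable by the init step
  std::size_t size; // bytes covered by one repetition of this chunk
  int more;         // number of consecutive repetitions
  tmpl_chunk *next;
};

static void replay_template(void *dst, const tmpl_chunk *t) {
  char *addr = static_cast<char *>(dst);
  std::size_t offset = 0;
  for (; t; t = t->next) {
    for (int i = t->more; i > 0; --i) {
      if (t->data == nullptr)
        std::memset(addr + offset, 0, t->size);
      else
        std::memcpy(addr + offset, t->data, t->size);
      offset += t->size;
    }
  }
}
#endif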

/* we are called from __kmp_serial_initialize() with __kmp_initz_lock held. */
void __kmp_common_initialize(void) {
  if (!TCR_4(__kmp_init_common)) {
    int q;
#ifdef KMP_DEBUG
    int gtid;
#endif

    __kmp_threadpriv_cache_list = NULL;

#ifdef KMP_DEBUG
    /* verify the uber masters were initialized */
    for (gtid = 0; gtid < __kmp_threads_capacity; gtid++)
      if (__kmp_root[gtid]) {
        KMP_DEBUG_ASSERT(__kmp_root[gtid]->r.r_uber_thread);
        for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q)
          KMP_DEBUG_ASSERT(
              !__kmp_root[gtid]->r.r_uber_thread->th.th_pri_common->data[q]);
        /* __kmp_root[ gtid ]-> r.r_uber_thread ->
         * th.th_pri_common -> data[ q ] = 0;*/
      }
#endif /* KMP_DEBUG */

    for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q)
      __kmp_threadprivate_d_table.data[q] = 0;

    TCW_4(__kmp_init_common, TRUE);
  }
}

/* Call all destructors for threadprivate data belonging to all threads.
   Currently unused! */
void __kmp_common_destroy(void) {
  if (TCR_4(__kmp_init_common)) {
    int q;

    TCW_4(__kmp_init_common, FALSE);

    for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q) {
      int gtid;
      struct private_common *tn;
      struct shared_common *d_tn;

      /* C++ destructors need to be called once per thread before exiting.
         Don't call destructors for master thread though unless we used copy
         constructor */

      for (d_tn = __kmp_threadprivate_d_table.data[q]; d_tn;
           d_tn = d_tn->next) {
        if (d_tn->is_vec) {
          if (d_tn->dt.dtorv != 0) {
            for (gtid = 0; gtid < __kmp_all_nth; ++gtid) {
              if (__kmp_threads[gtid]) {
                if ((__kmp_foreign_tp) ? (!KMP_INITIAL_GTID(gtid))
                                       : (!KMP_UBER_GTID(gtid))) {
                  tn = __kmp_threadprivate_find_task_common(
                      __kmp_threads[gtid]->th.th_pri_common, gtid,
                      d_tn->gbl_addr);
                  if (tn) {
                    (*d_tn->dt.dtorv)(tn->par_addr, d_tn->vec_len);
                  }
                }
              }
            }
            if (d_tn->obj_init != 0) {
              (*d_tn->dt.dtorv)(d_tn->obj_init, d_tn->vec_len);
            }
          }
        } else {
          if (d_tn->dt.dtor != 0) {
            for (gtid = 0; gtid < __kmp_all_nth; ++gtid) {
              if (__kmp_threads[gtid]) {
                if ((__kmp_foreign_tp) ? (!KMP_INITIAL_GTID(gtid))
                                       : (!KMP_UBER_GTID(gtid))) {
                  tn = __kmp_threadprivate_find_task_common(
                      __kmp_threads[gtid]->th.th_pri_common, gtid,
                      d_tn->gbl_addr);
                  if (tn) {
                    (*d_tn->dt.dtor)(tn->par_addr);
                  }
                }
              }
            }
            if (d_tn->obj_init != 0) {
              (*d_tn->dt.dtor)(d_tn->obj_init);
            }
          }
        }
      }
      __kmp_threadprivate_d_table.data[q] = 0;
    }
  }
}

/* Call all destructors for threadprivate data belonging to this thread */
void __kmp_common_destroy_gtid(int gtid) {
  struct private_common *tn;
  struct shared_common *d_tn;

  if (!TCR_4(__kmp_init_gtid)) {
    // This is possible when one of multiple roots initiates early library
    // termination in a sequential region while other teams are active, and its
    // child threads are about to end.
    return;
  }

  KC_TRACE(10, ("__kmp_common_destroy_gtid: T#%d called\n", gtid));
  if ((__kmp_foreign_tp) ? (!KMP_INITIAL_GTID(gtid)) : (!KMP_UBER_GTID(gtid))) {

    if (TCR_4(__kmp_init_common)) {

      /* Cannot do this here since not all threads have destroyed their data */
      /* TCW_4(__kmp_init_common, FALSE); */

      for (tn = __kmp_threads[gtid]->th.th_pri_head; tn; tn = tn->link) {

        d_tn = __kmp_find_shared_task_common(&__kmp_threadprivate_d_table, gtid,
                                             tn->gbl_addr);

        KMP_DEBUG_ASSERT(d_tn);

        if (d_tn->is_vec) {
          if (d_tn->dt.dtorv != 0) {
            (void)(*d_tn->dt.dtorv)(tn->par_addr, d_tn->vec_len);
          }
          if (d_tn->obj_init != 0) {
            (void)(*d_tn->dt.dtorv)(d_tn->obj_init, d_tn->vec_len);
          }
        } else {
          if (d_tn->dt.dtor != 0) {
            (void)(*d_tn->dt.dtor)(tn->par_addr);
          }
          if (d_tn->obj_init != 0) {
            (void)(*d_tn->dt.dtor)(d_tn->obj_init);
          }
        }
      }
      KC_TRACE(30, ("__kmp_common_destroy_gtid: T#%d threadprivate destructors "
                    "complete\n",
                    gtid));
    }
  }
}

#ifdef KMP_TASK_COMMON_DEBUG
static void dump_list(void) {
  int p, q;

  for (p = 0; p < __kmp_all_nth; ++p) {
    if (!__kmp_threads[p])
      continue;
    for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q) {
      if (__kmp_threads[p]->th.th_pri_common->data[q]) {
        struct private_common *tn;

        KC_TRACE(10, ("\tdump_list: gtid:%d addresses\n", p));

        for (tn = __kmp_threads[p]->th.th_pri_common->data[q]; tn;
             tn = tn->next) {
          KC_TRACE(10,
                   ("\tdump_list: THREADPRIVATE: Serial %p -> Parallel %p\n",
                    tn->gbl_addr, tn->par_addr));
        }
      }
    }
  }
}
#endif /* KMP_TASK_COMMON_DEBUG */

// NOTE: this routine is to be called only from the serial part of the program.
void kmp_threadprivate_insert_private_data(int gtid, void *pc_addr,
                                           void *data_addr, size_t pc_size) {
  struct shared_common **lnk_tn, *d_tn;
  KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
                   __kmp_threads[gtid]->th.th_root->r.r_active == 0);

  d_tn = __kmp_find_shared_task_common(&__kmp_threadprivate_d_table, gtid,
                                       pc_addr);

  if (d_tn == 0) {
    d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common));

    d_tn->gbl_addr = pc_addr;
    d_tn->pod_init = __kmp_init_common_data(data_addr, pc_size);
    /*
      d_tn->obj_init = 0; // AC: commented out because __kmp_allocate
      zeroes the memory
      d_tn->ct.ctor = 0;
      d_tn->cct.cctor = 0;;
      d_tn->dt.dtor = 0;
      d_tn->is_vec = FALSE;
      d_tn->vec_len = 0L;
    */
    d_tn->cmn_size = pc_size;

    __kmp_acquire_lock(&__kmp_global_lock, gtid);

    lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(pc_addr)]);

    d_tn->next = *lnk_tn;
    *lnk_tn = d_tn;

    __kmp_release_lock(&__kmp_global_lock, gtid);
  }
}

struct private_common *kmp_threadprivate_insert(int gtid, void *pc_addr,
                                                void *data_addr,
                                                size_t pc_size) {
  struct private_common *tn, **tt;
  struct shared_common *d_tn;

  /* +++++++++ START OF CRITICAL SECTION +++++++++ */
  __kmp_acquire_lock(&__kmp_global_lock, gtid);

  tn = (struct private_common *)__kmp_allocate(sizeof(struct private_common));

  tn->gbl_addr = pc_addr;

  d_tn = __kmp_find_shared_task_common(
      &__kmp_threadprivate_d_table, gtid,
      pc_addr); /* Only the MASTER data table exists. */

  if (d_tn != 0) {
    /* This threadprivate variable has already been seen. */

    if (d_tn->pod_init == 0 && d_tn->obj_init == 0) {
      d_tn->cmn_size = pc_size;

      if (d_tn->is_vec) {
        if (d_tn->ct.ctorv != 0) {
          /* Construct from scratch so no prototype exists */
          d_tn->obj_init = 0;
        } else if (d_tn->cct.cctorv != 0) {
          /* Now data initialize the prototype since it was previously
           * registered */
          d_tn->obj_init = (void *)__kmp_allocate(d_tn->cmn_size);
          (void)(*d_tn->cct.cctorv)(d_tn->obj_init, pc_addr, d_tn->vec_len);
        } else {
          d_tn->pod_init = __kmp_init_common_data(data_addr, d_tn->cmn_size);
        }
      } else {
        if (d_tn->ct.ctor != 0) {
          /* Construct from scratch so no prototype exists */
          d_tn->obj_init = 0;
        } else if (d_tn->cct.cctor != 0) {
          /* Now data initialize the prototype since it was previously
             registered */
          d_tn->obj_init = (void *)__kmp_allocate(d_tn->cmn_size);
          (void)(*d_tn->cct.cctor)(d_tn->obj_init, pc_addr);
        } else {
          d_tn->pod_init = __kmp_init_common_data(data_addr, d_tn->cmn_size);
        }
      }
    }
  } else {
    struct shared_common **lnk_tn;

    d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common));
    d_tn->gbl_addr = pc_addr;
    d_tn->cmn_size = pc_size;
    d_tn->pod_init = __kmp_init_common_data(data_addr, pc_size);
    /*
      d_tn->obj_init = 0; // AC: commented out because __kmp_allocate
      zeroes the memory
      d_tn->ct.ctor = 0;
      d_tn->cct.cctor = 0;
      d_tn->dt.dtor = 0;
      d_tn->is_vec = FALSE;
      d_tn->vec_len = 0L;
    */
    lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(pc_addr)]);

    d_tn->next = *lnk_tn;
    *lnk_tn = d_tn;
  }

  tn->cmn_size = d_tn->cmn_size;

  if ((__kmp_foreign_tp) ? (KMP_INITIAL_GTID(gtid)) : (KMP_UBER_GTID(gtid))) {
    tn->par_addr = (void *)pc_addr;
  } else {
    tn->par_addr = (void *)__kmp_allocate(tn->cmn_size);
  }

  __kmp_release_lock(&__kmp_global_lock, gtid);
/* +++++++++ END OF CRITICAL SECTION +++++++++ */

#ifdef USE_CHECKS_COMMON
  if (pc_size > d_tn->cmn_size) {
    KC_TRACE(
        10, ("__kmp_threadprivate_insert: THREADPRIVATE: %p (%" KMP_UINTPTR_SPEC
             " ,%" KMP_UINTPTR_SPEC ")\n",
             pc_addr, pc_size, d_tn->cmn_size));
    KMP_FATAL(TPCommonBlocksInconsist);
  }
#endif /* USE_CHECKS_COMMON */

  tt = &(__kmp_threads[gtid]->th.th_pri_common->data[KMP_HASH(pc_addr)]);

#ifdef KMP_TASK_COMMON_DEBUG
  if (*tt != 0) {
    KC_TRACE(
        10,
        ("__kmp_threadprivate_insert: WARNING! thread#%d: collision on %p\n",
         gtid, pc_addr));
  }
#endif
  tn->next = *tt;
  *tt = tn;

#ifdef KMP_TASK_COMMON_DEBUG
  KC_TRACE(10,
           ("__kmp_threadprivate_insert: thread#%d, inserted node %p on list\n",
            gtid, pc_addr));
  dump_list();
#endif

  /* Link the node into a simple list */

  tn->link = __kmp_threads[gtid]->th.th_pri_head;
  __kmp_threads[gtid]->th.th_pri_head = tn;

  if ((__kmp_foreign_tp) ? (KMP_INITIAL_GTID(gtid)) : (KMP_UBER_GTID(gtid)))
    return tn;

  /* if C++ object with copy constructor, use it;
   * else if C++ object with constructor, use it for the non-master copies only;
   * else use pod_init and memcpy
   *
   * C++ constructors need to be called once for each non-master thread on
   * allocate
   * C++ copy constructors need to be called once for each thread on allocate */

  /* C++ object with constructors/destructors; don't call constructors for
     master thread though */
  if (d_tn->is_vec) {
    if (d_tn->ct.ctorv != 0) {
      (void)(*d_tn->ct.ctorv)(tn->par_addr, d_tn->vec_len);
    } else if (d_tn->cct.cctorv != 0) {
      (void)(*d_tn->cct.cctorv)(tn->par_addr, d_tn->obj_init, d_tn->vec_len);
    } else if (tn->par_addr != tn->gbl_addr) {
      __kmp_copy_common_data(tn->par_addr, d_tn->pod_init);
    }
  } else {
    if (d_tn->ct.ctor != 0) {
      (void)(*d_tn->ct.ctor)(tn->par_addr);
    } else if (d_tn->cct.cctor != 0) {
      (void)(*d_tn->cct.cctor)(tn->par_addr, d_tn->obj_init);
    } else if (tn->par_addr != tn->gbl_addr) {
      __kmp_copy_common_data(tn->par_addr, d_tn->pod_init);
    }
  }
  /* !BUILD_OPENMP_C
     if (tn->par_addr != tn->gbl_addr)
       __kmp_copy_common_data( tn->par_addr, d_tn->pod_init ); */

  return tn;
}
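
// The bookkeeping above implements the user-visible behavior of
// '#pragma omp threadprivate': the initial thread keeps the original storage
// (par_addr == gbl_addr), while every other thread gets a freshly allocated
// copy initialized via the constructor, copy constructor, or POD template.
// A minimal user-level program exercising that behavior (illustrative only,
// kept out of the build with #if 0):
#if 0
#include <omp.h>
#include <stdio.h>

int counter = 100;
#pragma omp threadprivate(counter)

int main(void) {
#pragma omp parallel num_threads(4)
  {
    // Each thread updates its own copy; worker copies start from the value
    // captured in the POD template (here 100).
    counter += omp_get_thread_num();
    printf("T%d counter=%d\n", omp_get_thread_num(), counter);
  }
  // The initial thread's copy aliases the original global.
  printf("after parallel, initial thread counter=%d\n", counter);
  return 0;
}
#endif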

/* ------------------------------------------------------------------------ */
/* We are currently parallel, and we know the thread id. */
/* ------------------------------------------------------------------------ */

void __kmpc_threadprivate_register(ident_t *loc, void *data, kmpc_ctor ctor,
                                   kmpc_cctor cctor, kmpc_dtor dtor) {
  struct shared_common *d_tn, **lnk_tn;

  KC_TRACE(10, ("__kmpc_threadprivate_register: called\n"));

#ifdef USE_CHECKS_COMMON
  /* copy constructor must be zero for current code gen (Nov 2002 - jph) */
  KMP_ASSERT(cctor == 0);
#endif /* USE_CHECKS_COMMON */

  /* Only the global data table exists. */
  d_tn = __kmp_find_shared_task_common(&__kmp_threadprivate_d_table, -1, data);

  if (d_tn == 0) {
    d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common));
    d_tn->gbl_addr = data;

    d_tn->ct.ctor = ctor;
    d_tn->cct.cctor = cctor;
    d_tn->dt.dtor = dtor;
    /*
      d_tn->is_vec = FALSE; // AC: commented out because __kmp_allocate
      zeroes the memory
      d_tn->vec_len = 0L;
      d_tn->obj_init = 0;
      d_tn->pod_init = 0;
    */
    lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(data)]);

    d_tn->next = *lnk_tn;
    *lnk_tn = d_tn;
  }
}
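
// Roughly how compiler-generated code could drive this entry point for a
// threadprivate C++ object: register constructor/destructor stubs once, then
// fetch the calling thread's copy through __kmpc_threadprivate_cached (defined
// below) at each use. The stub names, the NULL ident_t argument, and the cache
// variable are placeholders for illustration, not actual compiler output
// (kept out of the build with #if 0):
#if 0
#include <new>

struct Counter {
  int v;
  Counter() : v(0) {}
  ~Counter() {}
};

static Counter obj;      // the original (initial-thread) instance
static void **obj_cache; // per-variable cache handed to the runtime

static void *obj_ctor_stub(void *p) { return new (p) Counter(); }
static void obj_dtor_stub(void *p) { static_cast<Counter *>(p)->~Counter(); }

static void register_obj(void) { // e.g. run once from a global initializer
  __kmpc_threadprivate_register(/*loc=*/NULL, &obj, obj_ctor_stub,
                                /*cctor=*/NULL, obj_dtor_stub);
}

static Counter *obj_for_this_thread(ident_t *loc, kmp_int32 gtid) {
  // At a use site: return (and lazily create) this thread's private copy.
  return static_cast<Counter *>(__kmpc_threadprivate_cached(
      loc, gtid, &obj, sizeof(Counter), &obj_cache));
}
#endif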

void *__kmpc_threadprivate(ident_t *loc, kmp_int32 global_tid, void *data,
                           size_t size) {
  void *ret;
  struct private_common *tn;

  KC_TRACE(10, ("__kmpc_threadprivate: T#%d called\n", global_tid));

#ifdef USE_CHECKS_COMMON
  if (!__kmp_init_serial)
    KMP_FATAL(RTLNotInitialized);
#endif /* USE_CHECKS_COMMON */

  if (!__kmp_threads[global_tid]->th.th_root->r.r_active && !__kmp_foreign_tp) {
    /* The parallel address will NEVER overlap with the data_address */
    /* dkp: 3rd arg to kmp_threadprivate_insert_private_data() is the
     * data_address; use data_address = data */

    KC_TRACE(20, ("__kmpc_threadprivate: T#%d inserting private data\n",
                  global_tid));
    kmp_threadprivate_insert_private_data(global_tid, data, data, size);

    ret = data;
  } else {
    KC_TRACE(
        50,
        ("__kmpc_threadprivate: T#%d try to find private data at address %p\n",
         global_tid, data));
    tn = __kmp_threadprivate_find_task_common(
        __kmp_threads[global_tid]->th.th_pri_common, global_tid, data);

    if (tn) {
      KC_TRACE(20, ("__kmpc_threadprivate: T#%d found data\n", global_tid));
#ifdef USE_CHECKS_COMMON
      if ((size_t)size > tn->cmn_size) {
        KC_TRACE(10, ("THREADPRIVATE: %p (%" KMP_UINTPTR_SPEC
                      " ,%" KMP_UINTPTR_SPEC ")\n",
                      data, size, tn->cmn_size));
        KMP_FATAL(TPCommonBlocksInconsist);
      }
#endif /* USE_CHECKS_COMMON */
    } else {
      /* The parallel address will NEVER overlap with the data_address */
      /* dkp: 3rd arg to kmp_threadprivate_insert() is the data_address; use
       * data_address = data */
      KC_TRACE(20, ("__kmpc_threadprivate: T#%d inserting data\n", global_tid));
      tn = kmp_threadprivate_insert(global_tid, data, data, size);
    }

    ret = tn->par_addr;
  }
  KC_TRACE(10, ("__kmpc_threadprivate: T#%d exiting; return value = %p\n",
                global_tid, ret));

  return ret;
}

static kmp_cached_addr_t *__kmp_find_cache(void *data) {
  kmp_cached_addr_t *ptr = __kmp_threadpriv_cache_list;
  while (ptr && ptr->data != data)
    ptr = ptr->next;
  return ptr;
}

void *
__kmpc_threadprivate_cached(ident_t *loc,
                            kmp_int32 global_tid, // gtid.
                            void *data, // Pointer to original global variable.
                            size_t size, // Size of original global variable.
                            void ***cache) {
  KC_TRACE(10, ("__kmpc_threadprivate_cached: T#%d called with cache: %p, "
                "address: %p, size: %" KMP_SIZE_T_SPEC "\n",
                global_tid, *cache, data, size));

  if (TCR_PTR(*cache) == 0) {
    __kmp_acquire_lock(&__kmp_global_lock, global_tid);

    if (TCR_PTR(*cache) == 0) {
      __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
      // Compiler often passes in NULL cache, even if it's already been created
      void **my_cache;
      kmp_cached_addr_t *tp_cache_addr;
      // Look for an existing cache
      tp_cache_addr = __kmp_find_cache(data);
      if (!tp_cache_addr) { // Cache was never created; do it now
        __kmp_tp_cached = 1;
        KMP_ITT_IGNORE(my_cache = (void **)__kmp_allocate(
                           sizeof(void *) * __kmp_tp_capacity +
                           sizeof(kmp_cached_addr_t)););
        // No need to zero the allocated memory; __kmp_allocate does that.
        KC_TRACE(50, ("__kmpc_threadprivate_cached: T#%d allocated cache at "
                      "address %p\n",
                      global_tid, my_cache));
        /* TODO: free all this memory in __kmp_common_destroy using
         * __kmp_threadpriv_cache_list */
        /* Add address of mycache to linked list for cleanup later */
        tp_cache_addr = (kmp_cached_addr_t *)&my_cache[__kmp_tp_capacity];
        tp_cache_addr->addr = my_cache;
        tp_cache_addr->data = data;
        tp_cache_addr->compiler_cache = cache;
        tp_cache_addr->next = __kmp_threadpriv_cache_list;
        __kmp_threadpriv_cache_list = tp_cache_addr;
      } else { // A cache was already created; use it
        my_cache = tp_cache_addr->addr;
        tp_cache_addr->compiler_cache = cache;
      }
      KMP_MB();

      TCW_PTR(*cache, my_cache);
      __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);

      KMP_MB();
    }
    __kmp_release_lock(&__kmp_global_lock, global_tid);
  }

  void *ret;
  if ((ret = TCR_PTR((*cache)[global_tid])) == 0) {
    ret = __kmpc_threadprivate(loc, global_tid, data, (size_t)size);

    TCW_PTR((*cache)[global_tid], ret);
  }
  KC_TRACE(10,
           ("__kmpc_threadprivate_cached: T#%d exiting; return value = %p\n",
            global_tid, ret));
  return ret;
}
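
// A stand-alone analog (hypothetical, C++ standard library) of the caching
// scheme above: one pointer slot per global thread id, the slot array created
// lazily under double-checked locking, and the slow path taken only on a
// thread's first touch of a given variable. The real code uses TCR_PTR/TCW_PTR
// and KMP_MB() fences for the unlocked read instead of a plain load (kept out
// of the build with #if 0):
#if 0
#include <cstddef>
#include <mutex>

static std::mutex cache_lock;

static void *lookup(void ***cache, int gtid, std::size_t capacity,
                    void *(*slow_path)(int gtid)) {
  if (*cache == nullptr) { // first touch of this variable by any thread
    std::lock_guard<std::mutex> guard(cache_lock);
    if (*cache == nullptr)               // re-check under the lock
      *cache = new void *[capacity](); // zero-initialized slot per thread id
  }
  void *&slot = (*cache)[gtid];
  if (slot == nullptr)      // this thread's first touch
    slot = slow_path(gtid); // allocate and initialize the private copy
  return slot;
}
#endif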

// This function should only be called when both __kmp_tp_cached_lock and
// kmp_forkjoin_lock are held.
void __kmp_threadprivate_resize_cache(int newCapacity) {
  KC_TRACE(10, ("__kmp_threadprivate_resize_cache: called with size: %d\n",
                newCapacity));

  kmp_cached_addr_t *ptr = __kmp_threadpriv_cache_list;

  while (ptr) {
    if (ptr->data) { // this location has an active cache; resize it
      void **my_cache;
      KMP_ITT_IGNORE(my_cache =
                         (void **)__kmp_allocate(sizeof(void *) * newCapacity +
                                                 sizeof(kmp_cached_addr_t)););
      // No need to zero the allocated memory; __kmp_allocate does that.
      KC_TRACE(50, ("__kmp_threadprivate_resize_cache: allocated cache at %p\n",
                    my_cache));
      // Now copy old cache into new cache
      void **old_cache = ptr->addr;
      for (int i = 0; i < __kmp_tp_capacity; ++i) {
        my_cache[i] = old_cache[i];
      }

      // Add address of new my_cache to linked list for cleanup later
      kmp_cached_addr_t *tp_cache_addr;
      tp_cache_addr = (kmp_cached_addr_t *)&my_cache[newCapacity];
      tp_cache_addr->addr = my_cache;
      tp_cache_addr->data = ptr->data;
      tp_cache_addr->compiler_cache = ptr->compiler_cache;
      tp_cache_addr->next = __kmp_threadpriv_cache_list;
      __kmp_threadpriv_cache_list = tp_cache_addr;

      // Copy new cache to compiler's location: We can copy directly
      // to (*compiler_cache) if compiler guarantees it will keep
      // using the same location for the cache. This is not yet true
      // for some compilers, in which case we have to check if
      // compiler_cache is still pointing at old cache, and if so, we
      // can point it at the new cache with an atomic compare&swap
      // operation. (Old method will always work, but we should shift
      // to new method (commented line below) when Intel and Clang
      // compilers use new method.)
      (void)KMP_COMPARE_AND_STORE_PTR(tp_cache_addr->compiler_cache, old_cache,
                                      my_cache);
      // TCW_PTR(*(tp_cache_addr->compiler_cache), my_cache);

      // If the store doesn't happen here, the compiler's old behavior will
      // inevitably call __kmpc_threadprivate_cached with a new location for
      // the cache, and that function will store the resized cache there at
      // that point.

      // Nullify old cache's data pointer so we skip it next time
      ptr->data = NULL;
    }
    ptr = ptr->next;
  }
  // After all caches are resized, update __kmp_tp_capacity to the new size
  *(volatile int *)&__kmp_tp_capacity = newCapacity;
}
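
// The publish step above only swings the compiler's cache location to the
// resized cache if that location still points at the old one, so a concurrent
// re-creation of the cache is never overwritten. A stand-alone sketch of that
// compare-and-swap publish (hypothetical names, std::atomic in place of
// KMP_COMPARE_AND_STORE_PTR, kept out of the build with #if 0):
#if 0
#include <atomic>

static void publish_resized_cache(std::atomic<void **> &compiler_cache,
                                  void **old_cache, void **new_cache) {
  void **expected = old_cache;
  // Install new_cache only if the compiler's location still holds old_cache.
  if (!compiler_cache.compare_exchange_strong(expected, new_cache)) {
    // Someone already installed a different cache there; the next call to
    // __kmpc_threadprivate_cached will populate that location instead.
  }
}
#endif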

void __kmpc_threadprivate_register_vec(ident_t *loc, void *data,
                                       kmpc_ctor_vec ctor, kmpc_cctor_vec cctor,
                                       kmpc_dtor_vec dtor,
                                       size_t vector_length) {
  struct shared_common *d_tn, **lnk_tn;

  KC_TRACE(10, ("__kmpc_threadprivate_register_vec: called\n"));

#ifdef USE_CHECKS_COMMON
  /* copy constructor must be zero for current code gen (Nov 2002 - jph) */
  KMP_ASSERT(cctor == 0);
#endif /* USE_CHECKS_COMMON */

  d_tn = __kmp_find_shared_task_common(
      &__kmp_threadprivate_d_table, -1,
      data); /* Only the global data table exists. */

  if (d_tn == 0) {
    d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common));
    d_tn->gbl_addr = data;

    d_tn->ct.ctorv = ctor;
    d_tn->cct.cctorv = cctor;
    d_tn->dt.dtorv = dtor;
    d_tn->is_vec = TRUE;
    d_tn->vec_len = (size_t)vector_length;
    // d_tn->obj_init = 0; // AC: __kmp_allocate zeroes the memory
    // d_tn->pod_init = 0;
    lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(data)]);

    d_tn->next = *lnk_tn;
    *lnk_tn = d_tn;
  }
}

void __kmp_cleanup_threadprivate_caches() {
  kmp_cached_addr_t *ptr = __kmp_threadpriv_cache_list;

  while (ptr) {
    void **cache = ptr->addr;
    __kmp_threadpriv_cache_list = ptr->next;
    if (*ptr->compiler_cache)
      *ptr->compiler_cache = NULL;
    ptr->compiler_cache = NULL;
    ptr->data = NULL;
    ptr->addr = NULL;
    ptr->next = NULL;
    // Threadprivate data pointed at by cache entries are destroyed at end of
    // __kmp_launch_thread with __kmp_common_destroy_gtid.
    __kmp_free(cache); // implicitly frees ptr too
    ptr = __kmp_threadpriv_cache_list;
  }
}