/*
 * kmp_threadprivate.cpp -- OpenMP threadprivate support library
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"

#define USE_CHECKS_COMMON

#define KMP_INLINE_SUBR 1

void kmp_threadprivate_insert_private_data(int gtid, void *pc_addr,
                                           void *data_addr, size_t pc_size);
struct private_common *kmp_threadprivate_insert(int gtid, void *pc_addr,
                                                void *data_addr,
                                                size_t pc_size);

struct shared_table __kmp_threadprivate_d_table;

static
#ifdef KMP_INLINE_SUBR
    __forceinline
#endif
    struct private_common *
    __kmp_threadprivate_find_task_common(struct common_table *tbl, int gtid,
                                         void *pc_addr)

{
  struct private_common *tn;

#ifdef KMP_TASK_COMMON_DEBUG
  KC_TRACE(10, ("__kmp_threadprivate_find_task_common: thread#%d, called with "
                "address %p\n",
                gtid, pc_addr));
  dump_list();
#endif

  for (tn = tbl->data[KMP_HASH(pc_addr)]; tn; tn = tn->next) {
    if (tn->gbl_addr == pc_addr) {
#ifdef KMP_TASK_COMMON_DEBUG
      KC_TRACE(10, ("__kmp_threadprivate_find_task_common: thread#%d, found "
                    "node %p on list\n",
                    gtid, pc_addr));
#endif
      return tn;
    }
  }
  return 0;
}

static
#ifdef KMP_INLINE_SUBR
    __forceinline
#endif
    struct shared_common *
    __kmp_find_shared_task_common(struct shared_table *tbl, int gtid,
                                  void *pc_addr) {
  struct shared_common *tn;

  for (tn = tbl->data[KMP_HASH(pc_addr)]; tn; tn = tn->next) {
    if (tn->gbl_addr == pc_addr) {
#ifdef KMP_TASK_COMMON_DEBUG
      KC_TRACE(
          10,
          ("__kmp_find_shared_task_common: thread#%d, found node %p on list\n",
           gtid, pc_addr));
#endif
      return tn;
    }
  }
  return 0;
}

// Create a template for the data initialized storage. Either the template is
// NULL indicating zero fill, or the template is a copy of the original data.
static struct private_data *__kmp_init_common_data(void *pc_addr,
                                                   size_t pc_size) {
  struct private_data *d;
  size_t i;
  char *p;

  d = (struct private_data *)__kmp_allocate(sizeof(struct private_data));
  /*
      d->data = 0;  // AC: commented out because __kmp_allocate zeroes the
     memory
      d->next = 0;
  */
  d->size = pc_size;
  d->more = 1;

  p = (char *)pc_addr;

  for (i = pc_size; i > 0; --i) {
    if (*p++ != '\0') {
      d->data = __kmp_allocate(pc_size);
      KMP_MEMCPY(d->data, pc_addr, pc_size);
      break;
    }
  }

  return d;
}
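
// Illustrative note: if the original threadprivate block is entirely
// zero-initialized, the template built above keeps d->data == NULL and
// __kmp_copy_common_data() below expands it with memset; if any byte of the
// initializer is nonzero, as for a hypothetical
//     int counter = 42;   /* threadprivate */
// the template instead holds a heap copy of the initialized bytes, and each
// thread's copy is reproduced from it with KMP_MEMCPY.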

// Initialize the data area from the template.
static void __kmp_copy_common_data(void *pc_addr, struct private_data *d) {
  char *addr = (char *)pc_addr;
  int i, offset;

  for (offset = 0; d != 0; d = d->next) {
    for (i = d->more; i > 0; --i) {
      if (d->data == 0)
        memset(&addr[offset], '\0', d->size);
      else
        KMP_MEMCPY(&addr[offset], d->data, d->size);
      offset += d->size;
    }
  }
}

/* we are called from __kmp_serial_initialize() with __kmp_initz_lock held. */
void __kmp_common_initialize(void) {
  if (!TCR_4(__kmp_init_common)) {
    int q;
#ifdef KMP_DEBUG
    int gtid;
#endif

    __kmp_threadpriv_cache_list = NULL;

#ifdef KMP_DEBUG
    /* verify the uber masters were initialized */
    for (gtid = 0; gtid < __kmp_threads_capacity; gtid++)
      if (__kmp_root[gtid]) {
        KMP_DEBUG_ASSERT(__kmp_root[gtid]->r.r_uber_thread);
        for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q)
          KMP_DEBUG_ASSERT(
              !__kmp_root[gtid]->r.r_uber_thread->th.th_pri_common->data[q]);
        /*                    __kmp_root[ gtid ]-> r.r_uber_thread ->
         * th.th_pri_common -> data[ q ] = 0;*/
      }
#endif /* KMP_DEBUG */

    for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q)
      __kmp_threadprivate_d_table.data[q] = 0;

    TCW_4(__kmp_init_common, TRUE);
  }
}

/* Call all destructors for threadprivate data belonging to all threads.
   Currently unused! */
void __kmp_common_destroy(void) {
  if (TCR_4(__kmp_init_common)) {
    int q;

    TCW_4(__kmp_init_common, FALSE);

    for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q) {
      int gtid;
      struct private_common *tn;
      struct shared_common *d_tn;

      /* C++ destructors need to be called once per thread before exiting.
         Don't call destructors for master thread though unless we used copy
         constructor */

      for (d_tn = __kmp_threadprivate_d_table.data[q]; d_tn;
           d_tn = d_tn->next) {
        if (d_tn->is_vec) {
          if (d_tn->dt.dtorv != 0) {
            for (gtid = 0; gtid < __kmp_all_nth; ++gtid) {
              if (__kmp_threads[gtid]) {
                if ((__kmp_foreign_tp) ? (!KMP_INITIAL_GTID(gtid))
                                       : (!KMP_UBER_GTID(gtid))) {
                  tn = __kmp_threadprivate_find_task_common(
                      __kmp_threads[gtid]->th.th_pri_common, gtid,
                      d_tn->gbl_addr);
                  if (tn) {
                    (*d_tn->dt.dtorv)(tn->par_addr, d_tn->vec_len);
                  }
                }
              }
            }
            if (d_tn->obj_init != 0) {
              (*d_tn->dt.dtorv)(d_tn->obj_init, d_tn->vec_len);
            }
          }
        } else {
          if (d_tn->dt.dtor != 0) {
            for (gtid = 0; gtid < __kmp_all_nth; ++gtid) {
              if (__kmp_threads[gtid]) {
                if ((__kmp_foreign_tp) ? (!KMP_INITIAL_GTID(gtid))
                                       : (!KMP_UBER_GTID(gtid))) {
                  tn = __kmp_threadprivate_find_task_common(
                      __kmp_threads[gtid]->th.th_pri_common, gtid,
                      d_tn->gbl_addr);
                  if (tn) {
                    (*d_tn->dt.dtor)(tn->par_addr);
                  }
                }
              }
            }
            if (d_tn->obj_init != 0) {
              (*d_tn->dt.dtor)(d_tn->obj_init);
            }
          }
        }
      }
      __kmp_threadprivate_d_table.data[q] = 0;
    }
  }
}

/* Call all destructors for threadprivate data belonging to this thread */
void __kmp_common_destroy_gtid(int gtid) {
  struct private_common *tn;
  struct shared_common *d_tn;

  if (!TCR_4(__kmp_init_gtid)) {
    // This is possible when one of multiple roots initiates early library
    // termination in a sequential region while other teams are active, and its
    // child threads are about to end.
    return;
  }

  KC_TRACE(10, ("__kmp_common_destroy_gtid: T#%d called\n", gtid));
  if ((__kmp_foreign_tp) ? (!KMP_INITIAL_GTID(gtid)) : (!KMP_UBER_GTID(gtid))) {

    if (TCR_4(__kmp_init_common)) {

      /* Cannot do this here since not all threads have destroyed their data */
      /* TCW_4(__kmp_init_common, FALSE); */

      for (tn = __kmp_threads[gtid]->th.th_pri_head; tn; tn = tn->link) {

        d_tn = __kmp_find_shared_task_common(&__kmp_threadprivate_d_table, gtid,
                                             tn->gbl_addr);
        if (d_tn == NULL)
          continue;
        if (d_tn->is_vec) {
          if (d_tn->dt.dtorv != 0) {
            (void)(*d_tn->dt.dtorv)(tn->par_addr, d_tn->vec_len);
          }
          if (d_tn->obj_init != 0) {
            (void)(*d_tn->dt.dtorv)(d_tn->obj_init, d_tn->vec_len);
          }
        } else {
          if (d_tn->dt.dtor != 0) {
            (void)(*d_tn->dt.dtor)(tn->par_addr);
          }
          if (d_tn->obj_init != 0) {
            (void)(*d_tn->dt.dtor)(d_tn->obj_init);
          }
        }
      }
      KC_TRACE(30, ("__kmp_common_destroy_gtid: T#%d threadprivate destructors "
                    "complete\n",
                    gtid));
    }
  }
}

#ifdef KMP_TASK_COMMON_DEBUG
static void dump_list(void) {
  int p, q;

  for (p = 0; p < __kmp_all_nth; ++p) {
    if (!__kmp_threads[p])
      continue;
    for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q) {
      if (__kmp_threads[p]->th.th_pri_common->data[q]) {
        struct private_common *tn;

        KC_TRACE(10, ("\tdump_list: gtid:%d addresses\n", p));

        for (tn = __kmp_threads[p]->th.th_pri_common->data[q]; tn;
             tn = tn->next) {
          KC_TRACE(10,
                   ("\tdump_list: THREADPRIVATE: Serial %p -> Parallel %p\n",
                    tn->gbl_addr, tn->par_addr));
        }
      }
    }
  }
}
#endif /* KMP_TASK_COMMON_DEBUG */

// NOTE: this routine is to be called only from the serial part of the program.
void kmp_threadprivate_insert_private_data(int gtid, void *pc_addr,
                                           void *data_addr, size_t pc_size) {
  struct shared_common **lnk_tn, *d_tn;
  KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
                   __kmp_threads[gtid]->th.th_root->r.r_active == 0);

  d_tn = __kmp_find_shared_task_common(&__kmp_threadprivate_d_table, gtid,
                                       pc_addr);

  if (d_tn == 0) {
    d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common));

    d_tn->gbl_addr = pc_addr;
    d_tn->pod_init = __kmp_init_common_data(data_addr, pc_size);
    /*
            d_tn->obj_init = 0;  // AC: commented out because __kmp_allocate
       zeroes the memory
            d_tn->ct.ctor = 0;
            d_tn->cct.cctor = 0;
            d_tn->dt.dtor = 0;
            d_tn->is_vec = FALSE;
            d_tn->vec_len = 0L;
    */
    d_tn->cmn_size = pc_size;

    __kmp_acquire_lock(&__kmp_global_lock, gtid);

    lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(pc_addr)]);

    d_tn->next = *lnk_tn;
    *lnk_tn = d_tn;

    __kmp_release_lock(&__kmp_global_lock, gtid);
  }
}

struct private_common *kmp_threadprivate_insert(int gtid, void *pc_addr,
                                                void *data_addr,
                                                size_t pc_size) {
  struct private_common *tn, **tt;
  struct shared_common *d_tn;

  /* +++++++++ START OF CRITICAL SECTION +++++++++ */
  __kmp_acquire_lock(&__kmp_global_lock, gtid);

  tn = (struct private_common *)__kmp_allocate(sizeof(struct private_common));

  tn->gbl_addr = pc_addr;

  d_tn = __kmp_find_shared_task_common(
      &__kmp_threadprivate_d_table, gtid,
      pc_addr); /* Only the MASTER data table exists. */

  if (d_tn != 0) {
    /* This threadprivate variable has already been seen. */

    if (d_tn->pod_init == 0 && d_tn->obj_init == 0) {
      d_tn->cmn_size = pc_size;

      if (d_tn->is_vec) {
        if (d_tn->ct.ctorv != 0) {
          /* Construct from scratch so no prototype exists */
          d_tn->obj_init = 0;
        } else if (d_tn->cct.cctorv != 0) {
          /* Now data initialize the prototype since it was previously
           * registered */
          d_tn->obj_init = (void *)__kmp_allocate(d_tn->cmn_size);
          (void)(*d_tn->cct.cctorv)(d_tn->obj_init, pc_addr, d_tn->vec_len);
        } else {
          d_tn->pod_init = __kmp_init_common_data(data_addr, d_tn->cmn_size);
        }
      } else {
        if (d_tn->ct.ctor != 0) {
          /* Construct from scratch so no prototype exists */
          d_tn->obj_init = 0;
        } else if (d_tn->cct.cctor != 0) {
          /* Now data initialize the prototype since it was previously
             registered */
          d_tn->obj_init = (void *)__kmp_allocate(d_tn->cmn_size);
          (void)(*d_tn->cct.cctor)(d_tn->obj_init, pc_addr);
        } else {
          d_tn->pod_init = __kmp_init_common_data(data_addr, d_tn->cmn_size);
        }
      }
    }
  } else {
    struct shared_common **lnk_tn;

    d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common));
    d_tn->gbl_addr = pc_addr;
    d_tn->cmn_size = pc_size;
    d_tn->pod_init = __kmp_init_common_data(data_addr, pc_size);
    /*
            d_tn->obj_init = 0;  // AC: commented out because __kmp_allocate
       zeroes the memory
            d_tn->ct.ctor = 0;
            d_tn->cct.cctor = 0;
            d_tn->dt.dtor = 0;
            d_tn->is_vec = FALSE;
            d_tn->vec_len = 0L;
    */
    lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(pc_addr)]);

    d_tn->next = *lnk_tn;
    *lnk_tn = d_tn;
  }

  tn->cmn_size = d_tn->cmn_size;

  if ((__kmp_foreign_tp) ? (KMP_INITIAL_GTID(gtid)) : (KMP_UBER_GTID(gtid))) {
    tn->par_addr = (void *)pc_addr;
  } else {
    tn->par_addr = (void *)__kmp_allocate(tn->cmn_size);
  }

  __kmp_release_lock(&__kmp_global_lock, gtid);
/* +++++++++ END OF CRITICAL SECTION +++++++++ */

#ifdef USE_CHECKS_COMMON
  if (pc_size > d_tn->cmn_size) {
    KC_TRACE(
        10, ("__kmp_threadprivate_insert: THREADPRIVATE: %p (%" KMP_UINTPTR_SPEC
             " ,%" KMP_UINTPTR_SPEC ")\n",
             pc_addr, pc_size, d_tn->cmn_size));
    KMP_FATAL(TPCommonBlocksInconsist);
  }
#endif /* USE_CHECKS_COMMON */

  tt = &(__kmp_threads[gtid]->th.th_pri_common->data[KMP_HASH(pc_addr)]);

#ifdef KMP_TASK_COMMON_DEBUG
  if (*tt != 0) {
    KC_TRACE(
        10,
        ("__kmp_threadprivate_insert: WARNING! thread#%d: collision on %p\n",
         gtid, pc_addr));
  }
#endif
  tn->next = *tt;
  *tt = tn;

#ifdef KMP_TASK_COMMON_DEBUG
  KC_TRACE(10,
           ("__kmp_threadprivate_insert: thread#%d, inserted node %p on list\n",
            gtid, pc_addr));
  dump_list();
#endif

  /* Link the node into a simple list */

  tn->link = __kmp_threads[gtid]->th.th_pri_head;
  __kmp_threads[gtid]->th.th_pri_head = tn;

  if ((__kmp_foreign_tp) ? (KMP_INITIAL_GTID(gtid)) : (KMP_UBER_GTID(gtid)))
    return tn;

  /* if C++ object with copy constructor, use it;
   * else if C++ object with constructor, use it for the non-master copies only;
   * else use pod_init and memcpy
   *
   * C++ constructors need to be called once for each non-master thread on
   * allocate
   * C++ copy constructors need to be called once for each thread on allocate */

  /* C++ object with constructors/destructors; don't call constructors for
     master thread though */
  if (d_tn->is_vec) {
    if (d_tn->ct.ctorv != 0) {
      (void)(*d_tn->ct.ctorv)(tn->par_addr, d_tn->vec_len);
    } else if (d_tn->cct.cctorv != 0) {
      (void)(*d_tn->cct.cctorv)(tn->par_addr, d_tn->obj_init, d_tn->vec_len);
    } else if (tn->par_addr != tn->gbl_addr) {
      __kmp_copy_common_data(tn->par_addr, d_tn->pod_init);
    }
  } else {
    if (d_tn->ct.ctor != 0) {
      (void)(*d_tn->ct.ctor)(tn->par_addr);
    } else if (d_tn->cct.cctor != 0) {
      (void)(*d_tn->cct.cctor)(tn->par_addr, d_tn->obj_init);
    } else if (tn->par_addr != tn->gbl_addr) {
      __kmp_copy_common_data(tn->par_addr, d_tn->pod_init);
    }
  }
  /* !BUILD_OPENMP_C
      if (tn->par_addr != tn->gbl_addr)
          __kmp_copy_common_data( tn->par_addr, d_tn->pod_init ); */

  return tn;
}

/* ------------------------------------------------------------------------ */
/* We are currently parallel, and we know the thread id.                    */
/* ------------------------------------------------------------------------ */

/*!
 @ingroup THREADPRIVATE

 @param loc source location information
 @param data  pointer to data being privatized
 @param ctor  pointer to constructor function for data
 @param cctor  pointer to copy constructor function for data
 @param dtor  pointer to destructor function for data

 Register constructors and destructors for thread private data.
 This function is called when executing in parallel, when we know the thread id.
*/
void __kmpc_threadprivate_register(ident_t *loc, void *data, kmpc_ctor ctor,
                                   kmpc_cctor cctor, kmpc_dtor dtor) {
  struct shared_common *d_tn, **lnk_tn;

  KC_TRACE(10, ("__kmpc_threadprivate_register: called\n"));

#ifdef USE_CHECKS_COMMON
  /* copy constructor must be zero for current code gen (Nov 2002 - jph) */
  KMP_ASSERT(cctor == 0);
#endif /* USE_CHECKS_COMMON */

  /* Only the global data table exists. */
  d_tn = __kmp_find_shared_task_common(&__kmp_threadprivate_d_table, -1, data);

  if (d_tn == 0) {
    d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common));
    d_tn->gbl_addr = data;

    d_tn->ct.ctor = ctor;
    d_tn->cct.cctor = cctor;
    d_tn->dt.dtor = dtor;
    /*
            d_tn->is_vec = FALSE;  // AC: commented out because __kmp_allocate
       zeroes the memory
            d_tn->vec_len = 0L;
            d_tn->obj_init = 0;
            d_tn->pod_init = 0;
    */
    lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(data)]);

    d_tn->next = *lnk_tn;
    *lnk_tn = d_tn;
  }
}
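
/* Illustrative sketch (an assumption about compiler-generated code, not part
   of this file): for a C++ threadprivate object such as
       static Widget w;              // hypothetical global
       #pragma omp threadprivate(w)
   a compiler may emit a registration call along the lines of
       __kmpc_threadprivate_register(&loc, &w, widget_ctor, NULL, widget_dtor);
   where widget_ctor/widget_dtor are hypothetical thunks matching kmpc_ctor and
   kmpc_dtor; the copy-constructor slot is NULL, as the assertion above
   requires for the current code generation. */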

void *__kmpc_threadprivate(ident_t *loc, kmp_int32 global_tid, void *data,
                           size_t size) {
  void *ret;
  struct private_common *tn;

  KC_TRACE(10, ("__kmpc_threadprivate: T#%d called\n", global_tid));

#ifdef USE_CHECKS_COMMON
  if (!__kmp_init_serial)
    KMP_FATAL(RTLNotInitialized);
#endif /* USE_CHECKS_COMMON */

  if (!__kmp_threads[global_tid]->th.th_root->r.r_active && !__kmp_foreign_tp) {
    /* The parallel address will NEVER overlap with the data_address */
    /* dkp: 3rd arg to kmp_threadprivate_insert_private_data() is the
     * data_address; use data_address = data */

    KC_TRACE(20, ("__kmpc_threadprivate: T#%d inserting private data\n",
                  global_tid));
    kmp_threadprivate_insert_private_data(global_tid, data, data, size);

    ret = data;
  } else {
    KC_TRACE(
        50,
        ("__kmpc_threadprivate: T#%d try to find private data at address %p\n",
         global_tid, data));
    tn = __kmp_threadprivate_find_task_common(
        __kmp_threads[global_tid]->th.th_pri_common, global_tid, data);

    if (tn) {
      KC_TRACE(20, ("__kmpc_threadprivate: T#%d found data\n", global_tid));
#ifdef USE_CHECKS_COMMON
      if ((size_t)size > tn->cmn_size) {
        KC_TRACE(10, ("THREADPRIVATE: %p (%" KMP_UINTPTR_SPEC
                      " ,%" KMP_UINTPTR_SPEC ")\n",
                      data, size, tn->cmn_size));
        KMP_FATAL(TPCommonBlocksInconsist);
      }
#endif /* USE_CHECKS_COMMON */
    } else {
      /* The parallel address will NEVER overlap with the data_address */
      /* dkp: 3rd arg to kmp_threadprivate_insert() is the data_address; use
       * data_address = data */
      KC_TRACE(20, ("__kmpc_threadprivate: T#%d inserting data\n", global_tid));
      tn = kmp_threadprivate_insert(global_tid, data, data, size);
    }

    ret = tn->par_addr;
  }
  KC_TRACE(10, ("__kmpc_threadprivate: T#%d exiting; return value = %p\n",
                global_tid, ret));

  return ret;
}

static kmp_cached_addr_t *__kmp_find_cache(void *data) {
  kmp_cached_addr_t *ptr = __kmp_threadpriv_cache_list;
  while (ptr && ptr->data != data)
    ptr = ptr->next;
  return ptr;
}

/*!
 @ingroup THREADPRIVATE
 @param loc source location information
 @param global_tid  global thread number
 @param data  pointer to data to privatize
 @param size  size of data to privatize
 @param cache  pointer to cache
 @return pointer to private storage

 Allocate private storage for threadprivate data.
*/
void *
__kmpc_threadprivate_cached(ident_t *loc,
                            kmp_int32 global_tid, // gtid.
                            void *data, // Pointer to original global variable.
                            size_t size, // Size of original global variable.
                            void ***cache) {
  KC_TRACE(10, ("__kmpc_threadprivate_cached: T#%d called with cache: %p, "
                "address: %p, size: %" KMP_SIZE_T_SPEC "\n",
                global_tid, *cache, data, size));

  if (TCR_PTR(*cache) == 0) {
    __kmp_acquire_lock(&__kmp_global_lock, global_tid);

    if (TCR_PTR(*cache) == 0) {
      __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
      // Compiler often passes in NULL cache, even if it's already been created
      void **my_cache;
      kmp_cached_addr_t *tp_cache_addr;
      // Look for an existing cache
      tp_cache_addr = __kmp_find_cache(data);
      if (!tp_cache_addr) { // Cache was never created; do it now
        __kmp_tp_cached = 1;
        KMP_ITT_IGNORE(my_cache = (void **)__kmp_allocate(
                           sizeof(void *) * __kmp_tp_capacity +
                           sizeof(kmp_cached_addr_t)););
        // No need to zero the allocated memory; __kmp_allocate does that.
        KC_TRACE(50, ("__kmpc_threadprivate_cached: T#%d allocated cache at "
                      "address %p\n",
                      global_tid, my_cache));
        /* TODO: free all this memory in __kmp_common_destroy using
         * __kmp_threadpriv_cache_list */
        /* Add address of mycache to linked list for cleanup later  */
        tp_cache_addr = (kmp_cached_addr_t *)&my_cache[__kmp_tp_capacity];
        tp_cache_addr->addr = my_cache;
        tp_cache_addr->data = data;
        tp_cache_addr->compiler_cache = cache;
        tp_cache_addr->next = __kmp_threadpriv_cache_list;
        __kmp_threadpriv_cache_list = tp_cache_addr;
      } else { // A cache was already created; use it
        my_cache = tp_cache_addr->addr;
        tp_cache_addr->compiler_cache = cache;
      }
      KMP_MB();

      TCW_PTR(*cache, my_cache);
      __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);

      KMP_MB();
    }
    __kmp_release_lock(&__kmp_global_lock, global_tid);
  }

  void *ret;
  if ((ret = TCR_PTR((*cache)[global_tid])) == 0) {
    ret = __kmpc_threadprivate(loc, global_tid, data, (size_t)size);

    TCW_PTR((*cache)[global_tid], ret);
  }
  KC_TRACE(10,
           ("__kmpc_threadprivate_cached: T#%d exiting; return value = %p\n",
            global_tid, ret));
  return ret;
}
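
/* Illustrative sketch (an assumption about compiler-generated code): each use
   of a cached threadprivate variable is typically lowered to something like
       static void **tp_var_cache;   // hypothetical compiler-managed slot
       int *my_copy = (int *)__kmpc_threadprivate_cached(
           &loc, gtid, &tp_var, sizeof(tp_var), &tp_var_cache);
   Once the cache is populated, a thread's copy is reached above with a single
   (*cache)[global_tid] load. */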

// This function should only be called when both __kmp_tp_cached_lock and
// kmp_forkjoin_lock are held.
void __kmp_threadprivate_resize_cache(int newCapacity) {
  KC_TRACE(10, ("__kmp_threadprivate_resize_cache: called with size: %d\n",
                newCapacity));

  kmp_cached_addr_t *ptr = __kmp_threadpriv_cache_list;

  while (ptr) {
    if (ptr->data) { // this location has an active cache; resize it
      void **my_cache;
      KMP_ITT_IGNORE(my_cache =
                         (void **)__kmp_allocate(sizeof(void *) * newCapacity +
                                                 sizeof(kmp_cached_addr_t)););
      // No need to zero the allocated memory; __kmp_allocate does that.
      KC_TRACE(50, ("__kmp_threadprivate_resize_cache: allocated cache at %p\n",
                    my_cache));
      // Now copy old cache into new cache
      void **old_cache = ptr->addr;
      for (int i = 0; i < __kmp_tp_capacity; ++i) {
        my_cache[i] = old_cache[i];
      }

      // Add address of new my_cache to linked list for cleanup later
      kmp_cached_addr_t *tp_cache_addr;
      tp_cache_addr = (kmp_cached_addr_t *)&my_cache[newCapacity];
      tp_cache_addr->addr = my_cache;
      tp_cache_addr->data = ptr->data;
      tp_cache_addr->compiler_cache = ptr->compiler_cache;
      tp_cache_addr->next = __kmp_threadpriv_cache_list;
      __kmp_threadpriv_cache_list = tp_cache_addr;

      // Copy new cache to compiler's location: We can copy directly
      // to (*compiler_cache) if compiler guarantees it will keep
      // using the same location for the cache. This is not yet true
      // for some compilers, in which case we have to check if
      // compiler_cache is still pointing at old cache, and if so, we
      // can point it at the new cache with an atomic compare&swap
      // operation. (Old method will always work, but we should shift
      // to new method (commented line below) when Intel and Clang
      // compilers use new method.)
      (void)KMP_COMPARE_AND_STORE_PTR(tp_cache_addr->compiler_cache, old_cache,
                                      my_cache);
      // TCW_PTR(*(tp_cache_addr->compiler_cache), my_cache);

      // If the store doesn't happen here, the compiler's old behavior will
      // inevitably call __kmpc_threadprivate_cached with a new location for
      // the cache, and that function will store the resized cache there at
      // that point.

      // Nullify old cache's data pointer so we skip it next time
      ptr->data = NULL;
    }
    ptr = ptr->next;
  }
  // After all caches are resized, update __kmp_tp_capacity to the new size
  *(volatile int *)&__kmp_tp_capacity = newCapacity;
}

/*!
 @ingroup THREADPRIVATE
 @param loc source location information
 @param data  pointer to data being privatized
 @param ctor  pointer to constructor function for data
 @param cctor  pointer to copy constructor function for data
 @param dtor  pointer to destructor function for data
 @param vector_length length of the vector (bytes or elements?)
 Register vector constructors and destructors for thread private data.
*/
void __kmpc_threadprivate_register_vec(ident_t *loc, void *data,
                                       kmpc_ctor_vec ctor, kmpc_cctor_vec cctor,
                                       kmpc_dtor_vec dtor,
                                       size_t vector_length) {
  struct shared_common *d_tn, **lnk_tn;

  KC_TRACE(10, ("__kmpc_threadprivate_register_vec: called\n"));

#ifdef USE_CHECKS_COMMON
  /* copy constructor must be zero for current code gen (Nov 2002 - jph) */
  KMP_ASSERT(cctor == 0);
#endif /* USE_CHECKS_COMMON */

  d_tn = __kmp_find_shared_task_common(
      &__kmp_threadprivate_d_table, -1,
      data); /* Only the global data table exists. */

  if (d_tn == 0) {
    d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common));
    d_tn->gbl_addr = data;

    d_tn->ct.ctorv = ctor;
    d_tn->cct.cctorv = cctor;
    d_tn->dt.dtorv = dtor;
    d_tn->is_vec = TRUE;
    d_tn->vec_len = (size_t)vector_length;
    // d_tn->obj_init = 0;  // AC: __kmp_allocate zeroes the memory
    // d_tn->pod_init = 0;
    lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(data)]);

    d_tn->next = *lnk_tn;
    *lnk_tn = d_tn;
  }
}
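
/* Illustrative sketch (an assumption about compiler-generated code): for a
   threadprivate array of C++ objects such as
       static Widget ws[4];          // hypothetical global array
       #pragma omp threadprivate(ws)
   registration might look roughly like
       __kmpc_threadprivate_register_vec(&loc, ws, widget_ctor_vec, NULL,
                                         widget_dtor_vec, 4);
   with hypothetical ctor/dtor thunks matching kmpc_ctor_vec/kmpc_dtor_vec; the
   recorded vec_len is later passed back to those thunks as their second
   argument. */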

void __kmp_cleanup_threadprivate_caches() {
  kmp_cached_addr_t *ptr = __kmp_threadpriv_cache_list;

  while (ptr) {
    void **cache = ptr->addr;
    __kmp_threadpriv_cache_list = ptr->next;
    if (*ptr->compiler_cache)
      *ptr->compiler_cache = NULL;
    ptr->compiler_cache = NULL;
    ptr->data = NULL;
    ptr->addr = NULL;
    ptr->next = NULL;
    // Threadprivate data pointed at by cache entries are destroyed at end of
    // __kmp_launch_thread with __kmp_common_destroy_gtid.
    __kmp_free(cache); // implicitly frees ptr too
    ptr = __kmp_threadpriv_cache_list;
  }
}