• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#if USE_ITT_BUILD
2/*
3 * kmp_itt.inl -- Inline functions of ITT Notify.
4 */
5
6//===----------------------------------------------------------------------===//
7//
8// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
9// See https://llvm.org/LICENSE.txt for license information.
10// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
11//
12//===----------------------------------------------------------------------===//
13
14// Inline function definitions. This file should be included into kmp_itt.h file
15// for production build (to let compiler inline functions) or into kmp_itt.c
16// file for debug build (to reduce the number of files to recompile and save
17// build time).
18
19#include "kmp.h"
20#include "kmp_str.h"
21
#if KMP_ITT_DEBUG
// Lock taken by KMP_ITT_DEBUG_LOCK() and released by KMP_ITT_DEBUG_PRINT().
extern kmp_bootstrap_lock_t __kmp_itt_debug_lock;
// The two macros below are used as a pair around an ITT call:
//   KMP_ITT_DEBUG_LOCK();        -- acquires __kmp_itt_debug_lock
//   <ITT call>
//   KMP_ITT_DEBUG_PRINT(fmt...); -- prints "#<gtid>: <message>" to stderr,
//                                   flushes, and releases the lock
// Both are wrapped in do { } while (0) so that they behave as a single
// statement (e.g. inside an unbraced if/else).
#define KMP_ITT_DEBUG_LOCK()                                                   \
  do {                                                                         \
    __kmp_acquire_bootstrap_lock(&__kmp_itt_debug_lock);                       \
  } while (0)
#define KMP_ITT_DEBUG_PRINT(...)                                               \
  do {                                                                         \
    fprintf(stderr, "#%02d: ", __kmp_get_gtid());                              \
    fprintf(stderr, __VA_ARGS__);                                              \
    fflush(stderr);                                                            \
    __kmp_release_bootstrap_lock(&__kmp_itt_debug_lock);                       \
  } while (0)
#else
// Debug tracing disabled: both macros expand to nothing.
#define KMP_ITT_DEBUG_LOCK()
#define KMP_ITT_DEBUG_PRINT(...)
#endif // KMP_ITT_DEBUG
37
// LINKAGE is prepended to the function definitions in this file. When the
// functions are meant to be inlined (non-debug build) they must be static
// inline, otherwise including this file into more than one translation unit
// would produce multiple definitions. In a debug build LINKAGE is empty.
#if !KMP_DEBUG
#define LINKAGE static inline
#else
#define LINKAGE
#endif
46
// ZCA interface used by Intel(R) Inspector. Intel(R) Parallel Amplifier uses
// this API to support user-defined synchronization primitives, but does not use
// ZCA; it would be safe to turn this off until wider support becomes available.
//
// When enabled (USE_ITT_ZCA, Intel compiler version 1200 or newer), the
// __itt_sync_acquired / __itt_sync_releasing notifications are redirected to
// the compiler's notify intrinsics.
#if USE_ITT_ZCA && defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 1200
#undef __itt_sync_acquired
#undef __itt_sync_releasing
#define __itt_sync_acquired(addr)                                              \
  __notify_zc_intrinsic((char *)"sync_acquired", addr)
#define __itt_sync_releasing(addr)                                             \
  __notify_intrinsic((char *)"sync_releasing", addr)
#endif // USE_ITT_ZCA && __INTEL_COMPILER >= 1200
62
63static kmp_bootstrap_lock_t metadata_lock =
64    KMP_BOOTSTRAP_LOCK_INITIALIZER(metadata_lock);
65
66/* Parallel region reporting.
67 * __kmp_itt_region_forking should be called by master thread of a team.
68   Exact moment of call does not matter, but it should be completed before any
69   thread of this team calls __kmp_itt_region_starting.
70 * __kmp_itt_region_starting should be called by each thread of a team just
71   before entering parallel region body.
72 * __kmp_itt_region_finished should be called by each thread of a team right
73   after returning from parallel region body.
74 * __kmp_itt_region_joined should be called by master thread of a team, after
75   all threads called __kmp_itt_region_finished.
76
77 Note: Thread waiting at join barrier (after __kmp_itt_region_finished) can
78 execute some more user code -- such a thread can execute tasks.
79
80 Note: The overhead of logging region_starting and region_finished in each
81 thread is too large, so these calls are not used. */
82
83LINKAGE void __kmp_itt_region_forking(int gtid, int team_size, int barriers) {
84#if USE_ITT_NOTIFY
85  kmp_team_t *team = __kmp_team_from_gtid(gtid);
86  if (team->t.t_active_level > 1) {
87    // The frame notifications are only supported for the outermost teams.
88    return;
89  }
90  ident_t *loc = __kmp_thread_from_gtid(gtid)->th.th_ident;
91  if (loc) {
92    // Use the reserved_2 field to store the index to the region domain.
93    // Assume that reserved_2 contains zero initially.  Since zero is special
94    // value here, store the index into domain array increased by 1.
95    if (loc->reserved_2 == 0) {
96      if (__kmp_region_domain_count < KMP_MAX_FRAME_DOMAINS) {
97        int frm =
98            KMP_TEST_THEN_INC32(&__kmp_region_domain_count); // get "old" value
99        if (frm >= KMP_MAX_FRAME_DOMAINS) {
100          KMP_TEST_THEN_DEC32(&__kmp_region_domain_count); // revert the count
101          return; // loc->reserved_2 is still 0
102        }
103        // if (!KMP_COMPARE_AND_STORE_ACQ32( &loc->reserved_2, 0, frm + 1 )) {
104        //    frm = loc->reserved_2 - 1;   // get value saved by other thread
105        //    for same loc
106        //} // AC: this block is to replace next unsynchronized line
107
108        // We need to save indexes for both region and barrier frames. We'll use
109        // loc->reserved_2 field but put region index to the low two bytes and
110        // barrier indexes to the high two bytes. It is OK because
111        // KMP_MAX_FRAME_DOMAINS = 512.
112        loc->reserved_2 |= (frm + 1); // save "new" value
113
114        // Transform compiler-generated region location into the format
115        // that the tools more or less standardized on:
116        //   "<func>$omp$parallel@[file:]<line>[:<col>]"
117        char *buff = NULL;
118        kmp_str_loc_t str_loc =
119            __kmp_str_loc_init(loc->psource, /* init_fname */ false);
120        buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
121                                team_size, str_loc.file, str_loc.line,
122                                str_loc.col);
123
124        __itt_suppress_push(__itt_suppress_memory_errors);
125        __kmp_itt_region_domains[frm] = __itt_domain_create(buff);
126        __itt_suppress_pop();
127
128        __kmp_str_free(&buff);
129        if (barriers) {
130          if (__kmp_barrier_domain_count < KMP_MAX_FRAME_DOMAINS) {
131            int frm = KMP_TEST_THEN_INC32(
132                &__kmp_barrier_domain_count); // get "old" value
133            if (frm >= KMP_MAX_FRAME_DOMAINS) {
134              KMP_TEST_THEN_DEC32(
135                  &__kmp_barrier_domain_count); // revert the count
136              return; // loc->reserved_2 is still 0
137            }
138            char *buff = NULL;
139            buff = __kmp_str_format("%s$omp$barrier@%s:%d", str_loc.func,
140                                    str_loc.file, str_loc.col);
141            __itt_suppress_push(__itt_suppress_memory_errors);
142            __kmp_itt_barrier_domains[frm] = __itt_domain_create(buff);
143            __itt_suppress_pop();
144            __kmp_str_free(&buff);
145            // Save the barrier frame index to the high two bytes.
146            loc->reserved_2 |= (frm + 1) << 16;
147          }
148        }
149        __kmp_str_loc_free(&str_loc);
150        __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL);
151      }
152    } else { // Region domain exists for this location
153      // Check if team size was changed. Then create new region domain for this
154      // location
155      unsigned int frm = (loc->reserved_2 & 0x0000FFFF) - 1;
156      if ((frm < KMP_MAX_FRAME_DOMAINS) &&
157          (__kmp_itt_region_team_size[frm] != team_size)) {
158        char *buff = NULL;
159        kmp_str_loc_t str_loc =
160            __kmp_str_loc_init(loc->psource, /* init_fname */ false);
161        buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
162                                team_size, str_loc.file, str_loc.line,
163                                str_loc.col);
164
165        __itt_suppress_push(__itt_suppress_memory_errors);
166        __kmp_itt_region_domains[frm] = __itt_domain_create(buff);
167        __itt_suppress_pop();
168
169        __kmp_str_free(&buff);
170        __kmp_str_loc_free(&str_loc);
171        __kmp_itt_region_team_size[frm] = team_size;
172        __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL);
173      } else { // Team size was not changed. Use existing domain.
174        __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL);
175      }
176    }
177    KMP_ITT_DEBUG_LOCK();
178    KMP_ITT_DEBUG_PRINT("[frm beg] gtid=%d, idx=%x, loc:%p\n", gtid,
179                        loc->reserved_2, loc);
180  }
181#endif
182} // __kmp_itt_region_forking
183
184// -----------------------------------------------------------------------------
185LINKAGE void __kmp_itt_frame_submit(int gtid, __itt_timestamp begin,
186                                    __itt_timestamp end, int imbalance,
187                                    ident_t *loc, int team_size, int region) {
188#if USE_ITT_NOTIFY
189  if (region) {
190    kmp_team_t *team = __kmp_team_from_gtid(gtid);
191    int serialized = (region == 2 ? 1 : 0);
192    if (team->t.t_active_level + serialized > 1) {
193      // The frame notifications are only supported for the outermost teams.
194      return;
195    }
196    // Check region domain has not been created before. It's index is saved in
197    // the low two bytes.
198    if ((loc->reserved_2 & 0x0000FFFF) == 0) {
199      if (__kmp_region_domain_count < KMP_MAX_FRAME_DOMAINS) {
200        int frm =
201            KMP_TEST_THEN_INC32(&__kmp_region_domain_count); // get "old" value
202        if (frm >= KMP_MAX_FRAME_DOMAINS) {
203          KMP_TEST_THEN_DEC32(&__kmp_region_domain_count); // revert the count
204          return; // loc->reserved_2 is still 0
205        }
206
207        // We need to save indexes for both region and barrier frames. We'll use
208        // loc->reserved_2 field but put region index to the low two bytes and
209        // barrier indexes to the high two bytes. It is OK because
210        // KMP_MAX_FRAME_DOMAINS = 512.
211        loc->reserved_2 |= (frm + 1); // save "new" value
212
213        // Transform compiler-generated region location into the format
214        // that the tools more or less standardized on:
215        //   "<func>$omp$parallel:team_size@[file:]<line>[:<col>]"
216        char *buff = NULL;
217        kmp_str_loc_t str_loc =
218            __kmp_str_loc_init(loc->psource, /* init_fname */ false);
219        buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
220                                team_size, str_loc.file, str_loc.line,
221                                str_loc.col);
222
223        __itt_suppress_push(__itt_suppress_memory_errors);
224        __kmp_itt_region_domains[frm] = __itt_domain_create(buff);
225        __itt_suppress_pop();
226
227        __kmp_str_free(&buff);
228        __kmp_str_loc_free(&str_loc);
229        __kmp_itt_region_team_size[frm] = team_size;
230        __itt_frame_submit_v3(__kmp_itt_region_domains[frm], NULL, begin, end);
231      }
232    } else { // Region domain exists for this location
233      // Check if team size was changed. Then create new region domain for this
234      // location
235      unsigned int frm = (loc->reserved_2 & 0x0000FFFF) - 1;
236      if (frm >= KMP_MAX_FRAME_DOMAINS)
237        return; // something's gone wrong, returning
238      if (__kmp_itt_region_team_size[frm] != team_size) {
239        char *buff = NULL;
240        kmp_str_loc_t str_loc =
241            __kmp_str_loc_init(loc->psource, /* init_fname */ false);
242        buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
243                                team_size, str_loc.file, str_loc.line,
244                                str_loc.col);
245
246        __itt_suppress_push(__itt_suppress_memory_errors);
247        __kmp_itt_region_domains[frm] = __itt_domain_create(buff);
248        __itt_suppress_pop();
249
250        __kmp_str_free(&buff);
251        __kmp_str_loc_free(&str_loc);
252        __kmp_itt_region_team_size[frm] = team_size;
253        __itt_frame_submit_v3(__kmp_itt_region_domains[frm], NULL, begin, end);
254      } else { // Team size was not changed. Use existing domain.
255        __itt_frame_submit_v3(__kmp_itt_region_domains[frm], NULL, begin, end);
256      }
257    }
258    KMP_ITT_DEBUG_LOCK();
259    KMP_ITT_DEBUG_PRINT(
260        "[reg sub] gtid=%d, idx=%x, region:%d, loc:%p, beg:%llu, end:%llu\n",
261        gtid, loc->reserved_2, region, loc, begin, end);
262    return;
263  } else { // called for barrier reporting
264    if (loc) {
265      if ((loc->reserved_2 & 0xFFFF0000) == 0) {
266        if (__kmp_barrier_domain_count < KMP_MAX_FRAME_DOMAINS) {
267          int frm = KMP_TEST_THEN_INC32(
268              &__kmp_barrier_domain_count); // get "old" value
269          if (frm >= KMP_MAX_FRAME_DOMAINS) {
270            KMP_TEST_THEN_DEC32(
271                &__kmp_barrier_domain_count); // revert the count
272            return; // loc->reserved_2 is still 0
273          }
274          // Save the barrier frame index to the high two bytes.
275          loc->reserved_2 |= (frm + 1) << 16; // save "new" value
276
277          // Transform compiler-generated region location into the format
278          // that the tools more or less standardized on:
279          //   "<func>$omp$frame@[file:]<line>[:<col>]"
280          kmp_str_loc_t str_loc =
281              __kmp_str_loc_init(loc->psource, /* init_fname */ false);
282          if (imbalance) {
283            char *buff_imb = NULL;
284            buff_imb = __kmp_str_format("%s$omp$barrier-imbalance:%d@%s:%d",
285                                        str_loc.func, team_size, str_loc.file,
286                                        str_loc.col);
287            __itt_suppress_push(__itt_suppress_memory_errors);
288            __kmp_itt_imbalance_domains[frm] = __itt_domain_create(buff_imb);
289            __itt_suppress_pop();
290            __itt_frame_submit_v3(__kmp_itt_imbalance_domains[frm], NULL, begin,
291                                  end);
292            __kmp_str_free(&buff_imb);
293          } else {
294            char *buff = NULL;
295            buff = __kmp_str_format("%s$omp$barrier@%s:%d", str_loc.func,
296                                    str_loc.file, str_loc.col);
297            __itt_suppress_push(__itt_suppress_memory_errors);
298            __kmp_itt_barrier_domains[frm] = __itt_domain_create(buff);
299            __itt_suppress_pop();
300            __itt_frame_submit_v3(__kmp_itt_barrier_domains[frm], NULL, begin,
301                                  end);
302            __kmp_str_free(&buff);
303          }
304          __kmp_str_loc_free(&str_loc);
305        }
306      } else { // if it is not 0 then it should be <= KMP_MAX_FRAME_DOMAINS
307        if (imbalance) {
308          __itt_frame_submit_v3(
309              __kmp_itt_imbalance_domains[(loc->reserved_2 >> 16) - 1], NULL,
310              begin, end);
311        } else {
312          __itt_frame_submit_v3(
313              __kmp_itt_barrier_domains[(loc->reserved_2 >> 16) - 1], NULL,
314              begin, end);
315        }
316      }
317      KMP_ITT_DEBUG_LOCK();
318      KMP_ITT_DEBUG_PRINT(
319          "[frm sub] gtid=%d, idx=%x, loc:%p, beg:%llu, end:%llu\n", gtid,
320          loc->reserved_2, loc, begin, end);
321    }
322  }
323#endif
324} // __kmp_itt_frame_submit
325
326// -----------------------------------------------------------------------------
327LINKAGE void __kmp_itt_metadata_imbalance(int gtid, kmp_uint64 begin,
328                                          kmp_uint64 end, kmp_uint64 imbalance,
329                                          kmp_uint64 reduction) {
330#if USE_ITT_NOTIFY
331  if (metadata_domain == NULL) {
332    __kmp_acquire_bootstrap_lock(&metadata_lock);
333    if (metadata_domain == NULL) {
334      __itt_suppress_push(__itt_suppress_memory_errors);
335      metadata_domain = __itt_domain_create("OMP Metadata");
336      string_handle_imbl = __itt_string_handle_create("omp_metadata_imbalance");
337      string_handle_loop = __itt_string_handle_create("omp_metadata_loop");
338      string_handle_sngl = __itt_string_handle_create("omp_metadata_single");
339      __itt_suppress_pop();
340    }
341    __kmp_release_bootstrap_lock(&metadata_lock);
342  }
343
344  kmp_uint64 imbalance_data[4];
345  imbalance_data[0] = begin;
346  imbalance_data[1] = end;
347  imbalance_data[2] = imbalance;
348  imbalance_data[3] = reduction;
349
350  __itt_metadata_add(metadata_domain, __itt_null, string_handle_imbl,
351                     __itt_metadata_u64, 4, imbalance_data);
352#endif
353} // __kmp_itt_metadata_imbalance
354
355// -----------------------------------------------------------------------------
356LINKAGE void __kmp_itt_metadata_loop(ident_t *loc, kmp_uint64 sched_type,
357                                     kmp_uint64 iterations, kmp_uint64 chunk) {
358#if USE_ITT_NOTIFY
359  if (metadata_domain == NULL) {
360    __kmp_acquire_bootstrap_lock(&metadata_lock);
361    if (metadata_domain == NULL) {
362      __itt_suppress_push(__itt_suppress_memory_errors);
363      metadata_domain = __itt_domain_create("OMP Metadata");
364      string_handle_imbl = __itt_string_handle_create("omp_metadata_imbalance");
365      string_handle_loop = __itt_string_handle_create("omp_metadata_loop");
366      string_handle_sngl = __itt_string_handle_create("omp_metadata_single");
367      __itt_suppress_pop();
368    }
369    __kmp_release_bootstrap_lock(&metadata_lock);
370  }
371
372  // Parse line and column from psource string: ";file;func;line;col;;"
373  KMP_DEBUG_ASSERT(loc->psource);
374  kmp_uint64 loop_data[5];
375  int line, col;
376  __kmp_str_loc_numbers(loc->psource, &line, &col);
377  loop_data[0] = line;
378  loop_data[1] = col;
379  loop_data[2] = sched_type;
380  loop_data[3] = iterations;
381  loop_data[4] = chunk;
382
383  __itt_metadata_add(metadata_domain, __itt_null, string_handle_loop,
384                     __itt_metadata_u64, 5, loop_data);
385#endif
386} // __kmp_itt_metadata_loop
387
388// -----------------------------------------------------------------------------
389LINKAGE void __kmp_itt_metadata_single(ident_t *loc) {
390#if USE_ITT_NOTIFY
391  if (metadata_domain == NULL) {
392    __kmp_acquire_bootstrap_lock(&metadata_lock);
393    if (metadata_domain == NULL) {
394      __itt_suppress_push(__itt_suppress_memory_errors);
395      metadata_domain = __itt_domain_create("OMP Metadata");
396      string_handle_imbl = __itt_string_handle_create("omp_metadata_imbalance");
397      string_handle_loop = __itt_string_handle_create("omp_metadata_loop");
398      string_handle_sngl = __itt_string_handle_create("omp_metadata_single");
399      __itt_suppress_pop();
400    }
401    __kmp_release_bootstrap_lock(&metadata_lock);
402  }
403
404  int line, col;
405  __kmp_str_loc_numbers(loc->psource, &line, &col);
406  kmp_uint64 single_data[2];
407  single_data[0] = line;
408  single_data[1] = col;
409
410  __itt_metadata_add(metadata_domain, __itt_null, string_handle_sngl,
411                     __itt_metadata_u64, 2, single_data);
412#endif
413} // __kmp_itt_metadata_single
414
415// -----------------------------------------------------------------------------
416LINKAGE void __kmp_itt_region_starting(int gtid) {
417#if USE_ITT_NOTIFY
418#endif
419} // __kmp_itt_region_starting
420
421// -----------------------------------------------------------------------------
422LINKAGE void __kmp_itt_region_finished(int gtid) {
423#if USE_ITT_NOTIFY
424#endif
425} // __kmp_itt_region_finished
426
427// ----------------------------------------------------------------------------
428LINKAGE void __kmp_itt_region_joined(int gtid) {
429#if USE_ITT_NOTIFY
430  kmp_team_t *team = __kmp_team_from_gtid(gtid);
431  if (team->t.t_active_level > 1) {
432    // The frame notifications are only supported for the outermost teams.
433    return;
434  }
435  ident_t *loc = __kmp_thread_from_gtid(gtid)->th.th_ident;
436  if (loc && loc->reserved_2) {
437    unsigned int frm = (loc->reserved_2 & 0x0000FFFF) - 1;
438    if (frm < KMP_MAX_FRAME_DOMAINS) {
439      KMP_ITT_DEBUG_LOCK();
440      __itt_frame_end_v3(__kmp_itt_region_domains[frm], NULL);
441      KMP_ITT_DEBUG_PRINT("[frm end] gtid=%d, idx=%x, loc:%p\n", gtid,
442                          loc->reserved_2, loc);
443    }
444  }
445#endif
446} // __kmp_itt_region_joined
447
/* Barriers reporting.

   A barrier consists of two phases:
   1. Gather -- master waits for the arrival of all the worker threads; each
      worker thread registers arrival and goes further.
   2. Release -- each worker thread waits until master lets it go; master lets
      worker threads go.

   Functions should be called by each thread:
   * __kmp_itt_barrier_starting() -- before arriving to the gather phase.
   * __kmp_itt_barrier_middle()   -- between gather and release phases.
   * __kmp_itt_barrier_finished() -- after release phase.

   Note: Call __kmp_itt_barrier_object() before the call to
   __kmp_itt_barrier_starting() and save the result in a local variable.
   __kmp_itt_barrier_object(), being called too late (e. g. after gather phase)
   would return itt sync object for the next barrier!

   ITT needs an address (void *) to be specified as a sync object. OpenMP RTL
   does not have barrier object or barrier data structure. Barrier is just a
   counter in team and thread structures. We could use an address of team
   structure as a barrier sync object, but ITT wants different objects for
   different barriers (even within the same team). So let us use team address
   as barrier sync object for the first barrier, then increase it by one for the
   next barrier, and so on (but wrap it not to use addresses outside of team
   structure). */

// Returns the ITT sync object (an address inside the team structure) that
// identifies a barrier of type `bt` for the team of thread `gtid`.
//
//   gtid     -- global thread id of the calling thread.
//   bt       -- barrier type; it indexes team->t.t_bar[] and is encoded into
//               the returned address.
//   set_name -- if non-zero, the object is also registered with
//               __itt_sync_create() under a name describing the barrier kind.
//   delta    -- offset added to the barrier counter (see below).
//
// Returns NULL when USE_ITT_NOTIFY is off or the thread has no team.
void *__kmp_itt_barrier_object(int gtid, int bt, int set_name,
                               int delta // 0 (current barrier) is default
                               // value; specify -1 to get previous
                               // barrier.
                               ) {
  void *object = NULL;
#if USE_ITT_NOTIFY
  kmp_info_t *thr = __kmp_thread_from_gtid(gtid);
  kmp_team_t *team = thr->th.th_team;

  // NOTE: If the function is called from __kmp_fork_barrier, team pointer can
  // be NULL. This "if" helps to avoid crash. However, this is not complete
  // solution, and reporting fork/join barriers to ITT should be revisited.

  if (team != NULL) {
    // Master thread increases b_arrived by KMP_BARRIER_STATE_BUMP each time.
    // Divide b_arrived by KMP_BARRIER_STATE_BUMP to get plain barrier counter.
    kmp_uint64 counter =
        team->t.t_bar[bt].b_arrived / KMP_BARRIER_STATE_BUMP + delta;
    // Now form the barrier id. Encode barrier type (bt) in barrier id too, so
    // barriers of different types do not have the same ids.
    // This condition is a must (we would have zero divide otherwise).
    KMP_BUILD_ASSERT(sizeof(kmp_team_t) >= bs_last_barrier);
    // Stronger condition: make sure we have room for at least two different
    // ids (for each barrier type).
    KMP_BUILD_ASSERT(sizeof(kmp_team_t) >= 2 * bs_last_barrier);
    object = reinterpret_cast<void *>(
        kmp_uintptr_t(team) +
        counter % (sizeof(kmp_team_t) / bs_last_barrier) * bs_last_barrier +
        bt);
    KMP_ITT_DEBUG_LOCK();
    KMP_ITT_DEBUG_PRINT("[bar obj] type=%d, counter=%lld, object=%p\n", bt,
                        counter, object);

    if (set_name) {
      ident_t const *loc = NULL;
      char const *src = NULL;
      char const *type = "OMP Barrier";
      switch (bt) {
      case bs_plain_barrier: {
        // For plain barrier compiler calls __kmpc_barrier() function, which
        // saves location in thr->th.th_ident.
        loc = thr->th.th_ident;
        // Get the barrier type from flags provided by compiler.
        kmp_int32 expl = 0;
        kmp_uint32 impl = 0;
        if (loc != NULL) {
          src = loc->psource;
          expl = (loc->flags & KMP_IDENT_BARRIER_EXPL) != 0;
          impl = (loc->flags & KMP_IDENT_BARRIER_IMPL) != 0;
        }
        if (impl) { // impl can only be set when loc != NULL
          switch (loc->flags & KMP_IDENT_BARRIER_IMPL_MASK) {
          case KMP_IDENT_BARRIER_IMPL_FOR: {
            type = "OMP For Barrier";
          } break;
          case KMP_IDENT_BARRIER_IMPL_SECTIONS: {
            type = "OMP Sections Barrier";
          } break;
          case KMP_IDENT_BARRIER_IMPL_SINGLE: {
            type = "OMP Single Barrier";
          } break;
          case KMP_IDENT_BARRIER_IMPL_WORKSHARE: {
            type = "OMP Workshare Barrier";
          } break;
          default: {
            type = "OMP Implicit Barrier";
            KMP_DEBUG_ASSERT(0);
          }
          }
        } else if (expl) {
          type = "OMP Explicit Barrier";
        }
      } break;
      case bs_forkjoin_barrier: {
        // In case of fork/join barrier we cannot read thr->th.th_ident,
        // because it contains location of last passed construct (while join
        // barrier is not such one). Use th_ident of master thread instead --
        // __kmp_join_call() called by the master thread saves location.
        //
        // AC: cannot read from master because __kmp_join_call may be not called
        //    yet, so we read the location from team. This is the same location.
        //    And team is valid at the enter to join barrier where this happens.
        loc = team->t.t_ident;
        if (loc != NULL) {
          src = loc->psource;
        }
        type = "OMP Join Barrier";
      } break;
      default:
        // Any other barrier type keeps the generic "OMP Barrier" name and no
        // source location.
        break;
      }
      KMP_ITT_DEBUG_LOCK();
      __itt_sync_create(object, type, src, __itt_attr_barrier);
      // src may be NULL; never hand a null pointer to "%s".
      KMP_ITT_DEBUG_PRINT(
          "[bar sta] scre( %p, \"%s\", \"%s\", __itt_attr_barrier )\n", object,
          type, (src != NULL ? src : "(null)"));
    }
  }
#endif
  return object;
} // __kmp_itt_barrier_object
575
576// -----------------------------------------------------------------------------
// Reports that thread `gtid` is about to arrive at the barrier identified by
// `object`: a non-master thread first reports "releasing", then every thread
// reports "prepare". Does nothing when USE_ITT_NOTIFY is off.
void __kmp_itt_barrier_starting(int gtid, void *object) {
#if USE_ITT_NOTIFY
  int is_master = KMP_MASTER_GTID(gtid);
  if (!is_master) {
    KMP_ITT_DEBUG_LOCK();
    __itt_sync_releasing(object);
    KMP_ITT_DEBUG_PRINT("[bar sta] srel( %p )\n", object);
  }

  // Reported by master and workers alike.
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_prepare(object);
  KMP_ITT_DEBUG_PRINT("[bar sta] spre( %p )\n", object);
#endif
} // __kmp_itt_barrier_starting
589
590// -----------------------------------------------------------------------------
// Reports the point between the gather and release phases of the barrier
// identified by `object`. Only the master thread reports here: "acquired"
// followed by "releasing". Does nothing when USE_ITT_NOTIFY is off.
void __kmp_itt_barrier_middle(int gtid, void *object) {
#if USE_ITT_NOTIFY
  if (!KMP_MASTER_GTID(gtid)) {
    return; // workers have nothing to report at this point
  }

  KMP_ITT_DEBUG_LOCK();
  __itt_sync_acquired(object);
  KMP_ITT_DEBUG_PRINT("[bar mid] sacq( %p )\n", object);

  KMP_ITT_DEBUG_LOCK();
  __itt_sync_releasing(object);
  KMP_ITT_DEBUG_PRINT("[bar mid] srel( %p )\n", object);
#endif
} // __kmp_itt_barrier_middle
604
605// -----------------------------------------------------------------------------
// Reports that thread `gtid` has passed the release phase of the barrier
// identified by `object`. Only non-master threads report "acquired" here.
// Does nothing when USE_ITT_NOTIFY is off.
void __kmp_itt_barrier_finished(int gtid, void *object) {
#if USE_ITT_NOTIFY
  if (KMP_MASTER_GTID(gtid)) {
    return; // the master has nothing to report at this point
  }

  KMP_ITT_DEBUG_LOCK();
  __itt_sync_acquired(object);
  KMP_ITT_DEBUG_PRINT("[bar end] sacq( %p )\n", object);
#endif
} // __kmp_itt_barrier_finished
616
/* Taskwait reporting.
   ITT needs an address (void *) to be specified as a sync object. OpenMP RTL
   does not have a taskwait structure, so we need to construct something. */

// Returns the ITT sync object for the current taskwait of thread `gtid`: the
// address of the thread's current task data, offset by the task's taskwait
// counter wrapped to the size of the task data structure.
// Returns NULL when USE_ITT_NOTIFY is off or __itt_sync_create_ptr is not set.
void *__kmp_itt_taskwait_object(int gtid) {
#if USE_ITT_NOTIFY
  if (UNLIKELY(__itt_sync_create_ptr)) {
    kmp_taskdata_t *taskdata =
        __kmp_thread_from_gtid(gtid)->th.th_current_task;
    return reinterpret_cast<void *>(
        kmp_uintptr_t(taskdata) +
        taskdata->td_taskwait_counter % sizeof(kmp_taskdata_t));
  }
#endif
  return NULL;
} // __kmp_itt_taskwait_object
634
// Reports that thread `gtid` starts waiting at a taskwait. Registers `object`
// with ITT as an "OMP Taskwait" sync object (named after the source location
// stored in the current task's td_taskwait_ident, if any) and then reports
// "prepare" on it. Does nothing when USE_ITT_NOTIFY is off.
void __kmp_itt_taskwait_starting(int gtid, void *object) {
#if USE_ITT_NOTIFY
  kmp_info_t *thread = __kmp_thread_from_gtid(gtid);
  kmp_taskdata_t *taskdata = thread->th.th_current_task;
  ident_t const *loc = taskdata->td_taskwait_ident;
  char const *src = (loc == NULL ? NULL : loc->psource);
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_create(object, "OMP Taskwait", src, 0);
  // src may be NULL; never hand a null pointer to "%s".
  KMP_ITT_DEBUG_PRINT("[twa sta] scre( %p, \"OMP Taskwait\", \"%s\", 0 )\n",
                      object, (src != NULL ? src : "(null)"));
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_prepare(object);
  KMP_ITT_DEBUG_PRINT("[twa sta] spre( %p )\n", object);
#endif
} // __kmp_itt_taskwait_starting
650
// Reports that the taskwait identified by `object` is over: "acquired" is
// reported on the sync object, after which the object is destroyed.
// `gtid` is not used. Does nothing when USE_ITT_NOTIFY is off.
void __kmp_itt_taskwait_finished(int gtid, void *object) {
#if USE_ITT_NOTIFY
  // 1. The wait has completed.
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_acquired(object);
  KMP_ITT_DEBUG_PRINT("[twa end] sacq( %p )\n", object);

  // 2. The sync object is no longer needed.
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_destroy(object);
  KMP_ITT_DEBUG_PRINT("[twa end] sdes( %p )\n", object);
#endif
} // __kmp_itt_taskwait_finished
661
/* Task reporting.
   Only those tasks are reported which are executed by a thread spinning at a
   barrier (or taskwait). The sync object passed to the functions must be the
   barrier or taskwait the thread is waiting at. */

// Reports that the waiting thread starts executing a task: the pending wait
// on `object` is cancelled. A NULL object is ignored. Does nothing when
// USE_ITT_NOTIFY is off.
void __kmp_itt_task_starting(
    void *object // ITT sync object: barrier or taskwait.
    ) {
#if USE_ITT_NOTIFY
  if (UNLIKELY(object != NULL)) {
    KMP_ITT_DEBUG_LOCK();
    __itt_sync_cancel(object);
    KMP_ITT_DEBUG_PRINT("[tsk sta] scan( %p )\n", object);
  }
#endif
} // __kmp_itt_task_starting
678
679// -----------------------------------------------------------------------------
// Reports that the waiting thread has finished executing a task and resumes
// waiting: "prepare" is reported on `object` again. Unlike
// __kmp_itt_task_starting(), `object` is not checked for NULL here.
// Does nothing when USE_ITT_NOTIFY is off.
void __kmp_itt_task_finished(
    void *object // ITT sync object: barrier or taskwait.
    ) {
#if USE_ITT_NOTIFY
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_prepare(object);
  KMP_ITT_DEBUG_PRINT("[tsk end] spre( %p )\n", object);
#endif
} // __kmp_itt_task_finished
689
690/* Lock reporting.
691 * __kmp_itt_lock_creating( lock ) should be called *before* the first lock
692   operation (set/unset). It is not a real event shown to the user but just
693   setting a name for synchronization object. `lock' is an address of sync
694   object, the same address should be used in all subsequent calls.
695 * __kmp_itt_lock_acquiring() should be called before setting the lock.
696 * __kmp_itt_lock_acquired() should be called after setting the lock.
697 * __kmp_itt_lock_realeasing() should be called before unsetting the lock.
698 * __kmp_itt_lock_cancelled() should be called after thread cancelled waiting
699   for the lock.
700 * __kmp_itt_lock_destroyed( lock ) should be called after the last lock
701   operation. After __kmp_itt_lock_destroyed() all the references to the same
702   address will be considered as another sync object, not related with the
703   original one.  */
704
705#if KMP_USE_DYNAMIC_LOCK
706// Takes location information directly
707__kmp_inline void ___kmp_itt_lock_init(kmp_user_lock_p lock, char const *type,
708                                       const ident_t *loc) {
709#if USE_ITT_NOTIFY
710  if (__itt_sync_create_ptr) {
711    char const *src = (loc == NULL ? NULL : loc->psource);
712    KMP_ITT_DEBUG_LOCK();
713    __itt_sync_create(lock, type, src, 0);
714    KMP_ITT_DEBUG_PRINT("[lck ini] scre( %p, \"%s\", \"%s\", 0 )\n", lock, type,
715                        src);
716  }
717#endif
718}
719#else // KMP_USE_DYNAMIC_LOCK
720// Internal guts -- common code for locks and critical sections, do not call
721// directly.
722__kmp_inline void ___kmp_itt_lock_init(kmp_user_lock_p lock, char const *type) {
723#if USE_ITT_NOTIFY
724  if (__itt_sync_create_ptr) {
725    ident_t const *loc = NULL;
726    if (__kmp_get_user_lock_location_ != NULL)
727      loc = __kmp_get_user_lock_location_((lock));
728    char const *src = (loc == NULL ? NULL : loc->psource);
729    KMP_ITT_DEBUG_LOCK();
730    __itt_sync_create(lock, type, src, 0);
731    KMP_ITT_DEBUG_PRINT("[lck ini] scre( %p, \"%s\", \"%s\", 0 )\n", lock, type,
732                        src);
733  }
734#endif
735} // ___kmp_itt_lock_init
736#endif // KMP_USE_DYNAMIC_LOCK
737
738// Internal guts -- common code for locks and critical sections, do not call
739// directly.
740__kmp_inline void ___kmp_itt_lock_fini(kmp_user_lock_p lock, char const *type) {
741#if USE_ITT_NOTIFY
742  KMP_ITT_DEBUG_LOCK();
743  __itt_sync_destroy(lock);
744  KMP_ITT_DEBUG_PRINT("[lck dst] sdes( %p )\n", lock);
745#endif
746} // ___kmp_itt_lock_fini
747
748// -----------------------------------------------------------------------------
749#if KMP_USE_DYNAMIC_LOCK
750void __kmp_itt_lock_creating(kmp_user_lock_p lock, const ident_t *loc) {
751  ___kmp_itt_lock_init(lock, "OMP Lock", loc);
752}
753#else
754void __kmp_itt_lock_creating(kmp_user_lock_p lock) {
755  ___kmp_itt_lock_init(lock, "OMP Lock");
756} // __kmp_itt_lock_creating
757#endif
758
759void __kmp_itt_lock_acquiring(kmp_user_lock_p lock) {
760#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
761  // postpone lock object access
762  if (__itt_sync_prepare_ptr) {
763    if (KMP_EXTRACT_D_TAG(lock) == 0) {
764      kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
765      __itt_sync_prepare(ilk->lock);
766    } else {
767      __itt_sync_prepare(lock);
768    }
769  }
770#else
771  __itt_sync_prepare(lock);
772#endif
773} // __kmp_itt_lock_acquiring
774
775void __kmp_itt_lock_acquired(kmp_user_lock_p lock) {
776#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
777  // postpone lock object access
778  if (__itt_sync_acquired_ptr) {
779    if (KMP_EXTRACT_D_TAG(lock) == 0) {
780      kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
781      __itt_sync_acquired(ilk->lock);
782    } else {
783      __itt_sync_acquired(lock);
784    }
785  }
786#else
787  __itt_sync_acquired(lock);
788#endif
789} // __kmp_itt_lock_acquired
790
791void __kmp_itt_lock_releasing(kmp_user_lock_p lock) {
792#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
793  if (__itt_sync_releasing_ptr) {
794    if (KMP_EXTRACT_D_TAG(lock) == 0) {
795      kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
796      __itt_sync_releasing(ilk->lock);
797    } else {
798      __itt_sync_releasing(lock);
799    }
800  }
801#else
802  __itt_sync_releasing(lock);
803#endif
804} // __kmp_itt_lock_releasing
805
806void __kmp_itt_lock_cancelled(kmp_user_lock_p lock) {
807#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
808  if (__itt_sync_cancel_ptr) {
809    if (KMP_EXTRACT_D_TAG(lock) == 0) {
810      kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
811      __itt_sync_cancel(ilk->lock);
812    } else {
813      __itt_sync_cancel(lock);
814    }
815  }
816#else
817  __itt_sync_cancel(lock);
818#endif
819} // __kmp_itt_lock_cancelled
820
821void __kmp_itt_lock_destroyed(kmp_user_lock_p lock) {
822  ___kmp_itt_lock_fini(lock, "OMP Lock");
823} // __kmp_itt_lock_destroyed
824
825/* Critical reporting.
826   Critical sections are treated exactly as locks (but have different object
827   type). */
828#if KMP_USE_DYNAMIC_LOCK
829void __kmp_itt_critical_creating(kmp_user_lock_p lock, const ident_t *loc) {
830  ___kmp_itt_lock_init(lock, "OMP Critical", loc);
831}
832#else
833void __kmp_itt_critical_creating(kmp_user_lock_p lock) {
834  ___kmp_itt_lock_init(lock, "OMP Critical");
835} // __kmp_itt_critical_creating
836#endif
837
838void __kmp_itt_critical_acquiring(kmp_user_lock_p lock) {
839  __itt_sync_prepare(lock);
840} // __kmp_itt_critical_acquiring
841
842void __kmp_itt_critical_acquired(kmp_user_lock_p lock) {
843  __itt_sync_acquired(lock);
844} // __kmp_itt_critical_acquired
845
846void __kmp_itt_critical_releasing(kmp_user_lock_p lock) {
847  __itt_sync_releasing(lock);
848} // __kmp_itt_critical_releasing
849
850void __kmp_itt_critical_destroyed(kmp_user_lock_p lock) {
851  ___kmp_itt_lock_fini(lock, "OMP Critical");
852} // __kmp_itt_critical_destroyed
853
854/* Single reporting. */
855
// Reports the start of a `single` construct for thread `gtid`: creates an ITT
// mark named "OMP Single-<source string>", stores it in the thread's
// th_itt_mark_single field, and then emits it with __itt_mark(). Runs only
// when the mark-create hook is set or KMP_ITT_DEBUG is enabled.
void __kmp_itt_single_start(int gtid) {
#if USE_ITT_NOTIFY
  if (__itt_mark_create_ptr || KMP_ITT_DEBUG) {
    kmp_info_t *thr = __kmp_thread_from_gtid(gtid);
    ident_t *loc = thr->th.th_ident;
    // The mark name is built with a "%s" conversion, which must never be given
    // NULL (undefined behavior). When no source string is available, fall back
    // to the "(null)" text that common C libraries print in that situation.
    char const *src =
        (loc == NULL || loc->psource == NULL) ? "(null)" : loc->psource;
    kmp_str_buf_t name;
    __kmp_str_buf_init(&name);
    __kmp_str_buf_print(&name, "OMP Single-%s", src);
    // Each KMP_ITT_DEBUG_LOCK() is released by the following
    // KMP_ITT_DEBUG_PRINT(), so the lock is taken once per traced call.
    KMP_ITT_DEBUG_LOCK();
    thr->th.th_itt_mark_single = __itt_mark_create(name.str);
    KMP_ITT_DEBUG_PRINT("[sin sta] mcre( \"%s\") -> %d\n", name.str,
                        thr->th.th_itt_mark_single);
    __kmp_str_buf_free(&name);
    KMP_ITT_DEBUG_LOCK();
    __itt_mark(thr->th.th_itt_mark_single, NULL);
    KMP_ITT_DEBUG_PRINT("[sin sta] mark( %d, NULL )\n",
                        thr->th.th_itt_mark_single);
  }
#endif
} // __kmp_itt_single_start
877
// Reports the end of a `single` construct for thread `gtid` by switching off
// the ITT mark stored in that thread's th_itt_mark_single field.
void __kmp_itt_single_end(int gtid) {
#if USE_ITT_NOTIFY
  kmp_info_t *thr = __kmp_thread_from_gtid(gtid);
  __itt_mark_type mark = thr->th.th_itt_mark_single;
  // Debug lock is taken here and released inside KMP_ITT_DEBUG_PRINT().
  KMP_ITT_DEBUG_LOCK();
  __itt_mark_off(mark);
  KMP_ITT_DEBUG_PRINT("[sin end] moff( %d )\n", mark);
#endif
} // __kmp_itt_single_end
886
887/* Ordered reporting.
888 * __kmp_itt_ordered_init is called by each thread *before* first using sync
889   object. ITT team would like it to be called once, but it requires extra
890   synchronization.
891 * __kmp_itt_ordered_prep is called when thread is going to enter ordered
892   section (before synchronization).
893 * __kmp_itt_ordered_start is called just before entering user code (after
894   synchronization).
895 * __kmp_itt_ordered_end is called after returning from user code.
896
897 Sync object is th->th.th_dispatch->th_dispatch_sh_current.
898 Events are not generated in case of serialized team. */
899
// Registers the ordered-section sync object of thread `gtid`
// (th_dispatch->th_dispatch_sh_current) with ITT under the object type
// "OMP Ordered", named after the thread's current source location if any.
void __kmp_itt_ordered_init(int gtid) {
#if USE_ITT_NOTIFY
  if (!__itt_sync_create_ptr) {
    return;
  }
  kmp_info_t *thr = __kmp_thread_from_gtid(gtid);
  ident_t const *loc = thr->th.th_ident;
  // No location recorded for the thread -> ITT gets a NULL object name.
  char const *src = NULL;
  if (loc != NULL) {
    src = loc->psource;
  }
  __itt_sync_create(thr->th.th_dispatch->th_dispatch_sh_current, "OMP Ordered",
                    src, 0);
#endif
} // __kmp_itt_ordered_init
911
// Reports that thread `gtid` is about to enter an ordered section: issues a
// sync "prepare" notification on the thread's th_dispatch_sh_current object.
// Gated on the sync-create hook; nothing is reported for a serialized team.
void __kmp_itt_ordered_prep(int gtid) {
#if USE_ITT_NOTIFY
  if (!__itt_sync_create_ptr) {
    return;
  }
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
  if (team->t.t_serialized) {
    return;
  }
  kmp_info_t *thr = __kmp_thread_from_gtid(gtid);
  __itt_sync_prepare(thr->th.th_dispatch->th_dispatch_sh_current);
#endif
} // __kmp_itt_ordered_prep
923
// Reports that thread `gtid` has entered an ordered section: issues a sync
// "acquired" notification on the thread's th_dispatch_sh_current object.
// Gated on the sync-create hook; nothing is reported for a serialized team.
void __kmp_itt_ordered_start(int gtid) {
#if USE_ITT_NOTIFY
  if (!__itt_sync_create_ptr) {
    return;
  }
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
  if (team->t.t_serialized) {
    return;
  }
  kmp_info_t *thr = __kmp_thread_from_gtid(gtid);
  __itt_sync_acquired(thr->th.th_dispatch->th_dispatch_sh_current);
#endif
} // __kmp_itt_ordered_start
935
// Reports that thread `gtid` is leaving an ordered section: issues a sync
// "releasing" notification on the thread's th_dispatch_sh_current object.
// Gated on the sync-create hook; nothing is reported for a serialized team.
void __kmp_itt_ordered_end(int gtid) {
#if USE_ITT_NOTIFY
  if (!__itt_sync_create_ptr) {
    return;
  }
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
  if (team->t.t_serialized) {
    return;
  }
  kmp_info_t *thr = __kmp_thread_from_gtid(gtid);
  __itt_sync_releasing(thr->th.th_dispatch->th_dispatch_sh_current);
#endif
} // __kmp_itt_ordered_end
947
948/* Threads reporting. */
949
// Forwards to __itt_thr_ignore() for the calling thread; takes no arguments
// and returns nothing.
void __kmp_itt_thread_ignore() {
  __itt_thr_ignore();
} // __kmp_itt_thread_ignore
953
// Gives the calling thread an ITT name of the form "OMP Master Thread #<gtid>"
// or "OMP Worker Thread #<gtid>", depending on KMP_MASTER_GTID(gtid). Does
// nothing unless the thread-name hook is set.
void __kmp_itt_thread_name(int gtid) {
#if USE_ITT_NOTIFY
  if (!__itt_thr_name_set_ptr) {
    return;
  }
  kmp_str_buf_t label;
  __kmp_str_buf_init(&label);
  if (KMP_MASTER_GTID(gtid)) {
    __kmp_str_buf_print(&label, "OMP Master Thread #%d", gtid);
  } else {
    __kmp_str_buf_print(&label, "OMP Worker Thread #%d", gtid);
  }
  // Debug lock is taken here and released inside KMP_ITT_DEBUG_PRINT().
  KMP_ITT_DEBUG_LOCK();
  __itt_thr_name_set(label.str, label.used);
  KMP_ITT_DEBUG_PRINT("[thr nam] name( \"%s\")\n", label.str);
  // The buffer is released only after both ITT and the trace have used it.
  __kmp_str_buf_free(&label);
#endif
} // __kmp_itt_thread_name
971
/* System object reporting.
   ITT catches operations on system sync objects (such as the critical
   sections and events of the Windows* OS API on IA-32 architecture). We only
   need to specify a name ("OMP Scheduler") for the object to let ITT know it
   is an object used by the OpenMP RTL for internal purposes. */
977
// Registers the system sync object at address `object` with ITT under the
// object type "OMP Scheduler", using `name` as the object name.
void __kmp_itt_system_object_created(void *object, char const *name) {
#if USE_ITT_NOTIFY
  // Debug lock is taken by KMP_ITT_DEBUG_LOCK() and released by
  // KMP_ITT_DEBUG_PRINT(); keep them paired around the notification.
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_create(object, "OMP Scheduler", name, 0);
  KMP_ITT_DEBUG_PRINT("[sys obj] scre( %p, \"OMP Scheduler\", \"%s\", 0 )\n",
                      object, name);
#endif
} // __kmp_itt_system_object_created
986
/* Stack stitching api.
   Master calls "create" and puts the stitching id into the team structure.
   Workers read the stitching id and call the "enter" / "leave" api.
   Master calls "destroy" at the end of the parallel region. */
991
992__itt_caller __kmp_itt_stack_caller_create() {
993#if USE_ITT_NOTIFY
994  if (!__itt_stack_caller_create_ptr)
995    return NULL;
996  KMP_ITT_DEBUG_LOCK();
997  __itt_caller id = __itt_stack_caller_create();
998  KMP_ITT_DEBUG_PRINT("[stk cre] %p\n", id);
999  return id;
1000#endif
1001  return NULL;
1002}
1003
1004void __kmp_itt_stack_caller_destroy(__itt_caller id) {
1005#if USE_ITT_NOTIFY
1006  if (__itt_stack_caller_destroy_ptr) {
1007    KMP_ITT_DEBUG_LOCK();
1008    __itt_stack_caller_destroy(id);
1009    KMP_ITT_DEBUG_PRINT("[stk des] %p\n", id);
1010  }
1011#endif
1012}
1013
1014void __kmp_itt_stack_callee_enter(__itt_caller id) {
1015#if USE_ITT_NOTIFY
1016  if (__itt_stack_callee_enter_ptr) {
1017    KMP_ITT_DEBUG_LOCK();
1018    __itt_stack_callee_enter(id);
1019    KMP_ITT_DEBUG_PRINT("[stk ent] %p\n", id);
1020  }
1021#endif
1022}
1023
1024void __kmp_itt_stack_callee_leave(__itt_caller id) {
1025#if USE_ITT_NOTIFY
1026  if (__itt_stack_callee_leave_ptr) {
1027    KMP_ITT_DEBUG_LOCK();
1028    __itt_stack_callee_leave(id);
1029    KMP_ITT_DEBUG_PRINT("[stk lea] %p\n", id);
1030  }
1031#endif
1032}
1033
1034#endif /* USE_ITT_BUILD */
1035