#if USE_ITT_BUILD
/*
 * kmp_itt.h -- ITT Notify interface.
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef KMP_ITT_H
#define KMP_ITT_H

#include "kmp_lock.h"

#define INTEL_ITTNOTIFY_API_PRIVATE
#include "ittnotify.h"
#include "legacy/ittnotify.h"

#if KMP_DEBUG
#define __kmp_inline // Turn off inlining in debug mode.
#else
#define __kmp_inline static inline
#endif

#if USE_ITT_NOTIFY
extern kmp_int32 __kmp_itt_prepare_delay;
#ifdef __cplusplus
extern "C" void __kmp_itt_fini_ittlib(void);
#else
extern void __kmp_itt_fini_ittlib(void);
#endif
#endif

// Simplify the handling of an argument that is only required when
// USE_ITT_BUILD is enabled.
#define USE_ITT_BUILD_ARG(x) , x
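/* A minimal usage sketch (the function name below is hypothetical, not part of
   this interface): USE_ITT_BUILD_ARG lets a signature carry an extra trailing
   argument only when USE_ITT_BUILD is enabled, without an ifdef at every
   declaration and call site:

     void __kmp_example_wait(int gtid USE_ITT_BUILD_ARG(void *itt_sync_obj));

   With USE_ITT_BUILD, this expands to
     void __kmp_example_wait(int gtid, void *itt_sync_obj);
   without it, the empty companion definition at the bottom of this file makes
   the argument disappear entirely. */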
void __kmp_itt_initialize();
void __kmp_itt_destroy();
void __kmp_itt_reset();

// -----------------------------------------------------------------------------
// New stuff for reporting high-level constructs.

// Note the naming convention:
//   __kmp_itt_xxxing() functions should be called before an action, while
//   __kmp_itt_xxxed() functions should be called after it.

// --- Parallel region reporting ---
__kmp_inline void
__kmp_itt_region_forking(int gtid, int team_size,
                         int barriers); // Master only, before forking threads.
__kmp_inline void
__kmp_itt_region_joined(int gtid); // Master only, after joining threads.
// (*) Note: A thread may execute tasks after this point, though.
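/* A simplified sketch of the intended pairing (the real call sites live in the
   runtime's fork/join path; all surrounding logic is omitted):

     __kmp_itt_region_forking(gtid, team_size, barriers); // before the fork
     // ... fork the team and execute the parallel region ...
     __kmp_itt_region_joined(gtid); // after the join
*/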
// --- Frame reporting ---
// region=0: no regions, region=1: parallel, region=2: serialized parallel
__kmp_inline void __kmp_itt_frame_submit(int gtid, __itt_timestamp begin,
                                         __itt_timestamp end, int imbalance,
                                         ident_t *loc, int team_size,
                                         int region = 0);

// --- Metadata reporting ---
// begin/end - begin/end timestamps of a barrier frame, imbalance - aggregated
// wait time value, reduction - if this is a reduction barrier
__kmp_inline void __kmp_itt_metadata_imbalance(int gtid, kmp_uint64 begin,
                                               kmp_uint64 end,
                                               kmp_uint64 imbalance,
                                               kmp_uint64 reduction);
// sched_type: 0 - static, 1 - dynamic, 2 - guided, 3 - custom (all others);
// iterations - loop trip count, chunk - chunk size
__kmp_inline void __kmp_itt_metadata_loop(ident_t *loc, kmp_uint64 sched_type,
                                          kmp_uint64 iterations,
                                          kmp_uint64 chunk);
__kmp_inline void __kmp_itt_metadata_single(ident_t *loc);

// --- Barrier reporting ---
__kmp_inline void *__kmp_itt_barrier_object(int gtid, int bt, int set_name = 0,
                                            int delta = 0);
__kmp_inline void __kmp_itt_barrier_starting(int gtid, void *object);
__kmp_inline void __kmp_itt_barrier_middle(int gtid, void *object);
__kmp_inline void __kmp_itt_barrier_finished(int gtid, void *object);

// --- Taskwait reporting ---
__kmp_inline void *__kmp_itt_taskwait_object(int gtid);
__kmp_inline void __kmp_itt_taskwait_starting(int gtid, void *object);
__kmp_inline void __kmp_itt_taskwait_finished(int gtid, void *object);

// --- Task reporting ---
__kmp_inline void __kmp_itt_task_starting(void *object);
__kmp_inline void __kmp_itt_task_finished(void *object);

// --- Lock reporting ---
#if KMP_USE_DYNAMIC_LOCK
__kmp_inline void __kmp_itt_lock_creating(kmp_user_lock_p lock,
                                          const ident_t *);
#else
__kmp_inline void __kmp_itt_lock_creating(kmp_user_lock_p lock);
#endif
__kmp_inline void __kmp_itt_lock_acquiring(kmp_user_lock_p lock);
__kmp_inline void __kmp_itt_lock_acquired(kmp_user_lock_p lock);
__kmp_inline void __kmp_itt_lock_releasing(kmp_user_lock_p lock);
__kmp_inline void __kmp_itt_lock_cancelled(kmp_user_lock_p lock);
__kmp_inline void __kmp_itt_lock_destroyed(kmp_user_lock_p lock);

// --- Critical reporting ---
#if KMP_USE_DYNAMIC_LOCK
__kmp_inline void __kmp_itt_critical_creating(kmp_user_lock_p lock,
                                              const ident_t *);
#else
__kmp_inline void __kmp_itt_critical_creating(kmp_user_lock_p lock);
#endif
__kmp_inline void __kmp_itt_critical_acquiring(kmp_user_lock_p lock);
__kmp_inline void __kmp_itt_critical_acquired(kmp_user_lock_p lock);
__kmp_inline void __kmp_itt_critical_releasing(kmp_user_lock_p lock);
__kmp_inline void __kmp_itt_critical_destroyed(kmp_user_lock_p lock);

// --- Single reporting ---
__kmp_inline void __kmp_itt_single_start(int gtid);
__kmp_inline void __kmp_itt_single_end(int gtid);

// --- Ordered reporting ---
__kmp_inline void __kmp_itt_ordered_init(int gtid);
__kmp_inline void __kmp_itt_ordered_prep(int gtid);
__kmp_inline void __kmp_itt_ordered_start(int gtid);
__kmp_inline void __kmp_itt_ordered_end(int gtid);

// --- Threads reporting ---
__kmp_inline void __kmp_itt_thread_ignore();
__kmp_inline void __kmp_itt_thread_name(int gtid);

// --- System objects ---
__kmp_inline void __kmp_itt_system_object_created(void *object,
                                                  char const *name);

// --- Stack stitching ---
__kmp_inline __itt_caller __kmp_itt_stack_caller_create(void);
__kmp_inline void __kmp_itt_stack_caller_destroy(__itt_caller);
__kmp_inline void __kmp_itt_stack_callee_enter(__itt_caller);
__kmp_inline void __kmp_itt_stack_callee_leave(__itt_caller);

// -----------------------------------------------------------------------------
// Old stuff for reporting low-level internal synchronization.

#if USE_ITT_NOTIFY

/* Support for SSC marks, which are used by SDE
   http://software.intel.com/en-us/articles/intel-software-development-emulator
   to mark points in instruction traces that represent spin-loops and are
   therefore uninteresting when collecting traces for architecture simulation.
*/
#ifndef INCLUDE_SSC_MARKS
#define INCLUDE_SSC_MARKS (KMP_OS_LINUX && KMP_ARCH_X86_64)
#endif

/* Linux 64 only for now */
#if (INCLUDE_SSC_MARKS && KMP_OS_LINUX && KMP_ARCH_X86_64)
// Portable (at least for gcc and icc) code to insert the necessary
// instructions to set %ebx and execute the unlikely no-op.
#if defined(__INTEL_COMPILER)
#define INSERT_SSC_MARK(tag) __SSC_MARK(tag)
#else
#define INSERT_SSC_MARK(tag)                                                   \
  __asm__ __volatile__("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 " ::"i"(tag)    \
                       : "%ebx")
#endif
#else
#define INSERT_SSC_MARK(tag) ((void)0)
#endif

/* Markers for the start and end of regions that represent polling and are
   therefore uninteresting to architectural simulations. 0x4376 and 0x4377 are
   arbitrary numbers that should be unique in the space of SSC tags, but there
   is no central issuing authority; rather, randomness is expected to work. */
#define SSC_MARK_SPIN_START() INSERT_SSC_MARK(0x4376)
#define SSC_MARK_SPIN_END() INSERT_SSC_MARK(0x4377)

// Markers for architecture simulation.
// FORKING      : Before the master thread forks.
// JOINING      : At the start of the join.
// INVOKING     : Before the threads invoke microtasks.
// DISPATCH_INIT: At the start of a dynamically scheduled loop.
// DISPATCH_NEXT: After claiming the next iteration of a dynamically scheduled
//                loop.
#define SSC_MARK_FORKING() INSERT_SSC_MARK(0xd693)
#define SSC_MARK_JOINING() INSERT_SSC_MARK(0xd694)
#define SSC_MARK_INVOKING() INSERT_SSC_MARK(0xd695)
#define SSC_MARK_DISPATCH_INIT() INSERT_SSC_MARK(0xd696)
#define SSC_MARK_DISPATCH_NEXT() INSERT_SSC_MARK(0xd697)

// The object is an address that associates a specific set of prepare,
// acquire, release, and cancel operations.

/* Sync prepare indicates a thread is going to start waiting for another thread
   to send a release event. This operation should be done just before the
   thread begins checking for the existence of the release event. */

/* Sync cancel indicates a thread is cancelling a wait on another thread and
   continuing execution without waiting for the other thread to release it. */

/* Sync acquired indicates a thread has received a release event from another
   thread and has stopped waiting. This operation must occur only after the
   release event is received. */

/* Sync release indicates a thread is going to send a release event to another
   thread so it will stop waiting and continue execution. This operation must
   happen just before the release event. */

#define KMP_FSYNC_PREPARE(obj) __itt_fsync_prepare((void *)(obj))
#define KMP_FSYNC_CANCEL(obj) __itt_fsync_cancel((void *)(obj))
#define KMP_FSYNC_ACQUIRED(obj) __itt_fsync_acquired((void *)(obj))
#define KMP_FSYNC_RELEASING(obj) __itt_fsync_releasing((void *)(obj))
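/* A minimal sketch of the protocol described above; "flag", "flag_is_set",
   and "flag_set" are hypothetical placeholders, and a real waiter would go
   through the runtime's wait machinery rather than a bare loop:

     // Waiting thread:
     KMP_FSYNC_PREPARE(&flag); // just before starting to check for the release
     while (!flag_is_set(&flag)) {
     }
     KMP_FSYNC_ACQUIRED(&flag); // only after the release event is received

     // Releasing thread:
     KMP_FSYNC_RELEASING(&flag); // just before sending the release event
     flag_set(&flag);
*/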
/* In the case of waiting in a spin loop, ITT wants KMP_FSYNC_PREPARE() to be
   called with a delay (and not called at all if the waiting time is small).
   So, in spin loops, do not use KMP_FSYNC_PREPARE(), but use
   KMP_FSYNC_SPIN_INIT() (before the spin loop), KMP_FSYNC_SPIN_PREPARE()
   (within the spin loop), and KMP_FSYNC_SPIN_ACQUIRED(). See KMP_WAIT() for
   an example. */

#undef KMP_FSYNC_SPIN_INIT
#define KMP_FSYNC_SPIN_INIT(obj, spin)                                         \
  int sync_iters = 0;                                                          \
  if (__itt_fsync_prepare_ptr) {                                               \
    if (obj == NULL) {                                                         \
      obj = spin;                                                              \
    } /* if */                                                                 \
  } /* if */                                                                   \
  SSC_MARK_SPIN_START()

#undef KMP_FSYNC_SPIN_PREPARE
#define KMP_FSYNC_SPIN_PREPARE(obj)                                            \
  do {                                                                         \
    if (__itt_fsync_prepare_ptr && sync_iters < __kmp_itt_prepare_delay) {     \
      ++sync_iters;                                                            \
      if (sync_iters >= __kmp_itt_prepare_delay) {                             \
        KMP_FSYNC_PREPARE((void *)obj);                                        \
      } /* if */                                                               \
    } /* if */                                                                 \
  } while (0)

#undef KMP_FSYNC_SPIN_ACQUIRED
#define KMP_FSYNC_SPIN_ACQUIRED(obj)                                           \
  do {                                                                         \
    SSC_MARK_SPIN_END();                                                       \
    if (sync_iters >= __kmp_itt_prepare_delay) {                               \
      KMP_FSYNC_ACQUIRED((void *)obj);                                         \
    } /* if */                                                                 \
  } while (0)
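/* A condensed sketch of how the three macros cooperate in a spin loop,
   modeled on (but much simpler than) the real KMP_WAIT() implementation;
   "obj", "spin", and "done" are placeholders:

     void *obj = NULL;
     KMP_FSYNC_SPIN_INIT(obj, spin); // before the loop; declares sync_iters
     while (!done(spin)) {
       KMP_FSYNC_SPIN_PREPARE(obj); // calls KMP_FSYNC_PREPARE() once, only
                                    // after __kmp_itt_prepare_delay iterations
     }
     KMP_FSYNC_SPIN_ACQUIRED(obj); // reports "acquired" only if the prepare
                                   // event was actually emitted
*/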
/* ITT will not report objects created within KMP_ITT_IGNORE(), e.g.:
     KMP_ITT_IGNORE(
         ptr = malloc(size);
     );
*/
#define KMP_ITT_IGNORE(statement)                                              \
  do {                                                                         \
    __itt_state_t __itt_state_;                                                \
    if (__itt_state_get_ptr) {                                                 \
      __itt_state_ = __itt_state_get();                                        \
      __itt_obj_mode_set(__itt_obj_prop_ignore, __itt_obj_state_set);          \
    } /* if */                                                                 \
    { statement }                                                              \
    if (__itt_state_get_ptr) {                                                 \
      __itt_state_set(__itt_state_);                                           \
    } /* if */                                                                 \
  } while (0)

// Maximum number of frame domains to use (maps to different OpenMP regions in
// the user source code).
const int KMP_MAX_FRAME_DOMAINS = 512;
extern kmp_int32 __kmp_barrier_domain_count;
extern kmp_int32 __kmp_region_domain_count;
extern __itt_domain *__kmp_itt_barrier_domains[KMP_MAX_FRAME_DOMAINS];
extern __itt_domain *__kmp_itt_region_domains[KMP_MAX_FRAME_DOMAINS];
extern __itt_domain *__kmp_itt_imbalance_domains[KMP_MAX_FRAME_DOMAINS];
extern kmp_int32 __kmp_itt_region_team_size[KMP_MAX_FRAME_DOMAINS];
extern __itt_domain *metadata_domain;
extern __itt_string_handle *string_handle_imbl;
extern __itt_string_handle *string_handle_loop;
extern __itt_string_handle *string_handle_sngl;

#else

// Null definitions of the synchronization tracing functions.
#define KMP_FSYNC_PREPARE(obj) ((void)0)
#define KMP_FSYNC_CANCEL(obj) ((void)0)
#define KMP_FSYNC_ACQUIRED(obj) ((void)0)
#define KMP_FSYNC_RELEASING(obj) ((void)0)

#define KMP_FSYNC_SPIN_INIT(obj, spin) ((void)0)
#define KMP_FSYNC_SPIN_PREPARE(obj) ((void)0)
#define KMP_FSYNC_SPIN_ACQUIRED(obj) ((void)0)

#define KMP_ITT_IGNORE(stmt)                                                   \
  do {                                                                         \
    stmt                                                                       \
  } while (0)

#endif // USE_ITT_NOTIFY

#if !KMP_DEBUG
// In release mode, include the definitions of the inline functions.
#include "kmp_itt.inl"
#endif

#endif // KMP_ITT_H

#else /* USE_ITT_BUILD */

// Null definitions of the synchronization tracing functions.
// If USE_ITT_BUILD is not enabled, USE_ITT_NOTIFY cannot be either.
// By defining these we avoid unpleasant ifdef tests in many places.
#define KMP_FSYNC_PREPARE(obj) ((void)0)
#define KMP_FSYNC_CANCEL(obj) ((void)0)
#define KMP_FSYNC_ACQUIRED(obj) ((void)0)
#define KMP_FSYNC_RELEASING(obj) ((void)0)

#define KMP_FSYNC_SPIN_INIT(obj, spin) ((void)0)
#define KMP_FSYNC_SPIN_PREPARE(obj) ((void)0)
#define KMP_FSYNC_SPIN_ACQUIRED(obj) ((void)0)

#define KMP_ITT_IGNORE(stmt)                                                   \
  do {                                                                         \
    stmt                                                                       \
  } while (0)

#define USE_ITT_BUILD_ARG(x)

#endif /* USE_ITT_BUILD */