#if USE_ITT_BUILD
/*
 * kmp_itt.inl -- Inline functions of ITT Notify.
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Inline function definitions. This file should be included into kmp_itt.h file
// for production build (to let compiler inline functions) or into kmp_itt.c
// file for debug build (to reduce the number of files to recompile and save
// build time).

#include "kmp.h"
#include "kmp_str.h"

#if KMP_ITT_DEBUG
extern kmp_bootstrap_lock_t __kmp_itt_debug_lock;
// NOTE: KMP_ITT_DEBUG_LOCK() acquires the debug lock and the matching release
// happens inside KMP_ITT_DEBUG_PRINT(); the two macros are always used as a
// pair so that the "#<gtid>: " prefix and the message are emitted atomically
// with respect to other threads.
#define KMP_ITT_DEBUG_LOCK()                                                   \
  { __kmp_acquire_bootstrap_lock(&__kmp_itt_debug_lock); }
#define KMP_ITT_DEBUG_PRINT(...)                                               \
  {                                                                            \
    fprintf(stderr, "#%02d: ", __kmp_get_gtid());                              \
    fprintf(stderr, __VA_ARGS__);                                              \
    fflush(stderr);                                                            \
    __kmp_release_bootstrap_lock(&__kmp_itt_debug_lock);                       \
  }
#else
// Debug tracing disabled: both macros expand to nothing.
#define KMP_ITT_DEBUG_LOCK()
#define KMP_ITT_DEBUG_PRINT(...)
#endif // KMP_ITT_DEBUG

// Ensure that the functions are static if they're supposed to be being inlined.
// Otherwise they cannot be used in more than one file, since there will be
// multiple definitions.
#if KMP_DEBUG
#define LINKAGE
#else
#define LINKAGE static inline
#endif

// ZCA interface used by Intel(R) Inspector. Intel(R) Parallel Amplifier uses
// this API to support user-defined synchronization primitives, but does not use
// ZCA; it would be safe to turn this off until wider support becomes available.
// ZCA remap is only available with the Intel compiler (12.0+).
#if USE_ITT_ZCA
#ifdef __INTEL_COMPILER
#if __INTEL_COMPILER >= 1200
#undef __itt_sync_acquired
#undef __itt_sync_releasing
#define __itt_sync_acquired(addr)                                              \
  __notify_zc_intrinsic((char *)"sync_acquired", addr)
#define __itt_sync_releasing(addr)                                             \
  __notify_intrinsic((char *)"sync_releasing", addr)
#endif
#endif
#endif

// Serializes lazy creation of the shared ITT metadata domain and string
// handles used by the __kmp_itt_metadata_* functions below.
static kmp_bootstrap_lock_t metadata_lock =
    KMP_BOOTSTRAP_LOCK_INITIALIZER(metadata_lock);

/* Parallel region reporting.
 * __kmp_itt_region_forking should be called by master thread of a team.
   Exact moment of call does not matter, but it should be completed before any
   thread of this team calls __kmp_itt_region_starting.
 * __kmp_itt_region_starting should be called by each thread of a team just
   before entering parallel region body.
 * __kmp_itt_region_finished should be called by each thread of a team right
   after returning from parallel region body.
 * __kmp_itt_region_joined should be called by master thread of a team, after
   all threads called __kmp_itt_region_finished.

   Note: Thread waiting at join barrier (after __kmp_itt_region_finished) can
   execute some more user code -- such a thread can execute tasks.

   Note: The overhead of logging region_starting and region_finished in each
   thread is too large, so these calls are not used. */

// Open an ITT frame for the parallel region the master thread (gtid) is about
// to fork. Lazily creates (and caches in loc->reserved_2) an ITT domain named
// after the region's source location; if 'barriers' is nonzero, a companion
// barrier domain is created and its index stored in the high two bytes.
LINKAGE void __kmp_itt_region_forking(int gtid, int team_size, int barriers) {
#if USE_ITT_NOTIFY
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
  if (team->t.t_active_level > 1) {
    // The frame notifications are only supported for the outermost teams.
    return;
  }
  ident_t *loc = __kmp_thread_from_gtid(gtid)->th.th_ident;
  if (loc) {
    // Use the reserved_2 field to store the index to the region domain.
    // Assume that reserved_2 contains zero initially. Since zero is special
    // value here, store the index into domain array increased by 1.
    if (loc->reserved_2 == 0) {
      if (__kmp_region_domain_count < KMP_MAX_FRAME_DOMAINS) {
        int frm =
            KMP_TEST_THEN_INC32(&__kmp_region_domain_count); // get "old" value
        if (frm >= KMP_MAX_FRAME_DOMAINS) {
          KMP_TEST_THEN_DEC32(&__kmp_region_domain_count); // revert the count
          return; // loc->reserved_2 is still 0
        }
        // if (!KMP_COMPARE_AND_STORE_ACQ32( &loc->reserved_2, 0, frm + 1 )) {
        //    frm = loc->reserved_2 - 1;   // get value saved by other thread
        //    for same loc
        //} // AC: this block is to replace next unsynchronized line

        // We need to save indexes for both region and barrier frames. We'll use
        // loc->reserved_2 field but put region index to the low two bytes and
        // barrier indexes to the high two bytes. It is OK because
        // KMP_MAX_FRAME_DOMAINS = 512.
        loc->reserved_2 |= (frm + 1); // save "new" value

        // Transform compiler-generated region location into the format
        // that the tools more or less standardized on:
        //   "<func>$omp$parallel@[file:]<line>[:<col>]"
        char *buff = NULL;
        kmp_str_loc_t str_loc =
            __kmp_str_loc_init(loc->psource, /* init_fname */ false);
        buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
                                team_size, str_loc.file, str_loc.line,
                                str_loc.col);

        __itt_suppress_push(__itt_suppress_memory_errors);
        __kmp_itt_region_domains[frm] = __itt_domain_create(buff);
        __itt_suppress_pop();

        __kmp_str_free(&buff);
        if (barriers) {
          if (__kmp_barrier_domain_count < KMP_MAX_FRAME_DOMAINS) {
            // Inner 'frm' intentionally shadows the region index above: this
            // one indexes the barrier domain array.
            int frm = KMP_TEST_THEN_INC32(
                &__kmp_barrier_domain_count); // get "old" value
            if (frm >= KMP_MAX_FRAME_DOMAINS) {
              KMP_TEST_THEN_DEC32(
                  &__kmp_barrier_domain_count); // revert the count
              // NOTE(review): at this point loc->reserved_2 already holds the
              // region index (set above), so the region's frame_begin is
              // skipped on this overflow path; str_loc is not freed here
              // either -- confirm whether this early return is intentional.
              return; // loc->reserved_2 is still 0
            }
            char *buff = NULL;
            buff = __kmp_str_format("%s$omp$barrier@%s:%d", str_loc.func,
                                    str_loc.file, str_loc.col);
            __itt_suppress_push(__itt_suppress_memory_errors);
            __kmp_itt_barrier_domains[frm] = __itt_domain_create(buff);
            __itt_suppress_pop();
            __kmp_str_free(&buff);
            // Save the barrier frame index to the high two bytes.
            loc->reserved_2 |= (frm + 1) << 16;
          }
        }
        __kmp_str_loc_free(&str_loc);
        __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL);
      }
    } else { // Region domain exists for this location
      // Check if team size was changed. Then create new region domain for this
      // location
      unsigned int frm = (loc->reserved_2 & 0x0000FFFF) - 1;
      if ((frm < KMP_MAX_FRAME_DOMAINS) &&
          (__kmp_itt_region_team_size[frm] != team_size)) {
        char *buff = NULL;
        kmp_str_loc_t str_loc =
            __kmp_str_loc_init(loc->psource, /* init_fname */ false);
        buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
                                team_size, str_loc.file, str_loc.line,
                                str_loc.col);

        __itt_suppress_push(__itt_suppress_memory_errors);
        __kmp_itt_region_domains[frm] = __itt_domain_create(buff);
        __itt_suppress_pop();

        __kmp_str_free(&buff);
        __kmp_str_loc_free(&str_loc);
        __kmp_itt_region_team_size[frm] = team_size;
        __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL);
      } else { // Team size was not changed. Use existing domain.
        __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL);
      }
    }
    KMP_ITT_DEBUG_LOCK();
    KMP_ITT_DEBUG_PRINT("[frm beg] gtid=%d, idx=%x, loc:%p\n", gtid,
                        loc->reserved_2, loc);
  }
#endif
} // __kmp_itt_region_forking

// -----------------------------------------------------------------------------
// Submit a completed ITT frame [begin, end]. If 'region' is nonzero the frame
// is a parallel region (region == 2 marks a serialized region, which counts as
// one extra nesting level); otherwise it is a barrier frame, reported either
// to the imbalance domain or the plain barrier domain depending on
// 'imbalance'. Domain indices are cached in loc->reserved_2 exactly as in
// __kmp_itt_region_forking (region index in the low two bytes, barrier index
// in the high two bytes).
LINKAGE void __kmp_itt_frame_submit(int gtid, __itt_timestamp begin,
                                    __itt_timestamp end, int imbalance,
                                    ident_t *loc, int team_size, int region) {
#if USE_ITT_NOTIFY
  if (region) {
    kmp_team_t *team = __kmp_team_from_gtid(gtid);
    int serialized = (region == 2 ? 1 : 0);
    if (team->t.t_active_level + serialized > 1) {
      // The frame notifications are only supported for the outermost teams.
      return;
    }
    // Check region domain has not been created before. Its index is saved in
    // the low two bytes.
    if ((loc->reserved_2 & 0x0000FFFF) == 0) {
      if (__kmp_region_domain_count < KMP_MAX_FRAME_DOMAINS) {
        int frm =
            KMP_TEST_THEN_INC32(&__kmp_region_domain_count); // get "old" value
        if (frm >= KMP_MAX_FRAME_DOMAINS) {
          KMP_TEST_THEN_DEC32(&__kmp_region_domain_count); // revert the count
          return; // low two bytes of loc->reserved_2 are still 0
        }

        // We need to save indexes for both region and barrier frames. We'll use
        // loc->reserved_2 field but put region index to the low two bytes and
        // barrier indexes to the high two bytes. It is OK because
        // KMP_MAX_FRAME_DOMAINS = 512.
        loc->reserved_2 |= (frm + 1); // save "new" value

        // Transform compiler-generated region location into the format
        // that the tools more or less standardized on:
        //   "<func>$omp$parallel:team_size@[file:]<line>[:<col>]"
        char *buff = NULL;
        kmp_str_loc_t str_loc =
            __kmp_str_loc_init(loc->psource, /* init_fname */ false);
        buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
                                team_size, str_loc.file, str_loc.line,
                                str_loc.col);

        __itt_suppress_push(__itt_suppress_memory_errors);
        __kmp_itt_region_domains[frm] = __itt_domain_create(buff);
        __itt_suppress_pop();

        __kmp_str_free(&buff);
        __kmp_str_loc_free(&str_loc);
        __kmp_itt_region_team_size[frm] = team_size;
        __itt_frame_submit_v3(__kmp_itt_region_domains[frm], NULL, begin, end);
      }
    } else { // Region domain exists for this location
      // Check if team size was changed. Then create new region domain for this
      // location
      unsigned int frm = (loc->reserved_2 & 0x0000FFFF) - 1;
      if (frm >= KMP_MAX_FRAME_DOMAINS)
        return; // something's gone wrong, returning
      if (__kmp_itt_region_team_size[frm] != team_size) {
        char *buff = NULL;
        kmp_str_loc_t str_loc =
            __kmp_str_loc_init(loc->psource, /* init_fname */ false);
        buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
                                team_size, str_loc.file, str_loc.line,
                                str_loc.col);

        __itt_suppress_push(__itt_suppress_memory_errors);
        __kmp_itt_region_domains[frm] = __itt_domain_create(buff);
        __itt_suppress_pop();

        __kmp_str_free(&buff);
        __kmp_str_loc_free(&str_loc);
        __kmp_itt_region_team_size[frm] = team_size;
        __itt_frame_submit_v3(__kmp_itt_region_domains[frm], NULL, begin, end);
      } else { // Team size was not changed. Use existing domain.
        __itt_frame_submit_v3(__kmp_itt_region_domains[frm], NULL, begin, end);
      }
    }
    KMP_ITT_DEBUG_LOCK();
    KMP_ITT_DEBUG_PRINT(
        "[reg sub] gtid=%d, idx=%x, region:%d, loc:%p, beg:%llu, end:%llu\n",
        gtid, loc->reserved_2, region, loc, begin, end);
    return;
  } else { // called for barrier reporting
    if (loc) {
      if ((loc->reserved_2 & 0xFFFF0000) == 0) {
        if (__kmp_barrier_domain_count < KMP_MAX_FRAME_DOMAINS) {
          int frm = KMP_TEST_THEN_INC32(
              &__kmp_barrier_domain_count); // get "old" value
          if (frm >= KMP_MAX_FRAME_DOMAINS) {
            KMP_TEST_THEN_DEC32(
                &__kmp_barrier_domain_count); // revert the count
            return; // high two bytes of loc->reserved_2 are still 0
          }
          // Save the barrier frame index to the high two bytes.
          loc->reserved_2 |= (frm + 1) << 16; // save "new" value

          // Transform compiler-generated region location into the format
          // that the tools more or less standardized on:
          //   "<func>$omp$frame@[file:]<line>[:<col>]"
          kmp_str_loc_t str_loc =
              __kmp_str_loc_init(loc->psource, /* init_fname */ false);
          if (imbalance) {
            char *buff_imb = NULL;
            buff_imb = __kmp_str_format("%s$omp$barrier-imbalance:%d@%s:%d",
                                        str_loc.func, team_size, str_loc.file,
                                        str_loc.col);
            __itt_suppress_push(__itt_suppress_memory_errors);
            __kmp_itt_imbalance_domains[frm] = __itt_domain_create(buff_imb);
            __itt_suppress_pop();
            __itt_frame_submit_v3(__kmp_itt_imbalance_domains[frm], NULL, begin,
                                  end);
            __kmp_str_free(&buff_imb);
          } else {
            char *buff = NULL;
            buff = __kmp_str_format("%s$omp$barrier@%s:%d", str_loc.func,
                                    str_loc.file, str_loc.col);
            __itt_suppress_push(__itt_suppress_memory_errors);
            __kmp_itt_barrier_domains[frm] = __itt_domain_create(buff);
            __itt_suppress_pop();
            __itt_frame_submit_v3(__kmp_itt_barrier_domains[frm], NULL, begin,
                                  end);
            __kmp_str_free(&buff);
          }
          __kmp_str_loc_free(&str_loc);
        }
      } else { // if it is not 0 then it should be <= KMP_MAX_FRAME_DOMAINS
        if (imbalance) {
          __itt_frame_submit_v3(
              __kmp_itt_imbalance_domains[(loc->reserved_2 >> 16) - 1], NULL,
              begin, end);
        } else {
          __itt_frame_submit_v3(
              __kmp_itt_barrier_domains[(loc->reserved_2 >> 16) - 1], NULL,
              begin, end);
        }
      }
      KMP_ITT_DEBUG_LOCK();
      KMP_ITT_DEBUG_PRINT(
          "[frm sub] gtid=%d, idx=%x, loc:%p, beg:%llu, end:%llu\n", gtid,
          loc->reserved_2, loc, begin, end);
    }
  }
#endif
} // __kmp_itt_frame_submit

// -----------------------------------------------------------------------------
// Report barrier-imbalance metadata (begin/end timestamps, imbalance time and
// a reduction flag) to ITT. The metadata domain and string handles are created
// lazily under metadata_lock (double-checked locking).
LINKAGE void __kmp_itt_metadata_imbalance(int gtid, kmp_uint64 begin,
                                          kmp_uint64 end, kmp_uint64 imbalance,
                                          kmp_uint64 reduction) {
#if USE_ITT_NOTIFY
  if (metadata_domain == NULL) {
    __kmp_acquire_bootstrap_lock(&metadata_lock);
    if (metadata_domain == NULL) {
      __itt_suppress_push(__itt_suppress_memory_errors);
      metadata_domain = __itt_domain_create("OMP Metadata");
      string_handle_imbl = __itt_string_handle_create("omp_metadata_imbalance");
      string_handle_loop = __itt_string_handle_create("omp_metadata_loop");
      string_handle_sngl = __itt_string_handle_create("omp_metadata_single");
      __itt_suppress_pop();
    }
    __kmp_release_bootstrap_lock(&metadata_lock);
  }

  kmp_uint64 imbalance_data[4];
  imbalance_data[0] = begin;
  imbalance_data[1] = end;
  imbalance_data[2] = imbalance;
  imbalance_data[3] = reduction;

  __itt_metadata_add(metadata_domain, __itt_null, string_handle_imbl,
                     __itt_metadata_u64, 4, imbalance_data);
#endif
} // __kmp_itt_metadata_imbalance

// -----------------------------------------------------------------------------
// Report loop metadata (source line/column, schedule type, trip count and
// chunk size) to ITT. Uses the same lazily-created metadata domain as above.
LINKAGE void __kmp_itt_metadata_loop(ident_t *loc, kmp_uint64 sched_type,
                                     kmp_uint64 iterations, kmp_uint64 chunk) {
#if USE_ITT_NOTIFY
  if (metadata_domain == NULL) {
    __kmp_acquire_bootstrap_lock(&metadata_lock);
    if (metadata_domain == NULL) {
      __itt_suppress_push(__itt_suppress_memory_errors);
      metadata_domain = __itt_domain_create("OMP Metadata");
      string_handle_imbl = __itt_string_handle_create("omp_metadata_imbalance");
      string_handle_loop = __itt_string_handle_create("omp_metadata_loop");
      string_handle_sngl = __itt_string_handle_create("omp_metadata_single");
      __itt_suppress_pop();
    }
    __kmp_release_bootstrap_lock(&metadata_lock);
  }

  // Parse line and column from psource string: ";file;func;line;col;;"
  KMP_DEBUG_ASSERT(loc->psource);
  kmp_uint64 loop_data[5];
  int line, col;
  __kmp_str_loc_numbers(loc->psource, &line, &col);
  loop_data[0] = line;
  loop_data[1] = col;
  loop_data[2] = sched_type;
  loop_data[3] = iterations;
  loop_data[4] = chunk;

  __itt_metadata_add(metadata_domain, __itt_null, string_handle_loop,
                     __itt_metadata_u64, 5, loop_data);
#endif
} // __kmp_itt_metadata_loop

// -----------------------------------------------------------------------------
// Report single-construct metadata (source line/column) to ITT.
LINKAGE void __kmp_itt_metadata_single(ident_t *loc) {
#if USE_ITT_NOTIFY
  if (metadata_domain == NULL) {
    __kmp_acquire_bootstrap_lock(&metadata_lock);
    if (metadata_domain == NULL) {
      __itt_suppress_push(__itt_suppress_memory_errors);
      metadata_domain = __itt_domain_create("OMP Metadata");
      string_handle_imbl = __itt_string_handle_create("omp_metadata_imbalance");
      string_handle_loop = __itt_string_handle_create("omp_metadata_loop");
      string_handle_sngl = __itt_string_handle_create("omp_metadata_single");
      __itt_suppress_pop();
    }
    __kmp_release_bootstrap_lock(&metadata_lock);
  }

  int line, col;
  __kmp_str_loc_numbers(loc->psource, &line, &col);
  kmp_uint64 single_data[2];
  single_data[0] = line;
  single_data[1] = col;

  __itt_metadata_add(metadata_domain, __itt_null, string_handle_sngl,
                     __itt_metadata_u64, 2, single_data);
#endif
} // __kmp_itt_metadata_single

// -----------------------------------------------------------------------------
// Intentionally empty: per-thread region start/finish reporting is disabled
// because its overhead is too large (see the note in the section comment
// above).
LINKAGE void __kmp_itt_region_starting(int gtid) {
#if USE_ITT_NOTIFY
#endif
} // __kmp_itt_region_starting

// -----------------------------------------------------------------------------
// Intentionally empty: see __kmp_itt_region_starting.
LINKAGE void __kmp_itt_region_finished(int gtid) {
#if USE_ITT_NOTIFY
#endif
} // __kmp_itt_region_finished

// ----------------------------------------------------------------------------
// Close the ITT frame opened by __kmp_itt_region_forking; called by the master
// thread after all threads finished the region.
LINKAGE void __kmp_itt_region_joined(int gtid) {
#if USE_ITT_NOTIFY
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
  if (team->t.t_active_level > 1) {
    // The frame notifications are only supported for the outermost teams.
    return;
  }
  ident_t *loc = __kmp_thread_from_gtid(gtid)->th.th_ident;
  if (loc && loc->reserved_2) {
    // Region domain index lives in the low two bytes, biased by 1.
    unsigned int frm = (loc->reserved_2 & 0x0000FFFF) - 1;
    if (frm < KMP_MAX_FRAME_DOMAINS) {
      KMP_ITT_DEBUG_LOCK();
      __itt_frame_end_v3(__kmp_itt_region_domains[frm], NULL);
      KMP_ITT_DEBUG_PRINT("[frm end] gtid=%d, idx=%x, loc:%p\n", gtid,
                          loc->reserved_2, loc);
    }
  }
#endif
} // __kmp_itt_region_joined

/* Barriers reporting.

   A barrier consists of two phases:
   1. Gather -- master waits for arriving of all the worker threads; each
      worker thread registers arrival and goes further.
   2. Release -- each worker threads waits until master lets it go; master lets
      worker threads go.

   Function should be called by each thread:
   * __kmp_itt_barrier_starting() -- before arriving to the gather phase.
   * __kmp_itt_barrier_middle() -- between gather and release phases.
   * __kmp_itt_barrier_finished() -- after release phase.

   Note: Call __kmp_itt_barrier_object() before call to
   __kmp_itt_barrier_starting() and save result in local variable.
   __kmp_itt_barrier_object(), being called too late (e. g. after gather phase)
   would return itt sync object for the next barrier!

   ITT need an address (void *) to be specified as a sync object. OpenMP RTL
   does not have barrier object or barrier data structure. Barrier is just a
   counter in team and thread structures. We could use an address of team
   structure as a barrier sync object, but ITT wants different objects for
   different barriers (even within the same team). So let us use team address
   as barrier sync object for the first barrier, then increase it by one for the
   next barrier, and so on (but wrap it not to use addresses outside of team
   structure).
*/ 474 475void *__kmp_itt_barrier_object(int gtid, int bt, int set_name, 476 int delta // 0 (current barrier) is default 477 // value; specify -1 to get previous 478 // barrier. 479 ) { 480 void *object = NULL; 481#if USE_ITT_NOTIFY 482 kmp_info_t *thr = __kmp_thread_from_gtid(gtid); 483 kmp_team_t *team = thr->th.th_team; 484 485 // NOTE: If the function is called from __kmp_fork_barrier, team pointer can 486 // be NULL. This "if" helps to avoid crash. However, this is not complete 487 // solution, and reporting fork/join barriers to ITT should be revisited. 488 489 if (team != NULL) { 490 // Master thread increases b_arrived by KMP_BARRIER_STATE_BUMP each time. 491 // Divide b_arrived by KMP_BARRIER_STATE_BUMP to get plain barrier counter. 492 kmp_uint64 counter = 493 team->t.t_bar[bt].b_arrived / KMP_BARRIER_STATE_BUMP + delta; 494 // Now form the barrier id. Encode barrier type (bt) in barrier id too, so 495 // barriers of different types do not have the same ids. 496 KMP_BUILD_ASSERT(sizeof(kmp_team_t) >= bs_last_barrier); 497 // This condition is a must (we would have zero divide otherwise). 498 KMP_BUILD_ASSERT(sizeof(kmp_team_t) >= 2 * bs_last_barrier); 499 // More strong condition: make sure we have room at least for for two 500 // different ids (for each barrier type). 501 object = reinterpret_cast<void *>( 502 kmp_uintptr_t(team) + 503 counter % (sizeof(kmp_team_t) / bs_last_barrier) * bs_last_barrier + 504 bt); 505 KMP_ITT_DEBUG_LOCK(); 506 KMP_ITT_DEBUG_PRINT("[bar obj] type=%d, counter=%lld, object=%p\n", bt, 507 counter, object); 508 509 if (set_name) { 510 ident_t const *loc = NULL; 511 char const *src = NULL; 512 char const *type = "OMP Barrier"; 513 switch (bt) { 514 case bs_plain_barrier: { 515 // For plain barrier compiler calls __kmpc_barrier() function, which 516 // saves location in thr->th.th_ident. 517 loc = thr->th.th_ident; 518 // Get the barrier type from flags provided by compiler. 
519 kmp_int32 expl = 0; 520 kmp_uint32 impl = 0; 521 if (loc != NULL) { 522 src = loc->psource; 523 expl = (loc->flags & KMP_IDENT_BARRIER_EXPL) != 0; 524 impl = (loc->flags & KMP_IDENT_BARRIER_IMPL) != 0; 525 } 526 if (impl) { 527 switch (loc->flags & KMP_IDENT_BARRIER_IMPL_MASK) { 528 case KMP_IDENT_BARRIER_IMPL_FOR: { 529 type = "OMP For Barrier"; 530 } break; 531 case KMP_IDENT_BARRIER_IMPL_SECTIONS: { 532 type = "OMP Sections Barrier"; 533 } break; 534 case KMP_IDENT_BARRIER_IMPL_SINGLE: { 535 type = "OMP Single Barrier"; 536 } break; 537 case KMP_IDENT_BARRIER_IMPL_WORKSHARE: { 538 type = "OMP Workshare Barrier"; 539 } break; 540 default: { 541 type = "OMP Implicit Barrier"; 542 KMP_DEBUG_ASSERT(0); 543 } 544 } 545 } else if (expl) { 546 type = "OMP Explicit Barrier"; 547 } 548 } break; 549 case bs_forkjoin_barrier: { 550 // In case of fork/join barrier we can read thr->th.th_ident, because it 551 // contains location of last passed construct (while join barrier is not 552 // such one). Use th_ident of master thread instead -- __kmp_join_call() 553 // called by the master thread saves location. 554 // 555 // AC: cannot read from master because __kmp_join_call may be not called 556 // yet, so we read the location from team. This is the same location. 557 // And team is valid at the enter to join barrier where this happens. 
558 loc = team->t.t_ident; 559 if (loc != NULL) { 560 src = loc->psource; 561 } 562 type = "OMP Join Barrier"; 563 } break; 564 } 565 KMP_ITT_DEBUG_LOCK(); 566 __itt_sync_create(object, type, src, __itt_attr_barrier); 567 KMP_ITT_DEBUG_PRINT( 568 "[bar sta] scre( %p, \"%s\", \"%s\", __itt_attr_barrier )\n", object, 569 type, src); 570 } 571 } 572#endif 573 return object; 574} // __kmp_itt_barrier_object 575 576// ----------------------------------------------------------------------------- 577void __kmp_itt_barrier_starting(int gtid, void *object) { 578#if USE_ITT_NOTIFY 579 if (!KMP_MASTER_GTID(gtid)) { 580 KMP_ITT_DEBUG_LOCK(); 581 __itt_sync_releasing(object); 582 KMP_ITT_DEBUG_PRINT("[bar sta] srel( %p )\n", object); 583 } 584 KMP_ITT_DEBUG_LOCK(); 585 __itt_sync_prepare(object); 586 KMP_ITT_DEBUG_PRINT("[bar sta] spre( %p )\n", object); 587#endif 588} // __kmp_itt_barrier_starting 589 590// ----------------------------------------------------------------------------- 591void __kmp_itt_barrier_middle(int gtid, void *object) { 592#if USE_ITT_NOTIFY 593 if (KMP_MASTER_GTID(gtid)) { 594 KMP_ITT_DEBUG_LOCK(); 595 __itt_sync_acquired(object); 596 KMP_ITT_DEBUG_PRINT("[bar mid] sacq( %p )\n", object); 597 KMP_ITT_DEBUG_LOCK(); 598 __itt_sync_releasing(object); 599 KMP_ITT_DEBUG_PRINT("[bar mid] srel( %p )\n", object); 600 } else { 601 } 602#endif 603} // __kmp_itt_barrier_middle 604 605// ----------------------------------------------------------------------------- 606void __kmp_itt_barrier_finished(int gtid, void *object) { 607#if USE_ITT_NOTIFY 608 if (KMP_MASTER_GTID(gtid)) { 609 } else { 610 KMP_ITT_DEBUG_LOCK(); 611 __itt_sync_acquired(object); 612 KMP_ITT_DEBUG_PRINT("[bar end] sacq( %p )\n", object); 613 } 614#endif 615} // __kmp_itt_barrier_finished 616 617/* Taskwait reporting. 618 ITT need an address (void *) to be specified as a sync object. OpenMP RTL 619 does not have taskwait structure, so we need to construct something. 
*/ 620 621void *__kmp_itt_taskwait_object(int gtid) { 622 void *object = NULL; 623#if USE_ITT_NOTIFY 624 if (UNLIKELY(__itt_sync_create_ptr)) { 625 kmp_info_t *thread = __kmp_thread_from_gtid(gtid); 626 kmp_taskdata_t *taskdata = thread->th.th_current_task; 627 object = reinterpret_cast<void *>(kmp_uintptr_t(taskdata) + 628 taskdata->td_taskwait_counter % 629 sizeof(kmp_taskdata_t)); 630 } 631#endif 632 return object; 633} // __kmp_itt_taskwait_object 634 635void __kmp_itt_taskwait_starting(int gtid, void *object) { 636#if USE_ITT_NOTIFY 637 kmp_info_t *thread = __kmp_thread_from_gtid(gtid); 638 kmp_taskdata_t *taskdata = thread->th.th_current_task; 639 ident_t const *loc = taskdata->td_taskwait_ident; 640 char const *src = (loc == NULL ? NULL : loc->psource); 641 KMP_ITT_DEBUG_LOCK(); 642 __itt_sync_create(object, "OMP Taskwait", src, 0); 643 KMP_ITT_DEBUG_PRINT("[twa sta] scre( %p, \"OMP Taskwait\", \"%s\", 0 )\n", 644 object, src); 645 KMP_ITT_DEBUG_LOCK(); 646 __itt_sync_prepare(object); 647 KMP_ITT_DEBUG_PRINT("[twa sta] spre( %p )\n", object); 648#endif 649} // __kmp_itt_taskwait_starting 650 651void __kmp_itt_taskwait_finished(int gtid, void *object) { 652#if USE_ITT_NOTIFY 653 KMP_ITT_DEBUG_LOCK(); 654 __itt_sync_acquired(object); 655 KMP_ITT_DEBUG_PRINT("[twa end] sacq( %p )\n", object); 656 KMP_ITT_DEBUG_LOCK(); 657 __itt_sync_destroy(object); 658 KMP_ITT_DEBUG_PRINT("[twa end] sdes( %p )\n", object); 659#endif 660} // __kmp_itt_taskwait_finished 661 662/* Task reporting. 663 Only those tasks are reported which are executed by a thread spinning at 664 barrier (or taskwait). Synch object passed to the function must be barrier of 665 taskwait the threads waiting at. */ 666 667void __kmp_itt_task_starting( 668 void *object // ITT sync object: barrier or taskwait. 
669 ) { 670#if USE_ITT_NOTIFY 671 if (UNLIKELY(object != NULL)) { 672 KMP_ITT_DEBUG_LOCK(); 673 __itt_sync_cancel(object); 674 KMP_ITT_DEBUG_PRINT("[tsk sta] scan( %p )\n", object); 675 } 676#endif 677} // __kmp_itt_task_starting 678 679// ----------------------------------------------------------------------------- 680void __kmp_itt_task_finished( 681 void *object // ITT sync object: barrier or taskwait. 682 ) { 683#if USE_ITT_NOTIFY 684 KMP_ITT_DEBUG_LOCK(); 685 __itt_sync_prepare(object); 686 KMP_ITT_DEBUG_PRINT("[tsk end] spre( %p )\n", object); 687#endif 688} // __kmp_itt_task_finished 689 690/* Lock reporting. 691 * __kmp_itt_lock_creating( lock ) should be called *before* the first lock 692 operation (set/unset). It is not a real event shown to the user but just 693 setting a name for synchronization object. `lock' is an address of sync 694 object, the same address should be used in all subsequent calls. 695 * __kmp_itt_lock_acquiring() should be called before setting the lock. 696 * __kmp_itt_lock_acquired() should be called after setting the lock. 697 * __kmp_itt_lock_realeasing() should be called before unsetting the lock. 698 * __kmp_itt_lock_cancelled() should be called after thread cancelled waiting 699 for the lock. 700 * __kmp_itt_lock_destroyed( lock ) should be called after the last lock 701 operation. After __kmp_itt_lock_destroyed() all the references to the same 702 address will be considered as another sync object, not related with the 703 original one. */ 704 705#if KMP_USE_DYNAMIC_LOCK 706// Takes location information directly 707__kmp_inline void ___kmp_itt_lock_init(kmp_user_lock_p lock, char const *type, 708 const ident_t *loc) { 709#if USE_ITT_NOTIFY 710 if (__itt_sync_create_ptr) { 711 char const *src = (loc == NULL ? 
NULL : loc->psource); 712 KMP_ITT_DEBUG_LOCK(); 713 __itt_sync_create(lock, type, src, 0); 714 KMP_ITT_DEBUG_PRINT("[lck ini] scre( %p, \"%s\", \"%s\", 0 )\n", lock, type, 715 src); 716 } 717#endif 718} 719#else // KMP_USE_DYNAMIC_LOCK 720// Internal guts -- common code for locks and critical sections, do not call 721// directly. 722__kmp_inline void ___kmp_itt_lock_init(kmp_user_lock_p lock, char const *type) { 723#if USE_ITT_NOTIFY 724 if (__itt_sync_create_ptr) { 725 ident_t const *loc = NULL; 726 if (__kmp_get_user_lock_location_ != NULL) 727 loc = __kmp_get_user_lock_location_((lock)); 728 char const *src = (loc == NULL ? NULL : loc->psource); 729 KMP_ITT_DEBUG_LOCK(); 730 __itt_sync_create(lock, type, src, 0); 731 KMP_ITT_DEBUG_PRINT("[lck ini] scre( %p, \"%s\", \"%s\", 0 )\n", lock, type, 732 src); 733 } 734#endif 735} // ___kmp_itt_lock_init 736#endif // KMP_USE_DYNAMIC_LOCK 737 738// Internal guts -- common code for locks and critical sections, do not call 739// directly. 740__kmp_inline void ___kmp_itt_lock_fini(kmp_user_lock_p lock, char const *type) { 741#if USE_ITT_NOTIFY 742 KMP_ITT_DEBUG_LOCK(); 743 __itt_sync_destroy(lock); 744 KMP_ITT_DEBUG_PRINT("[lck dst] sdes( %p )\n", lock); 745#endif 746} // ___kmp_itt_lock_fini 747 748// ----------------------------------------------------------------------------- 749#if KMP_USE_DYNAMIC_LOCK 750void __kmp_itt_lock_creating(kmp_user_lock_p lock, const ident_t *loc) { 751 ___kmp_itt_lock_init(lock, "OMP Lock", loc); 752} 753#else 754void __kmp_itt_lock_creating(kmp_user_lock_p lock) { 755 ___kmp_itt_lock_init(lock, "OMP Lock"); 756} // __kmp_itt_lock_creating 757#endif 758 759void __kmp_itt_lock_acquiring(kmp_user_lock_p lock) { 760#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY 761 // postpone lock object access 762 if (__itt_sync_prepare_ptr) { 763 if (KMP_EXTRACT_D_TAG(lock) == 0) { 764 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 765 __itt_sync_prepare(ilk->lock); 766 } else { 767 
__itt_sync_prepare(lock); 768 } 769 } 770#else 771 __itt_sync_prepare(lock); 772#endif 773} // __kmp_itt_lock_acquiring 774 775void __kmp_itt_lock_acquired(kmp_user_lock_p lock) { 776#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY 777 // postpone lock object access 778 if (__itt_sync_acquired_ptr) { 779 if (KMP_EXTRACT_D_TAG(lock) == 0) { 780 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 781 __itt_sync_acquired(ilk->lock); 782 } else { 783 __itt_sync_acquired(lock); 784 } 785 } 786#else 787 __itt_sync_acquired(lock); 788#endif 789} // __kmp_itt_lock_acquired 790 791void __kmp_itt_lock_releasing(kmp_user_lock_p lock) { 792#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY 793 if (__itt_sync_releasing_ptr) { 794 if (KMP_EXTRACT_D_TAG(lock) == 0) { 795 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 796 __itt_sync_releasing(ilk->lock); 797 } else { 798 __itt_sync_releasing(lock); 799 } 800 } 801#else 802 __itt_sync_releasing(lock); 803#endif 804} // __kmp_itt_lock_releasing 805 806void __kmp_itt_lock_cancelled(kmp_user_lock_p lock) { 807#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY 808 if (__itt_sync_cancel_ptr) { 809 if (KMP_EXTRACT_D_TAG(lock) == 0) { 810 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); 811 __itt_sync_cancel(ilk->lock); 812 } else { 813 __itt_sync_cancel(lock); 814 } 815 } 816#else 817 __itt_sync_cancel(lock); 818#endif 819} // __kmp_itt_lock_cancelled 820 821void __kmp_itt_lock_destroyed(kmp_user_lock_p lock) { 822 ___kmp_itt_lock_fini(lock, "OMP Lock"); 823} // __kmp_itt_lock_destroyed 824 825/* Critical reporting. 826 Critical sections are treated exactly as locks (but have different object 827 type). 
*/ 828#if KMP_USE_DYNAMIC_LOCK 829void __kmp_itt_critical_creating(kmp_user_lock_p lock, const ident_t *loc) { 830 ___kmp_itt_lock_init(lock, "OMP Critical", loc); 831} 832#else 833void __kmp_itt_critical_creating(kmp_user_lock_p lock) { 834 ___kmp_itt_lock_init(lock, "OMP Critical"); 835} // __kmp_itt_critical_creating 836#endif 837 838void __kmp_itt_critical_acquiring(kmp_user_lock_p lock) { 839 __itt_sync_prepare(lock); 840} // __kmp_itt_critical_acquiring 841 842void __kmp_itt_critical_acquired(kmp_user_lock_p lock) { 843 __itt_sync_acquired(lock); 844} // __kmp_itt_critical_acquired 845 846void __kmp_itt_critical_releasing(kmp_user_lock_p lock) { 847 __itt_sync_releasing(lock); 848} // __kmp_itt_critical_releasing 849 850void __kmp_itt_critical_destroyed(kmp_user_lock_p lock) { 851 ___kmp_itt_lock_fini(lock, "OMP Critical"); 852} // __kmp_itt_critical_destroyed 853 854/* Single reporting. */ 855 856void __kmp_itt_single_start(int gtid) { 857#if USE_ITT_NOTIFY 858 if (__itt_mark_create_ptr || KMP_ITT_DEBUG) { 859 kmp_info_t *thr = __kmp_thread_from_gtid((gtid)); 860 ident_t *loc = thr->th.th_ident; 861 char const *src = (loc == NULL ? 
                       NULL : loc->psource);
    kmp_str_buf_t name;
    __kmp_str_buf_init(&name);
    // Mark name embeds the source location, e.g. "OMP Single-<psource>".
    __kmp_str_buf_print(&name, "OMP Single-%s", src);
    KMP_ITT_DEBUG_LOCK();
    thr->th.th_itt_mark_single = __itt_mark_create(name.str);
    KMP_ITT_DEBUG_PRINT("[sin sta] mcre( \"%s\") -> %d\n", name.str,
                        thr->th.th_itt_mark_single);
    __kmp_str_buf_free(&name);
    KMP_ITT_DEBUG_LOCK();
    __itt_mark(thr->th.th_itt_mark_single, NULL);
    KMP_ITT_DEBUG_PRINT("[sin sta] mark( %d, NULL )\n",
                        thr->th.th_itt_mark_single);
  }
#endif
} // __kmp_itt_single_start

// Mark the end of the OMP single region started by __kmp_itt_single_start,
// using the mark object saved in the thread descriptor.
void __kmp_itt_single_end(int gtid) {
#if USE_ITT_NOTIFY
  __itt_mark_type mark = __kmp_thread_from_gtid(gtid)->th.th_itt_mark_single;
  KMP_ITT_DEBUG_LOCK();
  __itt_mark_off(mark);
  KMP_ITT_DEBUG_PRINT("[sin end] moff( %d )\n", mark);
#endif
} // __kmp_itt_single_end

/* Ordered reporting.
   * __kmp_itt_ordered_init is called by each thread *before* first using the
     sync object. The ITT team would like it to be called once, but that
     requires extra synchronization.
   * __kmp_itt_ordered_prep is called when a thread is going to enter an
     ordered section (before synchronization).
   * __kmp_itt_ordered_start is called just before entering user code (after
     synchronization).
   * __kmp_itt_ordered_end is called after returning from user code.

   Sync object is th->th.th_dispatch->th_dispatch_sh_current.
   Events are not generated in case of serialized team. */

void __kmp_itt_ordered_init(int gtid) {
#if USE_ITT_NOTIFY
  if (__itt_sync_create_ptr) {
    kmp_info_t *thr = __kmp_thread_from_gtid(gtid);
    ident_t const *loc = thr->th.th_ident;
    // Guard against a missing location; src may be NULL in the object name.
    char const *src = (loc == NULL ?
NULL : loc->psource); 906 __itt_sync_create(thr->th.th_dispatch->th_dispatch_sh_current, 907 "OMP Ordered", src, 0); 908 } 909#endif 910} // __kmp_itt_ordered_init 911 912void __kmp_itt_ordered_prep(int gtid) { 913#if USE_ITT_NOTIFY 914 if (__itt_sync_create_ptr) { 915 kmp_team_t *t = __kmp_team_from_gtid(gtid); 916 if (!t->t.t_serialized) { 917 kmp_info_t *th = __kmp_thread_from_gtid(gtid); 918 __itt_sync_prepare(th->th.th_dispatch->th_dispatch_sh_current); 919 } 920 } 921#endif 922} // __kmp_itt_ordered_prep 923 924void __kmp_itt_ordered_start(int gtid) { 925#if USE_ITT_NOTIFY 926 if (__itt_sync_create_ptr) { 927 kmp_team_t *t = __kmp_team_from_gtid(gtid); 928 if (!t->t.t_serialized) { 929 kmp_info_t *th = __kmp_thread_from_gtid(gtid); 930 __itt_sync_acquired(th->th.th_dispatch->th_dispatch_sh_current); 931 } 932 } 933#endif 934} // __kmp_itt_ordered_start 935 936void __kmp_itt_ordered_end(int gtid) { 937#if USE_ITT_NOTIFY 938 if (__itt_sync_create_ptr) { 939 kmp_team_t *t = __kmp_team_from_gtid(gtid); 940 if (!t->t.t_serialized) { 941 kmp_info_t *th = __kmp_thread_from_gtid(gtid); 942 __itt_sync_releasing(th->th.th_dispatch->th_dispatch_sh_current); 943 } 944 } 945#endif 946} // __kmp_itt_ordered_end 947 948/* Threads reporting. */ 949 950void __kmp_itt_thread_ignore() { 951 __itt_thr_ignore(); 952} // __kmp_itt_thread_ignore 953 954void __kmp_itt_thread_name(int gtid) { 955#if USE_ITT_NOTIFY 956 if (__itt_thr_name_set_ptr) { 957 kmp_str_buf_t name; 958 __kmp_str_buf_init(&name); 959 if (KMP_MASTER_GTID(gtid)) { 960 __kmp_str_buf_print(&name, "OMP Master Thread #%d", gtid); 961 } else { 962 __kmp_str_buf_print(&name, "OMP Worker Thread #%d", gtid); 963 } 964 KMP_ITT_DEBUG_LOCK(); 965 __itt_thr_name_set(name.str, name.used); 966 KMP_ITT_DEBUG_PRINT("[thr nam] name( \"%s\")\n", name.str); 967 __kmp_str_buf_free(&name); 968 } 969#endif 970} // __kmp_itt_thread_name 971 972/* System object reporting. 
973 ITT catches operations with system sync objects (like Windows* OS on IA-32 974 architecture API critical sections and events). We only need to specify 975 name ("OMP Scheduler") for the object to let ITT know it is an object used 976 by OpenMP RTL for internal purposes. */ 977 978void __kmp_itt_system_object_created(void *object, char const *name) { 979#if USE_ITT_NOTIFY 980 KMP_ITT_DEBUG_LOCK(); 981 __itt_sync_create(object, "OMP Scheduler", name, 0); 982 KMP_ITT_DEBUG_PRINT("[sys obj] scre( %p, \"OMP Scheduler\", \"%s\", 0 )\n", 983 object, name); 984#endif 985} // __kmp_itt_system_object_created 986 987/* Stack stitching api. 988 Master calls "create" and put the stitching id into team structure. 989 Workers read the stitching id and call "enter" / "leave" api. 990 Master calls "destroy" at the end of the parallel region. */ 991 992__itt_caller __kmp_itt_stack_caller_create() { 993#if USE_ITT_NOTIFY 994 if (!__itt_stack_caller_create_ptr) 995 return NULL; 996 KMP_ITT_DEBUG_LOCK(); 997 __itt_caller id = __itt_stack_caller_create(); 998 KMP_ITT_DEBUG_PRINT("[stk cre] %p\n", id); 999 return id; 1000#endif 1001 return NULL; 1002} 1003 1004void __kmp_itt_stack_caller_destroy(__itt_caller id) { 1005#if USE_ITT_NOTIFY 1006 if (__itt_stack_caller_destroy_ptr) { 1007 KMP_ITT_DEBUG_LOCK(); 1008 __itt_stack_caller_destroy(id); 1009 KMP_ITT_DEBUG_PRINT("[stk des] %p\n", id); 1010 } 1011#endif 1012} 1013 1014void __kmp_itt_stack_callee_enter(__itt_caller id) { 1015#if USE_ITT_NOTIFY 1016 if (__itt_stack_callee_enter_ptr) { 1017 KMP_ITT_DEBUG_LOCK(); 1018 __itt_stack_callee_enter(id); 1019 KMP_ITT_DEBUG_PRINT("[stk ent] %p\n", id); 1020 } 1021#endif 1022} 1023 1024void __kmp_itt_stack_callee_leave(__itt_caller id) { 1025#if USE_ITT_NOTIFY 1026 if (__itt_stack_callee_leave_ptr) { 1027 KMP_ITT_DEBUG_LOCK(); 1028 __itt_stack_callee_leave(id); 1029 KMP_ITT_DEBUG_PRINT("[stk lea] %p\n", id); 1030 } 1031#endif 1032} 1033 1034#endif /* USE_ITT_BUILD */ 1035