1 #ifndef _BSD_SOURCE
2 #define _BSD_SOURCE
3 #endif
4 #ifndef _DEFAULT_SOURCE
5 #define _DEFAULT_SOURCE
6 #endif
7 #include <stdio.h>
8 #ifndef __STDC_FORMAT_MACROS
9 #define __STDC_FORMAT_MACROS
10 #endif
11 #include <inttypes.h>
12 #include <omp.h>
13 #include <omp-tools.h>
14 #include "ompt-signal.h"
15
16 // Used to detect architecture
17 #include "../../src/kmp_platform.h"
18
19 #ifndef _TOOL_PREFIX
20 #define _TOOL_PREFIX ""
21 // If no _TOOL_PREFIX is set, we assume that we run as part of an OMPT test
22 #define _OMPT_TESTS
23 #endif
24
// Printable names for thread types, indexed by the numeric thread-type value.
// Index 0 is a placeholder for "no valid value". The table covers values 1-4;
// the entry for ompt_thread_unknown (4) is included so that indexing with it
// does not read past the end of the array.
static const char *ompt_thread_t_values[] = {
    "ompt_thread_UNDEFINED",
    "ompt_thread_initial", // 1
    "ompt_thread_worker",  // 2
    "ompt_thread_other",   // 3
    "ompt_thread_unknown"  // 4
};
28
// Printable names for task status values, indexed by the numeric status (see
// the trailing comments). Index 0 is a placeholder for "no valid value".
// ompt_taskwait_complete (8) is included so that a status of 8 does not index
// past the end of the table.
static const char *ompt_task_status_t_values[] = {
    "ompt_task_UNDEFINED",
    "ompt_task_complete",      // 1
    "ompt_task_yield",         // 2
    "ompt_task_cancel",        // 3
    "ompt_task_detach",        // 4
    "ompt_task_early_fulfill", // 5
    "ompt_task_late_fulfill",  // 6
    "ompt_task_switch",        // 7
    "ompt_taskwait_complete"   // 8
};
// Printable names for the cancel flag bits. The table is indexed by position,
// not by flag value: on_ompt_callback_cancel() uses entries 0-3 for the first
// name it prints (parallel/sections/loop/taskgroup) and entries 4-6 for the
// second one (activated/detected/discarded_task).
static const char* ompt_cancel_flag_t_values[] = {
  "ompt_cancel_parallel",
  "ompt_cancel_sections",
  "ompt_cancel_loop",
  "ompt_cancel_taskgroup",
  "ompt_cancel_activated",
  "ompt_cancel_detected",
  "ompt_cancel_discarded_task"
};
48
// Printable names for task dependence types; the trailing comments give the
// index of each entry. Index 0 is a placeholder for "no valid value".
// NOTE(review): presumably indexed with the numeric dependence-type value;
// the code doing so is not in this part of the file -- confirm at the use
// site.
static const char *ompt_dependence_type_t_values[] = {
    "ompt_dependence_type_UNDEFINED",
    "ompt_dependence_type_in", // 1
    "ompt_dependence_type_out", // 2
    "ompt_dependence_type_inout", // 3
    "ompt_dependence_type_mutexinoutset", // 4
    "ompt_dependence_type_source", // 5
    "ompt_dependence_type_sink", // 6
    "ompt_dependence_type_inoutset" // 7
};
59
format_task_type(int type,char * buffer)60 static void format_task_type(int type, char *buffer) {
61 char *progress = buffer;
62 if (type & ompt_task_initial)
63 progress += sprintf(progress, "ompt_task_initial");
64 if (type & ompt_task_implicit)
65 progress += sprintf(progress, "ompt_task_implicit");
66 if (type & ompt_task_explicit)
67 progress += sprintf(progress, "ompt_task_explicit");
68 if (type & ompt_task_target)
69 progress += sprintf(progress, "ompt_task_target");
70 if (type & ompt_task_undeferred)
71 progress += sprintf(progress, "|ompt_task_undeferred");
72 if (type & ompt_task_untied)
73 progress += sprintf(progress, "|ompt_task_untied");
74 if (type & ompt_task_final)
75 progress += sprintf(progress, "|ompt_task_final");
76 if (type & ompt_task_mergeable)
77 progress += sprintf(progress, "|ompt_task_mergeable");
78 if (type & ompt_task_merged)
79 progress += sprintf(progress, "|ompt_task_merged");
80 }
81
// Function pointers to the OMPT runtime entry points used by the callbacks
// and helper macros in this file. They are only declared here; nothing in
// this part of the file assigns them.
// NOTE(review): presumably filled in during tool initialization -- confirm
// where the assignment happens.
static ompt_set_callback_t ompt_set_callback;
static ompt_get_callback_t ompt_get_callback;
static ompt_get_state_t ompt_get_state;
static ompt_get_task_info_t ompt_get_task_info;
static ompt_get_task_memory_t ompt_get_task_memory;
static ompt_get_thread_data_t ompt_get_thread_data;
static ompt_get_parallel_info_t ompt_get_parallel_info;
static ompt_get_unique_id_t ompt_get_unique_id;
static ompt_finalize_tool_t ompt_finalize_tool;
static ompt_get_num_procs_t ompt_get_num_procs;
static ompt_get_num_places_t ompt_get_num_places;
static ompt_get_place_proc_ids_t ompt_get_place_proc_ids;
static ompt_get_place_num_t ompt_get_place_num;
static ompt_get_partition_place_nums_t ompt_get_partition_place_nums;
static ompt_get_proc_id_t ompt_get_proc_id;
static ompt_enumerate_states_t ompt_enumerate_states;
static ompt_enumerate_mutex_impls_t ompt_enumerate_mutex_impls;
99
print_ids(int level)100 static void print_ids(int level)
101 {
102 int task_type, thread_num;
103 ompt_frame_t *frame;
104 ompt_data_t *task_parallel_data;
105 ompt_data_t *task_data;
106 int exists_task = ompt_get_task_info(level, &task_type, &task_data, &frame,
107 &task_parallel_data, &thread_num);
108 char buffer[2048];
109 format_task_type(task_type, buffer);
110 if (frame)
111 printf("%" PRIu64 ": task level %d: parallel_id=%" PRIu64
112 ", task_id=%" PRIu64 ", exit_frame=%p, reenter_frame=%p, "
113 "task_type=%s=%d, thread_num=%d\n",
114 ompt_get_thread_data()->value, level,
115 exists_task ? task_parallel_data->value : 0,
116 exists_task ? task_data->value : 0, frame->exit_frame.ptr,
117 frame->enter_frame.ptr, buffer, task_type, thread_num);
118 }
119
// Frame address for the given level, obtained from the compiler builtin
// __builtin_frame_address().
#define get_frame_address(level) __builtin_frame_address(level)

// Prints "<thread id>: __builtin_frame_address(<level>)=<address>", where the
// thread id is read from ompt_get_thread_data()->value.
#define print_frame(level)                                                     \
  printf("%" PRIu64 ": __builtin_frame_address(%d)=%p\n",                      \
         ompt_get_thread_data()->value, level, get_frame_address(level))
125
// Clang (version 5.0 and above) adds an intermediate function call when
// compiling with the debug flag (-g).
127 #if defined(TEST_NEED_PRINT_FRAME_FROM_OUTLINED_FN)
128 #if defined(DEBUG) && defined(__clang__) && __clang_major__ >= 5
129 #define print_frame_from_outlined_fn(level) print_frame(level+1)
130 #else
131 #define print_frame_from_outlined_fn(level) print_frame(level)
132 #endif
133
134 #if defined(__clang__) && __clang_major__ >= 5
135 #warning "Clang 5.0 and later add an additional wrapper for outlined functions when compiling with debug information."
136 #warning "Please define -DDEBUG iff you manually pass in -g to make the tests succeed!"
137 #endif
138 #endif
139
140 // This macro helps to define a label at the current position that can be used
141 // to get the current address in the code.
142 //
143 // For print_current_address():
144 // To reliably determine the offset between the address of the label and the
145 // actual return address, we insert a NOP instruction as a jump target as the
146 // compiler would otherwise insert an instruction that we can't control. The
147 // instruction length is target dependent and is explained below.
148 //
149 // (The empty block between "#pragma omp ..." and the __asm__ statement is a
150 // workaround for a bug in the Intel Compiler.)
151 #define define_ompt_label(id) \
152 {} \
153 __asm__("nop"); \
154 ompt_label_##id:
155
156 // This macro helps to get the address of a label that is inserted by the above
157 // macro define_ompt_label(). The address is obtained with a GNU extension
158 // (&&label) that has been tested with gcc, clang and icc.
159 #define get_ompt_label_address(id) (&& ompt_label_##id)
160
161 // This macro prints the exact address that a previously called runtime function
162 // returns to.
163 #define print_current_address(id) \
164 define_ompt_label(id) \
165 print_possible_return_addresses(get_ompt_label_address(id))
166
167 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
168 // On X86 the NOP instruction is 1 byte long. In addition, the compiler inserts
169 // a MOV instruction for non-void runtime functions which is 3 bytes long.
170 #define print_possible_return_addresses(addr) \
171 printf("%" PRIu64 ": current_address=%p or %p for non-void functions\n", \
172 ompt_get_thread_data()->value, ((char *)addr) - 1, ((char *)addr) - 4)
173 #elif KMP_ARCH_PPC64
174 // On Power the NOP instruction is 4 bytes long. In addition, the compiler
175 // inserts a second NOP instruction (another 4 bytes). For non-void runtime
176 // functions Clang inserts a STW instruction (but only if compiling under
177 // -fno-PIC which will be the default with Clang 8.0, another 4 bytes).
178 #define print_possible_return_addresses(addr) \
179 printf("%" PRIu64 ": current_address=%p or %p\n", ompt_get_thread_data()->value, \
180 ((char *)addr) - 8, ((char *)addr) - 12)
181 #elif KMP_ARCH_AARCH64
// On AArch64 the NOP instruction is 4 bytes long and can be followed by an
// inserted store instruction (another 4 bytes long).
184 #define print_possible_return_addresses(addr) \
185 printf("%" PRIu64 ": current_address=%p or %p\n", ompt_get_thread_data()->value, \
186 ((char *)addr) - 4, ((char *)addr) - 8)
187 #elif KMP_ARCH_RISCV64
188 #if __riscv_compressed
// On RV64GC the C.NOP instruction is 2 bytes long. In addition, the compiler
// inserts a J instruction (targeting the successor basic block), which
// accounts for another 4 bytes. Finally, an additional J instruction may
// appear (adding 4 more bytes) when the C.NOP is referenced elsewhere (i.e.,
// by another branch).
194 #define print_possible_return_addresses(addr) \
195 printf("%" PRIu64 ": current_address=%p or %p\n", \
196 ompt_get_thread_data()->value, ((char *)addr) - 6, ((char *)addr) - 10)
197 #else
// On RV64G the NOP instruction is 4 bytes long. In addition, the compiler
// inserts a J instruction (targeting the successor basic block), which
// accounts for another 4 bytes. Finally, an additional J instruction may
// appear (adding 4 more bytes) when the NOP is referenced elsewhere (i.e.,
// by another branch).
203 #define print_possible_return_addresses(addr) \
204 printf("%" PRIu64 ": current_address=%p or %p\n", \
205 ompt_get_thread_data()->value, ((char *)addr) - 8, ((char *)addr) - 12)
206 #endif
207 #else
208 #error Unsupported target architecture, cannot determine address offset!
209 #endif
210
211
212 // This macro performs a somewhat similar job to print_current_address(), except
213 // that it discards a certain number of nibbles from the address and only prints
214 // the most significant bits / nibbles. This can be used for cases where the
215 // return address can only be approximated.
216 //
// To account for overflows (i.e., the most significant bits / nibbles have
// just changed because we are a few bytes above the relevant power of two),
// the addresses of the "current" and of the "previous" block are printed.
220 #define print_fuzzy_address(id) \
221 define_ompt_label(id) \
222 print_fuzzy_address_blocks(get_ompt_label_address(id))
223
// If you change this define you need to adapt all capture patterns in the tests
// to include or discard the new number of nibbles!
#define FUZZY_ADDRESS_DISCARD_NIBBLES 2
// Size of one address block: 16^FUZZY_ADDRESS_DISCARD_NIBBLES (each discarded
// nibble is 4 bits).
#define FUZZY_ADDRESS_DISCARD_BYTES (1 << ((FUZZY_ADDRESS_DISCARD_NIBBLES) * 4))
// Prints the thread id followed by four candidate block numbers in hex:
// addr / FUZZY_ADDRESS_DISCARD_BYTES offset by -1, 0, +1 and +2. The
// unmodified address is appended in parentheses.
#define print_fuzzy_address_blocks(addr)                                       \
  printf("%" PRIu64 ": fuzzy_address=0x%" PRIx64 " or 0x%" PRIx64              \
         " or 0x%" PRIx64 " or 0x%" PRIx64 " (%p)\n",                          \
         ompt_get_thread_data()->value,                                        \
         ((uint64_t)addr) / FUZZY_ADDRESS_DISCARD_BYTES - 1,                   \
         ((uint64_t)addr) / FUZZY_ADDRESS_DISCARD_BYTES,                       \
         ((uint64_t)addr) / FUZZY_ADDRESS_DISCARD_BYTES + 1,                   \
         ((uint64_t)addr) / FUZZY_ADDRESS_DISCARD_BYTES + 2, addr)
236
// Registers the handler on_<name> for the callback <name> through
// ompt_set_callback(). The handler is first assigned to a local variable of
// type `type`, so its signature is checked against the expected callback
// type before it is cast to the generic ompt_callback_t. A message is
// printed when ompt_set_callback() returns ompt_set_never.
#define register_callback_t(name, type)                                        \
  do {                                                                         \
    type f_##name = &on_##name;                                                \
    if (ompt_set_callback(name, (ompt_callback_t)f_##name) == ompt_set_never)  \
      printf("0: Could not register callback '" #name "'\n");                  \
  } while (0)

// Shorthand for callbacks whose handler type is named <name>_t.
#define register_callback(name) register_callback_t(name, name##_t)
245
246 #ifndef USE_PRIVATE_TOOL
247 static void
on_ompt_callback_mutex_acquire(ompt_mutex_t kind,unsigned int hint,unsigned int impl,ompt_wait_id_t wait_id,const void * codeptr_ra)248 on_ompt_callback_mutex_acquire(
249 ompt_mutex_t kind,
250 unsigned int hint,
251 unsigned int impl,
252 ompt_wait_id_t wait_id,
253 const void *codeptr_ra)
254 {
255 switch(kind)
256 {
257 case ompt_mutex_lock:
258 printf("%" PRIu64 ":" _TOOL_PREFIX
259 " ompt_event_wait_lock: wait_id=%" PRIu64 ", hint=%" PRIu32
260 ", impl=%" PRIu32 ", codeptr_ra=%p \n",
261 ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
262 break;
263 case ompt_mutex_nest_lock:
264 printf("%" PRIu64 ":" _TOOL_PREFIX
265 " ompt_event_wait_nest_lock: wait_id=%" PRIu64 ", hint=%" PRIu32
266 ", impl=%" PRIu32 ", codeptr_ra=%p \n",
267 ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
268 break;
269 case ompt_mutex_critical:
270 printf("%" PRIu64 ":" _TOOL_PREFIX
271 " ompt_event_wait_critical: wait_id=%" PRIu64 ", hint=%" PRIu32
272 ", impl=%" PRIu32 ", codeptr_ra=%p \n",
273 ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
274 break;
275 case ompt_mutex_atomic:
276 printf("%" PRIu64 ":" _TOOL_PREFIX
277 " ompt_event_wait_atomic: wait_id=%" PRIu64 ", hint=%" PRIu32
278 ", impl=%" PRIu32 ", codeptr_ra=%p \n",
279 ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
280 break;
281 case ompt_mutex_ordered:
282 printf("%" PRIu64 ":" _TOOL_PREFIX
283 " ompt_event_wait_ordered: wait_id=%" PRIu64 ", hint=%" PRIu32
284 ", impl=%" PRIu32 ", codeptr_ra=%p \n",
285 ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
286 break;
287 default:
288 break;
289 }
290 }
291
292 static void
on_ompt_callback_mutex_acquired(ompt_mutex_t kind,ompt_wait_id_t wait_id,const void * codeptr_ra)293 on_ompt_callback_mutex_acquired(
294 ompt_mutex_t kind,
295 ompt_wait_id_t wait_id,
296 const void *codeptr_ra)
297 {
298 switch(kind)
299 {
300 case ompt_mutex_lock:
301 printf("%" PRIu64 ":" _TOOL_PREFIX
302 " ompt_event_acquired_lock: wait_id=%" PRIu64 ", codeptr_ra=%p \n",
303 ompt_get_thread_data()->value, wait_id, codeptr_ra);
304 break;
305 case ompt_mutex_nest_lock:
306 printf("%" PRIu64 ":" _TOOL_PREFIX
307 " ompt_event_acquired_nest_lock_first: wait_id=%" PRIu64
308 ", codeptr_ra=%p \n",
309 ompt_get_thread_data()->value, wait_id, codeptr_ra);
310 break;
311 case ompt_mutex_critical:
312 printf("%" PRIu64 ":" _TOOL_PREFIX
313 " ompt_event_acquired_critical: wait_id=%" PRIu64
314 ", codeptr_ra=%p \n",
315 ompt_get_thread_data()->value, wait_id, codeptr_ra);
316 break;
317 case ompt_mutex_atomic:
318 printf("%" PRIu64 ":" _TOOL_PREFIX
319 " ompt_event_acquired_atomic: wait_id=%" PRIu64
320 ", codeptr_ra=%p \n",
321 ompt_get_thread_data()->value, wait_id, codeptr_ra);
322 break;
323 case ompt_mutex_ordered:
324 printf("%" PRIu64 ":" _TOOL_PREFIX
325 " ompt_event_acquired_ordered: wait_id=%" PRIu64
326 ", codeptr_ra=%p \n",
327 ompt_get_thread_data()->value, wait_id, codeptr_ra);
328 break;
329 default:
330 break;
331 }
332 }
333
334 static void
on_ompt_callback_mutex_released(ompt_mutex_t kind,ompt_wait_id_t wait_id,const void * codeptr_ra)335 on_ompt_callback_mutex_released(
336 ompt_mutex_t kind,
337 ompt_wait_id_t wait_id,
338 const void *codeptr_ra)
339 {
340 switch(kind)
341 {
342 case ompt_mutex_lock:
343 printf("%" PRIu64 ":" _TOOL_PREFIX
344 " ompt_event_release_lock: wait_id=%" PRIu64 ", codeptr_ra=%p \n",
345 ompt_get_thread_data()->value, wait_id, codeptr_ra);
346 break;
347 case ompt_mutex_nest_lock:
348 printf("%" PRIu64 ":" _TOOL_PREFIX
349 " ompt_event_release_nest_lock_last: wait_id=%" PRIu64
350 ", codeptr_ra=%p \n",
351 ompt_get_thread_data()->value, wait_id, codeptr_ra);
352 break;
353 case ompt_mutex_critical:
354 printf("%" PRIu64 ":" _TOOL_PREFIX
355 " ompt_event_release_critical: wait_id=%" PRIu64
356 ", codeptr_ra=%p \n",
357 ompt_get_thread_data()->value, wait_id, codeptr_ra);
358 break;
359 case ompt_mutex_atomic:
360 printf("%" PRIu64 ":" _TOOL_PREFIX
361 " ompt_event_release_atomic: wait_id=%" PRIu64
362 ", codeptr_ra=%p \n",
363 ompt_get_thread_data()->value, wait_id, codeptr_ra);
364 break;
365 case ompt_mutex_ordered:
366 printf("%" PRIu64 ":" _TOOL_PREFIX
367 " ompt_event_release_ordered: wait_id=%" PRIu64
368 ", codeptr_ra=%p \n",
369 ompt_get_thread_data()->value, wait_id, codeptr_ra);
370 break;
371 default:
372 break;
373 }
374 }
375
376 static void
on_ompt_callback_nest_lock(ompt_scope_endpoint_t endpoint,ompt_wait_id_t wait_id,const void * codeptr_ra)377 on_ompt_callback_nest_lock(
378 ompt_scope_endpoint_t endpoint,
379 ompt_wait_id_t wait_id,
380 const void *codeptr_ra)
381 {
382 switch(endpoint)
383 {
384 case ompt_scope_begin:
385 printf("%" PRIu64 ":" _TOOL_PREFIX
386 " ompt_event_acquired_nest_lock_next: wait_id=%" PRIu64
387 ", codeptr_ra=%p \n",
388 ompt_get_thread_data()->value, wait_id, codeptr_ra);
389 break;
390 case ompt_scope_end:
391 printf("%" PRIu64 ":" _TOOL_PREFIX
392 " ompt_event_release_nest_lock_prev: wait_id=%" PRIu64
393 ", codeptr_ra=%p \n",
394 ompt_get_thread_data()->value, wait_id, codeptr_ra);
395 break;
396 case ompt_scope_beginend:
397 printf("ompt_scope_beginend should never be passed to %s\n", __func__);
398 exit(-1);
399 }
400 }
401
402 static void
on_ompt_callback_sync_region(ompt_sync_region_t kind,ompt_scope_endpoint_t endpoint,ompt_data_t * parallel_data,ompt_data_t * task_data,const void * codeptr_ra)403 on_ompt_callback_sync_region(
404 ompt_sync_region_t kind,
405 ompt_scope_endpoint_t endpoint,
406 ompt_data_t *parallel_data,
407 ompt_data_t *task_data,
408 const void *codeptr_ra)
409 {
410 switch(endpoint)
411 {
412 case ompt_scope_begin:
413 switch(kind)
414 {
415 case ompt_sync_region_barrier:
416 case ompt_sync_region_barrier_implicit:
417 case ompt_sync_region_barrier_implicit_workshare:
418 case ompt_sync_region_barrier_implicit_parallel:
419 case ompt_sync_region_barrier_teams:
420 case ompt_sync_region_barrier_explicit:
421 case ompt_sync_region_barrier_implementation:
422 printf("%" PRIu64 ":" _TOOL_PREFIX
423 " ompt_event_barrier_begin: parallel_id=%" PRIu64
424 ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
425 ompt_get_thread_data()->value, parallel_data->value,
426 task_data->value, codeptr_ra);
427 print_ids(0);
428 break;
429 case ompt_sync_region_taskwait:
430 printf("%" PRIu64 ":" _TOOL_PREFIX
431 " ompt_event_taskwait_begin: parallel_id=%" PRIu64
432 ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
433 ompt_get_thread_data()->value, parallel_data->value,
434 task_data->value, codeptr_ra);
435 break;
436 case ompt_sync_region_taskgroup:
437 printf("%" PRIu64 ":" _TOOL_PREFIX
438 " ompt_event_taskgroup_begin: parallel_id=%" PRIu64
439 ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
440 ompt_get_thread_data()->value, parallel_data->value,
441 task_data->value, codeptr_ra);
442 break;
443 case ompt_sync_region_reduction:
444 printf("ompt_sync_region_reduction should never be passed to "
445 "on_ompt_callback_sync_region\n");
446 exit(-1);
447 break;
448 }
449 break;
450 case ompt_scope_end:
451 switch(kind)
452 {
453 case ompt_sync_region_barrier:
454 case ompt_sync_region_barrier_implicit:
455 case ompt_sync_region_barrier_explicit:
456 case ompt_sync_region_barrier_implicit_workshare:
457 case ompt_sync_region_barrier_implicit_parallel:
458 case ompt_sync_region_barrier_teams:
459 case ompt_sync_region_barrier_implementation:
460 printf("%" PRIu64 ":" _TOOL_PREFIX
461 " ompt_event_barrier_end: parallel_id=%" PRIu64
462 ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
463 ompt_get_thread_data()->value,
464 (parallel_data) ? parallel_data->value : 0, task_data->value,
465 codeptr_ra);
466 break;
467 case ompt_sync_region_taskwait:
468 printf("%" PRIu64 ":" _TOOL_PREFIX
469 " ompt_event_taskwait_end: parallel_id=%" PRIu64
470 ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
471 ompt_get_thread_data()->value,
472 (parallel_data) ? parallel_data->value : 0, task_data->value,
473 codeptr_ra);
474 break;
475 case ompt_sync_region_taskgroup:
476 printf("%" PRIu64 ":" _TOOL_PREFIX
477 " ompt_event_taskgroup_end: parallel_id=%" PRIu64
478 ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
479 ompt_get_thread_data()->value,
480 (parallel_data) ? parallel_data->value : 0, task_data->value,
481 codeptr_ra);
482 break;
483 case ompt_sync_region_reduction:
484 printf("ompt_sync_region_reduction should never be passed to "
485 "on_ompt_callback_sync_region\n");
486 exit(-1);
487 break;
488 }
489 break;
490 case ompt_scope_beginend:
491 printf("ompt_scope_beginend should never be passed to %s\n", __func__);
492 exit(-1);
493 }
494 }
495
496 static void
on_ompt_callback_sync_region_wait(ompt_sync_region_t kind,ompt_scope_endpoint_t endpoint,ompt_data_t * parallel_data,ompt_data_t * task_data,const void * codeptr_ra)497 on_ompt_callback_sync_region_wait(
498 ompt_sync_region_t kind,
499 ompt_scope_endpoint_t endpoint,
500 ompt_data_t *parallel_data,
501 ompt_data_t *task_data,
502 const void *codeptr_ra)
503 {
504 switch(endpoint)
505 {
506 case ompt_scope_begin:
507 switch(kind)
508 {
509 case ompt_sync_region_barrier:
510 case ompt_sync_region_barrier_implicit:
511 case ompt_sync_region_barrier_implicit_workshare:
512 case ompt_sync_region_barrier_implicit_parallel:
513 case ompt_sync_region_barrier_teams:
514 case ompt_sync_region_barrier_explicit:
515 case ompt_sync_region_barrier_implementation:
516 printf("%" PRIu64 ":" _TOOL_PREFIX
517 " ompt_event_wait_barrier_begin: parallel_id=%" PRIu64
518 ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
519 ompt_get_thread_data()->value, parallel_data->value,
520 task_data->value, codeptr_ra);
521 break;
522 case ompt_sync_region_taskwait:
523 printf("%" PRIu64 ":" _TOOL_PREFIX
524 " ompt_event_wait_taskwait_begin: parallel_id=%" PRIu64
525 ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
526 ompt_get_thread_data()->value, parallel_data->value,
527 task_data->value, codeptr_ra);
528 break;
529 case ompt_sync_region_taskgroup:
530 printf("%" PRIu64 ":" _TOOL_PREFIX
531 " ompt_event_wait_taskgroup_begin: parallel_id=%" PRIu64
532 ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
533 ompt_get_thread_data()->value, parallel_data->value,
534 task_data->value, codeptr_ra);
535 break;
536 case ompt_sync_region_reduction:
537 printf("ompt_sync_region_reduction should never be passed to "
538 "on_ompt_callback_sync_region_wait\n");
539 exit(-1);
540 break;
541 }
542 break;
543 case ompt_scope_end:
544 switch(kind)
545 {
546 case ompt_sync_region_barrier:
547 case ompt_sync_region_barrier_implicit:
548 case ompt_sync_region_barrier_implicit_workshare:
549 case ompt_sync_region_barrier_implicit_parallel:
550 case ompt_sync_region_barrier_teams:
551 case ompt_sync_region_barrier_explicit:
552 case ompt_sync_region_barrier_implementation:
553 printf("%" PRIu64 ":" _TOOL_PREFIX
554 " ompt_event_wait_barrier_end: parallel_id=%" PRIu64
555 ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
556 ompt_get_thread_data()->value,
557 (parallel_data) ? parallel_data->value : 0, task_data->value,
558 codeptr_ra);
559 break;
560 case ompt_sync_region_taskwait:
561 printf("%" PRIu64 ":" _TOOL_PREFIX
562 " ompt_event_wait_taskwait_end: parallel_id=%" PRIu64
563 ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
564 ompt_get_thread_data()->value,
565 (parallel_data) ? parallel_data->value : 0, task_data->value,
566 codeptr_ra);
567 break;
568 case ompt_sync_region_taskgroup:
569 printf("%" PRIu64 ":" _TOOL_PREFIX
570 " ompt_event_wait_taskgroup_end: parallel_id=%" PRIu64
571 ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
572 ompt_get_thread_data()->value,
573 (parallel_data) ? parallel_data->value : 0, task_data->value,
574 codeptr_ra);
575 break;
576 case ompt_sync_region_reduction:
577 printf("ompt_sync_region_reduction should never be passed to "
578 "on_ompt_callback_sync_region_wait\n");
579 exit(-1);
580 break;
581 }
582 break;
583 case ompt_scope_beginend:
584 printf("ompt_scope_beginend should never be passed to %s\n", __func__);
585 exit(-1);
586 }
587 }
588
// Reduction callback: prints "ompt_event_reduction_begin" or
// "ompt_event_reduction_end" with the thread id, parallel id (0 when
// parallel_data is NULL), task id and codeptr_ra. `kind` is not inspected.
// ompt_scope_beginend is treated as a fatal error: a message is printed and
// the process exits with status -1.
static void on_ompt_callback_reduction(ompt_sync_region_t kind,
                                       ompt_scope_endpoint_t endpoint,
                                       ompt_data_t *parallel_data,
                                       ompt_data_t *task_data,
                                       const void *codeptr_ra) {
  const char *fmt = NULL;

  if (endpoint == ompt_scope_begin) {
    fmt = "%" PRIu64 ":" _TOOL_PREFIX
          " ompt_event_reduction_begin: parallel_id=%" PRIu64
          ", task_id=%" PRIu64 ", codeptr_ra=%p\n";
  } else if (endpoint == ompt_scope_end) {
    fmt = "%" PRIu64 ":" _TOOL_PREFIX
          " ompt_event_reduction_end: parallel_id=%" PRIu64
          ", task_id=%" PRIu64 ", codeptr_ra=%p\n";
  } else if (endpoint == ompt_scope_beginend) {
    printf("ompt_scope_beginend should never be passed to %s\n", __func__);
    exit(-1);
  }

  // Any endpoint value not handled above leaves fmt unset and prints nothing.
  if (fmt)
    printf(fmt, ompt_get_thread_data()->value,
           (parallel_data) ? parallel_data->value : 0, task_data->value,
           codeptr_ra);
}
616
617 static void
on_ompt_callback_flush(ompt_data_t * thread_data,const void * codeptr_ra)618 on_ompt_callback_flush(
619 ompt_data_t *thread_data,
620 const void *codeptr_ra)
621 {
622 printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_flush: codeptr_ra=%p\n",
623 thread_data->value, codeptr_ra);
624 }
625
626 static void
on_ompt_callback_cancel(ompt_data_t * task_data,int flags,const void * codeptr_ra)627 on_ompt_callback_cancel(
628 ompt_data_t *task_data,
629 int flags,
630 const void *codeptr_ra)
631 {
632 const char* first_flag_value;
633 const char* second_flag_value;
634 if(flags & ompt_cancel_parallel)
635 first_flag_value = ompt_cancel_flag_t_values[0];
636 else if(flags & ompt_cancel_sections)
637 first_flag_value = ompt_cancel_flag_t_values[1];
638 else if(flags & ompt_cancel_loop)
639 first_flag_value = ompt_cancel_flag_t_values[2];
640 else if(flags & ompt_cancel_taskgroup)
641 first_flag_value = ompt_cancel_flag_t_values[3];
642
643 if(flags & ompt_cancel_activated)
644 second_flag_value = ompt_cancel_flag_t_values[4];
645 else if(flags & ompt_cancel_detected)
646 second_flag_value = ompt_cancel_flag_t_values[5];
647 else if(flags & ompt_cancel_discarded_task)
648 second_flag_value = ompt_cancel_flag_t_values[6];
649
650 printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_cancel: task_data=%" PRIu64
651 ", flags=%s|%s=%" PRIu32 ", codeptr_ra=%p\n",
652 ompt_get_thread_data()->value, task_data->value, first_flag_value,
653 second_flag_value, flags, codeptr_ra);
654 }
655
656 static void
on_ompt_callback_implicit_task(ompt_scope_endpoint_t endpoint,ompt_data_t * parallel_data,ompt_data_t * task_data,unsigned int team_size,unsigned int thread_num,int flags)657 on_ompt_callback_implicit_task(
658 ompt_scope_endpoint_t endpoint,
659 ompt_data_t *parallel_data,
660 ompt_data_t *task_data,
661 unsigned int team_size,
662 unsigned int thread_num,
663 int flags)
664 {
665 switch(endpoint)
666 {
667 case ompt_scope_begin:
668 if(task_data->ptr)
669 printf("%s\n", "0: task_data initially not null");
670 task_data->value = ompt_get_unique_id();
671
672 //there is no parallel_begin callback for implicit parallel region
673 //thus it is initialized in initial task
674 if(flags & ompt_task_initial)
675 {
676 char buffer[2048];
677
678 format_task_type(flags, buffer);
679 // Only check initial task not created by teams construct
680 if (team_size == 1 && thread_num == 1 && parallel_data->ptr)
681 printf("%s\n", "0: parallel_data initially not null");
682 parallel_data->value = ompt_get_unique_id();
683 printf("%" PRIu64 ":" _TOOL_PREFIX
684 " ompt_event_initial_task_begin: parallel_id=%" PRIu64
685 ", task_id=%" PRIu64 ", actual_parallelism=%" PRIu32
686 ", index=%" PRIu32 ", flags=%" PRIu32 "\n",
687 ompt_get_thread_data()->value, parallel_data->value,
688 task_data->value, team_size, thread_num, flags);
689 } else {
690 printf("%" PRIu64 ":" _TOOL_PREFIX
691 " ompt_event_implicit_task_begin: parallel_id=%" PRIu64
692 ", task_id=%" PRIu64 ", team_size=%" PRIu32
693 ", thread_num=%" PRIu32 "\n",
694 ompt_get_thread_data()->value, parallel_data->value,
695 task_data->value, team_size, thread_num);
696 }
697
698 break;
699 case ompt_scope_end:
700 if(flags & ompt_task_initial){
701 printf("%" PRIu64 ":" _TOOL_PREFIX
702 " ompt_event_initial_task_end: parallel_id=%" PRIu64
703 ", task_id=%" PRIu64 ", actual_parallelism=%" PRIu32
704 ", index=%" PRIu32 "\n",
705 ompt_get_thread_data()->value,
706 (parallel_data) ? parallel_data->value : 0, task_data->value,
707 team_size, thread_num);
708 } else {
709 printf("%" PRIu64 ":" _TOOL_PREFIX
710 " ompt_event_implicit_task_end: parallel_id=%" PRIu64
711 ", task_id=%" PRIu64 ", team_size=%" PRIu32
712 ", thread_num=%" PRIu32 "\n",
713 ompt_get_thread_data()->value,
714 (parallel_data) ? parallel_data->value : 0, task_data->value,
715 team_size, thread_num);
716 }
717 break;
718 case ompt_scope_beginend:
719 printf("ompt_scope_beginend should never be passed to %s\n", __func__);
720 exit(-1);
721 }
722 }
723
724 static void
on_ompt_callback_lock_init(ompt_mutex_t kind,unsigned int hint,unsigned int impl,ompt_wait_id_t wait_id,const void * codeptr_ra)725 on_ompt_callback_lock_init(
726 ompt_mutex_t kind,
727 unsigned int hint,
728 unsigned int impl,
729 ompt_wait_id_t wait_id,
730 const void *codeptr_ra)
731 {
732 switch(kind)
733 {
734 case ompt_mutex_lock:
735 printf("%" PRIu64 ":" _TOOL_PREFIX
736 " ompt_event_init_lock: wait_id=%" PRIu64 ", hint=%" PRIu32
737 ", impl=%" PRIu32 ", codeptr_ra=%p \n",
738 ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
739 break;
740 case ompt_mutex_nest_lock:
741 printf("%" PRIu64 ":" _TOOL_PREFIX
742 " ompt_event_init_nest_lock: wait_id=%" PRIu64 ", hint=%" PRIu32
743 ", impl=%" PRIu32 ", codeptr_ra=%p \n",
744 ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
745 break;
746 default:
747 break;
748 }
749 }
750
751 static void
on_ompt_callback_lock_destroy(ompt_mutex_t kind,ompt_wait_id_t wait_id,const void * codeptr_ra)752 on_ompt_callback_lock_destroy(
753 ompt_mutex_t kind,
754 ompt_wait_id_t wait_id,
755 const void *codeptr_ra)
756 {
757 switch(kind)
758 {
759 case ompt_mutex_lock:
760 printf("%" PRIu64 ":" _TOOL_PREFIX
761 " ompt_event_destroy_lock: wait_id=%" PRIu64 ", codeptr_ra=%p \n",
762 ompt_get_thread_data()->value, wait_id, codeptr_ra);
763 break;
764 case ompt_mutex_nest_lock:
765 printf("%" PRIu64 ":" _TOOL_PREFIX
766 " ompt_event_destroy_nest_lock: wait_id=%" PRIu64
767 ", codeptr_ra=%p \n",
768 ompt_get_thread_data()->value, wait_id, codeptr_ra);
769 break;
770 default:
771 break;
772 }
773 }
774
775 static void
on_ompt_callback_work(ompt_work_t wstype,ompt_scope_endpoint_t endpoint,ompt_data_t * parallel_data,ompt_data_t * task_data,uint64_t count,const void * codeptr_ra)776 on_ompt_callback_work(
777 ompt_work_t wstype,
778 ompt_scope_endpoint_t endpoint,
779 ompt_data_t *parallel_data,
780 ompt_data_t *task_data,
781 uint64_t count,
782 const void *codeptr_ra)
783 {
784 switch(endpoint)
785 {
786 case ompt_scope_begin:
787 switch(wstype)
788 {
789 case ompt_work_loop:
790 printf("%" PRIu64 ":" _TOOL_PREFIX
791 " ompt_event_loop_begin: parallel_id=%" PRIu64
792 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
793 "\n",
794 ompt_get_thread_data()->value, parallel_data->value,
795 task_data->value, codeptr_ra, count);
796 break;
797 case ompt_work_sections:
798 printf("%" PRIu64 ":" _TOOL_PREFIX
799 " ompt_event_sections_begin: parallel_id=%" PRIu64
800 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
801 "\n",
802 ompt_get_thread_data()->value, parallel_data->value,
803 task_data->value, codeptr_ra, count);
804 break;
805 case ompt_work_single_executor:
806 printf("%" PRIu64 ":" _TOOL_PREFIX
807 " ompt_event_single_in_block_begin: parallel_id=%" PRIu64
808 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
809 "\n",
810 ompt_get_thread_data()->value, parallel_data->value,
811 task_data->value, codeptr_ra, count);
812 break;
813 case ompt_work_single_other:
814 printf("%" PRIu64 ":" _TOOL_PREFIX
815 " ompt_event_single_others_begin: parallel_id=%" PRIu64
816 ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n",
817 ompt_get_thread_data()->value, parallel_data->value,
818 task_data->value, codeptr_ra, count);
819 break;
820 case ompt_work_workshare:
821 //impl
822 break;
823 case ompt_work_distribute:
824 printf("%" PRIu64 ":" _TOOL_PREFIX
825 " ompt_event_distribute_begin: parallel_id=%" PRIu64
826 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
827 "\n",
828 ompt_get_thread_data()->value, parallel_data->value,
829 task_data->value, codeptr_ra, count);
830 break;
831 case ompt_work_taskloop:
832 //impl
833 printf("%" PRIu64 ":" _TOOL_PREFIX
834 " ompt_event_taskloop_begin: parallel_id=%" PRIu64
835 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
836 "\n",
837 ompt_get_thread_data()->value, parallel_data->value,
838 task_data->value, codeptr_ra, count);
839 break;
840 case ompt_work_scope:
841 printf("%" PRIu64 ":" _TOOL_PREFIX
842 " ompt_event_scope_begin: parallel_id=%" PRIu64
843 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
844 "\n",
845 ompt_get_thread_data()->value, parallel_data->value,
846 task_data->value, codeptr_ra, count);
847 break;
848 }
849 break;
850 case ompt_scope_end:
851 switch(wstype)
852 {
853 case ompt_work_loop:
854 printf("%" PRIu64 ":" _TOOL_PREFIX
855 " ompt_event_loop_end: parallel_id=%" PRIu64
856 ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n",
857 ompt_get_thread_data()->value, parallel_data->value,
858 task_data->value, codeptr_ra, count);
859 break;
860 case ompt_work_sections:
861 printf("%" PRIu64 ":" _TOOL_PREFIX
862 " ompt_event_sections_end: parallel_id=%" PRIu64
863 ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n",
864 ompt_get_thread_data()->value, parallel_data->value,
865 task_data->value, codeptr_ra, count);
866 break;
867 case ompt_work_single_executor:
868 printf("%" PRIu64 ":" _TOOL_PREFIX
869 " ompt_event_single_in_block_end: parallel_id=%" PRIu64
870 ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n",
871 ompt_get_thread_data()->value, parallel_data->value,
872 task_data->value, codeptr_ra, count);
873 break;
874 case ompt_work_single_other:
875 printf("%" PRIu64 ":" _TOOL_PREFIX
876 " ompt_event_single_others_end: parallel_id=%" PRIu64
877 ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n",
878 ompt_get_thread_data()->value, parallel_data->value,
879 task_data->value, codeptr_ra, count);
880 break;
881 case ompt_work_workshare:
882 //impl
883 break;
884 case ompt_work_distribute:
885 printf("%" PRIu64 ":" _TOOL_PREFIX
886 " ompt_event_distribute_end: parallel_id=%" PRIu64
887 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
888 "\n",
889 ompt_get_thread_data()->value, parallel_data->value,
890 task_data->value, codeptr_ra, count);
891 break;
892 case ompt_work_taskloop:
893 //impl
894 printf("%" PRIu64 ":" _TOOL_PREFIX
895 " ompt_event_taskloop_end: parallel_id=%" PRIu64
896 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
897 "\n",
898 ompt_get_thread_data()->value, parallel_data->value,
899 task_data->value, codeptr_ra, count);
900 break;
901 case ompt_work_scope:
902 printf("%" PRIu64 ":" _TOOL_PREFIX
903 " ompt_event_scope_end: parallel_id=%" PRIu64
904 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
905 "\n",
906 ompt_get_thread_data()->value, parallel_data->value,
907 task_data->value, codeptr_ra, count);
908 break;
909 }
910 break;
911 case ompt_scope_beginend:
912 printf("ompt_scope_beginend should never be passed to %s\n", __func__);
913 exit(-1);
914 }
915 }
916
/*
 * Callback for ompt_callback_masked.
 *
 * Prints one "ompt_event_masked_begin" or "ompt_event_masked_end" line that
 * carries the calling thread's id, the parallel region id, the task id and
 * the code pointer. An ompt_scope_beginend endpoint is treated as a fatal
 * error: a diagnostic is printed and the process exits with status -1.
 * Any other endpoint value is ignored.
 */
static void on_ompt_callback_masked(ompt_scope_endpoint_t endpoint,
                                    ompt_data_t *parallel_data,
                                    ompt_data_t *task_data,
                                    const void *codeptr_ra) {
  if (endpoint == ompt_scope_begin) {
    const uint64_t thread_id = ompt_get_thread_data()->value;
    const uint64_t parallel_id = parallel_data->value;
    const uint64_t task_id = task_data->value;
    printf("%" PRIu64 ":" _TOOL_PREFIX
           " ompt_event_masked_begin: parallel_id=%" PRIu64
           ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
           thread_id, parallel_id, task_id, codeptr_ra);
    return;
  }

  if (endpoint == ompt_scope_end) {
    const uint64_t thread_id = ompt_get_thread_data()->value;
    const uint64_t parallel_id = parallel_data->value;
    const uint64_t task_id = task_data->value;
    printf("%" PRIu64 ":" _TOOL_PREFIX
           " ompt_event_masked_end: parallel_id=%" PRIu64
           ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
           thread_id, parallel_id, task_id, codeptr_ra);
    return;
  }

  if (endpoint == ompt_scope_beginend) {
    // The data pointers are deliberately not touched on this path.
    printf("ompt_scope_beginend should never be passed to %s\n", __func__);
    exit(-1);
  }
}
942
/*
 * Callback for ompt_callback_parallel_begin.
 *
 * Warns if parallel_data already holds a non-null pointer, stores a fresh
 * unique id in parallel_data->value and prints one begin line. When the
 * ompt_parallel_team bit of flag is set the event is reported as "parallel"
 * with "requested_team_size"; otherwise as "teams" with "requested_num_teams".
 * The low four bits of flag are printed as the invoker.
 */
static void on_ompt_callback_parallel_begin(
    ompt_data_t *encountering_task_data,
    const ompt_frame_t *encountering_task_frame, ompt_data_t *parallel_data,
    uint32_t requested_team_size, int flag, const void *codeptr_ra) {
  const char *region_kind;
  const char *size_label;

  if (parallel_data->ptr) {
    printf("0: parallel_data initially not null\n");
  }
  parallel_data->value = ompt_get_unique_id();

  if (flag & ompt_parallel_team) {
    region_kind = "parallel";
    size_label = "team_size";
  } else {
    region_kind = "teams";
    size_label = "num_teams";
  }

  const int invoker_bits = flag & 0xF;
  void *parent_exit = encountering_task_frame->exit_frame.ptr;
  void *parent_reenter = encountering_task_frame->enter_frame.ptr;

  printf("%" PRIu64 ":" _TOOL_PREFIX
         " ompt_event_%s_begin: parent_task_id=%" PRIu64
         ", parent_task_frame.exit=%p, parent_task_frame.reenter=%p, "
         "parallel_id=%" PRIu64 ", requested_%s=%" PRIu32
         ", codeptr_ra=%p, invoker=%d\n",
         ompt_get_thread_data()->value, region_kind,
         encountering_task_data->value, parent_exit, parent_reenter,
         parallel_data->value, size_label, requested_team_size, codeptr_ra,
         invoker_bits);
}
963
/*
 * Callback for ompt_callback_parallel_end.
 *
 * Prints one end line for a "parallel" region (ompt_parallel_team bit set in
 * flag) or a "teams" region (bit clear), together with the region id, the
 * encountering task id, the invoker (low four bits of flag) and the code
 * pointer.
 */
static void on_ompt_callback_parallel_end(ompt_data_t *parallel_data,
                                          ompt_data_t *encountering_task_data,
                                          int flag, const void *codeptr_ra) {
  const char *region_kind;
  if (flag & ompt_parallel_team) {
    region_kind = "parallel";
  } else {
    region_kind = "teams";
  }
  const int invoker_bits = flag & 0xF;

  printf("%" PRIu64 ":" _TOOL_PREFIX
         " ompt_event_%s_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64
         ", invoker=%d, codeptr_ra=%p\n",
         ompt_get_thread_data()->value, region_kind, parallel_data->value,
         encountering_task_data->value, invoker_bits, codeptr_ra);
}
974
975 static void
on_ompt_callback_task_create(ompt_data_t * encountering_task_data,const ompt_frame_t * encountering_task_frame,ompt_data_t * new_task_data,int type,int has_dependences,const void * codeptr_ra)976 on_ompt_callback_task_create(
977 ompt_data_t *encountering_task_data,
978 const ompt_frame_t *encountering_task_frame,
979 ompt_data_t* new_task_data,
980 int type,
981 int has_dependences,
982 const void *codeptr_ra)
983 {
984 if(new_task_data->ptr)
985 printf("0: new_task_data initially not null\n");
986 new_task_data->value = ompt_get_unique_id();
987 char buffer[2048];
988
989 format_task_type(type, buffer);
990
991 printf(
992 "%" PRIu64 ":" _TOOL_PREFIX
993 " ompt_event_task_create: parent_task_id=%" PRIu64
994 ", parent_task_frame.exit=%p, parent_task_frame.reenter=%p, "
995 "new_task_id=%" PRIu64
996 ", codeptr_ra=%p, task_type=%s=%d, has_dependences=%s\n",
997 ompt_get_thread_data()->value,
998 encountering_task_data ? encountering_task_data->value : 0,
999 encountering_task_frame ? encountering_task_frame->exit_frame.ptr : NULL,
1000 encountering_task_frame ? encountering_task_frame->enter_frame.ptr : NULL,
1001 new_task_data->value, codeptr_ra, buffer, type,
1002 has_dependences ? "yes" : "no");
1003 }
1004
1005 static void
on_ompt_callback_task_schedule(ompt_data_t * first_task_data,ompt_task_status_t prior_task_status,ompt_data_t * second_task_data)1006 on_ompt_callback_task_schedule(
1007 ompt_data_t *first_task_data,
1008 ompt_task_status_t prior_task_status,
1009 ompt_data_t *second_task_data)
1010 {
1011 printf("%" PRIu64 ":" _TOOL_PREFIX
1012 " ompt_event_task_schedule: first_task_id=%" PRIu64
1013 ", second_task_id=%" PRIu64 ", prior_task_status=%s=%d\n",
1014 ompt_get_thread_data()->value, first_task_data->value,
1015 (second_task_data ? second_task_data->value : -1),
1016 ompt_task_status_t_values[prior_task_status], prior_task_status);
1017 if (prior_task_status == ompt_task_complete ||
1018 prior_task_status == ompt_task_late_fulfill) {
1019 printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_task_end: task_id=%" PRIu64
1020 "\n", ompt_get_thread_data()->value, first_task_data->value);
1021 }
1022 }
1023
1024 static void
on_ompt_callback_dependences(ompt_data_t * task_data,const ompt_dependence_t * deps,int ndeps)1025 on_ompt_callback_dependences(
1026 ompt_data_t *task_data,
1027 const ompt_dependence_t *deps,
1028 int ndeps)
1029 {
1030 char buffer[2048];
1031 char *progress = buffer;
1032 for (int i = 0; i < ndeps && progress < buffer + 2000; i++) {
1033 if (deps[i].dependence_type == ompt_dependence_type_source ||
1034 deps[i].dependence_type == ompt_dependence_type_sink)
1035 progress +=
1036 sprintf(progress, "(%" PRIu64 ", %s), ", deps[i].variable.value,
1037 ompt_dependence_type_t_values[deps[i].dependence_type]);
1038 else
1039 progress +=
1040 sprintf(progress, "(%p, %s), ", deps[i].variable.ptr,
1041 ompt_dependence_type_t_values[deps[i].dependence_type]);
1042 }
1043 if (ndeps > 0)
1044 progress[-2] = 0;
1045 printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_dependences: task_id=%" PRIu64
1046 ", deps=[%s], ndeps=%d\n",
1047 ompt_get_thread_data()->value, task_data->value, buffer, ndeps);
1048 }
1049
1050 static void
on_ompt_callback_task_dependence(ompt_data_t * first_task_data,ompt_data_t * second_task_data)1051 on_ompt_callback_task_dependence(
1052 ompt_data_t *first_task_data,
1053 ompt_data_t *second_task_data)
1054 {
1055 printf("%" PRIu64 ":" _TOOL_PREFIX
1056 " ompt_event_task_dependence_pair: first_task_id=%" PRIu64
1057 ", second_task_id=%" PRIu64 "\n",
1058 ompt_get_thread_data()->value, first_task_data->value,
1059 second_task_data->value);
1060 }
1061
1062 static void
on_ompt_callback_thread_begin(ompt_thread_t thread_type,ompt_data_t * thread_data)1063 on_ompt_callback_thread_begin(
1064 ompt_thread_t thread_type,
1065 ompt_data_t *thread_data)
1066 {
1067 if(thread_data->ptr)
1068 printf("%s\n", "0: thread_data initially not null");
1069 thread_data->value = ompt_get_unique_id();
1070 printf("%" PRIu64 ":" _TOOL_PREFIX
1071 " ompt_event_thread_begin: thread_type=%s=%d, thread_id=%" PRIu64 "\n",
1072 ompt_get_thread_data()->value, ompt_thread_t_values[thread_type],
1073 thread_type, thread_data->value);
1074 }
1075
1076 static void
on_ompt_callback_thread_end(ompt_data_t * thread_data)1077 on_ompt_callback_thread_end(
1078 ompt_data_t *thread_data)
1079 {
1080 printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_thread_end: thread_id=%" PRIu64
1081 "\n",
1082 ompt_get_thread_data()->value, thread_data->value);
1083 }
1084
1085 static int
on_ompt_callback_control_tool(uint64_t command,uint64_t modifier,void * arg,const void * codeptr_ra)1086 on_ompt_callback_control_tool(
1087 uint64_t command,
1088 uint64_t modifier,
1089 void *arg,
1090 const void *codeptr_ra)
1091 {
1092 ompt_frame_t* omptTaskFrame;
1093 ompt_get_task_info(0, NULL, (ompt_data_t**) NULL, &omptTaskFrame, NULL, NULL);
1094 printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_control_tool: command=%" PRIu64
1095 ", modifier=%" PRIu64
1096 ", arg=%p, codeptr_ra=%p, current_task_frame.exit=%p, "
1097 "current_task_frame.reenter=%p \n",
1098 ompt_get_thread_data()->value, command, modifier, arg, codeptr_ra,
1099 omptTaskFrame->exit_frame.ptr, omptTaskFrame->enter_frame.ptr);
1100
1101 // the following would interfere with expected output for OMPT tests, so skip
1102 #ifndef _OMPT_TESTS
1103 // print task data
1104 int task_level = 0;
1105 ompt_data_t *task_data;
1106 while (ompt_get_task_info(task_level, NULL, (ompt_data_t **)&task_data, NULL,
1107 NULL, NULL)) {
1108 printf("%" PRIu64 ":" _TOOL_PREFIX " task level %d: task_id=%" PRIu64 "\n",
1109 ompt_get_thread_data()->value, task_level, task_data->value);
1110 task_level++;
1111 }
1112
1113 // print parallel data
1114 int parallel_level = 0;
1115 ompt_data_t *parallel_data;
1116 while (ompt_get_parallel_info(parallel_level, (ompt_data_t **)¶llel_data,
1117 NULL)) {
1118 printf("%" PRIu64 ":" _TOOL_PREFIX " parallel level %d: parallel_id=%" PRIu64
1119 "\n",
1120 ompt_get_thread_data()->value, parallel_level, parallel_data->value);
1121 parallel_level++;
1122 }
1123 #endif
1124 return 0; //success
1125 }
1126
/*
 * Tool initializer.
 *
 * Resolves a set of runtime entry points by name through `lookup` and stores
 * them in function pointers declared outside this function, then invokes the
 * register_callback / register_callback_t macros for each callback this tool
 * handles, and finally prints how a NULL pointer is formatted by "%p".
 *
 *   lookup              function used to resolve runtime entry points by name.
 *   initial_device_num  not used in this function.
 *   tool_data           not used in this function.
 *
 * Returns 1 (success). The lookup results are not checked for NULL here.
 */
int ompt_initialize(
  ompt_function_lookup_t lookup,
  int initial_device_num,
  ompt_data_t *tool_data)
{
  // Entry points for callback management, state/task/thread/parallel queries,
  // unique ids and tool finalization.
  ompt_set_callback = (ompt_set_callback_t) lookup("ompt_set_callback");
  ompt_get_callback = (ompt_get_callback_t) lookup("ompt_get_callback");
  ompt_get_state = (ompt_get_state_t) lookup("ompt_get_state");
  ompt_get_task_info = (ompt_get_task_info_t) lookup("ompt_get_task_info");
  ompt_get_task_memory = (ompt_get_task_memory_t)lookup("ompt_get_task_memory");
  ompt_get_thread_data = (ompt_get_thread_data_t) lookup("ompt_get_thread_data");
  ompt_get_parallel_info = (ompt_get_parallel_info_t) lookup("ompt_get_parallel_info");
  ompt_get_unique_id = (ompt_get_unique_id_t) lookup("ompt_get_unique_id");
  ompt_finalize_tool = (ompt_finalize_tool_t)lookup("ompt_finalize_tool");

  // The returned id is discarded.
  // NOTE(review): presumably this consumes the first id so that ids handed
  // out later differ from it - confirm the intent.
  ompt_get_unique_id();

  // Entry points for processor/place queries and for enumerating states and
  // mutex implementations.
  ompt_get_num_procs = (ompt_get_num_procs_t) lookup("ompt_get_num_procs");
  ompt_get_num_places = (ompt_get_num_places_t) lookup("ompt_get_num_places");
  ompt_get_place_proc_ids = (ompt_get_place_proc_ids_t) lookup("ompt_get_place_proc_ids");
  ompt_get_place_num = (ompt_get_place_num_t) lookup("ompt_get_place_num");
  ompt_get_partition_place_nums = (ompt_get_partition_place_nums_t) lookup("ompt_get_partition_place_nums");
  ompt_get_proc_id = (ompt_get_proc_id_t) lookup("ompt_get_proc_id");
  ompt_enumerate_states = (ompt_enumerate_states_t) lookup("ompt_enumerate_states");
  ompt_enumerate_mutex_impls = (ompt_enumerate_mutex_impls_t) lookup("ompt_enumerate_mutex_impls");

  // Callback registration. The macros are defined outside this function;
  // register_callback_t additionally takes an explicit callback type.
  // NOTE(review): presumably each invocation installs the matching on_<name>
  // handler via ompt_set_callback - verify against the macro definitions.
  register_callback(ompt_callback_mutex_acquire);
  register_callback_t(ompt_callback_mutex_acquired, ompt_callback_mutex_t);
  register_callback_t(ompt_callback_mutex_released, ompt_callback_mutex_t);
  register_callback(ompt_callback_nest_lock);
  register_callback(ompt_callback_sync_region);
  register_callback_t(ompt_callback_sync_region_wait, ompt_callback_sync_region_t);
  register_callback_t(ompt_callback_reduction, ompt_callback_sync_region_t);
  register_callback(ompt_callback_control_tool);
  register_callback(ompt_callback_flush);
  register_callback(ompt_callback_cancel);
  register_callback(ompt_callback_implicit_task);
  register_callback_t(ompt_callback_lock_init, ompt_callback_mutex_acquire_t);
  register_callback_t(ompt_callback_lock_destroy, ompt_callback_mutex_t);
  register_callback(ompt_callback_work);
  register_callback(ompt_callback_masked);
  register_callback(ompt_callback_parallel_begin);
  register_callback(ompt_callback_parallel_end);
  register_callback(ompt_callback_task_create);
  register_callback(ompt_callback_task_schedule);
  register_callback(ompt_callback_dependences);
  register_callback(ompt_callback_task_dependence);
  register_callback(ompt_callback_thread_begin);
  register_callback(ompt_callback_thread_end);
  // Emits the textual form "%p" gives a NULL pointer on this platform.
  printf("0: NULL_POINTER=%p\n", (void*)NULL);
  return 1; //success
}
1179
/*
 * Tool finalizer: writes the runtime-shutdown marker line to standard output.
 * tool_data is not used.
 */
void ompt_finalize(ompt_data_t *tool_data)
{
  (void)tool_data;
  fputs("0: ompt_event_runtime_shutdown\n", stdout);
}
1184
1185 #ifdef __cplusplus
1186 extern "C" {
1187 #endif
ompt_start_tool(unsigned int omp_version,const char * runtime_version)1188 ompt_start_tool_result_t* ompt_start_tool(
1189 unsigned int omp_version,
1190 const char *runtime_version)
1191 {
1192 static ompt_start_tool_result_t ompt_start_tool_result = {&ompt_initialize,&ompt_finalize, 0};
1193 return &ompt_start_tool_result;
1194 }
1195 #ifdef __cplusplus
1196 }
1197 #endif
1198 #endif // ifndef USE_PRIVATE_TOOL
1199 #ifdef _OMPT_TESTS
1200 #undef _OMPT_TESTS
1201 #endif
1202