1 #include "omp-tools.h"
2
3 #define ompt_start_tool disable_ompt_start_tool
4 #define _TOOL_PREFIX " _first_tool:"
5 #include "callback.h"
6 #undef _TOOL_PREFIX
7 #undef ompt_start_tool
8
9 #define CLIENT_TOOL_LIBRARIES_VAR "CUSTOM_DATA_STORAGE_TOOL_LIBRARIES"
10 static ompt_data_t *custom_get_client_ompt_data(ompt_data_t *);
11 static void free_data_pair(ompt_data_t *);
12 #define OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_THREAD_DATA custom_get_client_ompt_data
13 #define OMPT_MULTIPLEX_CUSTOM_DELETE_THREAD_DATA free_data_pair
14 #define OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA \
15 custom_get_client_ompt_data
16 #define OMPT_MULTIPLEX_CUSTOM_DELETE_PARALLEL_DATA free_data_pair
17 #define OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA custom_get_client_ompt_data
18 #define OMPT_MULTIPLEX_CUSTOM_DELETE_TASK_DATA free_data_pair
19 #include "ompt-multiplex.h"
20
21 typedef struct custom_data_pair_s {
22 ompt_data_t own_data;
23 ompt_data_t client_data;
24 } custom_data_pair_t;
25
// Map a handle to the client tool's slot of the attached data pair.
//
// data: handle whose ptr member refers to a custom_data_pair_t; may be NULL.
//
// Returns the address of the pair's client_data member, or NULL when the
// handle is NULL or no pair is attached to it (data->ptr == NULL). The second
// check avoids forming a member address from a null pointer.
static ompt_data_t *custom_get_client_ompt_data(ompt_data_t *data) {
  if (!data || !data->ptr)
    return NULL;
  custom_data_pair_t *pair = (custom_data_pair_t *)data->ptr;
  return &pair->client_data;
}
32
// Map a handle to this tool's own slot of the attached data pair.
//
// data: handle whose ptr member refers to a custom_data_pair_t; may be NULL.
//
// Returns the address of the pair's own_data member, or NULL when the handle
// is NULL or no pair is attached to it (data->ptr == NULL). The second check
// avoids forming a member address from a null pointer.
static ompt_data_t *get_own_ompt_data(ompt_data_t *data) {
  if (!data || !data->ptr)
    return NULL;
  custom_data_pair_t *pair = (custom_data_pair_t *)data->ptr;
  return &pair->own_data;
}
39
40 static ompt_multiplex_data_pair_t *
allocate_data_pair(ompt_data_t * data_pointer)41 allocate_data_pair(ompt_data_t *data_pointer) {
42 data_pointer->ptr = malloc(sizeof(ompt_multiplex_data_pair_t));
43 if (!data_pointer->ptr) {
44 printf("Malloc ERROR\n");
45 exit(-1);
46 }
47 ompt_multiplex_data_pair_t *data_pair =
48 (ompt_multiplex_data_pair_t *)data_pointer->ptr;
49 data_pair->own_data.ptr = NULL;
50 data_pair->client_data.ptr = NULL;
51 return data_pair;
52 }
53
// Release the data pair attached to a handle.
//
// data_pointer: handle whose ptr member is passed to free(). The ptr member
//               itself is left unchanged.
static void free_data_pair(ompt_data_t *data_pointer) {
  free(data_pointer->ptr);
}
57
// sync_region wrapper: replaces the parallel and task handles with this
// tool's own slots, then forwards every argument to
// on_ompt_callback_sync_region.
static void on_cds_ompt_callback_sync_region(ompt_sync_region_t kind,
                                             ompt_scope_endpoint_t endpoint,
                                             ompt_data_t *parallel_data,
                                             ompt_data_t *task_data,
                                             const void *codeptr_ra) {
  ompt_data_t *own_parallel = get_own_ompt_data(parallel_data);
  ompt_data_t *own_task = get_own_ompt_data(task_data);
  on_ompt_callback_sync_region(kind, endpoint, own_parallel, own_task,
                               codeptr_ra);
}
68
// sync_region_wait wrapper: replaces the parallel and task handles with this
// tool's own slots, then forwards every argument to
// on_ompt_callback_sync_region_wait.
static void on_cds_ompt_callback_sync_region_wait(
    ompt_sync_region_t kind, ompt_scope_endpoint_t endpoint,
    ompt_data_t *parallel_data, ompt_data_t *task_data,
    const void *codeptr_ra) {
  ompt_data_t *own_parallel = get_own_ompt_data(parallel_data);
  ompt_data_t *own_task = get_own_ompt_data(task_data);
  on_ompt_callback_sync_region_wait(kind, endpoint, own_parallel, own_task,
                                    codeptr_ra);
}
78
// flush wrapper: replaces the thread handle with this tool's own slot and
// forwards to on_ompt_callback_flush.
//
// The forward must target on_ompt_callback_flush (the handler the other
// wrappers' on_ompt_callback_* counterparts pair with); calling
// on_cds_ompt_callback_flush here would recurse without end.
static void on_cds_ompt_callback_flush(ompt_data_t *thread_data,
                                       const void *codeptr_ra) {
  thread_data = get_own_ompt_data(thread_data);
  on_ompt_callback_flush(thread_data, codeptr_ra);
}
84
// cancel wrapper: hands this tool's own task slot, together with the
// unchanged flags and code pointer, to on_ompt_callback_cancel.
static void on_cds_ompt_callback_cancel(ompt_data_t *task_data, int flags,
                                        const void *codeptr_ra) {
  on_ompt_callback_cancel(get_own_ompt_data(task_data), flags, codeptr_ra);
}
90
// implicit_task wrapper.
//
// At ompt_scope_begin a new data pair is attached to task_data; when the
// type flags also contain ompt_task_initial, a pair is attached to
// parallel_data first. No allocation happens for any other endpoint.
// Afterwards both handles are replaced by this tool's own slots and the
// event is forwarded to on_ompt_callback_implicit_task.
static void on_cds_ompt_callback_implicit_task(ompt_scope_endpoint_t endpoint,
                                               ompt_data_t *parallel_data,
                                               ompt_data_t *task_data,
                                               unsigned int team_size,
                                               unsigned int thread_num,
                                               int type) {
  if (endpoint == ompt_scope_begin) {
    if (type & ompt_task_initial)
      allocate_data_pair(parallel_data);
    allocate_data_pair(task_data);
  }

  ompt_data_t *own_parallel = get_own_ompt_data(parallel_data);
  ompt_data_t *own_task = get_own_ompt_data(task_data);
  on_ompt_callback_implicit_task(endpoint, own_parallel, own_task, team_size,
                                 thread_num, type);
}
108
// work wrapper: replaces the parallel and task handles with this tool's own
// slots, then forwards every argument to on_ompt_callback_work.
static void on_cds_ompt_callback_work(ompt_work_t wstype,
                                      ompt_scope_endpoint_t endpoint,
                                      ompt_data_t *parallel_data,
                                      ompt_data_t *task_data, uint64_t count,
                                      const void *codeptr_ra) {
  ompt_data_t *own_parallel = get_own_ompt_data(parallel_data);
  ompt_data_t *own_task = get_own_ompt_data(task_data);
  on_ompt_callback_work(wstype, endpoint, own_parallel, own_task, count,
                        codeptr_ra);
}
119
// master wrapper: replaces the parallel and task handles with this tool's
// own slots and forwards the event to on_ompt_callback_masked.
static void on_cds_ompt_callback_master(ompt_scope_endpoint_t endpoint,
                                        ompt_data_t *parallel_data,
                                        ompt_data_t *task_data,
                                        const void *codeptr_ra) {
  ompt_data_t *own_parallel = get_own_ompt_data(parallel_data);
  ompt_data_t *own_task = get_own_ompt_data(task_data);
  on_ompt_callback_masked(endpoint, own_parallel, own_task, codeptr_ra);
}
128
// parallel_begin wrapper.
//
// Prints a diagnostic line if the incoming parallel_data already carries a
// non-NULL ptr, attaches a new data pair to it regardless, and forwards the
// event to on_ompt_callback_parallel_begin using this tool's own slots for
// the parent task and the new parallel region.
static void on_cds_ompt_callback_parallel_begin(
    ompt_data_t *parent_task_data, const ompt_frame_t *parent_task_frame,
    ompt_data_t *parallel_data, uint32_t requested_team_size, int invoker,
    const void *codeptr_ra) {
  ompt_data_t *own_parent_task = get_own_ompt_data(parent_task_data);

  if (parallel_data->ptr != NULL)
    printf("%s\n", "0: parallel_data initially not null");
  allocate_data_pair(parallel_data);

  ompt_data_t *own_parallel = get_own_ompt_data(parallel_data);
  on_ompt_callback_parallel_begin(own_parent_task, parent_task_frame,
                                  own_parallel, requested_team_size, invoker,
                                  codeptr_ra);
}
142
// parallel_end wrapper: replaces the task and parallel handles with this
// tool's own slots and forwards to on_ompt_callback_parallel_end.
static void on_cds_ompt_callback_parallel_end(ompt_data_t *parallel_data,
                                              ompt_data_t *task_data,
                                              int invoker,
                                              const void *codeptr_ra) {
  ompt_data_t *own_task = get_own_ompt_data(task_data);
  ompt_data_t *own_parallel = get_own_ompt_data(parallel_data);
  on_ompt_callback_parallel_end(own_parallel, own_task, invoker, codeptr_ra);
}
151
// task_create wrapper.
//
// Prints a diagnostic line if the incoming new_task_data already carries a
// non-NULL ptr, attaches a new data pair to it regardless, and forwards the
// event to on_ompt_callback_task_create using this tool's own slots for the
// parent task and the new task.
static void on_cds_ompt_callback_task_create(ompt_data_t *parent_task_data,
                                             const ompt_frame_t *parent_frame,
                                             ompt_data_t *new_task_data,
                                             int type, int has_dependences,
                                             const void *codeptr_ra) {
  ompt_data_t *own_parent_task = get_own_ompt_data(parent_task_data);

  if (new_task_data->ptr != NULL)
    printf("%s\n", "0: new_task_data initially not null");
  allocate_data_pair(new_task_data);

  ompt_data_t *own_new_task = get_own_ompt_data(new_task_data);
  on_ompt_callback_task_create(own_parent_task, parent_frame, own_new_task,
                               type, has_dependences, codeptr_ra);
}
165
166 static void
on_cds_ompt_callback_task_schedule(ompt_data_t * first_task_data,ompt_task_status_t prior_task_status,ompt_data_t * second_task_data)167 on_cds_ompt_callback_task_schedule(ompt_data_t *first_task_data,
168 ompt_task_status_t prior_task_status,
169 ompt_data_t *second_task_data) {
170 ompt_data_t *original_first_task_data = first_task_data;
171 first_task_data = get_own_ompt_data(first_task_data);
172 second_task_data = get_own_ompt_data(second_task_data);
173 on_ompt_callback_task_schedule(first_task_data, prior_task_status,
174 second_task_data);
175 }
176
// dependences wrapper: hands this tool's own task slot, plus the unchanged
// dependence array and count, to on_ompt_callback_dependences.
static void on_cds_ompt_callback_dependences(ompt_data_t *task_data,
                                             const ompt_dependence_t *deps,
                                             int ndeps) {
  on_ompt_callback_dependences(get_own_ompt_data(task_data), deps, ndeps);
}
183
184 static void
on_cds_ompt_callback_task_dependence(ompt_data_t * first_task_data,ompt_data_t * second_task_data)185 on_cds_ompt_callback_task_dependence(ompt_data_t *first_task_data,
186 ompt_data_t *second_task_data) {
187 first_task_data = get_own_ompt_data(first_task_data);
188 second_task_data = get_own_ompt_data(second_task_data);
189 on_ompt_callback_task_dependence(first_task_data, second_task_data);
190 }
191
// thread_begin wrapper.
//
// Prints a diagnostic line if the incoming thread_data already carries a
// non-NULL ptr, attaches a new data pair to it regardless, and forwards the
// event to on_ompt_callback_thread_begin with this tool's own slot.
static void on_cds_ompt_callback_thread_begin(ompt_thread_t thread_type,
                                              ompt_data_t *thread_data) {
  if (thread_data->ptr != NULL)
    printf("%s\n", "0: thread_data initially not null");
  allocate_data_pair(thread_data);

  ompt_data_t *own_thread = get_own_ompt_data(thread_data);
  on_ompt_callback_thread_begin(thread_type, own_thread);
}
200
// thread_end wrapper: forwards this tool's own thread slot to
// on_ompt_callback_thread_end. The data pair is not released here.
static void on_cds_ompt_callback_thread_end(ompt_data_t *thread_data) {
  on_ompt_callback_thread_end(get_own_ompt_data(thread_data));
}
205
on_cds_ompt_callback_control_tool(uint64_t command,uint64_t modifier,void * arg,const void * codeptr_ra)206 static int on_cds_ompt_callback_control_tool(uint64_t command,
207 uint64_t modifier, void *arg,
208 const void *codeptr_ra) {
209 printf("%" PRIu64 ": _first_tool: ompt_event_control_tool: command=%" PRIu64
210 ", modifier=%" PRIu64 ", arg=%p, codeptr_ra=%p \n",
211 ompt_get_thread_data()->value, command, modifier, arg, codeptr_ra);
212
213 // print task data
214 int task_level = 0;
215 ompt_data_t *task_data;
216 while (ompt_get_task_info(task_level, NULL, (ompt_data_t **)&task_data, NULL,
217 NULL, NULL)) {
218 task_data = get_own_ompt_data(task_data);
219 printf("%" PRIu64 ": _first_tool: task level %d: task_id=%" PRIu64 "\n",
220 ompt_get_thread_data()->value, task_level, task_data->value);
221 task_level++;
222 }
223
224 // print parallel data
225 int parallel_level = 0;
226 ompt_data_t *parallel_data;
227 while (ompt_get_parallel_info(parallel_level, (ompt_data_t **)¶llel_data,
228 NULL)) {
229 parallel_data = get_own_ompt_data(parallel_data);
230 printf("%" PRIu64 ": _first_tool: parallel level %d: parallel_id=%" PRIu64
231 "\n",
232 ompt_get_thread_data()->value, parallel_level, parallel_data->value);
233 parallel_level++;
234 }
235 return 0; // success
236 }
237
238 static ompt_get_thread_data_t ompt_cds_get_thread_data;
ompt_get_own_thread_data()239 ompt_data_t *ompt_get_own_thread_data() {
240 return get_own_ompt_data(ompt_cds_get_thread_data());
241 }
242
// register_callback2_t(name, type): register the wrapper on_cds_<name> for
// the event <name> through ompt_set_callback. The wrapper is first stored in
// a local of the given callback type. A result of ompt_set_never is reported
// on stdout.
#define register_callback2_t(name, type)                                       \
  do {                                                                         \
    type cds_handler_##name = &on_cds_##name;                                  \
    if (ompt_set_callback(name, (ompt_callback_t)cds_handler_##name) ==        \
        ompt_set_never) {                                                      \
      printf("0: Could not register callback '" #name "'\n");                  \
    }                                                                          \
  } while (0)

// register_callback2(name): shorthand for events whose callback type is
// spelled <name>_t.
#define register_callback2(name) register_callback2_t(name, name##_t)
251
// Tool initializer referenced by the ompt_start_tool result.
//
// Runs ompt_initialize first, then saves the current ompt_get_thread_data
// entry point in ompt_cds_get_thread_data and redirects ompt_get_thread_data
// to ompt_get_own_thread_data. Finally it registers the callbacks below:
// register_callback2* entries install the on_cds_* wrappers of this file, the
// remaining entries use register_callback / register_callback_t.
//
// Returns 1 on success, or 0 when ompt_initialize reports failure (a zero
// result); in that case nothing is redirected or registered.
// NOTE(review): ompt_initialize is assumed to follow the OMPT initializer
// convention (non-zero means success) - confirm against callback.h.
int ompt_cds_initialize(ompt_function_lookup_t lookup, int initial_device_num,
                        ompt_data_t *tool_data) {
  if (!ompt_initialize(lookup, initial_device_num, tool_data))
    return 0; // base initialization failed

  // Keep the previous query so ompt_get_own_thread_data can chain to it.
  ompt_cds_get_thread_data = ompt_get_thread_data;
  ompt_get_thread_data = ompt_get_own_thread_data;

  register_callback(ompt_callback_mutex_acquire);
  register_callback_t(ompt_callback_mutex_acquired, ompt_callback_mutex_t);
  register_callback_t(ompt_callback_mutex_released, ompt_callback_mutex_t);
  register_callback(ompt_callback_nest_lock);
  register_callback2(ompt_callback_sync_region);
  register_callback2_t(ompt_callback_sync_region_wait,
                       ompt_callback_sync_region_t);
  register_callback2(ompt_callback_control_tool);
  register_callback2(ompt_callback_flush);
  register_callback2(ompt_callback_cancel);
  register_callback2(ompt_callback_implicit_task);
  register_callback_t(ompt_callback_lock_init, ompt_callback_mutex_acquire_t);
  register_callback_t(ompt_callback_lock_destroy, ompt_callback_mutex_t);
  register_callback2(ompt_callback_work);
  register_callback2(ompt_callback_master);
  register_callback2(ompt_callback_parallel_begin);
  register_callback2(ompt_callback_parallel_end);
  register_callback2(ompt_callback_task_create);
  register_callback2(ompt_callback_task_schedule);
  register_callback2(ompt_callback_dependences);
  register_callback2(ompt_callback_task_dependence);
  register_callback2(ompt_callback_thread_begin);
  register_callback2(ompt_callback_thread_end);
  return 1; // success
}
283
// Tool finalizer referenced by the ompt_start_tool result: prints the
// shutdown marker line. tool_data is not used.
void ompt_cds_finalize(ompt_data_t *tool_data) {
  (void)tool_data;
  printf("0: ompt_event_runtime_shutdown\n");
}
287
ompt_start_tool(unsigned int omp_version,const char * runtime_version)288 ompt_start_tool_result_t *ompt_start_tool(unsigned int omp_version,
289 const char *runtime_version) {
290 static ompt_start_tool_result_t ompt_start_tool_result = {
291 &ompt_cds_initialize, &ompt_cds_finalize, 0};
292 return &ompt_start_tool_result;
293 }
294