// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2014 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
 *
 */

/** FIXME: Convert numbers based on machine and file */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifndef NO_AUDIT
#include <libaudit.h>
#endif
#include "trace-local.h"
#include "trace-hash.h"
#include "trace-hash-local.h"
#include "list.h"

#include <linux/time64.h>

#ifdef WARN_NO_AUDIT
# warning "lib audit not found, using raw syscalls "	\
	"(install audit-libs-devel (for fedora) or libaudit-dev (for debian/ubuntu) and try again)"
#endif

#define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWP"
#define TASK_STATE_MAX		1024

#define task_from_item(item)	container_of(item, struct task_data, hash)
#define start_from_item(item)	container_of(item, struct start_data, hash)
#define event_from_item(item)	container_of(item, struct event_hash, hash)
#define stack_from_item(item)	container_of(item, struct stack_data, hash)
#define group_from_item(item)	container_of(item, struct group_data, hash)
#define event_data_from_item(item)	container_of(item, struct event_data, hash)

static unsigned long long nsecs_per_sec(unsigned long long ts)
{
	return ts / NSEC_PER_SEC;
}

static unsigned long long mod_to_usec(unsigned long long ts)
{
	return ((ts % NSEC_PER_SEC) + NSEC_PER_USEC / 2) / NSEC_PER_USEC;
}
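
/*
 * Example (illustrative): for ts = 1234567890 ns, nsecs_per_sec()
 * returns 1 and mod_to_usec() rounds the 234567890 ns remainder to
 * 234568 us, so printing with "%lld.%06lld" (as the output code below
 * does) yields "1.234568".
 */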

struct handle_data;
struct event_hash;
struct event_data;

typedef void (*event_data_print)(struct trace_seq *s, struct event_hash *hash);
typedef int (*handle_event_func)(struct handle_data *h, unsigned long long pid,
				 struct event_data *data,
				 struct tep_record *record, int cpu);

enum event_data_type {
	EVENT_TYPE_UNDEFINED,
	EVENT_TYPE_STACK,
	EVENT_TYPE_SCHED_SWITCH,
	EVENT_TYPE_WAKEUP,
	EVENT_TYPE_FUNC,
	EVENT_TYPE_SYSCALL,
	EVENT_TYPE_IRQ,
	EVENT_TYPE_SOFTIRQ,
	EVENT_TYPE_SOFTIRQ_RAISE,
	EVENT_TYPE_PROCESS_EXEC,
	EVENT_TYPE_USER_MATE,
};

struct event_data {
	struct trace_hash_item	hash;
	int			id;
	int			trace;
	struct tep_event	*event;

	struct event_data	*end;
	struct event_data	*start;

	struct tep_format_field	*pid_field;
	struct tep_format_field	*start_match_field;	/* match with start */
	struct tep_format_field	*end_match_field;	/* match with end */
	struct tep_format_field	*data_field;	/* optional */

	event_data_print	print_func;
	handle_event_func	handle_event;
	void			*private;
	int			migrate;	/* start/end pairs can migrate cpus */
	int			global;		/* use global tasks */
	enum event_data_type	type;
};

struct stack_data {
	struct trace_hash_item	hash;
	unsigned long long	count;
	unsigned long long	time;
	unsigned long long	time_min;
	unsigned long long	ts_min;
	unsigned long long	time_max;
	unsigned long long	ts_max;
	unsigned long long	time_avg;
	unsigned long		size;
	char			caller[];
};

struct stack_holder {
	unsigned long		size;
	void			*caller;
	struct tep_record	*record;
};

struct start_data {
	struct trace_hash_item	hash;
	struct event_data	*event_data;
	struct list_head	list;
	struct task_data	*task;
	unsigned long long	timestamp;
	unsigned long long	search_val;
	unsigned long long	val;
	int			cpu;

	struct stack_holder	stack;
};

struct event_hash {
	struct trace_hash_item	hash;
	struct event_data	*event_data;
	unsigned long long	search_val;
	unsigned long long	val;
	unsigned long long	count;
	unsigned long long	time_total;
	unsigned long long	time_avg;
	unsigned long long	time_max;
	unsigned long long	ts_max;
	unsigned long long	time_min;
	unsigned long long	ts_min;
	unsigned long long	time_std;
	unsigned long long	last_time;

	struct trace_hash	stacks;
};

struct group_data {
	struct trace_hash_item	hash;
	char			*comm;
	struct trace_hash	event_hash;
};

struct task_data {
	struct trace_hash_item	hash;
	int			pid;
	int			sleeping;

	char			*comm;

	struct trace_hash	start_hash;
	struct trace_hash	event_hash;

	struct task_data	*proxy;
	struct start_data	*last_start;
	struct event_hash	*last_event;
	struct tep_record	*last_stack;
	struct handle_data	*handle;
	struct group_data	*group;
};

struct cpu_info {
	int			current;
};

struct sched_switch_data {
	struct tep_format_field	*prev_state;
	int			match_state;
};

struct handle_data {
	struct handle_data	*next;
	struct tracecmd_input	*handle;
	struct tep_handle	*pevent;

	struct trace_hash	events;
	struct trace_hash	group_hash;

	struct cpu_info		**cpu_data;

	struct tep_format_field	*common_pid;
	struct tep_format_field	*wakeup_comm;
	struct tep_format_field	*switch_prev_comm;
	struct tep_format_field	*switch_next_comm;

	struct sched_switch_data sched_switch_blocked;
	struct sched_switch_data sched_switch_preempt;

	struct trace_hash	task_hash;
	struct list_head	*cpu_starts;
	struct list_head	migrate_starts;

	struct task_data	*global_task;
	struct task_data	*global_percpu_tasks;

	int			cpus;
};

static struct handle_data *handles;
static struct event_data *stacktrace_event;
static bool merge_like_comms = false;

void trace_profile_set_merge_like_comms(void)
{
	merge_like_comms = true;
}
static struct start_data *
add_start(struct task_data *task,
	  struct event_data *event_data, struct tep_record *record,
	  unsigned long long search_val, unsigned long long val)
{
	struct start_data *start;

	start = malloc(sizeof(*start));
	if (!start)
		return NULL;
	memset(start, 0, sizeof(*start));
	start->hash.key = trace_hash(search_val);
	start->search_val = search_val;
	start->val = val;
	start->timestamp = record->ts;
	start->event_data = event_data;
	start->cpu = record->cpu;
	start->task = task;
	trace_hash_add(&task->start_hash, &start->hash);
	if (event_data->migrate)
		list_add(&start->list, &task->handle->migrate_starts);
	else
		list_add(&start->list, &task->handle->cpu_starts[record->cpu]);
	return start;
}
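
/*
 * Note: a start is keyed by search_val within the owning task, and is
 * also queued on the handle's per-CPU list (or on the migrate list
 * when the event pair may change CPUs), so that missed events on one
 * CPU can invalidate only the starts that could have been affected.
 * For the irq_handler_entry/irq_handler_exit pair set up below, for
 * example, search_val is the value of the "irq" field.
 */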

struct event_data_match {
	struct event_data	*event_data;
	unsigned long long	search_val;
	unsigned long long	val;
};

static int match_start(struct trace_hash_item *item, void *data)
{
	struct start_data *start = start_from_item(item);
	struct event_data_match *edata = data;

	return start->event_data == edata->event_data &&
		start->search_val == edata->search_val;
}

static int match_event(struct trace_hash_item *item, void *data)
{
	struct event_data_match *edata = data;
	struct event_hash *event = event_from_item(item);

	return event->event_data == edata->event_data &&
		event->search_val == edata->search_val &&
		event->val == edata->val;
}

static struct event_hash *
find_event_hash(struct task_data *task, struct event_data_match *edata)
{
	struct event_hash *event_hash;
	struct trace_hash_item *item;
	unsigned long long key;

	key = (unsigned long)edata->event_data +
		(unsigned long)edata->search_val +
		(unsigned long)edata->val;
	key = trace_hash(key);
	item = trace_hash_find(&task->event_hash, key, match_event, edata);
	if (item)
		return event_from_item(item);

	event_hash = malloc(sizeof(*event_hash));
	if (!event_hash)
		return NULL;
	memset(event_hash, 0, sizeof(*event_hash));

	event_hash->event_data = edata->event_data;
	event_hash->search_val = edata->search_val;
	event_hash->val = edata->val;
	event_hash->hash.key = key;
	trace_hash_init(&event_hash->stacks, 32);

	trace_hash_add(&task->event_hash, &event_hash->hash);

	return event_hash;
}
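
/*
 * Note: the lookup key folds the event_data pointer and both values
 * into a single sum before hashing, so different (event, search_val,
 * val) triples usually land in different buckets; match_event() then
 * resolves any collisions by comparing all three fields exactly.
 */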

static struct event_hash *
find_start_event_hash(struct task_data *task, struct event_data *event_data,
		      struct start_data *start)
{
	struct event_data_match edata;

	edata.event_data = event_data;
	edata.search_val = start->search_val;
	edata.val = start->val;

	return find_event_hash(task, &edata);
}

static struct start_data *
find_start(struct task_data *task, struct event_data *event_data,
	   unsigned long long search_val)
{
	unsigned long long key = trace_hash(search_val);
	struct event_data_match edata;
	void *data = &edata;
	struct trace_hash_item *item;
	struct start_data *start;

	edata.event_data = event_data;
	edata.search_val = search_val;

	item = trace_hash_find(&task->start_hash, key, match_start, data);
	if (!item)
		return NULL;

	start = start_from_item(item);
	return start;
}

struct stack_match {
	void		*caller;
	unsigned long	size;
};

static int match_stack(struct trace_hash_item *item, void *data)
{
	struct stack_data *stack = stack_from_item(item);
	struct stack_match *match = data;

	if (match->size != stack->size)
		return 0;

	return memcmp(stack->caller, match->caller, stack->size) == 0;
}


static void add_event_stack(struct event_hash *event_hash,
			    void *caller, unsigned long size,
			    unsigned long long time, unsigned long long ts)
{
	unsigned long long key;
	struct stack_data *stack;
	struct stack_match match;
	struct trace_hash_item *item;
	int i;

	match.caller = caller;
	match.size = size;

	if (size < sizeof(int))
		die("Stack size of less than sizeof(int)??");

	for (key = 0, i = 0; i <= size - sizeof(int); i += sizeof(int))
		key += trace_hash(*(int *)(caller + i));

	item = trace_hash_find(&event_hash->stacks, key, match_stack, &match);
	if (!item) {
		stack = malloc(sizeof(*stack) + size);
		if (!stack) {
			warning("Could not allocate stack");
			return;
		}
		memset(stack, 0, sizeof(*stack));
		memcpy(&stack->caller, caller, size);
		stack->size = size;
		stack->hash.key = key;
		trace_hash_add(&event_hash->stacks, &stack->hash);
	} else
		stack = stack_from_item(item);

	stack->count++;
	stack->time += time;
	if (stack->count == 1 || time < stack->time_min) {
		stack->time_min = time;
		stack->ts_min = ts;
	}
	if (time > stack->time_max) {
		stack->time_max = time;
		stack->ts_max = ts;
	}
}
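
/*
 * Example (illustrative): with 8-byte longs, a dump of three return
 * addresses is a 24-byte caller block. The key loop above sums
 * trace_hash() over each 4-byte word (six words here) to build the
 * key, and match_stack() falls back to a full memcmp(), so two
 * different stacks that happen to sum to the same key are still kept
 * apart as separate entries.
 */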

static void free_start(struct start_data *start)
{
	if (start->task->last_start == start)
		start->task->last_start = NULL;
	if (start->stack.record)
		tracecmd_free_record(start->stack.record);
	trace_hash_del(&start->hash);
	list_del(&start->list);
	free(start);
}

static struct event_hash *
add_and_free_start(struct task_data *task, struct start_data *start,
		   struct event_data *event_data, unsigned long long ts)
{
	struct event_hash *event_hash;
	long long delta;

	delta = ts - start->timestamp;

	/*
	 * It's possible on a live trace, because of timestamps being
	 * different on different CPUs, we can go back in time. When
	 * that happens, just zero out the delta.
	 */
	if (delta < 0)
		delta = 0;

	event_hash = find_start_event_hash(task, event_data, start);
	if (!event_hash)
		return NULL;
	event_hash->count++;
	event_hash->time_total += delta;
	event_hash->last_time = delta;

	if (delta > event_hash->time_max) {
		event_hash->time_max = delta;
		event_hash->ts_max = ts;
	}

	if (event_hash->count == 1 || delta < event_hash->time_min) {
		event_hash->time_min = delta;
		event_hash->ts_min = ts;
	}

	if (start->stack.record) {
		unsigned long size;
		void *caller;

		size = start->stack.size;
		caller = start->stack.caller;

		add_event_stack(event_hash, caller, size, delta,
				start->stack.record->ts);
		tracecmd_free_record(start->stack.record);
		start->stack.record = NULL;
	}

	free_start(start);

	return event_hash;
}

static struct event_hash *
find_and_update_start(struct task_data *task, struct event_data *event_data,
		      unsigned long long ts, unsigned long long search_val)
{
	struct start_data *start;

	start = find_start(task, event_data, search_val);
	if (!start)
		return NULL;
	return add_and_free_start(task, start, event_data, ts);
}

static int match_task(struct trace_hash_item *item, void *data)
{
	struct task_data *task = task_from_item(item);
	int pid = *(unsigned long *)data;

	return task->pid == pid;
}

static void init_task(struct handle_data *h, struct task_data *task)
{
	task->handle = h;

	trace_hash_init(&task->start_hash, 16);
	trace_hash_init(&task->event_hash, 32);
}

static struct task_data *
add_task(struct handle_data *h, int pid)
{
	unsigned long long key = trace_hash(pid);
	struct task_data *task;

	task = malloc(sizeof(*task));
	if (!task) {
		warning("Could not allocate task");
		return NULL;
	}
	memset(task, 0, sizeof(*task));

	task->pid = pid;
	task->hash.key = key;
	trace_hash_add(&h->task_hash, &task->hash);

	init_task(h, task);

	return task;
}

static struct task_data *
find_task(struct handle_data *h, int pid)
{
	unsigned long long key = trace_hash(pid);
	struct trace_hash_item *item;
	static struct task_data *last_task;
	void *data = (unsigned long *)&pid;

	if (last_task && last_task->pid == pid)
		return last_task;

	item = trace_hash_find(&h->task_hash, key, match_task, data);

	if (item)
		last_task = task_from_item(item);
	else
		last_task = add_task(h, pid);

	return last_task;
}

static int match_group(struct trace_hash_item *item, void *data)
{
	struct group_data *group = group_from_item(item);

	return strcmp(group->comm, (char *)data) == 0;
}


static void
add_task_comm(struct task_data *task, struct tep_format_field *field,
	      struct tep_record *record)
{
	const char *comm;

	task->comm = malloc(field->size + 1);
	if (!task->comm) {
		warning("Could not allocate task comm");
		return;
	}
	comm = record->data + field->offset;
	memcpy(task->comm, comm, field->size);
	task->comm[field->size] = 0;
}

/* Account for tasks that don't have starts */
static void account_task(struct task_data *task, struct event_data *event_data,
			 struct tep_record *record)
{
	struct event_data_match edata;
	struct event_hash *event_hash;
	struct task_data *proxy = NULL;
	unsigned long long search_val = 0;
	unsigned long long val = 0;
	unsigned long long pid;

	/*
	 * If an event has the pid_field set, then find that task for
	 * this event instead. Let this task proxy for it to handle
	 * stack traces on this event.
	 */
	if (event_data->pid_field) {
		tep_read_number_field(event_data->pid_field,
				      record->data, &pid);
		proxy = task;
		task = find_task(task->handle, pid);
		if (!task)
			return;
		proxy->proxy = task;
	}

	/*
	 * If data_field is defined, use that for val;
	 * if start_match_field is defined, use that for search_val.
	 */
	if (event_data->data_field) {
		tep_read_number_field(event_data->data_field,
				      record->data, &val);
	}
	if (event_data->start_match_field) {
		tep_read_number_field(event_data->start_match_field,
				      record->data, &search_val);
	}

	edata.event_data = event_data;
	edata.search_val = search_val;
	edata.val = val;

	event_hash = find_event_hash(task, &edata);
	if (!event_hash) {
		warning("failed to allocate event_hash");
		return;
	}

	event_hash->count++;
	task->last_event = event_hash;
}

static struct task_data *
find_event_task(struct handle_data *h, struct event_data *event_data,
		struct tep_record *record, unsigned long long pid)
{
	if (event_data->global) {
		if (event_data->migrate)
			return h->global_task;
		else
			return &h->global_percpu_tasks[record->cpu];
	}

	/* If pid_field is defined, use that to find the task */
	if (event_data->pid_field)
		tep_read_number_field(event_data->pid_field,
				      record->data, &pid);
	return find_task(h, pid);
}

static struct task_data *
handle_end_event(struct handle_data *h, struct event_data *event_data,
		 struct tep_record *record, int pid)
{
	struct event_hash *event_hash;
	struct task_data *task;
	unsigned long long val;

	task = find_event_task(h, event_data, record, pid);
	if (!task)
		return NULL;

	tep_read_number_field(event_data->start_match_field, record->data,
			      &val);
	event_hash = find_and_update_start(task, event_data->start, record->ts, val);
	task->last_start = NULL;
	task->last_event = event_hash;

	return task;
}

static struct task_data *
handle_start_event(struct handle_data *h, struct event_data *event_data,
		   struct tep_record *record, unsigned long long pid)
{
	struct start_data *start;
	struct task_data *task;
	unsigned long long val;

	task = find_event_task(h, event_data, record, pid);
	if (!task)
		return NULL;

	tep_read_number_field(event_data->end_match_field, record->data,
			      &val);
	start = add_start(task, event_data, record, val, val);
	if (!start) {
		warning("Failed to allocate start of task");
		return NULL;
	}

	task->last_start = start;
	task->last_event = NULL;

	return task;
}

static int handle_event_data(struct handle_data *h,
			     unsigned long long pid,
			     struct event_data *event_data,
			     struct tep_record *record, int cpu)
{
	struct task_data *task = NULL;

	/* If this is the end of an event pair (start is set) */
	if (event_data->start)
		task = handle_end_event(h, event_data, record, pid);

	/* If this is the start of an event pair (end is set) */
	if (event_data->end) {
		task = handle_start_event(h, event_data, record, pid);
		/* handle_start_event only returns NULL on error */
		if (!task)
			return -1;
	}

	if (!task) {
		task = find_task(h, pid);
		if (!task)
			return -1;
		task->proxy = NULL;
		task->last_start = NULL;
		task->last_event = NULL;
		account_task(task, event_data, record);
	}

	return 0;
}

static void handle_missed_events(struct handle_data *h, int cpu)
{
	struct start_data *start;
	struct start_data *n;

	/* Clear all starts on this CPU */
	list_for_each_entry_safe(start, n, &h->cpu_starts[cpu], list) {
		free_start(start);
	}

	/* Now clear all starts whose events can migrate */
	list_for_each_entry_safe(start, n, &h->migrate_starts, list) {
		free_start(start);
	}
}

static int match_event_data(struct trace_hash_item *item, void *data)
{
	struct event_data *event_data = event_data_from_item(item);
	int id = (int)(unsigned long)data;

	return event_data->id == id;
}

static struct event_data *
find_event_data(struct handle_data *h, int id)
{
	struct trace_hash_item *item;
	unsigned long long key = trace_hash(id);
	void *data = (void *)(unsigned long)id;

	item = trace_hash_find(&h->events, key, match_event_data, data);
	if (item)
		return event_data_from_item(item);
	return NULL;
}

static void trace_profile_record(struct tracecmd_input *handle,
				 struct tep_record *record)
{
	static struct handle_data *last_handle;
	struct tep_record *stack_record;
	struct event_data *event_data;
	struct task_data *task;
	struct handle_data *h;
	struct tep_handle *pevent;
	unsigned long long pid;
	int cpu = record->cpu;
	int id;

	if (last_handle && last_handle->handle == handle)
		h = last_handle;
	else {
		for (h = handles; h; h = h->next) {
			if (h->handle == handle)
				break;
		}
		if (!h)
			die("Handle not found?");
		last_handle = h;
	}

	if (record->missed_events)
		handle_missed_events(h, cpu);

	pevent = h->pevent;

	id = tep_data_type(pevent, record);

	event_data = find_event_data(h, id);

	if (!event_data)
		return;

	/* Get this current PID */
	tep_read_number_field(h->common_pid, record->data, &pid);

	task = find_task(h, pid);
	if (!task)
		return;
	stack_record = task->last_stack;

	if (event_data->handle_event)
		event_data->handle_event(h, pid, event_data, record, cpu);
	else
		handle_event_data(h, pid, event_data, record, cpu);

	/* If the last stack hasn't changed, free it */
	if (stack_record && task->last_stack == stack_record) {
		tracecmd_free_record(stack_record);
		task->last_stack = NULL;
	}
}

static struct event_data *
add_event(struct handle_data *h, const char *system, const char *event_name,
	  enum event_data_type type)
{
	struct event_data *event_data;
	struct tep_event *event;

	event = tep_find_event_by_name(h->pevent, system, event_name);
	if (!event)
		return NULL;

	if (!h->common_pid) {
		h->common_pid = tep_find_common_field(event, "common_pid");
		if (!h->common_pid)
			die("No 'common_pid' found in event");
	}

	event_data = malloc(sizeof(*event_data));
	if (!event_data) {
		warning("Could not allocate event_data");
		return NULL;
	}
	memset(event_data, 0, sizeof(*event_data));
	event_data->id = event->id;
	event_data->event = event;
	event_data->type = type;
	event_data->hash.key = trace_hash(event_data->event->id);

	trace_hash_add(&h->events, &event_data->hash);

	return event_data;
}

static void
mate_events(struct handle_data *h, struct event_data *start,
	    const char *pid_field, const char *end_match_field,
	    struct event_data *end, const char *start_match_field,
	    int migrate, int global)
{
	start->end = end;
	end->start = start;

	if (pid_field) {
		start->pid_field = tep_find_field(start->event, pid_field);
		if (!start->pid_field)
			die("Event: %s does not have field %s",
			    start->event->name, pid_field);
	}

	/* Field to match with end */
	start->end_match_field = tep_find_field(start->event, end_match_field);
	if (!start->end_match_field)
		die("Event: %s does not have field %s",
		    start->event->name, end_match_field);

	/* Field to match with start */
	end->start_match_field = tep_find_field(end->event, start_match_field);
	if (!end->start_match_field)
		die("Event: %s does not have field %s",
		    end->event->name, start_match_field);

	start->migrate = migrate;
	start->global = global;
	end->migrate = migrate;
	end->global = global;
}

/**
 * tracecmd_mate_events - match events to profile against
 * @handle: The input handle where the events exist.
 * @start_event: The event that starts the transaction
 * @pid_field: Use this over common_pid (may be NULL to use common_pid)
 * @end_match_field: The field that matches the end event's @start_match_field
 * @end_event: The event that ends the transaction
 * @start_match_field: The end event field that matches start's @end_match_field
 * @migrate: Can the transaction switch CPUs? 1 for yes, 0 for no
 * @global: The events are global and not per task
 */
void tracecmd_mate_events(struct tracecmd_input *handle,
			  struct tep_event *start_event,
			  const char *pid_field, const char *end_match_field,
			  struct tep_event *end_event,
			  const char *start_match_field,
			  int migrate, int global)
{
	struct handle_data *h;
	struct event_data *start;
	struct event_data *end;

	for (h = handles; h; h = h->next) {
		if (h->handle == handle)
			break;
	}
	if (!h)
		die("Handle not found for trace profile");

	start = add_event(h, start_event->system, start_event->name,
			  EVENT_TYPE_USER_MATE);

	end = add_event(h, end_event->system, end_event->name,
			EVENT_TYPE_USER_MATE);

	if (!start || !end)
		return;

	mate_events(h, start, pid_field, end_match_field, end, start_match_field,
		    migrate, global);
}
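
/*
 * Usage sketch (hypothetical events, not part of this file): to time a
 * transaction from a custom "mydev:req_start" event to its matching
 * "mydev:req_done" event, paired on a shared "req_id" field and
 * allowed to migrate CPUs:
 *
 *	struct tep_handle *tep = tracecmd_get_tep(handle);
 *	struct tep_event *start = tep_find_event_by_name(tep, "mydev", "req_start");
 *	struct tep_event *end = tep_find_event_by_name(tep, "mydev", "req_done");
 *
 *	if (start && end)
 *		tracecmd_mate_events(handle, start, NULL, "req_id",
 *				     end, "req_id", 1, 0);
 */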

static void func_print(struct trace_seq *s, struct event_hash *event_hash)
{
	const char *func;

	func = tep_find_function(event_hash->event_data->event->tep,
				 event_hash->val);
	if (func)
		trace_seq_printf(s, "func: %s()", func);
	else
		trace_seq_printf(s, "func: 0x%llx", event_hash->val);
}

static void syscall_print(struct trace_seq *s, struct event_hash *event_hash)
{
#ifndef NO_AUDIT
	const char *name = NULL;
	int machine;

	machine = audit_detect_machine();
	if (machine < 0)
		goto fail;
	name = audit_syscall_to_name(event_hash->val, machine);
	if (!name)
		goto fail;
	trace_seq_printf(s, "syscall:%s", name);
	return;
fail:
#endif
	trace_seq_printf(s, "%s:%d", event_hash->event_data->event->name,
			 (int)event_hash->val);
}

/* From Linux include/linux/interrupt.h */
#define SOFTIRQS				\
		C(HI),				\
		C(TIMER),			\
		C(NET_TX),			\
		C(NET_RX),			\
		C(BLOCK),			\
		C(BLOCK_IOPOLL),		\
		C(TASKLET),			\
		C(SCHED),			\
		C(HRTIMER),			\
		C(RCU),				\
		C(NR),

#undef C
#define C(a)	a##_SOFTIRQ

enum { SOFTIRQS };

#undef C
#define C(a)	#a

static const char *softirq_map[] = { SOFTIRQS };
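
/*
 * The two expansions of SOFTIRQS above generate, respectively:
 *	enum { HI_SOFTIRQ, TIMER_SOFTIRQ, ..., NR_SOFTIRQ };
 *	static const char *softirq_map[] = { "HI", "TIMER", ..., "NR" };
 * so softirq_map[vec] names a softirq vector and NR_SOFTIRQ is the
 * number of valid vectors.
 */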

static void softirq_print(struct trace_seq *s, struct event_hash *event_hash)
{
	int softirq = (int)event_hash->val;

	if (softirq < NR_SOFTIRQ)
		trace_seq_printf(s, "%s:%s", event_hash->event_data->event->name,
				 softirq_map[softirq]);
	else
		trace_seq_printf(s, "%s:%d", event_hash->event_data->event->name,
				 softirq);
}

static void sched_switch_print(struct trace_seq *s, struct event_hash *event_hash)
{
	const char states[] = TASK_STATE_TO_CHAR_STR;
	int i;

	trace_seq_printf(s, "%s:", event_hash->event_data->event->name);

	if (event_hash->val) {
		int val = event_hash->val;

		for (i = 0; val && i < sizeof(states) - 1; i++, val >>= 1) {
			if (val & 1)
				trace_seq_putc(s, states[i+1]);
		}
	} else
		trace_seq_putc(s, 'R');
}
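
/*
 * Example (illustrative): with TASK_STATE_TO_CHAR_STR "RSDTtXZxKWP",
 * a prev_state of 0 prints "R" (running/preempted), bit 0 (0x1)
 * prints "S" (interruptible sleep), and bit 1 (0x2) prints "D"
 * (uninterruptible sleep); a value with several bits set prints one
 * letter per set bit.
 */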

static int handle_sched_switch_event(struct handle_data *h,
				     unsigned long long pid,
				     struct event_data *event_data,
				     struct tep_record *record, int cpu)
{
	struct task_data *task;
	unsigned long long prev_pid;
	unsigned long long prev_state;
	unsigned long long next_pid;
	struct start_data *start;

	/* pid_field holds prev_pid, data_field holds prev_state */
	tep_read_number_field(event_data->pid_field,
			      record->data, &prev_pid);

	tep_read_number_field(event_data->data_field,
			      record->data, &prev_state);

	/* only care about real states */
	prev_state &= TASK_STATE_MAX - 1;

	/* end_match_field holds next_pid */
	tep_read_number_field(event_data->end_match_field,
			      record->data, &next_pid);

	task = find_task(h, prev_pid);
	if (!task)
		return -1;
	if (!task->comm)
		add_task_comm(task, h->switch_prev_comm, record);

	if (prev_state)
		task->sleeping = 1;
	else
		task->sleeping = 0;

	/* task is being scheduled out. prev_state tells why */
	start = add_start(task, event_data, record, prev_pid, prev_state);
	task->last_start = start;
	task->last_event = NULL;

	task = find_task(h, next_pid);
	if (!task)
		return -1;

	if (!task->comm)
		add_task_comm(task, h->switch_next_comm, record);

	/*
	 * If the next task was blocked, it required a wakeup to
	 * restart, and there should be one.
	 * But if it was preempted, we look for the previous sched switch.
	 * Unfortunately, we have to look for both types of events as
	 * we do not know why next_pid scheduled out.
	 *
	 * event_data->start holds the sched_wakeup event data.
	 */
	find_and_update_start(task, event_data->start, record->ts, next_pid);

	/* Look for this task if it was preempted (no wakeup found). */
	find_and_update_start(task, event_data, record->ts, next_pid);

	return 0;
}

static int handle_stacktrace_event(struct handle_data *h,
				   unsigned long long pid,
				   struct event_data *event_data,
				   struct tep_record *record, int cpu)
{
	struct task_data *orig_task;
	struct task_data *proxy;
	struct task_data *task;
	unsigned long long size;
	struct event_hash *event_hash;
	struct start_data *start;
	void *caller;

	task = find_task(h, pid);
	if (!task)
		return -1;

	if (task->last_stack) {
		tracecmd_free_record(task->last_stack);
		task->last_stack = NULL;
	}

	if ((proxy = task->proxy)) {
		task->proxy = NULL;
		orig_task = task;
		task = proxy;
	}

	if (!task->last_start && !task->last_event) {
		/*
		 * Save this stack in case function graph needs it.
		 * Need the original task, not a proxy.
		 */
		if (proxy)
			task = orig_task;
		tracecmd_record_ref(record);
		task->last_stack = record;
		return 0;
	}

	/*
	 * start_match_field holds the size.
	 * data_field holds the caller location.
	 */
	size = record->size - event_data->data_field->offset;
	caller = record->data + event_data->data_field->offset;

	/*
	 * If there's a "start" then don't add the stack until
	 * it finds a matching "end".
	 */
	if ((start = task->last_start)) {
		tracecmd_record_ref(record);
		start->stack.record = record;
		start->stack.size = size;
		start->stack.caller = caller;
		task->last_start = NULL;
		task->last_event = NULL;
		return 0;
	}

	event_hash = task->last_event;
	task->last_event = NULL;

	add_event_stack(event_hash, caller, size, event_hash->last_time,
			record->ts);

	return 0;
}

static int handle_fgraph_entry_event(struct handle_data *h,
				     unsigned long long pid,
				     struct event_data *event_data,
				     struct tep_record *record, int cpu)
{
	unsigned long long size;
	struct start_data *start;
	struct task_data *task;
	void *caller;

	task = handle_start_event(h, event_data, record, pid);
	if (!task)
		return -1;

	/*
	 * If a stack trace hasn't been used for a previous task,
	 * then it could be a function trace that we can use for
	 * the function graph. But stack traces come before the function
	 * graph events (unfortunately). So we need to attach the previous
	 * stack trace (if there is one) to this start event.
	 */
	if (task->last_stack) {
		start = task->last_start;
		record = task->last_stack;
		size = record->size - stacktrace_event->data_field->offset;
		caller = record->data + stacktrace_event->data_field->offset;
		start->stack.record = record;
		start->stack.size = size;
		start->stack.caller = caller;
		task->last_stack = NULL;
		task->last_event = NULL;
	}

	/* Do not map stacks after this event to this event */
	task->last_start = NULL;

	return 0;
}

static int handle_fgraph_exit_event(struct handle_data *h,
				    unsigned long long pid,
				    struct event_data *event_data,
				    struct tep_record *record, int cpu)
{
	struct task_data *task;

	task = handle_end_event(h, event_data, record, pid);
	if (!task)
		return -1;
	/* Do not match stacks with function graph exit events */
	task->last_event = NULL;

	return 0;
}

static int handle_process_exec(struct handle_data *h,
			       unsigned long long pid,
			       struct event_data *event_data,
			       struct tep_record *record, int cpu)
{
	struct task_data *task;
	unsigned long long val;

	/* Task has execed, remove the comm for it */
	if (event_data->data_field) {
		tep_read_number_field(event_data->data_field,
				      record->data, &val);
		pid = val;
	}

	task = find_task(h, pid);
	if (!task)
		return -1;

	free(task->comm);
	task->comm = NULL;

	return 0;
}

static int handle_sched_wakeup_event(struct handle_data *h,
				     unsigned long long pid,
				     struct event_data *event_data,
				     struct tep_record *record, int cpu)
{
	struct task_data *proxy;
	struct task_data *task = NULL;
	struct start_data *start;
	unsigned long long success;

	proxy = find_task(h, pid);
	if (!proxy)
		return -1;

	/* If present, data_field holds "success" */
	if (event_data->data_field) {
		tep_read_number_field(event_data->data_field,
				      record->data, &success);

		/* If not a successful wakeup, ignore this */
		if (!success)
			return 0;
	}

	tep_read_number_field(event_data->pid_field,
			      record->data, &pid);

	task = find_task(h, pid);
	if (!task)
		return -1;

	if (!task->comm)
		add_task_comm(task, h->wakeup_comm, record);

	/* if the task isn't sleeping, then ignore the wake up */
	if (!task->sleeping) {
		/* Ignore any following stack traces */
		proxy->proxy = NULL;
		proxy->last_start = NULL;
		proxy->last_event = NULL;
		return 0;
	}

	/* It's being woken up */
	task->sleeping = 0;

	/*
	 * We need the stack trace to be hooked to the woken up
	 * task, not the waker.
	 */
	proxy->proxy = task;

	/* There should be a blocked schedule out of this task */
	find_and_update_start(task, event_data->start, record->ts, pid);

	/* Set this up for timing how long the wakeup takes */
	start = add_start(task, event_data, record, pid, pid);
	task->last_event = NULL;
	task->last_start = start;

	return 0;
}

void trace_init_profile(struct tracecmd_input *handle, struct hook_list *hook,
			int global)
{
	struct tep_handle *pevent = tracecmd_get_tep(handle);
	struct tep_format_field **fields;
	struct handle_data *h;
	struct event_data *event_data;
	struct event_data *sched_switch;
	struct event_data *sched_wakeup;
	struct event_data *irq_entry;
	struct event_data *irq_exit;
	struct event_data *softirq_entry;
	struct event_data *softirq_exit;
	struct event_data *softirq_raise;
	struct event_data *fgraph_entry;
	struct event_data *fgraph_exit;
	struct event_data *syscall_enter;
	struct event_data *syscall_exit;
	struct event_data *process_exec;
	struct event_data *start_event;
	struct event_data *end_event;
	struct tep_event **events;
	int ret;
	int i;

	tracecmd_set_show_data_func(handle, trace_profile_record);
	h = malloc(sizeof(*h));
	if (!h) {
		warning("Could not allocate handle");
		return;
	}
	memset(h, 0, sizeof(*h));
	h->next = handles;
	handles = h;

	trace_hash_init(&h->task_hash, 1024);
	trace_hash_init(&h->events, 1024);
	trace_hash_init(&h->group_hash, 512);

	h->handle = handle;
	h->pevent = pevent;

	h->cpus = tracecmd_cpus(handle);

	/*
	 * For streaming profiling, cpus will not be set up yet.
	 * In this case, we simply use the number of cpus on the
	 * system.
	 */
	if (!h->cpus)
		h->cpus = tracecmd_count_cpus();

	list_head_init(&h->migrate_starts);
	h->cpu_starts = malloc(sizeof(*h->cpu_starts) * h->cpus);
	if (!h->cpu_starts)
		goto free_handle;

	for (i = 0; i < h->cpus; i++)
		list_head_init(&h->cpu_starts[i]);

	h->cpu_data = malloc(h->cpus * sizeof(*h->cpu_data));
	if (!h->cpu_data)
		goto free_starts;

	memset(h->cpu_data, 0, h->cpus * sizeof(*h->cpu_data));

	h->global_task = malloc(sizeof(struct task_data));
	if (!h->global_task)
		goto free_data;

	memset(h->global_task, 0, sizeof(struct task_data));
	init_task(h, h->global_task);
	h->global_task->comm = strdup("Global Events");
	if (!h->global_task->comm)
		die("malloc");
	h->global_task->pid = -1;

	h->global_percpu_tasks = calloc(h->cpus, sizeof(struct task_data));
	if (!h->global_percpu_tasks)
		die("malloc");
	for (i = 0; i < h->cpus; i++) {
		init_task(h, &h->global_percpu_tasks[i]);
		ret = asprintf(&h->global_percpu_tasks[i].comm,
			       "Global CPU[%d] Events", i);
		if (ret < 0)
			die("malloc");
		h->global_percpu_tasks[i].pid = -1 - i;
	}

	irq_entry = add_event(h, "irq", "irq_handler_entry", EVENT_TYPE_IRQ);
	irq_exit = add_event(h, "irq", "irq_handler_exit", EVENT_TYPE_IRQ);
	softirq_entry = add_event(h, "irq", "softirq_entry", EVENT_TYPE_SOFTIRQ);
	softirq_exit = add_event(h, "irq", "softirq_exit", EVENT_TYPE_SOFTIRQ);
	softirq_raise = add_event(h, "irq", "softirq_raise", EVENT_TYPE_SOFTIRQ_RAISE);
	sched_wakeup = add_event(h, "sched", "sched_wakeup", EVENT_TYPE_WAKEUP);
	sched_switch = add_event(h, "sched", "sched_switch", EVENT_TYPE_SCHED_SWITCH);
	fgraph_entry = add_event(h, "ftrace", "funcgraph_entry", EVENT_TYPE_FUNC);
	fgraph_exit = add_event(h, "ftrace", "funcgraph_exit", EVENT_TYPE_FUNC);
	syscall_enter = add_event(h, "raw_syscalls", "sys_enter", EVENT_TYPE_SYSCALL);
	syscall_exit = add_event(h, "raw_syscalls", "sys_exit", EVENT_TYPE_SYSCALL);

	process_exec = add_event(h, "sched", "sched_process_exec",
				 EVENT_TYPE_PROCESS_EXEC);

	stacktrace_event = add_event(h, "ftrace", "kernel_stack", EVENT_TYPE_STACK);
	if (stacktrace_event) {
		stacktrace_event->handle_event = handle_stacktrace_event;

		stacktrace_event->data_field = tep_find_field(stacktrace_event->event,
							    "caller");
		if (!stacktrace_event->data_field)
			die("Event: %s does not have field caller",
			    stacktrace_event->event->name);
	}

	if (process_exec) {
		process_exec->handle_event = handle_process_exec;
		process_exec->data_field = tep_find_field(process_exec->event,
							     "old_pid");
	}

	if (sched_switch) {
		sched_switch->handle_event = handle_sched_switch_event;
		sched_switch->data_field = tep_find_field(sched_switch->event,
							     "prev_state");
		if (!sched_switch->data_field)
			die("Event: %s does not have field prev_state",
			    sched_switch->event->name);

		h->switch_prev_comm = tep_find_field(sched_switch->event,
							"prev_comm");
		if (!h->switch_prev_comm)
			die("Event: %s does not have field prev_comm",
			    sched_switch->event->name);

		h->switch_next_comm = tep_find_field(sched_switch->event,
							"next_comm");
		if (!h->switch_next_comm)
			die("Event: %s does not have field next_comm",
			    sched_switch->event->name);

		sched_switch->print_func = sched_switch_print;
	}

	if (sched_switch && sched_wakeup) {
		mate_events(h, sched_switch, "prev_pid", "next_pid",
			    sched_wakeup, "pid", 1, 0);
		mate_events(h, sched_wakeup, "pid", "pid",
			    sched_switch, "prev_pid", 1, 0);
		sched_wakeup->handle_event = handle_sched_wakeup_event;

		/* The 'success' field may or may not be present */
		sched_wakeup->data_field = tep_find_field(sched_wakeup->event,
							     "success");

		h->wakeup_comm = tep_find_field(sched_wakeup->event, "comm");
		if (!h->wakeup_comm)
			die("Event: %s does not have field comm",
			    sched_wakeup->event->name);
	}

	if (irq_entry && irq_exit)
		mate_events(h, irq_entry, NULL, "irq", irq_exit, "irq", 0, global);

	if (softirq_entry)
		softirq_entry->print_func = softirq_print;

	if (softirq_exit)
		softirq_exit->print_func = softirq_print;

	if (softirq_raise)
		softirq_raise->print_func = softirq_print;

	if (softirq_entry && softirq_exit)
		mate_events(h, softirq_entry, NULL, "vec", softirq_exit, "vec",
			    0, global);

	if (softirq_entry && softirq_raise)
		mate_events(h, softirq_raise, NULL, "vec", softirq_entry, "vec",
			    0, global);

	if (fgraph_entry && fgraph_exit) {
		mate_events(h, fgraph_entry, NULL, "func", fgraph_exit, "func", 1, 0);
		fgraph_entry->handle_event = handle_fgraph_entry_event;
		fgraph_exit->handle_event = handle_fgraph_exit_event;
		fgraph_entry->print_func = func_print;
	}

	if (syscall_enter && syscall_exit) {
		mate_events(h, syscall_enter, NULL, "id", syscall_exit, "id", 1, 0);
		syscall_enter->print_func = syscall_print;
		syscall_exit->print_func = syscall_print;
	}

	events = tep_list_events(pevent, TEP_EVENT_SORT_ID);
	if (!events)
		die("malloc");

	/* Add some other events */
	event_data = add_event(h, "ftrace", "function", EVENT_TYPE_FUNC);
	if (event_data) {
		event_data->data_field =
			tep_find_field(event_data->event, "ip");
	}

	/* Add any user defined hooks */
	for (; hook; hook = hook->next) {
		start_event = add_event(h, hook->start_system, hook->start_event,
					EVENT_TYPE_USER_MATE);
		end_event = add_event(h, hook->end_system, hook->end_event,
				      EVENT_TYPE_USER_MATE);
		if (!start_event) {
			warning("Event %s not found", hook->start_event);
			continue;
		}
		if (!end_event) {
			warning("Event %s not found", hook->end_event);
			continue;
		}
		mate_events(h, start_event, hook->pid, hook->start_match,
			    end_event, hook->end_match, hook->migrate,
			    hook->global);
	}

	/* Now add any defined event that we haven't processed */
	for (i = 0; events[i]; i++) {
		event_data = find_event_data(h, events[i]->id);
		if (event_data)
			continue;

		event_data = add_event(h, events[i]->system, events[i]->name,
				       EVENT_TYPE_UNDEFINED);

		fields = tep_event_fields(events[i]);
		if (!fields)
			die("malloc");

		if (fields[0])
			event_data->data_field = fields[0];

		free(fields);
	}
	return;

 free_data:
	free(h->cpu_data);
 free_starts:
	free(h->cpu_starts);
 free_handle:
	handles = h->next;
	free(h);
	warning("Failed handle allocations");
}

static void output_event_stack(struct tep_handle *pevent, struct stack_data *stack)
{
	int longsize = tep_get_long_size(pevent);
	unsigned long long val;
	const char *func;
	unsigned long long stop = -1ULL;
	void *ptr;
	int i;

	if (longsize < 8)
		stop &= (1ULL << (longsize * 8)) - 1;
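
	/*
	 * "stop" is the all-ones value of a long on the traced machine
	 * (e.g. 0xffffffff when longsize is 4, -1ULL when it is 8);
	 * the print loop below stops when it reads this end-of-stack
	 * marker.
	 */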

	if (stack->count)
		stack->time_avg = stack->time / stack->count;

	printf("     <stack> %lld total:%lld min:%lld(ts:%lld.%06lld) max:%lld(ts:%lld.%06lld) avg=%lld\n",
	       stack->count, stack->time, stack->time_min,
	       nsecs_per_sec(stack->ts_min), mod_to_usec(stack->ts_min),
	       stack->time_max,
	       nsecs_per_sec(stack->ts_max), mod_to_usec(stack->ts_max),
	       stack->time_avg);

	for (i = 0; i < stack->size; i += longsize) {
		ptr = stack->caller + i;
		switch (longsize) {
		case 4:
			/* todo, read value from pevent */
			val = *(unsigned int *)ptr;
			break;
		case 8:
			val = *(unsigned long long *)ptr;
			break;
		default:
			die("Strange long size %d", longsize);
		}
		if (val == stop)
			break;
		func = tep_find_function(pevent, val);
		if (func)
			printf("       => %s (0x%llx)\n", func, val);
		else
			printf("       => 0x%llx\n", val);
	}
}

struct stack_chain {
	struct stack_chain	*children;
	unsigned long long	val;
	unsigned long long	time;
	unsigned long long	time_min;
	unsigned long long	ts_min;
	unsigned long long	time_max;
	unsigned long long	ts_max;
	unsigned long long	time_avg;
	unsigned long long	count;
	int			percent;
	int			nr_children;
};

static int compare_chains(const void *a, const void *b)
{
	const struct stack_chain *A = a;
	const struct stack_chain *B = b;

	if (A->time > B->time)
		return -1;
	if (A->time < B->time)
		return 1;
	/* If stacks don't use time, then use count */
	if (A->count > B->count)
		return -1;
	if (A->count < B->count)
		return 1;
	return 0;
}

static int calc_percent(unsigned long long val, unsigned long long total)
{
	return (val * 100 + total / 2) / total;
}
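
/*
 * Example (illustrative): calc_percent() rounds to the nearest whole
 * percent, so calc_percent(1, 3) = (100 + 1) / 3 = 33 and
 * calc_percent(2, 3) = (200 + 1) / 3 = 67.
 */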

static int stack_overflows(struct stack_data *stack, int longsize, int level)
{
	return longsize * level > stack->size - longsize;
}
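
/*
 * Example (illustrative): a 24-byte stack with 8-byte longs holds
 * levels 0, 1 and 2; level 3 gives 8 * 3 > 24 - 8, which overflows.
 */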

static unsigned long long
stack_value(struct stack_data *stack, int longsize, int level)
{
	void *ptr;

	ptr = &stack->caller[longsize * level];
	return longsize == 8 ? *(u64 *)ptr : *(unsigned *)ptr;
}

static struct stack_chain *
make_stack_chain(struct stack_data **stacks, int cnt, int longsize, int level,
		 int *nr_children)
{
	struct stack_chain *chain;
	unsigned long long	total_time = 0;
	unsigned long long	total_count = 0;
	unsigned long long	time;
	unsigned long long	time_min;
	unsigned long long	ts_min;
	unsigned long long	time_max;
	unsigned long long	ts_max;
	unsigned long long	count;
	unsigned long long	stop = -1ULL;
	int nr_chains = 0;
	u64 last = 0;
	u64 val;
	int start;
	int i;
	int x;

	if (longsize < 8)
		stop &= (1ULL << (longsize * 8)) - 1;

	/* First find out how many diffs there are */
	for (i = 0; i < cnt; i++) {
		if (stack_overflows(stacks[i], longsize, level))
			continue;

		val = stack_value(stacks[i], longsize, level);

		if (val == stop)
			continue;

		if (!nr_chains || val != last)
			nr_chains++;
		last = val;
	}

	if (!nr_chains) {
		*nr_children = 0;
		return NULL;
	}

	chain = malloc(sizeof(*chain) * nr_chains);
	if (!chain) {
		warning("Could not allocate chain");
		return NULL;
	}
	memset(chain, 0, sizeof(*chain) * nr_chains);

	x = 0;
	count = 0;
	start = 0;
	time = 0;
	time_min = 0;
	time_max = 0;

	for (i = 0; i < cnt; i++) {
		if (stack_overflows(stacks[i], longsize, level)) {
			start = i+1;
			continue;
		}

		val = stack_value(stacks[i], longsize, level);

		if (val == stop) {
			start = i+1;
			continue;
		}

		count += stacks[i]->count;
		time += stacks[i]->time;
		if (stacks[i]->time_max > time_max) {
			time_max = stacks[i]->time_max;
			ts_max = stacks[i]->ts_max;
		}
		if (i == start || stacks[i]->time_min < time_min) {
			time_min = stacks[i]->time_min;
			ts_min = stacks[i]->ts_min;
		}
		if (i == cnt - 1 ||
		    stack_overflows(stacks[i+1], longsize, level) ||
		    val != stack_value(stacks[i+1], longsize, level)) {

			total_time += time;
			total_count += count;
			chain[x].val = val;
			chain[x].time_avg = time / count;
			chain[x].count = count;
			chain[x].time = time;
			chain[x].time_min = time_min;
			chain[x].ts_min = ts_min;
			chain[x].time_max = time_max;
			chain[x].ts_max = ts_max;
			chain[x].children =
				make_stack_chain(&stacks[start], (i - start) + 1,
						 longsize, level+1,
						 &chain[x].nr_children);
			x++;
			start = i + 1;
			count = 0;
			time = 0;
			time_min = 0;
			time_max = 0;
		}
	}

	qsort(chain, nr_chains, sizeof(*chain), compare_chains);

	*nr_children = nr_chains;

	/* Should never happen */
	if (!total_time && !total_count)
		return chain;

	/* Now calculate percentage */
	for (i = 0; i < nr_chains; i++) {
		if (total_time)
			chain[i].percent = calc_percent(chain[i].time, total_time);
		/* In case stacks don't have time */
		else if (total_count)
			chain[i].percent = calc_percent(chain[i].count, total_count);
	}

	return chain;
}
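
/*
 * Note: make_stack_chain() expects "stacks" to be sorted (see
 * compare_stacks() below), so identical addresses at this depth form
 * one contiguous run. Each run becomes a chain entry whose totals are
 * the sums over the run, and the recursion at level+1 builds the
 * children from just that run, turning the flat array of stacks into
 * a call tree.
 */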

static void free_chain(struct stack_chain *chain, int nr_chains)
{
	int i;

	if (!chain)
		return;

	for (i = 0; i < nr_chains; i++)
		free_chain(chain[i].children, chain[i].nr_children);

	free(chain);
}

#define INDENT	5

static void print_indent(int level, unsigned long long mask)
{
	char line;
	int p;

	for (p = 0; p < level + 1; p++) {
		if (mask & (1ULL << p))
			line = '|';
		else
			line = ' ';
		printf("%*c ", INDENT, line);
	}
}

static void print_chain_func(struct tep_handle *pevent, struct stack_chain *chain)
{
	unsigned long long val = chain->val;
	const char *func;

	func = tep_find_function(pevent, val);
	if (func)
		printf("%s (0x%llx)\n", func, val);
	else
		printf("0x%llx\n", val);
}
1785 
static void output_chain(struct tep_handle *pevent, struct stack_chain *chain, int level,
			 int nr_chains, unsigned long long *mask)
{
	struct stack_chain *child;
	int nr_children;
	int i;
	char line = '|';

	if (!nr_chains)
		return;

	*mask |= (1ULL << (level + 1));
	print_indent(level + 1, *mask);
	printf("\n");

	for (i = 0; i < nr_chains; i++) {

		print_indent(level, *mask);

		printf("%*c ", INDENT, '+');

		if (i == nr_chains - 1) {
			*mask &= ~(1ULL << (level + 1));
			line = ' ';
		}

		print_chain_func(pevent, &chain[i]);

		print_indent(level, *mask);

		printf("%*c ", INDENT, line);
		printf("  %d%% (%lld)", chain[i].percent, chain[i].count);
		if (chain[i].time)
			printf(" time:%lld max:%lld(ts:%lld.%06lld) min:%lld(ts:%lld.%06lld) avg:%lld",
			       chain[i].time, chain[i].time_max,
			       nsecs_per_sec(chain[i].ts_max),
			       mod_to_usec(chain[i].ts_max),
			       chain[i].time_min,
			       nsecs_per_sec(chain[i].ts_min),
			       mod_to_usec(chain[i].ts_min),
			       chain[i].time_avg);
		printf("\n");

		for (child = chain[i].children, nr_children = chain[i].nr_children;
		     child && nr_children == 1;
		     nr_children = child->nr_children, child = child->children) {
			print_indent(level, *mask);
			printf("%*c ", INDENT, line);
			printf("   ");
			print_chain_func(pevent, child);
		}

		if (child)
			output_chain(pevent, child, level + 1, nr_children, mask);

		print_indent(level + 1, *mask);
		printf("\n");
	}
	*mask &= ~(1ULL << (level + 1));
	print_indent(level, *mask);
	printf("\n");
}

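/*
 * qsort() callback that orders stacks by their raw caller bytes so
 * identical call chains end up adjacent; ties are broken by size,
 * with the deeper stack sorting last.
 */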
static int compare_stacks(const void *a, const void *b)
{
	struct stack_data * const *A = a;
	struct stack_data * const *B = b;
	unsigned int sa, sb;
	int size;
	int i;

	/* only compare up to the smaller size of the two */
	if ((*A)->size > (*B)->size)
		size = (*B)->size;
	else
		size = (*A)->size;

	for (i = 0; i < size; i += sizeof(sa)) {
		sa = *(unsigned *)&(*A)->caller[i];
		sb = *(unsigned *)&(*B)->caller[i];
		if (sa > sb)
			return 1;
		if (sa < sb)
			return -1;
	}

	/* They are the same up to size. Then bigger size wins */
	if ((*A)->size > (*B)->size)
		return 1;
	if ((*A)->size < (*B)->size)
		return -1;
	return 0;
}

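/*
 * Gather all stacks of an event from the hash into a flat array,
 * sort them so equal call chains are adjacent, then fold them into
 * a tree of stack_chains and print it.
 */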
output_stacks(struct tep_handle * pevent,struct trace_hash * stack_hash)1880 static void output_stacks(struct tep_handle *pevent, struct trace_hash *stack_hash)
1881 {
1882 	struct trace_hash_item **bucket;
1883 	struct trace_hash_item *item;
1884 	struct stack_data **stacks;
1885 	struct stack_chain *chain;
1886 	unsigned long long mask = 0;
1887 	int nr_chains;
1888 	int longsize = tep_get_long_size(pevent);
1889 	int nr_stacks;
1890 	int i;
1891 
1892 	nr_stacks = 0;
1893 	trace_hash_for_each_bucket(bucket, stack_hash) {
1894 		trace_hash_for_each_item(item, bucket) {
1895 			nr_stacks++;
1896 		}
1897 	}
1898 
1899 	stacks = malloc(sizeof(*stacks) * nr_stacks);
1900 	if (!stacks) {
1901 		warning("Could not allocate stacks");
1902 		return;
1903 	}
1904 
1905 	nr_stacks = 0;
1906 	trace_hash_for_each_bucket(bucket, stack_hash) {
1907 		trace_hash_for_each_item(item, bucket) {
1908 			stacks[nr_stacks++] = stack_from_item(item);
1909 		}
1910 	}
1911 
1912 	qsort(stacks, nr_stacks, sizeof(*stacks), compare_stacks);
1913 
1914 	chain = make_stack_chain(stacks, nr_stacks, longsize, 0, &nr_chains);
1915 
1916 	output_chain(pevent, chain, 0, nr_chains, &mask);
1917 
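	/* Disabled debug code: dumps each raw stack individually */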
	if (0)
		for (i = 0; i < nr_stacks; i++)
			output_event_stack(pevent, stacks[i]);

	free(stacks);
	free_chain(chain, nr_chains);
}

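/*
 * Print one event entry: its name (via the event's print_func when
 * one is set), hit count and timing stats, followed by the stack
 * traces recorded against it.
 */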
static void output_event(struct event_hash *event_hash)
{
	struct event_data *event_data = event_hash->event_data;
	struct tep_handle *pevent = event_data->event->tep;
	struct trace_seq s;

	trace_seq_init(&s);

	if (event_data->print_func)
		event_data->print_func(&s, event_hash);
	else if (event_data->type == EVENT_TYPE_FUNC)
		func_print(&s, event_hash);
	else
		trace_seq_printf(&s, "%s:0x%llx",
				 event_data->event->name,
				 event_hash->val);
	trace_seq_terminate(&s);

	printf("  Event: %s (%lld)",
	       s.buffer, event_hash->count);

	trace_seq_destroy(&s);

	if (event_hash->time_total) {
		event_hash->time_avg = event_hash->time_total / event_hash->count;
		printf(" Total: %lld Avg: %lld Max: %lld(ts:%lld.%06lld) Min:%lld(ts:%lld.%06lld)",
		       event_hash->time_total, event_hash->time_avg,
		       event_hash->time_max,
		       nsecs_per_sec(event_hash->ts_max),
		       mod_to_usec(event_hash->ts_max),
		       event_hash->time_min,
		       nsecs_per_sec(event_hash->ts_min),
		       mod_to_usec(event_hash->ts_min));
	}
	printf("\n");

	output_stacks(pevent, &event_hash->stacks);
}

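/*
 * Sort order for a task's events: sched_switch entries first (lowest
 * task state first), wakeups second, then by event id, with larger
 * total time breaking ties.
 */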
static int compare_events(const void *a, const void *b)
{
	struct event_hash * const *A = a;
	struct event_hash * const *B = b;
	const struct event_data *event_data_a = (*A)->event_data;
	const struct event_data *event_data_b = (*B)->event_data;

	/* Schedule switch goes first */
	if (event_data_a->type == EVENT_TYPE_SCHED_SWITCH) {
		if (event_data_b->type != EVENT_TYPE_SCHED_SWITCH)
			return -1;
		/* the lower the state the better */
		if ((*A)->val > (*B)->val)
			return 1;
		if ((*A)->val < (*B)->val)
			return -1;
		return 0;
	} else if (event_data_b->type == EVENT_TYPE_SCHED_SWITCH)
		return 1;

	/* Wakeups are next */
	if (event_data_a->type == EVENT_TYPE_WAKEUP) {
		if (event_data_b->type != EVENT_TYPE_WAKEUP)
			return -1;
		return 0;
	} else if (event_data_b->type == EVENT_TYPE_WAKEUP)
		return 1;

	if (event_data_a->id > event_data_b->id)
		return 1;
	if (event_data_a->id < event_data_b->id)
		return -1;
	if ((*A)->time_total > (*B)->time_total)
		return -1;
	if ((*A)->time_total < (*B)->time_total)
		return 1;
	return 0;
}

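/*
 * Print the per-task report: a header with comm and pid, then every
 * event recorded for the task, sorted by compare_events(). Tasks
 * that were merged into a group are reported there instead.
 */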
static void output_task(struct handle_data *h, struct task_data *task)
{
	struct trace_hash_item **bucket;
	struct trace_hash_item *item;
	struct event_hash **events;
	const char *comm;
	int nr_events = 0;
	int i;

	if (task->group)
		return;

	if (task->comm)
		comm = task->comm;
	else
		comm = tep_data_comm_from_pid(h->pevent, task->pid);

	if (task->pid < 0)
		printf("%s\n", task->comm);
	else
		printf("\ntask: %s-%d\n", comm, task->pid);

	trace_hash_for_each_bucket(bucket, &task->event_hash) {
		trace_hash_for_each_item(item, bucket) {
			nr_events++;
		}
	}

	events = malloc(sizeof(*events) * nr_events);
	if (!events) {
		warning("Could not allocate events");
		return;
	}

	i = 0;
	trace_hash_for_each_bucket(bucket, &task->event_hash) {
		trace_hash_for_each_item(item, bucket) {
			events[i++] = event_from_item(item);
		}
	}

	qsort(events, nr_events, sizeof(*events), compare_events);

	for (i = 0; i < nr_events; i++)
		output_event(events[i]);

	free(events);
}

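/*
 * Like output_task(), but for a group of tasks that share a comm
 * (see add_group() below).
 */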
static void output_group(struct handle_data *h, struct group_data *group)
{
	struct trace_hash_item **bucket;
	struct trace_hash_item *item;
	struct event_hash **events;
	int nr_events = 0;
	int i;

	printf("\ngroup: %s\n", group->comm);

	trace_hash_for_each_bucket(bucket, &group->event_hash) {
		trace_hash_for_each_item(item, bucket) {
			nr_events++;
		}
	}

	events = malloc(sizeof(*events) * nr_events);
	if (!events) {
		warning("Could not allocate events");
		return;
	}

	i = 0;
	trace_hash_for_each_bucket(bucket, &group->event_hash) {
		trace_hash_for_each_item(item, bucket) {
			events[i++] = event_from_item(item);
		}
	}

	qsort(events, nr_events, sizeof(*events), compare_events);

	for (i = 0; i < nr_events; i++)
		output_event(events[i]);

	free(events);
}

static int compare_tasks(const void *a, const void *b)
{
	struct task_data * const *A = a;
	struct task_data * const *B = b;

	if ((*A)->pid > (*B)->pid)
		return 1;
	else if ((*A)->pid < (*B)->pid)
		return -1;
	return 0;
}

static int compare_groups(const void *a, const void *b)
{
	struct group_data * const *A = a;
	struct group_data * const *B = b;

	/*
	 * qsort() hands us pointers to the array elements, which are
	 * themselves pointers to group_data; compare the group comms,
	 * not the raw pointer bytes.
	 */
	return strcmp((*A)->comm, (*B)->comm);
}

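/*
 * Free an event entry along with all the stack traces hashed
 * under it.
 */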
static void free_event_hash(struct event_hash *event_hash)
{
	struct trace_hash_item **bucket;
	struct trace_hash_item *item;
	struct stack_data *stack;

	trace_hash_for_each_bucket(bucket, &event_hash->stacks) {
		trace_hash_while_item(item, bucket) {
			stack = stack_from_item(item);
			trace_hash_del(&stack->hash);
			free(stack);
		}
	}
	trace_hash_free(&event_hash->stacks);
	free(event_hash);
}

static void __free_task(struct task_data *task)
{
	struct trace_hash_item **bucket;
	struct trace_hash_item *item;
	struct start_data *start;
	struct event_hash *event_hash;

	free(task->comm);

	trace_hash_for_each_bucket(bucket, &task->start_hash) {
		trace_hash_while_item(item, bucket) {
			start = start_from_item(item);
			if (start->stack.record)
				tracecmd_free_record(start->stack.record);
			list_del(&start->list);
			trace_hash_del(item);
			free(start);
		}
	}
	trace_hash_free(&task->start_hash);

	trace_hash_for_each_bucket(bucket, &task->event_hash) {
		trace_hash_while_item(item, bucket) {
			event_hash = event_from_item(item);
			trace_hash_del(item);
			free_event_hash(event_hash);
		}
	}
	trace_hash_free(&task->event_hash);

	if (task->last_stack)
		tracecmd_free_record(task->last_stack);
}

static void free_task(struct task_data *task)
{
	__free_task(task);
	free(task);
}

static void free_group(struct group_data *group)
{
	struct trace_hash_item **bucket;
	struct trace_hash_item *item;
	struct event_hash *event_hash;

	free(group->comm);

	trace_hash_for_each_bucket(bucket, &group->event_hash) {
		trace_hash_while_item(item, bucket) {
			event_hash = event_from_item(item);
			trace_hash_del(item);
			free_event_hash(event_hash);
		}
	}
	trace_hash_free(&group->event_hash);
	free(group);
}

static void show_global_task(struct handle_data *h,
			     struct task_data *task)
{
	if (trace_hash_empty(&task->event_hash))
		return;

	output_task(h, task);
}

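/*
 * Pull every task out of the handle's task hash, sort by pid, and
 * print each one. Tasks are freed as they are printed, with the hash
 * items removed along the way.
 */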
static void output_tasks(struct handle_data *h)
{
	struct trace_hash_item **bucket;
	struct trace_hash_item *item;
	struct task_data **tasks;
	int nr_tasks = 0;
	int i;

	trace_hash_for_each_bucket(bucket, &h->task_hash) {
		trace_hash_for_each_item(item, bucket) {
			nr_tasks++;
		}
	}

	tasks = malloc(sizeof(*tasks) * nr_tasks);
	if (!tasks) {
		warning("Could not allocate tasks");
		return;
	}

	nr_tasks = 0;

	trace_hash_for_each_bucket(bucket, &h->task_hash) {
		trace_hash_while_item(item, bucket) {
			tasks[nr_tasks++] = task_from_item(item);
			trace_hash_del(item);
		}
	}

	qsort(tasks, nr_tasks, sizeof(*tasks), compare_tasks);

	for (i = 0; i < nr_tasks; i++) {
		output_task(h, tasks[i]);
		free_task(tasks[i]);
	}

	free(tasks);
}

static void output_groups(struct handle_data *h)
{
	struct trace_hash_item **bucket;
	struct trace_hash_item *item;
	struct group_data **groups;
	int nr_groups = 0;
	int i;

	trace_hash_for_each_bucket(bucket, &h->group_hash) {
		trace_hash_for_each_item(item, bucket) {
			nr_groups++;
		}
	}

	if (nr_groups == 0)
		return;

	groups = malloc(sizeof(*groups) * nr_groups);
	if (!groups) {
		warning("Could not allocate groups");
		return;
	}

	nr_groups = 0;

	trace_hash_for_each_bucket(bucket, &h->group_hash) {
		trace_hash_while_item(item, bucket) {
			groups[nr_groups++] = group_from_item(item);
			trace_hash_del(item);
		}
	}

	qsort(groups, nr_groups, sizeof(*groups), compare_groups);

	for (i = 0; i < nr_groups; i++) {
		output_group(h, groups[i]);
		free_group(groups[i]);
	}

	free(groups);
}

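/*
 * Top-level report for one trace handle: the global and per-cpu
 * tasks first, then merged groups, then the individual tasks.
 */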
static void output_handle(struct handle_data *h)
{
	int i;

	show_global_task(h, h->global_task);
	for (i = 0; i < h->cpus; i++)
		show_global_task(h, &h->global_percpu_tasks[i]);

	output_groups(h);
	output_tasks(h);
}

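/*
 * Fold one stack into an event's stack hash. If an identical call
 * chain is already hashed, accumulate the counts and times into the
 * existing entry and free the incoming stack; otherwise just add it.
 */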
static void merge_event_stack(struct event_hash *event,
			      struct stack_data *stack)
{
	struct stack_data *exist;
	struct trace_hash_item *item;
	struct stack_match match;

	match.caller = stack->caller;
	match.size = stack->size;
	item = trace_hash_find(&event->stacks, stack->hash.key, match_stack,
			       &match);
	if (!item) {
		trace_hash_add(&event->stacks, &stack->hash);
		return;
	}
	exist = stack_from_item(item);
	exist->count += stack->count;
	exist->time += stack->time;

	if (exist->time_max < stack->time_max) {
		exist->time_max = stack->time_max;
		exist->ts_max = stack->ts_max;
	}
	if (exist->time_min > stack->time_min) {
		exist->time_min = stack->time_min;
		exist->ts_min = stack->ts_min;
	}
	free(stack);
}

static void merge_stacks(struct event_hash *exist, struct event_hash *event)
{
	struct stack_data *stack;
	struct trace_hash_item *item;
	struct trace_hash_item **bucket;

	trace_hash_for_each_bucket(bucket, &event->stacks) {
		trace_hash_while_item(item, bucket) {
			stack = stack_from_item(item);
			trace_hash_del(&stack->hash);
			merge_event_stack(exist, stack);
		}
	}
}

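/*
 * Move a task's event entry into its group. Wakeup events collapse
 * into a single entry (their per-task values are zeroed) and
 * sched_switch events are keyed by task state, so matching entries
 * from different tasks hash to the same slot and get merged.
 */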
static void merge_event_into_group(struct group_data *group,
				   struct event_hash *event)
{
	struct event_hash *exist;
	struct trace_hash_item *item;
	struct event_data_match edata;
	unsigned long long key;

	if (event->event_data->type == EVENT_TYPE_WAKEUP) {
		event->search_val = 0;
		event->val = 0;
		key = trace_hash((unsigned long)event->event_data);
	} else if (event->event_data->type == EVENT_TYPE_SCHED_SWITCH) {
		event->search_val = event->val;
		key = (unsigned long)event->event_data +
			((unsigned long)event->val * 2);
		key = trace_hash(key);
	} else {
		key = event->hash.key;
	}

	edata.event_data = event->event_data;
	edata.search_val = event->search_val;
	edata.val = event->val;

	item = trace_hash_find(&group->event_hash, key, match_event, &edata);
	if (!item) {
		event->hash.key = key;
		trace_hash_add(&group->event_hash, &event->hash);
		return;
	}

	exist = event_from_item(item);
	exist->count += event->count;
	exist->time_total += event->time_total;

	if (exist->time_max < event->time_max) {
		exist->time_max = event->time_max;
		exist->ts_max = event->ts_max;
	}
	if (exist->time_min > event->time_min) {
		exist->time_min = event->time_min;
		exist->ts_min = event->ts_min;
	}

	merge_stacks(exist, event);
	free_event_hash(event);
}

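/*
 * Attach a task to the group matching its comm, creating the group
 * on first sight, then migrate all of the task's events into the
 * group's event hash.
 */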
static void add_group(struct handle_data *h, struct task_data *task)
{
	unsigned long long key;
	struct trace_hash_item *item;
	struct group_data *grp;
	struct trace_hash_item **bucket;
	void *data = task->comm;

	if (!task->comm)
		return;

	key = trace_hash_str(task->comm);

	item = trace_hash_find(&h->group_hash, key, match_group, data);
	if (item) {
		grp = group_from_item(item);
	} else {
		grp = malloc(sizeof(*grp));
		if (!grp) {
			warning("Could not allocate group");
			return;
		}
		memset(grp, 0, sizeof(*grp));

		grp->comm = strdup(task->comm);
		if (!grp->comm)
			die("strdup");
		grp->hash.key = key;
		trace_hash_add(&h->group_hash, &grp->hash);
		trace_hash_init(&grp->event_hash, 32);
	}
	task->group = grp;

	trace_hash_for_each_bucket(bucket, &task->event_hash) {
		trace_hash_while_item(item, bucket) {
			struct event_hash *event_hash;

			event_hash = event_from_item(item);
			trace_hash_del(&event_hash->hash);
			merge_event_into_group(grp, event_hash);
		}
	}
}

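/*
 * If merging of tasks with the same comm is enabled, walk the task
 * hash and fold every task into a group named after its comm.
 */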
static void merge_tasks(struct handle_data *h)
{
	struct trace_hash_item **bucket;
	struct trace_hash_item *item;

	if (!merge_like_comms)
		return;

	trace_hash_for_each_bucket(bucket, &h->task_hash) {
		trace_hash_for_each_item(item, bucket)
			add_group(h, task_from_item(item));
	}
}

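/*
 * Entry point for the profile report: for each handle, optionally
 * merge tasks by comm, print the full report, and release the
 * task hash.
 */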
int do_trace_profile(void)
{
	struct handle_data *h;

	for (h = handles; h; h = h->next) {
		if (merge_like_comms)
			merge_tasks(h);
		output_handle(h);
		trace_hash_free(&h->task_hash);
	}

	return 0;
}