• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * builtin-top.c
3  *
4  * Builtin top command: Display a continuously updated profile of
5  * any workload, CPU or specific PID.
6  *
7  * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
8  *
9  * Improvements and fixes by:
10  *
11  *   Arjan van de Ven <arjan@linux.intel.com>
12  *   Yanmin Zhang <yanmin.zhang@intel.com>
13  *   Wu Fengguang <fengguang.wu@intel.com>
14  *   Mike Galbraith <efault@gmx.de>
15  *   Paul Mackerras <paulus@samba.org>
16  *
17  * Released under the GPL v2. (and only v2, not any later version)
18  */
19 #include "builtin.h"
20 
21 #include "perf.h"
22 
23 #include "util/annotate.h"
24 #include "util/cache.h"
25 #include "util/color.h"
26 #include "util/evlist.h"
27 #include "util/evsel.h"
28 #include "util/session.h"
29 #include "util/symbol.h"
30 #include "util/thread.h"
31 #include "util/thread_map.h"
32 #include "util/top.h"
33 #include "util/util.h"
34 /* ANDROID_CHANGE_BEGIN */
35 #if 0
36 #include <linux/rbtree.h>
37 #else
38 #include "util/include/linux/rbtree.h"
39 #endif
40 /* ANDROID_CHANGE_END */
41 #include "util/parse-options.h"
42 #include "util/parse-events.h"
43 #include "util/cpumap.h"
44 #include "util/xyarray.h"
45 
46 #include "util/debug.h"
47 
48 #include <assert.h>
49 #include <fcntl.h>
50 
51 #include <stdio.h>
52 #include <termios.h>
53 #include <unistd.h>
54 #include <inttypes.h>
55 
56 #include <errno.h>
57 #include <time.h>
58 #include <sched.h>
59 
60 #include <sys/syscall.h>
61 #include <sys/ioctl.h>
62 #include <sys/poll.h>
63 /* ANDROID_CHANGE_BEGIN */
64 #ifndef __APPLE__
65 #include <sys/prctl.h>
66 #endif
67 /* ANDROID_CHANGE_END */
68 #include <sys/wait.h>
69 #include <sys/uio.h>
70 #include <sys/mman.h>
71 
72 /* ANDROID_CHANGE_BEGIN */
73 #ifndef __APPLE__
74 /* Suppress kernel-name space pollution in <linux/types.h> below */
75 #include <features.h>
76 #include <linux/unistd.h>
77 #include <linux/types.h>
78 #else
79 #include "util/include/linux/types.h"
80 #endif
81 /* ANDROID_CHANGE_END */
82 
83 static struct perf_top top = {
84 	.count_filter		= 5,
85 	.delay_secs		= 2,
86 	.display_weighted	= -1,
87 	.target_pid		= -1,
88 	.target_tid		= -1,
89 	.active_symbols		= LIST_HEAD_INIT(top.active_symbols),
90 	.active_symbols_lock	= PTHREAD_MUTEX_INITIALIZER,
91 	.active_symbols_cond	= PTHREAD_COND_INITIALIZER,
92 	.freq			= 1000, /* 1 KHz */
93 };
94 
/* -a/--all-cpus: system-wide collection from all CPUs. */
static bool system_wide;

/* --tui / --stdio: explicit user interface selection. */
static bool use_tui;
static bool use_stdio;

/* -c/--count: event period to sample (0 = not given). */
static int default_interval;

/* One-shot flags so each warning in the sample path is shown only once. */
static bool kptr_restrict_warned;
static bool vmlinux_warned;

/* -i/--inherit: child tasks inherit counters. */
static bool inherit;
/* -r/--realtime: SCHED_FIFO priority for the collecting thread (0 = off). */
static int realtime_prio;
/* -g/--group: put the counters into a counter group. */
static bool group;
/* Cached sysconf(_SC_PAGE_SIZE), filled in by cmd_top(). */
static unsigned int page_size;
/* -m/--mmap-pages: number of mmap data pages. */
static unsigned int mmap_pages = 128;

/* -D/--dump-symtab: dump the symbol table used for profiling on quit. */
static bool dump_symtab;

/* Last known terminal dimensions, see get_term_dimensions(). */
static struct winsize winsize;

/* -s/--sym-annotate: name of the symbol to annotate, NULL once consumed. */
static const char *sym_filter;
/* Entry matched by sym_filter, waiting to be installed by the sample path. */
struct sym_entry *sym_filter_entry_sched;
/* Minimum percentage shown in the annotate view ('F' key). */
static int sym_pcnt_filter = 5;
116 
117 /*
118  * Source functions
119  */
120 
/*
 * Fill @ws with the terminal size.
 *
 * Sources, in order of preference:
 *   1. the LINES and COLUMNS environment variables (both must be set and
 *      parse to non-zero values),
 *   2. the TIOCGWINSZ ioctl on file descriptor 1, when available,
 *   3. a fixed 25x80 fallback.
 */
void get_term_dimensions(struct winsize *ws)
{
	const char *lines = getenv("LINES");

	if (lines) {
		const char *columns;

		ws->ws_row = atoi(lines);
		columns = getenv("COLUMNS");
		if (columns) {
			ws->ws_col = atoi(columns);
			if (ws->ws_row != 0 && ws->ws_col != 0)
				return;
		}
	}
#ifdef TIOCGWINSZ
	if (!ioctl(1, TIOCGWINSZ, ws) && ws->ws_row != 0 && ws->ws_col != 0)
		return;
#endif
	ws->ws_row = 25;
	ws->ws_col = 80;
}
142 
update_print_entries(struct winsize * ws)143 static void update_print_entries(struct winsize *ws)
144 {
145 	top.print_entries = ws->ws_row;
146 
147 	if (top.print_entries > 9)
148 		top.print_entries -= 9;
149 }
150 
/*
 * SIGWINCH handler: re-read the terminal size and resize the table.
 * Also called directly from handle_keypress() to force a refresh.
 */
static void sig_winch_handler(int sig __used)
{
	struct winsize *ws = &winsize;

	get_term_dimensions(ws);
	update_print_entries(ws);
}
156 
parse_source(struct sym_entry * syme)157 static int parse_source(struct sym_entry *syme)
158 {
159 	struct symbol *sym;
160 	struct annotation *notes;
161 	struct map *map;
162 	int err = -1;
163 
164 	if (!syme)
165 		return -1;
166 
167 	sym = sym_entry__symbol(syme);
168 	map = syme->map;
169 
170 	/*
171 	 * We can't annotate with just /proc/kallsyms
172 	 */
173 	if (map->dso->symtab_type == SYMTAB__KALLSYMS) {
174 		pr_err("Can't annotate %s: No vmlinux file was found in the "
175 		       "path\n", sym->name);
176 		sleep(1);
177 		return -1;
178 	}
179 
180 	notes = symbol__annotation(sym);
181 	if (notes->src != NULL) {
182 		pthread_mutex_lock(&notes->lock);
183 		goto out_assign;
184 	}
185 
186 	pthread_mutex_lock(&notes->lock);
187 
188 	if (symbol__alloc_hist(sym, top.evlist->nr_entries) < 0) {
189 		pthread_mutex_unlock(&notes->lock);
190 		pr_err("Not enough memory for annotating '%s' symbol!\n",
191 		       sym->name);
192 		sleep(1);
193 		return err;
194 	}
195 
196     /* ANDROID_CHANGE_BEGIN */
197 #if 0
198 	err = symbol__annotate(sym, syme->map, 0);
199 #else
200 	err = symbol__annotate(sym, syme->map, 0, false);
201 #endif
202     /* ANDROID_CHANGE_END */
203 	if (err == 0) {
204 out_assign:
205 		top.sym_filter_entry = syme;
206 	}
207 
208 	pthread_mutex_unlock(&notes->lock);
209 	return err;
210 }
211 
/* Reset every annotation histogram of the symbol behind @syme. */
static void __zero_source_counters(struct sym_entry *syme)
{
	symbol__annotate_zero_histograms(sym_entry__symbol(syme));
}
217 
/*
 * Account a sample at @ip for event index @counter in the annotation
 * histogram of @syme -- but only if @syme is the symbol currently being
 * annotated.  The annotation lock is only tried, never waited for: when it
 * is busy the sample is simply not recorded.
 */
static void record_precise_ip(struct sym_entry *syme, int counter, u64 ip)
{
	if (syme == top.sym_filter_entry) {
		struct symbol *sym = sym_entry__symbol(syme);
		struct annotation *notes = symbol__annotation(sym);

		if (pthread_mutex_trylock(&notes->lock) == 0) {
			/* translate the sampled address through the map */
			u64 map_ip = syme->map->map_ip(syme->map, ip);

			symbol__inc_addr_samples(sym, syme->map, counter, map_ip);
			pthread_mutex_unlock(&notes->lock);
		}
	}
}
237 
show_details(struct sym_entry * syme)238 static void show_details(struct sym_entry *syme)
239 {
240 	struct annotation *notes;
241 	struct symbol *symbol;
242 	int more;
243 
244 	if (!syme)
245 		return;
246 
247 	symbol = sym_entry__symbol(syme);
248 	notes = symbol__annotation(symbol);
249 
250 	pthread_mutex_lock(&notes->lock);
251 
252 	if (notes->src == NULL)
253 		goto out_unlock;
254 
255 	printf("Showing %s for %s\n", event_name(top.sym_evsel), symbol->name);
256 	printf("  Events  Pcnt (>=%d%%)\n", sym_pcnt_filter);
257 
258 	more = symbol__annotate_printf(symbol, syme->map, top.sym_evsel->idx,
259 				       0, sym_pcnt_filter, top.print_entries, 4);
260 	if (top.zero)
261 		symbol__annotate_zero_histogram(symbol, top.sym_evsel->idx);
262 	else
263 		symbol__annotate_decay_histogram(symbol, top.sym_evsel->idx);
264 	if (more != 0)
265 		printf("%d lines not displayed, maybe increase display entries [e]\n", more);
266 out_unlock:
267 	pthread_mutex_unlock(&notes->lock);
268 }
269 
/*
 * ANSI/VT100 sequence written before every refresh: ESC[H moves the cursor
 * to the top-left corner, ESC[2J erases the whole display.  Spelled with
 * octal escapes so the control bytes survive editors and code viewers.
 */
static const char		CONSOLE_CLEAR[] = "\033[H\033[2J";
271 
__list_insert_active_sym(struct sym_entry * syme)272 static void __list_insert_active_sym(struct sym_entry *syme)
273 {
274 	list_add(&syme->node, &top.active_symbols);
275 }
276 
print_sym_table(struct perf_session * session)277 static void print_sym_table(struct perf_session *session)
278 {
279 	char bf[160];
280 	int printed = 0;
281 	struct rb_node *nd;
282 	struct sym_entry *syme;
283 	struct rb_root tmp = RB_ROOT;
284 	const int win_width = winsize.ws_col - 1;
285 	int sym_width, dso_width, dso_short_width;
286 	float sum_ksamples = perf_top__decay_samples(&top, &tmp);
287 
288 	puts(CONSOLE_CLEAR);
289 
290 	perf_top__header_snprintf(&top, bf, sizeof(bf));
291 	printf("%s\n", bf);
292 
293 	perf_top__reset_sample_counters(&top);
294 
295 	printf("%-*.*s\n", win_width, win_width, graph_dotted_line);
296 
297 	if (session->hists.stats.total_lost != 0) {
298 		color_fprintf(stdout, PERF_COLOR_RED, "WARNING:");
299 		printf(" LOST %" PRIu64 " events, Check IO/CPU overload\n",
300 		       session->hists.stats.total_lost);
301 	}
302 
303 	if (top.sym_filter_entry) {
304 		show_details(top.sym_filter_entry);
305 		return;
306 	}
307 
308 	perf_top__find_widths(&top, &tmp, &dso_width, &dso_short_width,
309 			      &sym_width);
310 
311 	if (sym_width + dso_width > winsize.ws_col - 29) {
312 		dso_width = dso_short_width;
313 		if (sym_width + dso_width > winsize.ws_col - 29)
314 			sym_width = winsize.ws_col - dso_width - 29;
315 	}
316 	putchar('\n');
317 	if (top.evlist->nr_entries == 1)
318 		printf("             samples  pcnt");
319 	else
320 		printf("   weight    samples  pcnt");
321 
322 	if (verbose)
323 		printf("         RIP       ");
324 	printf(" %-*.*s DSO\n", sym_width, sym_width, "function");
325 	printf("   %s    _______ _____",
326 	       top.evlist->nr_entries == 1 ? "      " : "______");
327 	if (verbose)
328 		printf(" ________________");
329 	printf(" %-*.*s", sym_width, sym_width, graph_line);
330 	printf(" %-*.*s", dso_width, dso_width, graph_line);
331 	puts("\n");
332 
333 	for (nd = rb_first(&tmp); nd; nd = rb_next(nd)) {
334 		struct symbol *sym;
335 		double pcnt;
336 
337 		syme = rb_entry(nd, struct sym_entry, rb_node);
338 		sym = sym_entry__symbol(syme);
339 		if (++printed > top.print_entries ||
340 		    (int)syme->snap_count < top.count_filter)
341 			continue;
342 
343 		pcnt = 100.0 - (100.0 * ((sum_ksamples - syme->snap_count) /
344 					 sum_ksamples));
345 
346 		if (top.evlist->nr_entries == 1 || !top.display_weighted)
347 			printf("%20.2f ", syme->weight);
348 		else
349 			printf("%9.1f %10ld ", syme->weight, syme->snap_count);
350 
351 		percent_color_fprintf(stdout, "%4.1f%%", pcnt);
352 		if (verbose)
353 			printf(" %016" PRIx64, sym->start);
354 		printf(" %-*.*s", sym_width, sym_width, sym->name);
355 		printf(" %-*.*s\n", dso_width, dso_width,
356 		       dso_width >= syme->map->dso->long_name_len ?
357 					syme->map->dso->long_name :
358 					syme->map->dso->short_name);
359 	}
360 }
361 
/*
 * Print "@msg: " and read one line from stdin.  If the line consists only
 * of decimal digits and the value fits in an int, store it in *@target;
 * otherwise leave *@target untouched.
 *
 * An empty line contains no non-digit and therefore stores 0, as before.
 *
 * The line buffer is owned by this function and is released on every exit
 * path, including getline() failure (the buffer must be freed even then).
 */
static void prompt_integer(int *target, const char *msg)
{
	char *buf = NULL, *p;
	size_t len = 0;
	unsigned long val;
	int tmp;

	fprintf(stdout, "\n%s: ", msg);
	if (getline(&buf, &len, stdin) < 0)
		goto out_free;

	p = strchr(buf, '\n');
	if (p)
		*p = 0;

	/* <ctype.h> functions need an unsigned char value */
	for (p = buf; *p; p++) {
		if (!isdigit((unsigned char)*p))
			goto out_free;
	}

	errno = 0;
	val = strtoul(buf, NULL, 10);
	tmp = (int)val;
	/* reject anything that does not survive the round trip through int */
	if (errno == ERANGE || tmp < 0 || (unsigned long)tmp != val)
		goto out_free;

	*target = tmp;
out_free:
	free(buf);
}
387 
/*
 * Prompt for a percentage.  *@target is updated only when the user entered
 * a number in the range 0..100.
 *
 * The scratch value starts out of range so that input rejected by
 * prompt_integer() leaves *@target alone instead of resetting it to 0.
 */
static void prompt_percent(int *target, const char *msg)
{
	int tmp = -1;

	prompt_integer(&tmp, msg);
	if (tmp >= 0 && tmp <= 100)
		*target = tmp;
}
396 
prompt_symbol(struct sym_entry ** target,const char * msg)397 static void prompt_symbol(struct sym_entry **target, const char *msg)
398 {
399 	char *buf = malloc(0), *p;
400 	struct sym_entry *syme = *target, *n, *found = NULL;
401 	size_t dummy = 0;
402 
403 	/* zero counters of active symbol */
404 	if (syme) {
405 		__zero_source_counters(syme);
406 		*target = NULL;
407 	}
408 
409 	fprintf(stdout, "\n%s: ", msg);
410 	if (getline(&buf, &dummy, stdin) < 0)
411 		goto out_free;
412 
413 	p = strchr(buf, '\n');
414 	if (p)
415 		*p = 0;
416 
417 	pthread_mutex_lock(&top.active_symbols_lock);
418 	syme = list_entry(top.active_symbols.next, struct sym_entry, node);
419 	pthread_mutex_unlock(&top.active_symbols_lock);
420 
421 	list_for_each_entry_safe_from(syme, n, &top.active_symbols, node) {
422 		struct symbol *sym = sym_entry__symbol(syme);
423 
424 		if (!strcmp(buf, sym->name)) {
425 			found = syme;
426 			break;
427 		}
428 	}
429 
430 	if (!found) {
431 		fprintf(stderr, "Sorry, %s is not active.\n", buf);
432 		sleep(1);
433 		return;
434 	} else
435 		parse_source(found);
436 
437 out_free:
438 	free(buf);
439 }
440 
print_mapped_keys(void)441 static void print_mapped_keys(void)
442 {
443 	char *name = NULL;
444 
445 	if (top.sym_filter_entry) {
446 		struct symbol *sym = sym_entry__symbol(top.sym_filter_entry);
447 		name = sym->name;
448 	}
449 
450 	fprintf(stdout, "\nMapped keys:\n");
451 	fprintf(stdout, "\t[d]     display refresh delay.             \t(%d)\n", top.delay_secs);
452 	fprintf(stdout, "\t[e]     display entries (lines).           \t(%d)\n", top.print_entries);
453 
454 	if (top.evlist->nr_entries > 1)
455 		fprintf(stdout, "\t[E]     active event counter.              \t(%s)\n", event_name(top.sym_evsel));
456 
457 	fprintf(stdout, "\t[f]     profile display filter (count).    \t(%d)\n", top.count_filter);
458 
459 	fprintf(stdout, "\t[F]     annotate display filter (percent). \t(%d%%)\n", sym_pcnt_filter);
460 	fprintf(stdout, "\t[s]     annotate symbol.                   \t(%s)\n", name?: "NULL");
461 	fprintf(stdout, "\t[S]     stop annotation.\n");
462 
463 	if (top.evlist->nr_entries > 1)
464 		fprintf(stdout, "\t[w]     toggle display weighted/count[E]r. \t(%d)\n", top.display_weighted ? 1 : 0);
465 
466 	fprintf(stdout,
467 		"\t[K]     hide kernel_symbols symbols.     \t(%s)\n",
468 		top.hide_kernel_symbols ? "yes" : "no");
469 	fprintf(stdout,
470 		"\t[U]     hide user symbols.               \t(%s)\n",
471 		top.hide_user_symbols ? "yes" : "no");
472 	fprintf(stdout, "\t[z]     toggle sample zeroing.             \t(%d)\n", top.zero ? 1 : 0);
473 	fprintf(stdout, "\t[qQ]    quit.\n");
474 }
475 
key_mapped(int c)476 static int key_mapped(int c)
477 {
478 	switch (c) {
479 		case 'd':
480 		case 'e':
481 		case 'f':
482 		case 'z':
483 		case 'q':
484 		case 'Q':
485 		case 'K':
486 		case 'U':
487 		case 'F':
488 		case 's':
489 		case 'S':
490 			return 1;
491 		case 'E':
492 		case 'w':
493 			return top.evlist->nr_entries > 1 ? 1 : 0;
494 		default:
495 			break;
496 	}
497 
498 	return 0;
499 }
500 
handle_keypress(struct perf_session * session,int c)501 static void handle_keypress(struct perf_session *session, int c)
502 {
503 	if (!key_mapped(c)) {
504 		struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
505 		struct termios tc, save;
506 
507 		print_mapped_keys();
508 		fprintf(stdout, "\nEnter selection, or unmapped key to continue: ");
509 		fflush(stdout);
510 
511 		tcgetattr(0, &save);
512 		tc = save;
513 		tc.c_lflag &= ~(ICANON | ECHO);
514 		tc.c_cc[VMIN] = 0;
515 		tc.c_cc[VTIME] = 0;
516 		tcsetattr(0, TCSANOW, &tc);
517 
518 		poll(&stdin_poll, 1, -1);
519 		c = getc(stdin);
520 
521 		tcsetattr(0, TCSAFLUSH, &save);
522 		if (!key_mapped(c))
523 			return;
524 	}
525 
526 	switch (c) {
527 		case 'd':
528 			prompt_integer(&top.delay_secs, "Enter display delay");
529 			if (top.delay_secs < 1)
530 				top.delay_secs = 1;
531 			break;
532 		case 'e':
533 			prompt_integer(&top.print_entries, "Enter display entries (lines)");
534 			if (top.print_entries == 0) {
535 				sig_winch_handler(SIGWINCH);
536 				signal(SIGWINCH, sig_winch_handler);
537 			} else
538 				signal(SIGWINCH, SIG_DFL);
539 			break;
540 		case 'E':
541 			if (top.evlist->nr_entries > 1) {
542 				/* Select 0 as the default event: */
543 				int counter = 0;
544 
545 				fprintf(stderr, "\nAvailable events:");
546 
547 				list_for_each_entry(top.sym_evsel, &top.evlist->entries, node)
548 					fprintf(stderr, "\n\t%d %s", top.sym_evsel->idx, event_name(top.sym_evsel));
549 
550 				prompt_integer(&counter, "Enter details event counter");
551 
552 				if (counter >= top.evlist->nr_entries) {
553 					top.sym_evsel = list_entry(top.evlist->entries.next, struct perf_evsel, node);
554 					fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(top.sym_evsel));
555 					sleep(1);
556 					break;
557 				}
558 				list_for_each_entry(top.sym_evsel, &top.evlist->entries, node)
559 					if (top.sym_evsel->idx == counter)
560 						break;
561 			} else
562 				top.sym_evsel = list_entry(top.evlist->entries.next, struct perf_evsel, node);
563 			break;
564 		case 'f':
565 			prompt_integer(&top.count_filter, "Enter display event count filter");
566 			break;
567 		case 'F':
568 			prompt_percent(&sym_pcnt_filter, "Enter details display event filter (percent)");
569 			break;
570 		case 'K':
571 			top.hide_kernel_symbols = !top.hide_kernel_symbols;
572 			break;
573 		case 'q':
574 		case 'Q':
575 			printf("exiting.\n");
576 			if (dump_symtab)
577 				perf_session__fprintf_dsos(session, stderr);
578 			exit(0);
579 		case 's':
580 			prompt_symbol(&top.sym_filter_entry, "Enter details symbol");
581 			break;
582 		case 'S':
583 			if (!top.sym_filter_entry)
584 				break;
585 			else {
586 				struct sym_entry *syme = top.sym_filter_entry;
587 
588 				top.sym_filter_entry = NULL;
589 				__zero_source_counters(syme);
590 			}
591 			break;
592 		case 'U':
593 			top.hide_user_symbols = !top.hide_user_symbols;
594 			break;
595 		case 'w':
596 			top.display_weighted = ~top.display_weighted;
597 			break;
598 		case 'z':
599 			top.zero = !top.zero;
600 			break;
601 		default:
602 			break;
603 	}
604 }
605 
/*
 * Display thread for the TUI: wait until at least one symbol is active,
 * run the browser, then leave the browser and terminate the process.
 * If waiting on the condition variable fails the browser is skipped.
 */
static void *display_thread_tui(void *arg __used)
{
	int err = 0;

	pthread_mutex_lock(&top.active_symbols_lock);
	while (!err && list_empty(&top.active_symbols))
		err = pthread_cond_wait(&top.active_symbols_cond,
					&top.active_symbols_lock);
	pthread_mutex_unlock(&top.active_symbols_lock);

	if (err == 0)
		perf_top__tui_browser(&top);

	exit_browser(0);
	exit(0);
	return NULL;
}
623 
display_thread(void * arg __used)624 static void *display_thread(void *arg __used)
625 {
626 	struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
627 	struct termios tc, save;
628 	int delay_msecs, c;
629 	struct perf_session *session = (struct perf_session *) arg;
630 
631 	tcgetattr(0, &save);
632 	tc = save;
633 	tc.c_lflag &= ~(ICANON | ECHO);
634 	tc.c_cc[VMIN] = 0;
635 	tc.c_cc[VTIME] = 0;
636 
637 repeat:
638 	delay_msecs = top.delay_secs * 1000;
639 	tcsetattr(0, TCSANOW, &tc);
640 	/* trash return*/
641 	getc(stdin);
642 
643 	do {
644 		print_sym_table(session);
645 	} while (!poll(&stdin_poll, 1, delay_msecs) == 1);
646 
647 	c = getc(stdin);
648 	tcsetattr(0, TCSAFLUSH, &save);
649 
650 	handle_keypress(session, c);
651 	goto repeat;
652 
653 	return NULL;
654 }
655 
/*
 * Symbols whose samples are tagged to be skipped (sym->ignore is set for
 * them in symbol_filter()).  NULL-terminated.
 */
static const char *skip_symbols[] = {
	"default_idle",
	"native_safe_halt",
	"cpu_idle",
	"enter_idle",
	"exit_idle",
	"mwait_idle",
	"mwait_idle_with_hints",
	"poll_idle",
	"ppc64_runlatch_off",
	"pseries_dedicated_idle_sleep",
	NULL	/* end of list */
};
670 
symbol_filter(struct map * map,struct symbol * sym)671 static int symbol_filter(struct map *map, struct symbol *sym)
672 {
673 	struct sym_entry *syme;
674 	const char *name = sym->name;
675 	int i;
676 
677 	/*
678 	 * ppc64 uses function descriptors and appends a '.' to the
679 	 * start of every instruction address. Remove it.
680 	 */
681 	if (name[0] == '.')
682 		name++;
683 
684 	if (!strcmp(name, "_text") ||
685 	    !strcmp(name, "_etext") ||
686 	    !strcmp(name, "_sinittext") ||
687 	    !strncmp("init_module", name, 11) ||
688 	    !strncmp("cleanup_module", name, 14) ||
689 	    strstr(name, "_text_start") ||
690 	    strstr(name, "_text_end"))
691 		return 1;
692 
693 	syme = symbol__priv(sym);
694 	syme->map = map;
695 	symbol__annotate_init(map, sym);
696 
697 	if (!top.sym_filter_entry && sym_filter && !strcmp(name, sym_filter)) {
698 		/* schedule initial sym_filter_entry setup */
699 		sym_filter_entry_sched = syme;
700 		sym_filter = NULL;
701 	}
702 
703 	for (i = 0; skip_symbols[i]; i++) {
704 		if (!strcmp(skip_symbols[i], name)) {
705 			sym->ignore = true;
706 			break;
707 		}
708 	}
709 
710 	return 0;
711 }
712 
perf_event__process_sample(const union perf_event * event,struct perf_sample * sample,struct perf_session * session)713 static void perf_event__process_sample(const union perf_event *event,
714 				       struct perf_sample *sample,
715 				       struct perf_session *session)
716 {
717 	u64 ip = event->ip.ip;
718 	struct sym_entry *syme;
719 	struct addr_location al;
720 	struct machine *machine;
721 	u8 origin = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
722 
723 	++top.samples;
724 
725 	switch (origin) {
726 	case PERF_RECORD_MISC_USER:
727 		++top.us_samples;
728 		if (top.hide_user_symbols)
729 			return;
730 		machine = perf_session__find_host_machine(session);
731 		break;
732 	case PERF_RECORD_MISC_KERNEL:
733 		++top.kernel_samples;
734 		if (top.hide_kernel_symbols)
735 			return;
736 		machine = perf_session__find_host_machine(session);
737 		break;
738 	case PERF_RECORD_MISC_GUEST_KERNEL:
739 		++top.guest_kernel_samples;
740 		machine = perf_session__find_machine(session, event->ip.pid);
741 		break;
742 	case PERF_RECORD_MISC_GUEST_USER:
743 		++top.guest_us_samples;
744 		/*
745 		 * TODO: we don't process guest user from host side
746 		 * except simple counting.
747 		 */
748 		return;
749 	default:
750 		return;
751 	}
752 
753 	if (!machine && perf_guest) {
754 		pr_err("Can't find guest [%d]'s kernel information\n",
755 			event->ip.pid);
756 		return;
757 	}
758 
759 	if (event->header.misc & PERF_RECORD_MISC_EXACT_IP)
760 		top.exact_samples++;
761 
762 	if (perf_event__preprocess_sample(event, session, &al, sample,
763 					  symbol_filter) < 0 ||
764 	    al.filtered)
765 		return;
766 
767 	if (!kptr_restrict_warned &&
768 	    symbol_conf.kptr_restrict &&
769 	    al.cpumode == PERF_RECORD_MISC_KERNEL) {
770 		ui__warning(
771 "Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
772 "Check /proc/sys/kernel/kptr_restrict.\n\n"
773 "Kernel%s samples will not be resolved.\n",
774 			  !RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION]) ?
775 			  " modules" : "");
776 		if (use_browser <= 0)
777 			sleep(5);
778 		kptr_restrict_warned = true;
779 	}
780 
781 	if (al.sym == NULL) {
782 		const char *msg = "Kernel samples will not be resolved.\n";
783 		/*
784 		 * As we do lazy loading of symtabs we only will know if the
785 		 * specified vmlinux file is invalid when we actually have a
786 		 * hit in kernel space and then try to load it. So if we get
787 		 * here and there are _no_ symbols in the DSO backing the
788 		 * kernel map, bail out.
789 		 *
790 		 * We may never get here, for instance, if we use -K/
791 		 * --hide-kernel-symbols, even if the user specifies an
792 		 * invalid --vmlinux ;-)
793 		 */
794 		if (!kptr_restrict_warned && !vmlinux_warned &&
795 		    al.map == machine->vmlinux_maps[MAP__FUNCTION] &&
796 		    RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION])) {
797 			if (symbol_conf.vmlinux_name) {
798 				ui__warning("The %s file can't be used.\n%s",
799 					    symbol_conf.vmlinux_name, msg);
800 			} else {
801 				ui__warning("A vmlinux file was not found.\n%s",
802 					    msg);
803 			}
804 
805 			if (use_browser <= 0)
806 				sleep(5);
807 			vmlinux_warned = true;
808 		}
809 
810 		return;
811 	}
812 
813 	/* let's see, whether we need to install initial sym_filter_entry */
814 	if (sym_filter_entry_sched) {
815 		top.sym_filter_entry = sym_filter_entry_sched;
816 		sym_filter_entry_sched = NULL;
817 		if (parse_source(top.sym_filter_entry) < 0) {
818 			struct symbol *sym = sym_entry__symbol(top.sym_filter_entry);
819 
820 			pr_err("Can't annotate %s", sym->name);
821 			if (top.sym_filter_entry->map->dso->symtab_type == SYMTAB__KALLSYMS) {
822 				pr_err(": No vmlinux file was found in the path:\n");
823 				machine__fprintf_vmlinux_path(machine, stderr);
824 			} else
825 				pr_err(".\n");
826 			exit(1);
827 		}
828 	}
829 
830 	syme = symbol__priv(al.sym);
831 	if (!al.sym->ignore) {
832 		struct perf_evsel *evsel;
833 
834 		evsel = perf_evlist__id2evsel(top.evlist, sample->id);
835 		assert(evsel != NULL);
836 		syme->count[evsel->idx]++;
837 		record_precise_ip(syme, evsel->idx, ip);
838 		pthread_mutex_lock(&top.active_symbols_lock);
839 		if (list_empty(&syme->node) || !syme->node.next) {
840 			static bool first = true;
841 			__list_insert_active_sym(syme);
842 			if (first) {
843 				pthread_cond_broadcast(&top.active_symbols_cond);
844 				first = false;
845 			}
846 		}
847 		pthread_mutex_unlock(&top.active_symbols_lock);
848 	}
849 }
850 
perf_session__mmap_read_idx(struct perf_session * self,int idx)851 static void perf_session__mmap_read_idx(struct perf_session *self, int idx)
852 {
853 	struct perf_sample sample;
854 	union perf_event *event;
855 	int ret;
856 
857 	while ((event = perf_evlist__mmap_read(top.evlist, idx)) != NULL) {
858 		ret = perf_session__parse_sample(self, event, &sample);
859 		if (ret) {
860 			pr_err("Can't parse sample, err = %d\n", ret);
861 			continue;
862 		}
863 
864 		if (event->header.type == PERF_RECORD_SAMPLE)
865 			perf_event__process_sample(event, &sample, self);
866 		else
867 			perf_event__process(event, &sample, self);
868 	}
869 }
870 
perf_session__mmap_read(struct perf_session * self)871 static void perf_session__mmap_read(struct perf_session *self)
872 {
873 	int i;
874 
875 	for (i = 0; i < top.evlist->nr_mmaps; i++)
876 		perf_session__mmap_read_idx(self, i);
877 }
878 
start_counters(struct perf_evlist * evlist)879 static void start_counters(struct perf_evlist *evlist)
880 {
881 	struct perf_evsel *counter;
882 
883 	list_for_each_entry(counter, &evlist->entries, node) {
884 		struct perf_event_attr *attr = &counter->attr;
885 
886 		attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID;
887 
888 		if (top.freq) {
889 			attr->sample_type |= PERF_SAMPLE_PERIOD;
890 			attr->freq	  = 1;
891 			attr->sample_freq = top.freq;
892 		}
893 
894 		if (evlist->nr_entries > 1) {
895 			attr->sample_type |= PERF_SAMPLE_ID;
896 			attr->read_format |= PERF_FORMAT_ID;
897 		}
898 
899 		attr->mmap = 1;
900 		attr->inherit = inherit;
901 try_again:
902 		if (perf_evsel__open(counter, top.evlist->cpus,
903 				     top.evlist->threads, group) < 0) {
904 			int err = errno;
905 
906 			if (err == EPERM || err == EACCES) {
907 				ui__warning_paranoid();
908 				goto out_err;
909 			}
910 			/*
911 			 * If it's cycles then fall back to hrtimer
912 			 * based cpu-clock-tick sw counter, which
913 			 * is always available even if no PMU support:
914 			 */
915 			if (attr->type == PERF_TYPE_HARDWARE &&
916 			    attr->config == PERF_COUNT_HW_CPU_CYCLES) {
917 				if (verbose)
918 					ui__warning("Cycles event not supported,\n"
919 						    "trying to fall back to cpu-clock-ticks\n");
920 
921 				attr->type = PERF_TYPE_SOFTWARE;
922 				attr->config = PERF_COUNT_SW_CPU_CLOCK;
923 				goto try_again;
924 			}
925 
926 			if (err == ENOENT) {
927 				ui__warning("The %s event is not supported.\n",
928 					    event_name(counter));
929 				goto out_err;
930 			}
931 
932 			ui__warning("The sys_perf_event_open() syscall "
933 				    "returned with %d (%s).  /bin/dmesg "
934 				    "may provide additional information.\n"
935 				    "No CONFIG_PERF_EVENTS=y kernel support "
936 				    "configured?\n", err, strerror(err));
937 			goto out_err;
938 		}
939 	}
940 
941 	if (perf_evlist__mmap(evlist, mmap_pages, false) < 0) {
942 		ui__warning("Failed to mmap with %d (%s)\n",
943 			    errno, strerror(errno));
944 		goto out_err;
945 	}
946 
947 	return;
948 
949 out_err:
950 	exit_browser(0);
951 	exit(0);
952 }
953 
__cmd_top(void)954 static int __cmd_top(void)
955 {
956 	/* ANDROID_CHANGE_BEGIN */
957 #ifndef __APPLE__
958 	pthread_t thread;
959 	int ret __used;
960 	/*
961 	 * FIXME: perf_session__new should allow passing a O_MMAP, so that all this
962 	 * mmap reading, etc is encapsulated in it. Use O_WRONLY for now.
963 	 */
964 	struct perf_session *session = perf_session__new(NULL, O_WRONLY, false, false, NULL);
965 	if (session == NULL)
966 		return -ENOMEM;
967 
968 	if (top.target_tid != -1)
969 		perf_event__synthesize_thread_map(top.evlist->threads,
970 						  perf_event__process, session);
971 	else
972 		perf_event__synthesize_threads(perf_event__process, session);
973 
974 	start_counters(top.evlist);
975 	session->evlist = top.evlist;
976 	perf_session__update_sample_type(session);
977 
978 	/* Wait for a minimal set of events before starting the snapshot */
979 	poll(top.evlist->pollfd, top.evlist->nr_fds, 100);
980 
981 	perf_session__mmap_read(session);
982 
983 	if (pthread_create(&thread, NULL, (use_browser > 0 ? display_thread_tui :
984 							     display_thread), session)) {
985 		printf("Could not create display thread.\n");
986 		exit(-1);
987 	}
988 
989 	if (realtime_prio) {
990 		struct sched_param param;
991 
992 		param.sched_priority = realtime_prio;
993 		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
994 			printf("Could not set realtime priority.\n");
995 			exit(-1);
996 		}
997 	}
998 
999 	while (1) {
1000 		u64 hits = top.samples;
1001 
1002 		perf_session__mmap_read(session);
1003 
1004 		if (hits == top.samples)
1005 			ret = poll(top.evlist->pollfd, top.evlist->nr_fds, 100);
1006 	}
1007 
1008 	return 0;
1009 #else
1010 	return -1;
1011 #endif
1012 	/* ANDROID_CHANGE_END */
1013 }
1014 
/* Usage lines handed to parse_options()/usage_with_options(); NULL-terminated. */
static const char * const top_usage[] = {
	"perf top [<options>]",
	NULL	/* end of list */
};
1019 
1020 static const struct option options[] = {
1021 	OPT_CALLBACK('e', "event", &top.evlist, "event",
1022 		     "event selector. use 'perf list' to list available events",
1023 		     parse_events),
1024 	OPT_INTEGER('c', "count", &default_interval,
1025 		    "event period to sample"),
1026 	OPT_INTEGER('p', "pid", &top.target_pid,
1027 		    "profile events on existing process id"),
1028 	OPT_INTEGER('t', "tid", &top.target_tid,
1029 		    "profile events on existing thread id"),
1030 	OPT_BOOLEAN('a', "all-cpus", &system_wide,
1031 			    "system-wide collection from all CPUs"),
1032 	OPT_STRING('C', "cpu", &top.cpu_list, "cpu",
1033 		    "list of cpus to monitor"),
1034 	OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
1035 		   "file", "vmlinux pathname"),
1036 	OPT_BOOLEAN('K', "hide_kernel_symbols", &top.hide_kernel_symbols,
1037 		    "hide kernel symbols"),
1038 	OPT_UINTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"),
1039 	OPT_INTEGER('r', "realtime", &realtime_prio,
1040 		    "collect data with this RT SCHED_FIFO priority"),
1041 	OPT_INTEGER('d', "delay", &top.delay_secs,
1042 		    "number of seconds to delay between refreshes"),
1043 	OPT_BOOLEAN('D', "dump-symtab", &dump_symtab,
1044 			    "dump the symbol table used for profiling"),
1045 	OPT_INTEGER('f', "count-filter", &top.count_filter,
1046 		    "only display functions with more events than this"),
1047 	OPT_BOOLEAN('g', "group", &group,
1048 			    "put the counters into a counter group"),
1049 	OPT_BOOLEAN('i', "inherit", &inherit,
1050 		    "child tasks inherit counters"),
1051 	OPT_STRING('s', "sym-annotate", &sym_filter, "symbol name",
1052 		    "symbol to annotate"),
1053 	OPT_BOOLEAN('z', "zero", &top.zero,
1054 		    "zero history across updates"),
1055 	OPT_INTEGER('F', "freq", &top.freq,
1056 		    "profile at this frequency"),
1057 	OPT_INTEGER('E', "entries", &top.print_entries,
1058 		    "display this many functions"),
1059 	OPT_BOOLEAN('U', "hide_user_symbols", &top.hide_user_symbols,
1060 		    "hide user symbols"),
1061 	OPT_BOOLEAN(0, "tui", &use_tui, "Use the TUI interface"),
1062 	OPT_BOOLEAN(0, "stdio", &use_stdio, "Use the stdio interface"),
1063 	OPT_INCR('v', "verbose", &verbose,
1064 		    "be more verbose (show counter open errors, etc)"),
1065 	OPT_END()
1066 };
1067 
/*
 * Entry point of 'perf top'.
 *
 * Parses the command line, builds the event list and the cpu/thread maps,
 * fills in sampling defaults, initialises the symbol subsystem and the
 * terminal dimensions, and finally runs __cmd_top().
 *
 * Returns the status of __cmd_top(), or a negative value when setup
 * fails.  Every failure after the event list has been created now goes
 * through out_free_fd, so the list is released on those paths as well
 * (two of them used to return directly and leak it).
 */
int cmd_top(int argc, const char **argv, const char *prefix __used)
{
	struct perf_evsel *pos;
	int status = -ENOMEM;

	top.evlist = perf_evlist__new(NULL, NULL);
	if (top.evlist == NULL)
		return -ENOMEM;

	page_size = sysconf(_SC_PAGE_SIZE);

	argc = parse_options(argc, argv, options, top_usage, 0);
	if (argc)
		usage_with_options(top_usage, options);

	/*
	 * XXX For now start disabled, only using TUI if explicitly asked for.
	 * Change that when handle_keys equivalent gets written, live annotation
	 * done, etc.
	 */
	use_browser = 0;

	if (use_stdio)
		use_browser = 0;
	else if (use_tui)
		use_browser = 1;

	setup_browser(false);

	/*
	 * CPU and PID are mutually exclusive
	 *
	 * NOTE(review): only target_tid is tested here, and -p is copied
	 * into target_tid just below, so "-p PID -C cpu" is not caught by
	 * this check -- confirm whether that is intended.
	 */
	if (top.target_tid > 0 && top.cpu_list) {
		printf("WARNING: PID switch overriding CPU\n");
		sleep(1);
		top.cpu_list = NULL;
	}

	if (top.target_pid != -1)
		top.target_tid = top.target_pid;

	if (perf_evlist__create_maps(top.evlist, top.target_pid,
				     top.target_tid, top.cpu_list) < 0)
		usage_with_options(top_usage, options);

	if (!top.evlist->nr_entries &&
	    perf_evlist__add_default(top.evlist) < 0) {
		pr_err("Not enough memory for event selector list\n");
		status = -ENOMEM;
		goto out_free_fd;
	}

	if (top.delay_secs < 1)
		top.delay_secs = 1;

	/*
	 * User specified count overrides default frequency.
	 */
	if (default_interval)
		top.freq = 0;
	else if (top.freq) {
		default_interval = top.freq;
	} else {
		fprintf(stderr, "frequency and count are zero, aborting\n");
		exit(EXIT_FAILURE);
	}

	list_for_each_entry(pos, &top.evlist->entries, node) {
		if (perf_evsel__alloc_fd(pos, top.evlist->cpus->nr,
					 top.evlist->threads->nr) < 0)
			goto out_free_fd;
		/*
		 * Fill in the ones not specifically initialized via -c:
		 */
		if (pos->attr.sample_period)
			continue;

		pos->attr.sample_period = default_interval;
	}

	if (perf_evlist__alloc_pollfd(top.evlist) < 0 ||
	    perf_evlist__alloc_mmap(top.evlist) < 0)
		goto out_free_fd;

	/* the first event is the one shown in the annotate view by default */
	top.sym_evsel = list_entry(top.evlist->entries.next, struct perf_evsel, node);

	symbol_conf.priv_size = (sizeof(struct sym_entry) + sizeof(struct annotation) +
				 (top.evlist->nr_entries + 1) * sizeof(unsigned long));

	symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
	if (symbol__init() < 0) {
		status = -1;
		goto out_free_fd;
	}

	get_term_dimensions(&winsize);
	/* no -E given: size the table from the terminal and track resizes */
	if (top.print_entries == 0) {
		update_print_entries(&winsize);
		signal(SIGWINCH, sig_winch_handler);
	}

	status = __cmd_top();
out_free_fd:
	perf_evlist__delete(top.evlist);

	return status;
}
1170