• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
2 /* Copyright (c) 2021 Google LLC.
3  *
4  * Based on klockstat from BCC by Jiri Olsa and others
5  * 2021-10-26   Barret Rhoden   Created this.
6  */
7 /* Differences from BCC python tool:
8  * - can specify a lock by ksym name, using '-L'
9  * - tracks whichever task had the max time for acquire and hold, outputted
10  *     when '-s' > 1 (otherwise it's cluttered).
11  * - does not reset stats each interval by default. Can request with -R.
12  */
13 #include <argp.h>
14 #include <errno.h>
15 #include <signal.h>
16 #include <stdio.h>
17 #ifndef _GNU_SOURCE
18 #define _GNU_SOURCE
19 #endif
20 #include <stdlib.h>
21 #include <string.h>
22 #include <time.h>
23 #include <unistd.h>
24 #include <sys/param.h>
25 
26 #include <bpf/libbpf.h>
27 #include <bpf/bpf.h>
28 #include "klockstat.h"
29 #include "klockstat.skel.h"
30 #include "trace_helpers.h"
31 
32 #define warn(...) fprintf(stderr, __VA_ARGS__)
33 
34 enum {
35 	SORT_ACQ_MAX,
36 	SORT_ACQ_COUNT,
37 	SORT_ACQ_TOTAL,
38 	SORT_HLD_MAX,
39 	SORT_HLD_COUNT,
40 	SORT_HLD_TOTAL,
41 };
42 
43 static struct prog_env {
44 	pid_t pid;
45 	pid_t tid;
46 	char *caller;
47 	char *lock_name;
48 	unsigned int nr_locks;
49 	unsigned int nr_stack_entries;
50 	unsigned int sort_acq;
51 	unsigned int sort_hld;
52 	unsigned int duration;
53 	unsigned int interval;
54 	unsigned int iterations;
55 	bool reset;
56 	bool timestamp;
57 	bool verbose;
58 } env = {
59 	.nr_locks = 99999999,
60 	.nr_stack_entries = 1,
61 	.sort_acq = SORT_ACQ_MAX,
62 	.sort_hld = SORT_HLD_MAX,
63 	.interval = 99999999,
64 	.iterations = 99999999,
65 };
66 
67 const char *argp_program_version = "klockstat 0.1";
68 const char *argp_program_bug_address =
69 	"https://github.com/iovisor/bcc/tree/master/libbpf-tools";
70 static const char args_doc[] = "FUNCTION";
71 static const char program_doc[] =
72 "Trace mutex lock acquisition and hold times, in nsec\n"
73 "\n"
74 "Usage: klockstat [-hRTv] [-p PID] [-t TID] [-c FUNC] [-L LOCK] [-n NR_LOCKS]\n"
75 "                 [-s NR_STACKS] [-S SORT] [-d DURATION] [-i INTERVAL]\n"
76 "\v"
77 "Examples:\n"
78 "  klockstat                     # trace system wide until ctrl-c\n"
79 "  klockstat -d 5                # trace for 5 seconds\n"
80 "  klockstat -i 5                # print stats every 5 seconds\n"
81 "  klockstat -p 181              # trace process 181 only\n"
82 "  klockstat -t 181              # trace thread 181 only\n"
83 "  klockstat -c pipe_            # print only for lock callers with 'pipe_'\n"
84 "                                # prefix\n"
85 "  klockstat -L cgroup_mutex     # trace the cgroup_mutex lock only\n"
86 "  klockstat -S acq_count        # sort lock acquired results by acquire count\n"
87 "  klockstat -S hld_total        # sort lock held results by total held time\n"
88 "  klockstat -S acq_count,hld_total  # combination of above\n"
89 "  klockstat -n 3                # display top 3 locks\n"
90 "  klockstat -s 6                # display 6 stack entries per lock\n"
91 ;
92 
93 static const struct argp_option opts[] = {
94 	{ "pid", 'p', "PID", 0, "Filter by process ID" },
95 	{ "tid", 't', "TID", 0, "Filter by thread ID" },
96 	{ 0, 0, 0, 0, "" },
97 	{ "caller", 'c', "FUNC", 0, "Filter by caller string prefix" },
98 	{ "lock", 'L', "LOCK", 0, "Filter by specific ksym lock name" },
99 	{ 0, 0, 0, 0, "" },
100 	{ "locks", 'n', "NR_LOCKS", 0, "Number of locks to print" },
101 	{ "stacks", 's', "NR_STACKS", 0, "Number of stack entries to print per lock" },
102 	{ "sort", 'S', "SORT", 0, "Sort by field:\n  acq_[max|total|count]\n  hld_[max|total|count]" },
103 	{ 0, 0, 0, 0, "" },
104 	{ "duration", 'd', "SECONDS", 0, "Duration to trace" },
105 	{ "interval", 'i', "SECONDS", 0, "Print interval" },
106 	{ "reset", 'R', NULL, 0, "Reset stats each interval" },
107 	{ "timestamp", 'T', NULL, 0, "Print timestamp" },
108 	{ "verbose", 'v', NULL, 0, "Verbose debug output" },
109 
110 	{ NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help" },
111 	{},
112 };
113 
parse_one_sort(struct prog_env * env,const char * sort)114 static bool parse_one_sort(struct prog_env *env, const char *sort)
115 {
116 	const char *field = sort + 4;
117 
118 	if (!strncmp(sort, "acq_", 4)) {
119 		if (!strcmp(field, "max")) {
120 			env->sort_acq = SORT_ACQ_MAX;
121 			return true;
122 		} else if (!strcmp(field, "total")) {
123 			env->sort_acq = SORT_ACQ_TOTAL;
124 			return true;
125 		} else if (!strcmp(field, "count")) {
126 			env->sort_acq = SORT_ACQ_COUNT;
127 			return true;
128 		}
129 	} else if (!strncmp(sort, "hld_", 4)) {
130 		if (!strcmp(field, "max")) {
131 			env->sort_hld = SORT_HLD_MAX;
132 			return true;
133 		} else if (!strcmp(field, "total")) {
134 			env->sort_hld = SORT_HLD_TOTAL;
135 			return true;
136 		} else if (!strcmp(field, "count")) {
137 			env->sort_hld = SORT_HLD_COUNT;
138 			return true;
139 		}
140 	}
141 
142 	return false;
143 }
144 
parse_sorts(struct prog_env * env,char * arg)145 static bool parse_sorts(struct prog_env *env, char *arg)
146 {
147 	char *comma = strchr(arg, ',');
148 
149 	if (comma) {
150 		*comma = '\0';
151 		comma++;
152 		if (!parse_one_sort(env, comma))
153 			return false;
154 	}
155 	return parse_one_sort(env, arg);
156 }
157 
parse_arg(int key,char * arg,struct argp_state * state)158 static error_t parse_arg(int key, char *arg, struct argp_state *state)
159 {
160 	struct prog_env *env = state->input;
161 	long duration, interval;
162 
163 	switch (key) {
164 	case 'p':
165 		errno = 0;
166 		env->pid = strtol(arg, NULL, 10);
167 		if (errno || env->pid <= 0) {
168 			warn("Invalid PID: %s\n", arg);
169 			argp_usage(state);
170 		}
171 		break;
172 	case 't':
173 		errno = 0;
174 		env->tid = strtol(arg, NULL, 10);
175 		if (errno || env->tid <= 0) {
176 			warn("Invalid TID: %s\n", arg);
177 			argp_usage(state);
178 		}
179 		break;
180 	case 'c':
181 		env->caller = arg;
182 		break;
183 	case 'L':
184 		env->lock_name = arg;
185 		break;
186 	case 'n':
187 		errno = 0;
188 		env->nr_locks = strtol(arg, NULL, 10);
189 		if (errno || env->nr_locks <= 0) {
190 			warn("Invalid NR_LOCKS: %s\n", arg);
191 			argp_usage(state);
192 		}
193 		break;
194 	case 's':
195 		errno = 0;
196 		env->nr_stack_entries = strtol(arg, NULL, 10);
197 		if (errno || env->nr_stack_entries <= 0) {
198 			warn("Invalid NR_STACKS: %s\n", arg);
199 			argp_usage(state);
200 		}
201 		break;
202 	case 'S':
203 		if (!parse_sorts(env, arg)) {
204 			warn("Bad sort string: %s\n", arg);
205 			argp_usage(state);
206 		}
207 		break;
208 	case 'd':
209 		errno = 0;
210 		duration = strtol(arg, NULL, 10);
211 		if (errno || duration <= 0) {
212 			warn("Invalid duration: %s\n", arg);
213 			argp_usage(state);
214 		}
215 		env->duration = duration;
216 		break;
217 	case 'i':
218 		errno = 0;
219 		interval = strtol(arg, NULL, 10);
220 		if (errno || interval <= 0) {
221 			warn("Invalid interval: %s\n", arg);
222 			argp_usage(state);
223 		}
224 		env->interval = interval;
225 		break;
226 	case 'R':
227 		env->reset = true;
228 		break;
229 	case 'T':
230 		env->timestamp = true;
231 		break;
232 	case 'h':
233 		argp_state_help(state, stderr, ARGP_HELP_STD_HELP);
234 		break;
235 	case 'v':
236 		env->verbose = true;
237 		break;
238 	case ARGP_KEY_END:
239 		if (env->duration) {
240 			if (env->interval > env->duration)
241 				env->interval = env->duration;
242 			env->iterations = env->duration / env->interval;
243 		}
244 		break;
245 	default:
246 		return ARGP_ERR_UNKNOWN;
247 	}
248 	return 0;
249 }
250 
251 struct stack_stat {
252 	uint32_t stack_id;
253 	struct lock_stat ls;
254 	uint64_t bt[PERF_MAX_STACK_DEPTH];
255 };
256 
caller_is_traced(struct ksyms * ksyms,uint64_t caller_pc)257 static bool caller_is_traced(struct ksyms *ksyms, uint64_t caller_pc)
258 {
259 	const struct ksym *ksym;
260 
261 	if (!env.caller)
262 		return true;
263 	ksym = ksyms__map_addr(ksyms, caller_pc);
264 	if (!ksym)
265 		return true;
266 	return strncmp(env.caller, ksym->name, strlen(env.caller)) == 0;
267 }
268 
larger_first(uint64_t x,uint64_t y)269 static int larger_first(uint64_t x, uint64_t y)
270 {
271 	if (x > y)
272 		return -1;
273 	if (x == y)
274 		return 0;
275 	return 1;
276 }
277 
sort_by_acq(const void * x,const void * y)278 static int sort_by_acq(const void *x, const void *y)
279 {
280 	struct stack_stat *ss_x = *(struct stack_stat**)x;
281 	struct stack_stat *ss_y = *(struct stack_stat**)y;
282 
283 	switch (env.sort_acq) {
284 	case SORT_ACQ_MAX:
285 		return larger_first(ss_x->ls.acq_max_time,
286 				    ss_y->ls.acq_max_time);
287 	case SORT_ACQ_COUNT:
288 		return larger_first(ss_x->ls.acq_count,
289 				    ss_y->ls.acq_count);
290 	case SORT_ACQ_TOTAL:
291 		return larger_first(ss_x->ls.acq_total_time,
292 				    ss_y->ls.acq_total_time);
293 	}
294 
295 	warn("bad sort_acq %d\n", env.sort_acq);
296 	return -1;
297 }
298 
sort_by_hld(const void * x,const void * y)299 static int sort_by_hld(const void *x, const void *y)
300 {
301 	struct stack_stat *ss_x = *(struct stack_stat**)x;
302 	struct stack_stat *ss_y = *(struct stack_stat**)y;
303 
304 	switch (env.sort_hld) {
305 	case SORT_HLD_MAX:
306 		return larger_first(ss_x->ls.hld_max_time,
307 				    ss_y->ls.hld_max_time);
308 	case SORT_HLD_COUNT:
309 		return larger_first(ss_x->ls.hld_count,
310 				    ss_y->ls.hld_count);
311 	case SORT_HLD_TOTAL:
312 		return larger_first(ss_x->ls.hld_total_time,
313 				    ss_y->ls.hld_total_time);
314 	}
315 
316 	warn("bad sort_hld %d\n", env.sort_hld);
317 	return -1;
318 }
319 
symname(struct ksyms * ksyms,uint64_t pc,char * buf,size_t n)320 static char *symname(struct ksyms *ksyms, uint64_t pc, char *buf, size_t n)
321 {
322 	const struct ksym *ksym = ksyms__map_addr(ksyms, pc);
323 
324 	if (!ksym)
325 		return "Unknown";
326 	snprintf(buf, n, "%s+0x%lx", ksym->name, pc - ksym->addr);
327 	return buf;
328 }
329 
print_acq_header(void)330 static void print_acq_header(void)
331 {
332 	printf("\n                               Caller  Avg Wait    Count   Max Wait   Total Wait\n");
333 }
334 
print_acq_stat(struct ksyms * ksyms,struct stack_stat * ss,int nr_stack_entries)335 static void print_acq_stat(struct ksyms *ksyms, struct stack_stat *ss,
336 			   int nr_stack_entries)
337 {
338 	char buf[40];
339 	int i;
340 
341 	printf("%37s %9llu %8llu %10llu %12llu\n",
342 	       symname(ksyms, ss->bt[0], buf, sizeof(buf)),
343 	       ss->ls.acq_total_time / ss->ls.acq_count,
344 	       ss->ls.acq_count,
345 	       ss->ls.acq_max_time,
346 	       ss->ls.acq_total_time);
347 	for (i = 1; i < nr_stack_entries; i++) {
348 		if (!ss->bt[i])
349 			break;
350 		printf("%37s\n", symname(ksyms, ss->bt[i], buf, sizeof(buf)));
351 	}
352 	if (nr_stack_entries > 1)
353 		printf("                              Max PID %llu, COMM %s\n",
354 		       ss->ls.acq_max_id >> 32,
355 		       ss->ls.acq_max_comm);
356 }
357 
print_hld_header(void)358 static void print_hld_header(void)
359 {
360 	printf("\n                               Caller  Avg Hold    Count   Max Hold   Total Hold\n");
361 }
362 
print_hld_stat(struct ksyms * ksyms,struct stack_stat * ss,int nr_stack_entries)363 static void print_hld_stat(struct ksyms *ksyms, struct stack_stat *ss,
364 			   int nr_stack_entries)
365 {
366 	char buf[40];
367 	int i;
368 
369 	printf("%37s %9llu %8llu %10llu %12llu\n",
370 	       symname(ksyms, ss->bt[0], buf, sizeof(buf)),
371 	       ss->ls.hld_total_time / ss->ls.hld_count,
372 	       ss->ls.hld_count,
373 	       ss->ls.hld_max_time,
374 	       ss->ls.hld_total_time);
375 	for (i = 1; i < nr_stack_entries; i++) {
376 		if (!ss->bt[i])
377 			break;
378 		printf("%37s\n", symname(ksyms, ss->bt[i], buf, sizeof(buf)));
379 	}
380 	if (nr_stack_entries > 1)
381 		printf("                              Max PID %llu, COMM %s\n",
382 		       ss->ls.hld_max_id >> 32,
383 		       ss->ls.hld_max_comm);
384 }
385 
print_stats(struct ksyms * ksyms,int stack_map,int stat_map)386 static int print_stats(struct ksyms *ksyms, int stack_map, int stat_map)
387 {
388 	struct stack_stat **stats, *ss;
389 	size_t stat_idx = 0;
390 	size_t stats_sz = 1;
391 	uint32_t lookup_key = 0;
392 	uint32_t stack_id;
393 	int ret, i;
394 
395 	stats = calloc(stats_sz, sizeof(void *));
396 	if (!stats) {
397 		warn("Out of memory\n");
398 		return -1;
399 	}
400 
401 	while (bpf_map_get_next_key(stat_map, &lookup_key, &stack_id) == 0) {
402 		if (stat_idx == stats_sz) {
403 			stats_sz *= 2;
404 			stats = reallocarray(stats, stats_sz, sizeof(void *));
405 			if (!stats) {
406 				warn("Out of memory\n");
407 				return -1;
408 			}
409 		}
410 		ss = malloc(sizeof(struct stack_stat));
411 		if (!ss) {
412 			warn("Out of memory\n");
413 			return -1;
414 		}
415 		ss->stack_id = stack_id;
416 		if (env.reset) {
417 			ret = bpf_map_lookup_and_delete_elem(stat_map,
418 							     &stack_id,
419 							     &ss->ls);
420 			lookup_key = 0;
421 		} else {
422 			ret = bpf_map_lookup_elem(stat_map, &stack_id, &ss->ls);
423 			lookup_key = stack_id;
424 		}
425 		if (ret) {
426 			free(ss);
427 			continue;
428 		}
429 		if (bpf_map_lookup_elem(stack_map, &stack_id, &ss->bt)) {
430 			/* Can still report the results without a backtrace. */
431 			warn("failed to lookup stack_id %u\n", stack_id);
432 		}
433 		if (!caller_is_traced(ksyms, ss->bt[0])) {
434 			free(ss);
435 			continue;
436 		}
437 		stats[stat_idx++] = ss;
438 	}
439 
440 	qsort(stats, stat_idx, sizeof(void*), sort_by_acq);
441 	for (i = 0; i < MIN(env.nr_locks, stat_idx); i++) {
442 		if (i == 0 || env.nr_stack_entries > 1)
443 			print_acq_header();
444 		print_acq_stat(ksyms, stats[i],
445 			       MIN(env.nr_stack_entries, PERF_MAX_STACK_DEPTH));
446 	}
447 
448 	qsort(stats, stat_idx, sizeof(void*), sort_by_hld);
449 	for (i = 0; i < MIN(env.nr_locks, stat_idx); i++) {
450 		if (i == 0 || env.nr_stack_entries > 1)
451 			print_hld_header();
452 		print_hld_stat(ksyms, stats[i],
453 			       MIN(env.nr_stack_entries, PERF_MAX_STACK_DEPTH));
454 	}
455 
456 	for (i = 0; i < stat_idx; i++)
457 		free(stats[i]);
458 	free(stats);
459 
460 	return 0;
461 }
462 
get_lock_addr(struct ksyms * ksyms,const char * lock_name)463 static void *get_lock_addr(struct ksyms *ksyms, const char *lock_name)
464 {
465 	const struct ksym *ksym = ksyms__get_symbol(ksyms, lock_name);
466 
467 	return ksym ? (void*)ksym->addr : NULL;
468 }
469 
470 static volatile bool exiting;
471 
sig_hand(int signr)472 static void sig_hand(int signr)
473 {
474 	exiting = true;
475 }
476 
477 static struct sigaction sigact = {.sa_handler = sig_hand};
478 
libbpf_print_fn(enum libbpf_print_level level,const char * format,va_list args)479 static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
480 {
481 	if (level == LIBBPF_DEBUG && !env.verbose)
482 		return 0;
483 	return vfprintf(stderr, format, args);
484 }
485 
main(int argc,char ** argv)486 int main(int argc, char **argv)
487 {
488 	static const struct argp argp = {
489 		.options = opts,
490 		.parser = parse_arg,
491 		.args_doc = args_doc,
492 		.doc = program_doc,
493 	};
494 	struct klockstat_bpf *obj = NULL;
495 	struct ksyms *ksyms = NULL;
496 	int i, err;
497 	struct tm *tm;
498 	char ts[32];
499 	time_t t;
500 	void *lock_addr = NULL;
501 
502 	err = argp_parse(&argp, argc, argv, 0, NULL, &env);
503 	if (err)
504 		return err;
505 
506 	sigaction(SIGINT, &sigact, 0);
507 
508 	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
509 	libbpf_set_print(libbpf_print_fn);
510 
511 	ksyms = ksyms__load();
512 	if (!ksyms) {
513 		warn("failed to load kallsyms\n");
514 		err = 1;
515 		goto cleanup;
516 	}
517 	if (env.lock_name) {
518 		lock_addr = get_lock_addr(ksyms, env.lock_name);
519 		if (!lock_addr) {
520 			warn("failed to find lock %s\n", env.lock_name);
521 			err = 1;
522 			goto cleanup;
523 		}
524 	}
525 
526 	obj = klockstat_bpf__open();
527 	if (!obj) {
528 		warn("failed to open BPF object\n");
529 		err = 1;
530 		goto cleanup;
531 	}
532 
533 	obj->rodata->targ_tgid = env.pid;
534 	obj->rodata->targ_pid = env.tid;
535 	obj->rodata->targ_lock = lock_addr;
536 
537 	if (fentry_can_attach("mutex_lock_nested", NULL)) {
538 		bpf_program__set_attach_target(obj->progs.mutex_lock, 0,
539 					       "mutex_lock_nested");
540 		bpf_program__set_attach_target(obj->progs.mutex_lock_exit, 0,
541 					       "mutex_lock_nested");
542 		bpf_program__set_attach_target(obj->progs.mutex_lock_interruptible, 0,
543 					       "mutex_lock_interruptible_nested");
544 		bpf_program__set_attach_target(obj->progs.mutex_lock_interruptible_exit, 0,
545 					       "mutex_lock_interruptible_nested");
546 		bpf_program__set_attach_target(obj->progs.mutex_lock_killable, 0,
547 					       "mutex_lock_killable_nested");
548 		bpf_program__set_attach_target(obj->progs.mutex_lock_killable_exit, 0,
549 					       "mutex_lock_killable_nested");
550 	}
551 
552 	err = klockstat_bpf__load(obj);
553 	if (err) {
554 		warn("failed to load BPF object\n");
555 		return 1;
556 	}
557 	err = klockstat_bpf__attach(obj);
558 	if (err) {
559 		warn("failed to attach BPF object\n");
560 		goto cleanup;
561 	}
562 
563 	printf("Tracing mutex lock events...  Hit Ctrl-C to end\n");
564 
565 	for (i = 0; i < env.iterations && !exiting; i++) {
566 		sleep(env.interval);
567 
568 		printf("\n");
569 		if (env.timestamp) {
570 			time(&t);
571 			tm = localtime(&t);
572 			strftime(ts, sizeof(ts), "%H:%M:%S", tm);
573 			printf("%-8s\n", ts);
574 		}
575 
576 		if (print_stats(ksyms, bpf_map__fd(obj->maps.stack_map),
577 				bpf_map__fd(obj->maps.stat_map))) {
578 			warn("print_stats error, aborting.\n");
579 			break;
580 		}
581 	}
582 
583 	printf("Exiting trace of mutex locks\n");
584 
585 cleanup:
586 	klockstat_bpf__destroy(obj);
587 	ksyms__free(ksyms);
588 
589 	return err != 0;
590 }
591