• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
2 // Copyright (c) 2020 Anton Protopopov
3 //
4 // Based on syscount(8) from BCC by Sasha Goldshtein
5 #include <unistd.h>
6 #include <signal.h>
7 #include <fcntl.h>
8 #include <time.h>
9 #include <argp.h>
10 #include <bpf/bpf.h>
11 #include "syscount.h"
12 #include "syscount.skel.h"
13 #include "errno_helpers.h"
14 #include "syscall_helpers.h"
15 #include "trace_helpers.h"
16 
17 /* This structure extends data_t by adding a key item which should be sorted
18  * together with the count and total_ns fields */
19 struct data_ext_t {
20 	__u64 count;
21 	__u64 total_ns;
22 	char comm[TASK_COMM_LEN];
23 	__u32 key;
24 };
25 
26 
27 #define warn(...) fprintf(stderr, __VA_ARGS__)
28 
/* Version string and bug-report address that argp looks up by these names. */
const char *argp_program_version = "syscount 0.1";
const char *argp_program_bug_address =
	"https://github.com/iovisor/bcc/tree/master/libbpf-tools";

/*
 * Help text shown by argp.  With no -i the interval is 0, which makes the
 * main loop print a single summary when tracing stops, so the first example
 * must not promise per-second output.
 */
static const char argp_program_doc[] =
"\nsyscount: summarize syscall counts and latencies\n"
"\n"
"EXAMPLES:\n"
"    syscount                 # print top 10 syscalls by count on Ctrl-C\n"
"    syscount -p $(pidof dd)  # look only at a particular process\n"
"    syscount -L              # measure and sort output by latency\n"
"    syscount -P              # group statistics by pid, not by syscall\n"
"    syscount -x -i 5         # count only failed syscalls\n"
"    syscount -e ENOENT -i 5  # count only syscalls failed with a given errno"
;
43 
/*
 * Command-line options handed to argp.  Each short key here has a matching
 * case in parse_arg().  Help strings split across lines are joined by the
 * compiler, so every continuation must start with its own separating space.
 */
static const struct argp_option opts[] = {
	{ "verbose", 'v', NULL, 0, "Verbose debug output" },
	{ "pid", 'p', "PID", 0, "Process PID to trace" },
	{ "interval", 'i', "INTERVAL", 0, "Print summary at this interval"
				" (seconds), 0 for infinite wait (default)" },
	{ "duration", 'd', "DURATION", 0, "Total tracing duration (seconds)" },
	{ "top", 'T', "TOP", 0, "Print only the top syscalls (default 10)" },
	{ "failures", 'x', NULL, 0, "Trace only failed syscalls" },
	{ "latency", 'L', NULL, 0, "Collect syscall latency" },
	{ "milliseconds", 'm', NULL, 0, "Display latency in milliseconds"
					" (default: microseconds)" },
	{ "process", 'P', NULL, 0, "Count by process and not by syscall" },
	{ "errno", 'e', "ERRNO", 0, "Trace only syscalls that return this error"
				 " (numeric or EPERM, etc.)" },
	{ "list", 'l', NULL, 0, "Print list of recognized syscalls and exit" },
	{ NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help" },
	{},
};
62 
/*
 * Run-time configuration.  Everything starts out zero/false except top;
 * parse_arg() overwrites fields as the matching options are seen.
 */
static struct env {
	/* what to trace */
	pid_t pid;		/* -p: only this process; 0 = no filter */
	int filter_errno;	/* -e: only this errno; 0 = no filter */
	bool failures;		/* -x: only failed syscalls */

	/* what to collect */
	bool latency;		/* -L: also measure latency */
	bool process;		/* -P: aggregate by process, not syscall */

	/* how to report */
	int interval;		/* -i: seconds between summaries; 0 = one
				 * summary when tracing stops */
	int duration;		/* -d: total seconds to trace; 0 = no limit */
	int top;		/* -T: number of rows to print */
	bool milliseconds;	/* -m: show latency in ms instead of us */

	/* miscellaneous */
	bool verbose;		/* -v: let libbpf debug messages through */
	bool list_syscalls;	/* -l: list known syscalls and exit */
} env = {
	.top = 10,
};
78 
/*
 * Parse arg as a base-10 integer in the inclusive range [min, max].
 *
 * On success the value is stored through ret (when ret is non-NULL) and 0 is
 * returned.  -1 is returned, and *ret is left untouched, when arg contains
 * no digits, has anything after the number, overflows a long, or lies
 * outside [min, max].  Only the overflow case prints a diagnostic here; the
 * caller is expected to report the others.
 */
static int get_int(const char *arg, int *ret, int min, int max)
{
	char *end;
	long val;

	errno = 0;
	val = strtol(arg, &end, 10);
	if (errno) {
		fprintf(stderr, "strtol: %s: %s\n", arg, strerror(errno));
		return -1;
	}
	/* end == arg: nothing was parsed.  *end != '\0': something follows
	 * the digits (e.g. "10x"), which strtol() alone would accept as 10. */
	if (end == arg || *end != '\0' || val < min || val > max)
		return -1;
	if (ret)
		*ret = (int)val;	/* fits: min <= val <= max are ints */
	return 0;
}
96 
libbpf_print_fn(enum libbpf_print_level level,const char * format,va_list args)97 static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
98 {
99 	if (level == LIBBPF_DEBUG && !env.verbose)
100 		return 0;
101 
102 	return vfprintf(stderr, format, args);
103 }
104 
compar_count(const void * dx,const void * dy)105 static int compar_count(const void *dx, const void *dy)
106 {
107 	__u64 x = ((struct data_ext_t *) dx)->count;
108 	__u64 y = ((struct data_ext_t *) dy)->count;
109 	return x > y ? -1 : !(x == y);
110 }
111 
compar_latency(const void * dx,const void * dy)112 static int compar_latency(const void *dx, const void *dy)
113 {
114 	__u64 x = ((struct data_ext_t *) dx)->total_ns;
115 	__u64 y = ((struct data_ext_t *) dy)->total_ns;
116 	return x > y ? -1 : !(x == y);
117 }
118 
agg_col(struct data_ext_t * val,char * buf,size_t size)119 static const char *agg_col(struct data_ext_t *val, char *buf, size_t size)
120 {
121 	if (env.process) {
122 		snprintf(buf, size, "%-6u %-15s", val->key, val->comm);
123 	} else {
124 		syscall_name(val->key, buf, size);
125 	}
126 	return buf;
127 }
128 
agg_colname(void)129 static const char *agg_colname(void)
130 {
131 	return (env.process) ? "PID    COMM" : "SYSCALL";
132 }
133 
time_colname(void)134 static const char *time_colname(void)
135 {
136 	return (env.milliseconds) ? "TIME (ms)" : "TIME (us)";
137 }
138 
/* Print the column headings for the latency report. */
static void print_latency_header(void)
{
	const char *first_col = agg_colname();
	const char *time_col = time_colname();

	printf("%-22s %8s %16s\n", first_col, "COUNT", time_col);
}
143 
/* Print the column headings for the count-only report. */
static void print_count_header(void)
{
	const char *first_col = agg_colname();

	printf("%-22s %8s\n", first_col, "COUNT");
}
148 
print_latency(struct data_ext_t * vals,size_t count)149 static void print_latency(struct data_ext_t *vals, size_t count)
150 {
151 	double div = env.milliseconds ? 1000000.0 : 1000.0;
152 	char buf[2 * TASK_COMM_LEN];
153 	int i;
154 
155 	print_latency_header();
156 	for (i = 0; i < count && i < env.top; i++)
157 		printf("%-22s %8llu %16.3lf\n",
158 		       agg_col(&vals[i], buf, sizeof(buf)),
159 		       vals[i].count, vals[i].total_ns / div);
160 	printf("\n");
161 }
162 
print_count(struct data_ext_t * vals,size_t count)163 static void print_count(struct data_ext_t *vals, size_t count)
164 {
165 	char buf[2 * TASK_COMM_LEN];
166 	int i;
167 
168 	print_count_header();
169 	for (i = 0; i < count && i < env.top; i++)
170 		printf("%-22s %8llu\n",
171 		       agg_col(&vals[i], buf, sizeof(buf)), vals[i].count);
172 	printf("\n");
173 }
174 
/*
 * Print the current local time as "[HH:MM:SS]" on its own line.  If the
 * time cannot be converted, a newline-terminated warning goes to stderr
 * instead.
 */
static void print_timestamp(void)
{
	time_t now = time(NULL);
	struct tm tm;

	if (localtime_r(&now, &tm))
		printf("[%02d:%02d:%02d]\n", tm.tm_hour, tm.tm_min, tm.tm_sec);
	else
		warn("localtime_r: %s\n", strerror(errno));
}
185 
186 static bool batch_map_ops = true; /* hope for the best */
187 
read_vals_batch(int fd,struct data_ext_t * vals,__u32 * count)188 static bool read_vals_batch(int fd, struct data_ext_t *vals, __u32 *count)
189 {
190 	struct data_t orig_vals[*count];
191 	void *in = NULL, *out;
192 	__u32 i, n, n_read = 0;
193 	__u32 keys[*count];
194 	int err = 0;
195 
196 	while (n_read < *count && !err) {
197 		n = *count - n_read;
198 		err = bpf_map_lookup_and_delete_batch(fd, &in, &out,
199 				keys + n_read, orig_vals + n_read, &n, NULL);
200 		if (err && errno != ENOENT) {
201 			/* we want to propagate EINVAL upper, so that
202 			 * the batch_map_ops flag is set to false */
203 			if (errno != EINVAL)
204 				warn("bpf_map_lookup_and_delete_batch: %s\n",
205 				     strerror(-err));
206 			return false;
207 		}
208 		n_read += n;
209 		in = out;
210 	}
211 
212 	for (i = 0; i < n_read; i++) {
213 		vals[i].count = orig_vals[i].count;
214 		vals[i].total_ns = orig_vals[i].total_ns;
215 		vals[i].key = keys[i];
216 		strncpy(vals[i].comm, orig_vals[i].comm, TASK_COMM_LEN);
217 	}
218 
219 	*count = n_read;
220 	return true;
221 }
222 
/*
 * Read and remove up to *count entries from the map behind fd, storing them
 * in vals and setting *count to the number stored.
 *
 * The batched path (read_vals_batch()) is tried first.  If it fails with
 * EINVAL, batch_map_ops is cleared and this and all later calls use the
 * element-by-element fallback: collect keys, look each one up, then delete
 * the keys that were read.
 *
 * Returns true on success, including the no-op case where vals or count is
 * NULL or *count is 0.  Returns false after a map operation failed; a
 * warning has been printed.
 */
static bool read_vals(int fd, struct data_ext_t *vals, __u32 *count)
{
	__u32 keys[MAX_ENTRIES];
	struct data_t val;
	__u32 key = -1;
	__u32 next_key;
	__u32 limit, n_keys = 0, n_vals = 0, j;
	int err;

	/* Check the arguments before either path uses them: the batched
	 * path dereferences count and sizes its buffers from *count. */
	if (!vals || !count || !*count)
		return true;

	if (batch_map_ops) {
		bool ok = read_vals_batch(fd, vals, count);
		if (!ok && errno == EINVAL) {
			/* fall back to a racy variant */
			batch_map_ops = false;
		} else {
			return ok;
		}
	}

	/* keys[] has room for MAX_ENTRIES keys, whatever the caller asks */
	limit = *count < MAX_ENTRIES ? *count : MAX_ENTRIES;

	while (n_keys < limit) {
		err = bpf_map_get_next_key(fd, &key, &next_key);
		if (err && errno != ENOENT) {
			warn("failed to get next key: %s\n", strerror(errno));
			return false;
		} else if (err) {
			break;	/* ENOENT: no more keys */
		}
		key = keys[n_keys++] = next_key;
	}

	for (j = 0; j < n_keys; j++) {
		err = bpf_map_lookup_elem(fd, &keys[j], &val);
		if (err && errno != ENOENT) {
			warn("failed to lookup element: %s\n", strerror(errno));
			return false;
		}
		/* ENOENT: the entry is gone and val was not filled in, so
		 * there is nothing to report or delete for this key */
		if (err)
			continue;
		vals[n_vals].count = val.count;
		vals[n_vals].total_ns = val.total_ns;
		vals[n_vals].key = keys[j];
		memcpy(vals[n_vals].comm, val.comm, TASK_COMM_LEN);
		/* compact keys[] so that only keys actually read get deleted */
		keys[n_vals] = keys[j];
		n_vals++;
	}

	/* There is a race here: system calls recorded under these keys
	 * between the lookup above and the delete below are lost.  That is
	 * why the batched path, which looks up and deletes in one call, is
	 * preferred whenever it works. */

	for (j = 0; j < n_vals; j++) {
		err = bpf_map_delete_elem(fd, &keys[j]);
		if (err) {
			warn("failed to delete element: %s\n", strerror(errno));
			return false;
		}
	}

	*count = n_vals;
	return true;
}
284 
parse_arg(int key,char * arg,struct argp_state * state)285 static error_t parse_arg(int key, char *arg, struct argp_state *state)
286 {
287 	int number;
288 	int err;
289 
290 	switch (key) {
291 	case 'h':
292 		argp_state_help(state, stderr, ARGP_HELP_STD_HELP);
293 		break;
294 	case 'v':
295 		env.verbose = true;
296 		break;
297 	case 'x':
298 		env.failures = true;
299 		break;
300 	case 'L':
301 		env.latency = true;
302 		break;
303 	case 'm':
304 		env.milliseconds = true;
305 		break;
306 	case 'P':
307 		env.process = true;
308 		break;
309 	case 'p':
310 		err = get_int(arg, &env.pid, 1, INT_MAX);
311 		if (err) {
312 			warn("invalid PID: %s\n", arg);
313 			argp_usage(state);
314 		}
315 		break;
316 	case 'i':
317 		err = get_int(arg, &env.interval, 0, INT_MAX);
318 		if (err) {
319 			warn("invalid INTERVAL: %s\n", arg);
320 			argp_usage(state);
321 		}
322 		break;
323 	case 'd':
324 		err = get_int(arg, &env.duration, 1, INT_MAX);
325 		if (err) {
326 			warn("invalid DURATION: %s\n", arg);
327 			argp_usage(state);
328 		}
329 		break;
330 	case 'T':
331 		err = get_int(arg, &env.top, 1, INT_MAX);
332 		if (err) {
333 			warn("invalid TOP: %s\n", arg);
334 			argp_usage(state);
335 		}
336 		break;
337 	case 'e':
338 		err = get_int(arg, &number, 1, INT_MAX);
339 		if (err) {
340 			number = errno_by_name(arg);
341 			if (number < 0) {
342 				warn("invalid errno: %s (bad, or can't "
343 				     "parse dynamically; consider using "
344 				     "numeric value and/or installing the "
345 				     "errno program from moreutils)\n", arg);
346 				argp_usage(state);
347 			}
348 		}
349 		env.filter_errno = number;
350 		break;
351 	case 'l':
352 		env.list_syscalls = true;
353 		break;
354 	default:
355 		return ARGP_ERR_UNKNOWN;
356 	}
357 	return 0;
358 }
359 
/* Run flag tested by the main loop.  It is written from a signal handler,
 * hence volatile sig_atomic_t. */
static volatile sig_atomic_t hang_on = 1;

/* SIGINT handler: clear the run flag so the main loop stops. */
void sig_int(int signo)
{
	(void)signo;	/* the handler does not need the signal number */
	hang_on = 0;
}
366 
main(int argc,char ** argv)367 int main(int argc, char **argv)
368 {
369 	void (*print)(struct data_ext_t *, size_t);
370 	int (*compar)(const void *, const void *);
371 	static const struct argp argp = {
372 		.options = opts,
373 		.parser = parse_arg,
374 		.doc = argp_program_doc,
375 	};
376 	struct data_ext_t vals[MAX_ENTRIES];
377 	struct syscount_bpf *obj;
378 	int seconds = 0;
379 	__u32 count;
380 	int err;
381 
382 	init_syscall_names();
383 
384 	err = argp_parse(&argp, argc, argv, 0, NULL, NULL);
385 	if (err)
386 		goto free_names;
387 
388 	if (env.list_syscalls) {
389 		list_syscalls();
390 		goto free_names;
391 	}
392 
393 	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
394 	libbpf_set_print(libbpf_print_fn);
395 
396 	obj = syscount_bpf__open();
397 	if (!obj) {
398 		warn("failed to open BPF object\n");
399 		err = 1;
400 		goto free_names;
401 	}
402 
403 	if (env.pid)
404 		obj->rodata->filter_pid = env.pid;
405 	if (env.failures)
406 		obj->rodata->filter_failed = true;
407 	if (env.latency)
408 		obj->rodata->measure_latency = true;
409 	if (env.process)
410 		obj->rodata->count_by_process = true;
411 	if (env.filter_errno)
412 		obj->rodata->filter_errno = env.filter_errno;
413 
414 	err = syscount_bpf__load(obj);
415 	if (err) {
416 		warn("failed to load BPF object: %s\n", strerror(-err));
417 		goto cleanup_obj;
418 	}
419 
420 	obj->links.sys_exit = bpf_program__attach(obj->progs.sys_exit);
421 	if (!obj->links.sys_exit) {
422 		err = -errno;
423 		warn("failed to attach sys_exit program: %s\n", strerror(-err));
424 		goto cleanup_obj;
425 	}
426 	if (env.latency) {
427 		obj->links.sys_enter = bpf_program__attach(obj->progs.sys_enter);
428 		if (!obj->links.sys_enter) {
429 			err = -errno;
430 			warn("failed to attach sys_enter programs: %s\n",
431 			     strerror(-err));
432 			goto cleanup_obj;
433 		}
434 	}
435 
436 	if (signal(SIGINT, sig_int) == SIG_ERR) {
437 		warn("can't set signal handler: %s\n", strerror(errno));
438 		goto cleanup_obj;
439 	}
440 
441 	compar = env.latency ? compar_latency : compar_count;
442 	print = env.latency ? print_latency : print_count;
443 
444 	printf("Tracing syscalls, printing top %d... Ctrl+C to quit.\n", env.top);
445 	while (hang_on) {
446 		sleep(env.interval ?: 1);
447 		if (env.duration) {
448 			seconds += env.interval ?: 1;
449 			if (seconds >= env.duration)
450 				hang_on = 0;
451 		}
452 		if (hang_on && !env.interval)
453 			continue;
454 
455 		count = MAX_ENTRIES;
456 		if (!read_vals(bpf_map__fd(obj->maps.data), vals, &count))
457 			break;
458 		if (!count)
459 			continue;
460 
461 		qsort(vals, count, sizeof(vals[0]), compar);
462 		print_timestamp();
463 		print(vals, count);
464 	}
465 
466 cleanup_obj:
467 	syscount_bpf__destroy(obj);
468 free_names:
469 	free_syscall_names();
470 
471 	return err != 0;
472 }
473