1 // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
2 // Copyright (c) 2020 Anton Protopopov
3 //
4 // Based on syscount(8) from BCC by Sasha Goldshtein
5 #include <unistd.h>
6 #include <signal.h>
7 #include <fcntl.h>
8 #include <time.h>
9 #include <argp.h>
10 #include <bpf/bpf.h>
11 #include "syscount.h"
12 #include "syscount.skel.h"
13 #include "errno_helpers.h"
14 #include "syscall_helpers.h"
15 #include "trace_helpers.h"
16
17 /* This structure extends data_t by adding a key item which should be sorted
18 * together with the count and total_ns fields */
19 struct data_ext_t {
20 __u64 count;
21 __u64 total_ns;
22 char comm[TASK_COMM_LEN];
23 __u32 key;
24 };
25
26
/* Emit a printf-style diagnostic on stderr. */
#define warn(...) fprintf(stderr, __VA_ARGS__)
28
/*
 * Version string and bug-report address. These two global names are the
 * ones the argp library consults on its own (see the glibc argp manual).
 */
const char *argp_program_version = "syscount 0.1";
const char *argp_program_bug_address =
	"https://github.com/iovisor/bcc/tree/master/libbpf-tools";

/* Help text handed to argp through the .doc member of struct argp in main(). */
static const char argp_program_doc[] =
	"\nsyscount: summarize syscall counts and latencies\n"
	"\n"
	"EXAMPLES:\n"
	" syscount # print top 10 syscalls by count every second\n"
	" syscount -p $(pidof dd) # look only at a particular process\n"
	" syscount -L # measure and sort output by latency\n"
	" syscount -P # group statistics by pid, not by syscall\n"
	" syscount -x -i 5 # count only failed syscalls\n"
	" syscount -e ENOENT -i 5 # count only syscalls failed with a given errno"
	;
43
/*
 * Command-line options handled by parse_arg().
 *
 * Each entry is: long name, short key, argument placeholder (NULL when the
 * option takes no argument), flags, help text. The table ends with an
 * all-zero entry.
 */
static const struct argp_option opts[] = {
	{ "verbose", 'v', NULL, 0, "Verbose debug output" },
	{ "pid", 'p', "PID", 0, "Process PID to trace" },
	{ "interval", 'i', "INTERVAL", 0, "Print summary at this interval"
				" (seconds), 0 for infinite wait (default)" },
	{ "duration", 'd', "DURATION", 0, "Total tracing duration (seconds)" },
	{ "top", 'T', "TOP", 0, "Print only the top syscalls (default 10)" },
	{ "failures", 'x', NULL, 0, "Trace only failed syscalls" },
	{ "latency", 'L', NULL, 0, "Collect syscall latency" },
	{ "milliseconds", 'm', NULL, 0, "Display latency in milliseconds"
				" (default: microseconds)" },
	{ "process", 'P', NULL, 0, "Count by process and not by syscall" },
	/* the continuation string starts with a space so the two halves do
	 * not run together in the help output */
	{ "errno", 'e', "ERRNO", 0, "Trace only syscalls that return this error"
				" (numeric or EPERM, etc.)" },
	{ "list", 'l', NULL, 0, "Print list of recognized syscalls and exit" },
	{ NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help" },
	{},
};
62
/*
 * Runtime configuration, filled in by parse_arg(). Members that are not
 * named in the initializer below start out as zero/false.
 */
static struct env {
	bool list_syscalls;	/* -l: list the recognized syscalls and exit */
	bool milliseconds;	/* -m: show latency in ms instead of us */
	bool failures;		/* -x: trace only failed syscalls */
	bool verbose;		/* -v: let libbpf debug messages through */
	bool latency;		/* -L: collect latency and sort by it */
	bool process;		/* -P: aggregate by process, not by syscall */
	int filter_errno;	/* -e: errno to filter on (0: not set) */
	int interval;		/* -i: seconds between summaries
				 *     (0: print only when tracing stops) */
	int duration;		/* -d: total tracing time in seconds (0: not set) */
	int top;		/* -T: maximum number of rows printed */
	pid_t pid;		/* -p: PID to trace (0: not set) */
} env = {
	.top = 10,
};
78
/*
 * Parse @arg as a base-10 integer and check that it lies in [@min, @max].
 *
 * The whole string has to be a number: an empty string, a string with no
 * leading digits, and a number followed by extra characters (e.g. "12abc")
 * are all rejected.
 *
 * Returns 0 on success and stores the value in *@ret when @ret is not NULL.
 * Returns -1 on any failure, leaving *@ret untouched; a diagnostic is
 * printed only when strtol() itself reports an error through errno.
 */
static int get_int(const char *arg, int *ret, int min, int max)
{
	char *end;
	long val;

	/* strtol() reports overflow only through errno, so clear it first */
	errno = 0;
	val = strtol(arg, &end, 10);
	if (errno) {
		warn("strtol: %s: %s\n", arg, strerror(errno));
		return -1;
	}

	/* nothing was converted, or junk follows the digits */
	if (end == arg || *end != '\0')
		return -1;

	if (val < min || val > max)
		return -1;

	if (ret)
		*ret = (int)val;
	return 0;
}
96
libbpf_print_fn(enum libbpf_print_level level,const char * format,va_list args)97 static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
98 {
99 if (level == LIBBPF_DEBUG && !env.verbose)
100 return 0;
101
102 return vfprintf(stderr, format, args);
103 }
104
compar_count(const void * dx,const void * dy)105 static int compar_count(const void *dx, const void *dy)
106 {
107 __u64 x = ((struct data_ext_t *) dx)->count;
108 __u64 y = ((struct data_ext_t *) dy)->count;
109 return x > y ? -1 : !(x == y);
110 }
111
compar_latency(const void * dx,const void * dy)112 static int compar_latency(const void *dx, const void *dy)
113 {
114 __u64 x = ((struct data_ext_t *) dx)->total_ns;
115 __u64 y = ((struct data_ext_t *) dy)->total_ns;
116 return x > y ? -1 : !(x == y);
117 }
118
agg_col(struct data_ext_t * val,char * buf,size_t size)119 static const char *agg_col(struct data_ext_t *val, char *buf, size_t size)
120 {
121 if (env.process) {
122 snprintf(buf, size, "%-6u %-15s", val->key, val->comm);
123 } else {
124 syscall_name(val->key, buf, size);
125 }
126 return buf;
127 }
128
agg_colname(void)129 static const char *agg_colname(void)
130 {
131 return (env.process) ? "PID COMM" : "SYSCALL";
132 }
133
time_colname(void)134 static const char *time_colname(void)
135 {
136 return (env.milliseconds) ? "TIME (ms)" : "TIME (us)";
137 }
138
/* Print the heading line for latency mode: aggregation key, count, time. */
static void print_latency_header(void)
{
	const char *key_heading = agg_colname();
	const char *time_heading = time_colname();

	printf("%-22s %8s %16s\n", key_heading, "COUNT", time_heading);
}
143
/* Print the heading line for count-only mode: aggregation key and count. */
static void print_count_header(void)
{
	const char *key_heading = agg_colname();

	printf("%-22s %8s\n", key_heading, "COUNT");
}
148
print_latency(struct data_ext_t * vals,size_t count)149 static void print_latency(struct data_ext_t *vals, size_t count)
150 {
151 double div = env.milliseconds ? 1000000.0 : 1000.0;
152 char buf[2 * TASK_COMM_LEN];
153 int i;
154
155 print_latency_header();
156 for (i = 0; i < count && i < env.top; i++)
157 printf("%-22s %8llu %16.3lf\n",
158 agg_col(&vals[i], buf, sizeof(buf)),
159 vals[i].count, vals[i].total_ns / div);
160 printf("\n");
161 }
162
print_count(struct data_ext_t * vals,size_t count)163 static void print_count(struct data_ext_t *vals, size_t count)
164 {
165 char buf[2 * TASK_COMM_LEN];
166 int i;
167
168 print_count_header();
169 for (i = 0; i < count && i < env.top; i++)
170 printf("%-22s %8llu\n",
171 agg_col(&vals[i], buf, sizeof(buf)), vals[i].count);
172 printf("\n");
173 }
174
/*
 * Print the current local time as "[HH:MM:SS]" on its own line, or a
 * diagnostic on stderr when the time cannot be broken down.
 */
static void print_timestamp(void)
{
	time_t now = time(NULL);
	struct tm tm;

	if (localtime_r(&now, &tm))
		printf("[%02d:%02d:%02d]\n", tm.tm_hour, tm.tm_min, tm.tm_sec);
	else
		/* newline-terminated, like every other diagnostic in this file */
		warn("localtime_r: %s\n", strerror(errno));
}
185
186 static bool batch_map_ops = true; /* hope for the best */
187
read_vals_batch(int fd,struct data_ext_t * vals,__u32 * count)188 static bool read_vals_batch(int fd, struct data_ext_t *vals, __u32 *count)
189 {
190 struct data_t orig_vals[*count];
191 void *in = NULL, *out;
192 __u32 i, n, n_read = 0;
193 __u32 keys[*count];
194 int err = 0;
195
196 while (n_read < *count && !err) {
197 n = *count - n_read;
198 err = bpf_map_lookup_and_delete_batch(fd, &in, &out,
199 keys + n_read, orig_vals + n_read, &n, NULL);
200 if (err && errno != ENOENT) {
201 /* we want to propagate EINVAL upper, so that
202 * the batch_map_ops flag is set to false */
203 if (errno != EINVAL)
204 warn("bpf_map_lookup_and_delete_batch: %s\n",
205 strerror(-err));
206 return false;
207 }
208 n_read += n;
209 in = out;
210 }
211
212 for (i = 0; i < n_read; i++) {
213 vals[i].count = orig_vals[i].count;
214 vals[i].total_ns = orig_vals[i].total_ns;
215 vals[i].key = keys[i];
216 strncpy(vals[i].comm, orig_vals[i].comm, TASK_COMM_LEN);
217 }
218
219 *count = n_read;
220 return true;
221 }
222
/*
 * Read up to *@count entries from map @fd into @vals and remove them from
 * the map.
 *
 * The batched API is tried first. If it fails with EINVAL, batch_map_ops is
 * cleared and this and every later call uses the element-by-element path
 * below.
 *
 * Returns true on success, with *@count set to the number of entries stored
 * in @vals. Returns false on failure, after a diagnostic where one is due.
 * When @vals or @count is NULL, or *@count is zero, nothing is read and
 * true is returned with *@count unchanged.
 */
static bool read_vals(int fd, struct data_ext_t *vals, __u32 *count)
{
	__u32 keys[MAX_ENTRIES];
	struct data_t val;
	/* NOTE(review): presumably a key that is never present in the map, so
	 * that bpf_map_get_next_key() starts from the first element - confirm */
	__u32 key = -1;
	__u32 next_key;
	__u32 limit, n_keys = 0, n_vals = 0, j;
	int err;

	/* Checked before anything else: read_vals_batch() dereferences count
	 * and sizes its buffers from *count. */
	if (!vals || !count || !*count)
		return true;

	if (batch_map_ops) {
		bool ok = read_vals_batch(fd, vals, count);

		if (ok || errno != EINVAL)
			return ok;
		/* fall back to a racy variant */
		batch_map_ops = false;
	}

	/* never collect more keys than keys[] can hold */
	limit = *count < MAX_ENTRIES ? *count : MAX_ENTRIES;

	while (n_keys < limit) {
		err = bpf_map_get_next_key(fd, &key, &next_key);
		if (err && errno != ENOENT) {
			warn("failed to get next key: %s\n", strerror(errno));
			return false;
		} else if (err) {
			/* ENOENT: no more keys */
			break;
		}
		key = keys[n_keys++] = next_key;
	}

	for (j = 0; j < n_keys; j++) {
		err = bpf_map_lookup_elem(fd, &keys[j], &val);
		if (err && errno != ENOENT) {
			warn("failed to lookup element: %s\n", strerror(errno));
			return false;
		}
		/* The entry disappeared after its key was collected: val holds
		 * nothing for it, so neither report it nor delete it. */
		if (err)
			continue;

		vals[n_vals].count = val.count;
		vals[n_vals].total_ns = val.total_ns;
		vals[n_vals].key = keys[j];
		memcpy(vals[n_vals].comm, val.comm, TASK_COMM_LEN);
		/* compact keys[] so it lists exactly the entries to delete */
		keys[n_vals] = keys[j];
		n_vals++;
	}

	/* There is a race here: system calls which are represented by keys
	 * above and happened between lookup and delete will be ignored. The
	 * batched path avoids this; this path is only the fallback for when
	 * the batched call is rejected with EINVAL. */

	for (j = 0; j < n_vals; j++) {
		err = bpf_map_delete_elem(fd, &keys[j]);
		if (err) {
			warn("failed to delete element: %s\n", strerror(errno));
			return false;
		}
	}

	*count = n_vals;
	return true;
}
284
parse_arg(int key,char * arg,struct argp_state * state)285 static error_t parse_arg(int key, char *arg, struct argp_state *state)
286 {
287 int number;
288 int err;
289
290 switch (key) {
291 case 'h':
292 argp_state_help(state, stderr, ARGP_HELP_STD_HELP);
293 break;
294 case 'v':
295 env.verbose = true;
296 break;
297 case 'x':
298 env.failures = true;
299 break;
300 case 'L':
301 env.latency = true;
302 break;
303 case 'm':
304 env.milliseconds = true;
305 break;
306 case 'P':
307 env.process = true;
308 break;
309 case 'p':
310 err = get_int(arg, &env.pid, 1, INT_MAX);
311 if (err) {
312 warn("invalid PID: %s\n", arg);
313 argp_usage(state);
314 }
315 break;
316 case 'i':
317 err = get_int(arg, &env.interval, 0, INT_MAX);
318 if (err) {
319 warn("invalid INTERVAL: %s\n", arg);
320 argp_usage(state);
321 }
322 break;
323 case 'd':
324 err = get_int(arg, &env.duration, 1, INT_MAX);
325 if (err) {
326 warn("invalid DURATION: %s\n", arg);
327 argp_usage(state);
328 }
329 break;
330 case 'T':
331 err = get_int(arg, &env.top, 1, INT_MAX);
332 if (err) {
333 warn("invalid TOP: %s\n", arg);
334 argp_usage(state);
335 }
336 break;
337 case 'e':
338 err = get_int(arg, &number, 1, INT_MAX);
339 if (err) {
340 number = errno_by_name(arg);
341 if (number < 0) {
342 warn("invalid errno: %s (bad, or can't "
343 "parse dynamically; consider using "
344 "numeric value and/or installing the "
345 "errno program from moreutils)\n", arg);
346 argp_usage(state);
347 }
348 }
349 env.filter_errno = number;
350 break;
351 case 'l':
352 env.list_syscalls = true;
353 break;
354 default:
355 return ARGP_ERR_UNKNOWN;
356 }
357 return 0;
358 }
359
/*
 * Run flag of the main loop. It is cleared by the SIGINT handler, and by
 * main() itself once the requested duration has elapsed.
 */
static volatile sig_atomic_t hang_on = 1;

/* SIGINT handler: ask the main loop to stop after the current iteration. */
void sig_int(int signo)
{
	(void)signo;
	hang_on = 0;
}
366
main(int argc,char ** argv)367 int main(int argc, char **argv)
368 {
369 void (*print)(struct data_ext_t *, size_t);
370 int (*compar)(const void *, const void *);
371 static const struct argp argp = {
372 .options = opts,
373 .parser = parse_arg,
374 .doc = argp_program_doc,
375 };
376 struct data_ext_t vals[MAX_ENTRIES];
377 struct syscount_bpf *obj;
378 int seconds = 0;
379 __u32 count;
380 int err;
381
382 init_syscall_names();
383
384 err = argp_parse(&argp, argc, argv, 0, NULL, NULL);
385 if (err)
386 goto free_names;
387
388 if (env.list_syscalls) {
389 list_syscalls();
390 goto free_names;
391 }
392
393 libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
394 libbpf_set_print(libbpf_print_fn);
395
396 obj = syscount_bpf__open();
397 if (!obj) {
398 warn("failed to open BPF object\n");
399 err = 1;
400 goto free_names;
401 }
402
403 if (env.pid)
404 obj->rodata->filter_pid = env.pid;
405 if (env.failures)
406 obj->rodata->filter_failed = true;
407 if (env.latency)
408 obj->rodata->measure_latency = true;
409 if (env.process)
410 obj->rodata->count_by_process = true;
411 if (env.filter_errno)
412 obj->rodata->filter_errno = env.filter_errno;
413
414 err = syscount_bpf__load(obj);
415 if (err) {
416 warn("failed to load BPF object: %s\n", strerror(-err));
417 goto cleanup_obj;
418 }
419
420 obj->links.sys_exit = bpf_program__attach(obj->progs.sys_exit);
421 if (!obj->links.sys_exit) {
422 err = -errno;
423 warn("failed to attach sys_exit program: %s\n", strerror(-err));
424 goto cleanup_obj;
425 }
426 if (env.latency) {
427 obj->links.sys_enter = bpf_program__attach(obj->progs.sys_enter);
428 if (!obj->links.sys_enter) {
429 err = -errno;
430 warn("failed to attach sys_enter programs: %s\n",
431 strerror(-err));
432 goto cleanup_obj;
433 }
434 }
435
436 if (signal(SIGINT, sig_int) == SIG_ERR) {
437 warn("can't set signal handler: %s\n", strerror(errno));
438 goto cleanup_obj;
439 }
440
441 compar = env.latency ? compar_latency : compar_count;
442 print = env.latency ? print_latency : print_count;
443
444 printf("Tracing syscalls, printing top %d... Ctrl+C to quit.\n", env.top);
445 while (hang_on) {
446 sleep(env.interval ?: 1);
447 if (env.duration) {
448 seconds += env.interval ?: 1;
449 if (seconds >= env.duration)
450 hang_on = 0;
451 }
452 if (hang_on && !env.interval)
453 continue;
454
455 count = MAX_ENTRIES;
456 if (!read_vals(bpf_map__fd(obj->maps.data), vals, &count))
457 break;
458 if (!count)
459 continue;
460
461 qsort(vals, count, sizeof(vals[0]), compar);
462 print_timestamp();
463 print(vals, count);
464 }
465
466 cleanup_obj:
467 syscount_bpf__destroy(obj);
468 free_names:
469 free_syscall_names();
470
471 return err != 0;
472 }
473