• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
#!/usr/bin/python
#
# syscount   Summarize syscall counts and latencies.
#
# USAGE: syscount [-p PID] [-i INTERVAL] [-d DURATION] [-T TOP] [-x]
#                 [-e ERRNO] [-L] [-m] [-P] [-l]
#
# Copyright 2017, Sasha Goldshtein.
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 15-Feb-2017   Sasha Goldshtein    Created this.
11
import argparse
import errno
import itertools
import signal
import sys
from time import sleep, strftime

from bcc import BPF
from bcc.syscall import syscall_name, syscalls
from bcc.utils import printb
21
# Python 2 calls it itertools.izip_longest, Python 3 renamed it to
# zip_longest; bind whichever exists to a single name used by --list.
try:
    izip_longest = itertools.zip_longest
except AttributeError:
    izip_longest = itertools.izip_longest
26
# signal handler
def signal_ignore(signal, frame):
    """Swallow a signal, emitting only a blank line.

    The main loop installs this for SIGINT once the first Ctrl+C has been
    seen, so that further interrupts cannot abort the final summary.

    Both arguments are the standard handler arguments and are unused. Note
    that the ``signal`` parameter shadows the ``signal`` module inside this
    function.
    """
    # print("") rather than print(): without print_function, Python 2 would
    # render a bare print() as "()" instead of an empty line.
    print("")
30
def handle_errno(errstr):
    """Convert a command-line errno argument into a positive errno number.

    Used as the argparse ``type=`` callable for -e/--errno.

    errstr: either a decimal integer (its sign is ignored, so "-13" and
            "13" are equivalent) or a symbolic name from the ``errno``
            module such as "EPERM".

    Returns the errno as an int.
    Raises argparse.ArgumentTypeError if errstr is neither.
    """
    try:
        return abs(int(errstr))
    except ValueError:
        pass    # not numeric; try it as a symbolic name below

    # Only accept integer attributes: the errno module also exposes
    # non-errno names (e.g. "errorcode", "__name__") that must not be
    # passed on as if they were error numbers.
    value = getattr(errno, errstr, None)
    if isinstance(value, int):
        return value
    raise argparse.ArgumentTypeError("couldn't map %s to an errno" % errstr)
41
42
def _nonnegative_int(value):
    """argparse type: parse an integer and reject negative values."""
    try:
        number = int(value)
    except ValueError:
        raise argparse.ArgumentTypeError("invalid int value: %r" % value)
    if number < 0:
        raise argparse.ArgumentTypeError("%r must not be negative" % value)
    return number


# Interval (in seconds) used when none is requested: long enough that the
# only summary printed is the one at exit.
_NO_INTERVAL = 99999999

parser = argparse.ArgumentParser(
    description="Summarize syscall counts and latencies.")
parser.add_argument("-p", "--pid", type=int, help="trace only this pid")
# Negative values are rejected up front: a negative interval makes sleep()
# raise, and a negative top count would silently drop rows when slicing.
parser.add_argument("-i", "--interval", type=_nonnegative_int,
    help="print summary at this interval (seconds)")
parser.add_argument("-d", "--duration", type=_nonnegative_int,
    help="total duration of trace, in seconds")
parser.add_argument("-T", "--top", type=_nonnegative_int, default=10,
    help="print only the top syscalls by count or latency")
parser.add_argument("-x", "--failures", action="store_true",
    help="trace only failed syscalls (return < 0)")
parser.add_argument("-e", "--errno", type=handle_errno,
    help="trace only syscalls that return this error (numeric or EPERM, etc.)")
parser.add_argument("-L", "--latency", action="store_true",
    help="collect syscall latency")
parser.add_argument("-m", "--milliseconds", action="store_true",
    help="display latency in milliseconds (default: microseconds)")
parser.add_argument("-P", "--process", action="store_true",
    help="count by process and not by syscall")
parser.add_argument("-l", "--list", action="store_true",
    help="print list of recognized syscalls and exit")
# Hidden option: print the generated BPF program instead of running it.
parser.add_argument("--ebpf", action="store_true",
    help=argparse.SUPPRESS)
args = parser.parse_args()

# With a duration but no interval, print a single summary at the end.
if args.duration and not args.interval:
    args.interval = args.duration
# With neither (or an interval of 0), effectively wait for Ctrl+C.
if not args.interval:
    args.interval = _NO_INTERVAL
71
if args.list:
    # Print every recognized syscall name, sorted, four per row.
    names = sorted(syscalls.values())
    for grp in izip_longest(*[iter(names)] * 4):
        cells = []
        for s in grp:
            if s is None:
                continue    # padding added to fill the last row
            # NOTE(review): the names are presumably bytes (syscall_name()
            # output is formatted with a bytes format elsewhere in this
            # file); decode so Python 3 does not print them as b'...'.
            if not isinstance(s, str):
                s = s.decode()
            cells.append("%-20s" % s)
        print("   ".join(cells))
    sys.exit(0)
76
# BPF C program. Its behavior is selected at compile time by macros that are
# prepended to this text according to the command-line options:
#   LATENCY        also record per-key total time between sys_enter/sys_exit
#   FILTER_PID     only consider this process id
#   FILTER_FAILED  only count syscalls whose return value is negative
#   FILTER_ERRNO   only count syscalls that return -FILTER_ERRNO
#   BY_PROCESS     key the "data" map by process id instead of syscall id
# Counters are updated with lock_xadd() because the tracepoints run
# concurrently on every CPU and share the same hash entries.
text = """
#ifdef LATENCY
struct data_t {
    u64 count;
    u64 total_ns;
};

BPF_HASH(start, u64, u64);
BPF_HASH(data, u32, struct data_t);
#else
BPF_HASH(data, u32, u64);
#endif

#ifdef LATENCY
TRACEPOINT_PROBE(raw_syscalls, sys_enter) {
    u64 pid_tgid = bpf_get_current_pid_tgid();

#ifdef FILTER_PID
    if (pid_tgid >> 32 != FILTER_PID)
        return 0;
#endif

    u64 t = bpf_ktime_get_ns();
    start.update(&pid_tgid, &t);
    return 0;
}
#endif

TRACEPOINT_PROBE(raw_syscalls, sys_exit) {
    u64 pid_tgid = bpf_get_current_pid_tgid();

#ifdef FILTER_PID
    if (pid_tgid >> 32 != FILTER_PID)
        return 0;
#endif

#ifdef FILTER_FAILED
    if (args->ret >= 0)
        return 0;
#endif

#ifdef FILTER_ERRNO
    if (args->ret != -FILTER_ERRNO)
        return 0;
#endif

#ifdef BY_PROCESS
    u32 key = pid_tgid >> 32;
#else
    u32 key = args->id;
#endif

#ifdef LATENCY
    struct data_t *val, zero = {};
    u64 *start_ns = start.lookup(&pid_tgid);
    if (!start_ns)
        return 0;

    val = data.lookup_or_try_init(&key, &zero);
    if (val) {
        lock_xadd(&val->count, 1);
        lock_xadd(&val->total_ns, bpf_ktime_get_ns() - *start_ns);
    }
#else
    u64 *val, zero = 0;
    val = data.lookup_or_try_init(&key, &zero);
    if (val) {
        lock_xadd(val, 1);
    }
#endif
    return 0;
}
"""
150
# Turn the selected options into #define lines in front of the BPF program;
# the C text tests them with #ifdef.
if args.pid:
    text = ("#define FILTER_PID %d\n" % args.pid) + text
if args.failures:
    text = "#define FILTER_FAILED\n" + text
if args.errno:
    text = "#define FILTER_ERRNO %d\n" % abs(args.errno) + text
if args.latency:
    text = "#define LATENCY\n" + text
if args.process:
    text = "#define BY_PROCESS\n" + text
if args.ebpf:
    # Hidden --ebpf option: show the generated program and stop.
    print(text)
    # sys.exit() rather than exit(): the latter is injected by the site
    # module and is not guaranteed to exist.
    sys.exit()

bpf = BPF(text=text)
166
def print_stats():
    """Print one summary, in latency or count form depending on -L."""
    report = print_latency_stats if args.latency else print_count_stats
    report()
172
# Column headings: the first column names whatever the counts are keyed by,
# the time column names the unit selected with -m.
agg_colname = "PID    COMM" if args.process else "SYSCALL"
time_colname = "TIME (ms)" if args.milliseconds else "TIME (us)"
175
def comm_for_pid(pid):
    """Return the contents of /proc/<pid>/comm as stripped bytes.

    Returns b"[unknown]" when the file cannot be opened or read, for
    example because the process has already exited.
    """
    try:
        # The context manager closes the file even if read() fails.
        with open("/proc/%d/comm" % pid, "rb") as comm_file:
            return comm_file.read().strip()
    except (IOError, OSError):
        return b"[unknown]"
181
def agg_colval(key):
    """Render a "data" map key as the bytes shown in the first column.

    With -P the key is a process id and the result is "<pid> <comm>";
    otherwise the key is a syscall id and the result is whatever
    syscall_name() returns for it.
    """
    if not args.process:
        return syscall_name(key.value)
    pid = key.value
    return b"%-6d %-15s" % (pid, comm_for_pid(pid))
187
def print_count_stats():
    """Print the top entries of the "data" map by count, then reset it.

    Output is a timestamp line, a header line, up to args.top rows in
    descending count order, and a blank line.
    """
    data = bpf["data"]
    print("[%s]" % strftime("%H:%M:%S"))
    print("%-22s %8s" % (agg_colname, "COUNT"))
    # Key 0xFFFFFFFF happens occasionally and is not wanted. Drop it before
    # taking the top-N slice so it cannot use up one of the rows.
    rows = [(k, v) for k, v in data.items() if k.value != 0xFFFFFFFF]
    rows.sort(key=lambda kv: kv[1].value, reverse=True)
    for k, v in rows[:args.top]:
        printb(b"%-22s %8d" % (agg_colval(k), v.value))
    print("")
    data.clear()
198
def print_latency_stats():
    """Print the top entries of the "data" map by total time, then reset it.

    Output is a timestamp line, a header line, up to args.top rows in
    descending total-time order (count and time per key), and a blank
    line. Time is shown in milliseconds with -m, otherwise microseconds.
    """
    data = bpf["data"]
    print("[%s]" % strftime("%H:%M:%S"))
    print("%-22s %8s %16s" % (agg_colname, "COUNT", time_colname))
    # total_ns is in nanoseconds; pick the divisor and precision once.
    if args.milliseconds:
        row_fmt, divisor = b"%-22s %8d %16.6f", 1e6
    else:
        row_fmt, divisor = b"%-22s %8d %16.3f", 1e3
    # Key 0xFFFFFFFF happens occasionally and is not wanted. Drop it before
    # taking the top-N slice so it cannot use up one of the rows.
    rows = [(k, v) for k, v in data.items() if k.value != 0xFFFFFFFF]
    rows.sort(key=lambda kv: kv[1].total_ns, reverse=True)
    for k, v in rows[:args.top]:
        printb(row_fmt % (agg_colval(k), v.count, v.total_ns / divisor))
    print("")
    data.clear()
212
print("Tracing %ssyscalls, printing top %d... Ctrl+C to quit." %
      ("failed " if args.failures else "", args.top))

# Main loop: sleep for one interval, print a summary, repeat. It ends after
# the summary that follows Ctrl+C or the expiry of --duration.
# (args.interval is always non-zero here: argument handling substitutes a
# default when none is given.)
exiting = False
seconds = 0
while True:
    # Never sleep past the requested duration, so the total run time is
    # honoured even when the interval does not divide it evenly.
    pause = args.interval
    if args.duration:
        pause = min(pause, args.duration - seconds)
    try:
        sleep(pause)
        seconds += pause
    except KeyboardInterrupt:
        exiting = True
        # Ignore further Ctrl+C so the final summary is not cut short.
        signal.signal(signal.SIGINT, signal_ignore)
    if args.duration and seconds >= args.duration:
        exiting = True

    print_stats()

    if exiting:
        print("Detaching...")
        # sys.exit() rather than exit(): the latter is injected by the site
        # module and is not guaranteed to exist.
        sys.exit()
232