• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/python
2# @lint-avoid-python-3-compatibility-imports
3from __future__ import print_function
4
5import argparse
6import ctypes as ct
7import os
8import platform
9import re
10import signal
11import sys
12
13from bcc import BPF
14from datetime import datetime
15from time import strftime
16
#
# exitsnoop Trace all process termination (exit, fatal signal)
#           For Linux, uses BCC, eBPF. Embedded C.
#
# USAGE: exitsnoop [-h] [-x] [-t] [--utc] [--label[=LABEL]] [-p PID]
#                  [--per-thread]
#
# Epilog text shown at the end of the argparse --help output.
_examples = (
    "examples:\n"
    "    exitsnoop                # trace all process termination\n"
    "    exitsnoop -x             # trace only fails, exclude exit(0)\n"
    "    exitsnoop -t             # include timestamps (local time)\n"
    "    exitsnoop --utc          # include timestamps (UTC)\n"
    "    exitsnoop -p 181         # only trace PID 181\n"
    "    exitsnoop --label=exit   # label each output line with 'exit'\n"
    "    exitsnoop --per-thread   # trace per thread termination\n"
)
32"""
33  Exit status (from <include/sysexits.h>):
34
35    0 EX_OK        Success
36    2              argparse error
37   70 EX_SOFTWARE  syntax error detected by compiler, or
38                   verifier error from kernel
39   77 EX_NOPERM    Need sudo (CAP_SYS_ADMIN) for BPF() system call
40
41  The template for this script was Brendan Gregg's execsnoop
42      https://github.com/iovisor/bcc/blob/master/tools/execsnoop.py
43
44  More information about this script is in bcc/tools/exitsnoop_example.txt
45
46  Copyright 2016 Netflix, Inc.
47  Copyright 2019 Instana, Inc.
48  Licensed under the Apache License, Version 2.0 (the "License")
49
50  07-Feb-2016   Brendan Gregg (Netflix)            Created execsnoop
51  04-May-2019   Arturo Martin-de-Nicolas (Instana) Created exitsnoop
52  13-May-2019   Jeroen Soeters (Instana) Refactor to import as module
53"""
54
def _getParser():
    """Build the command line parser and return its bound parse_args method.

    The returned callable takes a list of argument strings and returns an
    argparse.Namespace with the attributes timestamp, utc, pid, label,
    failed, per_thread, ebpf and timespec.  Invalid arguments make argparse
    print a usage error and exit with status 2.
    """
    def _pid_type(value):
        """argparse type callback: accept only a positive decimal PID."""
        # The PID is pasted verbatim into the generated C program, so it must
        # be validated here rather than passed through as an arbitrary string.
        try:
            pid = int(value)
        except ValueError:
            raise argparse.ArgumentTypeError("invalid PID: %r" % value)
        if pid <= 0:
            raise argparse.ArgumentTypeError("PID must be positive: %r" % value)
        return pid

    parser = argparse.ArgumentParser(
        description="Trace all process termination (exit, fatal signal)",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=_examples)
    a = parser.add_argument
    a("-t", "--timestamp", action="store_true", help="include timestamp (local time default)")
    a("--utc",             action="store_true", help="include timestamp in UTC (-t implied)")
    a("-p", "--pid",       type=_pid_type,      help="trace this PID only")
    a("--label",                                help="label each line")
    a("-x", "--failed",    action="store_true", help="trace only fails, exclude exit(0)")
    a("--per-thread",      action="store_true", help="trace per thread termination")
    # print the embedded C program and exit, for debugging
    a("--ebpf",            action="store_true", help=argparse.SUPPRESS)
    # RHEL 7.6 keeps task->start_time as struct timespec, convert to u64 nanoseconds
    a("--timespec",        action="store_true", help=argparse.SUPPRESS)
    return parser.parse_args
72
73
class Global():
    """Namespace for state shared by the functions in this module."""
    # Bound ArgumentParser.parse_args, built once when the class is defined.
    parse_args = _getParser()
    # Parsed options and the raw argument list; both are set by initialize().
    args = None
    argv = None
    # Maps a signal number to its name without the "SIG" prefix, built from
    # every all-uppercase SIG* attribute of the signal module.
    SIGNUM_TO_SIGNAME = {
        number: re.sub("^SIG", "", name)
        for name, number in signal.__dict__.items()
        if re.match("^SIG[A-Z]+$", name)
    }
80
81
class Data(ct.Structure):
    """ctypes mirror of struct data_t declared in _embedded_c().

    The layout is packed (no padding) to match the packed C struct.
    """
    _pack_ = 1
    _TASK_COMM_LEN = 16      # linux/sched.h
    _fields_ = [
        # task->start_time, see --timespec arg
        ("start_time", ct.c_ulonglong),
        # bpf_ktime_get_ns()
        ("exit_time", ct.c_ulonglong),
        # task->tgid, thread group id == sys_getpid()
        ("pid", ct.c_uint),
        # task->pid, thread id == sys_gettid()
        ("tid", ct.c_uint),
        # task->parent->tgid, notified of exit
        ("ppid", ct.c_uint),
        ("exit_code", ct.c_int),
        ("sig_info", ct.c_uint),
        ("task", ct.c_char * _TASK_COMM_LEN),
    ]
96
def _embedded_c(args):
    """Generate C program for sched_process_exit tracepoint in kernel/exit.c."""
    # NOTE: the docstring above stays a single line on purpose, because
    # _ebpf_comment() copies it verbatim into the generated C comment.
    #
    # args - parsed options; the attributes read here are pid, per_thread,
    #        failed, ebpf and timespec.  Falls back to Global.args when None.
    # Returns the C source text with every placeholder token substituted.
    if args is None:
        args = Global.args

    c = """
    EBPF_COMMENT
    #include <linux/sched.h>
    BPF_STATIC_ASSERT_DEF

    struct data_t {
        u64 start_time;
        u64 exit_time;
        u32 pid;
        u32 tid;
        u32 ppid;
        int exit_code;
        u32 sig_info;
        char task[TASK_COMM_LEN];
    } __attribute__((packed));

    BPF_STATIC_ASSERT(sizeof(struct data_t) == CTYPES_SIZEOF_DATA);
    BPF_PERF_OUTPUT(events);

    TRACEPOINT_PROBE(sched, sched_process_exit)
    {
        struct task_struct *task = (typeof(task))bpf_get_current_task();
        if (FILTER_PID || FILTER_EXIT_CODE) { return 0; }

        struct data_t data = {
            .start_time = PROCESS_START_TIME_NS,
            .exit_time = bpf_ktime_get_ns(),
            .pid = task->tgid,
            .tid = task->pid,
            .ppid = task->parent->tgid,
            .exit_code = task->exit_code >> 8,
            .sig_info = task->exit_code & 0xFF,
        };
        bpf_get_current_comm(&data.task, sizeof(data.task));

        events.perf_submit(args, &data, sizeof(data));
        return 0;
    }
    """
    # TODO: this macro belongs in bcc/src/cc/export/helpers.h
    bpf_static_assert_def = r"""
    #ifndef BPF_STATIC_ASSERT
    #define BPF_STATIC_ASSERT(condition) __attribute__((unused)) \
    extern int bpf_static_assert[(condition) ? 1 : -1]
    #endif
    """

    # FILTER_PID is a C expression that is true for tasks to be skipped.
    if args.pid:
        if args.per_thread:
            # any thread of the requested process
            filter_pid = "task->tgid != %s" % args.pid
        else:
            # only the thread group leader of the requested process
            filter_pid = "!(task->tgid == %s && task->pid == task->tgid)" % args.pid
    elif args.per_thread:
        filter_pid = '0'
    else:
        # skip threads that are not the thread group leader
        filter_pid = 'task->pid != task->tgid'

    filter_exit_code = 'task->exit_code == 0' if args.failed else '0'

    if args.timespec:
        # start_time is a struct timespec on this kernel: convert to ns
        start_time_ns = '(task->start_time.tv_sec * 1000000000L) + task->start_time.tv_nsec'
    else:
        start_time_ns = 'task->start_time'

    code_substitutions = [
        ('EBPF_COMMENT', _ebpf_comment() if args.ebpf else ''),
        # must stay ahead of nothing that contains this token; the macro
        # text itself only contains BPF_STATIC_ASSERT, not ..._DEF
        ("BPF_STATIC_ASSERT_DEF", bpf_static_assert_def),
        # lets the compiler verify the C struct matches the ctypes layout
        ("CTYPES_SIZEOF_DATA", str(ct.sizeof(Data))),
        ('FILTER_PID', filter_pid),
        ('FILTER_EXIT_CODE', filter_exit_code),
        ('PROCESS_START_TIME_NS', start_time_ns),
    ]
    for old, new in code_substitutions:
        c = c.replace(old, new)
    return c
166
def _ebpf_comment():
    """Build the C block comment placed at the top of the generated program.

    The comment records the script name, the current time, the docstring of
    _embedded_c(), the raw argument list and the parsed arguments.
    """
    created = 'Created by %s at %s:\n\t%s' % (
        sys.argv[0], strftime("%Y-%m-%d %H:%M:%S %Z"), _embedded_c.__doc__)

    # Spread the parsed-argument dict over several lines, one entry per line.
    pretty_args = str(vars(Global.args))
    for old, new in (('{', '{\n\t'), (', ', ',\n\t'), ('}', ',\n }\n\n')):
        pretty_args = pretty_args.replace(old, new)

    body = "\n %s\n\n ARGV = %s\n\n ARGS = %s/" % (
        created, ' '.join(Global.argv), pretty_args)
    # Prefix every line with '*' and expand tabs so the text sits inside the
    # C comment; the final "*/" comes from the trailing newline + '/'.
    body = body.replace('\n', '\n\t*')
    body = body.replace('\t', '    ')
    return "\n   /*" + body
175
def _print_header():
    """Print the column header line matching the rows of _print_event()."""
    opts = Global.args
    if opts.timestamp:
        # column title carries the time zone the timestamps are shown in
        zone = 'UTC' if opts.utc else strftime("%Z")
        print("%-13s" % ('TIME-' + zone), end="")
    if opts.label is not None:
        print("%-6s" % "LABEL", end="")
    columns = ("PCOMM", "PID", "PPID", "TID", "AGE(s)", "EXIT_CODE")
    print("%-16s %-6s %-6s %-6s %-7s %-10s" % columns)
184
def _print_event(cpu, data, size): # callback
    """Print the exit event.

    cpu  - unused
    data - pointer to the raw event, reinterpreted as a Data structure
    size - unused

    Output columns follow _print_header(): optional timestamp and label,
    then command name, pid, ppid, tid, age in seconds and the exit status.
    """
    e = ct.cast(data, ct.POINTER(Data)).contents
    if Global.args.timestamp:
        now = datetime.utcnow() if Global.args.utc else datetime.now()
        # keep milliseconds only: drop the last three digits of %f
        print("%-13s" % (now.strftime("%H:%M:%S.%f")[:-3]), end="")
    if Global.args.label is not None:
        # an empty label (--label=) falls back to 'exit'
        label = Global.args.label if len(Global.args.label) else 'exit'
        print("%-6s" % label, end="")
    age = (e.exit_time - e.start_time) / 1e9
    # The command name is raw bytes from the kernel and need not be valid
    # UTF-8; replace undecodable bytes instead of raising in the callback.
    comm = e.task.decode('utf-8', 'replace')
    print("%-16s %-6d %-6d %-6d %-7.2f " %
              (comm, e.pid, e.ppid, e.tid, age), end="")
    if e.sig_info == 0:
        print("0" if e.exit_code == 0 else "code %d" % e.exit_code)
    else:
        # low 7 bits: signal number, bit 0x80: core dump flag
        sig = e.sig_info & 0x7F
        if sig:
            print("signal %d (%s)" % (sig, signum_to_signame(sig)), end="")
        if e.sig_info & 0x80:
            print(", core dumped ", end="")
        print()
206
207# =============================
208# Module: These functions are available for import
209# =============================
def initialize(arg_list=None):
    """Trace all process termination.

    arg_list - list of args, if omitted (or None) then uses the command line
               args as they are at call time (sys.argv[1:])
               arg_list is passed to argparse.ArgumentParser.parse_args()

    For example, if arg_list = [ '-x', '-t' ]
       args.failed == True
       args.timestamp == True

    Returns a tuple (return_code, result)
       0 = Ok, result is the return value from BPF()
       1 = args.ebpf is requested, result is the generated C code
       os.EX_NOPERM: need CAP_SYS_ADMIN, result is error message
       os.EX_SOFTWARE: internal software error, result is error message
    """
    if arg_list is None:
        # resolve at call time rather than freezing sys.argv at import time
        arg_list = sys.argv[1:]
    Global.argv = arg_list
    Global.args = Global.parse_args(arg_list)
    if Global.args.utc and not Global.args.timestamp:
        Global.args.timestamp = True
    if not Global.args.ebpf and os.geteuid() != 0:
        return (os.EX_NOPERM, "Need sudo (CAP_SYS_ADMIN) for BPF() system call")
    if re.match(r'^3\.10\..*el7.*$', platform.release()): # Centos/Red Hat
        Global.args.timespec = True
    # At most two attempts: the second one only happens when the first
    # compile failed because start_time is a struct timespec.
    for _ in range(2):
        c = _embedded_c(Global.args)
        if Global.args.ebpf:
            return (1, c)
        try:
            return (os.EX_OK, BPF(text=c))
        except Exception as e:
            error = format(e)
            # Use 'in', not str.find(): find() returns -1 (truthy) when the
            # text is absent, which made this retry fire on unrelated errors.
            if (not Global.args.timespec
                    and 'struct timespec' in error
                    and 'start_time' in error):
                print('This kernel keeps task->start_time in a struct timespec.\n' +
                          'Retrying with --timespec')
                Global.args.timespec = True
                continue
            return (os.EX_SOFTWARE, "BPF error: " + error)
        except BaseException:
            # Keeps the (return_code, result) contract for anything that is
            # not an Exception subclass, as the original bare except did.
            return (os.EX_SOFTWARE, "Unexpected error: {0}".format(sys.exc_info()[0]))
252
def snoop(bpf, event_handler):
    """Deliver process termination events to event_handler.

    bpf - result returned by successful initialize()
    event_handler - callback invoked for each termination event
    args.pid - when set, return after the first poll completes instead of
               polling forever
    """
    events = bpf["events"]
    events.open_perf_buffer(event_handler)
    finished = False
    while not finished:
        bpf.perf_buffer_poll()
        # with a PID filter one poll is enough; otherwise keep polling
        finished = bool(Global.args.pid)
265
def signum_to_signame(signum):
    """Look up the signal name for signum, or "unknown" if there is none."""
    try:
        return Global.SIGNUM_TO_SIGNAME[signum]
    except KeyError:
        return "unknown"
269
270# =============================
271# Script: invoked as a script
272# =============================
def main():
    """Run exitsnoop as a command line tool.

    Prints the header and then every exit event.  When initialize() reports
    a non-zero code its result is printed and the process exits: with
    status 0 if --ebpf was requested, otherwise with that code.  Ctrl-C
    prints a newline and exits.  Returns 0 when snoop() returns.
    """
    try:
        status, result = initialize()
        if not status:
            _print_header()
            snoop(result, _print_event)
        else:
            # either the generated C code (--ebpf) or an error message
            print(result)
            sys.exit(0 if Global.args.ebpf else status)
    except KeyboardInterrupt:
        print()
        sys.exit()

    return 0

if __name__ == '__main__':
    main()
289