• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/python
2# @lint-avoid-python-3-compatibility-imports
3#
4# opensnoop Trace open() syscalls.
5#           For Linux, uses BCC, eBPF. Embedded C.
6#
# USAGE: opensnoop [-h] [-T] [-U] [-x] [-p PID] [-t TID] [-u UID]
#                  [-d DURATION] [-n NAME] [-e] [-f FLAG_FILTER]
#                  [--cgroupmap CGROUPMAP] [--mntnsmap MNTNSMAP]
8#
9# Copyright (c) 2015 Brendan Gregg.
10# Licensed under the Apache License, Version 2.0 (the "License")
11#
12# 17-Sep-2015   Brendan Gregg   Created this.
13# 29-Apr-2016   Allan McAleavy  Updated for BPF_PERF_OUTPUT.
14# 08-Oct-2016   Dina Goldshtein Support filtering by PID and TID.
15# 28-Dec-2018   Tim Douglas     Print flags argument, enable filtering
16# 06-Jan-2019   Takuma Kume     Support filtering by UID
17
18from __future__ import print_function
19from bcc import ArgString, BPF
20from bcc.containers import filter_by_containers
21from bcc.utils import printb
22import argparse
23from datetime import datetime, timedelta
24import os
25
# arguments
#
# Builds the command-line parser, parses sys.argv, and derives two values the
# rest of the script relies on:
#   args.duration    - converted to a timedelta when a duration was given
#   flag_filter_mask - bitwise OR of the os.O_* constants named by -f
examples = """examples:
    ./opensnoop           # trace all open() syscalls
    ./opensnoop -T        # include timestamps
    ./opensnoop -U        # include UID
    ./opensnoop -x        # only show failed opens
    ./opensnoop -p 181    # only trace PID 181
    ./opensnoop -t 123    # only trace TID 123
    ./opensnoop -u 1000   # only trace UID 1000
    ./opensnoop -d 10     # trace for 10 seconds only
    ./opensnoop -n main   # only print process names containing "main"
    ./opensnoop -e        # show extended fields
    ./opensnoop -f O_WRONLY -f O_RDWR  # only print calls for writing
    ./opensnoop --cgroupmap mappath  # only trace cgroups in this BPF map
    ./opensnoop --mntnsmap mappath   # only trace mount namespaces in the map
"""
parser = argparse.ArgumentParser(
    description="Trace open() syscalls",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)
parser.add_argument("-T", "--timestamp", action="store_true",
    help="include timestamp on output")
parser.add_argument("-U", "--print-uid", action="store_true",
    help="print UID column")
parser.add_argument("-x", "--failed", action="store_true",
    help="only show failed opens")
# -p/-t/-u are deliberately kept as strings: they are pasted verbatim into the
# C text later, and the later truthiness tests rely on "0" being truthy.
parser.add_argument("-p", "--pid",
    help="trace this PID only")
parser.add_argument("-t", "--tid",
    help="trace this TID only")
parser.add_argument("--cgroupmap",
    help="trace cgroups in this BPF map only")
parser.add_argument("--mntnsmap",
    help="trace mount namespaces in this BPF map only")
parser.add_argument("-u", "--uid",
    help="trace this UID only")
# type=int lets argparse reject a non-numeric duration with a usage error
# instead of a ValueError traceback after parsing.
parser.add_argument("-d", "--duration", type=int,
    help="total duration of trace in seconds")
parser.add_argument("-n", "--name",
    type=ArgString,
    help="only print process names containing this name")
parser.add_argument("--ebpf", action="store_true",
    help=argparse.SUPPRESS)
parser.add_argument("-e", "--extended_fields", action="store_true",
    help="show extended fields")
parser.add_argument("-f", "--flag_filter", action="append",
    help="filter on flags argument (e.g., O_WRONLY)")
args = parser.parse_args()
debug = 0
# A duration of 0 (or no -d at all) is left falsy, which the main loop treats
# as "run until interrupted".
if args.duration:
    args.duration = timedelta(seconds=args.duration)

# Fold every -f flag name into one bitmask; only O_* names that the os module
# actually defines are accepted.
flag_filter_mask = 0
for flag in args.flag_filter or []:
    flag_value = getattr(os, flag, None) if flag.startswith('O_') else None
    if flag_value is None:
        exit("Bad flag: %s" % flag)
    flag_filter_mask |= flag_value
85
# define BPF program
#
# Common prelude of the embedded C program; either the kprobe or the kfunc
# variant is appended to it further down in this script.
#   val_t  - what the kprobe entry handler stores per call (id, comm,
#            filename pointer, flags).
#   data_t - the record submitted through the `events` perf output and read
#            back in print_event().
# Lines tagged `// EXTENDED_STRUCT_MEMBER` are stripped from the final text
# later in this script unless -e/--extended_fields or -f/--flag_filter is set.
bpf_text = """
#include <uapi/linux/ptrace.h>
#include <uapi/linux/limits.h>
#include <linux/sched.h>

struct val_t {
    u64 id;
    char comm[TASK_COMM_LEN];
    const char *fname;
    int flags; // EXTENDED_STRUCT_MEMBER
};

struct data_t {
    u64 id;
    u64 ts;
    u32 uid;
    int ret;
    char comm[TASK_COMM_LEN];
    char fname[NAME_MAX];
    int flags; // EXTENDED_STRUCT_MEMBER
};

BPF_PERF_OUTPUT(events);
"""
111
# kprobe variant, used when BPF.support_kfunc() is false.
#
# Return handler shared by all traced syscalls: looks up the val_t saved for
# the current pid_tgid in `infotmp`, copies comm and the filename into a
# data_t together with a timestamp (ns / 1000), uid and return value, submits
# it to `events`, then deletes the map entry. Returns early if no entry exists.
bpf_text_kprobe = """
BPF_HASH(infotmp, u64, struct val_t);

int trace_return(struct pt_regs *ctx)
{
    u64 id = bpf_get_current_pid_tgid();
    struct val_t *valp;
    struct data_t data = {};

    u64 tsp = bpf_ktime_get_ns();

    valp = infotmp.lookup(&id);
    if (valp == 0) {
        // missed entry
        return 0;
    }
    bpf_probe_read_kernel(&data.comm, sizeof(data.comm), valp->comm);
    bpf_probe_read_user(&data.fname, sizeof(data.fname), (void *)valp->fname);
    data.id = valp->id;
    data.ts = tsp / 1000;
    data.uid = bpf_get_current_uid_gid();
    data.flags = valp->flags; // EXTENDED_STRUCT_MEMBER
    data.ret = PT_REGS_RC(ctx);

    events.perf_submit(ctx, &data, sizeof(data));
    infotmp.delete(&id);

    return 0;
}
"""

# Entry-handler headers, one per syscall. Each one is deliberately left open
# (no closing brace): bpf_text_kprobe_body is appended to complete the function
# and expects `filename` and `flags` to be in scope.
bpf_text_kprobe_header_open = """
int syscall__trace_entry_open(struct pt_regs *ctx, const char __user *filename, int flags)
{
"""

bpf_text_kprobe_header_openat = """
int syscall__trace_entry_openat(struct pt_regs *ctx, int dfd, const char __user *filename, int flags)
{
"""

# openat2 carries its flags inside struct open_how, so the header derives the
# local `flags` from how->flags.
bpf_text_kprobe_header_openat2 = """
#include <uapi/linux/openat2.h>
int syscall__trace_entry_openat2(struct pt_regs *ctx, int dfd, const char __user *filename, struct open_how *how)
{
    int flags = how->flags;
"""

# Shared entry-handler body. PID_TID_FILTER, UID_FILTER and FLAGS_FILTER are
# placeholders replaced later in this script. container_should_be_filtered()
# is not defined here -- presumably it comes from the text that
# filter_by_containers(args) prepends (verify against bcc.containers).
# On success of bpf_get_current_comm() the call's id, filename pointer and
# flags are saved in `infotmp`, keyed by pid_tgid, for trace_return to pick up.
bpf_text_kprobe_body = """
    struct val_t val = {};
    u64 id = bpf_get_current_pid_tgid();
    u32 pid = id >> 32; // PID is higher part
    u32 tid = id;       // Cast and get the lower part
    u32 uid = bpf_get_current_uid_gid();

    PID_TID_FILTER
    UID_FILTER
    FLAGS_FILTER

    if (container_should_be_filtered()) {
        return 0;
    }

    if (bpf_get_current_comm(&val.comm, sizeof(val.comm)) == 0) {
        val.id = id;
        val.fname = filename;
        val.flags = flags; // EXTENDED_STRUCT_MEMBER
        infotmp.update(&id, &val);
    }

    return 0;
};
"""
185
# kfunc variant, used when BPF.support_kfunc() is true.
#
# Each header opens a KRETFUNC_PROBE for one syscall and is left without a
# closing brace; bpf_text_kfunc_body is appended to complete it. FNNAME is a
# placeholder replaced with the resolved syscall symbol name. Under
# CONFIG_ARCH_HAS_SYSCALL_WRAPPER (except s390x) the probe receives a pt_regs
# pointer and the arguments are pulled out with PT_REGS_PARMn; otherwise they
# arrive as direct probe parameters. Either way `filename`, `flags` and `ret`
# are in scope for the body.
bpf_text_kfunc_header_open = """
#if defined(CONFIG_ARCH_HAS_SYSCALL_WRAPPER) && !defined(__s390x__)
KRETFUNC_PROBE(FNNAME, struct pt_regs *regs, int ret)
{
    const char __user *filename = (char *)PT_REGS_PARM1(regs);
    int flags = PT_REGS_PARM2(regs);
#else
KRETFUNC_PROBE(FNNAME, const char __user *filename, int flags, int ret)
{
#endif
"""

bpf_text_kfunc_header_openat = """
#if defined(CONFIG_ARCH_HAS_SYSCALL_WRAPPER) && !defined(__s390x__)
KRETFUNC_PROBE(FNNAME, struct pt_regs *regs, int ret)
{
    int dfd = PT_REGS_PARM1(regs);
    const char __user *filename = (char *)PT_REGS_PARM2(regs);
    int flags = PT_REGS_PARM3(regs);
#else
KRETFUNC_PROBE(FNNAME, int dfd, const char __user *filename, int flags, int ret)
{
#endif
"""

# openat2 passes its flags inside struct open_how: the wrapper branch copies
# the struct from user memory with bpf_probe_read_user() first, the other
# branch reads how->flags directly.
bpf_text_kfunc_header_openat2 = """
#include <uapi/linux/openat2.h>
#if defined(CONFIG_ARCH_HAS_SYSCALL_WRAPPER) && !defined(__s390x__)
KRETFUNC_PROBE(FNNAME, struct pt_regs *regs, int ret)
{
    int dfd = PT_REGS_PARM1(regs);
    const char __user *filename = (char *)PT_REGS_PARM2(regs);
    struct open_how __user how;
    int flags;

    bpf_probe_read_user(&how, sizeof(struct open_how), (struct open_how*)PT_REGS_PARM3(regs));
    flags = how.flags;
#else
KRETFUNC_PROBE(FNNAME, int dfd, const char __user *filename, struct open_how __user *how, int ret)
{
    int flags = how->flags;
#endif
"""

# Shared probe body. Because the return value is already available as `ret`,
# the event is filled in and submitted in one step, without the `infotmp` map
# the kprobe variant uses. PID_TID_FILTER, UID_FILTER and FLAGS_FILTER are
# placeholders replaced later in this script; container_should_be_filtered()
# presumably comes from the text filter_by_containers(args) prepends (verify).
bpf_text_kfunc_body = """
    u64 id = bpf_get_current_pid_tgid();
    u32 pid = id >> 32; // PID is higher part
    u32 tid = id;       // Cast and get the lower part
    u32 uid = bpf_get_current_uid_gid();

    PID_TID_FILTER
    UID_FILTER
    FLAGS_FILTER
    if (container_should_be_filtered()) {
        return 0;
    }

    struct data_t data = {};
    bpf_get_current_comm(&data.comm, sizeof(data.comm));

    u64 tsp = bpf_ktime_get_ns();

    bpf_probe_read_user(&data.fname, sizeof(data.fname), (void *)filename);
    data.id    = id;
    data.ts    = tsp / 1000;
    data.uid   = bpf_get_current_uid_gid();
    data.flags = flags; // EXTENDED_STRUCT_MEMBER
    data.ret   = ret;

    events.perf_submit(ctx, &data, sizeof(data));

    return 0;
}
"""
260
# An empty BPF object is enough to ask for the syscall symbol prefix and to
# check which syscall symbols exist.
b = BPF(text='')
syscall_prefix = b.get_syscall_prefix().decode()
# open and openat are always in place since 2.6.16
fnname_open = syscall_prefix + 'open'
fnname_openat = syscall_prefix + 'openat'
fnname_openat2 = syscall_prefix + 'openat2'
# openat2 is optional: drop it when the symbol cannot be resolved.
if b.ksymname(fnname_openat2) == -1:
    fnname_openat2 = None

# Append one probe function per traced syscall (open, openat and, when
# available, openat2), using the kfunc templates if supported and the
# kprobe/kretprobe templates otherwise.
is_support_kfunc = BPF.support_kfunc()
if is_support_kfunc:
    kfunc_variants = [
        (bpf_text_kfunc_header_open, fnname_open),
        (bpf_text_kfunc_header_openat, fnname_openat),
    ]
    if fnname_openat2:
        kfunc_variants.append((bpf_text_kfunc_header_openat2, fnname_openat2))
    bpf_text += ''.join(
        header.replace('FNNAME', symbol) + bpf_text_kfunc_body
        for header, symbol in kfunc_variants)
else:
    kprobe_headers = [
        bpf_text_kprobe_header_open,
        bpf_text_kprobe_header_openat,
    ]
    if fnname_openat2:
        kprobe_headers.append(bpf_text_kprobe_header_openat2)
    # The shared return handler goes first, then each entry handler.
    bpf_text += bpf_text_kprobe
    bpf_text += ''.join(
        header + bpf_text_kprobe_body for header in kprobe_headers)
292
# Work out the C snippet for each placeholder first, then substitute it.
if args.tid:  # TID trumps PID
    pid_tid_filter = 'if (tid != %s) { return 0; }' % args.tid
elif args.pid:
    pid_tid_filter = 'if (pid != %s) { return 0; }' % args.pid
else:
    pid_tid_filter = ''
bpf_text = bpf_text.replace('PID_TID_FILTER', pid_tid_filter)

if args.uid:
    uid_filter = 'if (uid != %s) { return 0; }' % args.uid
else:
    uid_filter = ''
bpf_text = bpf_text.replace('UID_FILTER', uid_filter)

# Container filtering code is prepended to the program text.
bpf_text = filter_by_containers(args) + bpf_text

if args.flag_filter:
    flags_filter = 'if (!(flags & %d)) { return 0; }' % flag_filter_mask
else:
    flags_filter = ''
bpf_text = bpf_text.replace('FLAGS_FILTER', flags_filter)

# Without -e and -f nothing needs the flags member, so drop every line that
# carries the EXTENDED_STRUCT_MEMBER marker.
if not args.extended_fields and not args.flag_filter:
    kept_lines = [line for line in bpf_text.split('\n')
                  if 'EXTENDED_STRUCT_MEMBER' not in line]
    bpf_text = '\n'.join(kept_lines)

# --ebpf dumps the final program and stops; debug dumps it and carries on.
if args.ebpf:
    print(bpf_text)
    exit()
elif debug:
    print(bpf_text)
319
# initialize BPF
b = BPF(text=bpf_text)
# Only the kprobe variant is attached by hand here; each traced syscall gets
# its own entry handler plus the shared return handler.
if not is_support_kfunc:
    probe_targets = [
        (fnname_open, "syscall__trace_entry_open"),
        (fnname_openat, "syscall__trace_entry_openat"),
    ]
    if fnname_openat2:
        probe_targets.append((fnname_openat2, "syscall__trace_entry_openat2"))
    for syscall_symbol, entry_fn in probe_targets:
        b.attach_kprobe(event=syscall_symbol, fn_name=entry_fn)
        b.attach_kretprobe(event=syscall_symbol, fn_name="trace_return")
332
# Timestamp of the first event seen; 0 until print_event() records one.
initial_ts = 0

# header
# Optional columns are added in the same order print_event() emits them.
header_cols = []
if args.timestamp:
    header_cols.append("%-14s" % "TIME(s)")
if args.print_uid:
    header_cols.append("%-6s" % "UID")
id_label = "TID" if args.tid else "PID"
header_cols.append("%-6s %-16s %4s %3s " % (id_label, "COMM", "FD", "ERR"))
if args.extended_fields:
    header_cols.append("%-9s" % "FLAGS")
header_cols.append("PATH")
print("".join(header_cols))
345
# process event
def print_event(cpu, data, size):
    """Perf-buffer callback: decode one event and print it as an output row.

    ``data`` is decoded through ``b["events"].event()``; ``cpu`` and ``size``
    are part of the callback signature but are not used. The first event
    seen fixes ``initial_ts`` (before any filtering), and the -x and -n
    filters are applied here in user space.
    """
    global initial_ts
    event = b["events"].event(data)

    # split return value into FD and errno columns
    failed = event.ret < 0
    if failed:
        fd_s, err = -1, - event.ret
    else:
        fd_s, err = event.ret, 0

    if not initial_ts:
        initial_ts = event.ts

    # -x: drop successful opens
    if args.failed and not failed:
        return

    # -n: keep only events whose comm contains the requested name
    if args.name and bytes(args.name) not in event.comm:
        return

    if args.timestamp:
        # event.ts is in microseconds (ns / 1000 in the BPF program)
        elapsed_us = event.ts - initial_ts
        printb(b"%-14.9f" % (float(elapsed_us) / 1000000), nl="")

    if args.print_uid:
        printb(b"%-6d" % event.uid, nl="")

    # event.id is pid_tgid: low 32 bits are the TID, high 32 bits the PID
    if args.tid:
        shown_id = event.id & 0xffffffff
    else:
        shown_id = event.id >> 32
    printb(b"%-6d %-16s %4d %3d " % (shown_id, event.comm, fd_s, err), nl="")

    if args.extended_fields:
        printb(b"%08o " % event.flags, nl="")

    printb(b'%s' % event.fname)
383
# loop with callback to print_event
#
# Runs until interrupted, or until args.duration has elapsed when a duration
# was given. The remaining time is handed to perf_buffer_poll() as its
# timeout (milliseconds) so the deadline is honoured even when no events
# arrive; without a timeout the poll would block until the next event.
b["events"].open_perf_buffer(print_event, page_cnt=64)
start_time = datetime.now()
while True:
    if args.duration:
        remaining = args.duration - (datetime.now() - start_time)
        if remaining <= timedelta(0):
            break
        # Never pass 0: round sub-millisecond remainders up to 1 ms.
        timeout_ms = max(1, int(remaining.total_seconds() * 1000))
    else:
        timeout_ms = -1  # no deadline: block until events arrive
    try:
        b.perf_buffer_poll(timeout=timeout_ms)
    except KeyboardInterrupt:
        exit()
392