1#!/usr/bin/python 2# @lint-avoid-python-3-compatibility-imports 3# 4# opensnoop Trace open() syscalls. 5# For Linux, uses BCC, eBPF. Embedded C. 6# 7# USAGE: opensnoop [-h] [-T] [-x] [-p PID] [-d DURATION] [-t TID] [-n NAME] 8# 9# Copyright (c) 2015 Brendan Gregg. 10# Licensed under the Apache License, Version 2.0 (the "License") 11# 12# 17-Sep-2015 Brendan Gregg Created this. 13# 29-Apr-2016 Allan McAleavy Updated for BPF_PERF_OUTPUT. 14# 08-Oct-2016 Dina Goldshtein Support filtering by PID and TID. 15# 28-Dec-2018 Tim Douglas Print flags argument, enable filtering 16# 06-Jan-2019 Takuma Kume Support filtering by UID 17 18from __future__ import print_function 19from bcc import ArgString, BPF 20from bcc.containers import filter_by_containers 21from bcc.utils import printb 22import argparse 23from datetime import datetime, timedelta 24import os 25 26# arguments 27examples = """examples: 28 ./opensnoop # trace all open() syscalls 29 ./opensnoop -T # include timestamps 30 ./opensnoop -U # include UID 31 ./opensnoop -x # only show failed opens 32 ./opensnoop -p 181 # only trace PID 181 33 ./opensnoop -t 123 # only trace TID 123 34 ./opensnoop -u 1000 # only trace UID 1000 35 ./opensnoop -d 10 # trace for 10 seconds only 36 ./opensnoop -n main # only print process names containing "main" 37 ./opensnoop -e # show extended fields 38 ./opensnoop -f O_WRONLY -f O_RDWR # only print calls for writing 39 ./opensnoop --cgroupmap mappath # only trace cgroups in this BPF map 40 ./opensnoop --mntnsmap mappath # only trace mount namespaces in the map 41""" 42parser = argparse.ArgumentParser( 43 description="Trace open() syscalls", 44 formatter_class=argparse.RawDescriptionHelpFormatter, 45 epilog=examples) 46parser.add_argument("-T", "--timestamp", action="store_true", 47 help="include timestamp on output") 48parser.add_argument("-U", "--print-uid", action="store_true", 49 help="print UID column") 50parser.add_argument("-x", "--failed", action="store_true", 51 help="only show failed opens") 52parser.add_argument("-p", "--pid", 53 help="trace this PID only") 54parser.add_argument("-t", "--tid", 55 help="trace this TID only") 56parser.add_argument("--cgroupmap", 57 help="trace cgroups in this BPF map only") 58parser.add_argument("--mntnsmap", 59 help="trace mount namespaces in this BPF map only") 60parser.add_argument("-u", "--uid", 61 help="trace this UID only") 62parser.add_argument("-d", "--duration", 63 help="total duration of trace in seconds") 64parser.add_argument("-n", "--name", 65 type=ArgString, 66 help="only print process names containing this name") 67parser.add_argument("--ebpf", action="store_true", 68 help=argparse.SUPPRESS) 69parser.add_argument("-e", "--extended_fields", action="store_true", 70 help="show extended fields") 71parser.add_argument("-f", "--flag_filter", action="append", 72 help="filter on flags argument (e.g., O_WRONLY)") 73args = parser.parse_args() 74debug = 0 75if args.duration: 76 args.duration = timedelta(seconds=int(args.duration)) 77flag_filter_mask = 0 78for flag in args.flag_filter or []: 79 if not flag.startswith('O_'): 80 exit("Bad flag: %s" % flag) 81 try: 82 flag_filter_mask |= getattr(os, flag) 83 except AttributeError: 84 exit("Bad flag: %s" % flag) 85 86# define BPF program 87bpf_text = """ 88#include <uapi/linux/ptrace.h> 89#include <uapi/linux/limits.h> 90#include <linux/sched.h> 91 92struct val_t { 93 u64 id; 94 char comm[TASK_COMM_LEN]; 95 const char *fname; 96 int flags; // EXTENDED_STRUCT_MEMBER 97}; 98 99struct data_t { 100 u64 id; 101 u64 ts; 102 u32 uid; 103 int ret; 104 char comm[TASK_COMM_LEN]; 105 char fname[NAME_MAX]; 106 int flags; // EXTENDED_STRUCT_MEMBER 107}; 108 109BPF_PERF_OUTPUT(events); 110""" 111 112bpf_text_kprobe = """ 113BPF_HASH(infotmp, u64, struct val_t); 114 115int trace_return(struct pt_regs *ctx) 116{ 117 u64 id = bpf_get_current_pid_tgid(); 118 struct val_t *valp; 119 struct data_t data = {}; 120 121 u64 tsp = bpf_ktime_get_ns(); 122 123 valp = infotmp.lookup(&id); 124 if (valp == 0) { 125 // missed entry 126 return 0; 127 } 128 bpf_probe_read_kernel(&data.comm, sizeof(data.comm), valp->comm); 129 bpf_probe_read_user(&data.fname, sizeof(data.fname), (void *)valp->fname); 130 data.id = valp->id; 131 data.ts = tsp / 1000; 132 data.uid = bpf_get_current_uid_gid(); 133 data.flags = valp->flags; // EXTENDED_STRUCT_MEMBER 134 data.ret = PT_REGS_RC(ctx); 135 136 events.perf_submit(ctx, &data, sizeof(data)); 137 infotmp.delete(&id); 138 139 return 0; 140} 141""" 142 143bpf_text_kprobe_header_open = """ 144int syscall__trace_entry_open(struct pt_regs *ctx, const char __user *filename, int flags) 145{ 146""" 147 148bpf_text_kprobe_header_openat = """ 149int syscall__trace_entry_openat(struct pt_regs *ctx, int dfd, const char __user *filename, int flags) 150{ 151""" 152 153bpf_text_kprobe_header_openat2 = """ 154#include <uapi/linux/openat2.h> 155int syscall__trace_entry_openat2(struct pt_regs *ctx, int dfd, const char __user *filename, struct open_how *how) 156{ 157 int flags = how->flags; 158""" 159 160bpf_text_kprobe_body = """ 161 struct val_t val = {}; 162 u64 id = bpf_get_current_pid_tgid(); 163 u32 pid = id >> 32; // PID is higher part 164 u32 tid = id; // Cast and get the lower part 165 u32 uid = bpf_get_current_uid_gid(); 166 167 PID_TID_FILTER 168 UID_FILTER 169 FLAGS_FILTER 170 171 if (container_should_be_filtered()) { 172 return 0; 173 } 174 175 if (bpf_get_current_comm(&val.comm, sizeof(val.comm)) == 0) { 176 val.id = id; 177 val.fname = filename; 178 val.flags = flags; // EXTENDED_STRUCT_MEMBER 179 infotmp.update(&id, &val); 180 } 181 182 return 0; 183}; 184""" 185 186bpf_text_kfunc_header_open = """ 187#if defined(CONFIG_ARCH_HAS_SYSCALL_WRAPPER) && !defined(__s390x__) 188KRETFUNC_PROBE(FNNAME, struct pt_regs *regs, int ret) 189{ 190 const char __user *filename = (char *)PT_REGS_PARM1(regs); 191 int flags = PT_REGS_PARM2(regs); 192#else 193KRETFUNC_PROBE(FNNAME, const char __user *filename, int flags, int ret) 194{ 195#endif 196""" 197 198bpf_text_kfunc_header_openat = """ 199#if defined(CONFIG_ARCH_HAS_SYSCALL_WRAPPER) && !defined(__s390x__) 200KRETFUNC_PROBE(FNNAME, struct pt_regs *regs, int ret) 201{ 202 int dfd = PT_REGS_PARM1(regs); 203 const char __user *filename = (char *)PT_REGS_PARM2(regs); 204 int flags = PT_REGS_PARM3(regs); 205#else 206KRETFUNC_PROBE(FNNAME, int dfd, const char __user *filename, int flags, int ret) 207{ 208#endif 209""" 210 211bpf_text_kfunc_header_openat2 = """ 212#include <uapi/linux/openat2.h> 213#if defined(CONFIG_ARCH_HAS_SYSCALL_WRAPPER) && !defined(__s390x__) 214KRETFUNC_PROBE(FNNAME, struct pt_regs *regs, int ret) 215{ 216 int dfd = PT_REGS_PARM1(regs); 217 const char __user *filename = (char *)PT_REGS_PARM2(regs); 218 struct open_how __user how; 219 int flags; 220 221 bpf_probe_read_user(&how, sizeof(struct open_how), (struct open_how*)PT_REGS_PARM3(regs)); 222 flags = how.flags; 223#else 224KRETFUNC_PROBE(FNNAME, int dfd, const char __user *filename, struct open_how __user *how, int ret) 225{ 226 int flags = how->flags; 227#endif 228""" 229 230bpf_text_kfunc_body = """ 231 u64 id = bpf_get_current_pid_tgid(); 232 u32 pid = id >> 32; // PID is higher part 233 u32 tid = id; // Cast and get the lower part 234 u32 uid = bpf_get_current_uid_gid(); 235 236 PID_TID_FILTER 237 UID_FILTER 238 FLAGS_FILTER 239 if (container_should_be_filtered()) { 240 return 0; 241 } 242 243 struct data_t data = {}; 244 bpf_get_current_comm(&data.comm, sizeof(data.comm)); 245 246 u64 tsp = bpf_ktime_get_ns(); 247 248 bpf_probe_read_user(&data.fname, sizeof(data.fname), (void *)filename); 249 data.id = id; 250 data.ts = tsp / 1000; 251 data.uid = bpf_get_current_uid_gid(); 252 data.flags = flags; // EXTENDED_STRUCT_MEMBER 253 data.ret = ret; 254 255 events.perf_submit(ctx, &data, sizeof(data)); 256 257 return 0; 258} 259""" 260 261b = BPF(text='') 262# open and openat are always in place since 2.6.16 263fnname_open = b.get_syscall_prefix().decode() + 'open' 264fnname_openat = b.get_syscall_prefix().decode() + 'openat' 265fnname_openat2 = b.get_syscall_prefix().decode() + 'openat2' 266if b.ksymname(fnname_openat2) == -1: 267 fnname_openat2 = None 268 269is_support_kfunc = BPF.support_kfunc() 270if is_support_kfunc: 271 bpf_text += bpf_text_kfunc_header_open.replace('FNNAME', fnname_open) 272 bpf_text += bpf_text_kfunc_body 273 274 bpf_text += bpf_text_kfunc_header_openat.replace('FNNAME', fnname_openat) 275 bpf_text += bpf_text_kfunc_body 276 277 if fnname_openat2: 278 bpf_text += bpf_text_kfunc_header_openat2.replace('FNNAME', fnname_openat2) 279 bpf_text += bpf_text_kfunc_body 280else: 281 bpf_text += bpf_text_kprobe 282 283 bpf_text += bpf_text_kprobe_header_open 284 bpf_text += bpf_text_kprobe_body 285 286 bpf_text += bpf_text_kprobe_header_openat 287 bpf_text += bpf_text_kprobe_body 288 289 if fnname_openat2: 290 bpf_text += bpf_text_kprobe_header_openat2 291 bpf_text += bpf_text_kprobe_body 292 293if args.tid: # TID trumps PID 294 bpf_text = bpf_text.replace('PID_TID_FILTER', 295 'if (tid != %s) { return 0; }' % args.tid) 296elif args.pid: 297 bpf_text = bpf_text.replace('PID_TID_FILTER', 298 'if (pid != %s) { return 0; }' % args.pid) 299else: 300 bpf_text = bpf_text.replace('PID_TID_FILTER', '') 301if args.uid: 302 bpf_text = bpf_text.replace('UID_FILTER', 303 'if (uid != %s) { return 0; }' % args.uid) 304else: 305 bpf_text = bpf_text.replace('UID_FILTER', '') 306bpf_text = filter_by_containers(args) + bpf_text 307if args.flag_filter: 308 bpf_text = bpf_text.replace('FLAGS_FILTER', 309 'if (!(flags & %d)) { return 0; }' % flag_filter_mask) 310else: 311 bpf_text = bpf_text.replace('FLAGS_FILTER', '') 312if not (args.extended_fields or args.flag_filter): 313 bpf_text = '\n'.join(x for x in bpf_text.split('\n') 314 if 'EXTENDED_STRUCT_MEMBER' not in x) 315if debug or args.ebpf: 316 print(bpf_text) 317 if args.ebpf: 318 exit() 319 320# initialize BPF 321b = BPF(text=bpf_text) 322if not is_support_kfunc: 323 b.attach_kprobe(event=fnname_open, fn_name="syscall__trace_entry_open") 324 b.attach_kretprobe(event=fnname_open, fn_name="trace_return") 325 326 b.attach_kprobe(event=fnname_openat, fn_name="syscall__trace_entry_openat") 327 b.attach_kretprobe(event=fnname_openat, fn_name="trace_return") 328 329 if fnname_openat2: 330 b.attach_kprobe(event=fnname_openat2, fn_name="syscall__trace_entry_openat2") 331 b.attach_kretprobe(event=fnname_openat2, fn_name="trace_return") 332 333initial_ts = 0 334 335# header 336if args.timestamp: 337 print("%-14s" % ("TIME(s)"), end="") 338if args.print_uid: 339 print("%-6s" % ("UID"), end="") 340print("%-6s %-16s %4s %3s " % 341 ("TID" if args.tid else "PID", "COMM", "FD", "ERR"), end="") 342if args.extended_fields: 343 print("%-9s" % ("FLAGS"), end="") 344print("PATH") 345 346# process event 347def print_event(cpu, data, size): 348 event = b["events"].event(data) 349 global initial_ts 350 351 # split return value into FD and errno columns 352 if event.ret >= 0: 353 fd_s = event.ret 354 err = 0 355 else: 356 fd_s = -1 357 err = - event.ret 358 359 if not initial_ts: 360 initial_ts = event.ts 361 362 if args.failed and (event.ret >= 0): 363 return 364 365 if args.name and bytes(args.name) not in event.comm: 366 return 367 368 if args.timestamp: 369 delta = event.ts - initial_ts 370 printb(b"%-14.9f" % (float(delta) / 1000000), nl="") 371 372 if args.print_uid: 373 printb(b"%-6d" % event.uid, nl="") 374 375 printb(b"%-6d %-16s %4d %3d " % 376 (event.id & 0xffffffff if args.tid else event.id >> 32, 377 event.comm, fd_s, err), nl="") 378 379 if args.extended_fields: 380 printb(b"%08o " % event.flags, nl="") 381 382 printb(b'%s' % event.fname) 383 384# loop with callback to print_event 385b["events"].open_perf_buffer(print_event, page_cnt=64) 386start_time = datetime.now() 387while not args.duration or datetime.now() - start_time < args.duration: 388 try: 389 b.perf_buffer_poll() 390 except KeyboardInterrupt: 391 exit() 392