#!/usr/bin/python
# @lint-avoid-python-3-compatibility-imports
#
# statsnoop Trace stat() syscalls.
#           For Linux, uses BCC, eBPF. Embedded C.
#
# USAGE: statsnoop [-h] [-t] [-x] [-p PID]
#
# Copyright 2016 Netflix, Inc.
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 08-Feb-2016   Brendan Gregg   Created this.
# 17-Feb-2016   Allan McAleavy updated for BPF_PERF_OUTPUT

from __future__ import print_function
from bcc import BPF
import argparse

# arguments
examples = """examples:
    ./statsnoop           # trace all stat() syscalls
    ./statsnoop -t        # include timestamps
    ./statsnoop -x        # only show failed stats
    ./statsnoop -p 181    # only trace PID 181
"""
parser = argparse.ArgumentParser(
    description="Trace stat() syscalls",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)
parser.add_argument("-t", "--timestamp", action="store_true",
    help="include timestamp on output")
parser.add_argument("-x", "--failed", action="store_true",
    help="only show failed stats")
# The PID is spliced into the C source below, so it must be validated as an
# integer here: a free-form string would either break BPF compilation with an
# obscure error or inject arbitrary C into the program.
parser.add_argument("-p", "--pid", type=int,
    help="trace this PID only")
# Hidden flag: print the generated BPF program and exit without tracing.
parser.add_argument("--ebpf", action="store_true",
    help=argparse.SUPPRESS)
args = parser.parse_args()
debug = 0

# define BPF program
#
# syscall__entry stashes the user-space filename pointer in the `infotmp`
# hash, keyed by thread id. trace_return looks the pointer up again on
# syscall return, copies the path, comm, timestamp and return value into a
# data_t record, submits it through the `events` perf buffer and deletes the
# hash entry. The FILTER token is replaced below with the optional PID check.
bpf_text = """
#include <uapi/linux/ptrace.h>
#include <uapi/linux/limits.h>
#include <linux/sched.h>

struct val_t {
    const char *fname;
};

struct data_t {
    u32 pid;
    u64 ts_ns;
    int ret;
    char comm[TASK_COMM_LEN];
    char fname[NAME_MAX];
};

BPF_HASH(infotmp, u32, struct val_t);
BPF_PERF_OUTPUT(events);

int syscall__entry(struct pt_regs *ctx, const char __user *filename)
{
    struct val_t val = {};
    u64 pid_tgid = bpf_get_current_pid_tgid();
    u32 pid = pid_tgid >> 32;
    u32 tid = (u32)pid_tgid;

    FILTER
    val.fname = filename;
    infotmp.update(&tid, &val);

    return 0;
};

int trace_return(struct pt_regs *ctx)
{
    u64 pid_tgid = bpf_get_current_pid_tgid();
    u32 tid = (u32)pid_tgid;
    struct val_t *valp;

    valp = infotmp.lookup(&tid);
    if (valp == 0) {
        // missed entry
        return 0;
    }

    struct data_t data = {.pid = pid_tgid >> 32};
    bpf_probe_read_user(&data.fname, sizeof(data.fname), (void *)valp->fname);
    bpf_get_current_comm(&data.comm, sizeof(data.comm));
    data.ts_ns = bpf_ktime_get_ns();
    data.ret = PT_REGS_RC(ctx);

    events.perf_submit(ctx, &data, sizeof(data));
    infotmp.delete(&tid);

    return 0;
}
"""
# Compare against None rather than truthiness so that "-p 0" still installs
# the filter instead of silently tracing every process.
if args.pid is not None:
    bpf_text = bpf_text.replace('FILTER',
        'if (pid != %d) { return 0; }' % args.pid)
else:
    bpf_text = bpf_text.replace('FILTER', '')
if debug or args.ebpf:
    print(bpf_text)
    if args.ebpf:
        exit()

# initialize BPF
b = BPF(text=bpf_text)

# For POSIX compliance, all architectures implement these system calls, but
# the name of the actual kernel entry point may differ, so check that each
# entry point exists before attaching the probes. Syscalls whose symbol is
# not found are skipped.
for syscall in ("stat", "statfs", "newstat"):
    syscall_fnname = b.get_syscall_fnname(syscall)
    if BPF.ksymname(syscall_fnname) != -1:
        b.attach_kprobe(event=syscall_fnname, fn_name="syscall__entry")
        b.attach_kretprobe(event=syscall_fnname, fn_name="trace_return")

# timestamp of the first printed event; TIME(s) is reported relative to it
start_ts = 0

# header
if args.timestamp:
    print("%-14s" % ("TIME(s)"), end="")
print("%-7s %-16s %4s %3s %s" % ("PID", "COMM", "FD", "ERR", "PATH"))


# process event
def print_event(cpu, data, size):
    """Perf buffer callback: print one output row for a traced syscall.

    cpu and size are supplied by the perf buffer machinery and are not used
    here; data is the raw record, decoded into the BPF program's data_t
    layout via b["events"].event().

    With -x, successful calls (ret >= 0) are dropped before anything is
    printed, and they do not establish the timestamp baseline either.
    """
    global start_ts

    event = b["events"].event(data)

    # split return value into FD and errno columns: a non-negative return is
    # shown as-is under FD with ERR 0, a negative return is shown as FD -1
    # with ERR set to the negated return value
    if event.ret >= 0:
        if args.failed:
            return
        fd_s = event.ret
        err = 0
    else:
        fd_s = -1
        err = -event.ret

    if start_ts == 0:
        start_ts = event.ts_ns

    if args.timestamp:
        print("%-14.9f" % (float(event.ts_ns - start_ts) / 1000000000), end="")

    # comm and fname arrive as raw bytes; undecodable sequences are replaced
    # rather than raising, so a odd path name cannot kill the tracer
    print("%-7d %-16s %4d %3d %s" % (event.pid,
        event.comm.decode('utf-8', 'replace'), fd_s, err,
        event.fname.decode('utf-8', 'replace')))


# loop with callback to print_event
b["events"].open_perf_buffer(print_event, page_cnt=64)
while 1:
    try:
        b.perf_buffer_poll()
    except KeyboardInterrupt:
        exit()