1#!/usr/bin/python 2# @lint-avoid-python-3-compatibility-imports 3# 4# biosnoop Trace block device I/O and print details including issuing PID. 5# For Linux, uses BCC, eBPF. 6# 7# This uses in-kernel eBPF maps to cache process details (PID and comm) by I/O 8# request, as well as a starting timestamp for calculating I/O latency. 9# 10# Copyright (c) 2015 Brendan Gregg. 11# Licensed under the Apache License, Version 2.0 (the "License") 12# 13# 16-Sep-2015 Brendan Gregg Created this. 14 15from __future__ import print_function 16from bcc import BPF 17 18# load BPF program 19b = BPF(text=""" 20#include <uapi/linux/ptrace.h> 21#include <linux/blkdev.h> 22 23struct val_t { 24 u32 pid; 25 char name[TASK_COMM_LEN]; 26}; 27 28BPF_HASH(start, struct request *); 29BPF_HASH(infobyreq, struct request *, struct val_t); 30 31// cache PID and comm by-req 32int trace_pid_start(struct pt_regs *ctx, struct request *req) 33{ 34 struct val_t val = {}; 35 36 if (bpf_get_current_comm(&val.name, sizeof(val.name)) == 0) { 37 val.pid = bpf_get_current_pid_tgid(); 38 infobyreq.update(&req, &val); 39 } 40 41 return 0; 42} 43 44// time block I/O 45int trace_req_start(struct pt_regs *ctx, struct request *req) 46{ 47 u64 ts; 48 49 ts = bpf_ktime_get_ns(); 50 start.update(&req, &ts); 51 52 return 0; 53} 54 55// output 56int trace_req_completion(struct pt_regs *ctx, struct request *req) 57{ 58 u64 *tsp, delta; 59 u32 *pidp = 0; 60 struct val_t *valp; 61 62 // fetch timestamp and calculate delta 63 tsp = start.lookup(&req); 64 if (tsp == 0) { 65 // missed tracing issue 66 return 0; 67 } 68 delta = bpf_ktime_get_ns() - *tsp; 69 70 // 71 // Fetch and output issuing pid and comm. 72 // As bpf_trace_prink() is limited to a maximum of 1 string and 2 73 // integers, we'll use more than one to output the data. 74 // 75 valp = infobyreq.lookup(&req); 76 if (valp == 0) { 77 bpf_trace_printk("0 0 ? %d\\n", req->__data_len); 78 } else { 79 bpf_trace_printk("0 %d %s %d\\n", valp->pid, valp->name, 80 req->__data_len); 81 } 82 83 // output remaining details 84 if (req->cmd_flags & REQ_WRITE) { 85 bpf_trace_printk("1 W %s %d %d ?\\n", req->rq_disk->disk_name, 86 req->__sector, delta / 1000); 87 } else { 88 bpf_trace_printk("1 R %s %d %d ?\\n", req->rq_disk->disk_name, 89 req->__sector, delta / 1000); 90 } 91 92 start.delete(&req); 93 infobyreq.delete(&req); 94 95 return 0; 96} 97""", debug=0) 98b.attach_kprobe(event="blk_account_io_start", fn_name="trace_pid_start") 99b.attach_kprobe(event="blk_start_request", fn_name="trace_req_start") 100b.attach_kprobe(event="blk_mq_start_request", fn_name="trace_req_start") 101b.attach_kprobe(event="blk_account_io_completion", 102 fn_name="trace_req_completion") 103 104# header 105print("%-14s %-14s %-6s %-7s %-2s %-9s %-7s %7s" % ("TIME(s)", "COMM", "PID", 106 "DISK", "T", "SECTOR", "BYTES", "LAT(ms)")) 107 108start_ts = 0 109 110# format output 111while 1: 112 (task, pid, cpu, flags, ts, msg) = b.trace_fields() 113 args = msg.split(" ") 114 115 if start_ts == 0: 116 start_ts = ts 117 118 if args[0] == "0": 119 (real_pid, real_comm, bytes_s) = (args[1], args[2], args[3]) 120 continue 121 else: 122 (type_s, disk_s, sector_s, us_s) = (args[1], args[2], args[3], 123 args[4]) 124 125 ms = float(int(us_s, 10)) / 1000 126 127 print("%-14.9f %-14.14s %-6s %-7s %-2s %-9s %-7s %7.2f" % ( 128 ts - start_ts, real_comm, real_pid, disk_s, type_s, sector_s, 129 bytes_s, ms)) 130