1#!/usr/bin/python 2# @lint-avoid-python-3-compatibility-imports 3# 4# biolatency Summarize block device I/O latency as a histogram. 5# For Linux, uses BCC, eBPF. 6# 7# USAGE: biolatency [-h] [-T] [-Q] [-m] [-D] [-F] [-e] [-j] [interval] [count] 8# 9# Copyright (c) 2015 Brendan Gregg. 10# Licensed under the Apache License, Version 2.0 (the "License") 11# 12# 20-Sep-2015 Brendan Gregg Created this. 13 14from __future__ import print_function 15from bcc import BPF 16from time import sleep, strftime 17import argparse 18import ctypes as ct 19 20# arguments 21examples = """examples: 22 ./biolatency # summarize block I/O latency as a histogram 23 ./biolatency 1 10 # print 1 second summaries, 10 times 24 ./biolatency -mT 1 # 1s summaries, milliseconds, and timestamps 25 ./biolatency -Q # include OS queued time in I/O time 26 ./biolatency -D # show each disk device separately 27 ./biolatency -F # show I/O flags separately 28 ./biolatency -j # print a dictionary 29 ./biolatency -e # show extension summary(total, average) 30""" 31parser = argparse.ArgumentParser( 32 description="Summarize block device I/O latency as a histogram", 33 formatter_class=argparse.RawDescriptionHelpFormatter, 34 epilog=examples) 35parser.add_argument("-T", "--timestamp", action="store_true", 36 help="include timestamp on output") 37parser.add_argument("-Q", "--queued", action="store_true", 38 help="include OS queued time in I/O time") 39parser.add_argument("-m", "--milliseconds", action="store_true", 40 help="millisecond histogram") 41parser.add_argument("-D", "--disks", action="store_true", 42 help="print a histogram per disk device") 43parser.add_argument("-F", "--flags", action="store_true", 44 help="print a histogram per set of I/O flags") 45parser.add_argument("-e", "--extension", action="store_true", 46 help="summarize average/total value") 47parser.add_argument("interval", nargs="?", default=99999999, 48 help="output interval, in seconds") 49parser.add_argument("count", nargs="?", default=99999999, 50 help="number of outputs") 51parser.add_argument("--ebpf", action="store_true", 52 help=argparse.SUPPRESS) 53parser.add_argument("-j", "--json", action="store_true", 54 help="json output") 55 56args = parser.parse_args() 57countdown = int(args.count) 58debug = 0 59 60if args.flags and args.disks: 61 print("ERROR: can only use -D or -F. Exiting.") 62 exit() 63 64# define BPF program 65bpf_text = """ 66#include <uapi/linux/ptrace.h> 67#include <linux/blk-mq.h> 68 69typedef struct disk_key { 70 char disk[DISK_NAME_LEN]; 71 u64 slot; 72} disk_key_t; 73 74typedef struct flag_key { 75 u64 flags; 76 u64 slot; 77} flag_key_t; 78 79typedef struct ext_val { 80 u64 total; 81 u64 count; 82} ext_val_t; 83 84BPF_HASH(start, struct request *); 85STORAGE 86 87// time block I/O 88int trace_req_start(struct pt_regs *ctx, struct request *req) 89{ 90 u64 ts = bpf_ktime_get_ns(); 91 start.update(&req, &ts); 92 return 0; 93} 94 95// output 96int trace_req_done(struct pt_regs *ctx, struct request *req) 97{ 98 u64 *tsp, delta; 99 100 // fetch timestamp and calculate delta 101 tsp = start.lookup(&req); 102 if (tsp == 0) { 103 return 0; // missed issue 104 } 105 delta = bpf_ktime_get_ns() - *tsp; 106 107 EXTENSION 108 109 FACTOR 110 111 // store as histogram 112 STORE 113 114 start.delete(&req); 115 return 0; 116} 117""" 118 119# code substitutions 120if args.milliseconds: 121 bpf_text = bpf_text.replace('FACTOR', 'delta /= 1000000;') 122 label = "msecs" 123else: 124 bpf_text = bpf_text.replace('FACTOR', 'delta /= 1000;') 125 label = "usecs" 126 127storage_str = "" 128store_str = "" 129if args.disks: 130 storage_str += "BPF_HISTOGRAM(dist, disk_key_t);" 131 disks_str = """ 132 disk_key_t key = {.slot = bpf_log2l(delta)}; 133 void *__tmp = (void *)req->__RQ_DISK__->disk_name; 134 bpf_probe_read(&key.disk, sizeof(key.disk), __tmp); 135 dist.atomic_increment(key); 136 """ 137 if BPF.kernel_struct_has_field(b'request', b'rq_disk'): 138 store_str += disks_str.replace('__RQ_DISK__', 'rq_disk') 139 else: 140 store_str += disks_str.replace('__RQ_DISK__', 'q->disk') 141elif args.flags: 142 storage_str += "BPF_HISTOGRAM(dist, flag_key_t);" 143 store_str += """ 144 flag_key_t key = {.slot = bpf_log2l(delta)}; 145 key.flags = req->cmd_flags; 146 dist.atomic_increment(key); 147 """ 148else: 149 storage_str += "BPF_HISTOGRAM(dist);" 150 store_str += "dist.atomic_increment(bpf_log2l(delta));" 151 152if args.extension: 153 storage_str += "BPF_ARRAY(extension, ext_val_t, 1);" 154 bpf_text = bpf_text.replace('EXTENSION', """ 155 u32 index = 0; 156 ext_val_t *ext_val = extension.lookup(&index); 157 if (ext_val) { 158 lock_xadd(&ext_val->total, delta); 159 lock_xadd(&ext_val->count, 1); 160 } 161 """) 162else: 163 bpf_text = bpf_text.replace('EXTENSION', '') 164bpf_text = bpf_text.replace("STORAGE", storage_str) 165bpf_text = bpf_text.replace("STORE", store_str) 166 167if debug or args.ebpf: 168 print(bpf_text) 169 if args.ebpf: 170 exit() 171 172# load BPF program 173b = BPF(text=bpf_text) 174if args.queued: 175 if BPF.get_kprobe_functions(b'__blk_account_io_start'): 176 b.attach_kprobe(event="__blk_account_io_start", fn_name="trace_req_start") 177 else: 178 b.attach_kprobe(event="blk_account_io_start", fn_name="trace_req_start") 179else: 180 if BPF.get_kprobe_functions(b'blk_start_request'): 181 b.attach_kprobe(event="blk_start_request", fn_name="trace_req_start") 182 b.attach_kprobe(event="blk_mq_start_request", fn_name="trace_req_start") 183if BPF.get_kprobe_functions(b'__blk_account_io_done'): 184 b.attach_kprobe(event="__blk_account_io_done", fn_name="trace_req_done") 185else: 186 b.attach_kprobe(event="blk_account_io_done", fn_name="trace_req_done") 187 188if not args.json: 189 print("Tracing block device I/O... Hit Ctrl-C to end.") 190 191def disk_print(s): 192 disk = s.decode('utf-8', 'replace') 193 if not disk: 194 disk = "<unknown>" 195 return disk 196 197# see blk_fill_rwbs(): 198req_opf = { 199 0: "Read", 200 1: "Write", 201 2: "Flush", 202 3: "Discard", 203 5: "SecureErase", 204 6: "ZoneReset", 205 7: "WriteSame", 206 9: "WriteZeros" 207} 208REQ_OP_BITS = 8 209REQ_OP_MASK = ((1 << REQ_OP_BITS) - 1) 210REQ_SYNC = 1 << (REQ_OP_BITS + 3) 211REQ_META = 1 << (REQ_OP_BITS + 4) 212REQ_PRIO = 1 << (REQ_OP_BITS + 5) 213REQ_NOMERGE = 1 << (REQ_OP_BITS + 6) 214REQ_IDLE = 1 << (REQ_OP_BITS + 7) 215REQ_FUA = 1 << (REQ_OP_BITS + 9) 216REQ_RAHEAD = 1 << (REQ_OP_BITS + 11) 217REQ_BACKGROUND = 1 << (REQ_OP_BITS + 12) 218REQ_NOWAIT = 1 << (REQ_OP_BITS + 13) 219def flags_print(flags): 220 desc = "" 221 # operation 222 if flags & REQ_OP_MASK in req_opf: 223 desc = req_opf[flags & REQ_OP_MASK] 224 else: 225 desc = "Unknown" 226 # flags 227 if flags & REQ_SYNC: 228 desc = "Sync-" + desc 229 if flags & REQ_META: 230 desc = "Metadata-" + desc 231 if flags & REQ_FUA: 232 desc = "ForcedUnitAccess-" + desc 233 if flags & REQ_PRIO: 234 desc = "Priority-" + desc 235 if flags & REQ_NOMERGE: 236 desc = "NoMerge-" + desc 237 if flags & REQ_IDLE: 238 desc = "Idle-" + desc 239 if flags & REQ_RAHEAD: 240 desc = "ReadAhead-" + desc 241 if flags & REQ_BACKGROUND: 242 desc = "Background-" + desc 243 if flags & REQ_NOWAIT: 244 desc = "NoWait-" + desc 245 return desc 246 247# output 248exiting = 0 if args.interval else 1 249dist = b.get_table("dist") 250if args.extension: 251 extension = b.get_table("extension") 252while (1): 253 try: 254 sleep(int(args.interval)) 255 except KeyboardInterrupt: 256 exiting = 1 257 258 print() 259 if args.json: 260 if args.timestamp: 261 print("%-8s\n" % strftime("%H:%M:%S"), end="") 262 263 if args.flags: 264 dist.print_json_hist(label, "flags", flags_print) 265 else: 266 dist.print_json_hist(label, "disk", disk_print) 267 268 else: 269 if args.timestamp: 270 print("%-8s\n" % strftime("%H:%M:%S"), end="") 271 272 if args.flags: 273 dist.print_log2_hist(label, "flags", flags_print) 274 else: 275 dist.print_log2_hist(label, "disk", disk_print) 276 if args.extension: 277 total = extension[0].total 278 counts = extension[0].count 279 if counts > 0: 280 if label == 'msecs': 281 total /= 1000000 282 elif label == 'usecs': 283 total /= 1000 284 avg = total / counts 285 print("\navg = %ld %s, total: %ld %s, count: %ld\n" % 286 (total / counts, label, total, label, counts)) 287 extension.clear() 288 289 dist.clear() 290 291 countdown -= 1 292 if exiting or countdown == 0: 293 exit() 294