• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/python
2# @lint-avoid-python-3-compatibility-imports
3#
4# biolatency    Summarize block device I/O latency as a histogram.
5#       For Linux, uses BCC, eBPF.
6#
7# USAGE: biolatency [-h] [-T] [-Q] [-m] [-D] [-F] [-e] [-j] [interval] [count]
8#
9# Copyright (c) 2015 Brendan Gregg.
10# Licensed under the Apache License, Version 2.0 (the "License")
11#
12# 20-Sep-2015   Brendan Gregg   Created this.
13
14from __future__ import print_function
15from bcc import BPF
16from time import sleep, strftime
17import argparse
18import ctypes as ct
19
20# arguments
21examples = """examples:
22    ./biolatency                    # summarize block I/O latency as a histogram
23    ./biolatency 1 10               # print 1 second summaries, 10 times
24    ./biolatency -mT 1              # 1s summaries, milliseconds, and timestamps
25    ./biolatency -Q                 # include OS queued time in I/O time
26    ./biolatency -D                 # show each disk device separately
27    ./biolatency -F                 # show I/O flags separately
28    ./biolatency -j                 # print a dictionary
29    ./biolatency -e                 # show extension summary(total, average)
30"""
31parser = argparse.ArgumentParser(
32    description="Summarize block device I/O latency as a histogram",
33    formatter_class=argparse.RawDescriptionHelpFormatter,
34    epilog=examples)
35parser.add_argument("-T", "--timestamp", action="store_true",
36    help="include timestamp on output")
37parser.add_argument("-Q", "--queued", action="store_true",
38    help="include OS queued time in I/O time")
39parser.add_argument("-m", "--milliseconds", action="store_true",
40    help="millisecond histogram")
41parser.add_argument("-D", "--disks", action="store_true",
42    help="print a histogram per disk device")
43parser.add_argument("-F", "--flags", action="store_true",
44    help="print a histogram per set of I/O flags")
45parser.add_argument("-e", "--extension", action="store_true",
46    help="summarize average/total value")
47parser.add_argument("interval", nargs="?", default=99999999,
48    help="output interval, in seconds")
49parser.add_argument("count", nargs="?", default=99999999,
50    help="number of outputs")
51parser.add_argument("--ebpf", action="store_true",
52    help=argparse.SUPPRESS)
53parser.add_argument("-j", "--json", action="store_true",
54    help="json output")
55
56args = parser.parse_args()
57countdown = int(args.count)
58debug = 0
59
60if args.flags and args.disks:
61    print("ERROR: can only use -D or -F. Exiting.")
62    exit()
63
64# define BPF program
65bpf_text = """
66#include <uapi/linux/ptrace.h>
67#include <linux/blk-mq.h>
68
69typedef struct disk_key {
70    char disk[DISK_NAME_LEN];
71    u64 slot;
72} disk_key_t;
73
74typedef struct flag_key {
75    u64 flags;
76    u64 slot;
77} flag_key_t;
78
79typedef struct ext_val {
80    u64 total;
81    u64 count;
82} ext_val_t;
83
84BPF_HASH(start, struct request *);
85STORAGE
86
87// time block I/O
88int trace_req_start(struct pt_regs *ctx, struct request *req)
89{
90    u64 ts = bpf_ktime_get_ns();
91    start.update(&req, &ts);
92    return 0;
93}
94
95// output
96int trace_req_done(struct pt_regs *ctx, struct request *req)
97{
98    u64 *tsp, delta;
99
100    // fetch timestamp and calculate delta
101    tsp = start.lookup(&req);
102    if (tsp == 0) {
103        return 0;   // missed issue
104    }
105    delta = bpf_ktime_get_ns() - *tsp;
106
107    EXTENSION
108
109    FACTOR
110
111    // store as histogram
112    STORE
113
114    start.delete(&req);
115    return 0;
116}
117"""
118
119# code substitutions
120if args.milliseconds:
121    bpf_text = bpf_text.replace('FACTOR', 'delta /= 1000000;')
122    label = "msecs"
123else:
124    bpf_text = bpf_text.replace('FACTOR', 'delta /= 1000;')
125    label = "usecs"
126
127storage_str = ""
128store_str = ""
129if args.disks:
130    storage_str += "BPF_HISTOGRAM(dist, disk_key_t);"
131    disks_str = """
132    disk_key_t key = {.slot = bpf_log2l(delta)};
133    void *__tmp = (void *)req->__RQ_DISK__->disk_name;
134    bpf_probe_read(&key.disk, sizeof(key.disk), __tmp);
135    dist.atomic_increment(key);
136    """
137    if BPF.kernel_struct_has_field(b'request', b'rq_disk'):
138        store_str += disks_str.replace('__RQ_DISK__', 'rq_disk')
139    else:
140        store_str += disks_str.replace('__RQ_DISK__', 'q->disk')
141elif args.flags:
142    storage_str += "BPF_HISTOGRAM(dist, flag_key_t);"
143    store_str += """
144    flag_key_t key = {.slot = bpf_log2l(delta)};
145    key.flags = req->cmd_flags;
146    dist.atomic_increment(key);
147    """
148else:
149    storage_str += "BPF_HISTOGRAM(dist);"
150    store_str += "dist.atomic_increment(bpf_log2l(delta));"
151
152if args.extension:
153    storage_str += "BPF_ARRAY(extension, ext_val_t, 1);"
154    bpf_text = bpf_text.replace('EXTENSION', """
155    u32 index = 0;
156    ext_val_t *ext_val = extension.lookup(&index);
157    if (ext_val) {
158        lock_xadd(&ext_val->total, delta);
159        lock_xadd(&ext_val->count, 1);
160    }
161    """)
162else:
163    bpf_text = bpf_text.replace('EXTENSION', '')
164bpf_text = bpf_text.replace("STORAGE", storage_str)
165bpf_text = bpf_text.replace("STORE", store_str)
166
167if debug or args.ebpf:
168    print(bpf_text)
169    if args.ebpf:
170        exit()
171
172# load BPF program
173b = BPF(text=bpf_text)
174if args.queued:
175    if BPF.get_kprobe_functions(b'__blk_account_io_start'):
176        b.attach_kprobe(event="__blk_account_io_start", fn_name="trace_req_start")
177    else:
178        b.attach_kprobe(event="blk_account_io_start", fn_name="trace_req_start")
179else:
180    if BPF.get_kprobe_functions(b'blk_start_request'):
181        b.attach_kprobe(event="blk_start_request", fn_name="trace_req_start")
182    b.attach_kprobe(event="blk_mq_start_request", fn_name="trace_req_start")
183if BPF.get_kprobe_functions(b'__blk_account_io_done'):
184    b.attach_kprobe(event="__blk_account_io_done", fn_name="trace_req_done")
185else:
186    b.attach_kprobe(event="blk_account_io_done", fn_name="trace_req_done")
187
188if not args.json:
189    print("Tracing block device I/O... Hit Ctrl-C to end.")
190
191def disk_print(s):
192    disk = s.decode('utf-8', 'replace')
193    if not disk:
194        disk = "<unknown>"
195    return disk
196
197# see blk_fill_rwbs():
198req_opf = {
199    0: "Read",
200    1: "Write",
201    2: "Flush",
202    3: "Discard",
203    5: "SecureErase",
204    6: "ZoneReset",
205    7: "WriteSame",
206    9: "WriteZeros"
207}
208REQ_OP_BITS = 8
209REQ_OP_MASK = ((1 << REQ_OP_BITS) - 1)
210REQ_SYNC = 1 << (REQ_OP_BITS + 3)
211REQ_META = 1 << (REQ_OP_BITS + 4)
212REQ_PRIO = 1 << (REQ_OP_BITS + 5)
213REQ_NOMERGE = 1 << (REQ_OP_BITS + 6)
214REQ_IDLE = 1 << (REQ_OP_BITS + 7)
215REQ_FUA = 1 << (REQ_OP_BITS + 9)
216REQ_RAHEAD = 1 << (REQ_OP_BITS + 11)
217REQ_BACKGROUND = 1 << (REQ_OP_BITS + 12)
218REQ_NOWAIT = 1 << (REQ_OP_BITS + 13)
219def flags_print(flags):
220    desc = ""
221    # operation
222    if flags & REQ_OP_MASK in req_opf:
223        desc = req_opf[flags & REQ_OP_MASK]
224    else:
225        desc = "Unknown"
226    # flags
227    if flags & REQ_SYNC:
228        desc = "Sync-" + desc
229    if flags & REQ_META:
230        desc = "Metadata-" + desc
231    if flags & REQ_FUA:
232        desc = "ForcedUnitAccess-" + desc
233    if flags & REQ_PRIO:
234        desc = "Priority-" + desc
235    if flags & REQ_NOMERGE:
236        desc = "NoMerge-" + desc
237    if flags & REQ_IDLE:
238        desc = "Idle-" + desc
239    if flags & REQ_RAHEAD:
240        desc = "ReadAhead-" + desc
241    if flags & REQ_BACKGROUND:
242        desc = "Background-" + desc
243    if flags & REQ_NOWAIT:
244        desc = "NoWait-" + desc
245    return desc
246
247# output
248exiting = 0 if args.interval else 1
249dist = b.get_table("dist")
250if args.extension:
251    extension = b.get_table("extension")
252while (1):
253    try:
254        sleep(int(args.interval))
255    except KeyboardInterrupt:
256        exiting = 1
257
258    print()
259    if args.json:
260        if args.timestamp:
261            print("%-8s\n" % strftime("%H:%M:%S"), end="")
262
263        if args.flags:
264            dist.print_json_hist(label, "flags", flags_print)
265        else:
266            dist.print_json_hist(label, "disk", disk_print)
267
268    else:
269        if args.timestamp:
270            print("%-8s\n" % strftime("%H:%M:%S"), end="")
271
272        if args.flags:
273            dist.print_log2_hist(label, "flags", flags_print)
274        else:
275            dist.print_log2_hist(label, "disk", disk_print)
276        if args.extension:
277            total = extension[0].total
278            counts = extension[0].count
279            if counts > 0:
280                if label == 'msecs':
281                    total /= 1000000
282                elif label == 'usecs':
283                    total /= 1000
284                avg = total / counts
285                print("\navg = %ld %s, total: %ld %s, count: %ld\n" %
286                      (total / counts, label, total, label, counts))
287            extension.clear()
288
289    dist.clear()
290
291    countdown -= 1
292    if exiting or countdown == 0:
293        exit()
294