1#!/usr/bin/python 2# 3# wakeuptime Summarize sleep to wakeup time by waker kernel stack 4# For Linux, uses BCC, eBPF. 5# 6# USAGE: wakeuptime [-h] [-u] [-p PID] [-v] [-f] [duration] 7# 8# Copyright 2016 Netflix, Inc. 9# Licensed under the Apache License, Version 2.0 (the "License") 10# 11# 14-Jan-2016 Brendan Gregg Created this. 12 13from __future__ import print_function 14from bcc import BPF 15from bcc.utils import printb 16from time import sleep, strftime 17import argparse 18import signal 19import errno 20from sys import stderr 21 22# arg validation 23def positive_int(val): 24 try: 25 ival = int(val) 26 except ValueError: 27 raise argparse.ArgumentTypeError("must be an integer") 28 29 if ival < 0: 30 raise argparse.ArgumentTypeError("must be positive") 31 return ival 32 33def positive_nonzero_int(val): 34 ival = positive_int(val) 35 if ival == 0: 36 raise argparse.ArgumentTypeError("must be nonzero") 37 return ival 38 39# arguments 40examples = """examples: 41 ./wakeuptime # trace blocked time with waker stacks 42 ./wakeuptime 5 # trace for 5 seconds only 43 ./wakeuptime -f 5 # 5 seconds, and output in folded format 44 ./wakeuptime -u # don't include kernel threads (user only) 45 ./wakeuptime -p 185 # trace for PID 185 only 46""" 47parser = argparse.ArgumentParser( 48 description="Summarize sleep to wakeup time by waker kernel stack", 49 formatter_class=argparse.RawDescriptionHelpFormatter, 50 epilog=examples) 51parser.add_argument("-u", "--useronly", action="store_true", 52 help="user threads only (no kernel threads)") 53parser.add_argument("-p", "--pid", 54 type=positive_int, 55 help="trace this PID only") 56parser.add_argument("-v", "--verbose", action="store_true", 57 help="show raw addresses") 58parser.add_argument("-f", "--folded", action="store_true", 59 help="output folded format") 60parser.add_argument("--stack-storage-size", default=1024, 61 type=positive_nonzero_int, 62 help="the number of unique stack traces that can be stored and " 63 "displayed (default 1024)") 64parser.add_argument("duration", nargs="?", default=99999999, 65 type=positive_nonzero_int, 66 help="duration of trace, in seconds") 67parser.add_argument("-m", "--min-block-time", default=1, 68 type=positive_nonzero_int, 69 help="the amount of time in microseconds over which we " + 70 "store traces (default 1)") 71parser.add_argument("-M", "--max-block-time", default=(1 << 64) - 1, 72 type=positive_nonzero_int, 73 help="the amount of time in microseconds under which we " + 74 "store traces (default U64_MAX)") 75parser.add_argument("--ebpf", action="store_true", 76 help=argparse.SUPPRESS) 77args = parser.parse_args() 78folded = args.folded 79duration = int(args.duration) 80debug = 0 81if args.pid and args.useronly: 82 parser.error("use either -p or -u.") 83 84# signal handler 85def signal_ignore(signal, frame): 86 print() 87 88# define BPF program 89bpf_text = """ 90#include <uapi/linux/ptrace.h> 91#include <linux/sched.h> 92 93#define MINBLOCK_US MINBLOCK_US_VALUEULL 94#define MAXBLOCK_US MAXBLOCK_US_VALUEULL 95 96struct key_t { 97 int w_k_stack_id; 98 char waker[TASK_COMM_LEN]; 99 char target[TASK_COMM_LEN]; 100}; 101BPF_HASH(counts, struct key_t); 102BPF_HASH(start, u32); 103BPF_STACK_TRACE(stack_traces, STACK_STORAGE_SIZE); 104 105static int offcpu_sched_switch() { 106 u64 pid_tgid = bpf_get_current_pid_tgid(); 107 u32 pid = pid_tgid >> 32; 108 u32 tid = (u32)pid_tgid; 109 struct task_struct *p = (struct task_struct *) bpf_get_current_task(); 110 u64 ts; 111 112 if (FILTER) 113 return 0; 114 115 ts = bpf_ktime_get_ns(); 116 start.update(&tid, &ts); 117 return 0; 118} 119 120static int wakeup(ARG0, struct task_struct *p) { 121 u32 pid = p->tgid; 122 u32 tid = p->pid; 123 u64 delta, *tsp, ts; 124 125 tsp = start.lookup(&tid); 126 if (tsp == 0) 127 return 0; // missed start 128 start.delete(&tid); 129 130 if (FILTER) 131 return 0; 132 133 // calculate delta time 134 delta = bpf_ktime_get_ns() - *tsp; 135 delta = delta / 1000; 136 if ((delta < MINBLOCK_US) || (delta > MAXBLOCK_US)) 137 return 0; 138 139 struct key_t key = {}; 140 141 key.w_k_stack_id = stack_traces.get_stackid(ctx, 0); 142 bpf_probe_read_kernel(&key.target, sizeof(key.target), p->comm); 143 bpf_get_current_comm(&key.waker, sizeof(key.waker)); 144 145 counts.atomic_increment(key, delta); 146 return 0; 147} 148""" 149 150bpf_text_kprobe = """ 151int offcpu(struct pt_regs *ctx) { 152 return offcpu_sched_switch(); 153} 154 155int waker(struct pt_regs *ctx, struct task_struct *p) { 156 return wakeup(ctx, p); 157} 158""" 159 160bpf_text_raw_tp = """ 161RAW_TRACEPOINT_PROBE(sched_switch) 162{ 163 // TP_PROTO(bool preempt, struct task_struct *prev, struct task_struct *next) 164 return offcpu_sched_switch(); 165} 166 167RAW_TRACEPOINT_PROBE(sched_wakeup) 168{ 169 // TP_PROTO(struct task_struct *p) 170 struct task_struct *p = (struct task_struct *)ctx->args[0]; 171 return wakeup(ctx, p); 172} 173""" 174 175is_supported_raw_tp = BPF.support_raw_tracepoint() 176if is_supported_raw_tp: 177 bpf_text += bpf_text_raw_tp 178else: 179 bpf_text += bpf_text_kprobe 180 181if args.pid: 182 filter = 'pid != %s' % args.pid 183elif args.useronly: 184 filter = 'p->flags & PF_KTHREAD' 185else: 186 filter = '0' 187bpf_text = bpf_text.replace('FILTER', filter) 188 189if is_supported_raw_tp: 190 arg0 = 'struct bpf_raw_tracepoint_args *ctx' 191else: 192 arg0 = 'struct pt_regs *ctx' 193bpf_text = bpf_text.replace('ARG0', arg0) 194 195# set stack storage size 196bpf_text = bpf_text.replace('STACK_STORAGE_SIZE', str(args.stack_storage_size)) 197bpf_text = bpf_text.replace('MINBLOCK_US_VALUE', str(args.min_block_time)) 198bpf_text = bpf_text.replace('MAXBLOCK_US_VALUE', str(args.max_block_time)) 199 200if debug or args.ebpf: 201 print(bpf_text) 202 if args.ebpf: 203 exit() 204 205# initialize BPF 206b = BPF(text=bpf_text) 207if not is_supported_raw_tp: 208 b.attach_kprobe(event="schedule", fn_name="offcpu") 209 b.attach_kprobe(event="try_to_wake_up", fn_name="waker") 210 matched = b.num_open_kprobes() 211 if matched == 0: 212 print("0 functions traced. Exiting.") 213 exit() 214 215# header 216if not folded: 217 print("Tracing blocked time (us) by kernel stack", end="") 218 if duration < 99999999: 219 print(" for %d secs." % duration) 220 else: 221 print("... Hit Ctrl-C to end.") 222 223# output 224while (1): 225 try: 226 sleep(duration) 227 except KeyboardInterrupt: 228 # as cleanup can take many seconds, trap Ctrl-C: 229 signal.signal(signal.SIGINT, signal_ignore) 230 231 if not folded: 232 print() 233 missing_stacks = 0 234 has_enomem = False 235 counts = b.get_table("counts") 236 stack_traces = b.get_table("stack_traces") 237 for k, v in sorted(counts.items(), key=lambda counts: counts[1].value): 238 # handle get_stackid errors 239 # check for an ENOMEM error 240 if k.w_k_stack_id == -errno.ENOMEM: 241 missing_stacks += 1 242 continue 243 244 waker_kernel_stack = [] if k.w_k_stack_id < 1 else \ 245 reversed(list(stack_traces.walk(k.w_k_stack_id))[1:]) 246 247 if folded: 248 # print folded stack output 249 line = \ 250 [k.waker] + \ 251 [b.ksym(addr) 252 for addr in reversed(list(waker_kernel_stack))] + \ 253 [k.target] 254 printb(b"%s %d" % (b";".join(line), v.value)) 255 else: 256 # print default multi-line stack output 257 printb(b" %-16s %s" % (b"target:", k.target)) 258 for addr in waker_kernel_stack: 259 printb(b" %-16x %s" % (addr, b.ksym(addr))) 260 printb(b" %-16s %s" % (b"waker:", k.waker)) 261 print(" %d\n" % v.value) 262 counts.clear() 263 264 if missing_stacks > 0: 265 enomem_str = " Consider increasing --stack-storage-size." 266 print("WARNING: %d stack traces could not be displayed.%s" % 267 (missing_stacks, enomem_str), 268 file=stderr) 269 270 if not folded: 271 print("Detaching...") 272 exit() 273