#!/usr/bin/env python
#
# wakeuptime    Summarize sleep to wakeup time by waker kernel stack
#               For Linux, uses BCC, eBPF.
#
# USAGE: wakeuptime [-h] [-u] [-p PID] [-v] [-f] [duration]
#
# The current implementation walks the frame-pointer chain with an unrolled
# loop (x86_64 and arm64 only), and was written as a proof of concept. This
# implementation should be replaced in the future with an appropriate bpf_
# call, when available.
#
# Currently limited to a stack trace depth of 20 (maxdepth): the ip is not
# saved, only up to MAXDEPTH return addresses.
#
# Copyright 2016 Netflix, Inc.
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 14-Jan-2016   Brendan Gregg   Created this.

from __future__ import print_function
from bcc import BPF
from time import sleep, strftime
import argparse
import signal
import sys

# arguments
examples = """examples:
    ./wakeuptime             # trace blocked time with waker stacks
    ./wakeuptime 5           # trace for 5 seconds only
    ./wakeuptime -f 5        # 5 seconds, and output in folded format
    ./wakeuptime -u          # don't include kernel threads (user only)
    ./wakeuptime -p 185      # trace for PID 185 only
"""
parser = argparse.ArgumentParser(
    description="Summarize sleep to wakeup time by waker kernel stack",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)
parser.add_argument("-u", "--useronly", action="store_true",
    help="user threads only (no kernel threads)")
# type=int: the PID is spliced into the C source below, so reject anything
# that is not a number here instead of failing later in the BPF compiler.
parser.add_argument("-p", "--pid", type=int,
    help="trace this PID only")
# NOTE(review): --verbose is parsed but never consulted below; the default
# multi-line output always prints the raw addresses.
parser.add_argument("-v", "--verbose", action="store_true",
    help="show raw addresses")
parser.add_argument("-f", "--folded", action="store_true",
    help="output folded format")
parser.add_argument("duration", nargs="?", default=99999999,
    help="duration of trace, in seconds")
args = parser.parse_args()
folded = args.folded
duration = int(args.duration)
debug = 0
maxdepth = 20    # and MAXDEPTH
# "is not None" rather than truthiness so that "-p 0" still counts as given
if args.pid is not None and args.useronly:
    print("ERROR: use either -p or -u.")
    sys.exit(1)


# signal handler
def signal_ignore(signum, frame):
    """SIGINT handler used while printing: emit a newline and carry on."""
    print()


# define BPF program
bpf_text = """
#include <uapi/linux/ptrace.h>
#include <linux/sched.h>

#define MAXDEPTH    20
#define MINBLOCK_US 1

struct key_t {
    char waker[TASK_COMM_LEN];
    char target[TASK_COMM_LEN];
    // Skip saving the ip
    u64 ret[MAXDEPTH];
};
BPF_HASH(counts, struct key_t);
BPF_HASH(start, u32);

static u64 get_frame(u64 *bp) {
    if (*bp) {
        // The following stack walker is x86_64/arm64 specific
        u64 ret = 0;
        if (bpf_probe_read(&ret, sizeof(ret), (void *)(*bp+8)))
            return 0;
        if (bpf_probe_read(bp, sizeof(*bp), (void *)*bp))
            return 0;
#ifdef __x86_64__
        if (ret < __START_KERNEL_map)
#elif __aarch64__
        if (ret < VA_START)
#else
#error "Unsupported architecture for stack walker"
#endif
            return 0;
        return ret;
    }
    return 0;
}

int offcpu(struct pt_regs *ctx) {
    u32 pid = bpf_get_current_pid_tgid();
    u64 ts = bpf_ktime_get_ns();
    // XXX: should filter here too, but need task_struct
    start.update(&pid, &ts);
    return 0;
}

int waker(struct pt_regs *ctx, struct task_struct *p) {
    u32 pid = p->pid;
    u64 delta, *tsp, ts;

    tsp = start.lookup(&pid);
    if (tsp == 0)
        return 0;        // missed start
    start.delete(&pid);

    if (FILTER)
        return 0;

    // calculate delta time
    delta = bpf_ktime_get_ns() - *tsp;
    delta = delta / 1000;
    if (delta < MINBLOCK_US)
        return 0;

    struct key_t key = {};
    u64 zero = 0, *val, bp = 0;
    int depth = 0;

    bpf_probe_read(&key.target, sizeof(key.target), p->comm);
    bpf_get_current_comm(&key.waker, sizeof(key.waker));
    bp = PT_REGS_FP(ctx);

    // unrolled loop (MAXDEPTH):
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;

    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;
    if (!(key.ret[depth++] = get_frame(&bp))) goto out;

out:
    val = counts.lookup_or_init(&key, &zero);
    (*val) += delta;
    return 0;
}
"""
# FILTER is a C expression evaluated in waker(); a true value drops the event.
# Inside waker(), "pid" is the pid of the task being woken (p->pid).
if args.pid is not None:
    thread_filter = 'pid != %d' % args.pid
elif args.useronly:
    thread_filter = 'p->flags & PF_KTHREAD'
else:
    thread_filter = '0'
bpf_text = bpf_text.replace('FILTER', thread_filter)
if debug:
    print(bpf_text)

# initialize BPF
b = BPF(text=bpf_text)
b.attach_kprobe(event="schedule", fn_name="offcpu")
b.attach_kprobe(event="try_to_wake_up", fn_name="waker")
matched = b.num_open_kprobes()
if matched == 0:
    print("0 functions traced. Exiting.")
    sys.exit(1)


def sym_name(addr):
    """Return the kernel symbol for addr as text.

    NOTE(review): depending on the bcc release, ksym() may hand back bytes
    rather than text; decode in that case so the result can be joined with
    the already-decoded waker/target names. Confirm against the bcc version
    in use.
    """
    name = b.ksym(addr)
    if isinstance(name, bytes):
        name = name.decode('utf-8', 'replace')
    return name


# header
if not folded:
    print("Tracing blocked time (us) by kernel stack", end="")
    if duration < 99999999:
        print(" for %d secs." % duration)
    else:
        print("... Hit Ctrl-C to end.")

# output: wait for the duration (or Ctrl-C), then print one summary and exit
try:
    sleep(duration)
except KeyboardInterrupt:
    # as cleanup can take many seconds, trap Ctrl-C:
    signal.signal(signal.SIGINT, signal_ignore)

if not folded:
    print()
counts = b.get_table("counts")
# ascending by accumulated time, so the largest totals are printed last
for k, v in sorted(counts.items(), key=lambda kv: kv[1].value):
    waker_comm = k.waker.decode('utf-8', 'replace')
    target_comm = k.target.decode('utf-8', 'replace')
    if folded:
        # print folded stack output: waker;frameN;...;frame0;target value
        # Unused slots are zero; skipping them and joining avoids emitting an
        # empty frame ("waker;;target") when no frames were captured.
        frames = [sym_name(k.ret[i])
                  for i in reversed(range(0, maxdepth))
                  if k.ret[i] != 0]
        line = ";".join([waker_comm] + frames + [target_comm])
        print("%s %d" % (line, v.value))
    else:
        # print default multi-line stack output
        print("    %-16s %s" % ("target:", target_comm))
        for i in range(0, maxdepth):
            if k.ret[i] == 0:
                break
            print("    %-16x %s" % (k.ret[i], sym_name(k.ret[i])))
        print("    %-16s %s" % ("waker:", waker_comm))
        print("        %d\n" % v.value)
counts.clear()

if not folded:
    print("Detaching...")
sys.exit()