1#!/usr/bin/python 2# @lint-avoid-python-3-compatibility-imports 3# 4# cpudist Summarize on- and off-CPU time per task as a histogram. 5# 6# USAGE: cpudist [-h] [-O] [-T] [-m] [-P] [-L] [-p PID] [-I] [interval] [count] 7# 8# This measures the time a task spends on or off the CPU, and shows this time 9# as a histogram, optionally per-process. 10# 11# By default CPU idle time are excluded by simply excluding PID 0. 12# 13# Copyright 2016 Sasha Goldshtein 14# Licensed under the Apache License, Version 2.0 (the "License") 15# 16# 27-Mar-2022 Rocky Xing Changed to exclude CPU idle time by default. 17 18from __future__ import print_function 19from bcc import BPF 20from time import sleep, strftime 21import argparse 22 23examples = """examples: 24 cpudist # summarize on-CPU time as a histogram 25 cpudist -O # summarize off-CPU time as a histogram 26 cpudist 1 10 # print 1 second summaries, 10 times 27 cpudist -mT 1 # 1s summaries, milliseconds, and timestamps 28 cpudist -P # show each PID separately 29 cpudist -p 185 # trace PID 185 only 30 cpudist -I # include CPU idle time 31""" 32parser = argparse.ArgumentParser( 33 description="Summarize on-CPU time per task as a histogram.", 34 formatter_class=argparse.RawDescriptionHelpFormatter, 35 epilog=examples) 36parser.add_argument("-O", "--offcpu", action="store_true", 37 help="measure off-CPU time") 38parser.add_argument("-T", "--timestamp", action="store_true", 39 help="include timestamp on output") 40parser.add_argument("-m", "--milliseconds", action="store_true", 41 help="millisecond histogram") 42parser.add_argument("-P", "--pids", action="store_true", 43 help="print a histogram per process ID") 44parser.add_argument("-L", "--tids", action="store_true", 45 help="print a histogram per thread ID") 46parser.add_argument("-p", "--pid", 47 help="trace this PID only") 48parser.add_argument("-I", "--include-idle", action="store_true", 49 help="include CPU idle time") 50parser.add_argument("interval", nargs="?", default=99999999, 51 help="output interval, in seconds") 52parser.add_argument("count", nargs="?", default=99999999, 53 help="number of outputs") 54parser.add_argument("--ebpf", action="store_true", 55 help=argparse.SUPPRESS) 56args = parser.parse_args() 57countdown = int(args.count) 58debug = 0 59 60bpf_text = """#include <uapi/linux/ptrace.h> 61#include <linux/sched.h> 62""" 63 64if not args.offcpu: 65 bpf_text += "#define ONCPU\n" 66 67bpf_text += """ 68typedef struct entry_key { 69 u32 pid; 70 u32 cpu; 71} entry_key_t; 72 73typedef struct pid_key { 74 u64 id; 75 u64 slot; 76} pid_key_t; 77 78 79BPF_HASH(start, entry_key_t, u64, MAX_PID); 80STORAGE 81 82static inline void store_start(u32 tgid, u32 pid, u32 cpu, u64 ts) 83{ 84 if (PID_FILTER) 85 return; 86 87 if (IDLE_FILTER) 88 return; 89 90 entry_key_t entry_key = { .pid = pid, .cpu = cpu }; 91 start.update(&entry_key, &ts); 92} 93 94static inline void update_hist(u32 tgid, u32 pid, u32 cpu, u64 ts) 95{ 96 if (PID_FILTER) 97 return; 98 99 if (IDLE_FILTER) 100 return; 101 102 entry_key_t entry_key = { .pid = pid, .cpu = cpu }; 103 u64 *tsp = start.lookup(&entry_key); 104 if (tsp == 0) 105 return; 106 107 if (ts < *tsp) { 108 // Probably a clock issue where the recorded on-CPU event had a 109 // timestamp later than the recorded off-CPU event, or vice versa. 110 return; 111 } 112 u64 delta = ts - *tsp; 113 FACTOR 114 STORE 115} 116 117int sched_switch(struct pt_regs *ctx, struct task_struct *prev) 118{ 119 u64 ts = bpf_ktime_get_ns(); 120 u64 pid_tgid = bpf_get_current_pid_tgid(); 121 u32 tgid = pid_tgid >> 32, pid = pid_tgid; 122 u32 cpu = bpf_get_smp_processor_id(); 123 124 u32 prev_pid = prev->pid; 125 u32 prev_tgid = prev->tgid; 126#ifdef ONCPU 127 update_hist(prev_tgid, prev_pid, cpu, ts); 128#else 129 store_start(prev_tgid, prev_pid, cpu, ts); 130#endif 131 132BAIL: 133#ifdef ONCPU 134 store_start(tgid, pid, cpu, ts); 135#else 136 update_hist(tgid, pid, cpu, ts); 137#endif 138 139 return 0; 140} 141""" 142 143if args.pid: 144 bpf_text = bpf_text.replace('PID_FILTER', 'tgid != %s' % args.pid) 145else: 146 bpf_text = bpf_text.replace('PID_FILTER', '0') 147 148# set idle filter 149idle_filter = 'pid == 0' 150if args.include_idle: 151 idle_filter = '0' 152bpf_text = bpf_text.replace('IDLE_FILTER', idle_filter) 153 154if args.milliseconds: 155 bpf_text = bpf_text.replace('FACTOR', 'delta /= 1000000;') 156 label = "msecs" 157else: 158 bpf_text = bpf_text.replace('FACTOR', 'delta /= 1000;') 159 label = "usecs" 160if args.pids or args.tids: 161 section = "pid" 162 pid = "tgid" 163 if args.tids: 164 pid = "pid" 165 section = "tid" 166 bpf_text = bpf_text.replace('STORAGE', 167 'BPF_HISTOGRAM(dist, pid_key_t, MAX_PID);') 168 bpf_text = bpf_text.replace('STORE', 169 'pid_key_t key = {.id = ' + pid + ', .slot = bpf_log2l(delta)}; ' + 170 'dist.increment(key);') 171else: 172 section = "" 173 bpf_text = bpf_text.replace('STORAGE', 'BPF_HISTOGRAM(dist);') 174 bpf_text = bpf_text.replace('STORE', 175 'dist.atomic_increment(bpf_log2l(delta));') 176if debug or args.ebpf: 177 print(bpf_text) 178 if args.ebpf: 179 exit() 180 181max_pid = int(open("/proc/sys/kernel/pid_max").read()) 182 183b = BPF(text=bpf_text, cflags=["-DMAX_PID=%d" % max_pid]) 184b.attach_kprobe(event_re="^finish_task_switch$|^finish_task_switch\.isra\.\d$", 185 fn_name="sched_switch") 186 187print("Tracing %s-CPU time... Hit Ctrl-C to end." % 188 ("off" if args.offcpu else "on")) 189 190exiting = 0 if args.interval else 1 191dist = b.get_table("dist") 192while (1): 193 try: 194 sleep(int(args.interval)) 195 except KeyboardInterrupt: 196 exiting = 1 197 198 print() 199 if args.timestamp: 200 print("%-8s\n" % strftime("%H:%M:%S"), end="") 201 202 def pid_to_comm(pid): 203 try: 204 comm = open("/proc/%d/comm" % pid, "r").read() 205 return "%d %s" % (pid, comm) 206 except IOError: 207 return str(pid) 208 209 dist.print_log2_hist(label, section, section_print_fn=pid_to_comm) 210 dist.clear() 211 212 countdown -= 1 213 if exiting or countdown == 0: 214 exit() 215