• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/python
2# @lint-avoid-python-3-compatibility-imports
3#
4# cpudist   Summarize on- and off-CPU time per task as a histogram.
5#
6# USAGE: cpudist [-h] [-O] [-T] [-m] [-P] [-L] [-p PID] [-I] [interval] [count]
7#
8# This measures the time a task spends on or off the CPU, and shows this time
9# as a histogram, optionally per-process.
10#
11# By default CPU idle time is excluded by simply excluding PID 0.
12#
13# Copyright 2016 Sasha Goldshtein
14# Licensed under the Apache License, Version 2.0 (the "License")
15#
16# 27-Mar-2022   Rocky Xing      Changed to exclude CPU idle time by default.
17
18from __future__ import print_function
19from bcc import BPF
20from time import sleep, strftime
21import argparse
22
examples = """examples:
    cpudist              # summarize on-CPU time as a histogram
    cpudist -O           # summarize off-CPU time as a histogram
    cpudist 1 10         # print 1 second summaries, 10 times
    cpudist -mT 1        # 1s summaries, milliseconds, and timestamps
    cpudist -P           # show each PID separately
    cpudist -p 185       # trace PID 185 only
    cpudist -I           # include CPU idle time
"""

# Command-line interface. The usage examples above are shown verbatim as the
# --help epilog (RawDescriptionHelpFormatter keeps their layout).
parser = argparse.ArgumentParser(
    description="Summarize on- and off-CPU time per task as a histogram.",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)
parser.add_argument("-O", "--offcpu", action="store_true",
    help="measure off-CPU time")
parser.add_argument("-T", "--timestamp", action="store_true",
    help="include timestamp on output")
parser.add_argument("-m", "--milliseconds", action="store_true",
    help="millisecond histogram")
parser.add_argument("-P", "--pids", action="store_true",
    help="print a histogram per process ID")
parser.add_argument("-L", "--tids", action="store_true",
    help="print a histogram per thread ID")
parser.add_argument("-p", "--pid",
    help="trace this PID only")
parser.add_argument("-I", "--include-idle", action="store_true",
    help="include CPU idle time")
# type=int makes argparse reject non-numeric values with a normal usage error
# instead of a ValueError traceback later on. It also turns an interval of 0
# into a real (falsy) integer rather than the always-truthy string "0".
parser.add_argument("interval", nargs="?", type=int, default=99999999,
    help="output interval, in seconds")
parser.add_argument("count", nargs="?", type=int, default=99999999,
    help="number of outputs")
# Hidden switch: print the generated BPF program instead of running it.
parser.add_argument("--ebpf", action="store_true",
    help=argparse.SUPPRESS)
args = parser.parse_args()

# Number of summaries still to print; decremented by the output loop.
countdown = args.count
# Set to 1 to dump the generated BPF program before loading it.
debug = 0
59
# Source of the BPF program, assembled as text.
#
# Upper-case tokens are placeholders, not C identifiers; they are resolved
# before the program is compiled:
#   MAX_PID      - supplied on the compiler command line (-DMAX_PID=...)
#   STORAGE      - declaration of the "dist" histogram
#   PID_FILTER   - expression that is true for tasks that must be skipped
#   IDLE_FILTER  - expression that is true for the idle task when excluded
#   FACTOR       - statement scaling "delta" from nanoseconds
#   STORE        - statement recording "delta" into "dist"
bpf_text = """#include <uapi/linux/ptrace.h>
#include <linux/sched.h>
"""

# ONCPU selects which half of a context switch opens an interval and which
# half closes it (see the #ifdef blocks in sched_switch below).
if not args.offcpu:
    bpf_text += "#define ONCPU\n"

# store_start() records a timestamp keyed by (pid, cpu); update_hist() looks
# that timestamp up and records the elapsed time. sched_switch() calls one of
# them for "prev" and the other for the current task, depending on ONCPU.
# The unused "BAIL:" label that used to sit between the two halves of
# sched_switch() had no goto referring to it and has been dropped.
bpf_text += """
typedef struct entry_key {
    u32 pid;
    u32 cpu;
} entry_key_t;

typedef struct pid_key {
    u64 id;
    u64 slot;
} pid_key_t;


BPF_HASH(start, entry_key_t, u64, MAX_PID);
STORAGE

static inline void store_start(u32 tgid, u32 pid, u32 cpu, u64 ts)
{
    if (PID_FILTER)
        return;

    if (IDLE_FILTER)
        return;

    entry_key_t entry_key = { .pid = pid, .cpu = cpu };
    start.update(&entry_key, &ts);
}

static inline void update_hist(u32 tgid, u32 pid, u32 cpu, u64 ts)
{
    if (PID_FILTER)
        return;

    if (IDLE_FILTER)
        return;

    entry_key_t entry_key = { .pid = pid, .cpu = cpu };
    u64 *tsp = start.lookup(&entry_key);
    if (tsp == 0)
        return;

    if (ts < *tsp) {
        // Probably a clock issue where the recorded on-CPU event had a
        // timestamp later than the recorded off-CPU event, or vice versa.
        return;
    }
    u64 delta = ts - *tsp;
    FACTOR
    STORE
}

int sched_switch(struct pt_regs *ctx, struct task_struct *prev)
{
    u64 ts = bpf_ktime_get_ns();
    u64 pid_tgid = bpf_get_current_pid_tgid();
    u32 tgid = pid_tgid >> 32, pid = pid_tgid;
    u32 cpu = bpf_get_smp_processor_id();

    u32 prev_pid = prev->pid;
    u32 prev_tgid = prev->tgid;
#ifdef ONCPU
    update_hist(prev_tgid, prev_pid, cpu, ts);
#else
    store_start(prev_tgid, prev_pid, cpu, ts);
#endif

#ifdef ONCPU
    store_start(tgid, pid, cpu, ts);
#else
    update_hist(tgid, pid, cpu, ts);
#endif

    return 0;
}
"""
142
# Resolve the placeholders in bpf_text according to the command-line options.

# PID_FILTER: true for tasks that must be ignored. The PID is converted to an
# integer first so that a typo is reported here rather than as a C compiler
# error, and so that arbitrary text cannot be spliced into the BPF source.
if args.pid:
    try:
        target_tgid = int(args.pid)
    except ValueError:
        exit("cpudist: invalid PID %r (expected an integer)" % args.pid)
    bpf_text = bpf_text.replace('PID_FILTER', 'tgid != %d' % target_tgid)
else:
    bpf_text = bpf_text.replace('PID_FILTER', '0')

# IDLE_FILTER: skip PID 0 unless idle time was explicitly requested.
idle_filter = '0' if args.include_idle else 'pid == 0'
bpf_text = bpf_text.replace('IDLE_FILTER', idle_filter)

# FACTOR: scale the nanosecond delta to the unit shown in the histogram.
if args.milliseconds:
    bpf_text = bpf_text.replace('FACTOR', 'delta /= 1000000;')
    label = "msecs"
else:
    bpf_text = bpf_text.replace('FACTOR', 'delta /= 1000;')
    label = "usecs"

# STORAGE / STORE: one histogram overall, or one per process / thread.
if args.pids or args.tids:
    # -L takes precedence over -P: key by thread ID instead of process ID.
    if args.tids:
        pid = "pid"
        section = "tid"
    else:
        pid = "tgid"
        section = "pid"
    bpf_text = bpf_text.replace('STORAGE',
        'BPF_HISTOGRAM(dist, pid_key_t, MAX_PID);')
    bpf_text = bpf_text.replace('STORE',
        'pid_key_t key = {.id = ' + pid + ', .slot = bpf_log2l(delta)}; ' +
        'dist.increment(key);')
else:
    section = ""
    bpf_text = bpf_text.replace('STORAGE', 'BPF_HISTOGRAM(dist);')
    bpf_text = bpf_text.replace('STORE',
        'dist.atomic_increment(bpf_log2l(delta));')

# Optionally show the final program; --ebpf prints it and stops there.
if debug or args.ebpf:
    print(bpf_text)
    if args.ebpf:
        exit()
180
# MAX_PID in the BPF program is taken from the kernel's pid_max setting.
# The file is opened in a with-block so the handle is closed right away.
with open("/proc/sys/kernel/pid_max") as pid_max_file:
    max_pid = int(pid_max_file.read())

b = BPF(text=bpf_text, cflags=["-DMAX_PID=%d" % max_pid])
# Raw string: "\." and "\d" are regex escapes, not Python string escapes, and
# a non-raw literal triggers an invalid-escape-sequence warning.
b.attach_kprobe(event_re=r"^finish_task_switch$|^finish_task_switch\.isra\.\d$",
                fn_name="sched_switch")

print("Tracing %s-CPU time... Hit Ctrl-C to end." %
      ("off" if args.offcpu else "on"))
189
def pid_to_comm(pid):
    """Return the section heading for one histogram: "<pid> <comm>".

    The command name is read from /proc/<pid>/comm and used exactly as read
    (it is not stripped). If the file cannot be read, for example because the
    task has already exited, only the numeric ID is returned.
    """
    try:
        with open("/proc/%d/comm" % pid, "r") as comm_file:
            comm = comm_file.read()
    except IOError:
        return str(pid)
    return "%d %s" % (pid, comm)


# A falsy interval means: print a single summary and exit.
exiting = 0 if args.interval else 1
dist = b.get_table("dist")
while True:
    # Ctrl-C during the wait still prints the data collected so far, then
    # ends the program after this iteration.
    try:
        sleep(int(args.interval))
    except KeyboardInterrupt:
        exiting = 1

    print()
    if args.timestamp:
        print("%-8s\n" % strftime("%H:%M:%S"), end="")

    dist.print_log2_hist(label, section, section_print_fn=pid_to_comm)
    # Start the next interval from an empty histogram.
    dist.clear()

    countdown -= 1
    if exiting or countdown == 0:
        exit()
215