#!/usr/bin/python
# IPC - Instructions Per Cycle using Perf Events and
# uprobes
# 24-Apr-2020	Saleem Ahmad	Created this.

from bcc import BPF, utils
from optparse import OptionParser

# BPF program source
code = """
#include <uapi/linux/ptrace.h>

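// Record submitted to userspace for each traced call.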
struct perf_delta {
    u64 clk_delta;
    u64 inst_delta;
    u64 time_delta;
};

/*
Perf arrays to read counter values for the opened
perf events.
*/
BPF_PERF_ARRAY(clk, MAX_CPUS);
BPF_PERF_ARRAY(inst, MAX_CPUS);

// Perf output buffer
BPF_PERF_OUTPUT(output);

// Per-CPU data to store start values
BPF_PERCPU_ARRAY(data, u64);
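// Each CPU gets its own copy of the slots in this map, so start values
// recorded in trace_start are read back by trace_end on the same CPU
// without locking. CAVEAT: if the traced thread migrates to another CPU
// between entry and return, trace_end pairs values from different CPUs.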

#define CLOCK_ID 0
#define INSTRUCTION_ID 1
#define TIME_ID 2

void trace_start(struct pt_regs *ctx) {
    u32 clk_k = CLOCK_ID;
    u32 inst_k = INSTRUCTION_ID;
    u32 time = TIME_ID;

    int cpu = bpf_get_smp_processor_id();
    /*
    perf_read may return negative values on error.
    If the cpu id is greater than the BPF_PERF_ARRAY size,
    the counter values will be very large negative numbers.
    NOTE: Using bpf_perf_event_value is recommended over
    bpf_perf_event_read or map.perf_read() due to ABI
    issues. map.perf_read_value() needs to be implemented
    in the future.
    */
    u64 clk_start = clk.perf_read(cpu);
    u64 inst_start = inst.perf_read(cpu);
    u64 time_start = bpf_ktime_get_ns();

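    /*
    NOTE: Array maps (including per-CPU arrays) have every slot
    pre-allocated, so lookup() always succeeds for a valid index and
    update() alone would suffice; the insert() fallback below is kept
    for illustration.
    */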
    u64* kptr = NULL;
    kptr = data.lookup(&clk_k);
    if (kptr) {
        data.update(&clk_k, &clk_start);
    } else {
        data.insert(&clk_k, &clk_start);
    }

    kptr = data.lookup(&inst_k);
    if (kptr) {
        data.update(&inst_k, &inst_start);
    } else {
        data.insert(&inst_k, &inst_start);
    }

    kptr = data.lookup(&time);
    if (kptr) {
        data.update(&time, &time_start);
    } else {
        data.insert(&time, &time_start);
    }
}

void trace_end(struct pt_regs* ctx) {
    u32 clk_k = CLOCK_ID;
    u32 inst_k = INSTRUCTION_ID;
    u32 time = TIME_ID;

    int cpu = bpf_get_smp_processor_id();
    /*
    perf_read may return negative values on error.
    If the cpu id is greater than the BPF_PERF_ARRAY size,
    the counter values will be very large negative numbers.
    NOTE: Using bpf_perf_event_value is recommended over
    bpf_perf_event_read or map.perf_read() due to ABI
    issues. map.perf_read_value() needs to be implemented
    in the future.
    */
    u64 clk_end = clk.perf_read(cpu);
    u64 inst_end = inst.perf_read(cpu);
    u64 time_end = bpf_ktime_get_ns();

    struct perf_delta perf_data = {};
    u64* kptr = NULL;

    // Look up the start values; return early if any is missing.
    kptr = data.lookup(&clk_k);
    if (kptr) {
        perf_data.clk_delta = clk_end - *kptr;
    } else {
        return;
    }

    kptr = data.lookup(&inst_k);
    if (kptr) {
        perf_data.inst_delta = inst_end - *kptr;
    } else {
        return;
    }

    kptr = data.lookup(&time);
    if (kptr) {
        perf_data.time_delta = time_end - *kptr;
    } else {
        return;
    }

    output.perf_submit(ctx, &perf_data, sizeof(struct perf_delta));
}
"""

usage = 'Usage: ipc.py [options]\nexample: ./ipc.py -l c -s strlen'
parser = OptionParser(usage)
parser.add_option('-l', '--lib', dest='lib_name', help='library containing the symbol to trace, e.g. c for libc', type=str)
parser.add_option('-s', '--sym', dest='sym', help='symbol to trace', type=str)

(options, args) = parser.parse_args()
if not options.lib_name or not options.sym:
    parser.print_help()
    exit()

num_cpus = len(utils.get_online_cpus())

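# MAX_CPUS is injected as a compile-time define so that the BPF_PERF_ARRAY
# maps are sized to the number of online CPUs; perf_read(cpu) indexes
# them by CPU id.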
b = BPF(text=code, cflags=['-DMAX_CPUS=%s' % str(num_cpus)])

# Attach probes at the start and end of the traced function.
# NOTE: When attaching to a function for tracing, the dynamic linker may,
# during the runtime relocation stage, call the function once to resolve
# the address of a different implementation, which is what the process
# then calls; e.g. for strlen, __strlen_sse2 is called instead of strlen
# after relocation.
# NOTE: There is a context switch from userspace to kernel space when
# capturing counters on uprobes, so the actual IPC may differ slightly.
# This example is a reference for how to use perf events with tracing.
b.attach_uprobe(name=options.lib_name, sym=options.sym, fn_name="trace_start")
b.attach_uretprobe(name=options.lib_name, sym=options.sym, fn_name="trace_end")
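# trace_start fires on function entry (uprobe) and trace_end on return
# (uretprobe), so each submitted record covers a single invocation of the
# traced symbol.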

def print_data(cpu, data, size):
    e = b["output"].event(data)
    # Guard against a zero cycle delta (e.g. a counter that was not
    # scheduled), which would otherwise raise ZeroDivisionError.
    ipc = float(e.inst_delta) / e.clk_delta if e.clk_delta else 0.0
    print("%-8d %-12d %-8.2f %-8s %d" % (e.clk_delta, e.inst_delta,
        ipc, str(round(e.time_delta * 1e-3, 2)) + ' us', cpu))

print("Counters Data")
print("%-8s %-12s %-8s %-8s %s" % ('CLOCK', 'INSTRUCTION', 'IPC', 'TIME', 'CPU'))

b["output"].open_perf_buffer(print_data)

# Perf event for unhalted cycles. The hex value is a combination of
# event, umask, and cmask. See the Intel SDM to find the event and cmask,
# or use `perf list --details` to get event, umask, and cmask.
# NOTE: The kernel may multiplex events if more counters are requested
# than the CPU's performance monitoring unit supports, which can make the
# results inaccurate; counter values then need to be normalized for a
# more accurate value.
PERF_TYPE_RAW = 4
# Unhalted clock cycles
b["clk"].open_perf_event(PERF_TYPE_RAW, 0x0000003C)
# Instructions retired
b["inst"].open_perf_event(PERF_TYPE_RAW, 0x000000C0)

while True:
    try:
        b.perf_buffer_poll()
    except KeyboardInterrupt:
        exit()
181