#!/usr/bin/python
# IPC - Instructions Per Cycle using Perf Events and
# uprobes
# 24-Apr-2020   Saleem Ahmad   Created this.

from bcc import BPF, utils
from optparse import OptionParser

# load BPF program
code = """
#include <uapi/linux/ptrace.h>

struct perf_delta {
    u64 clk_delta;
    u64 inst_delta;
    u64 time_delta;
};

/*
Perf arrays to read counter values for the open
perf events.
*/
BPF_PERF_ARRAY(clk, MAX_CPUS);
BPF_PERF_ARRAY(inst, MAX_CPUS);

// Perf output
BPF_PERF_OUTPUT(output);

// Per-cpu data to store the start values
BPF_PERCPU_ARRAY(data, u64);

#define CLOCK_ID 0
#define INSTRUCTION_ID 1
#define TIME_ID 2

void trace_start(struct pt_regs *ctx) {
    u32 clk_k = CLOCK_ID;
    u32 inst_k = INSTRUCTION_ID;
    u32 time = TIME_ID;

    int cpu = bpf_get_smp_processor_id();
    /*
    perf_read may return negative values for errors.
    If the cpu id is greater than the BPF_PERF_ARRAY size,
    the counter values will be very large negative numbers.
    NOTE: Using bpf_perf_event_value is recommended over
    bpf_perf_event_read or map.perf_read() due to
    issues in the ABI; map.perf_read_value() needs to be
    implemented in the future.
    */
    u64 clk_start = clk.perf_read(cpu);
    u64 inst_start = inst.perf_read(cpu);
    u64 time_start = bpf_ktime_get_ns();

    u64* kptr = NULL;
    kptr = data.lookup(&clk_k);
    if (kptr) {
        data.update(&clk_k, &clk_start);
    } else {
        data.insert(&clk_k, &clk_start);
    }

    kptr = data.lookup(&inst_k);
    if (kptr) {
        data.update(&inst_k, &inst_start);
    } else {
        data.insert(&inst_k, &inst_start);
    }

    kptr = data.lookup(&time);
    if (kptr) {
        data.update(&time, &time_start);
    } else {
        data.insert(&time, &time_start);
    }
}

void trace_end(struct pt_regs* ctx) {
    u32 clk_k = CLOCK_ID;
    u32 inst_k = INSTRUCTION_ID;
    u32 time = TIME_ID;

    int cpu = bpf_get_smp_processor_id();
    /*
    perf_read may return negative values for errors.
    If the cpu id is greater than the BPF_PERF_ARRAY size,
    the counter values will be very large negative numbers.
    NOTE: Using bpf_perf_event_value is recommended over
    bpf_perf_event_read or map.perf_read() due to
    issues in the ABI; map.perf_read_value() needs to be
    implemented in the future.
    */
    u64 clk_end = clk.perf_read(cpu);
    u64 inst_end = inst.perf_read(cpu);
    u64 time_end = bpf_ktime_get_ns();

    struct perf_delta perf_data = {};
    u64* kptr = NULL;

    // Find the elements in the map; if not found, return
    kptr = data.lookup(&clk_k);
    if (kptr) {
        perf_data.clk_delta = clk_end - *kptr;
    } else {
        return;
    }

    kptr = data.lookup(&inst_k);
    if (kptr) {
        perf_data.inst_delta = inst_end - *kptr;
    } else {
        return;
    }

    kptr = data.lookup(&time);
    if (kptr) {
        perf_data.time_delta = time_end - *kptr;
    } else {
        return;
    }

    output.perf_submit(ctx, &perf_data, sizeof(struct perf_delta));
}
"""

usage = 'Usage: ipc.py [options]\nexample: ./ipc.py -l c -s strlen'
parser = OptionParser(usage)
parser.add_option('-l', '--lib', dest='lib_name',
    help='lib name containing the symbol to trace, e.g. c for libc', type=str)
parser.add_option('-s', '--sym', dest='sym', help='symbol to trace', type=str)

(options, args) = parser.parse_args()
if not options.lib_name or not options.sym:
    parser.print_help()
    exit()

num_cpus = len(utils.get_online_cpus())

b = BPF(text=code, cflags=['-DMAX_CPUS=%s' % str(num_cpus)])

# Attach probes at the start and end of the traced function.
# NOTE: When attaching to a function for tracing, the dynamic linker may,
# during the runtime relocation stage, call the function once to resolve
# the address of an optimized implementation, which the process then calls;
# e.g. for strlen, __strlen_sse2 is called instead of strlen after the
# relocation stage.
# NOTE: There is a context switch from user space to kernel space when the
# counters are captured at the probes, so the actual IPC may differ slightly.
# This example is a reference for how to use perf events with tracing.
b.attach_uprobe(name=options.lib_name, sym=options.sym, fn_name="trace_start")
b.attach_uretprobe(name=options.lib_name, sym=options.sym, fn_name="trace_end")

def print_data(cpu, data, size):
    e = b["output"].event(data)
    # Guard against a zero cycle delta (e.g. if the counter was not
    # scheduled), which would otherwise raise ZeroDivisionError.
    ipc = 1.0 * e.inst_delta / e.clk_delta if e.clk_delta else 0.0
    print("%-8d %-12d %-8.2f %-8s %d" % (e.clk_delta, e.inst_delta,
        ipc, str(round(e.time_delta * 1e-3, 2)) + ' us', cpu))

print("Counters Data")
print("%-8s %-12s %-8s %-8s %s" % ('CLOCK', 'INSTRUCTION', 'IPC', 'TIME', 'CPU'))

b["output"].open_perf_buffer(print_data)

# Perf event for Unhalted Core Cycles. The hex value is a combination of
# event, umask, and cmask; read the Intel docs to find the event and cmask,
# or use `perf list --details` to get the event, umask, and cmask.
# NOTE: The kernel may multiplex events when more counters are opened than
# the CPU's performance monitoring unit supports, which can make the
# results inaccurate. Counter values then need to be normalized for a more
# accurate value; see the sketch at the end of this file.
PERF_TYPE_RAW = 4
# Unhalted Core Cycles
b["clk"].open_perf_event(PERF_TYPE_RAW, 0x0000003C)
# Instructions Retired
b["inst"].open_perf_event(PERF_TYPE_RAW, 0x000000C0)

while True:
    try:
        b.perf_buffer_poll()
    except KeyboardInterrupt:
        exit()
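
# ----------------------------------------------------------------------
# A minimal sketch of the normalization mentioned in the multiplexing
# NOTE above (not reached at runtime; the poll loop only exits the
# process on Ctrl-C). It assumes the enabled/running times of the event
# are available, e.g. from bpf_perf_event_read_value() on newer kernels.
# The name normalize_count and its arguments are illustrative, not a
# bcc API.
def normalize_count(raw_count, time_enabled_ns, time_running_ns):
    # Standard perf scaling: extrapolate the raw count by the ratio of
    # the time the event was enabled to the time it was actually
    # scheduled on the PMU.
    if time_running_ns == 0:
        return 0
    return int(raw_count * time_enabled_ns / time_running_ns)

# e.g. normalize_count(1000000, 4000000, 1000000) == 4000000: the event
# was on the PMU for a quarter of the time, so the count is scaled by 4.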