1#!/usr/bin/python 2# @lint-avoid-python-3-compatibility-imports 3# 4# cachetop Count cache kernel function calls per processes 5# For Linux, uses BCC, eBPF. 6# 7# USAGE: cachetop 8# Taken from cachestat by Brendan Gregg 9# 10# Copyright (c) 2016-present, Facebook, Inc. 11# Licensed under the Apache License, Version 2.0 (the "License") 12# 13# 13-Jul-2016 Emmanuel Bretelle first version 14# 17-Mar-2022 Rocky Xing Added PID filter support. 15 16from __future__ import absolute_import 17from __future__ import division 18# Do not import unicode_literals until #623 is fixed 19# from __future__ import unicode_literals 20from __future__ import print_function 21 22from bcc import BPF 23from collections import defaultdict 24from time import strftime 25 26import argparse 27import curses 28import pwd 29import re 30import signal 31from time import sleep 32 33FIELDS = ( 34 "PID", 35 "UID", 36 "CMD", 37 "HITS", 38 "MISSES", 39 "DIRTIES", 40 "READ_HIT%", 41 "WRITE_HIT%" 42) 43DEFAULT_FIELD = "HITS" 44DEFAULT_SORT_FIELD = FIELDS.index(DEFAULT_FIELD) 45 46# signal handler 47def signal_ignore(signal, frame): 48 print() 49 50 51# Function to gather data from /proc/meminfo 52# return dictionary for quicker lookup of both values 53def get_meminfo(): 54 result = {} 55 56 for line in open('/proc/meminfo'): 57 k = line.split(':', 3) 58 v = k[1].split() 59 result[k[0]] = int(v[0]) 60 return result 61 62 63def get_processes_stats( 64 bpf, 65 sort_field=DEFAULT_SORT_FIELD, 66 sort_reverse=False): 67 ''' 68 Return a tuple containing: 69 buffer 70 cached 71 list of tuple with per process cache stats 72 ''' 73 counts = bpf.get_table("counts") 74 stats = defaultdict(lambda: defaultdict(int)) 75 for k, v in counts.items(): 76 stats["%d-%d-%s" % (k.pid, k.uid, k.comm.decode('utf-8', 'replace'))][k.ip] = v.value 77 stats_list = [] 78 79 for pid, count in sorted(stats.items(), key=lambda stat: stat[0]): 80 rtaccess = 0 81 wtaccess = 0 82 mpa = 0 83 mbd = 0 84 apcl = 0 85 apd = 0 86 access = 0 87 misses = 0 88 rhits = 0 89 whits = 0 90 91 for k, v in count.items(): 92 if re.match(b'mark_page_accessed', bpf.ksym(k)) is not None: 93 mpa = max(0, v) 94 95 if re.match(b'mark_buffer_dirty', bpf.ksym(k)) is not None: 96 mbd = max(0, v) 97 98 if re.match(b'add_to_page_cache_lru', bpf.ksym(k)) is not None: 99 apcl = max(0, v) 100 101 if re.match(b'account_page_dirtied', bpf.ksym(k)) is not None: 102 apd = max(0, v) 103 104 # access = total cache access incl. reads(mpa) and writes(mbd) 105 # misses = total of add to lru which we do when we write(mbd) 106 # and also the mark the page dirty(same as mbd) 107 access = (mpa + mbd) 108 misses = (apcl + apd) 109 110 # rtaccess is the read hit % during the sample period. 111 # wtaccess is the write hit % during the sample period. 112 if mpa > 0: 113 rtaccess = float(mpa) / (access + misses) 114 if apcl > 0: 115 wtaccess = float(apcl) / (access + misses) 116 117 if wtaccess != 0: 118 whits = 100 * wtaccess 119 if rtaccess != 0: 120 rhits = 100 * rtaccess 121 122 _pid, uid, comm = pid.split('-', 2) 123 stats_list.append( 124 (int(_pid), uid, comm, 125 access, misses, mbd, 126 rhits, whits)) 127 128 stats_list = sorted( 129 stats_list, key=lambda stat: stat[sort_field], reverse=sort_reverse 130 ) 131 counts.clear() 132 return stats_list 133 134 135def handle_loop(stdscr, args): 136 # don't wait on key press 137 stdscr.nodelay(1) 138 # set default sorting field 139 sort_field = FIELDS.index(DEFAULT_FIELD) 140 sort_reverse = True 141 142 # load BPF program 143 bpf_text = """ 144 145 #include <uapi/linux/ptrace.h> 146 struct key_t { 147 u64 ip; 148 u32 pid; 149 u32 uid; 150 char comm[16]; 151 }; 152 153 BPF_HASH(counts, struct key_t); 154 155 int do_count(struct pt_regs *ctx) { 156 u32 pid = bpf_get_current_pid_tgid() >> 32; 157 if (FILTER_PID) 158 return 0; 159 160 struct key_t key = {}; 161 u32 uid = bpf_get_current_uid_gid(); 162 163 key.ip = PT_REGS_IP(ctx); 164 key.pid = pid; 165 key.uid = uid; 166 bpf_get_current_comm(&(key.comm), 16); 167 168 counts.increment(key); 169 return 0; 170 } 171 172 """ 173 174 if args.pid: 175 bpf_text = bpf_text.replace('FILTER_PID', 'pid != %d' % args.pid) 176 else: 177 bpf_text = bpf_text.replace('FILTER_PID', '0') 178 179 b = BPF(text=bpf_text) 180 b.attach_kprobe(event="add_to_page_cache_lru", fn_name="do_count") 181 b.attach_kprobe(event="mark_page_accessed", fn_name="do_count") 182 b.attach_kprobe(event="mark_buffer_dirty", fn_name="do_count") 183 184 # Function account_page_dirtied() is changed to folio_account_dirtied() in 5.15. 185 if BPF.get_kprobe_functions(b'folio_account_dirtied'): 186 b.attach_kprobe(event="folio_account_dirtied", fn_name="do_count") 187 elif BPF.get_kprobe_functions(b'account_page_dirtied'): 188 b.attach_kprobe(event="account_page_dirtied", fn_name="do_count") 189 190 exiting = 0 191 192 while 1: 193 s = stdscr.getch() 194 if s == ord('q'): 195 exiting = 1 196 elif s == ord('r'): 197 sort_reverse = not sort_reverse 198 elif s == ord('<'): 199 sort_field = max(0, sort_field - 1) 200 elif s == ord('>'): 201 sort_field = min(len(FIELDS) - 1, sort_field + 1) 202 try: 203 sleep(args.interval) 204 except KeyboardInterrupt: 205 exiting = 1 206 # as cleanup can take many seconds, trap Ctrl-C: 207 signal.signal(signal.SIGINT, signal_ignore) 208 209 # Get memory info 210 mem = get_meminfo() 211 cached = int(mem["Cached"]) / 1024 212 buff = int(mem["Buffers"]) / 1024 213 214 process_stats = get_processes_stats( 215 b, 216 sort_field=sort_field, 217 sort_reverse=sort_reverse) 218 stdscr.clear() 219 stdscr.addstr( 220 0, 0, 221 "%-8s Buffers MB: %.0f / Cached MB: %.0f " 222 "/ Sort: %s / Order: %s" % ( 223 strftime("%H:%M:%S"), buff, cached, FIELDS[sort_field], 224 sort_reverse and "descending" or "ascending" 225 ) 226 ) 227 228 # header 229 stdscr.addstr( 230 1, 0, 231 "{0:8} {1:8} {2:16} {3:8} {4:8} {5:8} {6:10} {7:10}".format( 232 *FIELDS 233 ), 234 curses.A_REVERSE 235 ) 236 (height, width) = stdscr.getmaxyx() 237 for i, stat in enumerate(process_stats): 238 uid = int(stat[1]) 239 try: 240 username = pwd.getpwuid(uid)[0] 241 except KeyError: 242 # `pwd` throws a KeyError if the user cannot be found. This can 243 # happen e.g. when the process is running in a cgroup that has 244 # different users from the host. 245 username = 'UNKNOWN({})'.format(uid) 246 247 stdscr.addstr( 248 i + 2, 0, 249 "{0:8} {username:8.8} {2:16} {3:8} {4:8} " 250 "{5:8} {6:9.1f}% {7:9.1f}%".format( 251 *stat, username=username 252 ) 253 ) 254 if i > height - 4: 255 break 256 stdscr.refresh() 257 if exiting: 258 print("Detaching...") 259 return 260 261 262def parse_arguments(): 263 parser = argparse.ArgumentParser( 264 description='Show Linux page cache hit/miss statistics including read ' 265 'and write hit % per processes in a UI like top.' 266 ) 267 parser.add_argument("-p", "--pid", type=int, metavar="PID", 268 help="trace this PID only") 269 parser.add_argument( 270 'interval', type=int, default=5, nargs='?', 271 help='Interval between probes.' 272 ) 273 274 args = parser.parse_args() 275 return args 276 277args = parse_arguments() 278curses.wrapper(handle_loop, args) 279