#!/usr/bin/python
# @lint-avoid-python-3-compatibility-imports
#
# dirtop  file reads and writes by directory.
#         For Linux, uses BCC, eBPF.
#
# USAGE: dirtop.py -d 'directory1,directory2' [-h] [-C] [-r MAXROWS] [interval] [count]
#
# This uses in-kernel eBPF maps to store per-directory summaries for efficiency.
#
# Copyright 2016 Netflix, Inc.
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 13-Mar-2020   Erwan Velu      Created dirtop from filetop
# 06-Feb-2016   Brendan Gregg   Created filetop.

from __future__ import print_function
from bcc import BPF
from time import sleep, strftime
import argparse
import os
import stat
from subprocess import call

# arguments
examples = """examples:
    ./dirtop -d '/hdfs/uuid/*/yarn'       # directory I/O top, 1 second refresh
    ./dirtop -d '/hdfs/uuid/*/yarn' -C    # don't clear the screen
    ./dirtop -d '/hdfs/uuid/*/yarn' 5     # 5 second summaries
    ./dirtop -d '/hdfs/uuid/*/yarn' 5 10  # 5 second summaries, 10 times only
    ./dirtop -d '/hdfs/uuid/*/yarn,/hdfs/uuid/*/data' # running dirtop on two sets of directories
"""
parser = argparse.ArgumentParser(
    description="File reads and writes by directory",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)
parser.add_argument("-C", "--noclear", action="store_true",
    help="don't clear the screen")
parser.add_argument("-r", "--maxrows", default=20,
    help="maximum rows to print, default 20")
parser.add_argument("-s", "--sort", default="all",
    choices=["all", "reads", "writes", "rbytes", "wbytes"],
    help="sort column, default all")
parser.add_argument("-p", "--pid", type=int, metavar="PID", dest="tgid",
    help="trace this PID only")
parser.add_argument("interval", nargs="?", default=1,
    help="output interval, in seconds")
parser.add_argument("count", nargs="?", default=99999999,
    help="number of outputs")
parser.add_argument("--ebpf", action="store_true",
    help=argparse.SUPPRESS)
parser.add_argument("-d", "--root-directories", type=str, required=True, dest="rootdirs",
    help="select the directories to observe, separated by commas")
args = parser.parse_args()
interval = int(args.interval)
countdown = int(args.count)
maxrows = int(args.maxrows)
clear = not args.noclear
debug = 0

# linux stats
loadavg = "/proc/loadavg"

# define BPF program
bpf_text = """
#include <uapi/linux/ptrace.h>
#include <linux/blkdev.h>

// the key for the output summary
struct info_t {
    unsigned long inode_id;
};

// the value of the output summary
struct val_t {
    u64 reads;
    u64 writes;
    u64 rbytes;
    u64 wbytes;
};

BPF_HASH(counts, struct info_t, struct val_t);

static int do_entry(struct pt_regs *ctx, struct file *file,
    char __user *buf, size_t count, int is_read)
{
    u32 tgid = bpf_get_current_pid_tgid() >> 32;
    if (TGID_FILTER)
        return 0;

    // The directory inodes we look at
    u32 dir_ids[INODES_NUMBER] = DIRECTORY_INODES;
    struct info_t info = {.inode_id = 0};
    struct dentry *pde = file->f_path.dentry;
    // Walk up the directory tree, at most 50 levels
    for (int i = 0; i < 50; i++) {
        // If we don't have any parent, we reached the root
        if (!pde->d_parent) {
            break;
        }
        pde = pde->d_parent;
        // Is this file inside one of the watched directories?
        for (int dir_id = 0; dir_id < INODES_NUMBER; dir_id++) {
            if (pde->d_inode->i_ino == dir_ids[dir_id]) {
                // Yes, export the top directory inode
                info.inode_id = pde->d_inode->i_ino;
                break;
            }
        }
    }
    // If we didn't find any, abort
    if (info.inode_id == 0) {
        return 0;
    }

    struct val_t *valp, zero = {};
    valp = counts.lookup_or_try_init(&info, &zero);
    if (valp) {
        if (is_read) {
            valp->reads++;
            valp->rbytes += count;
        } else {
            valp->writes++;
            valp->wbytes += count;
        }
    }
    return 0;
}

int trace_read_entry(struct pt_regs *ctx, struct file *file,
    char __user *buf, size_t count)
{
    return do_entry(ctx, file, buf, count, 1);
}

int trace_write_entry(struct pt_regs *ctx, struct file *file,
    char __user *buf, size_t count)
{
    return do_entry(ctx, file, buf, count, 0);
}

"""
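# Note on the placeholders above: TGID_FILTER, DIRECTORY_INODES and
# INODES_NUMBER are substituted in Python below before the C program is
# compiled. As a sketch, watching two directories with (hypothetical)
# inode numbers 2077 and 2078 would generate:
#
#   u32 dir_ids[2] = {2077,2078};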


def get_searched_ids(root_directories):
    """Export the inode numbers of the selected directories."""
    from glob import glob
    inode_to_path = {}
    inodes = "{"
    total_dirs = 0
    for root_directory in root_directories.split(','):
        try:
            searched_dirs = glob(root_directory, recursive=True)
        except TypeError:
            # Python < 3.5: glob() has no 'recursive' keyword
            searched_dirs = glob(root_directory)
        if not searched_dirs:
            continue

        for mydir in searched_dirs:
            total_dirs = total_dirs + 1
            # The eBPF program fails if we pass more than 15 directories
            if total_dirs > 15:
                print('15-directory limit reached')
                break
            inode_id = os.lstat(mydir)[stat.ST_INO]
            if inode_id in inode_to_path:
                if inode_to_path[inode_id] == mydir:
                    print('Skipping {} as already considered'.format(mydir))
            else:
                inodes = "{},{}".format(inodes, inode_id)
                inode_to_path[inode_id] = mydir
                print('Considering {} with inode_id {}'.format(mydir, inode_id))

    inodes = inodes + '}'
    if len(inode_to_path) == 0:
        print('Cannot find any valid directory')
        exit()
    return inodes.replace('{,', '{'), inode_to_path
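# A minimal illustration of the return value, using hypothetical paths and
# inode numbers: if /tmp/a and /tmp/b exist with inodes 1234 and 5678,
# get_searched_ids('/tmp/a,/tmp/b') returns
# ('{1234,5678}', {1234: '/tmp/a', 5678: '/tmp/b'}).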

if args.tgid:
    bpf_text = bpf_text.replace('TGID_FILTER', 'tgid != %d' % args.tgid)
else:
    bpf_text = bpf_text.replace('TGID_FILTER', '0')

inodes, inodes_to_path = get_searched_ids(args.rootdirs)
bpf_text = bpf_text.replace("DIRECTORY_INODES", inodes)
bpf_text = bpf_text.replace(
    "INODES_NUMBER", '{}'.format(len(inodes.split(','))))

if debug or args.ebpf:
    print(bpf_text)
    if args.ebpf:
        exit()

# initialize BPF
b = BPF(text=bpf_text)
b.attach_kprobe(event="vfs_read", fn_name="trace_read_entry")
b.attach_kprobe(event="vfs_write", fn_name="trace_write_entry")

print('Tracing... Output every %d secs. Hit Ctrl-C to end' % interval)


def sort_fn(counts):
    """Define how to sort the columns"""
    if args.sort == "all":
        return (counts[1].rbytes + counts[1].wbytes +
                counts[1].reads + counts[1].writes)
    else:
        return getattr(counts[1], args.sort)


# output
exiting = 0
while 1:
    try:
        sleep(interval)
    except KeyboardInterrupt:
        exiting = 1

    # header
    if clear:
        call("clear")
    else:
        print()
    with open(loadavg) as stats:
        print("%-8s loadavg: %s" % (strftime("%H:%M:%S"), stats.read()))

    print("%-6s %-6s %-8s %-8s %s" %
          ("READS", "WRITES", "R_Kb", "W_Kb", "PATH"))

    # by-directory output: aggregate the in-kernel counters per inode
    counts = b.get_table("counts")
    line = 0
    reads = {}
    writes = {}
    reads_Kb = {}
    writes_Kb = {}
    for k, v in reversed(sorted(counts.items(), key=sort_fn)):
        # If it's the first time we see this inode,
        # create a new entry
        if k.inode_id not in reads:
            reads[k.inode_id] = v.reads
            writes[k.inode_id] = v.writes
            reads_Kb[k.inode_id] = v.rbytes / 1024
            writes_Kb[k.inode_id] = v.wbytes / 1024
        else:
            # otherwise, add the current counters
            # to the previous ones
            reads[k.inode_id] += v.reads
            writes[k.inode_id] += v.writes
            reads_Kb[k.inode_id] += v.rbytes / 1024
            writes_Kb[k.inode_id] += v.wbytes / 1024

    for node_id in reads:
        print("%-6d %-6d %-8d %-8d %s" %
              (reads[node_id], writes[node_id], reads_Kb[node_id],
               writes_Kb[node_id], inodes_to_path[node_id]))
        line += 1
        if line >= maxrows:
            break

    counts.clear()

    countdown -= 1
    if exiting or countdown == 0:
        print("Detaching...")
        exit()
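# Sketch of the output produced by the loop above, with hypothetical paths
# and counter values (columns follow the header format strings: reads,
# writes, read Kbytes, write Kbytes, resolved directory path):
#
#   14:12:59 loadavg: 0.51 0.38 0.42 1/812 12345
#
#   READS  WRITES R_Kb     W_Kb     PATH
#   1016   0      6366     0        /hdfs/uuid/a1b2c3/yarn
#   24     195    0        1547     /hdfs/uuid/a1b2c3/data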