#!/usr/bin/python
#
# memleak   Trace and display outstanding allocations to detect
#           memory leaks in user-mode processes and the kernel.
#
# USAGE: memleak [-h] [-p PID] [-t] [-a] [-o OLDER] [-c COMMAND]
#                [--combined-only] [--wa-missing-free] [-s SAMPLE_RATE]
#                [-T TOP] [-z MIN_SIZE] [-Z MAX_SIZE] [-O OBJ]
#                [interval] [count]
#
# Licensed under the Apache License, Version 2.0 (the "License")
# Copyright (C) 2016 Sasha Goldshtein.

from bcc import BPF
from time import sleep
from datetime import datetime
import resource
import argparse
import subprocess
import os
import sys

class Allocation(object):
        def __init__(self, stack, size):
                self.stack = stack
                self.count = 1
                self.size = size

        def update(self, size):
                self.count += 1
                self.size += size

def run_command_get_output(command):
        p = subprocess.Popen(command.split(),
                stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        return iter(p.stdout.readline, b'')

def run_command_get_pid(command):
        p = subprocess.Popen(command.split())
        return p.pid

examples = """
EXAMPLES:

./memleak -p $(pidof allocs)
        Trace allocations and display a summary of "leaked" (outstanding)
        allocations every 5 seconds
./memleak -p $(pidof allocs) -t
        Trace allocations and display each individual allocator function call
./memleak -ap $(pidof allocs) 10
        Trace allocations and display allocated addresses, sizes, and stacks
        every 10 seconds for outstanding allocations
./memleak -c "./allocs"
        Run the specified command and trace its allocations
./memleak
        Trace allocations in kernel mode and display a summary of outstanding
        allocations every 5 seconds
./memleak -o 60000
        Trace allocations in kernel mode and display a summary of outstanding
        allocations that are at least one minute (60 seconds) old
./memleak -s 5
        Trace roughly every 5th allocation, to reduce overhead
"""

description = """
Trace outstanding memory allocations that weren't freed.
Supports both user-mode allocations made with libc functions and kernel-mode
allocations made with kmalloc/kmem_cache_alloc/get_free_pages and corresponding
memory release functions.
70""" 71 72parser = argparse.ArgumentParser(description=description, 73 formatter_class=argparse.RawDescriptionHelpFormatter, 74 epilog=examples) 75parser.add_argument("-p", "--pid", type=int, default=-1, 76 help="the PID to trace; if not specified, trace kernel allocs") 77parser.add_argument("-t", "--trace", action="store_true", 78 help="print trace messages for each alloc/free call") 79parser.add_argument("interval", nargs="?", default=5, type=int, 80 help="interval in seconds to print outstanding allocations") 81parser.add_argument("count", nargs="?", type=int, 82 help="number of times to print the report before exiting") 83parser.add_argument("-a", "--show-allocs", default=False, action="store_true", 84 help="show allocation addresses and sizes as well as call stacks") 85parser.add_argument("-o", "--older", default=500, type=int, 86 help="prune allocations younger than this age in milliseconds") 87parser.add_argument("-c", "--command", 88 help="execute and trace the specified command") 89parser.add_argument("--combined-only", default=False, action="store_true", 90 help="show combined allocation statistics only") 91parser.add_argument("--wa-missing-free", default=False, action="store_true", 92 help="Workaround to alleviate misjudgments when free is missing") 93parser.add_argument("-s", "--sample-rate", default=1, type=int, 94 help="sample every N-th allocation to decrease the overhead") 95parser.add_argument("-T", "--top", type=int, default=10, 96 help="display only this many top allocating stacks (by size)") 97parser.add_argument("-z", "--min-size", type=int, 98 help="capture only allocations larger than this size") 99parser.add_argument("-Z", "--max-size", type=int, 100 help="capture only allocations smaller than this size") 101parser.add_argument("-O", "--obj", type=str, default="c", 102 help="attach to allocator functions in the specified object") 103parser.add_argument("--ebpf", action="store_true", 104 help=argparse.SUPPRESS) 105parser.add_argument("--percpu", default=False, action="store_true", 106 help="trace percpu allocations") 107 108args = parser.parse_args() 109 110pid = args.pid 111command = args.command 112kernel_trace = (pid == -1 and command is None) 113trace_all = args.trace 114interval = args.interval 115min_age_ns = 1e6 * args.older 116sample_every_n = args.sample_rate 117num_prints = args.count 118top_stacks = args.top 119min_size = args.min_size 120max_size = args.max_size 121obj = args.obj 122 123if min_size is not None and max_size is not None and min_size > max_size: 124 print("min_size (-z) can't be greater than max_size (-Z)") 125 exit(1) 126 127if command is not None: 128 print("Executing '%s' and tracing the resulting process." 
if command is not None:
        print("Executing '%s' and tracing the resulting process." % command)
        pid = run_command_get_pid(command)

bpf_source = """
#include <uapi/linux/ptrace.h>

struct alloc_info_t {
        u64 size;
        u64 timestamp_ns;
        int stack_id;
};

struct combined_alloc_info_t {
        u64 total_size;
        u64 number_of_allocs;
};

BPF_HASH(sizes, u64);
BPF_HASH(allocs, u64, struct alloc_info_t, 1000000);
BPF_HASH(memptrs, u64, u64);
BPF_STACK_TRACE(stack_traces, 10240);
BPF_HASH(combined_allocs, u64, struct combined_alloc_info_t, 10240);

static inline void update_statistics_add(u64 stack_id, u64 sz) {
        struct combined_alloc_info_t *existing_cinfo;
        struct combined_alloc_info_t cinfo = {0};

        existing_cinfo = combined_allocs.lookup(&stack_id);
        if (existing_cinfo != 0)
                cinfo = *existing_cinfo;

        cinfo.total_size += sz;
        cinfo.number_of_allocs += 1;

        combined_allocs.update(&stack_id, &cinfo);
}

static inline void update_statistics_del(u64 stack_id, u64 sz) {
        struct combined_alloc_info_t *existing_cinfo;
        struct combined_alloc_info_t cinfo = {0};

        existing_cinfo = combined_allocs.lookup(&stack_id);
        if (existing_cinfo != 0)
                cinfo = *existing_cinfo;

        if (sz >= cinfo.total_size)
                cinfo.total_size = 0;
        else
                cinfo.total_size -= sz;

        if (cinfo.number_of_allocs > 0)
                cinfo.number_of_allocs -= 1;

        combined_allocs.update(&stack_id, &cinfo);
}

static inline int gen_alloc_enter(struct pt_regs *ctx, size_t size) {
        SIZE_FILTER
        if (SAMPLE_EVERY_N > 1) {
                u64 ts = bpf_ktime_get_ns();
                if (ts % SAMPLE_EVERY_N != 0)
                        return 0;
        }

        u64 pid = bpf_get_current_pid_tgid();
        u64 size64 = size;
        sizes.update(&pid, &size64);

        if (SHOULD_PRINT)
                bpf_trace_printk("alloc entered, size = %u\\n", size);
        return 0;
}

static inline int gen_alloc_exit2(struct pt_regs *ctx, u64 address) {
        u64 pid = bpf_get_current_pid_tgid();
        u64* size64 = sizes.lookup(&pid);
        struct alloc_info_t info = {0};

        if (size64 == 0)
                return 0; // missed alloc entry

        info.size = *size64;
        sizes.delete(&pid);

        if (address != 0) {
                info.timestamp_ns = bpf_ktime_get_ns();
                info.stack_id = stack_traces.get_stackid(ctx, STACK_FLAGS);
                allocs.update(&address, &info);
                update_statistics_add(info.stack_id, info.size);
        }

        if (SHOULD_PRINT) {
                bpf_trace_printk("alloc exited, size = %lu, result = %lx\\n",
                                 info.size, address);
        }
        return 0;
}

static inline int gen_alloc_exit(struct pt_regs *ctx) {
        return gen_alloc_exit2(ctx, PT_REGS_RC(ctx));
}

static inline int gen_free_enter(struct pt_regs *ctx, void *address) {
        u64 addr = (u64)address;
        struct alloc_info_t *info = allocs.lookup(&addr);
        if (info == 0)
                return 0;

        allocs.delete(&addr);
        update_statistics_del(info->stack_id, info->size);

        if (SHOULD_PRINT) {
                bpf_trace_printk("free entered, address = %lx, size = %lu\\n",
                                 address, info->size);
        }
        return 0;
}

int malloc_enter(struct pt_regs *ctx, size_t size) {
        return gen_alloc_enter(ctx, size);
}

int malloc_exit(struct pt_regs *ctx) {
        return gen_alloc_exit(ctx);
}

int free_enter(struct pt_regs *ctx, void *address) {
        return gen_free_enter(ctx, address);
}

int calloc_enter(struct pt_regs *ctx, size_t nmemb, size_t size) {
        return gen_alloc_enter(ctx, nmemb * size);
}

int calloc_exit(struct pt_regs *ctx) {
        return gen_alloc_exit(ctx);
}
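/*
 * realloc is modeled as a free of the old pointer followed by a new
 * allocation: gen_free_enter() drops the old address from the allocs map,
 * and gen_alloc_exit() records the address returned by realloc.
 */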
int realloc_enter(struct pt_regs *ctx, void *ptr, size_t size) {
        gen_free_enter(ctx, ptr);
        return gen_alloc_enter(ctx, size);
}

int realloc_exit(struct pt_regs *ctx) {
        return gen_alloc_exit(ctx);
}

int mmap_enter(struct pt_regs *ctx) {
        size_t size = (size_t)PT_REGS_PARM2(ctx);
        return gen_alloc_enter(ctx, size);
}

int mmap_exit(struct pt_regs *ctx) {
        return gen_alloc_exit(ctx);
}

int munmap_enter(struct pt_regs *ctx, void *address) {
        return gen_free_enter(ctx, address);
}

int posix_memalign_enter(struct pt_regs *ctx, void **memptr, size_t alignment,
                         size_t size) {
        u64 memptr64 = (u64)(size_t)memptr;
        u64 pid = bpf_get_current_pid_tgid();

        memptrs.update(&pid, &memptr64);
        return gen_alloc_enter(ctx, size);
}

int posix_memalign_exit(struct pt_regs *ctx) {
        u64 pid = bpf_get_current_pid_tgid();
        u64 *memptr64 = memptrs.lookup(&pid);
        void *addr;

        if (memptr64 == 0)
                return 0;

        memptrs.delete(&pid);

        if (bpf_probe_read_user(&addr, sizeof(void*), (void*)(size_t)*memptr64))
                return 0;

        u64 addr64 = (u64)(size_t)addr;
        return gen_alloc_exit2(ctx, addr64);
}

int aligned_alloc_enter(struct pt_regs *ctx, size_t alignment, size_t size) {
        return gen_alloc_enter(ctx, size);
}

int aligned_alloc_exit(struct pt_regs *ctx) {
        return gen_alloc_exit(ctx);
}

int valloc_enter(struct pt_regs *ctx, size_t size) {
        return gen_alloc_enter(ctx, size);
}

int valloc_exit(struct pt_regs *ctx) {
        return gen_alloc_exit(ctx);
}

int memalign_enter(struct pt_regs *ctx, size_t alignment, size_t size) {
        return gen_alloc_enter(ctx, size);
}

int memalign_exit(struct pt_regs *ctx) {
        return gen_alloc_exit(ctx);
}

int pvalloc_enter(struct pt_regs *ctx, size_t size) {
        return gen_alloc_enter(ctx, size);
}

int pvalloc_exit(struct pt_regs *ctx) {
        return gen_alloc_exit(ctx);
}
"""

bpf_source_kernel = """

TRACEPOINT_PROBE(kmem, kmalloc) {
        if (WORKAROUND_MISSING_FREE)
                gen_free_enter((struct pt_regs *)args, (void *)args->ptr);
        gen_alloc_enter((struct pt_regs *)args, args->bytes_alloc);
        return gen_alloc_exit2((struct pt_regs *)args, (size_t)args->ptr);
}

TRACEPOINT_PROBE(kmem, kmalloc_node) {
        if (WORKAROUND_MISSING_FREE)
                gen_free_enter((struct pt_regs *)args, (void *)args->ptr);
        gen_alloc_enter((struct pt_regs *)args, args->bytes_alloc);
        return gen_alloc_exit2((struct pt_regs *)args, (size_t)args->ptr);
}

TRACEPOINT_PROBE(kmem, kfree) {
        return gen_free_enter((struct pt_regs *)args, (void *)args->ptr);
}

TRACEPOINT_PROBE(kmem, kmem_cache_alloc) {
        if (WORKAROUND_MISSING_FREE)
                gen_free_enter((struct pt_regs *)args, (void *)args->ptr);
        gen_alloc_enter((struct pt_regs *)args, args->bytes_alloc);
        return gen_alloc_exit2((struct pt_regs *)args, (size_t)args->ptr);
}

TRACEPOINT_PROBE(kmem, kmem_cache_alloc_node) {
        if (WORKAROUND_MISSING_FREE)
                gen_free_enter((struct pt_regs *)args, (void *)args->ptr);
        gen_alloc_enter((struct pt_regs *)args, args->bytes_alloc);
        return gen_alloc_exit2((struct pt_regs *)args, (size_t)args->ptr);
}

TRACEPOINT_PROBE(kmem, kmem_cache_free) {
        return gen_free_enter((struct pt_regs *)args, (void *)args->ptr);
}
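/*
 * Page allocations are tracked by page frame number (PFN) rather than by
 * virtual address: mm_page_alloc reports the PFN and the order of the
 * allocation (PAGE_SIZE << order bytes), and mm_page_free reports the same
 * PFN, so allocation and free events can be matched in the allocs map.
 */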
TRACEPOINT_PROBE(kmem, mm_page_alloc) {
        gen_alloc_enter((struct pt_regs *)args, PAGE_SIZE << args->order);
        return gen_alloc_exit2((struct pt_regs *)args, args->pfn);
}

TRACEPOINT_PROBE(kmem, mm_page_free) {
        return gen_free_enter((struct pt_regs *)args, (void *)args->pfn);
}
"""

bpf_source_percpu = """

TRACEPOINT_PROBE(percpu, percpu_alloc_percpu) {
        gen_alloc_enter((struct pt_regs *)args, args->size);
        return gen_alloc_exit2((struct pt_regs *)args, (size_t)args->ptr);
}

TRACEPOINT_PROBE(percpu, percpu_free_percpu) {
        return gen_free_enter((struct pt_regs *)args, (void *)args->ptr);
}
"""

if kernel_trace:
        if args.percpu:
                bpf_source += bpf_source_percpu
        else:
                bpf_source += bpf_source_kernel

if kernel_trace:
        bpf_source = bpf_source.replace("WORKAROUND_MISSING_FREE", "1"
                                        if args.wa_missing_free else "0")

bpf_source = bpf_source.replace("SHOULD_PRINT", "1" if trace_all else "0")
bpf_source = bpf_source.replace("SAMPLE_EVERY_N", str(sample_every_n))
bpf_source = bpf_source.replace("PAGE_SIZE", str(resource.getpagesize()))

size_filter = ""
if min_size is not None and max_size is not None:
        size_filter = "if (size < %d || size > %d) return 0;" % \
                      (min_size, max_size)
elif min_size is not None:
        size_filter = "if (size < %d) return 0;" % min_size
elif max_size is not None:
        size_filter = "if (size > %d) return 0;" % max_size
bpf_source = bpf_source.replace("SIZE_FILTER", size_filter)

stack_flags = "0"
if not kernel_trace:
        stack_flags += "|BPF_F_USER_STACK"
bpf_source = bpf_source.replace("STACK_FLAGS", stack_flags)

if args.ebpf:
        print(bpf_source)
        exit()

bpf = BPF(text=bpf_source)

if not kernel_trace:
        print("Attaching to pid %d, Ctrl+C to quit." % pid)

        def attach_probes(sym, fn_prefix=None, can_fail=False):
                if fn_prefix is None:
                        fn_prefix = sym

                try:
                        bpf.attach_uprobe(name=obj, sym=sym,
                                          fn_name=fn_prefix + "_enter",
                                          pid=pid)
                        bpf.attach_uretprobe(name=obj, sym=sym,
                                             fn_name=fn_prefix + "_exit",
                                             pid=pid)
                except Exception:
                        if can_fail:
                                return
                        else:
                                raise

        attach_probes("malloc")
        attach_probes("calloc")
        attach_probes("realloc")
        attach_probes("mmap")
        attach_probes("posix_memalign")
        attach_probes("valloc", can_fail=True)   # may fail on Android; deprecated in bionic's libc.so
        attach_probes("memalign")
        attach_probes("pvalloc", can_fail=True)  # may fail on Android; deprecated in bionic's libc.so
        attach_probes("aligned_alloc", can_fail=True)  # added in C11
        bpf.attach_uprobe(name=obj, sym="free", fn_name="free_enter",
                          pid=pid)
        bpf.attach_uprobe(name=obj, sym="munmap", fn_name="munmap_enter",
                          pid=pid)

else:
        print("Attaching to kernel allocators, Ctrl+C to quit.")

        # No probe attaching here. Allocations are counted by attaching to
        # tracepoints.
        #
        # Memory allocations in the Linux kernel are not limited to malloc/free
        # equivalents; it is also common to allocate one or more memory pages
        # directly. The page allocator has two interfaces: one works with page
        # frame numbers (PFNs), the other with page addresses, and it is
        # possible to allocate pages through one interface and free them
        # through the other. Kernel code can easily convert between PFNs and
        # addresses, but doing the same in an eBPF kprobe would require
        # fragile hacks.
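        #
        # For example, alloc_pages() hands back a struct page pointer while
        # __get_free_pages() returns a kernel virtual address, yet pages
        # obtained through either interface may later be released through
        # free_pages() or __free_pages().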
        #
        # Fortunately, Linux exposes tracepoints for memory allocations that
        # can be instrumented by eBPF programs. The page allocation tracepoint
        # reports the PFN for both allocator interfaces, so there is no need
        # to guess which allocation corresponds to which free.

def print_outstanding():
        print("[%s] Top %d stacks with outstanding allocations:" %
              (datetime.now().strftime("%H:%M:%S"), top_stacks))
        alloc_info = {}
        allocs = bpf["allocs"]
        stack_traces = bpf["stack_traces"]
        for address, info in sorted(allocs.items(), key=lambda a: a[1].size):
                if BPF.monotonic_time() - min_age_ns < info.timestamp_ns:
                        continue
                if info.stack_id < 0:
                        continue
                if info.stack_id in alloc_info:
                        alloc_info[info.stack_id].update(info.size)
                else:
                        stack = list(stack_traces.walk(info.stack_id))
                        combined = []
                        for addr in stack:
                                combined.append(
                                        ('0x' + format(addr, '016x') + '\t').encode('utf-8') +
                                        bpf.sym(addr, pid,
                                                show_module=True,
                                                show_offset=True))
                        alloc_info[info.stack_id] = Allocation(combined,
                                                               info.size)
                if args.show_allocs:
                        print("\taddr = %x size = %s" %
                              (address.value, info.size))
        to_show = sorted(alloc_info.values(),
                         key=lambda a: a.size)[-top_stacks:]
        for alloc in to_show:
                print("\t%d bytes in %d allocations from stack\n\t\t%s" %
                      (alloc.size, alloc.count,
                       b"\n\t\t".join(alloc.stack).decode("ascii")))

def print_outstanding_combined():
        stack_traces = bpf["stack_traces"]
        stacks = sorted(bpf["combined_allocs"].items(),
                        key=lambda a: -a[1].total_size)
        cnt = 1
        entries = []
        for stack_id, info in stacks:
                try:
                        trace = []
                        for addr in stack_traces.walk(stack_id.value):
                                sym = bpf.sym(addr, pid,
                                              show_module=True,
                                              show_offset=True)
                                trace.append(sym)
                        trace = "\n\t\t".join(trace)
                except KeyError:
                        trace = "stack information lost"

                entry = ("\t%d bytes in %d allocations from stack\n\t\t%s" %
                         (info.total_size, info.number_of_allocs, trace))
                entries.append(entry)

                cnt += 1
                if cnt > top_stacks:
                        break

        print("[%s] Top %d stacks with outstanding allocations:" %
              (datetime.now().strftime("%H:%M:%S"), top_stacks))

        print('\n'.join(reversed(entries)))

count_so_far = 0
while True:
        if trace_all:
                print(bpf.trace_fields())
        else:
                try:
                        sleep(interval)
                except KeyboardInterrupt:
                        exit()
                if args.combined_only:
                        print_outstanding_combined()
                else:
                        print_outstanding()
                sys.stdout.flush()
                count_so_far += 1
                if num_prints is not None and count_so_far >= num_prints:
                        exit()