#!/usr/bin/python
#
# memleak   Trace and display outstanding allocations to detect
#           memory leaks in user-mode processes and the kernel.
#
# USAGE: memleak [-h] [-p PID] [-t] [-a] [-o OLDER] [-c COMMAND]
#                [--combined-only] [--wa-missing-free] [-s SAMPLE_RATE]
#                [-T TOP] [-z MIN_SIZE] [-Z MAX_SIZE] [-O OBJ]
#                [interval] [count]
#
# Licensed under the Apache License, Version 2.0 (the "License")
# Copyright (C) 2016 Sasha Goldshtein.

from bcc import BPF
from time import sleep
from datetime import datetime
import resource
import argparse
import subprocess
import os
import sys

class Allocation(object):
    def __init__(self, stack, size):
        self.stack = stack
        self.count = 1
        self.size = size

    def update(self, size):
        self.count += 1
        self.size += size

def run_command_get_output(command):
        p = subprocess.Popen(command.split(),
                stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        return iter(p.stdout.readline, b'')

def run_command_get_pid(command):
        p = subprocess.Popen(command.split())
        return p.pid

examples = """
EXAMPLES:

./memleak -p $(pidof allocs)
        Trace allocations and display a summary of "leaked" (outstanding)
        allocations every 5 seconds
./memleak -p $(pidof allocs) -t
        Trace allocations and display each individual allocator function call
./memleak -ap $(pidof allocs) 10
        Trace allocations and display allocated addresses, sizes, and stacks
        every 10 seconds for outstanding allocations
./memleak -c "./allocs"
        Run the specified command and trace its allocations
./memleak
        Trace allocations in kernel mode and display a summary of outstanding
        allocations every 5 seconds
./memleak -o 60000
        Trace allocations in kernel mode and display a summary of outstanding
        allocations that are at least one minute (60 seconds) old
./memleak -s 5
        Trace roughly every 5th allocation, to reduce overhead
"""

description = """
Trace outstanding memory allocations that weren't freed.
Supports both user-mode allocations made with libc functions and kernel-mode
allocations made with kmalloc/kmem_cache_alloc/get_free_pages and corresponding
memory release functions.
"""

parser = argparse.ArgumentParser(description=description,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=examples)
parser.add_argument("-p", "--pid", type=int, default=-1,
        help="the PID to trace; if not specified, trace kernel allocs")
parser.add_argument("-t", "--trace", action="store_true",
        help="print trace messages for each alloc/free call")
parser.add_argument("interval", nargs="?", default=5, type=int,
        help="interval in seconds to print outstanding allocations")
parser.add_argument("count", nargs="?", type=int,
        help="number of times to print the report before exiting")
parser.add_argument("-a", "--show-allocs", default=False, action="store_true",
        help="show allocation addresses and sizes as well as call stacks")
parser.add_argument("-o", "--older", default=500, type=int,
        help="prune allocations younger than this age in milliseconds")
parser.add_argument("-c", "--command",
        help="execute and trace the specified command")
parser.add_argument("--combined-only", default=False, action="store_true",
        help="show combined allocation statistics only")
parser.add_argument("--wa-missing-free", default=False, action="store_true",
        help="workaround to alleviate misjudgments when a free is missing")
parser.add_argument("-s", "--sample-rate", default=1, type=int,
        help="sample every N-th allocation to decrease the overhead")
parser.add_argument("-T", "--top", type=int, default=10,
        help="display only this many top allocating stacks (by size)")
parser.add_argument("-z", "--min-size", type=int,
        help="capture only allocations larger than this size")
parser.add_argument("-Z", "--max-size", type=int,
        help="capture only allocations smaller than this size")
parser.add_argument("-O", "--obj", type=str, default="c",
        help="attach to allocator functions in the specified object")
parser.add_argument("--ebpf", action="store_true",
        help=argparse.SUPPRESS)
parser.add_argument("--percpu", default=False, action="store_true",
        help="trace percpu allocations")

args = parser.parse_args()

pid = args.pid
command = args.command
kernel_trace = (pid == -1 and command is None)
trace_all = args.trace
interval = args.interval
min_age_ns = 1e6 * args.older
sample_every_n = args.sample_rate
num_prints = args.count
top_stacks = args.top
min_size = args.min_size
max_size = args.max_size
obj = args.obj

if min_size is not None and max_size is not None and min_size > max_size:
        print("min_size (-z) can't be greater than max_size (-Z)")
        exit(1)

if command is not None:
        print("Executing '%s' and tracing the resulting process." % command)
        pid = run_command_get_pid(command)

bpf_source = """
#include <uapi/linux/ptrace.h>

struct alloc_info_t {
        u64 size;
        u64 timestamp_ns;
        int stack_id;
};

struct combined_alloc_info_t {
        u64 total_size;
        u64 number_of_allocs;
};

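// Maps: sizes holds each thread's in-flight request size (keyed by pid_tgid)
// between the entry and exit probes; allocs records outstanding allocations
// by address; memptrs saves the posix_memalign out-parameter; combined_allocs
// aggregates per-stack totals for --combined-only.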
BPF_HASH(sizes, u64);
BPF_HASH(allocs, u64, struct alloc_info_t, 1000000);
BPF_HASH(memptrs, u64, u64);
BPF_STACK_TRACE(stack_traces, 10240);
BPF_HASH(combined_allocs, u64, struct combined_alloc_info_t, 10240);

static inline void update_statistics_add(u64 stack_id, u64 sz) {
        struct combined_alloc_info_t *existing_cinfo;
        struct combined_alloc_info_t cinfo = {0};

        existing_cinfo = combined_allocs.lookup(&stack_id);
        if (existing_cinfo != 0)
                cinfo = *existing_cinfo;

        cinfo.total_size += sz;
        cinfo.number_of_allocs += 1;

        combined_allocs.update(&stack_id, &cinfo);
}

static inline void update_statistics_del(u64 stack_id, u64 sz) {
        struct combined_alloc_info_t *existing_cinfo;
        struct combined_alloc_info_t cinfo = {0};

        existing_cinfo = combined_allocs.lookup(&stack_id);
        if (existing_cinfo != 0)
                cinfo = *existing_cinfo;

        if (sz >= cinfo.total_size)
                cinfo.total_size = 0;
        else
                cinfo.total_size -= sz;

        if (cinfo.number_of_allocs > 0)
                cinfo.number_of_allocs -= 1;

        combined_allocs.update(&stack_id, &cinfo);
}

static inline int gen_alloc_enter(struct pt_regs *ctx, size_t size) {
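        // SIZE_FILTER and SAMPLE_EVERY_N are placeholders replaced by the
        // Python script below with an optional size-range check and the
        // sampling rate; the timestamp's low bits serve as a cheap
        // pseudo-random source for sampling every N-th allocation.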
        SIZE_FILTER
        if (SAMPLE_EVERY_N > 1) {
                u64 ts = bpf_ktime_get_ns();
                if (ts % SAMPLE_EVERY_N != 0)
                        return 0;
        }

        u64 pid = bpf_get_current_pid_tgid();
        u64 size64 = size;
        sizes.update(&pid, &size64);

        if (SHOULD_PRINT)
                bpf_trace_printk("alloc entered, size = %u\\n", size);
        return 0;
}

static inline int gen_alloc_exit2(struct pt_regs *ctx, u64 address) {
        u64 pid = bpf_get_current_pid_tgid();
        u64* size64 = sizes.lookup(&pid);
        struct alloc_info_t info = {0};

        if (size64 == 0)
                return 0; // missed alloc entry

        info.size = *size64;
        sizes.delete(&pid);

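        // A zero address means the allocation failed; only successful
        // allocations are recorded as outstanding.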
        if (address != 0) {
                info.timestamp_ns = bpf_ktime_get_ns();
                info.stack_id = stack_traces.get_stackid(ctx, STACK_FLAGS);
                allocs.update(&address, &info);
                update_statistics_add(info.stack_id, info.size);
        }

        if (SHOULD_PRINT) {
                bpf_trace_printk("alloc exited, size = %lu, result = %lx\\n",
                                 info.size, address);
        }
        return 0;
}

static inline int gen_alloc_exit(struct pt_regs *ctx) {
        return gen_alloc_exit2(ctx, PT_REGS_RC(ctx));
}

static inline int gen_free_enter(struct pt_regs *ctx, void *address) {
        u64 addr = (u64)address;
        struct alloc_info_t *info = allocs.lookup(&addr);
        if (info == 0)
                return 0;

        allocs.delete(&addr);
        update_statistics_del(info->stack_id, info->size);

        if (SHOULD_PRINT) {
                bpf_trace_printk("free entered, address = %lx, size = %lu\\n",
                                 address, info->size);
        }
        return 0;
}

int malloc_enter(struct pt_regs *ctx, size_t size) {
        return gen_alloc_enter(ctx, size);
}

int malloc_exit(struct pt_regs *ctx) {
        return gen_alloc_exit(ctx);
}

int free_enter(struct pt_regs *ctx, void *address) {
        return gen_free_enter(ctx, address);
}

int calloc_enter(struct pt_regs *ctx, size_t nmemb, size_t size) {
        return gen_alloc_enter(ctx, nmemb * size);
}

int calloc_exit(struct pt_regs *ctx) {
        return gen_alloc_exit(ctx);
}

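// realloc is modeled as a free of the old pointer followed by an allocation
// of the new size, attributed to the realloc call site.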
int realloc_enter(struct pt_regs *ctx, void *ptr, size_t size) {
        gen_free_enter(ctx, ptr);
        return gen_alloc_enter(ctx, size);
}

int realloc_exit(struct pt_regs *ctx) {
        return gen_alloc_exit(ctx);
}

int mmap_enter(struct pt_regs *ctx) {
        size_t size = (size_t)PT_REGS_PARM2(ctx);
        return gen_alloc_enter(ctx, size);
}

int mmap_exit(struct pt_regs *ctx) {
        return gen_alloc_exit(ctx);
}

int munmap_enter(struct pt_regs *ctx, void *address) {
        return gen_free_enter(ctx, address);
}

int posix_memalign_enter(struct pt_regs *ctx, void **memptr, size_t alignment,
                         size_t size) {
        u64 memptr64 = (u64)(size_t)memptr;
        u64 pid = bpf_get_current_pid_tgid();

        memptrs.update(&pid, &memptr64);
        return gen_alloc_enter(ctx, size);
}

int posix_memalign_exit(struct pt_regs *ctx) {
        u64 pid = bpf_get_current_pid_tgid();
        u64 *memptr64 = memptrs.lookup(&pid);
        void *addr;

        if (memptr64 == 0)
                return 0;

        memptrs.delete(&pid);

        if (bpf_probe_read_user(&addr, sizeof(void*), (void*)(size_t)*memptr64))
                return 0;

        u64 addr64 = (u64)(size_t)addr;
        return gen_alloc_exit2(ctx, addr64);
}

int aligned_alloc_enter(struct pt_regs *ctx, size_t alignment, size_t size) {
        return gen_alloc_enter(ctx, size);
}

int aligned_alloc_exit(struct pt_regs *ctx) {
        return gen_alloc_exit(ctx);
}

int valloc_enter(struct pt_regs *ctx, size_t size) {
        return gen_alloc_enter(ctx, size);
}

int valloc_exit(struct pt_regs *ctx) {
        return gen_alloc_exit(ctx);
}

int memalign_enter(struct pt_regs *ctx, size_t alignment, size_t size) {
        return gen_alloc_enter(ctx, size);
}

int memalign_exit(struct pt_regs *ctx) {
        return gen_alloc_exit(ctx);
}

int pvalloc_enter(struct pt_regs *ctx, size_t size) {
        return gen_alloc_enter(ctx, size);
}

int pvalloc_exit(struct pt_regs *ctx) {
        return gen_alloc_exit(ctx);
}
"""

bpf_source_kernel = """

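// With --wa-missing-free, each allocator return is treated as an implicit
// free of the returned address first, so a stale record whose free was never
// observed is dropped instead of being reported as a leak.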
TRACEPOINT_PROBE(kmem, kmalloc) {
        if (WORKAROUND_MISSING_FREE)
            gen_free_enter((struct pt_regs *)args, (void *)args->ptr);
        gen_alloc_enter((struct pt_regs *)args, args->bytes_alloc);
        return gen_alloc_exit2((struct pt_regs *)args, (size_t)args->ptr);
}

TRACEPOINT_PROBE(kmem, kmalloc_node) {
        if (WORKAROUND_MISSING_FREE)
            gen_free_enter((struct pt_regs *)args, (void *)args->ptr);
        gen_alloc_enter((struct pt_regs *)args, args->bytes_alloc);
        return gen_alloc_exit2((struct pt_regs *)args, (size_t)args->ptr);
}

TRACEPOINT_PROBE(kmem, kfree) {
        return gen_free_enter((struct pt_regs *)args, (void *)args->ptr);
}

TRACEPOINT_PROBE(kmem, kmem_cache_alloc) {
        if (WORKAROUND_MISSING_FREE)
            gen_free_enter((struct pt_regs *)args, (void *)args->ptr);
        gen_alloc_enter((struct pt_regs *)args, args->bytes_alloc);
        return gen_alloc_exit2((struct pt_regs *)args, (size_t)args->ptr);
}

TRACEPOINT_PROBE(kmem, kmem_cache_alloc_node) {
        if (WORKAROUND_MISSING_FREE)
            gen_free_enter((struct pt_regs *)args, (void *)args->ptr);
        gen_alloc_enter((struct pt_regs *)args, args->bytes_alloc);
        return gen_alloc_exit2((struct pt_regs *)args, (size_t)args->ptr);
}

TRACEPOINT_PROBE(kmem, kmem_cache_free) {
        return gen_free_enter((struct pt_regs *)args, (void *)args->ptr);
}

TRACEPOINT_PROBE(kmem, mm_page_alloc) {
        gen_alloc_enter((struct pt_regs *)args, PAGE_SIZE << args->order);
        return gen_alloc_exit2((struct pt_regs *)args, args->pfn);
}

TRACEPOINT_PROBE(kmem, mm_page_free) {
        return gen_free_enter((struct pt_regs *)args, (void *)args->pfn);
}
"""

bpf_source_percpu = """

TRACEPOINT_PROBE(percpu, percpu_alloc_percpu) {
        gen_alloc_enter((struct pt_regs *)args, args->size);
        return gen_alloc_exit2((struct pt_regs *)args, (size_t)args->ptr);
}

TRACEPOINT_PROBE(percpu, percpu_free_percpu) {
        return gen_free_enter((struct pt_regs *)args, (void *)args->ptr);
}
"""

if kernel_trace:
        if args.percpu:
                bpf_source += bpf_source_percpu
        else:
                bpf_source += bpf_source_kernel
        bpf_source = bpf_source.replace("WORKAROUND_MISSING_FREE",
                                        "1" if args.wa_missing_free else "0")

bpf_source = bpf_source.replace("SHOULD_PRINT", "1" if trace_all else "0")
bpf_source = bpf_source.replace("SAMPLE_EVERY_N", str(sample_every_n))
bpf_source = bpf_source.replace("PAGE_SIZE", str(resource.getpagesize()))

size_filter = ""
if min_size is not None and max_size is not None:
        size_filter = "if (size < %d || size > %d) return 0;" % \
                      (min_size, max_size)
elif min_size is not None:
        size_filter = "if (size < %d) return 0;" % min_size
elif max_size is not None:
        size_filter = "if (size > %d) return 0;" % max_size
bpf_source = bpf_source.replace("SIZE_FILTER", size_filter)

stack_flags = "0"
if not kernel_trace:
        stack_flags += "|BPF_F_USER_STACK"
bpf_source = bpf_source.replace("STACK_FLAGS", stack_flags)

if args.ebpf:
        print(bpf_source)
        exit()

bpf = BPF(text=bpf_source)

if not kernel_trace:
        print("Attaching to pid %d, Ctrl+C to quit." % pid)

        def attach_probes(sym, fn_prefix=None, can_fail=False):
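                # Attach a uprobe at the allocator's entry and a uretprobe at
                # its return; optional symbols may be absent from the target
                # object, in which case can_fail suppresses the error.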
                if fn_prefix is None:
                        fn_prefix = sym

                try:
                        bpf.attach_uprobe(name=obj, sym=sym,
                                          fn_name=fn_prefix + "_enter",
                                          pid=pid)
                        bpf.attach_uretprobe(name=obj, sym=sym,
                                             fn_name=fn_prefix + "_exit",
                                             pid=pid)
                except Exception:
                        if can_fail:
                                return
                        else:
                                raise

        attach_probes("malloc")
        attach_probes("calloc")
        attach_probes("realloc")
        attach_probes("mmap")
        attach_probes("posix_memalign")
        attach_probes("valloc", can_fail=True)  # deprecated; missing from Android's bionic libc
        attach_probes("memalign")
        attach_probes("pvalloc", can_fail=True)  # deprecated; missing from Android's bionic libc
        attach_probes("aligned_alloc", can_fail=True)  # added in C11
        # free and munmap have no *_exit handler, so attach entry uprobes only.
        bpf.attach_uprobe(name=obj, sym="free", fn_name="free_enter",
                          pid=pid)
        bpf.attach_uprobe(name=obj, sym="munmap", fn_name="munmap_enter",
                          pid=pid)

else:
        print("Attaching to kernel allocators, Ctrl+C to quit.")

        # No probe attaching here. Allocations are counted by attaching to
        # tracepoints.
        #
        # Memory allocations in the Linux kernel are not limited to malloc/free
        # equivalents; it's also common to allocate one or more memory pages
        # directly. The page allocator has two interfaces: one works with page
        # frame numbers (PFNs), the other with page addresses. It's possible
        # to allocate pages with one kind of function and free them with the
        # other. Kernel code can easily convert PFNs to addresses and back,
        # but doing the same in an eBPF kprobe is hard without fragile hacks.
        #
        # Fortunately, Linux exposes tracepoints for memory allocations, which
        # can be instrumented by eBPF programs. The page allocation tracepoints
        # give access to PFNs for both allocator interfaces, so there is no
        # need to guess which allocation corresponds to which free.
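        #
        # For reference, the tracepoint fields consumed above (args->ptr,
        # args->bytes_alloc, args->pfn, args->order) can be inspected on a
        # running system, e.g. (the tracefs mount point may vary):
        #   cat /sys/kernel/debug/tracing/events/kmem/mm_page_alloc/format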

def print_outstanding():
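        # Group outstanding allocations by stack id, skipping entries younger
        # than min_age_ns, then print the top_stacks stacks by total size.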
        print("[%s] Top %d stacks with outstanding allocations:" %
              (datetime.now().strftime("%H:%M:%S"), top_stacks))
        alloc_info = {}
        allocs = bpf["allocs"]
        stack_traces = bpf["stack_traces"]
        for address, info in sorted(allocs.items(), key=lambda a: a[1].size):
                if BPF.monotonic_time() - min_age_ns < info.timestamp_ns:
                        continue
                if info.stack_id < 0:
                        continue
                if info.stack_id in alloc_info:
                        alloc_info[info.stack_id].update(info.size)
                else:
                        stack = list(stack_traces.walk(info.stack_id))
                        combined = []
                        for addr in stack:
                                combined.append(('0x' + format(addr, '016x') + '\t').encode('utf-8') +
                                        bpf.sym(addr, pid, show_module=True,
                                                show_offset=True))
                        alloc_info[info.stack_id] = Allocation(combined,
                                                               info.size)
                if args.show_allocs:
                        print("\taddr = %x size = %s" %
                              (address.value, info.size))
        to_show = sorted(alloc_info.values(),
                         key=lambda a: a.size)[-top_stacks:]
        for alloc in to_show:
                print("\t%d bytes in %d allocations from stack\n\t\t%s" %
                      (alloc.size, alloc.count,
                       b"\n\t\t".join(alloc.stack).decode("ascii")))

def print_outstanding_combined():
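        # Print the top_stacks stacks by aggregated outstanding size, using
        # the combined_allocs map maintained by the BPF program.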
        stack_traces = bpf["stack_traces"]
        stacks = sorted(bpf["combined_allocs"].items(),
                        key=lambda a: -a[1].total_size)
        cnt = 1
        entries = []
        for stack_id, info in stacks:
                try:
                        trace = []
                        for addr in stack_traces.walk(stack_id.value):
                                sym = bpf.sym(addr, pid,
                                              show_module=True,
                                              show_offset=True)
                                # bpf.sym() returns bytes; decode so the str
                                # join below works under Python 3.
                                trace.append(sym.decode())
                        trace = "\n\t\t".join(trace)
                except KeyError:
                        trace = "stack information lost"

                entry = ("\t%d bytes in %d allocations from stack\n\t\t%s" %
                         (info.total_size, info.number_of_allocs, trace))
                entries.append(entry)

                cnt += 1
                if cnt > top_stacks:
                        break

        print("[%s] Top %d stacks with outstanding allocations:" %
              (datetime.now().strftime("%H:%M:%S"), top_stacks))

        print('\n'.join(reversed(entries)))

count_so_far = 0
while True:
        if trace_all:
                print(bpf.trace_fields())
        else:
                try:
                        sleep(interval)
                except KeyboardInterrupt:
                        exit()
                if args.combined_only:
                        print_outstanding_combined()
                else:
                        print_outstanding()
                sys.stdout.flush()
                count_so_far += 1
                if num_prints is not None and count_so_far >= num_prints:
                        exit()