• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
#!/usr/bin/env python
#
# oomkill   Trace oom_kill_process(). For Linux, uses BCC, eBPF.
#
# This traces the kernel out-of-memory killer, and prints basic details,
# including the system load averages. This can provide more context on the
# system state at the time of OOM: was it getting busier or steady, based
# on the load averages? This tool may also be useful to customize for
# investigations; for example, by adding other task_struct details at the time
# of OOM.
#
# Copyright 2016 Netflix, Inc.
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 09-Feb-2016   Brendan Gregg   Created this.
16
17from bcc import BPF
18from time import strftime
19import ctypes as ct
20
# linux stats
# Path read on every OOM event to report the system load averages.
loadavg = "/proc/loadavg"

# define BPF program
#
# The embedded C below is compiled by BCC. It instruments
# oom_kill_process() and, for each call, submits one struct data_t record
# on the "events" perf output:
#   fpid/fcomm - process ID and command name of the task running when the
#                OOM killer fired (the "trigger")
#   tpid/tcomm - pid and command name of the task_struct being killed
#   pages      - the totalpages argument of oom_kill_process()
# Any change to struct data_t must be mirrored in the user-space ctypes
# definition that decodes these records.
bpf_text = """
#include <uapi/linux/ptrace.h>
#include <linux/oom.h>

struct data_t {
    u64 fpid;
    u64 tpid;
    u64 pages;
    char fcomm[TASK_COMM_LEN];
    char tcomm[TASK_COMM_LEN];
};

BPF_PERF_OUTPUT(events);

void kprobe__oom_kill_process(struct pt_regs *ctx, struct oom_control *oc,
    struct task_struct *p, unsigned int points, unsigned long totalpages)
{
    struct data_t data = {};
    // bpf_get_current_pid_tgid() returns (tgid << 32) | pid, where the
    // lower 32 bits are the kernel thread ID. Shift so that fpid holds the
    // process ID (tgid), which is what gets reported as "PID".
    u32 pid = bpf_get_current_pid_tgid() >> 32;
    data.fpid = pid;
    data.tpid = p->pid;
    data.pages = totalpages;
    bpf_get_current_comm(&data.fcomm, sizeof(data.fcomm));
    bpf_probe_read(&data.tcomm, sizeof(data.tcomm), p->comm);
    events.perf_submit(ctx, &data, sizeof(data));
}
"""
52
# kernel->user event data: struct data_t
TASK_COMM_LEN = 16  # linux/sched.h


class Data(ct.Structure):
    """User-space mirror of the BPF program's ``struct data_t``.

    Layout, in order: three unsigned 64-bit integers (``fpid``, ``tpid``,
    ``pages``) followed by two ``TASK_COMM_LEN``-byte character buffers
    (``fcomm``, ``tcomm``). Field order and sizes must stay in step with
    the C definition so perf buffer records decode correctly.
    """

    _fields_ = (
        [(name, ct.c_ulonglong) for name in ("fpid", "tpid", "pages")]
        + [(name, ct.c_char * TASK_COMM_LEN) for name in ("fcomm", "tcomm")]
    )
63
# process event
def print_event(cpu, data, size):
    """Print one line describing an OOM kill event.

    Decodes the raw perf buffer record as a ``Data`` structure, reads the
    current contents of the ``loadavg`` file, and prints a timestamped
    summary: triggering PID and command, killed PID and command, page
    count, and the load average line.

    ``cpu`` and ``size`` are accepted but not used.
    """
    record = ct.cast(data, ct.POINTER(Data)).contents

    # Sample the load averages at the moment the event is handled.
    with open(loadavg) as fh:
        load_line = fh.read().rstrip()

    # Command names arrive as raw bytes; substitute undecodable bytes
    # instead of raising.
    trigger_comm = record.fcomm.decode('utf-8', 'replace')
    victim_comm = record.tcomm.decode('utf-8', 'replace')

    template = ("%s Triggered by PID %d (\"%s\"), OOM kill of PID %d (\"%s\")"
                ", %d pages, loadavg: %s")
    fields = (strftime("%H:%M:%S"), record.fpid, trigger_comm,
              record.tpid, victim_comm, record.pages, load_line)
    print(template % fields)
73
# initialize BPF
b = BPF(text=bpf_text)
print("Tracing OOM kills... Ctrl-C to stop.")
b["events"].open_perf_buffer(print_event)

# Poll the perf buffer until interrupted; each poll dispatches pending
# records to print_event. The banner tells the user to stop with Ctrl-C, so
# KeyboardInterrupt is the expected way out and should end the script
# quietly rather than with a traceback.
try:
    while 1:
        b.perf_buffer_poll()
except KeyboardInterrupt:
    pass
80