• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python
2#
3# mountsnoop Trace mount() and umount syscalls.
4#            For Linux, uses BCC, eBPF. Embedded C.
5#
6# USAGE: mountsnoop [-h]
7#
8# Copyright (c) 2016 Facebook, Inc.
9# Licensed under the Apache License, Version 2.0 (the "License")
10#
11# 14-Oct-2016   Omar Sandoval   Created this.
12
13from __future__ import print_function
14import argparse
15import bcc
16import ctypes
17import errno
18import functools
19import sys
20
21
# Embedded C source of the BPF program. main() compiles it with
# bcc.BPF(text=bpf_text) and prints it verbatim when --ebpf is given.
# It defines the kprobe handlers syscall__mount/syscall__umount and the
# kretprobe handlers do_ret_sys_mount/do_ret_sys_umount, all of which emit
# struct data_t records through the "events" perf output. The Python-side
# EventType, EnterData, DataUnion and Event definitions and MAX_STR_LEN
# duplicate the C declarations below, because print_event() reinterprets the
# raw perf records through them.
bpf_text = r"""
#include <uapi/linux/ptrace.h>
#include <linux/sched.h>

#include <linux/nsproxy.h>
#include <linux/ns_common.h>

/*
 * XXX: struct mnt_namespace is defined in fs/mount.h, which is private to the
 * VFS and not installed in any kernel-devel packages. So, let's duplicate the
 * important part of the definition. There are actually more members in the
 * real struct, but we don't need them, and they're more likely to change.
 */
struct mnt_namespace {
    atomic_t count;
    struct ns_common ns;
};

/*
 * XXX: this could really use first-class string support in BPF. target is a
 * NUL-terminated path up to PATH_MAX in length. source and type are
 * NUL-terminated strings up to PAGE_SIZE in length. data is a weird case: it's
 * almost always a NUL-terminated string, but for some filesystems (e.g., older
 * NFS variants), it's a binary structure with plenty of NUL bytes, so the
 * kernel always copies up to PAGE_SIZE bytes, stopping when it hits a fault.
 *
 * The best we can do with the existing BPF helpers is to copy as much of each
 * argument as we can. Our stack space is limited, and we need to leave some
 * headroom for the rest of the function, so this should be a decent value.
 */
#define MAX_STR_LEN 412

enum event_type {
    EVENT_MOUNT,
    EVENT_MOUNT_SOURCE,
    EVENT_MOUNT_TARGET,
    EVENT_MOUNT_TYPE,
    EVENT_MOUNT_DATA,
    EVENT_MOUNT_RET,
    EVENT_UMOUNT,
    EVENT_UMOUNT_TARGET,
    EVENT_UMOUNT_RET,
};

struct data_t {
    enum event_type type;
    pid_t pid, tgid;
    union {
        /* EVENT_MOUNT, EVENT_UMOUNT */
        struct {
            /* current->nsproxy->mnt_ns->ns.inum */
            unsigned int mnt_ns;
            char comm[TASK_COMM_LEN];
            unsigned long flags;
        } enter;
        /*
         * EVENT_MOUNT_SOURCE, EVENT_MOUNT_TARGET, EVENT_MOUNT_TYPE,
         * EVENT_MOUNT_DATA, EVENT_UMOUNT_TARGET
         */
        char str[MAX_STR_LEN];
        /* EVENT_MOUNT_RET, EVENT_UMOUNT_RET */
        int retval;
    };
};

BPF_PERF_OUTPUT(events);

int syscall__mount(struct pt_regs *ctx, char __user *source,
                      char __user *target, char __user *type,
                      unsigned long flags)
{
    /* sys_mount takes too many arguments */
    char __user *data = (char __user *)PT_REGS_PARM5(ctx);
    struct data_t event = {};
    struct task_struct *task;
    struct nsproxy *nsproxy;
    struct mnt_namespace *mnt_ns;

    event.pid = bpf_get_current_pid_tgid() & 0xffffffff;
    event.tgid = bpf_get_current_pid_tgid() >> 32;

    event.type = EVENT_MOUNT;
    bpf_get_current_comm(event.enter.comm, sizeof(event.enter.comm));
    event.enter.flags = flags;
    task = (struct task_struct *)bpf_get_current_task();
    nsproxy = task->nsproxy;
    mnt_ns = nsproxy->mnt_ns;
    event.enter.mnt_ns = mnt_ns->ns.inum;
    events.perf_submit(ctx, &event, sizeof(event));

    event.type = EVENT_MOUNT_SOURCE;
    memset(event.str, 0, sizeof(event.str));
    bpf_probe_read(event.str, sizeof(event.str), source);
    events.perf_submit(ctx, &event, sizeof(event));

    event.type = EVENT_MOUNT_TARGET;
    memset(event.str, 0, sizeof(event.str));
    bpf_probe_read(event.str, sizeof(event.str), target);
    events.perf_submit(ctx, &event, sizeof(event));

    event.type = EVENT_MOUNT_TYPE;
    memset(event.str, 0, sizeof(event.str));
    bpf_probe_read(event.str, sizeof(event.str), type);
    events.perf_submit(ctx, &event, sizeof(event));

    event.type = EVENT_MOUNT_DATA;
    memset(event.str, 0, sizeof(event.str));
    bpf_probe_read(event.str, sizeof(event.str), data);
    events.perf_submit(ctx, &event, sizeof(event));

    return 0;
}

int do_ret_sys_mount(struct pt_regs *ctx)
{
    struct data_t event = {};

    event.type = EVENT_MOUNT_RET;
    event.pid = bpf_get_current_pid_tgid() & 0xffffffff;
    event.tgid = bpf_get_current_pid_tgid() >> 32;
    event.retval = PT_REGS_RC(ctx);
    events.perf_submit(ctx, &event, sizeof(event));

    return 0;
}

int syscall__umount(struct pt_regs *ctx, char __user *target, int flags)
{
    struct data_t event = {};
    struct task_struct *task;
    struct nsproxy *nsproxy;
    struct mnt_namespace *mnt_ns;

    event.pid = bpf_get_current_pid_tgid() & 0xffffffff;
    event.tgid = bpf_get_current_pid_tgid() >> 32;

    event.type = EVENT_UMOUNT;
    bpf_get_current_comm(event.enter.comm, sizeof(event.enter.comm));
    event.enter.flags = flags;
    task = (struct task_struct *)bpf_get_current_task();
    nsproxy = task->nsproxy;
    mnt_ns = nsproxy->mnt_ns;
    event.enter.mnt_ns = mnt_ns->ns.inum;
    events.perf_submit(ctx, &event, sizeof(event));

    event.type = EVENT_UMOUNT_TARGET;
    memset(event.str, 0, sizeof(event.str));
    bpf_probe_read(event.str, sizeof(event.str), target);
    events.perf_submit(ctx, &event, sizeof(event));

    return 0;
}

int do_ret_sys_umount(struct pt_regs *ctx)
{
    struct data_t event = {};

    event.type = EVENT_UMOUNT_RET;
    event.pid = bpf_get_current_pid_tgid() & 0xffffffff;
    event.tgid = bpf_get_current_pid_tgid() >> 32;
    event.retval = PT_REGS_RC(ctx);
    events.perf_submit(ctx, &event, sizeof(event));

    return 0;
}
"""
188
# Flag definitions taken from sys/mount.h.
#
# decode_mount_flags() reports MS_MGC_VAL when the bits selected by
# MS_MGC_MSK equal it, and strips those bits before decoding the rest.
MS_MGC_VAL = 0xc0ed0000
MS_MGC_MSK = 0xffff0000

# (name, bit) pairs for the mount() flags argument, in ascending bit order.
MOUNT_FLAGS = [
    ('MS_RDONLY', 1 << 0),
    ('MS_NOSUID', 1 << 1),
    ('MS_NODEV', 1 << 2),
    ('MS_NOEXEC', 1 << 3),
    ('MS_SYNCHRONOUS', 1 << 4),
    ('MS_REMOUNT', 1 << 5),
    ('MS_MANDLOCK', 1 << 6),
    ('MS_DIRSYNC', 1 << 7),
    ('MS_NOATIME', 1 << 10),
    ('MS_NODIRATIME', 1 << 11),
    ('MS_BIND', 1 << 12),
    ('MS_MOVE', 1 << 13),
    ('MS_REC', 1 << 14),
    ('MS_SILENT', 1 << 15),
    ('MS_POSIXACL', 1 << 16),
    ('MS_UNBINDABLE', 1 << 17),
    ('MS_PRIVATE', 1 << 18),
    ('MS_SLAVE', 1 << 19),
    ('MS_SHARED', 1 << 20),
    ('MS_RELATIME', 1 << 21),
    ('MS_KERNMOUNT', 1 << 22),
    ('MS_I_VERSION', 1 << 23),
    ('MS_STRICTATIME', 1 << 24),
    ('MS_LAZYTIME', 1 << 25),
    ('MS_ACTIVE', 1 << 30),
    ('MS_NOUSER', 1 << 31),
]

# (name, bit) pairs for the umount() flags argument.
UMOUNT_FLAGS = [
    ('MNT_FORCE', 1 << 0),
    ('MNT_DETACH', 1 << 1),
    ('MNT_EXPIRE', 1 << 2),
    ('UMOUNT_NOFOLLOW', 1 << 3),
]


# Size of the comm field in EnterData (from linux/sched.h).
TASK_COMM_LEN = 16
# Size of the str field in DataUnion; same value as the MAX_STR_LEN #define
# in bpf_text.
MAX_STR_LEN = 412
230
231
class EventType(object):
    """Integer tags for the records emitted by the BPF program.

    The names and their order duplicate ``enum event_type`` in bpf_text, so
    the values run from 0 upwards in declaration order.
    """
    (
        EVENT_MOUNT,
        EVENT_MOUNT_SOURCE,
        EVENT_MOUNT_TARGET,
        EVENT_MOUNT_TYPE,
        EVENT_MOUNT_DATA,
        EVENT_MOUNT_RET,
        EVENT_UMOUNT,
        EVENT_UMOUNT_TARGET,
        EVENT_UMOUNT_RET,
    ) = range(9)
242
243
class EnterData(ctypes.Structure):
    # ctypes mirror of the "enter" struct inside struct data_t (bpf_text).
    # It is the payload of EVENT_MOUNT and EVENT_UMOUNT records; field order
    # and types follow the C declaration.
    _fields_ = [
        # mount namespace inode number (current->nsproxy->mnt_ns->ns.inum)
        ('mnt_ns', ctypes.c_uint),
        # command name written by bpf_get_current_comm()
        ('comm', ctypes.c_char * TASK_COMM_LEN),
        # flags argument of the traced mount()/umount() call
        ('flags', ctypes.c_ulong),
    ]
250
251
class DataUnion(ctypes.Union):
    # ctypes mirror of the anonymous union inside struct data_t (bpf_text).
    # Which member is meaningful depends on the record's event type.
    _fields_ = [
        # EVENT_MOUNT, EVENT_UMOUNT
        ('enter', EnterData),
        # EVENT_MOUNT_SOURCE, EVENT_MOUNT_TARGET, EVENT_MOUNT_TYPE,
        # EVENT_MOUNT_DATA, EVENT_UMOUNT_TARGET
        ('str', ctypes.c_char * MAX_STR_LEN),
        # EVENT_MOUNT_RET, EVENT_UMOUNT_RET
        ('retval', ctypes.c_int),
    ]
258
259
class Event(ctypes.Structure):
    # ctypes mirror of struct data_t (bpf_text); print_event() casts each raw
    # perf-buffer record to this type.
    _fields_ = [
        # one of the EventType values
        ('type', ctypes.c_uint),
        # low 32 bits of bpf_get_current_pid_tgid()
        ('pid', ctypes.c_uint),
        # high 32 bits of bpf_get_current_pid_tgid()
        ('tgid', ctypes.c_uint),
        # payload; the valid member depends on 'type'
        ('union', DataUnion),
    ]
267
268
269def _decode_flags(flags, flag_list):
270    str_flags = []
271    for flag, bit in flag_list:
272        if flags & bit:
273            str_flags.append(flag)
274        flags &= ~bit
275    if flags or not str_flags:
276        str_flags.append('0x{:x}'.format(flags))
277    return str_flags
278
279
def decode_flags(flags, flag_list):
    """Return flags as a '|'-separated string of names from flag_list."""
    names = _decode_flags(flags, flag_list)
    return '|'.join(names)
282
283
def decode_mount_flags(flags):
    """Return the mount() flags argument as a '|'-separated string.

    When the bits selected by MS_MGC_MSK equal MS_MGC_VAL, they are reported
    as 'MS_MGC_VAL' and removed before the remaining bits are decoded against
    MOUNT_FLAGS.
    """
    has_magic = (flags & MS_MGC_MSK) == MS_MGC_VAL
    if has_magic:
        flags &= ~MS_MGC_MSK
    prefix = ['MS_MGC_VAL'] if has_magic else []
    return '|'.join(prefix + _decode_flags(flags, MOUNT_FLAGS))
291
292
def decode_umount_flags(flags):
    """Return the umount() flags argument as a '|'-separated string."""
    names = _decode_flags(flags, UMOUNT_FLAGS)
    return '|'.join(names)
295
296
def decode_errno(retval):
    """Format a syscall return value for display.

    If -retval is a known errno, return its symbolic name prefixed with '-'
    (for example '-ENOENT'); otherwise return retval as a decimal string.
    """
    name = errno.errorcode.get(-retval)
    if name is None:
        return str(retval)
    return '-' + name
302
303
# Byte value -> backslash escape, for the bytes that have a short C-style
# escape sequence plus the two characters that must be escaped inside a
# double-quoted string.
_escape_chars = {
    ord(raw): escaped
    for raw, escaped in (
        ('\a', '\\a'),
        ('\b', '\\b'),
        ('\t', '\\t'),
        ('\n', '\\n'),
        ('\v', '\\v'),
        ('\f', '\\f'),
        ('\r', '\\r'),
        ('"', '\\"'),
        ('\\', '\\\\'),
    )
}


def escape_character(c):
    """Return the printable form of the byte value c (an integer).

    Bytes listed in _escape_chars map to their backslash escape, other
    printable ASCII bytes (0x20..0x7e) map to themselves, and everything else
    is rendered as a two-digit \\xNN escape.
    """
    escaped = _escape_chars.get(c)
    if escaped is not None:
        return escaped
    if c < 0x20 or c > 0x7e:
        return '\\x{:02x}'.format(c)
    return chr(c)
325
326
# decode_mount_string(s) renders a byte string as a double-quoted, escaped
# literal. Two variants are needed because iterating a byte string yields
# integers on Python 3 but one-character strings on Python 2.
if sys.version_info.major >= 3:
    def decode_mount_string(s):
        """Return s quoted, with each byte passed through escape_character."""
        pieces = [escape_character(byte) for byte in s]
        return '"' + ''.join(pieces) + '"'
else:
    def decode_mount_string(s):
        """Return s quoted, with each byte passed through escape_character."""
        pieces = [escape_character(ord(char)) for char in s]
        return '"' + ''.join(pieces) + '"'
333
334
def print_event(mounts, umounts, cpu, data, size):
    """Perf-buffer callback that reassembles and prints one syscall.

    Each traced call arrives as several records keyed by thread id: an
    "enter" record, one record per string argument, and a "return" record.
    The partial state is accumulated in mounts / umounts (dicts keyed by
    event.pid) and one line is printed when the return record arrives.

    mounts, umounts -- dicts holding in-flight mount() / umount() calls
    cpu, size       -- supplied by the perf buffer; unused here
    data            -- pointer to the raw record, interpreted as an Event
    """
    event = ctypes.cast(data, ctypes.POINTER(Event)).contents
    kind = event.type
    tid = event.pid

    # mount() string-argument records and the dict key each one fills in.
    mount_string_keys = {
        EventType.EVENT_MOUNT_SOURCE: 'source',
        EventType.EVENT_MOUNT_TARGET: 'target',
        EventType.EVENT_MOUNT_TYPE: 'type',
        # XXX: data is not always a NUL-terminated string
        EventType.EVENT_MOUNT_DATA: 'data',
    }

    try:
        if kind == EventType.EVENT_MOUNT or kind == EventType.EVENT_UMOUNT:
            pending = mounts if kind == EventType.EVENT_MOUNT else umounts
            entered = event.union.enter
            pending[tid] = {
                'pid': tid,
                'tgid': event.tgid,
                'mnt_ns': entered.mnt_ns,
                'comm': entered.comm,
                'flags': entered.flags,
            }
            return

        if kind in mount_string_keys:
            mounts[tid][mount_string_keys[kind]] = event.union.str
            return

        if kind == EventType.EVENT_UMOUNT_TARGET:
            umounts[tid]['target'] = event.union.str
            return

        if kind == EventType.EVENT_MOUNT_RET:
            syscall = mounts.pop(tid)
            template = ('mount({source}, {target}, {type}, {flags}, {data}) '
                        '= {retval}')
            call = template.format(
                source=decode_mount_string(syscall['source']),
                target=decode_mount_string(syscall['target']),
                type=decode_mount_string(syscall['type']),
                flags=decode_mount_flags(syscall['flags']),
                data=decode_mount_string(syscall['data']),
                retval=decode_errno(event.union.retval))
        elif kind == EventType.EVENT_UMOUNT_RET:
            syscall = umounts.pop(tid)
            call = 'umount({target}, {flags}) = {retval}'.format(
                target=decode_mount_string(syscall['target']),
                flags=decode_umount_flags(syscall['flags']),
                retval=decode_errno(event.union.retval))
        else:
            # Unrecognised record type: nothing to do.
            return

        comm = syscall['comm'].decode('utf-8', 'replace')
        print('{:16} {:<7} {:<7} {:<11} {}'.format(
            comm, syscall['tgid'], syscall['pid'], syscall['mnt_ns'], call))
    except KeyError:
        # This might happen if we lost an event.
        pass
390
391
def main():
    """Parse arguments, attach the probes and print events until interrupted.

    With the hidden --ebpf option the BPF program text is printed and the
    process exits without loading anything. Otherwise a kprobe and a
    kretprobe are attached to both the mount and umount syscalls, and
    print_event() is invoked for every record read from the "events" perf
    buffer. Ctrl-C ends the loop and exits cleanly.
    """
    parser = argparse.ArgumentParser(
        description='trace mount() and umount() syscalls'
    )
    parser.add_argument("--ebpf", action="store_true",
        help=argparse.SUPPRESS)
    args = parser.parse_args()

    if args.ebpf:
        print(bpf_text)
        # sys.exit() instead of the builtin exit(): the latter is injected by
        # the site module and is not guaranteed to exist (e.g. python -S).
        sys.exit()

    # In-flight syscalls keyed by thread id; shared with print_event().
    mounts = {}
    umounts = {}

    b = bcc.BPF(text=bpf_text)
    mount_fnname = b.get_syscall_fnname("mount")
    b.attach_kprobe(event=mount_fnname, fn_name="syscall__mount")
    b.attach_kretprobe(event=mount_fnname, fn_name="do_ret_sys_mount")
    umount_fnname = b.get_syscall_fnname("umount")
    b.attach_kprobe(event=umount_fnname, fn_name="syscall__umount")
    b.attach_kretprobe(event=umount_fnname, fn_name="do_ret_sys_umount")
    b['events'].open_perf_buffer(
        functools.partial(print_event, mounts, umounts))
    print('{:16} {:<7} {:<7} {:<11} {}'.format(
        'COMM', 'PID', 'TID', 'MNT_NS', 'CALL'))
    while True:
        try:
            b.perf_buffer_poll()
        except KeyboardInterrupt:
            # Ctrl-C is the normal way to stop tracing; exit without a
            # traceback.
            sys.exit()
418
419
# Run the tracer only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
422