• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/python
2# @lint-avoid-python-3-compatibility-imports
3#
4# tcpaccept Trace TCP accept()s.
5#           For Linux, uses BCC, eBPF. Embedded C.
6#
7# USAGE: tcpaccept [-h] [-t] [-p PID]
8#
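# This uses dynamic tracing of the kernel inet_csk_accept() socket function
# (from tcp_prot.accept), and will need to be modified to match kernel changes.
# When the kernel provides the sock:inet_sock_set_state tracepoint, that
# tracepoint is used instead of the kprobe.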
11#
12# Copyright (c) 2015 Brendan Gregg.
13# Licensed under the Apache License, Version 2.0 (the "License")
14#
15# 13-Oct-2015   Brendan Gregg   Created this.
16# 14-Feb-2016      "      "     Switch to bpf_perf_output.
17
18from __future__ import print_function
19from bcc import BPF
20from socket import inet_ntop, AF_INET, AF_INET6
21from struct import pack
22import argparse
23import ctypes as ct
24
# arguments
# Usage examples shown verbatim at the end of --help (the raw formatter
# below keeps the layout intact).
examples = """examples:
    ./tcpaccept           # trace all TCP accept()s
    ./tcpaccept -t        # include timestamps
    ./tcpaccept -p 181    # only trace PID 181
"""
parser = argparse.ArgumentParser(
    epilog=examples,
    formatter_class=argparse.RawDescriptionHelpFormatter,
    description="Trace TCP accepts",
)
parser.add_argument(
    "-t", "--timestamp",
    action="store_true",
    help="include timestamp on output",
)
parser.add_argument(
    "-p", "--pid",
    help="trace this PID only",
)
# --ebpf is hidden from --help; when given, the generated BPF program text
# is printed and the tool exits.
parser.add_argument(
    "--ebpf",
    action="store_true",
    help=argparse.SUPPRESS,
)
args = parser.parse_args()

# Set to a non-zero value to print the generated BPF program before loading.
debug = 0
43
# define BPF program
#
# Common prologue of the embedded C program: the kernel headers it needs,
# the per-event record for each address family, and one perf output
# channel per record type (ipv4_events / ipv6_events).  One probe body
# (kprobe or tracepoint) is appended to this text further down.
#
# The field order and types of ipv4_data_t / ipv6_data_t are mirrored by
# the ctypes classes Data_ipv4 / Data_ipv6 in this file; any change here
# must be made there as well.
bpf_text = """
#include <uapi/linux/ptrace.h>
#include <net/sock.h>
#include <bcc/proto.h>

// separate data structs for ipv4 and ipv6
struct ipv4_data_t {
    u64 ts_us;
    u32 pid;
    u32 saddr;
    u32 daddr;
    u64 ip;
    u16 lport;
    char task[TASK_COMM_LEN];
};
BPF_PERF_OUTPUT(ipv4_events);

struct ipv6_data_t {
    u64 ts_us;
    u32 pid;
    unsigned __int128 saddr;
    unsigned __int128 daddr;
    u64 ip;
    u16 lport;
    char task[TASK_COMM_LEN];
};
BPF_PERF_OUTPUT(ipv6_events);
"""
73
#
# The following is the code for older kernels (Linux pre-4.16).
# It uses a kretprobe to instrument inet_csk_accept(). On Linux 4.16 and
# later, the sock:inet_sock_set_state tracepoint should be used instead, as
# is done by the code that follows this.
#
# The probe reads the socket returned by inet_csk_accept(), drops it unless
# it is TCP, and submits one ipv4_events or ipv6_events record depending on
# the address family.
#
# The FILTER line is a placeholder: the "code substitutions" step of this
# script replaces it with a PID check (when -p is given) or with nothing.
# It must appear after `pid` is declared, because the substituted check
# reads that variable.
#
# bpf_get_current_pid_tgid() packs the process id (tgid) into the upper 32
# bits and the thread id into the lower 32 bits, so the value is shifted to
# report and filter on the process id rather than the thread id.
#
bpf_text_kprobe = """
int kretprobe__inet_csk_accept(struct pt_regs *ctx)
{
    struct sock *newsk = (struct sock *)PT_REGS_RC(ctx);
    u32 pid = bpf_get_current_pid_tgid() >> 32;

    FILTER

    if (newsk == NULL)
        return 0;

    // check this is TCP
    u8 protocol = 0;
    // workaround for reading the sk_protocol bitfield:

    // Following comments add by Joe Yin:
    // Unfortunately,it can not work since Linux 4.10,
    // because the sk_wmem_queued is not following the bitfield of sk_protocol.
    // And the following member is sk_gso_max_segs.
    // So, we can use this:
    // bpf_probe_read(&protocol, 1, (void *)((u64)&newsk->sk_gso_max_segs) - 3);
    // In order to  diff the pre-4.10 and 4.10+ ,introduce the variables gso_max_segs_offset,sk_lingertime,
    // sk_lingertime is closed to the gso_max_segs_offset,and
    // the offset between the two members is 4

    int gso_max_segs_offset = offsetof(struct sock, sk_gso_max_segs);
    int sk_lingertime_offset = offsetof(struct sock, sk_lingertime);

    if (sk_lingertime_offset - gso_max_segs_offset == 4)
        // 4.10+ with little endian
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
        protocol = *(u8 *)((u64)&newsk->sk_gso_max_segs - 3);
    else
        // pre-4.10 with little endian
        protocol = *(u8 *)((u64)&newsk->sk_wmem_queued - 3);
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
        // 4.10+ with big endian
        protocol = *(u8 *)((u64)&newsk->sk_gso_max_segs - 1);
    else
        // pre-4.10 with big endian
        protocol = *(u8 *)((u64)&newsk->sk_wmem_queued - 1);
#else
# error "Fix your compiler's __BYTE_ORDER__?!"
#endif

    if (protocol != IPPROTO_TCP)
        return 0;

    // pull in details
    u16 family = 0, lport = 0;
    family = newsk->__sk_common.skc_family;
    lport = newsk->__sk_common.skc_num;

    if (family == AF_INET) {
        struct ipv4_data_t data4 = {.pid = pid, .ip = 4};
        data4.ts_us = bpf_ktime_get_ns() / 1000;
        data4.saddr = newsk->__sk_common.skc_rcv_saddr;
        data4.daddr = newsk->__sk_common.skc_daddr;
        data4.lport = lport;
        bpf_get_current_comm(&data4.task, sizeof(data4.task));
        ipv4_events.perf_submit(ctx, &data4, sizeof(data4));

    } else if (family == AF_INET6) {
        struct ipv6_data_t data6 = {.pid = pid, .ip = 6};
        data6.ts_us = bpf_ktime_get_ns() / 1000;
        bpf_probe_read(&data6.saddr, sizeof(data6.saddr),
            &newsk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
        bpf_probe_read(&data6.daddr, sizeof(data6.daddr),
            &newsk->__sk_common.skc_v6_daddr.in6_u.u6_addr32);
        data6.lport = lport;
        bpf_get_current_comm(&data6.task, sizeof(data6.task));
        ipv6_events.perf_submit(ctx, &data6, sizeof(data6));
    }
    // else drop

    return 0;
}
"""
156
#
# Probe body built on the sock:inet_sock_set_state tracepoint.
#
# The tracepoint fires on every socket state change, so the probe keeps
# only TCP sockets moving from SYN_RECV to ESTABLISHED, i.e. a passively
# opened connection completing its handshake.
#
# The tracepoint record carries separate address fields per family:
# saddr/daddr are 4-byte IPv4 fields and saddr_v6/daddr_v6 are 16-byte IPv6
# fields, so each branch must copy from the fields matching its family.
#
# The FILTER line is a placeholder: the "code substitutions" step of this
# script replaces it with a PID check (when -p is given) or with nothing.
# bpf_get_current_pid_tgid() is shifted right by 32 to obtain the process
# id (tgid) rather than the thread id.
#
# NOTE(review): this tracepoint may fire outside the context of the process
# that later calls accept(), in which case pid/comm would not identify the
# accepting task -- confirm before relying on the PID column or -p here.
#
bpf_text_tracepoint = """
TRACEPOINT_PROBE(sock, inet_sock_set_state)
{
    if (args->protocol != IPPROTO_TCP)
        return 0;

    // only report passive opens that have just completed the handshake
    if (args->oldstate != TCP_SYN_RECV || args->newstate != TCP_ESTABLISHED)
        return 0;

    u32 pid = bpf_get_current_pid_tgid() >> 32;

    FILTER

    // pull in details
    u16 family = 0, lport = 0;
    family = args->family;
    lport = args->sport;

    if (family == AF_INET) {
        struct ipv4_data_t data4 = {.pid = pid, .ip = 4};
        data4.ts_us = bpf_ktime_get_ns() / 1000;
        __builtin_memcpy(&data4.saddr, args->saddr, sizeof(data4.saddr));
        __builtin_memcpy(&data4.daddr, args->daddr, sizeof(data4.daddr));
        data4.lport = lport;
        bpf_get_current_comm(&data4.task, sizeof(data4.task));
        ipv4_events.perf_submit(args, &data4, sizeof(data4));
    } else if (family == AF_INET6) {
        struct ipv6_data_t data6 = {.pid = pid, .ip = 6};
        data6.ts_us = bpf_ktime_get_ns() / 1000;
        __builtin_memcpy(&data6.saddr, args->saddr_v6, sizeof(data6.saddr));
        __builtin_memcpy(&data6.daddr, args->daddr_v6, sizeof(data6.daddr));
        data6.lport = lport;
        bpf_get_current_comm(&data6.task, sizeof(data6.task));
        ipv6_events.perf_submit(args, &data6, sizeof(data6));
    }
    // else drop

    return 0;
}
"""
190
# Choose the probe body: use the tracepoint implementation when the kernel
# exposes sock:inet_sock_set_state, otherwise fall back to the kprobe one.
if BPF.tracepoint_exists("sock", "inet_sock_set_state"):
    probe_text = bpf_text_tracepoint
else:
    probe_text = bpf_text_kprobe
bpf_text = bpf_text + probe_text


# code substitutions
# Every occurrence of the FILTER marker in the program text becomes a PID
# check when -p was given, and is removed otherwise.
pid_filter = 'if (pid != %s) { return 0; }' % args.pid if args.pid else ''
bpf_text = bpf_text.replace('FILTER', pid_filter)

# --ebpf prints the final program and stops; the debug switch prints it and
# carries on.
if args.ebpf:
    print(bpf_text)
    exit()
elif debug:
    print(bpf_text)
207
# event data
TASK_COMM_LEN = 16      # linux/sched.h


class Data_ipv4(ct.Structure):
    """ctypes view of one IPv4 record read from the ipv4_events buffer.

    Field order and types follow struct ipv4_data_t in the embedded BPF
    program; the two definitions must be kept in step.
    """

    _fields_ = [
        # event time in microseconds (bpf_ktime_get_ns() / 1000)
        ("ts_us", ct.c_ulonglong),
        # id of the task that was current when the probe fired
        ("pid", ct.c_uint),
        # source address; printed in the LADDR column
        ("saddr", ct.c_uint),
        # destination address; printed in the RADDR column
        ("daddr", ct.c_uint),
        # IP version; the BPF program sets this to 4 for these records
        ("ip", ct.c_ulonglong),
        # local port; printed in the LPORT column
        ("lport", ct.c_ushort),
        # command name filled in by bpf_get_current_comm()
        ("task", ct.c_char * TASK_COMM_LEN),
    ]
221
class Data_ipv6(ct.Structure):
    """ctypes view of one IPv6 record read from the ipv6_events buffer.

    Field order and types follow struct ipv6_data_t in the embedded BPF
    program; the two definitions must be kept in step.
    """

    _fields_ = [
        # event time in microseconds (bpf_ktime_get_ns() / 1000)
        ("ts_us", ct.c_ulonglong),
        # id of the task that was current when the probe fired
        ("pid", ct.c_uint),
        # 128-bit source address held as two 64-bit words (LADDR column)
        ("saddr", ct.c_ulonglong * 2),
        # 128-bit destination address held as two 64-bit words (RADDR column)
        ("daddr", ct.c_ulonglong * 2),
        # IP version; the BPF program sets this to 6 for these records
        ("ip", ct.c_ulonglong),
        # local port; printed in the LPORT column
        ("lport", ct.c_ushort),
        # command name filled in by bpf_get_current_comm()
        ("task", ct.c_char * TASK_COMM_LEN),
    ]
232
233# process event
def print_ipv4_event(cpu, data, size):
    """Print one output row for an IPv4 event from the perf buffer.

    Registered as the callback for the "ipv4_events" perf buffer.

    cpu and size are supplied by the perf buffer machinery and are not
    used; data points at a record laid out as Data_ipv4.

    With -t, the row is prefixed by the seconds elapsed since the first
    event printed; that first event's time is remembered in the
    module-level start_ts.
    """
    global start_ts
    ev = ct.cast(data, ct.POINTER(Data_ipv4)).contents

    if args.timestamp:
        if start_ts == 0:
            start_ts = ev.ts_us
        elapsed_s = (float(ev.ts_us) - start_ts) / 1000000
        print("%-9.3f" % elapsed_s, end="")

    comm = ev.task.decode('utf-8', 'replace')
    # pack("I", ...) re-creates the address bytes in their in-memory order,
    # which is the form inet_ntop() expects.
    raddr = inet_ntop(AF_INET, pack("I", ev.daddr))
    laddr = inet_ntop(AF_INET, pack("I", ev.saddr))
    print("%-6d %-12.12s %-2d %-16s %-16s %-4d" % (
        ev.pid, comm, ev.ip, raddr, laddr, ev.lport))
245
def print_ipv6_event(cpu, data, size):
    """Print one output row for an IPv6 event from the perf buffer.

    Registered as the callback for the "ipv6_events" perf buffer.

    cpu and size are supplied by the perf buffer machinery and are not
    used; data points at a record laid out as Data_ipv6.

    With -t, the row is prefixed by the seconds elapsed since the first
    event printed; that first event's time is remembered in the
    module-level start_ts.
    """
    global start_ts
    ev = ct.cast(data, ct.POINTER(Data_ipv6)).contents

    if args.timestamp:
        if start_ts == 0:
            start_ts = ev.ts_us
        elapsed_s = (float(ev.ts_us) - start_ts) / 1000000
        print("%-9.3f" % elapsed_s, end="")

    comm = ev.task.decode('utf-8', 'replace')
    # The 16-byte address arrays are handed to inet_ntop() as-is.
    raddr = inet_ntop(AF_INET6, ev.daddr)
    laddr = inet_ntop(AF_INET6, ev.saddr)
    print("%-6d %-12.12s %-2d %-16s %-16s %-4d" % (
        ev.pid, comm, ev.ip, raddr, laddr, ev.lport))
257
# initialize BPF
# Compiles the assembled C program and attaches its probe.
b = BPF(text=bpf_text)

# header
if args.timestamp:
    print("%-9s" % ("TIME(s)"), end="")
print("%-6s %-12s %-2s %-16s %-16s %-4s" % ("PID", "COMM", "IP", "RADDR",
    "LADDR", "LPORT"))

# Time (in microseconds) of the first event printed; 0 means "none yet".
# The event callbacks set it on the first event when -t is in effect.
start_ts = 0

# read events
b["ipv4_events"].open_perf_buffer(print_ipv4_event)
b["ipv6_events"].open_perf_buffer(print_ipv6_event)
while 1:
    try:
        b.perf_buffer_poll()
    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop the tool: leave quietly instead
        # of ending with a traceback.
        exit()
274