• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python
2# @lint-avoid-python-3-compatibility-imports
3#
4# tcpretrans    Trace or count TCP retransmits and TLPs.
5#               For Linux, uses BCC, eBPF. Embedded C.
6#
7# USAGE: tcpretrans [-c] [-h] [-l]
8#
9# This uses dynamic tracing of kernel functions, and will need to be updated
10# to match kernel changes.
11#
12# Copyright 2016 Netflix, Inc.
13# Licensed under the Apache License, Version 2.0 (the "License")
14#
15# 14-Feb-2016   Brendan Gregg   Created this.
16# 03-Nov-2017   Matthias Tafelmeier Extended this.
17
18from __future__ import print_function
19from bcc import BPF
20import argparse
21from time import strftime
22from socket import inet_ntop, AF_INET, AF_INET6
23from struct import pack
24import ctypes as ct
25from time import sleep
26
# arguments
# The epilog is shown verbatim by -h (RawDescriptionHelpFormatter keeps the
# line breaks), so each user-visible mode gets one example line.
examples = """examples:
    ./tcpretrans           # trace TCP retransmits
    ./tcpretrans -l        # include TLP attempts
    ./tcpretrans -c        # count occurred retransmits per flow
"""
parser = argparse.ArgumentParser(
    description="Trace TCP retransmits",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)
parser.add_argument("-l", "--lossprobe", action="store_true",
    help="include tail loss probe attempts")
parser.add_argument("-c", "--count", action="store_true",
    help="count occurred retransmits per flow")
# --ebpf is hidden from -h; when given, the generated BPF C text is printed
# and the script exits before any probe is attached.
parser.add_argument("--ebpf", action="store_true",
    help=argparse.SUPPRESS)
args = parser.parse_args()
# set to a true value to print the generated BPF program before loading it
debug = 0
44
# define BPF program
#
# Embedded C source that is later handed to BPF(text=...). The IPV4_INIT,
# IPV4_CORE, IPV6_INIT and IPV6_CORE tokens inside trace_event() are not C:
# they are placeholders that this script replaces textually (with different
# snippets for count mode and trace mode) before the program is compiled.
# trace_retransmit() and trace_tlp() are the kprobe entry points; both only
# forward to trace_event() with the matching event type.
bpf_text = """
#include <uapi/linux/ptrace.h>
#include <net/sock.h>
#include <bcc/proto.h>

#define RETRANSMIT  1
#define TLP         2

// separate data structs for ipv4 and ipv6
struct ipv4_data_t {
    u32 pid;
    u64 ip;
    u32 saddr;
    u32 daddr;
    u16 lport;
    u16 dport;
    u64 state;
    u64 type;
};
BPF_PERF_OUTPUT(ipv4_events);

struct ipv6_data_t {
    u32 pid;
    u64 ip;
    unsigned __int128 saddr;
    unsigned __int128 daddr;
    u16 lport;
    u16 dport;
    u64 state;
    u64 type;
};
BPF_PERF_OUTPUT(ipv6_events);

// separate flow keys per address family
struct ipv4_flow_key_t {
    u32 saddr;
    u32 daddr;
    u16 lport;
    u16 dport;
};
BPF_HASH(ipv4_count, struct ipv4_flow_key_t);

struct ipv6_flow_key_t {
    unsigned __int128 saddr;
    unsigned __int128 daddr;
    u16 lport;
    u16 dport;
};
BPF_HASH(ipv6_count, struct ipv6_flow_key_t);

static int trace_event(struct pt_regs *ctx, struct sock *skp, int type)
{
    if (skp == NULL)
        return 0;
    u32 pid = bpf_get_current_pid_tgid() >> 32;

    // pull in details
    u16 family = skp->__sk_common.skc_family;
    u16 lport = skp->__sk_common.skc_num;
    u16 dport = skp->__sk_common.skc_dport;
    char state = skp->__sk_common.skc_state;

    if (family == AF_INET) {
        IPV4_INIT
        IPV4_CORE
    } else if (family == AF_INET6) {
        IPV6_INIT
        IPV6_CORE
    }
    // else drop

    return 0;
}

int trace_retransmit(struct pt_regs *ctx, struct sock *sk)
{
    trace_event(ctx, sk, RETRANSMIT);
    return 0;
}

int trace_tlp(struct pt_regs *ctx, struct sock *sk)
{
    trace_event(ctx, sk, TLP);
    return 0;
}
"""
132
# C snippets substituted for the IPV4_INIT / IPV6_INIT placeholders in
# bpf_text, keyed first by address family and then by mode:
#   'count' fills the flow key used with the per-family BPF hash,
#   'trace' fills the per-event record submitted to the perf buffer.
struct_init = {
    'ipv4': {
        'count': """
               struct ipv4_flow_key_t flow_key = {};
               flow_key.saddr = skp->__sk_common.skc_rcv_saddr;
               flow_key.daddr = skp->__sk_common.skc_daddr;
               // lport is host order
               flow_key.lport = lport;
               flow_key.dport = ntohs(dport);""",
        'trace': """
               struct ipv4_data_t data4 = {};
               data4.pid = pid;
               data4.ip = 4;
               data4.type = type;
               data4.saddr = skp->__sk_common.skc_rcv_saddr;
               data4.daddr = skp->__sk_common.skc_daddr;
               // lport is host order
               data4.lport = lport;
               data4.dport = ntohs(dport);
               data4.state = state; """,
    },
    'ipv6': {
        'count': """
                    struct ipv6_flow_key_t flow_key = {};
                    bpf_probe_read(&flow_key.saddr, sizeof(flow_key.saddr),
                        skp->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
                    bpf_probe_read(&flow_key.daddr, sizeof(flow_key.daddr),
                        skp->__sk_common.skc_v6_daddr.in6_u.u6_addr32);
                    // lport is host order
                    flow_key.lport = lport;
                    flow_key.dport = ntohs(dport);""",
        'trace': """
                    struct ipv6_data_t data6 = {};
                    data6.pid = pid;
                    data6.ip = 6;
                    data6.type = type;
                    bpf_probe_read(&data6.saddr, sizeof(data6.saddr),
                        skp->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
                    bpf_probe_read(&data6.daddr, sizeof(data6.daddr),
                        skp->__sk_common.skc_v6_daddr.in6_u.u6_addr32);
                    // lport is host order
                    data6.lport = lport;
                    data6.dport = ntohs(dport);
                    data6.state = state;""",
    },
}
181
# Template for the IPVx_CORE placeholder in count mode; COUNT_STRUCT is
# swapped for the name of the BPF hash of the matching address family.
count_core_base = """
        COUNT_STRUCT.increment(flow_key);
"""

# Choose the C snippet for every placeholder according to the selected mode,
# then splice them into the program text in one pass over the table.
if args.count:
    _substitutions = [
        ("IPV4_INIT", struct_init['ipv4']['count']),
        ("IPV6_INIT", struct_init['ipv6']['count']),
        ("IPV4_CORE", count_core_base.replace("COUNT_STRUCT", 'ipv4_count')),
        ("IPV6_CORE", count_core_base.replace("COUNT_STRUCT", 'ipv6_count')),
    ]
else:
    _substitutions = [
        ("IPV4_INIT", struct_init['ipv4']['trace']),
        ("IPV6_INIT", struct_init['ipv6']['trace']),
        ("IPV4_CORE", "ipv4_events.perf_submit(ctx, &data4, sizeof(data4));"),
        ("IPV6_CORE", "ipv6_events.perf_submit(ctx, &data6, sizeof(data6));"),
    ]
for _placeholder, _snippet in _substitutions:
    bpf_text = bpf_text.replace(_placeholder, _snippet)

# Print the final program when debugging; with --ebpf, print it and stop.
if args.ebpf:
    print(bpf_text)
    exit()
elif debug:
    print(bpf_text)
201
202# event data
class Data_ipv4(ct.Structure):
    """User-space layout of one IPv4 event read from the perf buffer.

    The field names, order and widths follow struct ipv4_data_t in the
    embedded C program; print_ipv4_event() casts the raw sample pointer
    to this structure.
    """

    _fields_ = [
        ("pid", ct.c_uint),          # u32 pid
        ("ip", ct.c_ulonglong),      # u64 ip (set to 4 by the C side)
        ("saddr", ct.c_uint),        # u32 saddr
        ("daddr", ct.c_uint),        # u32 daddr
        ("lport", ct.c_ushort),      # u16 lport
        ("dport", ct.c_ushort),      # u16 dport
        ("state", ct.c_ulonglong),   # u64 state
        ("type", ct.c_ulonglong),    # u64 type
    ]
214
class Data_ipv6(ct.Structure):
    """User-space layout of one IPv6 event read from the perf buffer.

    The field names, order and widths follow struct ipv6_data_t in the
    embedded C program; print_ipv6_event() casts the raw sample pointer
    to this structure. Each 128-bit address is exposed as two 64-bit words.
    """

    _fields_ = [
        ("pid", ct.c_uint),              # u32 pid
        ("ip", ct.c_ulonglong),          # u64 ip (set to 6 by the C side)
        ("saddr", ct.c_ulonglong * 2),   # unsigned __int128 saddr
        ("daddr", ct.c_ulonglong * 2),   # unsigned __int128 daddr
        ("lport", ct.c_ushort),          # u16 lport
        ("dport", ct.c_ushort),          # u16 dport
        ("state", ct.c_ulonglong),       # u64 state
        ("type", ct.c_ulonglong),        # u64 type
    ]
226
# from bpf_text:
# Event type code -> one-letter tag printed in the "T" column
# (1 is RETRANSMIT and 2 is TLP in the embedded C program).
# The name shadows the builtin type(); it is kept because the print
# callbacks look it up under this name.
type = {1: 'R', 2: 'L'}

# from include/net/tcp_states.h:
# Numeric socket state -> name printed in the STATE column.
tcpstate = {
    1: 'ESTABLISHED',
    2: 'SYN_SENT',
    3: 'SYN_RECV',
    4: 'FIN_WAIT1',
    5: 'FIN_WAIT2',
    6: 'TIME_WAIT',
    7: 'CLOSE',
    8: 'CLOSE_WAIT',
    9: 'LAST_ACK',
    10: 'LISTEN',
    11: 'CLOSING',
    12: 'NEW_SYN_RECV',
}
246
247# process event
def print_ipv4_event(cpu, data, size):
    """Print one output line for an IPv4 retransmit/TLP event.

    Registered as the callback of the ipv4_events perf buffer.

    Args:
        cpu: first callback argument supplied by the perf buffer; unused.
        data: raw pointer to the sample, interpreted as a Data_ipv4 record.
        size: third callback argument supplied by the perf buffer; unused.

    Raises:
        KeyError: if the record carries a type or state code that is missing
            from the type / tcpstate lookup tables.
    """
    event = ct.cast(data, ct.POINTER(Data_ipv4)).contents
    timestamp = strftime("%H:%M:%S")
    # The addresses arrive as raw 32-bit values; repacking them in native
    # order gives back the 4 bytes that inet_ntop() turns into dotted quad.
    local_ep = "%s:%d" % (inet_ntop(AF_INET, pack('I', event.saddr)),
                          event.lport)
    tag = type[event.type]
    # The port is an integer, so it is formatted with %d like the local port
    # and like both ports in the IPv6 printer.
    remote_ep = "%s:%d" % (inet_ntop(AF_INET, pack('I', event.daddr)),
                           event.dport)
    state_name = tcpstate[event.state]
    print("%-8s %-6d %-2d %-20s %1s> %-20s %s" % (
        timestamp, event.pid, event.ip, local_ep, tag, remote_ep, state_name))
256
def print_ipv6_event(cpu, data, size):
    """Print one output line for an IPv6 retransmit/TLP event.

    Registered as the callback of the ipv6_events perf buffer.

    Args:
        cpu: first callback argument supplied by the perf buffer; unused.
        data: raw pointer to the sample, interpreted as a Data_ipv6 record.
        size: third callback argument supplied by the perf buffer; unused.

    Raises:
        KeyError: if the record carries a type or state code that is missing
            from the type / tcpstate lookup tables.
    """
    record = ct.cast(data, ct.POINTER(Data_ipv6)).contents
    timestamp = strftime("%H:%M:%S")
    # The 16-byte address arrays are passed to inet_ntop() as they are.
    local_ep = "%s:%d" % (inet_ntop(AF_INET6, record.saddr), record.lport)
    tag = type[record.type]
    remote_ep = "%s:%d" % (inet_ntop(AF_INET6, record.daddr), record.dport)
    state_name = tcpstate[record.state]
    columns = (timestamp, record.pid, record.ip, local_ep, tag, remote_ep,
               state_name)
    print("%-8s %-6d %-2d %-20s %1s> %-20s %s" % columns)
265
def depict_cnt(counts_tab, l3prot='ipv4'):
    """Print every flow of a counter table, lowest count first.

    Args:
        counts_tab: mapping-like object whose items() yields (key, counter)
            pairs; each key exposes saddr, daddr, lport and dport, and each
            counter exposes its number as .value.
        l3prot: 'ipv4' to treat the addresses as 32-bit integers; any other
            value treats them as raw IPv6 address buffers.
    """
    if l3prot == 'ipv4':
        def render_addr(raw):
            # repack the integer into the 4 bytes inet_ntop() expects
            return inet_ntop(AF_INET, pack('I', raw))
    else:
        def render_addr(raw):
            return inet_ntop(AF_INET6, raw)

    endpoint_fmt = "[%s]#%d"
    by_count = sorted(counts_tab.items(), key=lambda entry: entry[1].value)
    for flow, hits in by_count:
        local_ep = endpoint_fmt % (render_addr(flow.saddr), flow.lport)
        remote_ep = endpoint_fmt % (render_addr(flow.daddr), flow.dport)
        flow_text = "%-20s <-> %-20s" % (local_ep, remote_ep)
        print("%s %10d" % (flow_text, hits.value))
278
# initialize BPF
# Compile and load the generated program, then hook the kernel functions:
# retransmits always, tail loss probes only when -l was given.
b = BPF(text=bpf_text)
b.attach_kprobe(event="tcp_retransmit_skb", fn_name="trace_retransmit")
if args.lossprobe:
    b.attach_kprobe(event="tcp_send_loss_probe", fn_name="trace_tlp")

print("Tracing retransmits ... Hit Ctrl-C to end")
if args.count:
    # In count mode user space only waits for Ctrl-C and then prints the two
    # per-family tables once. Only KeyboardInterrupt ends the wait quietly,
    # as in the event loop below; any other exception is left to propagate
    # instead of being swallowed.
    try:
        while 1:
            sleep(99999999)
    except KeyboardInterrupt:
        pass

    # header
    print("\n%-25s %-25s %-10s" % (
        "LADDR:LPORT", "RADDR:RPORT", "RETRANSMITS"))
    depict_cnt(b.get_table("ipv4_count"))
    depict_cnt(b.get_table("ipv6_count"), l3prot='ipv6')
# read events
else:
    # header
    print("%-8s %-6s %-2s %-20s %1s> %-20s %-4s" % ("TIME", "PID", "IP",
        "LADDR:LPORT", "T", "RADDR:RPORT", "STATE"))
    # one perf buffer per address family, each with its own printer
    b["ipv4_events"].open_perf_buffer(print_ipv4_event)
    b["ipv6_events"].open_perf_buffer(print_ipv6_event)
    while 1:
        try:
            b.perf_buffer_poll()
        except KeyboardInterrupt:
            exit()
310