1#!/usr/bin/env python 2# @lint-avoid-python-3-compatibility-imports 3# 4# tcpretrans Trace or count TCP retransmits and TLPs. 5# For Linux, uses BCC, eBPF. Embedded C. 6# 7# USAGE: tcpretrans [-c] [-h] [-l] 8# 9# This uses dynamic tracing of kernel functions, and will need to be updated 10# to match kernel changes. 11# 12# Copyright 2016 Netflix, Inc. 13# Licensed under the Apache License, Version 2.0 (the "License") 14# 15# 14-Feb-2016 Brendan Gregg Created this. 16# 03-Nov-2017 Matthias Tafelmeier Extended this. 17 18from __future__ import print_function 19from bcc import BPF 20import argparse 21from time import strftime 22from socket import inet_ntop, AF_INET, AF_INET6 23from struct import pack 24import ctypes as ct 25from time import sleep 26 27# arguments 28examples = """examples: 29 ./tcpretrans # trace TCP retransmits 30 ./tcpretrans -l # include TLP attempts 31""" 32parser = argparse.ArgumentParser( 33 description="Trace TCP retransmits", 34 formatter_class=argparse.RawDescriptionHelpFormatter, 35 epilog=examples) 36parser.add_argument("-l", "--lossprobe", action="store_true", 37 help="include tail loss probe attempts") 38parser.add_argument("-c", "--count", action="store_true", 39 help="count occurred retransmits per flow") 40parser.add_argument("--ebpf", action="store_true", 41 help=argparse.SUPPRESS) 42args = parser.parse_args() 43debug = 0 44 45# define BPF program 46bpf_text = """ 47#include <uapi/linux/ptrace.h> 48#include <net/sock.h> 49#include <bcc/proto.h> 50 51#define RETRANSMIT 1 52#define TLP 2 53 54// separate data structs for ipv4 and ipv6 55struct ipv4_data_t { 56 u32 pid; 57 u64 ip; 58 u32 saddr; 59 u32 daddr; 60 u16 lport; 61 u16 dport; 62 u64 state; 63 u64 type; 64}; 65BPF_PERF_OUTPUT(ipv4_events); 66 67struct ipv6_data_t { 68 u32 pid; 69 u64 ip; 70 unsigned __int128 saddr; 71 unsigned __int128 daddr; 72 u16 lport; 73 u16 dport; 74 u64 state; 75 u64 type; 76}; 77BPF_PERF_OUTPUT(ipv6_events); 78 79// separate flow keys per address family 80struct ipv4_flow_key_t { 81 u32 saddr; 82 u32 daddr; 83 u16 lport; 84 u16 dport; 85}; 86BPF_HASH(ipv4_count, struct ipv4_flow_key_t); 87 88struct ipv6_flow_key_t { 89 unsigned __int128 saddr; 90 unsigned __int128 daddr; 91 u16 lport; 92 u16 dport; 93}; 94BPF_HASH(ipv6_count, struct ipv6_flow_key_t); 95 96static int trace_event(struct pt_regs *ctx, struct sock *skp, int type) 97{ 98 if (skp == NULL) 99 return 0; 100 u32 pid = bpf_get_current_pid_tgid() >> 32; 101 102 // pull in details 103 u16 family = skp->__sk_common.skc_family; 104 u16 lport = skp->__sk_common.skc_num; 105 u16 dport = skp->__sk_common.skc_dport; 106 char state = skp->__sk_common.skc_state; 107 108 if (family == AF_INET) { 109 IPV4_INIT 110 IPV4_CORE 111 } else if (family == AF_INET6) { 112 IPV6_INIT 113 IPV6_CORE 114 } 115 // else drop 116 117 return 0; 118} 119 120int trace_retransmit(struct pt_regs *ctx, struct sock *sk) 121{ 122 trace_event(ctx, sk, RETRANSMIT); 123 return 0; 124} 125 126int trace_tlp(struct pt_regs *ctx, struct sock *sk) 127{ 128 trace_event(ctx, sk, TLP); 129 return 0; 130} 131""" 132 133struct_init = { 'ipv4': 134 { 'count' : 135 """ 136 struct ipv4_flow_key_t flow_key = {}; 137 flow_key.saddr = skp->__sk_common.skc_rcv_saddr; 138 flow_key.daddr = skp->__sk_common.skc_daddr; 139 // lport is host order 140 flow_key.lport = lport; 141 flow_key.dport = ntohs(dport);""", 142 'trace' : 143 """ 144 struct ipv4_data_t data4 = {}; 145 data4.pid = pid; 146 data4.ip = 4; 147 data4.type = type; 148 data4.saddr = skp->__sk_common.skc_rcv_saddr; 149 data4.daddr = skp->__sk_common.skc_daddr; 150 // lport is host order 151 data4.lport = lport; 152 data4.dport = ntohs(dport); 153 data4.state = state; """ 154 }, 155 'ipv6': 156 { 'count' : 157 """ 158 struct ipv6_flow_key_t flow_key = {}; 159 bpf_probe_read(&flow_key.saddr, sizeof(flow_key.saddr), 160 skp->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32); 161 bpf_probe_read(&flow_key.daddr, sizeof(flow_key.daddr), 162 skp->__sk_common.skc_v6_daddr.in6_u.u6_addr32); 163 // lport is host order 164 flow_key.lport = lport; 165 flow_key.dport = ntohs(dport);""", 166 'trace' : """ 167 struct ipv6_data_t data6 = {}; 168 data6.pid = pid; 169 data6.ip = 6; 170 data6.type = type; 171 bpf_probe_read(&data6.saddr, sizeof(data6.saddr), 172 skp->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32); 173 bpf_probe_read(&data6.daddr, sizeof(data6.daddr), 174 skp->__sk_common.skc_v6_daddr.in6_u.u6_addr32); 175 // lport is host order 176 data6.lport = lport; 177 data6.dport = ntohs(dport); 178 data6.state = state;""" 179 } 180 } 181 182count_core_base = """ 183 COUNT_STRUCT.increment(flow_key); 184""" 185 186if args.count: 187 bpf_text = bpf_text.replace("IPV4_INIT", struct_init['ipv4']['count']) 188 bpf_text = bpf_text.replace("IPV6_INIT", struct_init['ipv6']['count']) 189 bpf_text = bpf_text.replace("IPV4_CORE", count_core_base.replace("COUNT_STRUCT", 'ipv4_count')) 190 bpf_text = bpf_text.replace("IPV6_CORE", count_core_base.replace("COUNT_STRUCT", 'ipv6_count')) 191else: 192 bpf_text = bpf_text.replace("IPV4_INIT", struct_init['ipv4']['trace']) 193 bpf_text = bpf_text.replace("IPV6_INIT", struct_init['ipv6']['trace']) 194 bpf_text = bpf_text.replace("IPV4_CORE", "ipv4_events.perf_submit(ctx, &data4, sizeof(data4));") 195 bpf_text = bpf_text.replace("IPV6_CORE", "ipv6_events.perf_submit(ctx, &data6, sizeof(data6));") 196 197if debug or args.ebpf: 198 print(bpf_text) 199 if args.ebpf: 200 exit() 201 202# event data 203class Data_ipv4(ct.Structure): 204 _fields_ = [ 205 ("pid", ct.c_uint), 206 ("ip", ct.c_ulonglong), 207 ("saddr", ct.c_uint), 208 ("daddr", ct.c_uint), 209 ("lport", ct.c_ushort), 210 ("dport", ct.c_ushort), 211 ("state", ct.c_ulonglong), 212 ("type", ct.c_ulonglong) 213 ] 214 215class Data_ipv6(ct.Structure): 216 _fields_ = [ 217 ("pid", ct.c_uint), 218 ("ip", ct.c_ulonglong), 219 ("saddr", (ct.c_ulonglong * 2)), 220 ("daddr", (ct.c_ulonglong * 2)), 221 ("lport", ct.c_ushort), 222 ("dport", ct.c_ushort), 223 ("state", ct.c_ulonglong), 224 ("type", ct.c_ulonglong) 225 ] 226 227# from bpf_text: 228type = {} 229type[1] = 'R' 230type[2] = 'L' 231 232# from include/net/tcp_states.h: 233tcpstate = {} 234tcpstate[1] = 'ESTABLISHED' 235tcpstate[2] = 'SYN_SENT' 236tcpstate[3] = 'SYN_RECV' 237tcpstate[4] = 'FIN_WAIT1' 238tcpstate[5] = 'FIN_WAIT2' 239tcpstate[6] = 'TIME_WAIT' 240tcpstate[7] = 'CLOSE' 241tcpstate[8] = 'CLOSE_WAIT' 242tcpstate[9] = 'LAST_ACK' 243tcpstate[10] = 'LISTEN' 244tcpstate[11] = 'CLOSING' 245tcpstate[12] = 'NEW_SYN_RECV' 246 247# process event 248def print_ipv4_event(cpu, data, size): 249 event = ct.cast(data, ct.POINTER(Data_ipv4)).contents 250 print("%-8s %-6d %-2d %-20s %1s> %-20s %s" % ( 251 strftime("%H:%M:%S"), event.pid, event.ip, 252 "%s:%d" % (inet_ntop(AF_INET, pack('I', event.saddr)), event.lport), 253 type[event.type], 254 "%s:%s" % (inet_ntop(AF_INET, pack('I', event.daddr)), event.dport), 255 tcpstate[event.state])) 256 257def print_ipv6_event(cpu, data, size): 258 event = ct.cast(data, ct.POINTER(Data_ipv6)).contents 259 print("%-8s %-6d %-2d %-20s %1s> %-20s %s" % ( 260 strftime("%H:%M:%S"), event.pid, event.ip, 261 "%s:%d" % (inet_ntop(AF_INET6, event.saddr), event.lport), 262 type[event.type], 263 "%s:%d" % (inet_ntop(AF_INET6, event.daddr), event.dport), 264 tcpstate[event.state])) 265 266def depict_cnt(counts_tab, l3prot='ipv4'): 267 for k, v in sorted(counts_tab.items(), key=lambda counts: counts[1].value): 268 depict_key = "" 269 ep_fmt = "[%s]#%d" 270 if l3prot == 'ipv4': 271 depict_key = "%-20s <-> %-20s" % (ep_fmt % (inet_ntop(AF_INET, pack('I', k.saddr)), k.lport), 272 ep_fmt % (inet_ntop(AF_INET, pack('I', k.daddr)), k.dport)) 273 else: 274 depict_key = "%-20s <-> %-20s" % (ep_fmt % (inet_ntop(AF_INET6, k.saddr), k.lport), 275 ep_fmt % (inet_ntop(AF_INET6, k.daddr), k.dport)) 276 277 print ("%s %10d" % (depict_key, v.value)) 278 279# initialize BPF 280b = BPF(text=bpf_text) 281b.attach_kprobe(event="tcp_retransmit_skb", fn_name="trace_retransmit") 282if args.lossprobe: 283 b.attach_kprobe(event="tcp_send_loss_probe", fn_name="trace_tlp") 284 285print("Tracing retransmits ... Hit Ctrl-C to end") 286if args.count: 287 try: 288 while 1: 289 sleep(99999999) 290 except BaseException: 291 pass 292 293 # header 294 print("\n%-25s %-25s %-10s" % ( 295 "LADDR:LPORT", "RADDR:RPORT", "RETRANSMITS")) 296 depict_cnt(b.get_table("ipv4_count")) 297 depict_cnt(b.get_table("ipv6_count"), l3prot='ipv6') 298# read events 299else: 300 # header 301 print("%-8s %-6s %-2s %-20s %1s> %-20s %-4s" % ("TIME", "PID", "IP", 302 "LADDR:LPORT", "T", "RADDR:RPORT", "STATE")) 303 b["ipv4_events"].open_perf_buffer(print_ipv4_event) 304 b["ipv6_events"].open_perf_buffer(print_ipv6_event) 305 while 1: 306 try: 307 b.perf_buffer_poll() 308 except KeyboardInterrupt: 309 exit() 310