1#!/usr/bin/env python 2# -*- coding: utf-8 -*- 3# @lint-avoid-python-3-compatibility-imports 4# 5# tcpstates Trace the TCP session state changes with durations. 6# For Linux, uses BCC, BPF. Embedded C. 7# 8# USAGE: tcpstates [-h] [-C] [-S] [interval [count]] 9# 10# This uses the sock:inet_sock_set_state tracepoint, added to Linux 4.16. 11# Linux 4.16 also adds more state transitions so that they can be traced. 12# 13# Copyright 2018 Netflix, Inc. 14# Licensed under the Apache License, Version 2.0 (the "License") 15# 16# 20-Mar-2018 Brendan Gregg Created this. 17 18from __future__ import print_function 19from bcc import BPF 20import argparse 21from socket import inet_ntop, AF_INET, AF_INET6 22from struct import pack 23import ctypes as ct 24from time import strftime, time 25from os import getuid 26 27# arguments 28examples = """examples: 29 ./tcpstates # trace all TCP state changes 30 ./tcpstates -t # include timestamp column 31 ./tcpstates -T # include time column (HH:MM:SS) 32 ./tcpstates -w # wider colums (fit IPv6) 33 ./tcpstates -stT # csv output, with times & timestamps 34 ./tcpstates -Y # log events to the systemd journal 35 ./tcpstates -L 80 # only trace local port 80 36 ./tcpstates -L 80,81 # only trace local ports 80 and 81 37 ./tcpstates -D 80 # only trace remote port 80 38""" 39parser = argparse.ArgumentParser( 40 description="Trace TCP session state changes and durations", 41 formatter_class=argparse.RawDescriptionHelpFormatter, 42 epilog=examples) 43parser.add_argument("-T", "--time", action="store_true", 44 help="include time column on output (HH:MM:SS)") 45parser.add_argument("-t", "--timestamp", action="store_true", 46 help="include timestamp on output (seconds)") 47parser.add_argument("-w", "--wide", action="store_true", 48 help="wide column output (fits IPv6 addresses)") 49parser.add_argument("-s", "--csv", action="store_true", 50 help="comma separated values output") 51parser.add_argument("-L", "--localport", 52 help="comma-separated list of local ports to trace.") 53parser.add_argument("-D", "--remoteport", 54 help="comma-separated list of remote ports to trace.") 55parser.add_argument("--ebpf", action="store_true", 56 help=argparse.SUPPRESS) 57parser.add_argument("-Y", "--journal", action="store_true", 58 help="log session state changes to the systemd journal") 59args = parser.parse_args() 60debug = 0 61 62# define BPF program 63bpf_text = """ 64#include <uapi/linux/ptrace.h> 65#define KBUILD_MODNAME "foo" 66#include <linux/tcp.h> 67#include <net/sock.h> 68#include <bcc/proto.h> 69 70BPF_HASH(last, struct sock *, u64); 71 72// separate data structs for ipv4 and ipv6 73struct ipv4_data_t { 74 u64 ts_us; 75 u64 skaddr; 76 u32 saddr; 77 u32 daddr; 78 u64 span_us; 79 u32 pid; 80 u32 ports; 81 u32 oldstate; 82 u32 newstate; 83 char task[TASK_COMM_LEN]; 84}; 85BPF_PERF_OUTPUT(ipv4_events); 86 87struct ipv6_data_t { 88 u64 ts_us; 89 u64 skaddr; 90 unsigned __int128 saddr; 91 unsigned __int128 daddr; 92 u64 span_us; 93 u32 pid; 94 u32 ports; 95 u32 oldstate; 96 u32 newstate; 97 char task[TASK_COMM_LEN]; 98}; 99BPF_PERF_OUTPUT(ipv6_events); 100 101struct id_t { 102 u32 pid; 103 char task[TASK_COMM_LEN]; 104}; 105 106TRACEPOINT_PROBE(sock, inet_sock_set_state) 107{ 108 if (args->protocol != IPPROTO_TCP) 109 return 0; 110 111 u32 pid = bpf_get_current_pid_tgid() >> 32; 112 // sk is used as a UUID 113 struct sock *sk = (struct sock *)args->skaddr; 114 115 // lport is either used in a filter here, or later 116 u16 lport = args->sport; 117 FILTER_LPORT 118 119 // dport is either used in a filter here, or later 120 u16 dport = args->dport; 121 FILTER_DPORT 122 123 // calculate delta 124 u64 *tsp, delta_us; 125 tsp = last.lookup(&sk); 126 if (tsp == 0) 127 delta_us = 0; 128 else 129 delta_us = (bpf_ktime_get_ns() - *tsp) / 1000; 130 131 if (args->family == AF_INET) { 132 struct ipv4_data_t data4 = { 133 .span_us = delta_us, 134 .oldstate = args->oldstate, 135 .newstate = args->newstate }; 136 data4.skaddr = (u64)args->skaddr; 137 data4.ts_us = bpf_ktime_get_ns() / 1000; 138 __builtin_memcpy(&data4.saddr, args->saddr, sizeof(data4.saddr)); 139 __builtin_memcpy(&data4.daddr, args->daddr, sizeof(data4.daddr)); 140 // a workaround until data4 compiles with separate lport/dport 141 data4.ports = dport + ((0ULL + lport) << 32); 142 data4.pid = pid; 143 144 bpf_get_current_comm(&data4.task, sizeof(data4.task)); 145 ipv4_events.perf_submit(args, &data4, sizeof(data4)); 146 147 } else /* 6 */ { 148 struct ipv6_data_t data6 = { 149 .span_us = delta_us, 150 .oldstate = args->oldstate, 151 .newstate = args->newstate }; 152 data6.skaddr = (u64)args->skaddr; 153 data6.ts_us = bpf_ktime_get_ns() / 1000; 154 __builtin_memcpy(&data6.saddr, args->saddr_v6, sizeof(data6.saddr)); 155 __builtin_memcpy(&data6.daddr, args->daddr_v6, sizeof(data6.daddr)); 156 // a workaround until data6 compiles with separate lport/dport 157 data6.ports = dport + ((0ULL + lport) << 32); 158 data6.pid = pid; 159 bpf_get_current_comm(&data6.task, sizeof(data6.task)); 160 ipv6_events.perf_submit(args, &data6, sizeof(data6)); 161 } 162 163 u64 ts = bpf_ktime_get_ns(); 164 last.update(&sk, &ts); 165 166 return 0; 167} 168""" 169 170if (not BPF.tracepoint_exists("sock", "inet_sock_set_state")): 171 print("ERROR: tracepoint sock:inet_sock_set_state missing " 172 "(added in Linux 4.16). Exiting") 173 exit() 174 175# code substitutions 176if args.remoteport: 177 dports = [int(dport) for dport in args.remoteport.split(',')] 178 dports_if = ' && '.join(['dport != %d' % dport for dport in dports]) 179 bpf_text = bpf_text.replace('FILTER_DPORT', 180 'if (%s) { last.delete(&sk); return 0; }' % dports_if) 181if args.localport: 182 lports = [int(lport) for lport in args.localport.split(',')] 183 lports_if = ' && '.join(['lport != %d' % lport for lport in lports]) 184 bpf_text = bpf_text.replace('FILTER_LPORT', 185 'if (%s) { last.delete(&sk); return 0; }' % lports_if) 186bpf_text = bpf_text.replace('FILTER_DPORT', '') 187bpf_text = bpf_text.replace('FILTER_LPORT', '') 188 189if debug or args.ebpf: 190 print(bpf_text) 191 if args.ebpf: 192 exit() 193 194# event data 195TASK_COMM_LEN = 16 # linux/sched.h 196 197class Data_ipv4(ct.Structure): 198 _fields_ = [ 199 ("ts_us", ct.c_ulonglong), 200 ("skaddr", ct.c_ulonglong), 201 ("saddr", ct.c_uint), 202 ("daddr", ct.c_uint), 203 ("span_us", ct.c_ulonglong), 204 ("pid", ct.c_uint), 205 ("ports", ct.c_uint), 206 ("oldstate", ct.c_uint), 207 ("newstate", ct.c_uint), 208 ("task", ct.c_char * TASK_COMM_LEN) 209 ] 210 211class Data_ipv6(ct.Structure): 212 _fields_ = [ 213 ("ts_us", ct.c_ulonglong), 214 ("skaddr", ct.c_ulonglong), 215 ("saddr", (ct.c_ulonglong * 2)), 216 ("daddr", (ct.c_ulonglong * 2)), 217 ("span_us", ct.c_ulonglong), 218 ("pid", ct.c_uint), 219 ("ports", ct.c_uint), 220 ("oldstate", ct.c_uint), 221 ("newstate", ct.c_uint), 222 ("task", ct.c_char * TASK_COMM_LEN) 223 ] 224 225# 226# Setup output formats 227# 228# Don't change the default output (next 2 lines): this fits in 80 chars. I 229# know it doesn't have NS or UIDs etc. I know. If you really, really, really 230# need to add columns, columns that solve real actual problems, I'd start by 231# adding an extended mode (-x) to included those columns. 232# 233header_string = "%-16s %-5s %-10.10s %s%-15s %-5s %-15s %-5s %-11s -> %-11s %s" 234format_string = ("%-16x %-5d %-10.10s %s%-15s %-5d %-15s %-5d %-11s " + 235 "-> %-11s %.3f") 236if args.wide: 237 header_string = ("%-16s %-5s %-16.16s %-2s %-26s %-5s %-26s %-5s %-11s " + 238 "-> %-11s %s") 239 format_string = ("%-16x %-5d %-16.16s %-2s %-26s %-5s %-26s %-5d %-11s " + 240 "-> %-11s %.3f") 241if args.csv: 242 header_string = "%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s" 243 format_string = "%x,%d,%s,%s,%s,%s,%s,%d,%s,%s,%.3f" 244 245if args.journal: 246 try: 247 from systemd import journal 248 except ImportError: 249 print("ERROR: Journal logging requires the systemd.journal module") 250 exit(1) 251 252 253def tcpstate2str(state): 254 # from include/net/tcp_states.h: 255 tcpstate = { 256 1: "ESTABLISHED", 257 2: "SYN_SENT", 258 3: "SYN_RECV", 259 4: "FIN_WAIT1", 260 5: "FIN_WAIT2", 261 6: "TIME_WAIT", 262 7: "CLOSE", 263 8: "CLOSE_WAIT", 264 9: "LAST_ACK", 265 10: "LISTEN", 266 11: "CLOSING", 267 12: "NEW_SYN_RECV", 268 } 269 270 if state in tcpstate: 271 return tcpstate[state] 272 else: 273 return str(state) 274 275def journal_fields(event, addr_family): 276 addr_pfx = 'IPV4' 277 if addr_family == AF_INET6: 278 addr_pfx = 'IPV6' 279 280 fields = { 281 # Standard fields described in systemd.journal-fields(7). journal.send 282 # will fill in CODE_LINE, CODE_FILE, and CODE_FUNC for us. If we're 283 # root and specify OBJECT_PID, systemd-journald will add other OBJECT_* 284 # fields for us. 285 'SYSLOG_IDENTIFIER': 'tcpstates', 286 'PRIORITY': 5, 287 '_SOURCE_REALTIME_TIMESTAMP': time() * 1000000, 288 'OBJECT_PID': str(event.pid), 289 'OBJECT_COMM': event.task.decode('utf-8', 'replace'), 290 # Custom fields, aka "stuff we sort of made up". 291 'OBJECT_' + addr_pfx + '_SOURCE_ADDRESS': inet_ntop(addr_family, pack("I", event.saddr)), 292 'OBJECT_TCP_SOURCE_PORT': str(event.ports >> 32), 293 'OBJECT_' + addr_pfx + '_DESTINATION_ADDRESS': inet_ntop(addr_family, pack("I", event.daddr)), 294 'OBJECT_TCP_DESTINATION_PORT': str(event.ports & 0xffffffff), 295 'OBJECT_TCP_OLD_STATE': tcpstate2str(event.oldstate), 296 'OBJECT_TCP_NEW_STATE': tcpstate2str(event.newstate), 297 'OBJECT_TCP_SPAN_TIME': str(event.span_us) 298 } 299 300 msg_format_string = (u"%(OBJECT_COMM)s " + 301 u"%(OBJECT_" + addr_pfx + "_SOURCE_ADDRESS)s " + 302 u"%(OBJECT_TCP_SOURCE_PORT)s → " + 303 u"%(OBJECT_" + addr_pfx + "_DESTINATION_ADDRESS)s " + 304 u"%(OBJECT_TCP_DESTINATION_PORT)s " + 305 u"%(OBJECT_TCP_OLD_STATE)s → %(OBJECT_TCP_NEW_STATE)s") 306 fields['MESSAGE'] = msg_format_string % (fields) 307 308 if getuid() == 0: 309 del fields['OBJECT_COMM'] # Handled by systemd-journald 310 311 return fields 312 313# process event 314def print_ipv4_event(cpu, data, size): 315 event = ct.cast(data, ct.POINTER(Data_ipv4)).contents 316 global start_ts 317 if args.time: 318 if args.csv: 319 print("%s," % strftime("%H:%M:%S"), end="") 320 else: 321 print("%-8s " % strftime("%H:%M:%S"), end="") 322 if args.timestamp: 323 if start_ts == 0: 324 start_ts = event.ts_us 325 delta_s = (float(event.ts_us) - start_ts) / 1000000 326 if args.csv: 327 print("%.6f," % delta_s, end="") 328 else: 329 print("%-9.6f " % delta_s, end="") 330 print(format_string % (event.skaddr, event.pid, event.task.decode('utf-8', 'replace'), 331 "4" if args.wide or args.csv else "", 332 inet_ntop(AF_INET, pack("I", event.saddr)), event.ports >> 32, 333 inet_ntop(AF_INET, pack("I", event.daddr)), event.ports & 0xffffffff, 334 tcpstate2str(event.oldstate), tcpstate2str(event.newstate), 335 float(event.span_us) / 1000)) 336 if args.journal: 337 journal.send(**journal_fields(event, AF_INET)) 338 339def print_ipv6_event(cpu, data, size): 340 event = ct.cast(data, ct.POINTER(Data_ipv6)).contents 341 global start_ts 342 if args.time: 343 if args.csv: 344 print("%s," % strftime("%H:%M:%S"), end="") 345 else: 346 print("%-8s " % strftime("%H:%M:%S"), end="") 347 if args.timestamp: 348 if start_ts == 0: 349 start_ts = event.ts_us 350 delta_s = (float(event.ts_us) - start_ts) / 1000000 351 if args.csv: 352 print("%.6f," % delta_s, end="") 353 else: 354 print("%-9.6f " % delta_s, end="") 355 print(format_string % (event.skaddr, event.pid, event.task.decode('utf-8', 'replace'), 356 "6" if args.wide or args.csv else "", 357 inet_ntop(AF_INET6, event.saddr), event.ports >> 32, 358 inet_ntop(AF_INET6, event.daddr), event.ports & 0xffffffff, 359 tcpstate2str(event.oldstate), tcpstate2str(event.newstate), 360 float(event.span_us) / 1000)) 361 if args.journal: 362 journal.send(**journal_fields(event, AF_INET6)) 363 364# initialize BPF 365b = BPF(text=bpf_text) 366 367# header 368if args.time: 369 if args.csv: 370 print("%s," % ("TIME"), end="") 371 else: 372 print("%-8s " % ("TIME"), end="") 373if args.timestamp: 374 if args.csv: 375 print("%s," % ("TIME(s)"), end="") 376 else: 377 print("%-9s " % ("TIME(s)"), end="") 378print(header_string % ("SKADDR", "C-PID", "C-COMM", 379 "IP" if args.wide or args.csv else "", 380 "LADDR", "LPORT", "RADDR", "RPORT", 381 "OLDSTATE", "NEWSTATE", "MS")) 382 383start_ts = 0 384 385# read events 386b["ipv4_events"].open_perf_buffer(print_ipv4_event, page_cnt=64) 387b["ipv6_events"].open_perf_buffer(print_ipv6_event, page_cnt=64) 388while 1: 389 try: 390 b.perf_buffer_poll() 391 except KeyboardInterrupt: 392 exit() 393