1#!/usr/bin/python 2# -*- coding: utf-8 -*- 3# @lint-avoid-python-3-compatibility-imports 4# 5# tcpstates Trace the TCP session state changes with durations. 6# For Linux, uses BCC, BPF. Embedded C. 7# 8# USAGE: tcpstates [-h] [-C] [-S] [interval [count]] [-4 | -6] 9# 10# This uses the sock:inet_sock_set_state tracepoint, added to Linux 4.16. 11# Linux 4.16 also adds more state transitions so that they can be traced. 12# 13# Copyright 2018 Netflix, Inc. 14# Licensed under the Apache License, Version 2.0 (the "License") 15# 16# 20-Mar-2018 Brendan Gregg Created this. 17 18from __future__ import print_function 19from bcc import BPF 20import argparse 21from socket import inet_ntop, AF_INET, AF_INET6 22from struct import pack 23from time import strftime, time 24from os import getuid 25 26# arguments 27examples = """examples: 28 ./tcpstates # trace all TCP state changes 29 ./tcpstates -t # include timestamp column 30 ./tcpstates -T # include time column (HH:MM:SS) 31 ./tcpstates -w # wider columns (fit IPv6) 32 ./tcpstates -stT # csv output, with times & timestamps 33 ./tcpstates -Y # log events to the systemd journal 34 ./tcpstates -L 80 # only trace local port 80 35 ./tcpstates -L 80,81 # only trace local ports 80 and 81 36 ./tcpstates -D 80 # only trace remote port 80 37 ./tcpstates -4 # trace IPv4 family only 38 ./tcpstates -6 # trace IPv6 family only 39""" 40parser = argparse.ArgumentParser( 41 description="Trace TCP session state changes and durations", 42 formatter_class=argparse.RawDescriptionHelpFormatter, 43 epilog=examples) 44parser.add_argument("-T", "--time", action="store_true", 45 help="include time column on output (HH:MM:SS)") 46parser.add_argument("-t", "--timestamp", action="store_true", 47 help="include timestamp on output (seconds)") 48parser.add_argument("-w", "--wide", action="store_true", 49 help="wide column output (fits IPv6 addresses)") 50parser.add_argument("-s", "--csv", action="store_true", 51 help="comma separated values output") 52parser.add_argument("-L", "--localport", 53 help="comma-separated list of local ports to trace.") 54parser.add_argument("-D", "--remoteport", 55 help="comma-separated list of remote ports to trace.") 56parser.add_argument("--ebpf", action="store_true", 57 help=argparse.SUPPRESS) 58parser.add_argument("-Y", "--journal", action="store_true", 59 help="log session state changes to the systemd journal") 60group = parser.add_mutually_exclusive_group() 61group.add_argument("-4", "--ipv4", action="store_true", 62 help="trace IPv4 family only") 63group.add_argument("-6", "--ipv6", action="store_true", 64 help="trace IPv6 family only") 65args = parser.parse_args() 66debug = 0 67 68# define BPF program 69bpf_header = """ 70#include <uapi/linux/ptrace.h> 71#include <linux/tcp.h> 72#include <net/sock.h> 73#include <bcc/proto.h> 74 75BPF_HASH(last, struct sock *, u64); 76 77// separate data structs for ipv4 and ipv6 78struct ipv4_data_t { 79 u64 ts_us; 80 u64 skaddr; 81 u32 saddr; 82 u32 daddr; 83 u64 span_us; 84 u32 pid; 85 u32 ports; 86 u32 oldstate; 87 u32 newstate; 88 char task[TASK_COMM_LEN]; 89}; 90BPF_PERF_OUTPUT(ipv4_events); 91 92struct ipv6_data_t { 93 u64 ts_us; 94 u64 skaddr; 95 unsigned __int128 saddr; 96 unsigned __int128 daddr; 97 u64 span_us; 98 u32 pid; 99 u32 ports; 100 u32 oldstate; 101 u32 newstate; 102 char task[TASK_COMM_LEN]; 103}; 104BPF_PERF_OUTPUT(ipv6_events); 105""" 106 107bpf_text_tracepoint = """ 108TRACEPOINT_PROBE(sock, inet_sock_set_state) 109{ 110 if (args->protocol != IPPROTO_TCP) 111 return 0; 112 113 u32 pid = bpf_get_current_pid_tgid() >> 32; 114 // sk is used as a UUID 115 struct sock *sk = (struct sock *)args->skaddr; 116 117 // lport is either used in a filter here, or later 118 u16 lport = args->sport; 119 FILTER_LPORT 120 121 // dport is either used in a filter here, or later 122 u16 dport = args->dport; 123 FILTER_DPORT 124 125 // calculate delta 126 u64 *tsp, delta_us; 127 tsp = last.lookup(&sk); 128 if (tsp == 0) 129 delta_us = 0; 130 else 131 delta_us = (bpf_ktime_get_ns() - *tsp) / 1000; 132 u16 family = args->family; 133 FILTER_FAMILY 134 135 if (args->family == AF_INET) { 136 struct ipv4_data_t data4 = { 137 .span_us = delta_us, 138 .oldstate = args->oldstate, 139 .newstate = args->newstate }; 140 data4.skaddr = (u64)args->skaddr; 141 data4.ts_us = bpf_ktime_get_ns() / 1000; 142 __builtin_memcpy(&data4.saddr, args->saddr, sizeof(data4.saddr)); 143 __builtin_memcpy(&data4.daddr, args->daddr, sizeof(data4.daddr)); 144 // a workaround until data4 compiles with separate lport/dport 145 data4.ports = dport + ((0ULL + lport) << 16); 146 data4.pid = pid; 147 148 bpf_get_current_comm(&data4.task, sizeof(data4.task)); 149 ipv4_events.perf_submit(args, &data4, sizeof(data4)); 150 151 } else /* 6 */ { 152 struct ipv6_data_t data6 = { 153 .span_us = delta_us, 154 .oldstate = args->oldstate, 155 .newstate = args->newstate }; 156 data6.skaddr = (u64)args->skaddr; 157 data6.ts_us = bpf_ktime_get_ns() / 1000; 158 __builtin_memcpy(&data6.saddr, args->saddr_v6, sizeof(data6.saddr)); 159 __builtin_memcpy(&data6.daddr, args->daddr_v6, sizeof(data6.daddr)); 160 // a workaround until data6 compiles with separate lport/dport 161 data6.ports = dport + ((0ULL + lport) << 16); 162 data6.pid = pid; 163 bpf_get_current_comm(&data6.task, sizeof(data6.task)); 164 ipv6_events.perf_submit(args, &data6, sizeof(data6)); 165 } 166 167 if (args->newstate == TCP_CLOSE) { 168 last.delete(&sk); 169 } else { 170 u64 ts = bpf_ktime_get_ns(); 171 last.update(&sk, &ts); 172 } 173 174 return 0; 175} 176""" 177 178bpf_text_kprobe = """ 179int kprobe__tcp_set_state(struct pt_regs *ctx, struct sock *sk, int state) 180{ 181 u32 pid = bpf_get_current_pid_tgid() >> 32; 182 // sk is used as a UUID 183 184 // lport is either used in a filter here, or later 185 u16 lport = sk->__sk_common.skc_num; 186 FILTER_LPORT 187 188 // dport is either used in a filter here, or later 189 u16 dport = sk->__sk_common.skc_dport; 190 dport = ntohs(dport); 191 FILTER_DPORT 192 193 // calculate delta 194 u64 *tsp, delta_us; 195 tsp = last.lookup(&sk); 196 if (tsp == 0) 197 delta_us = 0; 198 else 199 delta_us = (bpf_ktime_get_ns() - *tsp) / 1000; 200 201 u16 family = sk->__sk_common.skc_family; 202 FILTER_FAMILY 203 204 if (family == AF_INET) { 205 struct ipv4_data_t data4 = { 206 .span_us = delta_us, 207 .oldstate = sk->__sk_common.skc_state, 208 .newstate = state }; 209 data4.skaddr = (u64)sk; 210 data4.ts_us = bpf_ktime_get_ns() / 1000; 211 data4.saddr = sk->__sk_common.skc_rcv_saddr; 212 data4.daddr = sk->__sk_common.skc_daddr; 213 // a workaround until data4 compiles with separate lport/dport 214 data4.ports = dport + ((0ULL + lport) << 16); 215 data4.pid = pid; 216 217 bpf_get_current_comm(&data4.task, sizeof(data4.task)); 218 ipv4_events.perf_submit(ctx, &data4, sizeof(data4)); 219 220 } else /* 6 */ { 221 struct ipv6_data_t data6 = { 222 .span_us = delta_us, 223 .oldstate = sk->__sk_common.skc_state, 224 .newstate = state }; 225 data6.skaddr = (u64)sk; 226 data6.ts_us = bpf_ktime_get_ns() / 1000; 227 bpf_probe_read_kernel(&data6.saddr, sizeof(data6.saddr), 228 sk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32); 229 bpf_probe_read_kernel(&data6.daddr, sizeof(data6.daddr), 230 sk->__sk_common.skc_v6_daddr.in6_u.u6_addr32); 231 // a workaround until data6 compiles with separate lport/dport 232 data6.ports = dport + ((0ULL + lport) << 16); 233 data6.pid = pid; 234 bpf_get_current_comm(&data6.task, sizeof(data6.task)); 235 ipv6_events.perf_submit(ctx, &data6, sizeof(data6)); 236 } 237 238 if (state == TCP_CLOSE) { 239 last.delete(&sk); 240 } else { 241 u64 ts = bpf_ktime_get_ns(); 242 last.update(&sk, &ts); 243 } 244 245 return 0; 246 247}; 248""" 249 250bpf_text = bpf_header 251if BPF.tracepoint_exists("sock", "inet_sock_set_state"): 252 bpf_text += bpf_text_tracepoint 253else: 254 bpf_text += bpf_text_kprobe 255 256# code substitutions 257if args.remoteport: 258 dports = [int(dport) for dport in args.remoteport.split(',')] 259 dports_if = ' && '.join(['dport != %d' % dport for dport in dports]) 260 bpf_text = bpf_text.replace('FILTER_DPORT', 261 'if (%s) { last.delete(&sk); return 0; }' % dports_if) 262if args.localport: 263 lports = [int(lport) for lport in args.localport.split(',')] 264 lports_if = ' && '.join(['lport != %d' % lport for lport in lports]) 265 bpf_text = bpf_text.replace('FILTER_LPORT', 266 'if (%s) { last.delete(&sk); return 0; }' % lports_if) 267if args.ipv4: 268 bpf_text = bpf_text.replace('FILTER_FAMILY', 269 'if (family != AF_INET) { return 0; }') 270elif args.ipv6: 271 bpf_text = bpf_text.replace('FILTER_FAMILY', 272 'if (family != AF_INET6) { return 0; }') 273bpf_text = bpf_text.replace('FILTER_FAMILY', '') 274bpf_text = bpf_text.replace('FILTER_DPORT', '') 275bpf_text = bpf_text.replace('FILTER_LPORT', '') 276 277if debug or args.ebpf: 278 print(bpf_text) 279 if args.ebpf: 280 exit() 281 282# 283# Setup output formats 284# 285# Don't change the default output (next 2 lines): this fits in 80 chars. I 286# know it doesn't have NS or UIDs etc. I know. If you really, really, really 287# need to add columns, columns that solve real actual problems, I'd start by 288# adding an extended mode (-x) to included those columns. 289# 290header_string = "%-16s %-5s %-10.10s %s%-15s %-5s %-15s %-5s %-11s -> %-11s %s" 291format_string = ("%-16x %-5d %-10.10s %s%-15s %-5d %-15s %-5d %-11s " + 292 "-> %-11s %.3f") 293if args.wide: 294 header_string = ("%-16s %-5s %-16.16s %-2s %-26s %-5s %-26s %-5s %-11s " + 295 "-> %-11s %s") 296 format_string = ("%-16x %-5d %-16.16s %-2s %-26s %-5s %-26s %-5d %-11s " + 297 "-> %-11s %.3f") 298if args.csv: 299 header_string = "%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s" 300 format_string = "%x,%d,%s,%s,%s,%s,%s,%d,%s,%s,%.3f" 301 302if args.journal: 303 try: 304 from systemd import journal 305 except ImportError: 306 print("ERROR: Journal logging requires the systemd.journal module") 307 exit(1) 308 309 310def tcpstate2str(state): 311 # from include/net/tcp_states.h: 312 tcpstate = { 313 1: "ESTABLISHED", 314 2: "SYN_SENT", 315 3: "SYN_RECV", 316 4: "FIN_WAIT1", 317 5: "FIN_WAIT2", 318 6: "TIME_WAIT", 319 7: "CLOSE", 320 8: "CLOSE_WAIT", 321 9: "LAST_ACK", 322 10: "LISTEN", 323 11: "CLOSING", 324 12: "NEW_SYN_RECV", 325 } 326 327 if state in tcpstate: 328 return tcpstate[state] 329 else: 330 return str(state) 331 332def journal_fields(event, addr_family): 333 addr_pfx = 'IPV4' 334 if addr_family == AF_INET6: 335 addr_pfx = 'IPV6' 336 337 fields = { 338 # Standard fields described in systemd.journal-fields(7). journal.send 339 # will fill in CODE_LINE, CODE_FILE, and CODE_FUNC for us. If we're 340 # root and specify OBJECT_PID, systemd-journald will add other OBJECT_* 341 # fields for us. 342 'SYSLOG_IDENTIFIER': 'tcpstates', 343 'PRIORITY': 5, 344 '_SOURCE_REALTIME_TIMESTAMP': time() * 1000000, 345 'OBJECT_PID': str(event.pid), 346 'OBJECT_COMM': event.task.decode('utf-8', 'replace'), 347 # Custom fields, aka "stuff we sort of made up". 348 'OBJECT_' + addr_pfx + '_SOURCE_ADDRESS': inet_ntop(addr_family, pack("I", event.saddr)), 349 'OBJECT_TCP_SOURCE_PORT': str(event.ports >> 16), 350 'OBJECT_' + addr_pfx + '_DESTINATION_ADDRESS': inet_ntop(addr_family, pack("I", event.daddr)), 351 'OBJECT_TCP_DESTINATION_PORT': str(event.ports & 0xffff), 352 'OBJECT_TCP_OLD_STATE': tcpstate2str(event.oldstate), 353 'OBJECT_TCP_NEW_STATE': tcpstate2str(event.newstate), 354 'OBJECT_TCP_SPAN_TIME': str(event.span_us) 355 } 356 357 msg_format_string = (u"%(OBJECT_COMM)s " + 358 u"%(OBJECT_" + addr_pfx + "_SOURCE_ADDRESS)s " + 359 u"%(OBJECT_TCP_SOURCE_PORT)s → " + 360 u"%(OBJECT_" + addr_pfx + "_DESTINATION_ADDRESS)s " + 361 u"%(OBJECT_TCP_DESTINATION_PORT)s " + 362 u"%(OBJECT_TCP_OLD_STATE)s → %(OBJECT_TCP_NEW_STATE)s") 363 fields['MESSAGE'] = msg_format_string % (fields) 364 365 if getuid() == 0: 366 del fields['OBJECT_COMM'] # Handled by systemd-journald 367 368 return fields 369 370# process event 371def print_ipv4_event(cpu, data, size): 372 event = b["ipv4_events"].event(data) 373 global start_ts 374 if args.time: 375 if args.csv: 376 print("%s," % strftime("%H:%M:%S"), end="") 377 else: 378 print("%-8s " % strftime("%H:%M:%S"), end="") 379 if args.timestamp: 380 if start_ts == 0: 381 start_ts = event.ts_us 382 delta_s = (float(event.ts_us) - start_ts) / 1000000 383 if args.csv: 384 print("%.6f," % delta_s, end="") 385 else: 386 print("%-9.6f " % delta_s, end="") 387 print(format_string % (event.skaddr, event.pid, event.task.decode('utf-8', 'replace'), 388 "4" if args.wide or args.csv else "", 389 inet_ntop(AF_INET, pack("I", event.saddr)), event.ports >> 16, 390 inet_ntop(AF_INET, pack("I", event.daddr)), event.ports & 0xffff, 391 tcpstate2str(event.oldstate), tcpstate2str(event.newstate), 392 float(event.span_us) / 1000)) 393 if args.journal: 394 journal.send(**journal_fields(event, AF_INET)) 395 396def print_ipv6_event(cpu, data, size): 397 event = b["ipv6_events"].event(data) 398 global start_ts 399 if args.time: 400 if args.csv: 401 print("%s," % strftime("%H:%M:%S"), end="") 402 else: 403 print("%-8s " % strftime("%H:%M:%S"), end="") 404 if args.timestamp: 405 if start_ts == 0: 406 start_ts = event.ts_us 407 delta_s = (float(event.ts_us) - start_ts) / 1000000 408 if args.csv: 409 print("%.6f," % delta_s, end="") 410 else: 411 print("%-9.6f " % delta_s, end="") 412 print(format_string % (event.skaddr, event.pid, event.task.decode('utf-8', 'replace'), 413 "6" if args.wide or args.csv else "", 414 inet_ntop(AF_INET6, event.saddr), event.ports >> 16, 415 inet_ntop(AF_INET6, event.daddr), event.ports & 0xffff, 416 tcpstate2str(event.oldstate), tcpstate2str(event.newstate), 417 float(event.span_us) / 1000)) 418 if args.journal: 419 journal.send(**journal_fields(event, AF_INET6)) 420 421# initialize BPF 422b = BPF(text=bpf_text) 423 424# header 425if args.time: 426 if args.csv: 427 print("%s," % ("TIME"), end="") 428 else: 429 print("%-8s " % ("TIME"), end="") 430if args.timestamp: 431 if args.csv: 432 print("%s," % ("TIME(s)"), end="") 433 else: 434 print("%-9s " % ("TIME(s)"), end="") 435print(header_string % ("SKADDR", "C-PID", "C-COMM", 436 "IP" if args.wide or args.csv else "", 437 "LADDR", "LPORT", "RADDR", "RPORT", 438 "OLDSTATE", "NEWSTATE", "MS")) 439 440start_ts = 0 441 442# read events 443b["ipv4_events"].open_perf_buffer(print_ipv4_event, page_cnt=64) 444b["ipv6_events"].open_perf_buffer(print_ipv6_event, page_cnt=64) 445while 1: 446 try: 447 b.perf_buffer_poll() 448 except KeyboardInterrupt: 449 exit() 450