1#!/usr/bin/env python 2# 3# tcpv4tracer Trace TCP connections. 4# For Linux, uses BCC, eBPF. Embedded C. 5# 6# USAGE: tcpv4tracer [-h] [-v] [-p PID] [-N NETNS] 7# 8# You should generally try to avoid writing long scripts that measure multiple 9# functions and walk multiple kernel structures, as they will be a burden to 10# maintain as the kernel changes. 11# The following code should be replaced, and simplified, when static TCP probes 12# exist. 13# 14# Copyright 2017 Kinvolk GmbH 15# 16# Licensed under the Apache License, Version 2.0 (the "License") 17from __future__ import print_function 18from bcc import BPF 19 20import argparse as ap 21import ctypes 22from socket import inet_ntop, AF_INET, AF_INET6 23from struct import pack 24 25parser = ap.ArgumentParser(description="Trace TCP connections", 26 formatter_class=ap.RawDescriptionHelpFormatter) 27parser.add_argument("-t", "--timestamp", action="store_true", 28 help="include timestamp on output") 29parser.add_argument("-p", "--pid", default=0, type=int, 30 help="trace this PID only") 31parser.add_argument("-N", "--netns", default=0, type=int, 32 help="trace this Network Namespace only") 33parser.add_argument("-v", "--verbose", action="store_true", 34 help="include Network Namespace in the output") 35parser.add_argument("--ebpf", action="store_true", 36 help=ap.SUPPRESS) 37args = parser.parse_args() 38 39bpf_text = """ 40#include <uapi/linux/ptrace.h> 41#pragma clang diagnostic push 42#pragma clang diagnostic ignored "-Wtautological-compare" 43#include <net/sock.h> 44#pragma clang diagnostic pop 45#include <net/inet_sock.h> 46#include <net/net_namespace.h> 47#include <bcc/proto.h> 48 49#define TCP_EVENT_TYPE_CONNECT 1 50#define TCP_EVENT_TYPE_ACCEPT 2 51#define TCP_EVENT_TYPE_CLOSE 3 52 53struct tcp_ipv4_event_t { 54 u64 ts_ns; 55 u32 type; 56 u32 pid; 57 char comm[TASK_COMM_LEN]; 58 u8 ip; 59 u32 saddr; 60 u32 daddr; 61 u16 sport; 62 u16 dport; 63 u32 netns; 64}; 65BPF_PERF_OUTPUT(tcp_ipv4_event); 66 67struct 
tcp_ipv6_event_t { 68 u64 ts_ns; 69 u32 type; 70 u32 pid; 71 char comm[TASK_COMM_LEN]; 72 u8 ip; 73 unsigned __int128 saddr; 74 unsigned __int128 daddr; 75 u16 sport; 76 u16 dport; 77 u32 netns; 78}; 79BPF_PERF_OUTPUT(tcp_ipv6_event); 80 81// tcp_set_state doesn't run in the context of the process that initiated the 82// connection so we need to store a map TUPLE -> PID to send the right PID on 83// the event 84struct ipv4_tuple_t { 85 u32 saddr; 86 u32 daddr; 87 u16 sport; 88 u16 dport; 89 u32 netns; 90}; 91 92struct ipv6_tuple_t { 93 unsigned __int128 saddr; 94 unsigned __int128 daddr; 95 u16 sport; 96 u16 dport; 97 u32 netns; 98}; 99 100struct pid_comm_t { 101 u64 pid; 102 char comm[TASK_COMM_LEN]; 103}; 104 105BPF_HASH(tuplepid_ipv4, struct ipv4_tuple_t, struct pid_comm_t); 106BPF_HASH(tuplepid_ipv6, struct ipv6_tuple_t, struct pid_comm_t); 107 108BPF_HASH(connectsock, u64, struct sock *); 109 110static int read_ipv4_tuple(struct ipv4_tuple_t *tuple, struct sock *skp) 111{ 112 u32 net_ns_inum = 0; 113 u32 saddr = skp->__sk_common.skc_rcv_saddr; 114 u32 daddr = skp->__sk_common.skc_daddr; 115 struct inet_sock *sockp = (struct inet_sock *)skp; 116 u16 sport = sockp->inet_sport; 117 u16 dport = skp->__sk_common.skc_dport; 118#ifdef CONFIG_NET_NS 119 net_ns_inum = skp->__sk_common.skc_net.net->ns.inum; 120#endif 121 122 ##FILTER_NETNS## 123 124 tuple->saddr = saddr; 125 tuple->daddr = daddr; 126 tuple->sport = sport; 127 tuple->dport = dport; 128 tuple->netns = net_ns_inum; 129 130 // if addresses or ports are 0, ignore 131 if (saddr == 0 || daddr == 0 || sport == 0 || dport == 0) { 132 return 0; 133 } 134 135 return 1; 136} 137 138static int read_ipv6_tuple(struct ipv6_tuple_t *tuple, struct sock *skp) 139{ 140 u32 net_ns_inum = 0; 141 unsigned __int128 saddr = 0, daddr = 0; 142 struct inet_sock *sockp = (struct inet_sock *)skp; 143 u16 sport = sockp->inet_sport; 144 u16 dport = skp->__sk_common.skc_dport; 145#ifdef CONFIG_NET_NS 146 net_ns_inum = 
skp->__sk_common.skc_net.net->ns.inum; 147#endif 148 bpf_probe_read(&saddr, sizeof(saddr), 149 skp->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32); 150 bpf_probe_read(&daddr, sizeof(daddr), 151 skp->__sk_common.skc_v6_daddr.in6_u.u6_addr32); 152 153 ##FILTER_NETNS## 154 155 tuple->saddr = saddr; 156 tuple->daddr = daddr; 157 tuple->sport = sport; 158 tuple->dport = dport; 159 tuple->netns = net_ns_inum; 160 161 // if addresses or ports are 0, ignore 162 if (saddr == 0 || daddr == 0 || sport == 0 || dport == 0) { 163 return 0; 164 } 165 166 return 1; 167} 168 169static bool check_family(struct sock *sk, u16 expected_family) { 170 u64 zero = 0; 171 u16 family = sk->__sk_common.skc_family; 172 return family == expected_family; 173} 174 175int trace_connect_v4_entry(struct pt_regs *ctx, struct sock *sk) 176{ 177 u64 pid = bpf_get_current_pid_tgid(); 178 179 ##FILTER_PID## 180 181 // stash the sock ptr for lookup on return 182 connectsock.update(&pid, &sk); 183 184 return 0; 185} 186 187int trace_connect_v4_return(struct pt_regs *ctx) 188{ 189 int ret = PT_REGS_RC(ctx); 190 u64 pid = bpf_get_current_pid_tgid(); 191 192 struct sock **skpp; 193 skpp = connectsock.lookup(&pid); 194 if (skpp == 0) { 195 return 0; // missed entry 196 } 197 198 connectsock.delete(&pid); 199 200 if (ret != 0) { 201 // failed to send SYNC packet, may not have populated 202 // socket __sk_common.{skc_rcv_saddr, ...} 203 return 0; 204 } 205 206 // pull in details 207 struct sock *skp = *skpp; 208 struct ipv4_tuple_t t = { }; 209 if (!read_ipv4_tuple(&t, skp)) { 210 return 0; 211 } 212 213 struct pid_comm_t p = { }; 214 p.pid = pid; 215 bpf_get_current_comm(&p.comm, sizeof(p.comm)); 216 217 tuplepid_ipv4.update(&t, &p); 218 219 return 0; 220} 221 222int trace_connect_v6_entry(struct pt_regs *ctx, struct sock *sk) 223{ 224 u64 pid = bpf_get_current_pid_tgid(); 225 226 ##FILTER_PID## 227 228 // stash the sock ptr for lookup on return 229 connectsock.update(&pid, &sk); 230 231 return 0; 232} 233 
234int trace_connect_v6_return(struct pt_regs *ctx) 235{ 236 int ret = PT_REGS_RC(ctx); 237 u64 pid = bpf_get_current_pid_tgid(); 238 239 struct sock **skpp; 240 skpp = connectsock.lookup(&pid); 241 if (skpp == 0) { 242 return 0; // missed entry 243 } 244 245 connectsock.delete(&pid); 246 247 if (ret != 0) { 248 // failed to send SYNC packet, may not have populated 249 // socket __sk_common.{skc_rcv_saddr, ...} 250 return 0; 251 } 252 253 // pull in details 254 struct sock *skp = *skpp; 255 struct ipv6_tuple_t t = { }; 256 if (!read_ipv6_tuple(&t, skp)) { 257 return 0; 258 } 259 260 struct pid_comm_t p = { }; 261 p.pid = pid; 262 bpf_get_current_comm(&p.comm, sizeof(p.comm)); 263 264 tuplepid_ipv6.update(&t, &p); 265 266 return 0; 267} 268 269int trace_tcp_set_state_entry(struct pt_regs *ctx, struct sock *skp, int state) 270{ 271 if (state != TCP_ESTABLISHED && state != TCP_CLOSE) { 272 return 0; 273 } 274 275 u8 ipver = 0; 276 if (check_family(skp, AF_INET)) { 277 ipver = 4; 278 struct ipv4_tuple_t t = { }; 279 if (!read_ipv4_tuple(&t, skp)) { 280 return 0; 281 } 282 283 if (state == TCP_CLOSE) { 284 tuplepid_ipv4.delete(&t); 285 return 0; 286 } 287 288 struct pid_comm_t *p; 289 p = tuplepid_ipv4.lookup(&t); 290 if (p == 0) { 291 return 0; // missed entry 292 } 293 294 struct tcp_ipv4_event_t evt4 = { }; 295 evt4.ts_ns = bpf_ktime_get_ns(); 296 evt4.type = TCP_EVENT_TYPE_CONNECT; 297 evt4.pid = p->pid >> 32; 298 evt4.ip = ipver; 299 evt4.saddr = t.saddr; 300 evt4.daddr = t.daddr; 301 evt4.sport = ntohs(t.sport); 302 evt4.dport = ntohs(t.dport); 303 evt4.netns = t.netns; 304 305 int i; 306 for (i = 0; i < TASK_COMM_LEN; i++) { 307 evt4.comm[i] = p->comm[i]; 308 } 309 310 tcp_ipv4_event.perf_submit(ctx, &evt4, sizeof(evt4)); 311 tuplepid_ipv4.delete(&t); 312 } else if (check_family(skp, AF_INET6)) { 313 ipver = 6; 314 struct ipv6_tuple_t t = { }; 315 if (!read_ipv6_tuple(&t, skp)) { 316 return 0; 317 } 318 319 if (state == TCP_CLOSE) { 320 tuplepid_ipv6.delete(&t); 
321 return 0; 322 } 323 324 struct pid_comm_t *p; 325 p = tuplepid_ipv6.lookup(&t); 326 if (p == 0) { 327 return 0; // missed entry 328 } 329 330 struct tcp_ipv6_event_t evt6 = { }; 331 evt6.ts_ns = bpf_ktime_get_ns(); 332 evt6.type = TCP_EVENT_TYPE_CONNECT; 333 evt6.pid = p->pid >> 32; 334 evt6.ip = ipver; 335 evt6.saddr = t.saddr; 336 evt6.daddr = t.daddr; 337 evt6.sport = ntohs(t.sport); 338 evt6.dport = ntohs(t.dport); 339 evt6.netns = t.netns; 340 341 int i; 342 for (i = 0; i < TASK_COMM_LEN; i++) { 343 evt6.comm[i] = p->comm[i]; 344 } 345 346 tcp_ipv6_event.perf_submit(ctx, &evt6, sizeof(evt6)); 347 tuplepid_ipv6.delete(&t); 348 } 349 // else drop 350 351 return 0; 352} 353 354int trace_close_entry(struct pt_regs *ctx, struct sock *skp) 355{ 356 u64 pid = bpf_get_current_pid_tgid(); 357 358 ##FILTER_PID## 359 360 u8 oldstate = skp->sk_state; 361 // Don't generate close events for connections that were never 362 // established in the first place. 363 if (oldstate == TCP_SYN_SENT || 364 oldstate == TCP_SYN_RECV || 365 oldstate == TCP_NEW_SYN_RECV) 366 return 0; 367 368 u8 ipver = 0; 369 if (check_family(skp, AF_INET)) { 370 ipver = 4; 371 struct ipv4_tuple_t t = { }; 372 if (!read_ipv4_tuple(&t, skp)) { 373 return 0; 374 } 375 376 struct tcp_ipv4_event_t evt4 = { }; 377 evt4.ts_ns = bpf_ktime_get_ns(); 378 evt4.type = TCP_EVENT_TYPE_CLOSE; 379 evt4.pid = pid >> 32; 380 evt4.ip = ipver; 381 evt4.saddr = t.saddr; 382 evt4.daddr = t.daddr; 383 evt4.sport = ntohs(t.sport); 384 evt4.dport = ntohs(t.dport); 385 evt4.netns = t.netns; 386 bpf_get_current_comm(&evt4.comm, sizeof(evt4.comm)); 387 388 tcp_ipv4_event.perf_submit(ctx, &evt4, sizeof(evt4)); 389 } else if (check_family(skp, AF_INET6)) { 390 ipver = 6; 391 struct ipv6_tuple_t t = { }; 392 if (!read_ipv6_tuple(&t, skp)) { 393 return 0; 394 } 395 396 struct tcp_ipv6_event_t evt6 = { }; 397 evt6.ts_ns = bpf_ktime_get_ns(); 398 evt6.type = TCP_EVENT_TYPE_CLOSE; 399 evt6.pid = pid >> 32; 400 evt6.ip = ipver; 401 
evt6.saddr = t.saddr; 402 evt6.daddr = t.daddr; 403 evt6.sport = ntohs(t.sport); 404 evt6.dport = ntohs(t.dport); 405 evt6.netns = t.netns; 406 bpf_get_current_comm(&evt6.comm, sizeof(evt6.comm)); 407 408 tcp_ipv6_event.perf_submit(ctx, &evt6, sizeof(evt6)); 409 } 410 // else drop 411 412 return 0; 413}; 414 415int trace_accept_return(struct pt_regs *ctx) 416{ 417 struct sock *newsk = (struct sock *)PT_REGS_RC(ctx); 418 u64 pid = bpf_get_current_pid_tgid(); 419 420 ##FILTER_PID## 421 422 if (newsk == NULL) { 423 return 0; 424 } 425 426 // pull in details 427 u16 lport = 0, dport = 0; 428 u32 net_ns_inum = 0; 429 u8 ipver = 0; 430 431 dport = newsk->__sk_common.skc_dport; 432 lport = newsk->__sk_common.skc_num; 433 434 // Get network namespace id, if kernel supports it 435#ifdef CONFIG_NET_NS 436 net_ns_inum = newsk->__sk_common.skc_net.net->ns.inum; 437#endif 438 439 ##FILTER_NETNS## 440 441 if (check_family(newsk, AF_INET)) { 442 ipver = 4; 443 444 struct tcp_ipv4_event_t evt4 = { 0 }; 445 446 evt4.ts_ns = bpf_ktime_get_ns(); 447 evt4.type = TCP_EVENT_TYPE_ACCEPT; 448 evt4.netns = net_ns_inum; 449 evt4.pid = pid >> 32; 450 evt4.ip = ipver; 451 452 evt4.saddr = newsk->__sk_common.skc_rcv_saddr; 453 evt4.daddr = newsk->__sk_common.skc_daddr; 454 455 evt4.sport = lport; 456 evt4.dport = ntohs(dport); 457 bpf_get_current_comm(&evt4.comm, sizeof(evt4.comm)); 458 459 // do not send event if IP address is 0.0.0.0 or port is 0 460 if (evt4.saddr != 0 && evt4.daddr != 0 && 461 evt4.sport != 0 && evt4.dport != 0) { 462 tcp_ipv4_event.perf_submit(ctx, &evt4, sizeof(evt4)); 463 } 464 } else if (check_family(newsk, AF_INET6)) { 465 ipver = 6; 466 467 struct tcp_ipv6_event_t evt6 = { 0 }; 468 469 evt6.ts_ns = bpf_ktime_get_ns(); 470 evt6.type = TCP_EVENT_TYPE_ACCEPT; 471 evt6.netns = net_ns_inum; 472 evt6.pid = pid >> 32; 473 evt6.ip = ipver; 474 475 bpf_probe_read(&evt6.saddr, sizeof(evt6.saddr), 476 newsk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32); 477 
bpf_probe_read(&evt6.daddr, sizeof(evt6.daddr), 478 newsk->__sk_common.skc_v6_daddr.in6_u.u6_addr32); 479 480 evt6.sport = lport; 481 evt6.dport = ntohs(dport); 482 bpf_get_current_comm(&evt6.comm, sizeof(evt6.comm)); 483 484 // do not send event if IP address is 0.0.0.0 or port is 0 485 if (evt6.saddr != 0 && evt6.daddr != 0 && 486 evt6.sport != 0 && evt6.dport != 0) { 487 tcp_ipv6_event.perf_submit(ctx, &evt6, sizeof(evt6)); 488 } 489 } 490 // else drop 491 492 return 0; 493} 494""" 495 496TASK_COMM_LEN = 16 # linux/sched.h 497 498 499class TCPIPV4Evt(ctypes.Structure): 500 _fields_ = [ 501 ("ts_ns", ctypes.c_ulonglong), 502 ("type", ctypes.c_uint), 503 ("pid", ctypes.c_uint), 504 ("comm", ctypes.c_char * TASK_COMM_LEN), 505 ("ip", ctypes.c_ubyte), 506 ("saddr", ctypes.c_uint), 507 ("daddr", ctypes.c_uint), 508 ("sport", ctypes.c_ushort), 509 ("dport", ctypes.c_ushort), 510 ("netns", ctypes.c_uint) 511 ] 512 513 514class TCPIPV6Evt(ctypes.Structure): 515 _fields_ = [ 516 ("ts_ns", ctypes.c_ulonglong), 517 ("type", ctypes.c_uint), 518 ("pid", ctypes.c_uint), 519 ("comm", ctypes.c_char * TASK_COMM_LEN), 520 ("ip", ctypes.c_ubyte), 521 ("saddr", (ctypes.c_ulong * 2)), 522 ("daddr", (ctypes.c_ulong * 2)), 523 ("sport", ctypes.c_ushort), 524 ("dport", ctypes.c_ushort), 525 ("netns", ctypes.c_uint) 526 ] 527 528 529verbose_types = {"C": "connect", "A": "accept", 530 "X": "close", "U": "unknown"} 531 532 533def print_ipv4_event(cpu, data, size): 534 event = ctypes.cast(data, ctypes.POINTER(TCPIPV4Evt)).contents 535 global start_ts 536 537 if args.timestamp: 538 if start_ts == 0: 539 start_ts = event.ts_ns 540 if args.verbose: 541 print("%-14d" % (event.ts_ns - start_ts), end="") 542 else: 543 print("%-9.3f" % ((event.ts_ns - start_ts) / 1000000000.0), end="") 544 if event.type == 1: 545 type_str = "C" 546 elif event.type == 2: 547 type_str = "A" 548 elif event.type == 3: 549 type_str = "X" 550 else: 551 type_str = "U" 552 553 if args.verbose: 554 print("%-12s " % 
(verbose_types[type_str]), end="") 555 else: 556 print("%-2s " % (type_str), end="") 557 558 print("%-6d %-16s %-2d %-16s %-16s %-6d %-6d" % 559 (event.pid, event.comm.decode('utf-8', 'replace'), 560 event.ip, 561 inet_ntop(AF_INET, pack("I", event.saddr)), 562 inet_ntop(AF_INET, pack("I", event.daddr)), 563 event.sport, 564 event.dport), end="") 565 if args.verbose and not args.netns: 566 print(" %-8d" % event.netns) 567 else: 568 print() 569 570 571def print_ipv6_event(cpu, data, size): 572 event = ctypes.cast(data, ctypes.POINTER(TCPIPV6Evt)).contents 573 global start_ts 574 if args.timestamp: 575 if start_ts == 0: 576 start_ts = event.ts_ns 577 if args.verbose: 578 print("%-14d" % (event.ts_ns - start_ts), end="") 579 else: 580 print("%-9.3f" % ((event.ts_ns - start_ts) / 1000000000.0), end="") 581 if event.type == 1: 582 type_str = "C" 583 elif event.type == 2: 584 type_str = "A" 585 elif event.type == 3: 586 type_str = "X" 587 else: 588 type_str = "U" 589 590 if args.verbose: 591 print("%-12s " % (verbose_types[type_str]), end="") 592 else: 593 print("%-2s " % (type_str), end="") 594 595 print("%-6d %-16s %-2d %-16s %-16s %-6d %-6d" % 596 (event.pid, event.comm.decode('utf-8', 'replace'), 597 event.ip, 598 "[" + inet_ntop(AF_INET6, event.saddr) + "]", 599 "[" + inet_ntop(AF_INET6, event.daddr) + "]", 600 event.sport, 601 event.dport), end="") 602 if args.verbose and not args.netns: 603 print(" %-8d" % event.netns) 604 else: 605 print() 606 607 608pid_filter = "" 609netns_filter = "" 610 611if args.pid: 612 pid_filter = 'if (pid >> 32 != %d) { return 0; }' % args.pid 613if args.netns: 614 netns_filter = 'if (net_ns_inum != %d) { return 0; }' % args.netns 615 616bpf_text = bpf_text.replace('##FILTER_PID##', pid_filter) 617bpf_text = bpf_text.replace('##FILTER_NETNS##', netns_filter) 618 619if args.ebpf: 620 print(bpf_text) 621 exit() 622 623# initialize BPF 624b = BPF(text=bpf_text) 625b.attach_kprobe(event="tcp_v4_connect", fn_name="trace_connect_v4_entry") 
# Remaining probes; each handler name refers to a function in the BPF program.
b.attach_kretprobe(event="tcp_v4_connect", fn_name="trace_connect_v4_return")
b.attach_kprobe(event="tcp_v6_connect", fn_name="trace_connect_v6_entry")
b.attach_kretprobe(event="tcp_v6_connect", fn_name="trace_connect_v6_return")
b.attach_kprobe(event="tcp_set_state", fn_name="trace_tcp_set_state_entry")
b.attach_kprobe(event="tcp_close", fn_name="trace_close_entry")
b.attach_kretprobe(event="inet_csk_accept", fn_name="trace_accept_return")

print("Tracing TCP established connections. Ctrl-C to end.")

# header: assembled in one string, then emitted as a single line
header = ""
if args.verbose:
    if args.timestamp:
        header += "%-14s" % ("TIME(ns)")
    header += "%-12s %-6s %-16s %-2s %-16s %-16s %-6s %-7s" % (
        "TYPE", "PID", "COMM", "IP", "SADDR", "DADDR", "SPORT", "DPORT")
    # no NETNS column when the namespace is already fixed by -N
    if not args.netns:
        header += "%-8s" % "NETNS"
else:
    if args.timestamp:
        header += "%-9s" % ("TIME(s)")
    header += "%-2s %-6s %-16s %-2s %-16s %-16s %-6s %-6s" % (
        "T", "PID", "COMM", "IP", "SADDR", "DADDR", "SPORT", "DPORT")
print(header)

# Timestamp of the first event seen; 0 means "no event yet".
start_ts = 0


def inet_ntoa(addr):
    """Return the dotted-quad string for the integer *addr*.

    The least significant byte of *addr* becomes the first octet.
    """
    return '.'.join(str((addr >> (8 * octet)) & 0xff) for octet in range(4))


b["tcp_ipv4_event"].open_perf_buffer(print_ipv4_event)
b["tcp_ipv6_event"].open_perf_buffer(print_ipv6_event)

# Dispatch events to the callbacks above until interrupted with Ctrl-C.
try:
    while True:
        b.perf_buffer_poll()
except KeyboardInterrupt:
    exit()