#!/usr/bin/env python
# @lint-avoid-python-3-compatibility-imports
#
# tcptop    Summarize TCP send/recv throughput by host.
#           For Linux, uses BCC, eBPF. Embedded C.
#
# USAGE: tcptop [-h] [-C] [-S] [-p PID] [interval [count]]
#
# This uses dynamic tracing of kernel functions, and will need to be updated
# to match kernel changes.
#
# WARNING: This traces all send/receives at the TCP level, and while it
# summarizes data in-kernel to reduce overhead, there may still be some
# overhead at high TCP send/receive rates (eg, ~13% of one CPU at 100k TCP
# events/sec. This is not the same as packet rate: funccount can be used to
# count the kprobes below to find out the TCP rate). Test in a lab environment
# first. If your send/receive rate is low (eg, <1k/sec) then the overhead is
# expected to be negligible.
#
# ToDo: Fit output to screen size (top X only) in default (not -C) mode.
#
# Copyright 2016 Netflix, Inc.
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 02-Sep-2016   Brendan Gregg   Created this.

from __future__ import print_function
from bcc import BPF
import argparse
from socket import inet_ntop, AF_INET, AF_INET6
from struct import pack
from time import sleep, strftime
from subprocess import call
import ctypes as ct
from collections import namedtuple, defaultdict


# arguments
def range_check(string):
    """argparse type callback: parse an integer and reject values below 1."""
    value = int(string)
    if value < 1:
        msg = "value must be stricly positive, got %d" % (value,)
        raise argparse.ArgumentTypeError(msg)
    return value


examples = """examples:
    ./tcptop           # trace TCP send/recv by host
    ./tcptop -C        # don't clear the screen
    ./tcptop -p 181    # only trace PID 181
"""
parser = argparse.ArgumentParser(
    description="Summarize TCP send/recv throughput by host",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)
parser.add_argument("-C", "--noclear", action="store_true",
    help="don't clear the screen")
parser.add_argument("-S", "--nosummary", action="store_true",
    help="skip system summary line")
# The PID is substituted into the C source below, so parse it as an integer
# instead of pasting an arbitrary command-line string into the program text.
parser.add_argument("-p", "--pid", type=int,
    help="trace this PID only")
parser.add_argument("interval", nargs="?", default=1, type=range_check,
    help="output interval, in seconds (default 1)")
# The default of -1 never equals the loop counter, i.e. "run until Ctrl-C".
parser.add_argument("count", nargs="?", default=-1, type=range_check,
    help="number of outputs")
parser.add_argument("--ebpf", action="store_true",
    help=argparse.SUPPRESS)
args = parser.parse_args()
debug = 0

# linux stats
loadavg = "/proc/loadavg"

# define BPF program
#
# bpf_get_current_pid_tgid() returns (tgid << 32 | pid) in kernel terms. The
# upper half (tgid) is what user space calls the PID; the lower half is the
# thread id. Both probes shift right by 32 so that -p PID and the PID column
# refer to the process rather than to an individual thread.
bpf_text = """
#include <uapi/linux/ptrace.h>
#include <net/sock.h>
#include <bcc/proto.h>

struct ipv4_key_t {
    u32 pid;
    u32 saddr;
    u32 daddr;
    u16 lport;
    u16 dport;
};
BPF_HASH(ipv4_send_bytes, struct ipv4_key_t);
BPF_HASH(ipv4_recv_bytes, struct ipv4_key_t);

struct ipv6_key_t {
    u32 pid;
    unsigned __int128 saddr;
    unsigned __int128 daddr;
    u16 lport;
    u16 dport;
};
BPF_HASH(ipv6_send_bytes, struct ipv6_key_t);
BPF_HASH(ipv6_recv_bytes, struct ipv6_key_t);

int kprobe__tcp_sendmsg(struct pt_regs *ctx, struct sock *sk,
    struct msghdr *msg, size_t size)
{
    u32 pid = bpf_get_current_pid_tgid() >> 32;
    FILTER
    u16 dport = 0, family = sk->__sk_common.skc_family;

    if (family == AF_INET) {
        struct ipv4_key_t ipv4_key = {.pid = pid};
        ipv4_key.saddr = sk->__sk_common.skc_rcv_saddr;
        ipv4_key.daddr = sk->__sk_common.skc_daddr;
        ipv4_key.lport = sk->__sk_common.skc_num;
        dport = sk->__sk_common.skc_dport;
        ipv4_key.dport = ntohs(dport);
        ipv4_send_bytes.increment(ipv4_key, size);

    } else if (family == AF_INET6) {
        struct ipv6_key_t ipv6_key = {.pid = pid};
        __builtin_memcpy(&ipv6_key.saddr,
            sk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32,
            sizeof(ipv6_key.saddr));
        __builtin_memcpy(&ipv6_key.daddr,
            sk->__sk_common.skc_v6_daddr.in6_u.u6_addr32,
            sizeof(ipv6_key.daddr));
        ipv6_key.lport = sk->__sk_common.skc_num;
        dport = sk->__sk_common.skc_dport;
        ipv6_key.dport = ntohs(dport);
        ipv6_send_bytes.increment(ipv6_key, size);
    }
    // else drop

    return 0;
}

/*
 * tcp_recvmsg() would be obvious to trace, but is less suitable because:
 * - we'd need to trace both entry and return, to have both sock and size
 * - misses tcp_read_sock() traffic
 * we'd much prefer tracepoints once they are available.
 */
int kprobe__tcp_cleanup_rbuf(struct pt_regs *ctx, struct sock *sk, int copied)
{
    u32 pid = bpf_get_current_pid_tgid() >> 32;
    FILTER
    u16 dport = 0, family = sk->__sk_common.skc_family;

    if (copied <= 0)
        return 0;

    if (family == AF_INET) {
        struct ipv4_key_t ipv4_key = {.pid = pid};
        ipv4_key.saddr = sk->__sk_common.skc_rcv_saddr;
        ipv4_key.daddr = sk->__sk_common.skc_daddr;
        ipv4_key.lport = sk->__sk_common.skc_num;
        dport = sk->__sk_common.skc_dport;
        ipv4_key.dport = ntohs(dport);
        ipv4_recv_bytes.increment(ipv4_key, copied);

    } else if (family == AF_INET6) {
        struct ipv6_key_t ipv6_key = {.pid = pid};
        __builtin_memcpy(&ipv6_key.saddr,
            sk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32,
            sizeof(ipv6_key.saddr));
        __builtin_memcpy(&ipv6_key.daddr,
            sk->__sk_common.skc_v6_daddr.in6_u.u6_addr32,
            sizeof(ipv6_key.daddr));
        ipv6_key.lport = sk->__sk_common.skc_num;
        dport = sk->__sk_common.skc_dport;
        ipv6_key.dport = ntohs(dport);
        ipv6_recv_bytes.increment(ipv6_key, copied);
    }
    // else drop

    return 0;
}
"""

# code substitutions
# "is not None" rather than truthiness so that "-p 0" still installs a filter.
if args.pid is not None:
    bpf_text = bpf_text.replace('FILTER',
        'if (pid != %d) { return 0; }' % args.pid)
else:
    bpf_text = bpf_text.replace('FILTER', '')
if debug or args.ebpf:
    print(bpf_text)
    if args.ebpf:
        exit()

TCPSessionKey = namedtuple('TCPSession',
                           ['pid', 'laddr', 'lport', 'daddr', 'dport'])

# Output layouts: header format, row format, and the two address column titles.
IPV4_LAYOUT = ("%-6s %-12s %-21s %-21s %6s %6s",
               "%-6d %-12.12s %-21s %-21s %6d %6d",
               "LADDR", "RADDR")
# more than 80 chars, sadly.
IPV6_LAYOUT = ("\n%-6s %-12s %-32s %-32s %6s %6s",
               "%-6d %-12.12s %-32s %-32s %6d %6d",
               "LADDR6", "RADDR6")


def pid_to_comm(pid):
    """Return the contents of /proc/<pid>/comm, or str(pid) if unreadable."""
    try:
        # "with" closes the handle; this runs once per output row.
        with open("/proc/%d/comm" % pid, "r") as comm_file:
            return comm_file.read().rstrip()
    except IOError:
        return str(pid)


def get_ipv4_session_key(k):
    """Convert an ipv4_key_t map key into a hashable TCPSessionKey."""
    return TCPSessionKey(pid=k.pid,
                         laddr=inet_ntop(AF_INET, pack("I", k.saddr)),
                         lport=k.lport,
                         daddr=inet_ntop(AF_INET, pack("I", k.daddr)),
                         dport=k.dport)


def get_ipv6_session_key(k):
    """Convert an ipv6_key_t map key into a hashable TCPSessionKey."""
    return TCPSessionKey(pid=k.pid,
                         laddr=inet_ntop(AF_INET6, k.saddr),
                         lport=k.lport,
                         daddr=inet_ntop(AF_INET6, k.daddr),
                         dport=k.dport)


def collect_throughput(send_map, recv_map, make_key):
    """Read and reset one pair of BPF maps.

    Returns a dict mapping TCPSessionKey -> [send_bytes, recv_bytes]. Each
    map is cleared right after it has been read, so every interval starts
    counting from zero.
    """
    throughput = defaultdict(lambda: [0, 0])
    for k, v in send_map.items():
        throughput[make_key(k)][0] = v.value
    send_map.clear()

    for k, v in recv_map.items():
        throughput[make_key(k)][1] = v.value
    recv_map.clear()
    return throughput


def print_throughput(throughput, layout):
    """Print a header (only if there is data) and one row per session.

    Rows are ordered by send + receive bytes, busiest session first.
    """
    header_fmt, row_fmt, laddr_title, raddr_title = layout
    if throughput:
        print(header_fmt % ("PID", "COMM", laddr_title, raddr_title,
                            "RX_KB", "TX_KB"))

    for k, (send_bytes, recv_bytes) in sorted(throughput.items(),
                                              key=lambda kv: sum(kv[1]),
                                              reverse=True):
        print(row_fmt % (k.pid,
                         pid_to_comm(k.pid),
                         k.laddr + ":" + str(k.lport),
                         k.daddr + ":" + str(k.dport),
                         int(recv_bytes / 1024), int(send_bytes / 1024)))


# initialize BPF
b = BPF(text=bpf_text)

ipv4_send_bytes = b["ipv4_send_bytes"]
ipv4_recv_bytes = b["ipv4_recv_bytes"]
ipv6_send_bytes = b["ipv6_send_bytes"]
ipv6_recv_bytes = b["ipv6_recv_bytes"]

print('Tracing... Output every %s secs. Hit Ctrl-C to end' % args.interval)

# output
i = 0
exiting = False
while i != args.count and not exiting:
    try:
        sleep(args.interval)
    except KeyboardInterrupt:
        # Still print one last summary for the partial interval, then stop.
        exiting = True

    # header
    if args.noclear:
        print()
    else:
        call("clear")
    if not args.nosummary:
        with open(loadavg) as stats:
            print("%-8s loadavg: %s" % (strftime("%H:%M:%S"), stats.read()))

    # IPv4 sessions first, then IPv6
    print_throughput(
        collect_throughput(ipv4_send_bytes, ipv4_recv_bytes,
                           get_ipv4_session_key),
        IPV4_LAYOUT)
    print_throughput(
        collect_throughput(ipv6_send_bytes, ipv6_recv_bytes,
                           get_ipv6_session_key),
        IPV6_LAYOUT)

    i += 1