1 /* SPDX-License-Identifier: GPL-2.0 */
2 #ifndef __BPF_TCP_HELPERS_H
3 #define __BPF_TCP_HELPERS_H
4
5 #include <stdbool.h>
6 #include <linux/types.h>
7 #include <bpf/bpf_helpers.h>
8 #include <bpf/bpf_core_read.h>
9 #include <bpf/bpf_tracing.h>
10
/*
 * Define a struct_ops BPF program called @name.
 *
 * The program is placed in the ELF section "struct_ops/<name>" via SEC()
 * and its definition is produced by BPF_PROG(name, args).
 */
#define BPF_STRUCT_OPS(name, args...)	\
	SEC("struct_ops/" #name)	\
	BPF_PROG(name, args)
14
/* Fallback definition, used only when no earlier header provided SOL_TCP. */
#if !defined(SOL_TCP)
#define SOL_TCP	6
#endif
18
/* The value of bpf_jiffies64(), converted (truncated) to __u32. */
#define tcp_jiffies32	((__u32)bpf_jiffies64())
20
21 struct sock_common {
22 unsigned char skc_state;
23 __u16 skc_num;
24 } __attribute__((preserve_access_index));
25
/* Values for sock.sk_pacing_status. */
enum sk_pacing {
	SK_PACING_NONE,		/* 0 */
	SK_PACING_NEEDED,	/* 1 */
	SK_PACING_FQ,		/* 2 */
};
31
32 struct sock {
33 struct sock_common __sk_common;
34 #define sk_state __sk_common.skc_state
35 unsigned long sk_pacing_rate;
36 __u32 sk_pacing_status; /* see enum sk_pacing */
37 } __attribute__((preserve_access_index));
38
39 struct inet_sock {
40 struct sock sk;
41 } __attribute__((preserve_access_index));
42
43 struct inet_connection_sock {
44 struct inet_sock icsk_inet;
45 __u8 icsk_ca_state:6,
46 icsk_ca_setsockopt:1,
47 icsk_ca_dst_locked:1;
48 struct {
49 __u8 pending;
50 } icsk_ack;
51 __u64 icsk_ca_priv[104 / sizeof(__u64)];
52 } __attribute__((preserve_access_index));
53
54 struct request_sock {
55 struct sock_common __req_common;
56 } __attribute__((preserve_access_index));
57
58 struct tcp_sock {
59 struct inet_connection_sock inet_conn;
60
61 __u32 rcv_nxt;
62 __u32 snd_nxt;
63 __u32 snd_una;
64 __u32 window_clamp;
65 __u8 ecn_flags;
66 __u32 delivered;
67 __u32 delivered_ce;
68 __u32 snd_cwnd;
69 __u32 snd_cwnd_cnt;
70 __u32 snd_cwnd_clamp;
71 __u32 snd_ssthresh;
72 __u8 syn_data:1, /* SYN includes data */
73 syn_fastopen:1, /* SYN includes Fast Open option */
74 syn_fastopen_exp:1,/* SYN includes Fast Open exp. option */
75 syn_fastopen_ch:1, /* Active TFO re-enabling probe */
76 syn_data_acked:1,/* data in SYN is acked by SYN-ACK */
77 save_syn:1, /* Save headers of SYN packet */
78 is_cwnd_limited:1,/* forward progress limited by snd_cwnd? */
79 syn_smc:1; /* SYN includes SMC */
80 __u32 max_packets_out;
81 __u32 lsndtime;
82 __u32 prior_cwnd;
83 __u64 tcp_mstamp; /* most recent packet received/sent */
84 } __attribute__((preserve_access_index));
85
inet_csk(const struct sock * sk)86 static __always_inline struct inet_connection_sock *inet_csk(const struct sock *sk)
87 {
88 return (struct inet_connection_sock *)sk;
89 }
90
inet_csk_ca(const struct sock * sk)91 static __always_inline void *inet_csk_ca(const struct sock *sk)
92 {
93 return (void *)inet_csk(sk)->icsk_ca_priv;
94 }
95
tcp_sk(const struct sock * sk)96 static __always_inline struct tcp_sock *tcp_sk(const struct sock *sk)
97 {
98 return (struct tcp_sock *)sk;
99 }
100
before(__u32 seq1,__u32 seq2)101 static __always_inline bool before(__u32 seq1, __u32 seq2)
102 {
103 return (__s32)(seq1-seq2) < 0;
104 }
105 #define after(seq2, seq1) before(seq1, seq2)
106
/*
 * TCP_ECN_* values, one bit each.
 * NOTE(review): presumably the bits kept in tcp_sock.ecn_flags - confirm
 * against the users of this header.
 */
#define TCP_ECN_OK		0x1
#define TCP_ECN_QUEUE_CWR	0x2
#define TCP_ECN_DEMAND_CWR	0x4
#define TCP_ECN_SEEN		0x8
111
/*
 * ICSK_ACK_* values, one bit each.
 * NOTE(review): presumably OR-ed into inet_connection_sock.icsk_ack.pending -
 * confirm against the users of this header.
 */
enum inet_csk_ack_state_t {
	ICSK_ACK_SCHED	 = 1 << 0,
	ICSK_ACK_TIMER	 = 1 << 1,
	ICSK_ACK_PUSHED	 = 1 << 2,
	ICSK_ACK_PUSHED2 = 1 << 3,
	ICSK_ACK_NOW	 = 1 << 4,	/* Send the next ACK immediately (once) */
};
119
/* Event codes delivered to tcp_congestion_ops.cwnd_event(). */
enum tcp_ca_event {
	CA_EVENT_TX_START	= 0,
	CA_EVENT_CWND_RESTART	= 1,
	CA_EVENT_COMPLETE_CWR	= 2,
	CA_EVENT_LOSS		= 3,
	CA_EVENT_ECN_NO_CE	= 4,
	CA_EVENT_ECN_IS_CE	= 5,
};
128
129 struct ack_sample {
130 __u32 pkts_acked;
131 __s32 rtt_us;
132 __u32 in_flight;
133 } __attribute__((preserve_access_index));
134
135 struct rate_sample {
136 __u64 prior_mstamp; /* starting timestamp for interval */
137 __u32 prior_delivered; /* tp->delivered at "prior_mstamp" */
138 __s32 delivered; /* number of packets delivered over interval */
139 long interval_us; /* time for tp->delivered to incr "delivered" */
140 __u32 snd_interval_us; /* snd interval for delivered packets */
141 __u32 rcv_interval_us; /* rcv interval for delivered packets */
142 long rtt_us; /* RTT of last (S)ACKed packet (or -1) */
143 int losses; /* number of packets marked lost upon ACK */
144 __u32 acked_sacked; /* number of packets newly (S)ACKed upon ACK */
145 __u32 prior_in_flight; /* in flight before this ACK */
146 bool is_app_limited; /* is sample from packet with bubble in pipe? */
147 bool is_retrans; /* is sample from retransmission? */
148 bool is_ack_delayed; /* is this (likely) a delayed ACK? */
149 } __attribute__((preserve_access_index));
150
/* Size of tcp_congestion_ops.name and the comparison bound in tcp_cc_eq(). */
#define TCP_CA_NAME_MAX		16
/* NOTE(review): presumably a bit for tcp_congestion_ops.flags - confirm. */
#define TCP_CONG_NEEDS_ECN	0x2
153
154 struct tcp_congestion_ops {
155 char name[TCP_CA_NAME_MAX];
156 __u32 flags;
157
158 /* initialize private data (optional) */
159 void (*init)(struct sock *sk);
160 /* cleanup private data (optional) */
161 void (*release)(struct sock *sk);
162
163 /* return slow start threshold (required) */
164 __u32 (*ssthresh)(struct sock *sk);
165 /* do new cwnd calculation (required) */
166 void (*cong_avoid)(struct sock *sk, __u32 ack, __u32 acked);
167 /* call before changing ca_state (optional) */
168 void (*set_state)(struct sock *sk, __u8 new_state);
169 /* call when cwnd event occurs (optional) */
170 void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev);
171 /* call when ack arrives (optional) */
172 void (*in_ack_event)(struct sock *sk, __u32 flags);
173 /* new value of cwnd after loss (required) */
174 __u32 (*undo_cwnd)(struct sock *sk);
175 /* hook for packet ack accounting (optional) */
176 void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample);
177 /* override sysctl_tcp_min_tso_segs */
178 __u32 (*min_tso_segs)(struct sock *sk);
179 /* returns the multiplier used in tcp_sndbuf_expand (optional) */
180 __u32 (*sndbuf_expand)(struct sock *sk);
181 /* call when packets are delivered to update cwnd and pacing rate,
182 * after all the ca_state processing. (optional)
183 */
184 void (*cong_control)(struct sock *sk, const struct rate_sample *rs);
185 void *owner;
186 };
187
/*
 * min(a, b) / max(a, b) - smaller / larger of two values.
 *
 * Each argument is evaluated exactly once, so arguments with side effects
 * (i++, function calls) or repeated memory loads are safe.  The result has
 * the type a plain "a < b ? a : b" would have.  These expand to GNU
 * statement expressions, so they can only be used inside a function body.
 * The temporaries use macro-specific names so that min_not_zero() can pass
 * its own __x/__y temporaries without self-shadowing.
 */
#define min(a, b) ({				\
	__typeof__(a) __min_a = (a);		\
	__typeof__(b) __min_b = (b);		\
	__min_a < __min_b ? __min_a : __min_b; })
#define max(a, b) ({				\
	__typeof__(a) __max_a = (a);		\
	__typeof__(b) __max_b = (b);		\
	__max_a > __max_b ? __max_a : __max_b; })

/*
 * min_not_zero(x, y) - smaller of two values, where a zero operand is
 * ignored: if one operand is zero the other is returned; if both are zero
 * the result is zero.  Each argument is evaluated exactly once.
 */
#define min_not_zero(x, y) ({			\
	__typeof__(x) __x = (x);		\
	__typeof__(y) __y = (y);		\
	__x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); })
194
tcp_in_slow_start(const struct tcp_sock * tp)195 static __always_inline bool tcp_in_slow_start(const struct tcp_sock *tp)
196 {
197 return tp->snd_cwnd < tp->snd_ssthresh;
198 }
199
tcp_is_cwnd_limited(const struct sock * sk)200 static __always_inline bool tcp_is_cwnd_limited(const struct sock *sk)
201 {
202 const struct tcp_sock *tp = tcp_sk(sk);
203
204 /* If in slow start, ensure cwnd grows to twice what was ACKed. */
205 if (tcp_in_slow_start(tp))
206 return tp->snd_cwnd < 2 * tp->max_packets_out;
207
208 return !!BPF_CORE_READ_BITFIELD(tp, is_cwnd_limited);
209 }
210
tcp_cc_eq(const char * a,const char * b)211 static __always_inline bool tcp_cc_eq(const char *a, const char *b)
212 {
213 int i;
214
215 for (i = 0; i < TCP_CA_NAME_MAX; i++) {
216 if (a[i] != b[i])
217 return false;
218 if (!a[i])
219 break;
220 }
221
222 return true;
223 }
224
225 extern __u32 tcp_slow_start(struct tcp_sock *tp, __u32 acked) __ksym;
226 extern void tcp_cong_avoid_ai(struct tcp_sock *tp, __u32 w, __u32 acked) __ksym;
227
228 #endif
229