// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
3 
4 #include <linux/bpf.h>
5 #include <netinet/in.h>
6 #include <stdbool.h>
7 
8 #include "bpf_helpers.h"
9 #include "bpf_endian.h"
10 
/* Keys into addr_map: one slot per endpoint address. */
enum bpf_addr_array_idx {
	ADDR_SRV_IDX,
	ADDR_CLI_IDX,
	__NR_BPF_ADDR_ARRAY_IDX,	/* number of slots; sizes addr_map */
};
16 
/* Keys into sock_result_map and tcp_sock_result_map. */
enum bpf_result_array_idx {
	EGRESS_SRV_IDX,		/* written by the egress program, server port */
	EGRESS_CLI_IDX,		/* written by the egress program, client port */
	INGRESS_LISTEN_IDX,	/* written by the ingress program */
	__NR_BPF_RESULT_ARRAY_IDX,	/* number of slots; sizes both maps */
};
23 
/* Keys into linum_map: one "last exit line" slot per program. */
enum bpf_linum_array_idx {
	EGRESS_LINUM_IDX,
	INGRESS_LINUM_IDX,
	__NR_BPF_LINUM_ARRAY_IDX,	/* number of slots; sizes linum_map */
};
29 
30 struct {
31 	__uint(type, BPF_MAP_TYPE_ARRAY);
32 	__uint(max_entries, __NR_BPF_ADDR_ARRAY_IDX);
33 	__type(key, __u32);
34 	__type(value, struct sockaddr_in6);
35 } addr_map SEC(".maps");
36 
37 struct {
38 	__uint(type, BPF_MAP_TYPE_ARRAY);
39 	__uint(max_entries, __NR_BPF_RESULT_ARRAY_IDX);
40 	__type(key, __u32);
41 	__type(value, struct bpf_sock);
42 } sock_result_map SEC(".maps");
43 
44 struct {
45 	__uint(type, BPF_MAP_TYPE_ARRAY);
46 	__uint(max_entries, __NR_BPF_RESULT_ARRAY_IDX);
47 	__type(key, __u32);
48 	__type(value, struct bpf_tcp_sock);
49 } tcp_sock_result_map SEC(".maps");
50 
51 struct {
52 	__uint(type, BPF_MAP_TYPE_ARRAY);
53 	__uint(max_entries, __NR_BPF_LINUM_ARRAY_IDX);
54 	__type(key, __u32);
55 	__type(value, __u32);
56 } linum_map SEC(".maps");
57 
58 struct bpf_spinlock_cnt {
59 	struct bpf_spin_lock lock;
60 	__u32 cnt;
61 };
62 
63 struct {
64 	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
65 	__uint(map_flags, BPF_F_NO_PREALLOC);
66 	__type(key, int);
67 	__type(value, struct bpf_spinlock_cnt);
68 } sk_pkt_out_cnt SEC(".maps");
69 
70 struct {
71 	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
72 	__uint(map_flags, BPF_F_NO_PREALLOC);
73 	__type(key, int);
74 	__type(value, struct bpf_spinlock_cnt);
75 } sk_pkt_out_cnt10 SEC(".maps");
76 
/*
 * Return true when the four 32-bit words at @a6 spell the IPv6
 * loopback address ::1, i.e. the first three words are zero and the
 * last one equals 1 in network byte order.
 *
 * @a6: IPv6 address as four 32-bit words; only read, never written.
 */
static bool is_loopback6(const __u32 *a6)
{
	return !a6[0] && !a6[1] && !a6[2] && a6[3] == bpf_htonl(1);
}
81 
/*
 * Copy the socket fields this test inspects from @src into @dst.
 *
 * The copy is done one member at a time rather than as a single
 * struct assignment.
 * NOTE(review): presumably because the BPF verifier only permits
 * direct field loads through a struct bpf_sock pointer - confirm.
 *
 * @dst: destination snapshot (a map value in this file's callers).
 * @src: socket to read from.
 */
static void skcpy(struct bpf_sock *dst,
		  const struct bpf_sock *src)
{
	dst->bound_dev_if = src->bound_dev_if;
	dst->family = src->family;
	dst->type = src->type;
	dst->protocol = src->protocol;
	dst->mark = src->mark;
	dst->priority = src->priority;

	/* Local (source) address and port. */
	dst->src_ip4 = src->src_ip4;
	dst->src_ip6[0] = src->src_ip6[0];
	dst->src_ip6[1] = src->src_ip6[1];
	dst->src_ip6[2] = src->src_ip6[2];
	dst->src_ip6[3] = src->src_ip6[3];
	dst->src_port = src->src_port;

	/* Remote (destination) address and port. */
	dst->dst_ip4 = src->dst_ip4;
	dst->dst_ip6[0] = src->dst_ip6[0];
	dst->dst_ip6[1] = src->dst_ip6[1];
	dst->dst_ip6[2] = src->dst_ip6[2];
	dst->dst_ip6[3] = src->dst_ip6[3];
	dst->dst_port = src->dst_port;

	dst->state = src->state;
}
105 
/*
 * Copy the TCP socket fields this test inspects from @src into @dst,
 * one member at a time.
 * NOTE(review): presumably field-wise for the same BPF verifier reason
 * as skcpy() - confirm.
 *
 * @dst: destination snapshot (a map value in this file's callers).
 * @src: TCP socket to read from.
 */
static void tpcpy(struct bpf_tcp_sock *dst,
		  const struct bpf_tcp_sock *src)
{
	dst->snd_cwnd = src->snd_cwnd;
	dst->srtt_us = src->srtt_us;
	dst->rtt_min = src->rtt_min;
	dst->snd_ssthresh = src->snd_ssthresh;
	dst->rcv_nxt = src->rcv_nxt;
	dst->snd_nxt = src->snd_nxt;
	dst->snd_una = src->snd_una;
	dst->mss_cache = src->mss_cache;
	dst->ecn_flags = src->ecn_flags;
	dst->rate_delivered = src->rate_delivered;
	dst->rate_interval_us = src->rate_interval_us;
	dst->packets_out = src->packets_out;
	dst->retrans_out = src->retrans_out;
	dst->total_retrans = src->total_retrans;
	dst->segs_in = src->segs_in;
	dst->data_segs_in = src->data_segs_in;
	dst->segs_out = src->segs_out;
	dst->data_segs_out = src->data_segs_out;
	dst->lost_out = src->lost_out;
	dst->sacked_out = src->sacked_out;
	dst->bytes_received = src->bytes_received;
	dst->bytes_acked = src->bytes_acked;
}
132 
/*
 * Record the source line of the exit point in linum_map, then return 1
 * from the enclosing program.
 *
 * Expects two __u32 locals in the calling function: `linum` (scratch)
 * and `linum_idx` (the linum_map key for that program).  Use as a
 * statement: `RETURN;`.
 *
 * Wrapped in do { } while (0) so that `RETURN;` is a single statement
 * and stays valid in front of an `else`.
 */
#define RETURN do {						\
	linum = __LINE__;					\
	bpf_map_update_elem(&linum_map, &linum_idx, &linum, 0);	\
	return 1;						\
} while (0)
138 
139 SEC("cgroup_skb/egress")
egress_read_sock_fields(struct __sk_buff * skb)140 int egress_read_sock_fields(struct __sk_buff *skb)
141 {
142 	struct bpf_spinlock_cnt cli_cnt_init = { .lock = 0, .cnt = 0xeB9F };
143 	__u32 srv_idx = ADDR_SRV_IDX, cli_idx = ADDR_CLI_IDX, result_idx;
144 	struct bpf_spinlock_cnt *pkt_out_cnt, *pkt_out_cnt10;
145 	struct sockaddr_in6 *srv_sa6, *cli_sa6;
146 	struct bpf_tcp_sock *tp, *tp_ret;
147 	struct bpf_sock *sk, *sk_ret;
148 	__u32 linum, linum_idx;
149 
150 	linum_idx = EGRESS_LINUM_IDX;
151 
152 	sk = skb->sk;
153 	if (!sk || sk->state == 10)
154 		RETURN;
155 
156 	sk = bpf_sk_fullsock(sk);
157 	if (!sk || sk->family != AF_INET6 || sk->protocol != IPPROTO_TCP ||
158 	    !is_loopback6(sk->src_ip6))
159 		RETURN;
160 
161 	tp = bpf_tcp_sock(sk);
162 	if (!tp)
163 		RETURN;
164 
165 	srv_sa6 = bpf_map_lookup_elem(&addr_map, &srv_idx);
166 	cli_sa6 = bpf_map_lookup_elem(&addr_map, &cli_idx);
167 	if (!srv_sa6 || !cli_sa6)
168 		RETURN;
169 
170 	if (sk->src_port == bpf_ntohs(srv_sa6->sin6_port))
171 		result_idx = EGRESS_SRV_IDX;
172 	else if (sk->src_port == bpf_ntohs(cli_sa6->sin6_port))
173 		result_idx = EGRESS_CLI_IDX;
174 	else
175 		RETURN;
176 
177 	sk_ret = bpf_map_lookup_elem(&sock_result_map, &result_idx);
178 	tp_ret = bpf_map_lookup_elem(&tcp_sock_result_map, &result_idx);
179 	if (!sk_ret || !tp_ret)
180 		RETURN;
181 
182 	skcpy(sk_ret, sk);
183 	tpcpy(tp_ret, tp);
184 
185 	if (result_idx == EGRESS_SRV_IDX) {
186 		/* The userspace has created it for srv sk */
187 		pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, sk, 0, 0);
188 		pkt_out_cnt10 = bpf_sk_storage_get(&sk_pkt_out_cnt10, sk,
189 						   0, 0);
190 	} else {
191 		pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, sk,
192 						 &cli_cnt_init,
193 						 BPF_SK_STORAGE_GET_F_CREATE);
194 		pkt_out_cnt10 = bpf_sk_storage_get(&sk_pkt_out_cnt10,
195 						   sk, &cli_cnt_init,
196 						   BPF_SK_STORAGE_GET_F_CREATE);
197 	}
198 
199 	if (!pkt_out_cnt || !pkt_out_cnt10)
200 		RETURN;
201 
202 	/* Even both cnt and cnt10 have lock defined in their BTF,
203 	 * intentionally one cnt takes lock while one does not
204 	 * as a test for the spinlock support in BPF_MAP_TYPE_SK_STORAGE.
205 	 */
206 	pkt_out_cnt->cnt += 1;
207 	bpf_spin_lock(&pkt_out_cnt10->lock);
208 	pkt_out_cnt10->cnt += 10;
209 	bpf_spin_unlock(&pkt_out_cnt10->lock);
210 
211 	RETURN;
212 }
213 
214 SEC("cgroup_skb/ingress")
ingress_read_sock_fields(struct __sk_buff * skb)215 int ingress_read_sock_fields(struct __sk_buff *skb)
216 {
217 	__u32 srv_idx = ADDR_SRV_IDX, result_idx = INGRESS_LISTEN_IDX;
218 	struct bpf_tcp_sock *tp, *tp_ret;
219 	struct bpf_sock *sk, *sk_ret;
220 	struct sockaddr_in6 *srv_sa6;
221 	__u32 linum, linum_idx;
222 
223 	linum_idx = INGRESS_LINUM_IDX;
224 
225 	sk = skb->sk;
226 	if (!sk || sk->family != AF_INET6 || !is_loopback6(sk->src_ip6))
227 		RETURN;
228 
229 	srv_sa6 = bpf_map_lookup_elem(&addr_map, &srv_idx);
230 	if (!srv_sa6 || sk->src_port != bpf_ntohs(srv_sa6->sin6_port))
231 		RETURN;
232 
233 	if (sk->state != 10 && sk->state != 12)
234 		RETURN;
235 
236 	sk = bpf_get_listener_sock(sk);
237 	if (!sk)
238 		RETURN;
239 
240 	tp = bpf_tcp_sock(sk);
241 	if (!tp)
242 		RETURN;
243 
244 	sk_ret = bpf_map_lookup_elem(&sock_result_map, &result_idx);
245 	tp_ret = bpf_map_lookup_elem(&tcp_sock_result_map, &result_idx);
246 	if (!sk_ret || !tp_ret)
247 		RETURN;
248 
249 	skcpy(sk_ret, sk);
250 	tpcpy(tp_ret, tp);
251 
252 	RETURN;
253 }
254 
255 char _license[] SEC("license") = "GPL";
256