• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2019 Facebook */
3 
4 #define _GNU_SOURCE
5 #include <netinet/in.h>
6 #include <arpa/inet.h>
7 #include <unistd.h>
8 #include <sched.h>
9 #include <stdlib.h>
10 #include <string.h>
11 #include <errno.h>
12 
13 #include <bpf/bpf.h>
14 #include <bpf/libbpf.h>
15 #include <linux/compiler.h>
16 
17 #include "network_helpers.h"
18 #include "cgroup_helpers.h"
19 #include "test_progs.h"
20 #include "bpf_rlimit.h"
21 #include "test_sock_fields.skel.h"
22 
/* Keys used by check_result() when reading entries back from linum_map. */
enum bpf_linum_array_idx {
	EGRESS_LINUM_IDX = 0,
	INGRESS_LINUM_IDX = 1,
	READ_SK_DST_PORT_LINUM_IDX = 2,
	__NR_BPF_LINUM_ARRAY_IDX,	/* number of keys above; keep last */
};
29 
30 struct bpf_spinlock_cnt {
31 	struct bpf_spin_lock lock;
32 	__u32 cnt;
33 };
34 
/* cgroup paths joined by test_sock_fields(); the child nests in the parent */
#define PARENT_CGROUP	"/test-bpf-sock-fields"
#define CHILD_CGROUP	PARENT_CGROUP "/child"

/* Payload of each data packet; DATA_LEN counts the terminating NUL too */
#define DATA		"Hello BPF!"
#define DATA_LEN	(sizeof(DATA))
39 
40 static struct sockaddr_in6 srv_sa6, cli_sa6;
41 static int sk_pkt_out_cnt10_fd;
42 static struct test_sock_fields *skel;
43 static int sk_pkt_out_cnt_fd;
44 static __u64 parent_cg_id;
45 static __u64 child_cg_id;
46 static int linum_map_fd;
47 static __u32 duration;
48 
create_netns(void)49 static bool create_netns(void)
50 {
51 	if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns"))
52 		return false;
53 
54 	if (!ASSERT_OK(system("ip link set dev lo up"), "bring up lo"))
55 		return false;
56 
57 	return true;
58 }
59 
print_sk(const struct bpf_sock * sk,const char * prefix)60 static void print_sk(const struct bpf_sock *sk, const char *prefix)
61 {
62 	char src_ip4[24], dst_ip4[24];
63 	char src_ip6[64], dst_ip6[64];
64 
65 	inet_ntop(AF_INET, &sk->src_ip4, src_ip4, sizeof(src_ip4));
66 	inet_ntop(AF_INET6, &sk->src_ip6, src_ip6, sizeof(src_ip6));
67 	inet_ntop(AF_INET, &sk->dst_ip4, dst_ip4, sizeof(dst_ip4));
68 	inet_ntop(AF_INET6, &sk->dst_ip6, dst_ip6, sizeof(dst_ip6));
69 
70 	printf("%s: state:%u bound_dev_if:%u family:%u type:%u protocol:%u mark:%u priority:%u "
71 	       "src_ip4:%x(%s) src_ip6:%x:%x:%x:%x(%s) src_port:%u "
72 	       "dst_ip4:%x(%s) dst_ip6:%x:%x:%x:%x(%s) dst_port:%u\n",
73 	       prefix,
74 	       sk->state, sk->bound_dev_if, sk->family, sk->type, sk->protocol,
75 	       sk->mark, sk->priority,
76 	       sk->src_ip4, src_ip4,
77 	       sk->src_ip6[0], sk->src_ip6[1], sk->src_ip6[2], sk->src_ip6[3],
78 	       src_ip6, sk->src_port,
79 	       sk->dst_ip4, dst_ip4,
80 	       sk->dst_ip6[0], sk->dst_ip6[1], sk->dst_ip6[2], sk->dst_ip6[3],
81 	       dst_ip6, ntohs(sk->dst_port));
82 }
83 
print_tp(const struct bpf_tcp_sock * tp,const char * prefix)84 static void print_tp(const struct bpf_tcp_sock *tp, const char *prefix)
85 {
86 	printf("%s: snd_cwnd:%u srtt_us:%u rtt_min:%u snd_ssthresh:%u rcv_nxt:%u "
87 	       "snd_nxt:%u snd:una:%u mss_cache:%u ecn_flags:%u "
88 	       "rate_delivered:%u rate_interval_us:%u packets_out:%u "
89 	       "retrans_out:%u total_retrans:%u segs_in:%u data_segs_in:%u "
90 	       "segs_out:%u data_segs_out:%u lost_out:%u sacked_out:%u "
91 	       "bytes_received:%llu bytes_acked:%llu\n",
92 	       prefix,
93 	       tp->snd_cwnd, tp->srtt_us, tp->rtt_min, tp->snd_ssthresh,
94 	       tp->rcv_nxt, tp->snd_nxt, tp->snd_una, tp->mss_cache,
95 	       tp->ecn_flags, tp->rate_delivered, tp->rate_interval_us,
96 	       tp->packets_out, tp->retrans_out, tp->total_retrans,
97 	       tp->segs_in, tp->data_segs_in, tp->segs_out,
98 	       tp->data_segs_out, tp->lost_out, tp->sacked_out,
99 	       tp->bytes_received, tp->bytes_acked);
100 }
101 
check_result(void)102 static void check_result(void)
103 {
104 	struct bpf_tcp_sock srv_tp, cli_tp, listen_tp;
105 	struct bpf_sock srv_sk, cli_sk, listen_sk;
106 	__u32 idx, ingress_linum, egress_linum, linum;
107 	int err;
108 
109 	idx = EGRESS_LINUM_IDX;
110 	err = bpf_map_lookup_elem(linum_map_fd, &idx, &egress_linum);
111 	CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)",
112 	      "err:%d errno:%d\n", err, errno);
113 
114 	idx = INGRESS_LINUM_IDX;
115 	err = bpf_map_lookup_elem(linum_map_fd, &idx, &ingress_linum);
116 	CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)",
117 	      "err:%d errno:%d\n", err, errno);
118 
119 	idx = READ_SK_DST_PORT_LINUM_IDX;
120 	err = bpf_map_lookup_elem(linum_map_fd, &idx, &linum);
121 	ASSERT_OK(err, "bpf_map_lookup_elem(linum_map_fd, READ_SK_DST_PORT_IDX)");
122 	ASSERT_EQ(linum, 0, "failure in read_sk_dst_port on line");
123 
124 	memcpy(&srv_sk, &skel->bss->srv_sk, sizeof(srv_sk));
125 	memcpy(&srv_tp, &skel->bss->srv_tp, sizeof(srv_tp));
126 	memcpy(&cli_sk, &skel->bss->cli_sk, sizeof(cli_sk));
127 	memcpy(&cli_tp, &skel->bss->cli_tp, sizeof(cli_tp));
128 	memcpy(&listen_sk, &skel->bss->listen_sk, sizeof(listen_sk));
129 	memcpy(&listen_tp, &skel->bss->listen_tp, sizeof(listen_tp));
130 
131 	print_sk(&listen_sk, "listen_sk");
132 	print_sk(&srv_sk, "srv_sk");
133 	print_sk(&cli_sk, "cli_sk");
134 	print_tp(&listen_tp, "listen_tp");
135 	print_tp(&srv_tp, "srv_tp");
136 	print_tp(&cli_tp, "cli_tp");
137 
138 	CHECK(listen_sk.state != 10 ||
139 	      listen_sk.family != AF_INET6 ||
140 	      listen_sk.protocol != IPPROTO_TCP ||
141 	      memcmp(listen_sk.src_ip6, &in6addr_loopback,
142 		     sizeof(listen_sk.src_ip6)) ||
143 	      listen_sk.dst_ip6[0] || listen_sk.dst_ip6[1] ||
144 	      listen_sk.dst_ip6[2] || listen_sk.dst_ip6[3] ||
145 	      listen_sk.src_port != ntohs(srv_sa6.sin6_port) ||
146 	      listen_sk.dst_port,
147 	      "listen_sk",
148 	      "Unexpected. Check listen_sk output. ingress_linum:%u\n",
149 	      ingress_linum);
150 
151 	CHECK(srv_sk.state == 10 ||
152 	      !srv_sk.state ||
153 	      srv_sk.family != AF_INET6 ||
154 	      srv_sk.protocol != IPPROTO_TCP ||
155 	      memcmp(srv_sk.src_ip6, &in6addr_loopback,
156 		     sizeof(srv_sk.src_ip6)) ||
157 	      memcmp(srv_sk.dst_ip6, &in6addr_loopback,
158 		     sizeof(srv_sk.dst_ip6)) ||
159 	      srv_sk.src_port != ntohs(srv_sa6.sin6_port) ||
160 	      srv_sk.dst_port != cli_sa6.sin6_port,
161 	      "srv_sk", "Unexpected. Check srv_sk output. egress_linum:%u\n",
162 	      egress_linum);
163 
164 	CHECK(!skel->bss->lsndtime, "srv_tp", "Unexpected lsndtime:0\n");
165 
166 	CHECK(cli_sk.state == 10 ||
167 	      !cli_sk.state ||
168 	      cli_sk.family != AF_INET6 ||
169 	      cli_sk.protocol != IPPROTO_TCP ||
170 	      memcmp(cli_sk.src_ip6, &in6addr_loopback,
171 		     sizeof(cli_sk.src_ip6)) ||
172 	      memcmp(cli_sk.dst_ip6, &in6addr_loopback,
173 		     sizeof(cli_sk.dst_ip6)) ||
174 	      cli_sk.src_port != ntohs(cli_sa6.sin6_port) ||
175 	      cli_sk.dst_port != srv_sa6.sin6_port,
176 	      "cli_sk", "Unexpected. Check cli_sk output. egress_linum:%u\n",
177 	      egress_linum);
178 
179 	CHECK(listen_tp.data_segs_out ||
180 	      listen_tp.data_segs_in ||
181 	      listen_tp.total_retrans ||
182 	      listen_tp.bytes_acked,
183 	      "listen_tp",
184 	      "Unexpected. Check listen_tp output. ingress_linum:%u\n",
185 	      ingress_linum);
186 
187 	CHECK(srv_tp.data_segs_out != 2 ||
188 	      srv_tp.data_segs_in ||
189 	      srv_tp.snd_cwnd != 10 ||
190 	      srv_tp.total_retrans ||
191 	      srv_tp.bytes_acked < 2 * DATA_LEN,
192 	      "srv_tp", "Unexpected. Check srv_tp output. egress_linum:%u\n",
193 	      egress_linum);
194 
195 	CHECK(cli_tp.data_segs_out ||
196 	      cli_tp.data_segs_in != 2 ||
197 	      cli_tp.snd_cwnd != 10 ||
198 	      cli_tp.total_retrans ||
199 	      cli_tp.bytes_received < 2 * DATA_LEN,
200 	      "cli_tp", "Unexpected. Check cli_tp output. egress_linum:%u\n",
201 	      egress_linum);
202 
203 	CHECK(skel->bss->parent_cg_id != parent_cg_id,
204 	      "parent_cg_id", "%zu != %zu\n",
205 	      (size_t)skel->bss->parent_cg_id, (size_t)parent_cg_id);
206 
207 	CHECK(skel->bss->child_cg_id != child_cg_id,
208 	      "child_cg_id", "%zu != %zu\n",
209 	       (size_t)skel->bss->child_cg_id, (size_t)child_cg_id);
210 }
211 
check_sk_pkt_out_cnt(int accept_fd,int cli_fd)212 static void check_sk_pkt_out_cnt(int accept_fd, int cli_fd)
213 {
214 	struct bpf_spinlock_cnt pkt_out_cnt = {}, pkt_out_cnt10 = {};
215 	int err;
216 
217 	pkt_out_cnt.cnt = ~0;
218 	pkt_out_cnt10.cnt = ~0;
219 	err = bpf_map_lookup_elem(sk_pkt_out_cnt_fd, &accept_fd, &pkt_out_cnt);
220 	if (!err)
221 		err = bpf_map_lookup_elem(sk_pkt_out_cnt10_fd, &accept_fd,
222 					  &pkt_out_cnt10);
223 
224 	/* The bpf prog only counts for fullsock and
225 	 * passive connection did not become fullsock until 3WHS
226 	 * had been finished, so the bpf prog only counted two data
227 	 * packet out.
228 	 */
229 	CHECK(err || pkt_out_cnt.cnt < 0xeB9F + 2 ||
230 	      pkt_out_cnt10.cnt < 0xeB9F + 20,
231 	      "bpf_map_lookup_elem(sk_pkt_out_cnt, &accept_fd)",
232 	      "err:%d errno:%d pkt_out_cnt:%u pkt_out_cnt10:%u\n",
233 	      err, errno, pkt_out_cnt.cnt, pkt_out_cnt10.cnt);
234 
235 	pkt_out_cnt.cnt = ~0;
236 	pkt_out_cnt10.cnt = ~0;
237 	err = bpf_map_lookup_elem(sk_pkt_out_cnt_fd, &cli_fd, &pkt_out_cnt);
238 	if (!err)
239 		err = bpf_map_lookup_elem(sk_pkt_out_cnt10_fd, &cli_fd,
240 					  &pkt_out_cnt10);
241 	/* Active connection is fullsock from the beginning.
242 	 * 1 SYN and 1 ACK during 3WHS
243 	 * 2 Acks on data packet.
244 	 *
245 	 * The bpf_prog initialized it to 0xeB9F.
246 	 */
247 	CHECK(err || pkt_out_cnt.cnt < 0xeB9F + 4 ||
248 	      pkt_out_cnt10.cnt < 0xeB9F + 40,
249 	      "bpf_map_lookup_elem(sk_pkt_out_cnt, &cli_fd)",
250 	      "err:%d errno:%d pkt_out_cnt:%u pkt_out_cnt10:%u\n",
251 	      err, errno, pkt_out_cnt.cnt, pkt_out_cnt10.cnt);
252 }
253 
/*
 * Create the entries for one socket in both packet-counter maps.
 *
 * @sk_fd:       socket fd used as the map key
 * @pkt_out_cnt: initial counter value stored in both maps
 *
 * Both updates use BPF_NOEXIST. Returns 0 on success, otherwise the
 * non-zero result of the update that failed (already reported via CHECK).
 */
static int init_sk_storage(int sk_fd, __u32 pkt_out_cnt)
{
	struct bpf_spinlock_cnt init_val = { .cnt = pkt_out_cnt };
	int ret;

	ret = bpf_map_update_elem(sk_pkt_out_cnt_fd, &sk_fd, &init_val,
				  BPF_NOEXIST);
	if (CHECK(ret, "bpf_map_update_elem(sk_pkt_out_cnt_fd)",
		  "err:%d errno:%d\n", ret, errno))
		return ret;

	/* The second map starts from the very same value */
	ret = bpf_map_update_elem(sk_pkt_out_cnt10_fd, &sk_fd, &init_val,
				  BPF_NOEXIST);
	if (CHECK(ret, "bpf_map_update_elem(sk_pkt_out_cnt10_fd)",
		  "err:%d errno:%d\n", ret, errno))
		return ret;

	return 0;
}
274 
test(void)275 static void test(void)
276 {
277 	int listen_fd = -1, cli_fd = -1, accept_fd = -1, err, i;
278 	socklen_t addrlen = sizeof(struct sockaddr_in6);
279 	char buf[DATA_LEN];
280 
281 	/* Prepare listen_fd */
282 	listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0xcafe, 0);
283 	/* start_server() has logged the error details */
284 	if (CHECK_FAIL(listen_fd == -1))
285 		goto done;
286 
287 	err = getsockname(listen_fd, (struct sockaddr *)&srv_sa6, &addrlen);
288 	if (CHECK(err, "getsockname(listen_fd)", "err:%d errno:%d\n", err,
289 		  errno))
290 		goto done;
291 	memcpy(&skel->bss->srv_sa6, &srv_sa6, sizeof(srv_sa6));
292 
293 	cli_fd = connect_to_fd(listen_fd, 0);
294 	if (CHECK_FAIL(cli_fd == -1))
295 		goto done;
296 
297 	err = getsockname(cli_fd, (struct sockaddr *)&cli_sa6, &addrlen);
298 	if (CHECK(err, "getsockname(cli_fd)", "err:%d errno:%d\n",
299 		  err, errno))
300 		goto done;
301 
302 	accept_fd = accept(listen_fd, NULL, NULL);
303 	if (CHECK(accept_fd == -1, "accept(listen_fd)",
304 		  "accept_fd:%d errno:%d\n",
305 		  accept_fd, errno))
306 		goto done;
307 
308 	if (init_sk_storage(accept_fd, 0xeB9F))
309 		goto done;
310 
311 	for (i = 0; i < 2; i++) {
312 		/* Send some data from accept_fd to cli_fd.
313 		 * MSG_EOR to stop kernel from coalescing two pkts.
314 		 */
315 		err = send(accept_fd, DATA, DATA_LEN, MSG_EOR);
316 		if (CHECK(err != DATA_LEN, "send(accept_fd)",
317 			  "err:%d errno:%d\n", err, errno))
318 			goto done;
319 
320 		err = recv(cli_fd, buf, DATA_LEN, 0);
321 		if (CHECK(err != DATA_LEN, "recv(cli_fd)", "err:%d errno:%d\n",
322 			  err, errno))
323 			goto done;
324 	}
325 
326 	shutdown(cli_fd, SHUT_WR);
327 	err = recv(accept_fd, buf, 1, 0);
328 	if (CHECK(err, "recv(accept_fd) for fin", "err:%d errno:%d\n",
329 		  err, errno))
330 		goto done;
331 	shutdown(accept_fd, SHUT_WR);
332 	err = recv(cli_fd, buf, 1, 0);
333 	if (CHECK(err, "recv(cli_fd) for fin", "err:%d errno:%d\n",
334 		  err, errno))
335 		goto done;
336 	check_sk_pkt_out_cnt(accept_fd, cli_fd);
337 	check_result();
338 
339 done:
340 	if (accept_fd != -1)
341 		close(accept_fd);
342 	if (cli_fd != -1)
343 		close(cli_fd);
344 	if (listen_fd != -1)
345 		close(listen_fd);
346 }
347 
test_sock_fields(void)348 void test_sock_fields(void)
349 {
350 	int parent_cg_fd = -1, child_cg_fd = -1;
351 	struct bpf_link *link;
352 
353 	/* Use a dedicated netns to have a fixed listen port */
354 	if (!create_netns())
355 		return;
356 
357 	/* Create a cgroup, get fd, and join it */
358 	parent_cg_fd = test__join_cgroup(PARENT_CGROUP);
359 	if (CHECK_FAIL(parent_cg_fd < 0))
360 		return;
361 	parent_cg_id = get_cgroup_id(PARENT_CGROUP);
362 	if (CHECK_FAIL(!parent_cg_id))
363 		goto done;
364 
365 	child_cg_fd = test__join_cgroup(CHILD_CGROUP);
366 	if (CHECK_FAIL(child_cg_fd < 0))
367 		goto done;
368 	child_cg_id = get_cgroup_id(CHILD_CGROUP);
369 	if (CHECK_FAIL(!child_cg_id))
370 		goto done;
371 
372 	skel = test_sock_fields__open_and_load();
373 	if (CHECK(!skel, "test_sock_fields__open_and_load", "failed\n"))
374 		goto done;
375 
376 	link = bpf_program__attach_cgroup(skel->progs.egress_read_sock_fields, child_cg_fd);
377 	if (!ASSERT_OK_PTR(link, "attach_cgroup(egress_read_sock_fields)"))
378 		goto done;
379 	skel->links.egress_read_sock_fields = link;
380 
381 	link = bpf_program__attach_cgroup(skel->progs.ingress_read_sock_fields, child_cg_fd);
382 	if (!ASSERT_OK_PTR(link, "attach_cgroup(ingress_read_sock_fields)"))
383 		goto done;
384 	skel->links.ingress_read_sock_fields = link;
385 
386 	link = bpf_program__attach_cgroup(skel->progs.read_sk_dst_port, child_cg_fd);
387 	if (!ASSERT_OK_PTR(link, "attach_cgroup(read_sk_dst_port"))
388 		goto done;
389 	skel->links.read_sk_dst_port = link;
390 
391 	linum_map_fd = bpf_map__fd(skel->maps.linum_map);
392 	sk_pkt_out_cnt_fd = bpf_map__fd(skel->maps.sk_pkt_out_cnt);
393 	sk_pkt_out_cnt10_fd = bpf_map__fd(skel->maps.sk_pkt_out_cnt10);
394 
395 	test();
396 
397 done:
398 	test_sock_fields__detach(skel);
399 	test_sock_fields__destroy(skel);
400 	if (child_cg_fd != -1)
401 		close(child_cg_fd);
402 	if (parent_cg_fd != -1)
403 		close(parent_cg_fd);
404 }
405