• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2018 Facebook */
3 
4 #include <stdlib.h>
5 #include <unistd.h>
6 #include <stdbool.h>
7 #include <string.h>
8 #include <errno.h>
9 #include <assert.h>
10 #include <fcntl.h>
11 #include <linux/bpf.h>
12 #include <linux/err.h>
13 #include <linux/types.h>
14 #include <linux/if_ether.h>
15 #include <sys/types.h>
16 #include <sys/epoll.h>
17 #include <sys/socket.h>
18 #include <netinet/in.h>
19 #include <bpf/bpf.h>
20 #include <bpf/libbpf.h>
21 #include "bpf_rlimit.h"
22 #include "bpf_util.h"
23 
24 #include "test_progs.h"
25 #include "test_select_reuseport_common.h"
26 
27 #define MAX_TEST_NAME 80
28 #define MIN_TCPHDR_LEN 20
29 #define UDPHDR_LEN 8
30 
31 #define TCP_SYNCOOKIE_SYSCTL "/proc/sys/net/ipv4/tcp_syncookies"
32 #define TCP_FO_SYSCTL "/proc/sys/net/ipv4/tcp_fastopen"
33 #define REUSEPORT_ARRAY_SIZE 32
34 
35 static int result_map, tmp_index_ovr_map, linum_map, data_check_map;
36 static __u32 expected_results[NR_RESULTS];
37 static int sk_fds[REUSEPORT_ARRAY_SIZE];
38 static int reuseport_array = -1, outer_map = -1;
39 static enum bpf_map_type inner_map_type;
40 static int select_by_skb_data_prog;
41 static int saved_tcp_syncookie = -1;
42 static struct bpf_object *obj;
43 static int saved_tcp_fo = -1;
44 static __u32 index_zero;
45 static int epfd;
46 
/*
 * Storage for either an IPv4 or an IPv6 socket address.  "family"
 * overlays the start of both structures (presumably where their
 * address-family member lives -- confirm against the platform's
 * sockaddr layout).
 */
union sa46 {
	struct sockaddr_in6 v6;
	struct sockaddr_in v4;
	sa_family_t family;
};

/* Address the server sockets are bound to; clients send to it. */
static union sa46 srv_sa;
52 
/*
 * If CHECK_FAIL(condition) is true, print "tag", a space and the
 * printf-style message, then return from the enclosing void function.
 * Must be used as a statement.
 */
#define RET_IF(condition, tag, ...)					\
	do {								\
		if (CHECK_FAIL(condition)) {				\
			printf(tag " " __VA_ARGS__);			\
			return;						\
		}							\
	} while (0)
59 
/*
 * If CHECK_FAIL(condition) is true, print "tag", a space and the
 * printf-style message, then return -1 from the enclosing function.
 * Must be used as a statement.
 */
#define RET_ERR(condition, tag, ...)					\
	do {								\
		if (CHECK_FAIL(condition)) {				\
			printf(tag " " __VA_ARGS__);			\
			return -1;					\
		}							\
	} while (0)
66 
create_maps(enum bpf_map_type inner_type)67 static int create_maps(enum bpf_map_type inner_type)
68 {
69 	struct bpf_create_map_attr attr = {};
70 
71 	inner_map_type = inner_type;
72 
73 	/* Creating reuseport_array */
74 	attr.name = "reuseport_array";
75 	attr.map_type = inner_type;
76 	attr.key_size = sizeof(__u32);
77 	attr.value_size = sizeof(__u32);
78 	attr.max_entries = REUSEPORT_ARRAY_SIZE;
79 
80 	reuseport_array = bpf_create_map_xattr(&attr);
81 	RET_ERR(reuseport_array < 0, "creating reuseport_array",
82 		"reuseport_array:%d errno:%d\n", reuseport_array, errno);
83 
84 	/* Creating outer_map */
85 	attr.name = "outer_map";
86 	attr.map_type = BPF_MAP_TYPE_ARRAY_OF_MAPS;
87 	attr.key_size = sizeof(__u32);
88 	attr.value_size = sizeof(__u32);
89 	attr.max_entries = 1;
90 	attr.inner_map_fd = reuseport_array;
91 	outer_map = bpf_create_map_xattr(&attr);
92 	RET_ERR(outer_map < 0, "creating outer_map",
93 		"outer_map:%d errno:%d\n", outer_map, errno);
94 
95 	return 0;
96 }
97 
prepare_bpf_obj(void)98 static int prepare_bpf_obj(void)
99 {
100 	struct bpf_program *prog;
101 	struct bpf_map *map;
102 	int err;
103 
104 	obj = bpf_object__open("test_select_reuseport_kern.o");
105 	err = libbpf_get_error(obj);
106 	RET_ERR(err, "open test_select_reuseport_kern.o",
107 		"obj:%p PTR_ERR(obj):%d\n", obj, err);
108 
109 	map = bpf_object__find_map_by_name(obj, "outer_map");
110 	RET_ERR(!map, "find outer_map", "!map\n");
111 	err = bpf_map__reuse_fd(map, outer_map);
112 	RET_ERR(err, "reuse outer_map", "err:%d\n", err);
113 
114 	err = bpf_object__load(obj);
115 	RET_ERR(err, "load bpf_object", "err:%d\n", err);
116 
117 	prog = bpf_program__next(NULL, obj);
118 	RET_ERR(!prog, "get first bpf_program", "!prog\n");
119 	select_by_skb_data_prog = bpf_program__fd(prog);
120 	RET_ERR(select_by_skb_data_prog < 0, "get prog fd",
121 		"select_by_skb_data_prog:%d\n", select_by_skb_data_prog);
122 
123 	map = bpf_object__find_map_by_name(obj, "result_map");
124 	RET_ERR(!map, "find result_map", "!map\n");
125 	result_map = bpf_map__fd(map);
126 	RET_ERR(result_map < 0, "get result_map fd",
127 		"result_map:%d\n", result_map);
128 
129 	map = bpf_object__find_map_by_name(obj, "tmp_index_ovr_map");
130 	RET_ERR(!map, "find tmp_index_ovr_map\n", "!map");
131 	tmp_index_ovr_map = bpf_map__fd(map);
132 	RET_ERR(tmp_index_ovr_map < 0, "get tmp_index_ovr_map fd",
133 		"tmp_index_ovr_map:%d\n", tmp_index_ovr_map);
134 
135 	map = bpf_object__find_map_by_name(obj, "linum_map");
136 	RET_ERR(!map, "find linum_map", "!map\n");
137 	linum_map = bpf_map__fd(map);
138 	RET_ERR(linum_map < 0, "get linum_map fd",
139 		"linum_map:%d\n", linum_map);
140 
141 	map = bpf_object__find_map_by_name(obj, "data_check_map");
142 	RET_ERR(!map, "find data_check_map", "!map\n");
143 	data_check_map = bpf_map__fd(map);
144 	RET_ERR(data_check_map < 0, "get data_check_map fd",
145 		"data_check_map:%d\n", data_check_map);
146 
147 	return 0;
148 }
149 
sa46_init_loopback(union sa46 * sa,sa_family_t family)150 static void sa46_init_loopback(union sa46 *sa, sa_family_t family)
151 {
152 	memset(sa, 0, sizeof(*sa));
153 	sa->family = family;
154 	if (sa->family == AF_INET6)
155 		sa->v6.sin6_addr = in6addr_loopback;
156 	else
157 		sa->v4.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
158 }
159 
sa46_init_inany(union sa46 * sa,sa_family_t family)160 static void sa46_init_inany(union sa46 *sa, sa_family_t family)
161 {
162 	memset(sa, 0, sizeof(*sa));
163 	sa->family = family;
164 	if (sa->family == AF_INET6)
165 		sa->v6.sin6_addr = in6addr_any;
166 	else
167 		sa->v4.sin_addr.s_addr = INADDR_ANY;
168 }
169 
/*
 * Read the integer stored in the sysctl file @sysctl.
 *
 * Returns the parsed value, or -1 if the file cannot be opened or
 * read.  The file descriptor is closed on every path.
 */
static int read_int_sysctl(const char *sysctl)
{
	int fd, ret, saved_errno;
	char buf[16];

	fd = open(sysctl, O_RDONLY);
	RET_ERR(fd == -1, "open(sysctl)",
		"sysctl:%s fd:%d errno:%d\n", sysctl, fd, errno);

	/* Keep one byte free for the NUL terminator that atoi() needs. */
	ret = read(fd, buf, sizeof(buf) - 1);
	/* close() may clobber errno; keep read()'s value for the report. */
	saved_errno = errno;
	close(fd);
	errno = saved_errno;
	RET_ERR(ret <= 0, "read(sysctl)",
		"sysctl:%s ret:%d errno:%d\n", sysctl, ret, errno);

	buf[ret] = '\0';
	return atoi(buf);
}
186 
/*
 * Write @v, formatted as a decimal number, to the sysctl file @sysctl.
 *
 * Returns 0 on success, -1 if the file cannot be opened or the value
 * is not written completely.  The file descriptor is closed on every
 * path.
 */
static int write_int_sysctl(const char *sysctl, int v)
{
	int fd, ret, size, saved_errno;
	char buf[16];

	fd = open(sysctl, O_RDWR);
	RET_ERR(fd == -1, "open(sysctl)",
		"sysctl:%s fd:%d errno:%d\n", sysctl, fd, errno);

	size = snprintf(buf, sizeof(buf), "%d", v);
	ret = write(fd, buf, size);
	/* close() may clobber errno; keep write()'s value for the report. */
	saved_errno = errno;
	close(fd);
	errno = saved_errno;
	RET_ERR(ret != size, "write(sysctl)",
		"sysctl:%s ret:%d size:%d errno:%d\n",
		sysctl, ret, size, errno);

	return 0;
}
205 
restore_sysctls(void)206 static void restore_sysctls(void)
207 {
208 	if (saved_tcp_fo != -1)
209 		write_int_sysctl(TCP_FO_SYSCTL, saved_tcp_fo);
210 	if (saved_tcp_syncookie != -1)
211 		write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, saved_tcp_syncookie);
212 }
213 
enable_fastopen(void)214 static int enable_fastopen(void)
215 {
216 	int fo;
217 
218 	fo = read_int_sysctl(TCP_FO_SYSCTL);
219 	if (fo < 0)
220 		return -1;
221 
222 	return write_int_sysctl(TCP_FO_SYSCTL, fo | 7);
223 }
224 
enable_syncookie(void)225 static int enable_syncookie(void)
226 {
227 	return write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, 2);
228 }
229 
disable_syncookie(void)230 static int disable_syncookie(void)
231 {
232 	return write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, 0);
233 }
234 
get_linum(void)235 static long get_linum(void)
236 {
237 	__u32 linum;
238 	int err;
239 
240 	err = bpf_map_lookup_elem(linum_map, &index_zero, &linum);
241 	RET_ERR(err < 0, "lookup_elem(linum_map)", "err:%d errno:%d\n",
242 		err, errno);
243 
244 	return linum;
245 }
246 
check_data(int type,sa_family_t family,const struct cmd * cmd,int cli_fd)247 static void check_data(int type, sa_family_t family, const struct cmd *cmd,
248 		       int cli_fd)
249 {
250 	struct data_check expected = {}, result;
251 	union sa46 cli_sa;
252 	socklen_t addrlen;
253 	int err;
254 
255 	addrlen = sizeof(cli_sa);
256 	err = getsockname(cli_fd, (struct sockaddr *)&cli_sa,
257 			  &addrlen);
258 	RET_IF(err < 0, "getsockname(cli_fd)", "err:%d errno:%d\n",
259 	       err, errno);
260 
261 	err = bpf_map_lookup_elem(data_check_map, &index_zero, &result);
262 	RET_IF(err < 0, "lookup_elem(data_check_map)", "err:%d errno:%d\n",
263 	       err, errno);
264 
265 	if (type == SOCK_STREAM) {
266 		expected.len = MIN_TCPHDR_LEN;
267 		expected.ip_protocol = IPPROTO_TCP;
268 	} else {
269 		expected.len = UDPHDR_LEN;
270 		expected.ip_protocol = IPPROTO_UDP;
271 	}
272 
273 	if (family == AF_INET6) {
274 		expected.eth_protocol = htons(ETH_P_IPV6);
275 		expected.bind_inany = !srv_sa.v6.sin6_addr.s6_addr32[3] &&
276 			!srv_sa.v6.sin6_addr.s6_addr32[2] &&
277 			!srv_sa.v6.sin6_addr.s6_addr32[1] &&
278 			!srv_sa.v6.sin6_addr.s6_addr32[0];
279 
280 		memcpy(&expected.skb_addrs[0], cli_sa.v6.sin6_addr.s6_addr32,
281 		       sizeof(cli_sa.v6.sin6_addr));
282 		memcpy(&expected.skb_addrs[4], &in6addr_loopback,
283 		       sizeof(in6addr_loopback));
284 		expected.skb_ports[0] = cli_sa.v6.sin6_port;
285 		expected.skb_ports[1] = srv_sa.v6.sin6_port;
286 	} else {
287 		expected.eth_protocol = htons(ETH_P_IP);
288 		expected.bind_inany = !srv_sa.v4.sin_addr.s_addr;
289 
290 		expected.skb_addrs[0] = cli_sa.v4.sin_addr.s_addr;
291 		expected.skb_addrs[1] = htonl(INADDR_LOOPBACK);
292 		expected.skb_ports[0] = cli_sa.v4.sin_port;
293 		expected.skb_ports[1] = srv_sa.v4.sin_port;
294 	}
295 
296 	if (memcmp(&result, &expected, offsetof(struct data_check,
297 						equal_check_end))) {
298 		printf("unexpected data_check\n");
299 		printf("  result: (0x%x, %u, %u)\n",
300 		       result.eth_protocol, result.ip_protocol,
301 		       result.bind_inany);
302 		printf("expected: (0x%x, %u, %u)\n",
303 		       expected.eth_protocol, expected.ip_protocol,
304 		       expected.bind_inany);
305 		RET_IF(1, "data_check result != expected",
306 		       "bpf_prog_linum:%ld\n", get_linum());
307 	}
308 
309 	RET_IF(!result.hash, "data_check result.hash empty",
310 	       "result.hash:%u", result.hash);
311 
312 	expected.len += cmd ? sizeof(*cmd) : 0;
313 	if (type == SOCK_STREAM)
314 		RET_IF(expected.len > result.len, "expected.len > result.len",
315 		       "expected.len:%u result.len:%u bpf_prog_linum:%ld\n",
316 		       expected.len, result.len, get_linum());
317 	else
318 		RET_IF(expected.len != result.len, "expected.len != result.len",
319 		       "expected.len:%u result.len:%u bpf_prog_linum:%ld\n",
320 		       expected.len, result.len, get_linum());
321 }
322 
result_to_str(enum result res)323 static const char *result_to_str(enum result res)
324 {
325 	switch (res) {
326 	case DROP_ERR_INNER_MAP:
327 		return "DROP_ERR_INNER_MAP";
328 	case DROP_ERR_SKB_DATA:
329 		return "DROP_ERR_SKB_DATA";
330 	case DROP_ERR_SK_SELECT_REUSEPORT:
331 		return "DROP_ERR_SK_SELECT_REUSEPORT";
332 	case DROP_MISC:
333 		return "DROP_MISC";
334 	case PASS:
335 		return "PASS";
336 	case PASS_ERR_SK_SELECT_REUSEPORT:
337 		return "PASS_ERR_SK_SELECT_REUSEPORT";
338 	default:
339 		return "UNKNOWN";
340 	}
341 }
342 
check_results(void)343 static void check_results(void)
344 {
345 	__u32 results[NR_RESULTS];
346 	__u32 i, broken = 0;
347 	int err;
348 
349 	for (i = 0; i < NR_RESULTS; i++) {
350 		err = bpf_map_lookup_elem(result_map, &i, &results[i]);
351 		RET_IF(err < 0, "lookup_elem(result_map)",
352 		       "i:%u err:%d errno:%d\n", i, err, errno);
353 	}
354 
355 	for (i = 0; i < NR_RESULTS; i++) {
356 		if (results[i] != expected_results[i]) {
357 			broken = i;
358 			break;
359 		}
360 	}
361 
362 	if (i == NR_RESULTS)
363 		return;
364 
365 	printf("unexpected result\n");
366 	printf(" result: [");
367 	printf("%u", results[0]);
368 	for (i = 1; i < NR_RESULTS; i++)
369 		printf(", %u", results[i]);
370 	printf("]\n");
371 
372 	printf("expected: [");
373 	printf("%u", expected_results[0]);
374 	for (i = 1; i < NR_RESULTS; i++)
375 		printf(", %u", expected_results[i]);
376 	printf("]\n");
377 
378 	printf("mismatch on %s (bpf_prog_linum:%ld)\n", result_to_str(broken),
379 	       get_linum());
380 
381 	CHECK_FAIL(true);
382 }
383 
send_data(int type,sa_family_t family,void * data,size_t len,enum result expected)384 static int send_data(int type, sa_family_t family, void *data, size_t len,
385 		     enum result expected)
386 {
387 	union sa46 cli_sa;
388 	int fd, err;
389 
390 	fd = socket(family, type, 0);
391 	RET_ERR(fd == -1, "socket()", "fd:%d errno:%d\n", fd, errno);
392 
393 	sa46_init_loopback(&cli_sa, family);
394 	err = bind(fd, (struct sockaddr *)&cli_sa, sizeof(cli_sa));
395 	RET_ERR(fd == -1, "bind(cli_sa)", "err:%d errno:%d\n", err, errno);
396 
397 	err = sendto(fd, data, len, MSG_FASTOPEN, (struct sockaddr *)&srv_sa,
398 		     sizeof(srv_sa));
399 	RET_ERR(err != len && expected >= PASS,
400 		"sendto()", "family:%u err:%d errno:%d expected:%d\n",
401 		family, err, errno, expected);
402 
403 	return fd;
404 }
405 
do_test(int type,sa_family_t family,struct cmd * cmd,enum result expected)406 static void do_test(int type, sa_family_t family, struct cmd *cmd,
407 		    enum result expected)
408 {
409 	int nev, srv_fd, cli_fd;
410 	struct epoll_event ev;
411 	struct cmd rcv_cmd;
412 	ssize_t nread;
413 
414 	cli_fd = send_data(type, family, cmd, cmd ? sizeof(*cmd) : 0,
415 			   expected);
416 	if (cli_fd < 0)
417 		return;
418 	nev = epoll_wait(epfd, &ev, 1, expected >= PASS ? 5 : 0);
419 	RET_IF((nev <= 0 && expected >= PASS) ||
420 	       (nev > 0 && expected < PASS),
421 	       "nev <> expected",
422 	       "nev:%d expected:%d type:%d family:%d data:(%d, %d)\n",
423 	       nev, expected, type, family,
424 	       cmd ? cmd->reuseport_index : -1,
425 	       cmd ? cmd->pass_on_failure : -1);
426 	check_results();
427 	check_data(type, family, cmd, cli_fd);
428 
429 	if (expected < PASS)
430 		return;
431 
432 	RET_IF(expected != PASS_ERR_SK_SELECT_REUSEPORT &&
433 	       cmd->reuseport_index != ev.data.u32,
434 	       "check cmd->reuseport_index",
435 	       "cmd:(%u, %u) ev.data.u32:%u\n",
436 	       cmd->pass_on_failure, cmd->reuseport_index, ev.data.u32);
437 
438 	srv_fd = sk_fds[ev.data.u32];
439 	if (type == SOCK_STREAM) {
440 		int new_fd = accept(srv_fd, NULL, 0);
441 
442 		RET_IF(new_fd == -1, "accept(srv_fd)",
443 		       "ev.data.u32:%u new_fd:%d errno:%d\n",
444 		       ev.data.u32, new_fd, errno);
445 
446 		nread = recv(new_fd, &rcv_cmd, sizeof(rcv_cmd), MSG_DONTWAIT);
447 		RET_IF(nread != sizeof(rcv_cmd),
448 		       "recv(new_fd)",
449 		       "ev.data.u32:%u nread:%zd sizeof(rcv_cmd):%zu errno:%d\n",
450 		       ev.data.u32, nread, sizeof(rcv_cmd), errno);
451 
452 		close(new_fd);
453 	} else {
454 		nread = recv(srv_fd, &rcv_cmd, sizeof(rcv_cmd), MSG_DONTWAIT);
455 		RET_IF(nread != sizeof(rcv_cmd),
456 		       "recv(sk_fds)",
457 		       "ev.data.u32:%u nread:%zd sizeof(rcv_cmd):%zu errno:%d\n",
458 		       ev.data.u32, nread, sizeof(rcv_cmd), errno);
459 	}
460 
461 	close(cli_fd);
462 }
463 
test_err_inner_map(int type,sa_family_t family)464 static void test_err_inner_map(int type, sa_family_t family)
465 {
466 	struct cmd cmd = {
467 		.reuseport_index = 0,
468 		.pass_on_failure = 0,
469 	};
470 
471 	expected_results[DROP_ERR_INNER_MAP]++;
472 	do_test(type, family, &cmd, DROP_ERR_INNER_MAP);
473 }
474 
test_err_skb_data(int type,sa_family_t family)475 static void test_err_skb_data(int type, sa_family_t family)
476 {
477 	expected_results[DROP_ERR_SKB_DATA]++;
478 	do_test(type, family, NULL, DROP_ERR_SKB_DATA);
479 }
480 
test_err_sk_select_port(int type,sa_family_t family)481 static void test_err_sk_select_port(int type, sa_family_t family)
482 {
483 	struct cmd cmd = {
484 		.reuseport_index = REUSEPORT_ARRAY_SIZE,
485 		.pass_on_failure = 0,
486 	};
487 
488 	expected_results[DROP_ERR_SK_SELECT_REUSEPORT]++;
489 	do_test(type, family, &cmd, DROP_ERR_SK_SELECT_REUSEPORT);
490 }
491 
test_pass(int type,sa_family_t family)492 static void test_pass(int type, sa_family_t family)
493 {
494 	struct cmd cmd;
495 	int i;
496 
497 	cmd.pass_on_failure = 0;
498 	for (i = 0; i < REUSEPORT_ARRAY_SIZE; i++) {
499 		expected_results[PASS]++;
500 		cmd.reuseport_index = i;
501 		do_test(type, family, &cmd, PASS);
502 	}
503 }
504 
test_syncookie(int type,sa_family_t family)505 static void test_syncookie(int type, sa_family_t family)
506 {
507 	int err, tmp_index = 1;
508 	struct cmd cmd = {
509 		.reuseport_index = 0,
510 		.pass_on_failure = 0,
511 	};
512 
513 	/*
514 	 * +1 for TCP-SYN and
515 	 * +1 for the TCP-ACK (ack the syncookie)
516 	 */
517 	expected_results[PASS] += 2;
518 	enable_syncookie();
519 	/*
520 	 * Simulate TCP-SYN and TCP-ACK are handled by two different sk:
521 	 * TCP-SYN: select sk_fds[tmp_index = 1] tmp_index is from the
522 	 *          tmp_index_ovr_map
523 	 * TCP-ACK: select sk_fds[reuseport_index = 0] reuseport_index
524 	 *          is from the cmd.reuseport_index
525 	 */
526 	err = bpf_map_update_elem(tmp_index_ovr_map, &index_zero,
527 				  &tmp_index, BPF_ANY);
528 	RET_IF(err < 0, "update_elem(tmp_index_ovr_map, 0, 1)",
529 	       "err:%d errno:%d\n", err, errno);
530 	do_test(type, family, &cmd, PASS);
531 	err = bpf_map_lookup_elem(tmp_index_ovr_map, &index_zero,
532 				  &tmp_index);
533 	RET_IF(err < 0 || tmp_index >= 0,
534 	       "lookup_elem(tmp_index_ovr_map)",
535 	       "err:%d errno:%d tmp_index:%d\n",
536 	       err, errno, tmp_index);
537 	disable_syncookie();
538 }
539 
test_pass_on_err(int type,sa_family_t family)540 static void test_pass_on_err(int type, sa_family_t family)
541 {
542 	struct cmd cmd = {
543 		.reuseport_index = REUSEPORT_ARRAY_SIZE,
544 		.pass_on_failure = 1,
545 	};
546 
547 	expected_results[PASS_ERR_SK_SELECT_REUSEPORT] += 1;
548 	do_test(type, family, &cmd, PASS_ERR_SK_SELECT_REUSEPORT);
549 }
550 
/*
 * Detach the reuseport BPF program through sk_fds[0], check that a
 * second detach through sk_fds[1] fails with ENOENT, then send a
 * packet and check that it is still delivered while the sum of the
 * result_map counters stays the same.
 *
 * The subtest is skipped when SO_DETACH_REUSEPORT_BPF is not defined.
 * The client socket is closed on every path once it has been created.
 */
static void test_detach_bpf(int type, sa_family_t family)
{
#ifdef SO_DETACH_REUSEPORT_BPF
	__u32 nr_run_before = 0, nr_run_after = 0, tmp, i;
	struct epoll_event ev;
	int cli_fd, err, nev;
	struct cmd cmd = {};
	int optvalue = 0;

	err = setsockopt(sk_fds[0], SOL_SOCKET, SO_DETACH_REUSEPORT_BPF,
			 &optvalue, sizeof(optvalue));
	RET_IF(err == -1, "setsockopt(SO_DETACH_REUSEPORT_BPF)",
	       "err:%d errno:%d\n", err, errno);

	/* Nothing is attached any more, so this one must fail. */
	err = setsockopt(sk_fds[1], SOL_SOCKET, SO_DETACH_REUSEPORT_BPF,
			 &optvalue, sizeof(optvalue));
	RET_IF(err == 0 || errno != ENOENT,
	       "setsockopt(SO_DETACH_REUSEPORT_BPF)",
	       "err:%d errno:%d\n", err, errno);

	for (i = 0; i < NR_RESULTS; i++) {
		err = bpf_map_lookup_elem(result_map, &i, &tmp);
		RET_IF(err < 0, "lookup_elem(result_map)",
		       "i:%u err:%d errno:%d\n", i, err, errno);
		nr_run_before += tmp;
	}

	cli_fd = send_data(type, family, &cmd, sizeof(cmd), PASS);
	if (cli_fd < 0)
		return;

	/* From here on every exit goes through "out" to close cli_fd. */
	nev = epoll_wait(epfd, &ev, 1, 5);
	if (CHECK_FAIL(nev <= 0)) {
		printf("nev <= 0 nev:%d expected:1 type:%d family:%d data:(0, 0)\n",
		       nev, type, family);
		goto out;
	}

	for (i = 0; i < NR_RESULTS; i++) {
		err = bpf_map_lookup_elem(result_map, &i, &tmp);
		if (CHECK_FAIL(err < 0)) {
			printf("lookup_elem(result_map) i:%u err:%d errno:%d\n",
			       i, err, errno);
			goto out;
		}
		nr_run_after += tmp;
	}

	if (CHECK_FAIL(nr_run_before != nr_run_after))
		printf("nr_run_before != nr_run_after nr_run_before:%u nr_run_after:%u\n",
		       nr_run_before, nr_run_after);
out:
	close(cli_fd);
#else
	test__skip();
#endif
}
603 
prepare_sk_fds(int type,sa_family_t family,bool inany)604 static void prepare_sk_fds(int type, sa_family_t family, bool inany)
605 {
606 	const int first = REUSEPORT_ARRAY_SIZE - 1;
607 	int i, err, optval = 1;
608 	struct epoll_event ev;
609 	socklen_t addrlen;
610 
611 	if (inany)
612 		sa46_init_inany(&srv_sa, family);
613 	else
614 		sa46_init_loopback(&srv_sa, family);
615 	addrlen = sizeof(srv_sa);
616 
617 	/*
618 	 * The sk_fds[] is filled from the back such that the order
619 	 * is exactly opposite to the (struct sock_reuseport *)reuse->socks[].
620 	 */
621 	for (i = first; i >= 0; i--) {
622 		sk_fds[i] = socket(family, type, 0);
623 		RET_IF(sk_fds[i] == -1, "socket()", "sk_fds[%d]:%d errno:%d\n",
624 		       i, sk_fds[i], errno);
625 		err = setsockopt(sk_fds[i], SOL_SOCKET, SO_REUSEPORT,
626 				 &optval, sizeof(optval));
627 		RET_IF(err == -1, "setsockopt(SO_REUSEPORT)",
628 		       "sk_fds[%d] err:%d errno:%d\n",
629 		       i, err, errno);
630 
631 		if (i == first) {
632 			err = setsockopt(sk_fds[i], SOL_SOCKET,
633 					 SO_ATTACH_REUSEPORT_EBPF,
634 					 &select_by_skb_data_prog,
635 					 sizeof(select_by_skb_data_prog));
636 			RET_IF(err < 0, "setsockopt(SO_ATTACH_REUEPORT_EBPF)",
637 			       "err:%d errno:%d\n", err, errno);
638 		}
639 
640 		err = bind(sk_fds[i], (struct sockaddr *)&srv_sa, addrlen);
641 		RET_IF(err < 0, "bind()", "sk_fds[%d] err:%d errno:%d\n",
642 		       i, err, errno);
643 
644 		if (type == SOCK_STREAM) {
645 			err = listen(sk_fds[i], 10);
646 			RET_IF(err < 0, "listen()",
647 			       "sk_fds[%d] err:%d errno:%d\n",
648 			       i, err, errno);
649 		}
650 
651 		err = bpf_map_update_elem(reuseport_array, &i, &sk_fds[i],
652 					  BPF_NOEXIST);
653 		RET_IF(err < 0, "update_elem(reuseport_array)",
654 		       "sk_fds[%d] err:%d errno:%d\n", i, err, errno);
655 
656 		if (i == first) {
657 			socklen_t addrlen = sizeof(srv_sa);
658 
659 			err = getsockname(sk_fds[i], (struct sockaddr *)&srv_sa,
660 					  &addrlen);
661 			RET_IF(err == -1, "getsockname()",
662 			       "sk_fds[%d] err:%d errno:%d\n", i, err, errno);
663 		}
664 	}
665 
666 	epfd = epoll_create(1);
667 	RET_IF(epfd == -1, "epoll_create(1)",
668 	       "epfd:%d errno:%d\n", epfd, errno);
669 
670 	ev.events = EPOLLIN;
671 	for (i = 0; i < REUSEPORT_ARRAY_SIZE; i++) {
672 		ev.data.u32 = i;
673 		err = epoll_ctl(epfd, EPOLL_CTL_ADD, sk_fds[i], &ev);
674 		RET_IF(err, "epoll_ctl(EPOLL_CTL_ADD)", "sk_fds[%d]\n", i);
675 	}
676 }
677 
setup_per_test(int type,sa_family_t family,bool inany,bool no_inner_map)678 static void setup_per_test(int type, sa_family_t family, bool inany,
679 			   bool no_inner_map)
680 {
681 	int ovr = -1, err;
682 
683 	prepare_sk_fds(type, family, inany);
684 	err = bpf_map_update_elem(tmp_index_ovr_map, &index_zero, &ovr,
685 				  BPF_ANY);
686 	RET_IF(err < 0, "update_elem(tmp_index_ovr_map, 0, -1)",
687 	       "err:%d errno:%d\n", err, errno);
688 
689 	/* Install reuseport_array to outer_map? */
690 	if (no_inner_map)
691 		return;
692 
693 	err = bpf_map_update_elem(outer_map, &index_zero, &reuseport_array,
694 				  BPF_ANY);
695 	RET_IF(err < 0, "update_elem(outer_map, 0, reuseport_array)",
696 	       "err:%d errno:%d\n", err, errno);
697 }
698 
cleanup_per_test(bool no_inner_map)699 static void cleanup_per_test(bool no_inner_map)
700 {
701 	int i, err, zero = 0;
702 
703 	memset(expected_results, 0, sizeof(expected_results));
704 
705 	for (i = 0; i < NR_RESULTS; i++) {
706 		err = bpf_map_update_elem(result_map, &i, &zero, BPF_ANY);
707 		RET_IF(err, "reset elem in result_map",
708 		       "i:%u err:%d errno:%d\n", i, err, errno);
709 	}
710 
711 	err = bpf_map_update_elem(linum_map, &zero, &zero, BPF_ANY);
712 	RET_IF(err, "reset line number in linum_map", "err:%d errno:%d\n",
713 	       err, errno);
714 
715 	for (i = 0; i < REUSEPORT_ARRAY_SIZE; i++)
716 		close(sk_fds[i]);
717 	close(epfd);
718 
719 	/* Delete reuseport_array from outer_map? */
720 	if (no_inner_map)
721 		return;
722 
723 	err = bpf_map_delete_elem(outer_map, &index_zero);
724 	RET_IF(err < 0, "delete_elem(outer_map)",
725 	       "err:%d errno:%d\n", err, errno);
726 }
727 
cleanup(void)728 static void cleanup(void)
729 {
730 	if (outer_map >= 0) {
731 		close(outer_map);
732 		outer_map = -1;
733 	}
734 
735 	if (reuseport_array >= 0) {
736 		close(reuseport_array);
737 		reuseport_array = -1;
738 	}
739 
740 	if (obj) {
741 		bpf_object__close(obj);
742 		obj = NULL;
743 	}
744 
745 	memset(expected_results, 0, sizeof(expected_results));
746 }
747 
/*
 * Name of a socket-holding map type as used in subtest names; any
 * other type yields "unknown".
 */
static const char *maptype_str(enum bpf_map_type type)
{
	if (type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY)
		return "reuseport_sockarray";
	if (type == BPF_MAP_TYPE_SOCKMAP)
		return "sockmap";
	if (type == BPF_MAP_TYPE_SOCKHASH)
		return "sockhash";

	return "unknown";
}
761 
/*
 * Name of an address family as used in subtest names; anything other
 * than AF_INET and AF_INET6 yields "unknown".
 */
static const char *family_str(sa_family_t family)
{
	if (family == AF_INET)
		return "IPv4";
	if (family == AF_INET6)
		return "IPv6";

	return "unknown";
}
773 
/*
 * Protocol name for a socket type as used in subtest names; anything
 * other than SOCK_STREAM and SOCK_DGRAM yields "unknown".
 */
static const char *sotype_str(int sotype)
{
	if (sotype == SOCK_STREAM)
		return "TCP";
	if (sotype == SOCK_DGRAM)
		return "UDP";

	return "unknown";
}
785 
786 #define TEST_INIT(fn_, ...) { .fn = fn_, .name = #fn_, __VA_ARGS__ }
787 
test_config(int sotype,sa_family_t family,bool inany)788 static void test_config(int sotype, sa_family_t family, bool inany)
789 {
790 	const struct test {
791 		void (*fn)(int sotype, sa_family_t family);
792 		const char *name;
793 		bool no_inner_map;
794 		int need_sotype;
795 	} tests[] = {
796 		TEST_INIT(test_err_inner_map,
797 			  .no_inner_map = true),
798 		TEST_INIT(test_err_skb_data),
799 		TEST_INIT(test_err_sk_select_port),
800 		TEST_INIT(test_pass),
801 		TEST_INIT(test_syncookie,
802 			  .need_sotype = SOCK_STREAM),
803 		TEST_INIT(test_pass_on_err),
804 		TEST_INIT(test_detach_bpf),
805 	};
806 	char s[MAX_TEST_NAME];
807 	const struct test *t;
808 
809 	for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
810 		if (t->need_sotype && t->need_sotype != sotype)
811 			continue; /* test not compatible with socket type */
812 
813 		snprintf(s, sizeof(s), "%s %s/%s %s %s",
814 			 maptype_str(inner_map_type),
815 			 family_str(family), sotype_str(sotype),
816 			 inany ? "INANY" : "LOOPBACK", t->name);
817 
818 		if (!test__start_subtest(s))
819 			continue;
820 
821 		setup_per_test(sotype, family, inany, t->no_inner_map);
822 		t->fn(sotype, family);
823 		cleanup_per_test(t->no_inner_map);
824 	}
825 }
826 
/* Readable spelling for the "inany" member of a configuration. */
#define BIND_INANY true

/*
 * Run test_config() for every socket type / address family / bind
 * address combination in the table below.
 */
static void test_all(void)
{
	const struct config {
		int sotype;
		sa_family_t family;
		bool inany;
	} configs[] = {
		{ .sotype = SOCK_STREAM, .family = AF_INET },
		{ .sotype = SOCK_STREAM, .family = AF_INET,
		  .inany = BIND_INANY },
		{ .sotype = SOCK_STREAM, .family = AF_INET6 },
		{ .sotype = SOCK_STREAM, .family = AF_INET6,
		  .inany = BIND_INANY },
		{ .sotype = SOCK_DGRAM, .family = AF_INET },
		{ .sotype = SOCK_DGRAM, .family = AF_INET6 },
	};
	size_t n;

	for (n = 0; n < ARRAY_SIZE(configs); n++)
		test_config(configs[n].sotype, configs[n].family,
			    configs[n].inany);
}
848 
/*
 * Run all configurations with @mt as the type of the socket-holding
 * map.  cleanup() runs whether or not the preparation succeeded.
 */
void test_map_type(enum bpf_map_type mt)
{
	if (!create_maps(mt) && !prepare_bpf_obj())
		test_all();

	cleanup();
}
860 
test_select_reuseport(void)861 void test_select_reuseport(void)
862 {
863 	saved_tcp_fo = read_int_sysctl(TCP_FO_SYSCTL);
864 	if (saved_tcp_fo < 0)
865 		goto out;
866 	saved_tcp_syncookie = read_int_sysctl(TCP_SYNCOOKIE_SYSCTL);
867 	if (saved_tcp_syncookie < 0)
868 		goto out;
869 
870 	if (enable_fastopen())
871 		goto out;
872 	if (disable_syncookie())
873 		goto out;
874 
875 	test_map_type(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY);
876 	test_map_type(BPF_MAP_TYPE_SOCKMAP);
877 	test_map_type(BPF_MAP_TYPE_SOCKHASH);
878 out:
879 	restore_sysctls();
880 }
881