1 // SPDX-License-Identifier: GPL-2.0
2 // Copyright (c) 2020 Cloudflare
3 /*
4 * Test suite for SOCKMAP/SOCKHASH holding listening sockets.
5 * Covers:
 * 1. BPF map operations - bpf_map_{update,lookup,delete}_elem
7 * 2. BPF redirect helpers - bpf_{sk,msg}_redirect_map
8 * 3. BPF reuseport helper - bpf_sk_select_reuseport
9 */
10
11 #include <linux/compiler.h>
12 #include <errno.h>
13 #include <error.h>
14 #include <limits.h>
15 #include <netinet/in.h>
16 #include <pthread.h>
17 #include <stdlib.h>
18 #include <string.h>
19 #include <sys/select.h>
20 #include <unistd.h>
21
22 #include <bpf/bpf.h>
23 #include <bpf/libbpf.h>
24
25 #include "bpf_util.h"
26 #include "test_progs.h"
27 #include "test_sockmap_listen.skel.h"
28
29 #define IO_TIMEOUT_SEC 30
30 #define MAX_STRERR_LEN 256
31 #define MAX_TEST_NAME 80
32
/*
 * Failure reporting helpers (GNU statement expressions).
 *
 * _FAIL() prints the formatted message through error_at_line(), passing
 * status 0, the given error number, the calling function's name in the
 * file-name argument and the current line, and then invokes
 * CHECK_FAIL(true) (from test_progs.h; presumably records the failure).
 */
#define _FAIL(errnum, fmt...) \
	({ \
		error_at_line(0, (errnum), __func__, __LINE__, fmt); \
		CHECK_FAIL(true); \
	})
/* Report a failure with error number 0, i.e. no errno description. */
#define FAIL(fmt...) _FAIL(0, fmt)
/* Report a failure together with the current value of errno. */
#define FAIL_ERRNO(fmt...) _FAIL(errno, fmt)
/* Report a failure, formatting a libbpf error code with libbpf_strerror(). */
#define FAIL_LIBBPF(err, msg) \
	({ \
		char __buf[MAX_STRERR_LEN]; \
		libbpf_strerror((err), __buf, sizeof(__buf)); \
		FAIL("%s: %s", (msg), __buf); \
	})
46
/* Wrappers that fail the test on error and report it. */

/*
 * Every wrapper in this group is a statement expression: it performs the
 * call, reports through FAIL_ERRNO() when the result is -1, and then
 * yields the result so the caller can still branch on it.
 */

/* accept() after waiting up to IO_TIMEOUT_SEC seconds via accept_timeout(). */
#define xaccept_nonblock(fd, addr, len) \
	({ \
		int __ret = \
			accept_timeout((fd), (addr), (len), IO_TIMEOUT_SEC); \
		if (__ret == -1) \
			FAIL_ERRNO("accept"); \
		__ret; \
	})

/* bind() with failure reporting. */
#define xbind(fd, addr, len) \
	({ \
		int __ret = bind((fd), (addr), (len)); \
		if (__ret == -1) \
			FAIL_ERRNO("bind"); \
		__ret; \
	})

/* close() with failure reporting. */
#define xclose(fd) \
	({ \
		int __ret = close((fd)); \
		if (__ret == -1) \
			FAIL_ERRNO("close"); \
		__ret; \
	})

/* connect() with failure reporting. */
#define xconnect(fd, addr, len) \
	({ \
		int __ret = connect((fd), (addr), (len)); \
		if (__ret == -1) \
			FAIL_ERRNO("connect"); \
		__ret; \
	})

/* getsockname() with failure reporting. */
#define xgetsockname(fd, addr, len) \
	({ \
		int __ret = getsockname((fd), (addr), (len)); \
		if (__ret == -1) \
			FAIL_ERRNO("getsockname"); \
		__ret; \
	})

/* getsockopt(); the option name is stringified into the failure message. */
#define xgetsockopt(fd, level, name, val, len) \
	({ \
		int __ret = getsockopt((fd), (level), (name), (val), (len)); \
		if (__ret == -1) \
			FAIL_ERRNO("getsockopt(" #name ")"); \
		__ret; \
	})

/* listen() with failure reporting. */
#define xlisten(fd, backlog) \
	({ \
		int __ret = listen((fd), (backlog)); \
		if (__ret == -1) \
			FAIL_ERRNO("listen"); \
		__ret; \
	})

/* setsockopt(); the option name is stringified into the failure message. */
#define xsetsockopt(fd, level, name, val, len) \
	({ \
		int __ret = setsockopt((fd), (level), (name), (val), (len)); \
		if (__ret == -1) \
			FAIL_ERRNO("setsockopt(" #name ")"); \
		__ret; \
	})

/* send() with failure reporting; yields the ssize_t result. */
#define xsend(fd, buf, len, flags) \
	({ \
		ssize_t __ret = send((fd), (buf), (len), (flags)); \
		if (__ret == -1) \
			FAIL_ERRNO("send"); \
		__ret; \
	})

/* recv() after waiting up to IO_TIMEOUT_SEC seconds via recv_timeout(). */
#define xrecv_nonblock(fd, buf, len, flags) \
	({ \
		ssize_t __ret = recv_timeout((fd), (buf), (len), (flags), \
					     IO_TIMEOUT_SEC); \
		if (__ret == -1) \
			FAIL_ERRNO("recv"); \
		__ret; \
	})
130
/*
 * socket() wrapper: reports through FAIL_ERRNO() when socket() returns -1
 * and yields the result either way. Arguments are parenthesized, matching
 * the other wrappers in this file, so any expression can be passed safely.
 */
#define xsocket(family, sotype, flags) \
	({ \
		int __ret = socket((family), (sotype), (flags)); \
		if (__ret == -1) \
			FAIL_ERRNO("socket"); \
		__ret; \
	})
138
/*
 * BPF syscall wrappers. Each one is a statement expression that performs
 * the call, reports through FAIL_ERRNO() when the result is negative, and
 * yields the result.
 */

/* bpf_map_delete_elem() with failure reporting. */
#define xbpf_map_delete_elem(fd, key) \
	({ \
		int __ret = bpf_map_delete_elem((fd), (key)); \
		if (__ret < 0) \
			FAIL_ERRNO("map_delete"); \
		__ret; \
	})

/* bpf_map_lookup_elem() with failure reporting. */
#define xbpf_map_lookup_elem(fd, key, val) \
	({ \
		int __ret = bpf_map_lookup_elem((fd), (key), (val)); \
		if (__ret < 0) \
			FAIL_ERRNO("map_lookup"); \
		__ret; \
	})

/* bpf_map_update_elem() with failure reporting. */
#define xbpf_map_update_elem(fd, key, val, flags) \
	({ \
		int __ret = bpf_map_update_elem((fd), (key), (val), (flags)); \
		if (__ret < 0) \
			FAIL_ERRNO("map_update"); \
		__ret; \
	})

/* bpf_prog_attach(); the attach type is stringified into the message. */
#define xbpf_prog_attach(prog, target, type, flags) \
	({ \
		int __ret = \
			bpf_prog_attach((prog), (target), (type), (flags)); \
		if (__ret < 0) \
			FAIL_ERRNO("prog_attach(" #type ")"); \
		__ret; \
	})

/* bpf_prog_detach2(); the attach type is stringified into the message. */
#define xbpf_prog_detach2(prog, target, type) \
	({ \
		int __ret = bpf_prog_detach2((prog), (target), (type)); \
		if (__ret < 0) \
			FAIL_ERRNO("prog_detach2(" #type ")"); \
		__ret; \
	})

/*
 * pthread wrappers. The pthread call's return value is copied into errno
 * before reporting, so that FAIL_ERRNO() describes that value; any
 * non-zero result is treated as a failure.
 */
#define xpthread_create(thread, attr, func, arg) \
	({ \
		int __ret = pthread_create((thread), (attr), (func), (arg)); \
		errno = __ret; \
		if (__ret) \
			FAIL_ERRNO("pthread_create"); \
		__ret; \
	})

/* pthread_join() with the same errno handling as xpthread_create(). */
#define xpthread_join(thread, retval) \
	({ \
		int __ret = pthread_join((thread), (retval)); \
		errno = __ret; \
		if (__ret) \
			FAIL_ERRNO("pthread_join"); \
		__ret; \
	})
197
/*
 * Wait until @fd becomes readable or @timeout_sec seconds elapse.
 *
 * Returns 0 when select() reports exactly one ready descriptor, -1
 * otherwise. On timeout errno is set to ETIME; on a select() error errno
 * is left as select() set it. A descriptor that does not fit into an
 * fd_set (negative, or >= FD_SETSIZE) is rejected with errno set to EBADF
 * rather than handed to FD_SET(), whose behavior would be undefined.
 */
static int poll_read(int fd, unsigned int timeout_sec)
{
	struct timeval timeout = { .tv_sec = timeout_sec };
	fd_set rfds;
	int r;

	if (fd < 0 || fd >= FD_SETSIZE) {
		errno = EBADF;
		return -1;
	}

	FD_ZERO(&rfds);
	FD_SET(fd, &rfds);

	r = select(fd + 1, &rfds, NULL, NULL, &timeout);
	if (r == 0)
		errno = ETIME;

	return r == 1 ? 0 : -1;
}
213
/*
 * accept() on @fd after waiting up to @timeout_sec seconds for it to become
 * readable. Returns the accepted descriptor, or -1 when either the wait
 * (see poll_read()) or accept() itself fails.
 */
static int accept_timeout(int fd, struct sockaddr *addr, socklen_t *len,
			  unsigned int timeout_sec)
{
	int not_ready = poll_read(fd, timeout_sec);

	return not_ready ? -1 : accept(fd, addr, len);
}
222
/*
 * recv() on @fd after waiting up to @timeout_sec seconds for it to become
 * readable. Returns the recv() result converted to int, or -1 when the
 * wait (see poll_read()) fails or times out.
 */
static int recv_timeout(int fd, void *buf, size_t len, int flags,
			unsigned int timeout_sec)
{
	int not_ready = poll_read(fd, timeout_sec);

	return not_ready ? -1 : recv(fd, buf, len, flags);
}
231
/*
 * Fill @ss with the IPv4 loopback address and port 0, zeroing the rest of
 * the storage, and store the size of struct sockaddr_in in @len.
 */
static void init_addr_loopback4(struct sockaddr_storage *ss, socklen_t *len)
{
	struct sockaddr_in *sin = (struct sockaddr_in *)ss;

	memset(ss, 0, sizeof(*ss));

	sin->sin_family = AF_INET;
	sin->sin_port = 0;
	sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK);

	*len = sizeof(struct sockaddr_in);
}
241
/*
 * Fill @ss with the IPv6 loopback address and port 0, zeroing the rest of
 * the storage, and store the size of struct sockaddr_in6 in @len.
 */
static void init_addr_loopback6(struct sockaddr_storage *ss, socklen_t *len)
{
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)ss;

	memset(ss, 0, sizeof(*ss));

	sin6->sin6_family = AF_INET6;
	sin6->sin6_port = 0;
	sin6->sin6_addr = in6addr_loopback;

	*len = sizeof(struct sockaddr_in6);
}
251
/*
 * Initialize @ss / @len with the loopback address of @family (AF_INET or
 * AF_INET6) and port 0.
 *
 * Any other family is reported as a test failure. In that case @ss is
 * zeroed and @len set to 0 so that callers, which go on to use both
 * without checking, never read uninitialized memory.
 */
static void init_addr_loopback(int family, struct sockaddr_storage *ss,
			       socklen_t *len)
{
	switch (family) {
	case AF_INET:
		init_addr_loopback4(ss, len);
		return;
	case AF_INET6:
		init_addr_loopback6(ss, len);
		return;
	default:
		memset(ss, 0, sizeof(*ss));
		*len = 0;
		FAIL("unsupported address family %d", family);
	}
}
266
/* View a sockaddr_storage through the generic struct sockaddr pointer type. */
static inline struct sockaddr *sockaddr(struct sockaddr_storage *ss)
{
	void *generic = ss;

	return generic;
}
271
enable_reuseport(int s,int progfd)272 static int enable_reuseport(int s, int progfd)
273 {
274 int err, one = 1;
275
276 err = xsetsockopt(s, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
277 if (err)
278 return -1;
279 err = xsetsockopt(s, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &progfd,
280 sizeof(progfd));
281 if (err)
282 return -1;
283
284 return 0;
285 }
286
/*
 * Create a socket of type @sotype in @family and bind it to the loopback
 * address with port 0.
 *
 * When @progfd is non-negative, SO_REUSEPORT is enabled and @progfd is
 * attached as the reuseport program before binding; a failure to do so
 * now aborts the setup instead of being ignored. Sockets whose type has
 * the SOCK_DGRAM bit set are returned right after bind(); all others are
 * additionally put into the listening state.
 *
 * Returns the socket descriptor, or -1 on failure (already reported).
 */
static int socket_loopback_reuseport(int family, int sotype, int progfd)
{
	struct sockaddr_storage addr;
	socklen_t len;
	int err, s;

	init_addr_loopback(family, &addr, &len);

	s = xsocket(family, sotype, 0);
	if (s == -1)
		return -1;

	if (progfd >= 0) {
		err = enable_reuseport(s, progfd);
		if (err)
			goto close;
	}

	err = xbind(s, sockaddr(&addr), len);
	if (err)
		goto close;

	if (sotype & SOCK_DGRAM)
		return s;

	err = xlisten(s, SOMAXCONN);
	if (err)
		goto close;

	return s;
close:
	xclose(s);
	return -1;
}
318
/*
 * Same as socket_loopback_reuseport() but without a reuseport program:
 * a negative program fd is passed so the reuseport setup is skipped.
 */
static int socket_loopback(int family, int sotype)
{
	const int no_reuseport_prog = -1;

	return socket_loopback_reuseport(family, sotype, no_reuseport_prog);
}
323
test_insert_invalid(int family,int sotype,int mapfd)324 static void test_insert_invalid(int family, int sotype, int mapfd)
325 {
326 u32 key = 0;
327 u64 value;
328 int err;
329
330 value = -1;
331 err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
332 if (!err || errno != EINVAL)
333 FAIL_ERRNO("map_update: expected EINVAL");
334
335 value = INT_MAX;
336 err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
337 if (!err || errno != EBADF)
338 FAIL_ERRNO("map_update: expected EBADF");
339 }
340
test_insert_opened(int family,int sotype,int mapfd)341 static void test_insert_opened(int family, int sotype, int mapfd)
342 {
343 u32 key = 0;
344 u64 value;
345 int err, s;
346
347 s = xsocket(family, sotype, 0);
348 if (s == -1)
349 return;
350
351 errno = 0;
352 value = s;
353 err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
354 if (sotype == SOCK_STREAM) {
355 if (!err || errno != EOPNOTSUPP)
356 FAIL_ERRNO("map_update: expected EOPNOTSUPP");
357 } else if (err)
358 FAIL_ERRNO("map_update: expected success");
359 xclose(s);
360 }
361
test_insert_bound(int family,int sotype,int mapfd)362 static void test_insert_bound(int family, int sotype, int mapfd)
363 {
364 struct sockaddr_storage addr;
365 socklen_t len;
366 u32 key = 0;
367 u64 value;
368 int err, s;
369
370 init_addr_loopback(family, &addr, &len);
371
372 s = xsocket(family, sotype, 0);
373 if (s == -1)
374 return;
375
376 err = xbind(s, sockaddr(&addr), len);
377 if (err)
378 goto close;
379
380 errno = 0;
381 value = s;
382 err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
383 if (!err || errno != EOPNOTSUPP)
384 FAIL_ERRNO("map_update: expected EOPNOTSUPP");
385 close:
386 xclose(s);
387 }
388
test_insert(int family,int sotype,int mapfd)389 static void test_insert(int family, int sotype, int mapfd)
390 {
391 u64 value;
392 u32 key;
393 int s;
394
395 s = socket_loopback(family, sotype);
396 if (s < 0)
397 return;
398
399 key = 0;
400 value = s;
401 xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
402 xclose(s);
403 }
404
test_delete_after_insert(int family,int sotype,int mapfd)405 static void test_delete_after_insert(int family, int sotype, int mapfd)
406 {
407 u64 value;
408 u32 key;
409 int s;
410
411 s = socket_loopback(family, sotype);
412 if (s < 0)
413 return;
414
415 key = 0;
416 value = s;
417 xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
418 xbpf_map_delete_elem(mapfd, &key);
419 xclose(s);
420 }
421
test_delete_after_close(int family,int sotype,int mapfd)422 static void test_delete_after_close(int family, int sotype, int mapfd)
423 {
424 int err, s;
425 u64 value;
426 u32 key;
427
428 s = socket_loopback(family, sotype);
429 if (s < 0)
430 return;
431
432 key = 0;
433 value = s;
434 xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
435
436 xclose(s);
437
438 errno = 0;
439 err = bpf_map_delete_elem(mapfd, &key);
440 if (!err || (errno != EINVAL && errno != ENOENT))
441 /* SOCKMAP and SOCKHASH return different error codes */
442 FAIL_ERRNO("map_delete: expected EINVAL/EINVAL");
443 }
444
test_lookup_after_insert(int family,int sotype,int mapfd)445 static void test_lookup_after_insert(int family, int sotype, int mapfd)
446 {
447 u64 cookie, value;
448 socklen_t len;
449 u32 key;
450 int s;
451
452 s = socket_loopback(family, sotype);
453 if (s < 0)
454 return;
455
456 key = 0;
457 value = s;
458 xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
459
460 len = sizeof(cookie);
461 xgetsockopt(s, SOL_SOCKET, SO_COOKIE, &cookie, &len);
462
463 xbpf_map_lookup_elem(mapfd, &key, &value);
464
465 if (value != cookie) {
466 FAIL("map_lookup: have %#llx, want %#llx",
467 (unsigned long long)value, (unsigned long long)cookie);
468 }
469
470 xclose(s);
471 }
472
test_lookup_after_delete(int family,int sotype,int mapfd)473 static void test_lookup_after_delete(int family, int sotype, int mapfd)
474 {
475 int err, s;
476 u64 value;
477 u32 key;
478
479 s = socket_loopback(family, sotype);
480 if (s < 0)
481 return;
482
483 key = 0;
484 value = s;
485 xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
486 xbpf_map_delete_elem(mapfd, &key);
487
488 errno = 0;
489 err = bpf_map_lookup_elem(mapfd, &key, &value);
490 if (!err || errno != ENOENT)
491 FAIL_ERRNO("map_lookup: expected ENOENT");
492
493 xclose(s);
494 }
495
test_lookup_32_bit_value(int family,int sotype,int mapfd)496 static void test_lookup_32_bit_value(int family, int sotype, int mapfd)
497 {
498 u32 key, value32;
499 int err, s;
500
501 s = socket_loopback(family, sotype);
502 if (s < 0)
503 return;
504
505 mapfd = bpf_map_create(BPF_MAP_TYPE_SOCKMAP, NULL, sizeof(key),
506 sizeof(value32), 1, NULL);
507 if (mapfd < 0) {
508 FAIL_ERRNO("map_create");
509 goto close;
510 }
511
512 key = 0;
513 value32 = s;
514 xbpf_map_update_elem(mapfd, &key, &value32, BPF_NOEXIST);
515
516 errno = 0;
517 err = bpf_map_lookup_elem(mapfd, &key, &value32);
518 if (!err || errno != ENOSPC)
519 FAIL_ERRNO("map_lookup: expected ENOSPC");
520
521 xclose(mapfd);
522 close:
523 xclose(s);
524 }
525
test_update_existing(int family,int sotype,int mapfd)526 static void test_update_existing(int family, int sotype, int mapfd)
527 {
528 int s1, s2;
529 u64 value;
530 u32 key;
531
532 s1 = socket_loopback(family, sotype);
533 if (s1 < 0)
534 return;
535
536 s2 = socket_loopback(family, sotype);
537 if (s2 < 0)
538 goto close_s1;
539
540 key = 0;
541 value = s1;
542 xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
543
544 value = s2;
545 xbpf_map_update_elem(mapfd, &key, &value, BPF_EXIST);
546 xclose(s2);
547 close_s1:
548 xclose(s1);
549 }
550
551 /* Exercise the code path where we destroy child sockets that never
552 * got accept()'ed, aka orphans, when parent socket gets closed.
553 */
test_destroy_orphan_child(int family,int sotype,int mapfd)554 static void test_destroy_orphan_child(int family, int sotype, int mapfd)
555 {
556 struct sockaddr_storage addr;
557 socklen_t len;
558 int err, s, c;
559 u64 value;
560 u32 key;
561
562 s = socket_loopback(family, sotype);
563 if (s < 0)
564 return;
565
566 len = sizeof(addr);
567 err = xgetsockname(s, sockaddr(&addr), &len);
568 if (err)
569 goto close_srv;
570
571 key = 0;
572 value = s;
573 xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
574
575 c = xsocket(family, sotype, 0);
576 if (c == -1)
577 goto close_srv;
578
579 xconnect(c, sockaddr(&addr), len);
580 xclose(c);
581 close_srv:
582 xclose(s);
583 }
584
585 /* Perform a passive open after removing listening socket from SOCKMAP
586 * to ensure that callbacks get restored properly.
587 */
test_clone_after_delete(int family,int sotype,int mapfd)588 static void test_clone_after_delete(int family, int sotype, int mapfd)
589 {
590 struct sockaddr_storage addr;
591 socklen_t len;
592 int err, s, c;
593 u64 value;
594 u32 key;
595
596 s = socket_loopback(family, sotype);
597 if (s < 0)
598 return;
599
600 len = sizeof(addr);
601 err = xgetsockname(s, sockaddr(&addr), &len);
602 if (err)
603 goto close_srv;
604
605 key = 0;
606 value = s;
607 xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
608 xbpf_map_delete_elem(mapfd, &key);
609
610 c = xsocket(family, sotype, 0);
611 if (c < 0)
612 goto close_srv;
613
614 xconnect(c, sockaddr(&addr), len);
615 xclose(c);
616 close_srv:
617 xclose(s);
618 }
619
620 /* Check that child socket that got created while parent was in a
621 * SOCKMAP, but got accept()'ed only after the parent has been removed
622 * from SOCKMAP, gets cloned without parent psock state or callbacks.
623 */
test_accept_after_delete(int family,int sotype,int mapfd)624 static void test_accept_after_delete(int family, int sotype, int mapfd)
625 {
626 struct sockaddr_storage addr;
627 const u32 zero = 0;
628 int err, s, c, p;
629 socklen_t len;
630 u64 value;
631
632 s = socket_loopback(family, sotype | SOCK_NONBLOCK);
633 if (s == -1)
634 return;
635
636 len = sizeof(addr);
637 err = xgetsockname(s, sockaddr(&addr), &len);
638 if (err)
639 goto close_srv;
640
641 value = s;
642 err = xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
643 if (err)
644 goto close_srv;
645
646 c = xsocket(family, sotype, 0);
647 if (c == -1)
648 goto close_srv;
649
650 /* Create child while parent is in sockmap */
651 err = xconnect(c, sockaddr(&addr), len);
652 if (err)
653 goto close_cli;
654
655 /* Remove parent from sockmap */
656 err = xbpf_map_delete_elem(mapfd, &zero);
657 if (err)
658 goto close_cli;
659
660 p = xaccept_nonblock(s, NULL, NULL);
661 if (p == -1)
662 goto close_cli;
663
664 /* Check that child sk_user_data is not set */
665 value = p;
666 xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
667
668 xclose(p);
669 close_cli:
670 xclose(c);
671 close_srv:
672 xclose(s);
673 }
674
675 /* Check that child socket that got created and accepted while parent
676 * was in a SOCKMAP is cloned without parent psock state or callbacks.
677 */
test_accept_before_delete(int family,int sotype,int mapfd)678 static void test_accept_before_delete(int family, int sotype, int mapfd)
679 {
680 struct sockaddr_storage addr;
681 const u32 zero = 0, one = 1;
682 int err, s, c, p;
683 socklen_t len;
684 u64 value;
685
686 s = socket_loopback(family, sotype | SOCK_NONBLOCK);
687 if (s == -1)
688 return;
689
690 len = sizeof(addr);
691 err = xgetsockname(s, sockaddr(&addr), &len);
692 if (err)
693 goto close_srv;
694
695 value = s;
696 err = xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
697 if (err)
698 goto close_srv;
699
700 c = xsocket(family, sotype, 0);
701 if (c == -1)
702 goto close_srv;
703
704 /* Create & accept child while parent is in sockmap */
705 err = xconnect(c, sockaddr(&addr), len);
706 if (err)
707 goto close_cli;
708
709 p = xaccept_nonblock(s, NULL, NULL);
710 if (p == -1)
711 goto close_cli;
712
713 /* Check that child sk_user_data is not set */
714 value = p;
715 xbpf_map_update_elem(mapfd, &one, &value, BPF_NOEXIST);
716
717 xclose(p);
718 close_cli:
719 xclose(c);
720 close_srv:
721 xclose(s);
722 }
723
/* State shared between a test's main thread and its helper thread. */
struct connect_accept_ctx {
	int sockfd;		/* socket the helper thread operates on */
	unsigned int done;	/* written as 1 by the helper thread just before it returns */
	unsigned int nr_iter;	/* upper bound on the helper thread's loop iterations */
};
729
is_thread_done(struct connect_accept_ctx * ctx)730 static bool is_thread_done(struct connect_accept_ctx *ctx)
731 {
732 return READ_ONCE(ctx->done);
733 }
734
connect_accept_thread(void * arg)735 static void *connect_accept_thread(void *arg)
736 {
737 struct connect_accept_ctx *ctx = arg;
738 struct sockaddr_storage addr;
739 int family, socktype;
740 socklen_t len;
741 int err, i, s;
742
743 s = ctx->sockfd;
744
745 len = sizeof(addr);
746 err = xgetsockname(s, sockaddr(&addr), &len);
747 if (err)
748 goto done;
749
750 len = sizeof(family);
751 err = xgetsockopt(s, SOL_SOCKET, SO_DOMAIN, &family, &len);
752 if (err)
753 goto done;
754
755 len = sizeof(socktype);
756 err = xgetsockopt(s, SOL_SOCKET, SO_TYPE, &socktype, &len);
757 if (err)
758 goto done;
759
760 for (i = 0; i < ctx->nr_iter; i++) {
761 int c, p;
762
763 c = xsocket(family, socktype, 0);
764 if (c < 0)
765 break;
766
767 err = xconnect(c, (struct sockaddr *)&addr, sizeof(addr));
768 if (err) {
769 xclose(c);
770 break;
771 }
772
773 p = xaccept_nonblock(s, NULL, NULL);
774 if (p < 0) {
775 xclose(c);
776 break;
777 }
778
779 xclose(p);
780 xclose(c);
781 }
782 done:
783 WRITE_ONCE(ctx->done, 1);
784 return NULL;
785 }
786
test_syn_recv_insert_delete(int family,int sotype,int mapfd)787 static void test_syn_recv_insert_delete(int family, int sotype, int mapfd)
788 {
789 struct connect_accept_ctx ctx = { 0 };
790 struct sockaddr_storage addr;
791 socklen_t len;
792 u32 zero = 0;
793 pthread_t t;
794 int err, s;
795 u64 value;
796
797 s = socket_loopback(family, sotype | SOCK_NONBLOCK);
798 if (s < 0)
799 return;
800
801 len = sizeof(addr);
802 err = xgetsockname(s, sockaddr(&addr), &len);
803 if (err)
804 goto close;
805
806 ctx.sockfd = s;
807 ctx.nr_iter = 1000;
808
809 err = xpthread_create(&t, NULL, connect_accept_thread, &ctx);
810 if (err)
811 goto close;
812
813 value = s;
814 while (!is_thread_done(&ctx)) {
815 err = xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
816 if (err)
817 break;
818
819 err = xbpf_map_delete_elem(mapfd, &zero);
820 if (err)
821 break;
822 }
823
824 xpthread_join(t, NULL);
825 close:
826 xclose(s);
827 }
828
listen_thread(void * arg)829 static void *listen_thread(void *arg)
830 {
831 struct sockaddr unspec = { AF_UNSPEC };
832 struct connect_accept_ctx *ctx = arg;
833 int err, i, s;
834
835 s = ctx->sockfd;
836
837 for (i = 0; i < ctx->nr_iter; i++) {
838 err = xlisten(s, 1);
839 if (err)
840 break;
841 err = xconnect(s, &unspec, sizeof(unspec));
842 if (err)
843 break;
844 }
845
846 WRITE_ONCE(ctx->done, 1);
847 return NULL;
848 }
849
test_race_insert_listen(int family,int socktype,int mapfd)850 static void test_race_insert_listen(int family, int socktype, int mapfd)
851 {
852 struct connect_accept_ctx ctx = { 0 };
853 const u32 zero = 0;
854 const int one = 1;
855 pthread_t t;
856 int err, s;
857 u64 value;
858
859 s = xsocket(family, socktype, 0);
860 if (s < 0)
861 return;
862
863 err = xsetsockopt(s, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
864 if (err)
865 goto close;
866
867 ctx.sockfd = s;
868 ctx.nr_iter = 10000;
869
870 err = pthread_create(&t, NULL, listen_thread, &ctx);
871 if (err)
872 goto close;
873
874 value = s;
875 while (!is_thread_done(&ctx)) {
876 err = bpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
877 /* Expecting EOPNOTSUPP before listen() */
878 if (err && errno != EOPNOTSUPP) {
879 FAIL_ERRNO("map_update");
880 break;
881 }
882
883 err = bpf_map_delete_elem(mapfd, &zero);
884 /* Expecting no entry after unhash on connect(AF_UNSPEC) */
885 if (err && errno != EINVAL && errno != ENOENT) {
886 FAIL_ERRNO("map_delete");
887 break;
888 }
889 }
890
891 xpthread_join(t, NULL);
892 close:
893 xclose(s);
894 }
895
zero_verdict_count(int mapfd)896 static void zero_verdict_count(int mapfd)
897 {
898 unsigned int zero = 0;
899 int key;
900
901 key = SK_DROP;
902 xbpf_map_update_elem(mapfd, &key, &zero, BPF_ANY);
903 key = SK_PASS;
904 xbpf_map_update_elem(mapfd, &key, &zero, BPF_ANY);
905 }
906
/* Direction selector passed to the redirect tests. */
enum redir_mode {
	REDIR_INGRESS,
	REDIR_EGRESS,
};

/*
 * Name of a redirect mode for log messages: "ingress", "egress", or
 * "unknown" for any other value.
 */
static const char *redir_mode_str(enum redir_mode mode)
{
	if (mode == REDIR_INGRESS)
		return "ingress";
	if (mode == REDIR_EGRESS)
		return "egress";

	return "unknown";
}
923
add_to_sockmap(int sock_mapfd,int fd1,int fd2)924 static int add_to_sockmap(int sock_mapfd, int fd1, int fd2)
925 {
926 u64 value;
927 u32 key;
928 int err;
929
930 key = 0;
931 value = fd1;
932 err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
933 if (err)
934 return err;
935
936 key = 1;
937 value = fd2;
938 return xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
939 }
940
redir_to_connected(int family,int sotype,int sock_mapfd,int verd_mapfd,enum redir_mode mode)941 static void redir_to_connected(int family, int sotype, int sock_mapfd,
942 int verd_mapfd, enum redir_mode mode)
943 {
944 const char *log_prefix = redir_mode_str(mode);
945 struct sockaddr_storage addr;
946 int s, c0, c1, p0, p1;
947 unsigned int pass;
948 socklen_t len;
949 int err, n;
950 u32 key;
951 char b;
952
953 zero_verdict_count(verd_mapfd);
954
955 s = socket_loopback(family, sotype | SOCK_NONBLOCK);
956 if (s < 0)
957 return;
958
959 len = sizeof(addr);
960 err = xgetsockname(s, sockaddr(&addr), &len);
961 if (err)
962 goto close_srv;
963
964 c0 = xsocket(family, sotype, 0);
965 if (c0 < 0)
966 goto close_srv;
967 err = xconnect(c0, sockaddr(&addr), len);
968 if (err)
969 goto close_cli0;
970
971 p0 = xaccept_nonblock(s, NULL, NULL);
972 if (p0 < 0)
973 goto close_cli0;
974
975 c1 = xsocket(family, sotype, 0);
976 if (c1 < 0)
977 goto close_peer0;
978 err = xconnect(c1, sockaddr(&addr), len);
979 if (err)
980 goto close_cli1;
981
982 p1 = xaccept_nonblock(s, NULL, NULL);
983 if (p1 < 0)
984 goto close_cli1;
985
986 err = add_to_sockmap(sock_mapfd, p0, p1);
987 if (err)
988 goto close_peer1;
989
990 n = write(mode == REDIR_INGRESS ? c1 : p1, "a", 1);
991 if (n < 0)
992 FAIL_ERRNO("%s: write", log_prefix);
993 if (n == 0)
994 FAIL("%s: incomplete write", log_prefix);
995 if (n < 1)
996 goto close_peer1;
997
998 key = SK_PASS;
999 err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
1000 if (err)
1001 goto close_peer1;
1002 if (pass != 1)
1003 FAIL("%s: want pass count 1, have %d", log_prefix, pass);
1004 n = recv_timeout(c0, &b, 1, 0, IO_TIMEOUT_SEC);
1005 if (n < 0)
1006 FAIL_ERRNO("%s: recv_timeout", log_prefix);
1007 if (n == 0)
1008 FAIL("%s: incomplete recv", log_prefix);
1009
1010 close_peer1:
1011 xclose(p1);
1012 close_cli1:
1013 xclose(c1);
1014 close_peer0:
1015 xclose(p0);
1016 close_cli0:
1017 xclose(c0);
1018 close_srv:
1019 xclose(s);
1020 }
1021
test_skb_redir_to_connected(struct test_sockmap_listen * skel,struct bpf_map * inner_map,int family,int sotype)1022 static void test_skb_redir_to_connected(struct test_sockmap_listen *skel,
1023 struct bpf_map *inner_map, int family,
1024 int sotype)
1025 {
1026 int verdict = bpf_program__fd(skel->progs.prog_stream_verdict);
1027 int parser = bpf_program__fd(skel->progs.prog_stream_parser);
1028 int verdict_map = bpf_map__fd(skel->maps.verdict_map);
1029 int sock_map = bpf_map__fd(inner_map);
1030 int err;
1031
1032 err = xbpf_prog_attach(parser, sock_map, BPF_SK_SKB_STREAM_PARSER, 0);
1033 if (err)
1034 return;
1035 err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT, 0);
1036 if (err)
1037 goto detach;
1038
1039 redir_to_connected(family, sotype, sock_map, verdict_map,
1040 REDIR_INGRESS);
1041
1042 xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT);
1043 detach:
1044 xbpf_prog_detach2(parser, sock_map, BPF_SK_SKB_STREAM_PARSER);
1045 }
1046
test_msg_redir_to_connected(struct test_sockmap_listen * skel,struct bpf_map * inner_map,int family,int sotype)1047 static void test_msg_redir_to_connected(struct test_sockmap_listen *skel,
1048 struct bpf_map *inner_map, int family,
1049 int sotype)
1050 {
1051 int verdict = bpf_program__fd(skel->progs.prog_msg_verdict);
1052 int verdict_map = bpf_map__fd(skel->maps.verdict_map);
1053 int sock_map = bpf_map__fd(inner_map);
1054 int err;
1055
1056 err = xbpf_prog_attach(verdict, sock_map, BPF_SK_MSG_VERDICT, 0);
1057 if (err)
1058 return;
1059
1060 redir_to_connected(family, sotype, sock_map, verdict_map, REDIR_EGRESS);
1061
1062 xbpf_prog_detach2(verdict, sock_map, BPF_SK_MSG_VERDICT);
1063 }
1064
redir_to_listening(int family,int sotype,int sock_mapfd,int verd_mapfd,enum redir_mode mode)1065 static void redir_to_listening(int family, int sotype, int sock_mapfd,
1066 int verd_mapfd, enum redir_mode mode)
1067 {
1068 const char *log_prefix = redir_mode_str(mode);
1069 struct sockaddr_storage addr;
1070 int s, c, p, err, n;
1071 unsigned int drop;
1072 socklen_t len;
1073 u32 key;
1074
1075 zero_verdict_count(verd_mapfd);
1076
1077 s = socket_loopback(family, sotype | SOCK_NONBLOCK);
1078 if (s < 0)
1079 return;
1080
1081 len = sizeof(addr);
1082 err = xgetsockname(s, sockaddr(&addr), &len);
1083 if (err)
1084 goto close_srv;
1085
1086 c = xsocket(family, sotype, 0);
1087 if (c < 0)
1088 goto close_srv;
1089 err = xconnect(c, sockaddr(&addr), len);
1090 if (err)
1091 goto close_cli;
1092
1093 p = xaccept_nonblock(s, NULL, NULL);
1094 if (p < 0)
1095 goto close_cli;
1096
1097 err = add_to_sockmap(sock_mapfd, s, p);
1098 if (err)
1099 goto close_peer;
1100
1101 n = write(mode == REDIR_INGRESS ? c : p, "a", 1);
1102 if (n < 0 && errno != EACCES)
1103 FAIL_ERRNO("%s: write", log_prefix);
1104 if (n == 0)
1105 FAIL("%s: incomplete write", log_prefix);
1106 if (n < 1)
1107 goto close_peer;
1108
1109 key = SK_DROP;
1110 err = xbpf_map_lookup_elem(verd_mapfd, &key, &drop);
1111 if (err)
1112 goto close_peer;
1113 if (drop != 1)
1114 FAIL("%s: want drop count 1, have %d", log_prefix, drop);
1115
1116 close_peer:
1117 xclose(p);
1118 close_cli:
1119 xclose(c);
1120 close_srv:
1121 xclose(s);
1122 }
1123
test_skb_redir_to_listening(struct test_sockmap_listen * skel,struct bpf_map * inner_map,int family,int sotype)1124 static void test_skb_redir_to_listening(struct test_sockmap_listen *skel,
1125 struct bpf_map *inner_map, int family,
1126 int sotype)
1127 {
1128 int verdict = bpf_program__fd(skel->progs.prog_stream_verdict);
1129 int parser = bpf_program__fd(skel->progs.prog_stream_parser);
1130 int verdict_map = bpf_map__fd(skel->maps.verdict_map);
1131 int sock_map = bpf_map__fd(inner_map);
1132 int err;
1133
1134 err = xbpf_prog_attach(parser, sock_map, BPF_SK_SKB_STREAM_PARSER, 0);
1135 if (err)
1136 return;
1137 err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT, 0);
1138 if (err)
1139 goto detach;
1140
1141 redir_to_listening(family, sotype, sock_map, verdict_map,
1142 REDIR_INGRESS);
1143
1144 xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT);
1145 detach:
1146 xbpf_prog_detach2(parser, sock_map, BPF_SK_SKB_STREAM_PARSER);
1147 }
1148
test_msg_redir_to_listening(struct test_sockmap_listen * skel,struct bpf_map * inner_map,int family,int sotype)1149 static void test_msg_redir_to_listening(struct test_sockmap_listen *skel,
1150 struct bpf_map *inner_map, int family,
1151 int sotype)
1152 {
1153 int verdict = bpf_program__fd(skel->progs.prog_msg_verdict);
1154 int verdict_map = bpf_map__fd(skel->maps.verdict_map);
1155 int sock_map = bpf_map__fd(inner_map);
1156 int err;
1157
1158 err = xbpf_prog_attach(verdict, sock_map, BPF_SK_MSG_VERDICT, 0);
1159 if (err)
1160 return;
1161
1162 redir_to_listening(family, sotype, sock_map, verdict_map, REDIR_EGRESS);
1163
1164 xbpf_prog_detach2(verdict, sock_map, BPF_SK_MSG_VERDICT);
1165 }
1166
test_reuseport_select_listening(int family,int sotype,int sock_map,int verd_map,int reuseport_prog)1167 static void test_reuseport_select_listening(int family, int sotype,
1168 int sock_map, int verd_map,
1169 int reuseport_prog)
1170 {
1171 struct sockaddr_storage addr;
1172 unsigned int pass;
1173 int s, c, err;
1174 socklen_t len;
1175 u64 value;
1176 u32 key;
1177
1178 zero_verdict_count(verd_map);
1179
1180 s = socket_loopback_reuseport(family, sotype | SOCK_NONBLOCK,
1181 reuseport_prog);
1182 if (s < 0)
1183 return;
1184
1185 len = sizeof(addr);
1186 err = xgetsockname(s, sockaddr(&addr), &len);
1187 if (err)
1188 goto close_srv;
1189
1190 key = 0;
1191 value = s;
1192 err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST);
1193 if (err)
1194 goto close_srv;
1195
1196 c = xsocket(family, sotype, 0);
1197 if (c < 0)
1198 goto close_srv;
1199 err = xconnect(c, sockaddr(&addr), len);
1200 if (err)
1201 goto close_cli;
1202
1203 if (sotype == SOCK_STREAM) {
1204 int p;
1205
1206 p = xaccept_nonblock(s, NULL, NULL);
1207 if (p < 0)
1208 goto close_cli;
1209 xclose(p);
1210 } else {
1211 char b = 'a';
1212 ssize_t n;
1213
1214 n = xsend(c, &b, sizeof(b), 0);
1215 if (n == -1)
1216 goto close_cli;
1217
1218 n = xrecv_nonblock(s, &b, sizeof(b), 0);
1219 if (n == -1)
1220 goto close_cli;
1221 }
1222
1223 key = SK_PASS;
1224 err = xbpf_map_lookup_elem(verd_map, &key, &pass);
1225 if (err)
1226 goto close_cli;
1227 if (pass != 1)
1228 FAIL("want pass count 1, have %d", pass);
1229
1230 close_cli:
1231 xclose(c);
1232 close_srv:
1233 xclose(s);
1234 }
1235
/*
 * Reuseport selection of a connected socket must be refused.
 *
 * sock_map[0] first holds the server socket so that an initial client (c0)
 * can get through. Its peer (p0: the accepted child for SOCK_STREAM, or a
 * second socket connected back to c0 otherwise) then replaces the server
 * at sock_map[0]. A second client (c1) talking to the server address is
 * expected to see ECONNREFUSED, and the SK_DROP counter in verd_map must
 * read exactly 1.
 *
 * Every socket opened here is closed on all exit paths; the cleanup labels
 * are ordered so that a jump closes exactly the sockets opened so far.
 */
static void test_reuseport_select_connected(int family, int sotype,
					    int sock_map, int verd_map,
					    int reuseport_prog)
{
	struct sockaddr_storage addr;
	int s, c0, c1, p0, err;
	unsigned int drop;
	socklen_t len;
	u64 value;
	u32 key;

	zero_verdict_count(verd_map);

	s = socket_loopback_reuseport(family, sotype, reuseport_prog);
	if (s < 0)
		return;

	/* Populate sock_map[0] to avoid ENOENT on first connection */
	key = 0;
	value = s;
	err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST);
	if (err)
		goto close_srv;

	len = sizeof(addr);
	err = xgetsockname(s, sockaddr(&addr), &len);
	if (err)
		goto close_srv;

	c0 = xsocket(family, sotype, 0);
	if (c0 < 0)
		goto close_srv;

	err = xconnect(c0, sockaddr(&addr), len);
	if (err)
		goto close_cli0;

	if (sotype == SOCK_STREAM) {
		p0 = xaccept_nonblock(s, NULL, NULL);
		if (p0 < 0)
			goto close_cli0;
	} else {
		p0 = xsocket(family, sotype, 0);
		if (p0 < 0)
			goto close_cli0;

		/* p0 is open from here on, so unwind through close_peer0 */
		len = sizeof(addr);
		err = xgetsockname(c0, sockaddr(&addr), &len);
		if (err)
			goto close_peer0;

		err = xconnect(p0, sockaddr(&addr), len);
		if (err)
			goto close_peer0;
	}

	/* Update sock_map[0] to redirect to a connected socket */
	key = 0;
	value = p0;
	err = xbpf_map_update_elem(sock_map, &key, &value, BPF_EXIST);
	if (err)
		goto close_peer0;

	c1 = xsocket(family, sotype, 0);
	if (c1 < 0)
		goto close_peer0;

	/* addr was reused for c0's name above; fetch the server's again */
	len = sizeof(addr);
	err = xgetsockname(s, sockaddr(&addr), &len);
	if (err)
		goto close_cli1;

	/* Clear errno so a stale value cannot satisfy the check below */
	errno = 0;
	err = connect(c1, sockaddr(&addr), len);
	if (sotype == SOCK_DGRAM) {
		char b = 'a';
		ssize_t n;

		/*
		 * For datagram sockets the verdict is taken from the
		 * receive that follows a send rather than from connect().
		 */
		n = xsend(c1, &b, sizeof(b), 0);
		if (n == -1)
			goto close_cli1;

		n = recv_timeout(c1, &b, sizeof(b), 0, IO_TIMEOUT_SEC);
		err = n == -1;
	}
	if (!err || errno != ECONNREFUSED)
		FAIL_ERRNO("connect: expected ECONNREFUSED");

	key = SK_DROP;
	err = xbpf_map_lookup_elem(verd_map, &key, &drop);
	if (err)
		goto close_cli1;
	if (drop != 1)
		FAIL("want drop count 1, have %u", drop);

close_cli1:
	xclose(c1);
close_peer0:
	xclose(p0);
close_cli0:
	xclose(c0);
close_srv:
	xclose(s);
}
1340
/*
 * Check that redirecting across reuseport groups is not allowed.
 *
 * Two server sockets are created by separate socket_loopback_reuseport()
 * calls and both are added to sock_map. A client then talks to the second
 * server's address; the attempt is expected to fail with ECONNREFUSED and
 * the SK_DROP counter in verd_map must read exactly 1.
 */
static void test_reuseport_mixed_groups(int family, int sotype, int sock_map,
					int verd_map, int reuseport_prog)
{
	struct sockaddr_storage addr;
	int s1, s2, c, err;
	unsigned int drop;
	socklen_t len;
	u32 key;

	zero_verdict_count(verd_map);

	/* Create two listeners, each in its own reuseport group */
	s1 = socket_loopback_reuseport(family, sotype, reuseport_prog);
	if (s1 < 0)
		return;

	s2 = socket_loopback_reuseport(family, sotype, reuseport_prog);
	if (s2 < 0)
		goto close_srv1;

	err = add_to_sockmap(sock_map, s1, s2);
	if (err)
		goto close_srv2;

	/* Connect to s2, reuseport BPF selects s1 via sock_map[0] */
	len = sizeof(addr);
	err = xgetsockname(s2, sockaddr(&addr), &len);
	if (err)
		goto close_srv2;

	c = xsocket(family, sotype, 0);
	if (c < 0)
		goto close_srv2;

	/*
	 * Clear errno first: if nothing below fails, the check must not be
	 * satisfied (or the report polluted) by a leftover value.
	 */
	errno = 0;
	err = connect(c, sockaddr(&addr), len);
	if (sotype == SOCK_DGRAM) {
		char b = 'a';
		ssize_t n;

		/*
		 * For datagram sockets the verdict is taken from the
		 * receive that follows a send rather than from connect().
		 */
		n = xsend(c, &b, sizeof(b), 0);
		if (n == -1)
			goto close_cli;

		n = recv_timeout(c, &b, sizeof(b), 0, IO_TIMEOUT_SEC);
		err = n == -1;
	}
	if (!err || errno != ECONNREFUSED) {
		FAIL_ERRNO("connect: expected ECONNREFUSED");
		goto close_cli;
	}

	/* Expect drop, can't redirect outside of reuseport group */
	key = SK_DROP;
	err = xbpf_map_lookup_elem(verd_map, &key, &drop);
	if (err)
		goto close_cli;
	if (drop != 1)
		FAIL("want drop count 1, have %u", drop);

close_cli:
	xclose(c);
close_srv2:
	xclose(s2);
close_srv1:
	xclose(s1);
}
1408
/*
 * Initializer for one entry of a test table: the function itself, its name
 * as a string literal, then any further initializers for the members that
 * follow (left zero-initialized when omitted).
 */
#define TEST(fn, ...) { fn, #fn, __VA_ARGS__ }
1413
test_ops_cleanup(const struct bpf_map * map)1414 static void test_ops_cleanup(const struct bpf_map *map)
1415 {
1416 int err, mapfd;
1417 u32 key;
1418
1419 mapfd = bpf_map__fd(map);
1420
1421 for (key = 0; key < bpf_map__max_entries(map); key++) {
1422 err = bpf_map_delete_elem(mapfd, &key);
1423 if (err && errno != EINVAL && errno != ENOENT)
1424 FAIL_ERRNO("map_delete: expected EINVAL/ENOENT");
1425 }
1426 }
1427
/* Human-readable name of an address family, used in subtest names. */
static const char *family_str(sa_family_t family)
{
	if (family == AF_INET)
		return "IPv4";
	if (family == AF_INET6)
		return "IPv6";
	if (family == AF_UNIX)
		return "Unix";

	return "unknown";
}
1441
map_type_str(const struct bpf_map * map)1442 static const char *map_type_str(const struct bpf_map *map)
1443 {
1444 int type;
1445
1446 if (!map)
1447 return "invalid";
1448 type = bpf_map__type(map);
1449
1450 switch (type) {
1451 case BPF_MAP_TYPE_SOCKMAP:
1452 return "sockmap";
1453 case BPF_MAP_TYPE_SOCKHASH:
1454 return "sockhash";
1455 default:
1456 return "unknown";
1457 }
1458 }
1459
/*
 * Human-readable name of a socket type, used in subtest names.
 * Only the exact values SOCK_DGRAM and SOCK_STREAM are recognised.
 */
static const char *sotype_str(int sotype)
{
	if (sotype == SOCK_DGRAM)
		return "UDP";
	if (sotype == SOCK_STREAM)
		return "TCP";

	return "unknown";
}
1471
test_ops(struct test_sockmap_listen * skel,struct bpf_map * map,int family,int sotype)1472 static void test_ops(struct test_sockmap_listen *skel, struct bpf_map *map,
1473 int family, int sotype)
1474 {
1475 const struct op_test {
1476 void (*fn)(int family, int sotype, int mapfd);
1477 const char *name;
1478 int sotype;
1479 } tests[] = {
1480 /* insert */
1481 TEST(test_insert_invalid),
1482 TEST(test_insert_opened),
1483 TEST(test_insert_bound, SOCK_STREAM),
1484 TEST(test_insert),
1485 /* delete */
1486 TEST(test_delete_after_insert),
1487 TEST(test_delete_after_close),
1488 /* lookup */
1489 TEST(test_lookup_after_insert),
1490 TEST(test_lookup_after_delete),
1491 TEST(test_lookup_32_bit_value),
1492 /* update */
1493 TEST(test_update_existing),
1494 /* races with insert/delete */
1495 TEST(test_destroy_orphan_child, SOCK_STREAM),
1496 TEST(test_syn_recv_insert_delete, SOCK_STREAM),
1497 TEST(test_race_insert_listen, SOCK_STREAM),
1498 /* child clone */
1499 TEST(test_clone_after_delete, SOCK_STREAM),
1500 TEST(test_accept_after_delete, SOCK_STREAM),
1501 TEST(test_accept_before_delete, SOCK_STREAM),
1502 };
1503 const char *family_name, *map_name, *sotype_name;
1504 const struct op_test *t;
1505 char s[MAX_TEST_NAME];
1506 int map_fd;
1507
1508 family_name = family_str(family);
1509 map_name = map_type_str(map);
1510 sotype_name = sotype_str(sotype);
1511 map_fd = bpf_map__fd(map);
1512
1513 for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
1514 snprintf(s, sizeof(s), "%s %s %s %s", map_name, family_name,
1515 sotype_name, t->name);
1516
1517 if (t->sotype != 0 && t->sotype != sotype)
1518 continue;
1519
1520 if (!test__start_subtest(s))
1521 continue;
1522
1523 t->fn(family, sotype, map_fd);
1524 test_ops_cleanup(map);
1525 }
1526 }
1527
test_redir(struct test_sockmap_listen * skel,struct bpf_map * map,int family,int sotype)1528 static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
1529 int family, int sotype)
1530 {
1531 const struct redir_test {
1532 void (*fn)(struct test_sockmap_listen *skel,
1533 struct bpf_map *map, int family, int sotype);
1534 const char *name;
1535 } tests[] = {
1536 TEST(test_skb_redir_to_connected),
1537 TEST(test_skb_redir_to_listening),
1538 TEST(test_msg_redir_to_connected),
1539 TEST(test_msg_redir_to_listening),
1540 };
1541 const char *family_name, *map_name;
1542 const struct redir_test *t;
1543 char s[MAX_TEST_NAME];
1544
1545 family_name = family_str(family);
1546 map_name = map_type_str(map);
1547
1548 for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
1549 snprintf(s, sizeof(s), "%s %s %s", map_name, family_name,
1550 t->name);
1551
1552 if (!test__start_subtest(s))
1553 continue;
1554
1555 t->fn(skel, map, family, sotype);
1556 }
1557 }
1558
unix_redir_to_connected(int sotype,int sock_mapfd,int verd_mapfd,enum redir_mode mode)1559 static void unix_redir_to_connected(int sotype, int sock_mapfd,
1560 int verd_mapfd, enum redir_mode mode)
1561 {
1562 const char *log_prefix = redir_mode_str(mode);
1563 int c0, c1, p0, p1;
1564 unsigned int pass;
1565 int err, n;
1566 int sfd[2];
1567 u32 key;
1568 char b;
1569
1570 zero_verdict_count(verd_mapfd);
1571
1572 if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd))
1573 return;
1574 c0 = sfd[0], p0 = sfd[1];
1575
1576 if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd))
1577 goto close0;
1578 c1 = sfd[0], p1 = sfd[1];
1579
1580 err = add_to_sockmap(sock_mapfd, p0, p1);
1581 if (err)
1582 goto close;
1583
1584 n = write(c1, "a", 1);
1585 if (n < 0)
1586 FAIL_ERRNO("%s: write", log_prefix);
1587 if (n == 0)
1588 FAIL("%s: incomplete write", log_prefix);
1589 if (n < 1)
1590 goto close;
1591
1592 key = SK_PASS;
1593 err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
1594 if (err)
1595 goto close;
1596 if (pass != 1)
1597 FAIL("%s: want pass count 1, have %d", log_prefix, pass);
1598
1599 n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC);
1600 if (n < 0)
1601 FAIL_ERRNO("%s: recv_timeout", log_prefix);
1602 if (n == 0)
1603 FAIL("%s: incomplete recv", log_prefix);
1604
1605 close:
1606 xclose(c1);
1607 xclose(p1);
1608 close0:
1609 xclose(c0);
1610 xclose(p0);
1611 }
1612
unix_skb_redir_to_connected(struct test_sockmap_listen * skel,struct bpf_map * inner_map,int sotype)1613 static void unix_skb_redir_to_connected(struct test_sockmap_listen *skel,
1614 struct bpf_map *inner_map, int sotype)
1615 {
1616 int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
1617 int verdict_map = bpf_map__fd(skel->maps.verdict_map);
1618 int sock_map = bpf_map__fd(inner_map);
1619 int err;
1620
1621 err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
1622 if (err)
1623 return;
1624
1625 skel->bss->test_ingress = false;
1626 unix_redir_to_connected(sotype, sock_map, verdict_map, REDIR_EGRESS);
1627 skel->bss->test_ingress = true;
1628 unix_redir_to_connected(sotype, sock_map, verdict_map, REDIR_INGRESS);
1629
1630 xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
1631 }
1632
test_unix_redir(struct test_sockmap_listen * skel,struct bpf_map * map,int sotype)1633 static void test_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
1634 int sotype)
1635 {
1636 const char *family_name, *map_name;
1637 char s[MAX_TEST_NAME];
1638
1639 family_name = family_str(AF_UNIX);
1640 map_name = map_type_str(map);
1641 snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
1642 if (!test__start_subtest(s))
1643 return;
1644 unix_skb_redir_to_connected(skel, map, sotype);
1645 }
1646
test_reuseport(struct test_sockmap_listen * skel,struct bpf_map * map,int family,int sotype)1647 static void test_reuseport(struct test_sockmap_listen *skel,
1648 struct bpf_map *map, int family, int sotype)
1649 {
1650 const struct reuseport_test {
1651 void (*fn)(int family, int sotype, int socket_map,
1652 int verdict_map, int reuseport_prog);
1653 const char *name;
1654 int sotype;
1655 } tests[] = {
1656 TEST(test_reuseport_select_listening),
1657 TEST(test_reuseport_select_connected),
1658 TEST(test_reuseport_mixed_groups),
1659 };
1660 int socket_map, verdict_map, reuseport_prog;
1661 const char *family_name, *map_name, *sotype_name;
1662 const struct reuseport_test *t;
1663 char s[MAX_TEST_NAME];
1664
1665 family_name = family_str(family);
1666 map_name = map_type_str(map);
1667 sotype_name = sotype_str(sotype);
1668
1669 socket_map = bpf_map__fd(map);
1670 verdict_map = bpf_map__fd(skel->maps.verdict_map);
1671 reuseport_prog = bpf_program__fd(skel->progs.prog_reuseport);
1672
1673 for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
1674 snprintf(s, sizeof(s), "%s %s %s %s", map_name, family_name,
1675 sotype_name, t->name);
1676
1677 if (t->sotype != 0 && t->sotype != sotype)
1678 continue;
1679
1680 if (!test__start_subtest(s))
1681 continue;
1682
1683 t->fn(family, sotype, socket_map, verdict_map, reuseport_prog);
1684 }
1685 }
1686
/*
 * Build a pair of non-blocking sockets that are connected to each other.
 *
 * p0 comes from socket_loopback(); c0 is a fresh socket connected to p0's
 * address, after which p0 is connected back to c0's address.
 *
 * On success returns 0 and stores p0 in *s and c0 in *c. On failure
 * returns a non-zero value, closes whatever was opened and leaves *s / *c
 * untouched.
 *
 * NOTE(review): connecting p0 back to c0 presumably only suits datagram
 * sockets; every caller visible in this file passes SOCK_DGRAM.
 */
static int inet_socketpair(int family, int type, int *s, int *c)
{
	struct sockaddr_storage addr;
	socklen_t len;
	int p0, c0;
	int err;

	p0 = socket_loopback(family, type | SOCK_NONBLOCK);
	if (p0 < 0)
		return p0;

	len = sizeof(addr);
	err = xgetsockname(p0, sockaddr(&addr), &len);
	if (err)
		goto close_peer0;

	c0 = xsocket(family, type | SOCK_NONBLOCK, 0);
	if (c0 < 0) {
		err = c0;
		goto close_peer0;
	}
	err = xconnect(c0, sockaddr(&addr), len);
	if (err)
		goto close_cli0;

	/* len is value-result: restore the buffer size before reusing it */
	len = sizeof(addr);
	err = xgetsockname(c0, sockaddr(&addr), &len);
	if (err)
		goto close_cli0;
	err = xconnect(p0, sockaddr(&addr), len);
	if (err)
		goto close_cli0;

	*s = p0;
	*c = c0;
	return 0;

close_cli0:
	xclose(c0);
close_peer0:
	xclose(p0);
	return err;
}
1728
udp_redir_to_connected(int family,int sock_mapfd,int verd_mapfd,enum redir_mode mode)1729 static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd,
1730 enum redir_mode mode)
1731 {
1732 const char *log_prefix = redir_mode_str(mode);
1733 int c0, c1, p0, p1;
1734 unsigned int pass;
1735 int err, n;
1736 u32 key;
1737 char b;
1738
1739 zero_verdict_count(verd_mapfd);
1740
1741 err = inet_socketpair(family, SOCK_DGRAM, &p0, &c0);
1742 if (err)
1743 return;
1744 err = inet_socketpair(family, SOCK_DGRAM, &p1, &c1);
1745 if (err)
1746 goto close_cli0;
1747
1748 err = add_to_sockmap(sock_mapfd, p0, p1);
1749 if (err)
1750 goto close_cli1;
1751
1752 n = write(c1, "a", 1);
1753 if (n < 0)
1754 FAIL_ERRNO("%s: write", log_prefix);
1755 if (n == 0)
1756 FAIL("%s: incomplete write", log_prefix);
1757 if (n < 1)
1758 goto close_cli1;
1759
1760 key = SK_PASS;
1761 err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
1762 if (err)
1763 goto close_cli1;
1764 if (pass != 1)
1765 FAIL("%s: want pass count 1, have %d", log_prefix, pass);
1766
1767 n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC);
1768 if (n < 0)
1769 FAIL_ERRNO("%s: recv_timeout", log_prefix);
1770 if (n == 0)
1771 FAIL("%s: incomplete recv", log_prefix);
1772
1773 close_cli1:
1774 xclose(c1);
1775 xclose(p1);
1776 close_cli0:
1777 xclose(c0);
1778 xclose(p0);
1779 }
1780
udp_skb_redir_to_connected(struct test_sockmap_listen * skel,struct bpf_map * inner_map,int family)1781 static void udp_skb_redir_to_connected(struct test_sockmap_listen *skel,
1782 struct bpf_map *inner_map, int family)
1783 {
1784 int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
1785 int verdict_map = bpf_map__fd(skel->maps.verdict_map);
1786 int sock_map = bpf_map__fd(inner_map);
1787 int err;
1788
1789 err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
1790 if (err)
1791 return;
1792
1793 skel->bss->test_ingress = false;
1794 udp_redir_to_connected(family, sock_map, verdict_map, REDIR_EGRESS);
1795 skel->bss->test_ingress = true;
1796 udp_redir_to_connected(family, sock_map, verdict_map, REDIR_INGRESS);
1797
1798 xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
1799 }
1800
test_udp_redir(struct test_sockmap_listen * skel,struct bpf_map * map,int family)1801 static void test_udp_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
1802 int family)
1803 {
1804 const char *family_name, *map_name;
1805 char s[MAX_TEST_NAME];
1806
1807 family_name = family_str(family);
1808 map_name = map_type_str(map);
1809 snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
1810 if (!test__start_subtest(s))
1811 return;
1812 udp_skb_redir_to_connected(skel, map, family);
1813 }
1814
inet_unix_redir_to_connected(int family,int type,int sock_mapfd,int verd_mapfd,enum redir_mode mode)1815 static void inet_unix_redir_to_connected(int family, int type, int sock_mapfd,
1816 int verd_mapfd, enum redir_mode mode)
1817 {
1818 const char *log_prefix = redir_mode_str(mode);
1819 int c0, c1, p0, p1;
1820 unsigned int pass;
1821 int err, n;
1822 int sfd[2];
1823 u32 key;
1824 char b;
1825
1826 zero_verdict_count(verd_mapfd);
1827
1828 if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK, 0, sfd))
1829 return;
1830 c0 = sfd[0], p0 = sfd[1];
1831
1832 err = inet_socketpair(family, SOCK_DGRAM, &p1, &c1);
1833 if (err)
1834 goto close;
1835
1836 err = add_to_sockmap(sock_mapfd, p0, p1);
1837 if (err)
1838 goto close_cli1;
1839
1840 n = write(c1, "a", 1);
1841 if (n < 0)
1842 FAIL_ERRNO("%s: write", log_prefix);
1843 if (n == 0)
1844 FAIL("%s: incomplete write", log_prefix);
1845 if (n < 1)
1846 goto close_cli1;
1847
1848 key = SK_PASS;
1849 err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
1850 if (err)
1851 goto close_cli1;
1852 if (pass != 1)
1853 FAIL("%s: want pass count 1, have %d", log_prefix, pass);
1854
1855 n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC);
1856 if (n < 0)
1857 FAIL_ERRNO("%s: recv_timeout", log_prefix);
1858 if (n == 0)
1859 FAIL("%s: incomplete recv", log_prefix);
1860
1861 close_cli1:
1862 xclose(c1);
1863 xclose(p1);
1864 close:
1865 xclose(c0);
1866 xclose(p0);
1867 }
1868
inet_unix_skb_redir_to_connected(struct test_sockmap_listen * skel,struct bpf_map * inner_map,int family)1869 static void inet_unix_skb_redir_to_connected(struct test_sockmap_listen *skel,
1870 struct bpf_map *inner_map, int family)
1871 {
1872 int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
1873 int verdict_map = bpf_map__fd(skel->maps.verdict_map);
1874 int sock_map = bpf_map__fd(inner_map);
1875 int err;
1876
1877 err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
1878 if (err)
1879 return;
1880
1881 skel->bss->test_ingress = false;
1882 inet_unix_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
1883 REDIR_EGRESS);
1884 inet_unix_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
1885 REDIR_EGRESS);
1886 skel->bss->test_ingress = true;
1887 inet_unix_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
1888 REDIR_INGRESS);
1889 inet_unix_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
1890 REDIR_INGRESS);
1891
1892 xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
1893 }
1894
unix_inet_redir_to_connected(int family,int type,int sock_mapfd,int verd_mapfd,enum redir_mode mode)1895 static void unix_inet_redir_to_connected(int family, int type, int sock_mapfd,
1896 int verd_mapfd, enum redir_mode mode)
1897 {
1898 const char *log_prefix = redir_mode_str(mode);
1899 int c0, c1, p0, p1;
1900 unsigned int pass;
1901 int err, n;
1902 int sfd[2];
1903 u32 key;
1904 char b;
1905
1906 zero_verdict_count(verd_mapfd);
1907
1908 err = inet_socketpair(family, SOCK_DGRAM, &p0, &c0);
1909 if (err)
1910 return;
1911
1912 if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK, 0, sfd))
1913 goto close_cli0;
1914 c1 = sfd[0], p1 = sfd[1];
1915
1916 err = add_to_sockmap(sock_mapfd, p0, p1);
1917 if (err)
1918 goto close;
1919
1920 n = write(c1, "a", 1);
1921 if (n < 0)
1922 FAIL_ERRNO("%s: write", log_prefix);
1923 if (n == 0)
1924 FAIL("%s: incomplete write", log_prefix);
1925 if (n < 1)
1926 goto close;
1927
1928 key = SK_PASS;
1929 err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
1930 if (err)
1931 goto close;
1932 if (pass != 1)
1933 FAIL("%s: want pass count 1, have %d", log_prefix, pass);
1934
1935 n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC);
1936 if (n < 0)
1937 FAIL_ERRNO("%s: recv_timeout", log_prefix);
1938 if (n == 0)
1939 FAIL("%s: incomplete recv", log_prefix);
1940
1941 close:
1942 xclose(c1);
1943 xclose(p1);
1944 close_cli0:
1945 xclose(c0);
1946 xclose(p0);
1947
1948 }
1949
unix_inet_skb_redir_to_connected(struct test_sockmap_listen * skel,struct bpf_map * inner_map,int family)1950 static void unix_inet_skb_redir_to_connected(struct test_sockmap_listen *skel,
1951 struct bpf_map *inner_map, int family)
1952 {
1953 int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
1954 int verdict_map = bpf_map__fd(skel->maps.verdict_map);
1955 int sock_map = bpf_map__fd(inner_map);
1956 int err;
1957
1958 err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
1959 if (err)
1960 return;
1961
1962 skel->bss->test_ingress = false;
1963 unix_inet_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
1964 REDIR_EGRESS);
1965 unix_inet_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
1966 REDIR_EGRESS);
1967 skel->bss->test_ingress = true;
1968 unix_inet_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
1969 REDIR_INGRESS);
1970 unix_inet_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
1971 REDIR_INGRESS);
1972
1973 xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
1974 }
1975
test_udp_unix_redir(struct test_sockmap_listen * skel,struct bpf_map * map,int family)1976 static void test_udp_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
1977 int family)
1978 {
1979 const char *family_name, *map_name;
1980 char s[MAX_TEST_NAME];
1981
1982 family_name = family_str(family);
1983 map_name = map_type_str(map);
1984 snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
1985 if (!test__start_subtest(s))
1986 return;
1987 inet_unix_skb_redir_to_connected(skel, map, family);
1988 unix_inet_skb_redir_to_connected(skel, map, family);
1989 }
1990
/*
 * Run every inet test group against @map for one address family.
 * Map operations and reuseport are exercised for SOCK_STREAM then
 * SOCK_DGRAM; test_redir() is only invoked with SOCK_STREAM.
 */
static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map,
		      int family)
{
	static const int sotypes[] = { SOCK_STREAM, SOCK_DGRAM };
	size_t i;

	for (i = 0; i < ARRAY_SIZE(sotypes); i++)
		test_ops(skel, map, family, sotypes[i]);

	test_redir(skel, map, family, SOCK_STREAM);

	for (i = 0; i < ARRAY_SIZE(sotypes); i++)
		test_reuseport(skel, map, family, sotypes[i]);

	test_udp_redir(skel, map, family);
	test_udp_unix_redir(skel, map, family);
}
2002
serial_test_sockmap_listen(void)2003 void serial_test_sockmap_listen(void)
2004 {
2005 struct test_sockmap_listen *skel;
2006
2007 skel = test_sockmap_listen__open_and_load();
2008 if (!skel) {
2009 FAIL("skeleton open/load failed");
2010 return;
2011 }
2012
2013 skel->bss->test_sockmap = true;
2014 run_tests(skel, skel->maps.sock_map, AF_INET);
2015 run_tests(skel, skel->maps.sock_map, AF_INET6);
2016 test_unix_redir(skel, skel->maps.sock_map, SOCK_DGRAM);
2017 test_unix_redir(skel, skel->maps.sock_map, SOCK_STREAM);
2018
2019 skel->bss->test_sockmap = false;
2020 run_tests(skel, skel->maps.sock_hash, AF_INET);
2021 run_tests(skel, skel->maps.sock_hash, AF_INET6);
2022 test_unix_redir(skel, skel->maps.sock_hash, SOCK_DGRAM);
2023 test_unix_redir(skel, skel->maps.sock_hash, SOCK_STREAM);
2024
2025 test_sockmap_listen__destroy(skel);
2026 }
2027