• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
2 
3 /*
4  * This test sets up 3 netns (src <-> fwd <-> dst). There is no direct veth link
5  * between src and dst. The netns fwd has veth links to each src and dst. The
6  * client is in src and server in dst. The test installs a TC BPF program to each
7  * host facing veth in fwd which calls into i) bpf_redirect_neigh() to perform the
8  * neigh addr population and redirect or ii) bpf_redirect_peer() for namespace
9  * switch from ingress side; it also installs a checker prog on the egress side
10  * to drop unexpected traffic.
11  */
12 
13 #define _GNU_SOURCE
14 
15 #include <arpa/inet.h>
16 #include <linux/if.h>
17 #include <linux/if_tun.h>
18 #include <linux/limits.h>
19 #include <linux/sysctl.h>
20 #include <sched.h>
21 #include <stdbool.h>
22 #include <stdio.h>
23 #include <sys/mount.h>
24 #include <sys/stat.h>
25 #include <unistd.h>
26 
27 #include "test_progs.h"
28 #include "network_helpers.h"
29 #include "test_tc_neigh_fib.skel.h"
30 #include "test_tc_neigh.skel.h"
31 #include "test_tc_peer.skel.h"
32 
/* Names of the three network namespaces used by the test. */
#define NS_SRC "ns_src"
#define NS_FWD "ns_fwd"
#define NS_DST "ns_dst"

/* IPv4 addresses of the endpoints and of the TUN devices, plus the TCP port
 * used for the IPv4 connectivity check.
 */
#define IP4_SRC "172.16.1.100"
#define IP4_DST "172.16.2.100"
#define IP4_TUN_SRC "172.17.1.100"
#define IP4_TUN_FWD "172.17.1.200"
#define IP4_PORT 9004

/* IPv6 counterparts of the addresses and port above. */
#define IP6_SRC "0::1:dead:beef:cafe"
#define IP6_DST "0::2:dead:beef:cafe"
#define IP6_TUN_SRC "1::1:dead:beef:cafe"
#define IP6_TUN_FWD "1::2:dead:beef:cafe"
#define IP6_PORT 9006

/* IPv4 addresses assigned to veth_src_fwd (SLL) and veth_dst_fwd (DLL) in the
 * fwd namespace, and the network routed towards them from the endpoints.
 */
#define IP4_SLL "169.254.0.1"
#define IP4_DLL "169.254.0.2"
#define IP4_NET "169.254.0.0"

/* Fixed MAC addresses set on veth_dst_fwd and veth_dst. */
#define MAC_DST_FWD "00:11:22:33:44:55"
#define MAC_DST "00:22:33:44:55:66"

/* Number of bytes read from /sys/class/net/<dev>/address. */
#define IFADDR_STR_LEN 18
/* Options passed to the ping command. */
#define PING_ARGS "-i 0.2 -c 3 -w 10 -q"

/* bpffs paths under which the TC programs are pinned. */
#define SRC_PROG_PIN_FILE "/sys/fs/bpf/test_tc_src"
#define DST_PROG_PIN_FILE "/sys/fs/bpf/test_tc_dst"
#define CHK_PROG_PIN_FILE "/sys/fs/bpf/test_tc_chk"

/* Timeout handed to connect_to_fd() and settimeo(). */
#define TIMEOUT_MILLIS 10000
64 
/* Print a printf-style message to stderr, prefixed with the source file,
 * line number and the strerror() text of the current errno.
 */
#define log_err(MSG, ...)						\
	fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n",			\
		__FILE__, __LINE__, strerror(errno), ##__VA_ARGS__)
68 
69 static const char * const namespaces[] = {NS_SRC, NS_FWD, NS_DST, NULL};
70 
/* Overwrite the start of an existing file with the string newval.
 *
 * The file is opened with "r+", so it must already exist. The terminating
 * NUL of newval is not written.
 *
 * Returns 0 on success and -1 if opening, writing or closing fails.
 */
static int write_file(const char *path, const char *newval)
{
	FILE *f;

	f = fopen(path, "r+");
	if (!f) {
		log_err("opening %s failed", path);
		return -1;
	}
	if (fwrite(newval, strlen(newval), 1, f) != 1) {
		log_err("writing to %s failed", path);
		fclose(f);
		return -1;
	}
	/* stdio buffers the data, so a rejected write may only surface when
	 * the stream is flushed by fclose().
	 */
	if (fclose(f)) {
		log_err("closing %s failed", path);
		return -1;
	}
	return 0;
}
86 
/* Token returned by open_netns() and consumed by close_netns(). */
struct nstoken {
	/* fd of /proc/self/ns/net, opened before switching namespaces */
	int orig_netns_fd;
};
90 
setns_by_fd(int nsfd)91 static int setns_by_fd(int nsfd)
92 {
93 	int err;
94 
95 	err = setns(nsfd, CLONE_NEWNET);
96 	close(nsfd);
97 
98 	if (!ASSERT_OK(err, "setns"))
99 		return err;
100 
101 	/* Switch /sys to the new namespace so that e.g. /sys/class/net
102 	 * reflects the devices in the new namespace.
103 	 */
104 	err = unshare(CLONE_NEWNS);
105 	if (!ASSERT_OK(err, "unshare"))
106 		return err;
107 
108 	/* Make our /sys mount private, so the following umount won't
109 	 * trigger the global umount in case it's shared.
110 	 */
111 	err = mount("none", "/sys", NULL, MS_PRIVATE, NULL);
112 	if (!ASSERT_OK(err, "remount private /sys"))
113 		return err;
114 
115 	err = umount2("/sys", MNT_DETACH);
116 	if (!ASSERT_OK(err, "umount2 /sys"))
117 		return err;
118 
119 	err = mount("sysfs", "/sys", "sysfs", 0, NULL);
120 	if (!ASSERT_OK(err, "mount /sys"))
121 		return err;
122 
123 	err = mount("bpffs", "/sys/fs/bpf", "bpf", 0, NULL);
124 	if (!ASSERT_OK(err, "mount /sys/fs/bpf"))
125 		return err;
126 
127 	return 0;
128 }
129 
130 /**
131  * open_netns() - Switch to specified network namespace by name.
132  *
133  * Returns token with which to restore the original namespace
134  * using close_netns().
135  */
open_netns(const char * name)136 static struct nstoken *open_netns(const char *name)
137 {
138 	int nsfd;
139 	char nspath[PATH_MAX];
140 	int err;
141 	struct nstoken *token;
142 
143 	token = malloc(sizeof(struct nstoken));
144 	if (!ASSERT_OK_PTR(token, "malloc token"))
145 		return NULL;
146 
147 	token->orig_netns_fd = open("/proc/self/ns/net", O_RDONLY);
148 	if (!ASSERT_GE(token->orig_netns_fd, 0, "open /proc/self/ns/net"))
149 		goto fail;
150 
151 	snprintf(nspath, sizeof(nspath), "%s/%s", "/var/run/netns", name);
152 	nsfd = open(nspath, O_RDONLY | O_CLOEXEC);
153 	if (!ASSERT_GE(nsfd, 0, "open netns fd"))
154 		goto fail;
155 
156 	err = setns_by_fd(nsfd);
157 	if (!ASSERT_OK(err, "setns_by_fd"))
158 		goto fail;
159 
160 	return token;
161 fail:
162 	free(token);
163 	return NULL;
164 }
165 
close_netns(struct nstoken * token)166 static void close_netns(struct nstoken *token)
167 {
168 	ASSERT_OK(setns_by_fd(token->orig_netns_fd), "setns_by_fd");
169 	free(token);
170 }
171 
netns_setup_namespaces(const char * verb)172 static int netns_setup_namespaces(const char *verb)
173 {
174 	const char * const *ns = namespaces;
175 	char cmd[128];
176 
177 	while (*ns) {
178 		snprintf(cmd, sizeof(cmd), "ip netns %s %s", verb, *ns);
179 		if (!ASSERT_OK(system(cmd), cmd))
180 			return -1;
181 		ns++;
182 	}
183 	return 0;
184 }
185 
/* Interface indexes recorded by netns_setup_links_and_routes(). */
struct netns_setup_result {
	int ifindex_veth_src_fwd;	/* ifindex of veth_src_fwd */
	int ifindex_veth_dst_fwd;	/* ifindex of veth_dst_fwd */
};
190 
get_ifaddr(const char * name,char * ifaddr)191 static int get_ifaddr(const char *name, char *ifaddr)
192 {
193 	char path[PATH_MAX];
194 	FILE *f;
195 	int ret;
196 
197 	snprintf(path, PATH_MAX, "/sys/class/net/%s/address", name);
198 	f = fopen(path, "r");
199 	if (!ASSERT_OK_PTR(f, path))
200 		return -1;
201 
202 	ret = fread(ifaddr, 1, IFADDR_STR_LEN, f);
203 	if (!ASSERT_EQ(ret, IFADDR_STR_LEN, "fread ifaddr")) {
204 		fclose(f);
205 		return -1;
206 	}
207 	fclose(f);
208 	return 0;
209 }
210 
get_ifindex(const char * name)211 static int get_ifindex(const char *name)
212 {
213 	char path[PATH_MAX];
214 	char buf[32];
215 	FILE *f;
216 	int ret;
217 
218 	snprintf(path, PATH_MAX, "/sys/class/net/%s/ifindex", name);
219 	f = fopen(path, "r");
220 	if (!ASSERT_OK_PTR(f, path))
221 		return -1;
222 
223 	ret = fread(buf, 1, sizeof(buf), f);
224 	if (!ASSERT_GT(ret, 0, "fread ifindex")) {
225 		fclose(f);
226 		return -1;
227 	}
228 	fclose(f);
229 	return atoi(buf);
230 }
231 
/* Format a shell command (printf-style), run it with system() and jump to
 * the enclosing function's "fail" label if it does not return 0. The
 * formatted command doubles as the assertion name.
 */
#define SYS(fmt, ...)							\
	({								\
		char cmd[1024];						\
									\
		snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__);		\
		if (!ASSERT_OK(system(cmd), cmd))			\
			goto fail;					\
	})
239 
netns_setup_links_and_routes(struct netns_setup_result * result)240 static int netns_setup_links_and_routes(struct netns_setup_result *result)
241 {
242 	struct nstoken *nstoken = NULL;
243 	char veth_src_fwd_addr[IFADDR_STR_LEN+1] = {};
244 
245 	SYS("ip link add veth_src type veth peer name veth_src_fwd");
246 	SYS("ip link add veth_dst type veth peer name veth_dst_fwd");
247 
248 	SYS("ip link set veth_dst_fwd address " MAC_DST_FWD);
249 	SYS("ip link set veth_dst address " MAC_DST);
250 
251 	if (get_ifaddr("veth_src_fwd", veth_src_fwd_addr))
252 		goto fail;
253 
254 	result->ifindex_veth_src_fwd = get_ifindex("veth_src_fwd");
255 	if (result->ifindex_veth_src_fwd < 0)
256 		goto fail;
257 	result->ifindex_veth_dst_fwd = get_ifindex("veth_dst_fwd");
258 	if (result->ifindex_veth_dst_fwd < 0)
259 		goto fail;
260 
261 	SYS("ip link set veth_src netns " NS_SRC);
262 	SYS("ip link set veth_src_fwd netns " NS_FWD);
263 	SYS("ip link set veth_dst_fwd netns " NS_FWD);
264 	SYS("ip link set veth_dst netns " NS_DST);
265 
266 	/** setup in 'src' namespace */
267 	nstoken = open_netns(NS_SRC);
268 	if (!ASSERT_OK_PTR(nstoken, "setns src"))
269 		goto fail;
270 
271 	SYS("ip addr add " IP4_SRC "/32 dev veth_src");
272 	SYS("ip addr add " IP6_SRC "/128 dev veth_src nodad");
273 	SYS("ip link set dev veth_src up");
274 
275 	SYS("ip route add " IP4_DST "/32 dev veth_src scope global");
276 	SYS("ip route add " IP4_NET "/16 dev veth_src scope global");
277 	SYS("ip route add " IP6_DST "/128 dev veth_src scope global");
278 
279 	SYS("ip neigh add " IP4_DST " dev veth_src lladdr %s",
280 	    veth_src_fwd_addr);
281 	SYS("ip neigh add " IP6_DST " dev veth_src lladdr %s",
282 	    veth_src_fwd_addr);
283 
284 	close_netns(nstoken);
285 
286 	/** setup in 'fwd' namespace */
287 	nstoken = open_netns(NS_FWD);
288 	if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
289 		goto fail;
290 
291 	/* The fwd netns automatically gets a v6 LL address / routes, but also
292 	 * needs v4 one in order to start ARP probing. IP4_NET route is added
293 	 * to the endpoints so that the ARP processing will reply.
294 	 */
295 	SYS("ip addr add " IP4_SLL "/32 dev veth_src_fwd");
296 	SYS("ip addr add " IP4_DLL "/32 dev veth_dst_fwd");
297 	SYS("ip link set dev veth_src_fwd up");
298 	SYS("ip link set dev veth_dst_fwd up");
299 
300 	SYS("ip route add " IP4_SRC "/32 dev veth_src_fwd scope global");
301 	SYS("ip route add " IP6_SRC "/128 dev veth_src_fwd scope global");
302 	SYS("ip route add " IP4_DST "/32 dev veth_dst_fwd scope global");
303 	SYS("ip route add " IP6_DST "/128 dev veth_dst_fwd scope global");
304 
305 	close_netns(nstoken);
306 
307 	/** setup in 'dst' namespace */
308 	nstoken = open_netns(NS_DST);
309 	if (!ASSERT_OK_PTR(nstoken, "setns dst"))
310 		goto fail;
311 
312 	SYS("ip addr add " IP4_DST "/32 dev veth_dst");
313 	SYS("ip addr add " IP6_DST "/128 dev veth_dst nodad");
314 	SYS("ip link set dev veth_dst up");
315 
316 	SYS("ip route add " IP4_SRC "/32 dev veth_dst scope global");
317 	SYS("ip route add " IP4_NET "/16 dev veth_dst scope global");
318 	SYS("ip route add " IP6_SRC "/128 dev veth_dst scope global");
319 
320 	SYS("ip neigh add " IP4_SRC " dev veth_dst lladdr " MAC_DST_FWD);
321 	SYS("ip neigh add " IP6_SRC " dev veth_dst lladdr " MAC_DST_FWD);
322 
323 	close_netns(nstoken);
324 
325 	return 0;
326 fail:
327 	if (nstoken)
328 		close_netns(nstoken);
329 	return -1;
330 }
331 
netns_load_bpf(void)332 static int netns_load_bpf(void)
333 {
334 	SYS("tc qdisc add dev veth_src_fwd clsact");
335 	SYS("tc filter add dev veth_src_fwd ingress bpf da object-pinned "
336 	    SRC_PROG_PIN_FILE);
337 	SYS("tc filter add dev veth_src_fwd egress bpf da object-pinned "
338 	    CHK_PROG_PIN_FILE);
339 
340 	SYS("tc qdisc add dev veth_dst_fwd clsact");
341 	SYS("tc filter add dev veth_dst_fwd ingress bpf da object-pinned "
342 	    DST_PROG_PIN_FILE);
343 	SYS("tc filter add dev veth_dst_fwd egress bpf da object-pinned "
344 	    CHK_PROG_PIN_FILE);
345 
346 	return 0;
347 fail:
348 	return -1;
349 }
350 
/* TCP check: start a server on addr:port inside NS_DST, connect to it from
 * NS_SRC, send one message from the client and read it on the accepted
 * socket. Failures are reported through the ASSERT_* macros.
 */
static void test_tcp(int family, const char *addr, __u16 port)
{
	int srv_fd = -1, conn_fd = -1, cli_fd = -1;
	char msg[] = "testing testing";
	struct nstoken *tok;
	int nbytes;

	/* The listening socket is created in the dst namespace... */
	tok = open_netns(NS_DST);
	if (!ASSERT_OK_PTR(tok, "setns dst"))
		return;

	srv_fd = start_server(family, SOCK_STREAM, addr, port, 0);
	if (!ASSERT_GE(srv_fd, 0, "listen"))
		goto done;

	/* ...while the client socket is created in the src namespace. */
	close_netns(tok);
	tok = open_netns(NS_SRC);
	if (!ASSERT_OK_PTR(tok, "setns src"))
		goto done;

	cli_fd = connect_to_fd(srv_fd, TIMEOUT_MILLIS);
	if (!ASSERT_GE(cli_fd, 0, "connect_to_fd"))
		goto done;

	conn_fd = accept(srv_fd, NULL, NULL);
	if (!ASSERT_GE(conn_fd, 0, "accept"))
		goto done;

	if (!ASSERT_OK(settimeo(conn_fd, TIMEOUT_MILLIS), "settimeo"))
		goto done;

	nbytes = write(cli_fd, msg, sizeof(msg));
	if (!ASSERT_EQ(nbytes, sizeof(msg), "send to server"))
		goto done;

	nbytes = read(conn_fd, msg, sizeof(msg));
	ASSERT_EQ(nbytes, sizeof(msg), "recv from server");

done:
	if (tok)
		close_netns(tok);
	if (srv_fd >= 0)
		close(srv_fd);
	if (conn_fd >= 0)
		close(conn_fd);
	if (cli_fd >= 0)
		close(cli_fd);
}
399 
test_ping(int family,const char * addr)400 static int test_ping(int family, const char *addr)
401 {
402 	SYS("ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping_command(family), addr);
403 	return 0;
404 fail:
405 	return -1;
406 }
407 
test_connectivity(void)408 static void test_connectivity(void)
409 {
410 	test_tcp(AF_INET, IP4_DST, IP4_PORT);
411 	test_ping(AF_INET, IP4_DST);
412 	test_tcp(AF_INET6, IP6_DST, IP6_PORT);
413 	test_ping(AF_INET6, IP6_DST);
414 }
415 
/* Turn IPv4 and IPv6 forwarding on or off via /proc/sys.
 *
 * Returns 0 on success, otherwise the error returned by write_file() for the
 * first sysctl that could not be written.
 */
static int set_forwarding(bool enable)
{
	const char *val = enable ? "1" : "0";
	int err;

	/* The assertion labels name the value actually being written, so a
	 * failure report is not misleading when forwarding is being enabled.
	 */
	err = write_file("/proc/sys/net/ipv4/ip_forward", val);
	if (!ASSERT_OK(err, enable ? "set ipv4.ip_forward=1" :
				     "set ipv4.ip_forward=0"))
		return err;

	err = write_file("/proc/sys/net/ipv6/conf/all/forwarding", val);
	if (!ASSERT_OK(err, enable ? "set ipv6.forwarding=1" :
				     "set ipv6.forwarding=0"))
		return err;

	return 0;
}
430 
test_tc_redirect_neigh_fib(struct netns_setup_result * setup_result)431 static void test_tc_redirect_neigh_fib(struct netns_setup_result *setup_result)
432 {
433 	struct nstoken *nstoken = NULL;
434 	struct test_tc_neigh_fib *skel = NULL;
435 	int err;
436 
437 	nstoken = open_netns(NS_FWD);
438 	if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
439 		return;
440 
441 	skel = test_tc_neigh_fib__open();
442 	if (!ASSERT_OK_PTR(skel, "test_tc_neigh_fib__open"))
443 		goto done;
444 
445 	if (!ASSERT_OK(test_tc_neigh_fib__load(skel), "test_tc_neigh_fib__load"))
446 		goto done;
447 
448 	err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
449 	if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
450 		goto done;
451 
452 	err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
453 	if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE))
454 		goto done;
455 
456 	err = bpf_program__pin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
457 	if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE))
458 		goto done;
459 
460 	if (netns_load_bpf())
461 		goto done;
462 
463 	/* bpf_fib_lookup() checks if forwarding is enabled */
464 	if (!ASSERT_OK(set_forwarding(true), "enable forwarding"))
465 		goto done;
466 
467 	test_connectivity();
468 
469 done:
470 	if (skel)
471 		test_tc_neigh_fib__destroy(skel);
472 	close_netns(nstoken);
473 }
474 
test_tc_redirect_neigh(struct netns_setup_result * setup_result)475 static void test_tc_redirect_neigh(struct netns_setup_result *setup_result)
476 {
477 	struct nstoken *nstoken = NULL;
478 	struct test_tc_neigh *skel = NULL;
479 	int err;
480 
481 	nstoken = open_netns(NS_FWD);
482 	if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
483 		return;
484 
485 	skel = test_tc_neigh__open();
486 	if (!ASSERT_OK_PTR(skel, "test_tc_neigh__open"))
487 		goto done;
488 
489 	skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd;
490 	skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
491 
492 	err = test_tc_neigh__load(skel);
493 	if (!ASSERT_OK(err, "test_tc_neigh__load"))
494 		goto done;
495 
496 	err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
497 	if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
498 		goto done;
499 
500 	err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
501 	if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE))
502 		goto done;
503 
504 	err = bpf_program__pin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
505 	if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE))
506 		goto done;
507 
508 	if (netns_load_bpf())
509 		goto done;
510 
511 	if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
512 		goto done;
513 
514 	test_connectivity();
515 
516 done:
517 	if (skel)
518 		test_tc_neigh__destroy(skel);
519 	close_netns(nstoken);
520 }
521 
test_tc_redirect_peer(struct netns_setup_result * setup_result)522 static void test_tc_redirect_peer(struct netns_setup_result *setup_result)
523 {
524 	struct nstoken *nstoken;
525 	struct test_tc_peer *skel;
526 	int err;
527 
528 	nstoken = open_netns(NS_FWD);
529 	if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
530 		return;
531 
532 	skel = test_tc_peer__open();
533 	if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
534 		goto done;
535 
536 	skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd;
537 	skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
538 
539 	err = test_tc_peer__load(skel);
540 	if (!ASSERT_OK(err, "test_tc_peer__load"))
541 		goto done;
542 
543 	err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
544 	if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
545 		goto done;
546 
547 	err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
548 	if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE))
549 		goto done;
550 
551 	err = bpf_program__pin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
552 	if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE))
553 		goto done;
554 
555 	if (netns_load_bpf())
556 		goto done;
557 
558 	if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
559 		goto done;
560 
561 	test_connectivity();
562 
563 done:
564 	if (skel)
565 		test_tc_peer__destroy(skel);
566 	close_netns(nstoken);
567 }
568 
tun_open(char * name)569 static int tun_open(char *name)
570 {
571 	struct ifreq ifr;
572 	int fd, err;
573 
574 	fd = open("/dev/net/tun", O_RDWR);
575 	if (!ASSERT_GE(fd, 0, "open /dev/net/tun"))
576 		return -1;
577 
578 	memset(&ifr, 0, sizeof(ifr));
579 
580 	ifr.ifr_flags = IFF_TUN | IFF_NO_PI;
581 	if (*name)
582 		strncpy(ifr.ifr_name, name, IFNAMSIZ);
583 
584 	err = ioctl(fd, TUNSETIFF, &ifr);
585 	if (!ASSERT_OK(err, "ioctl TUNSETIFF"))
586 		goto fail;
587 
588 	SYS("ip link set dev %s up", name);
589 
590 	return fd;
591 fail:
592 	close(fd);
593 	return -1;
594 }
595 
/* Larger of two values. Both arguments may be evaluated twice, so they must
 * be free of side effects.
 */
#define MAX(a, b) ((a) > (b) ? (a) : (b))

/* Direction of one relay step in tun_relay_loop(). */
enum {
	SRC_TO_TARGET = 0,
	TARGET_TO_SRC = 1,
};
601 
tun_relay_loop(int src_fd,int target_fd)602 static int tun_relay_loop(int src_fd, int target_fd)
603 {
604 	fd_set rfds, wfds;
605 
606 	FD_ZERO(&rfds);
607 	FD_ZERO(&wfds);
608 
609 	for (;;) {
610 		char buf[1500];
611 		int direction, nread, nwrite;
612 
613 		FD_SET(src_fd, &rfds);
614 		FD_SET(target_fd, &rfds);
615 
616 		if (select(1 + MAX(src_fd, target_fd), &rfds, NULL, NULL, NULL) < 0) {
617 			log_err("select failed");
618 			return 1;
619 		}
620 
621 		direction = FD_ISSET(src_fd, &rfds) ? SRC_TO_TARGET : TARGET_TO_SRC;
622 
623 		nread = read(direction == SRC_TO_TARGET ? src_fd : target_fd, buf, sizeof(buf));
624 		if (nread < 0) {
625 			log_err("read failed");
626 			return 1;
627 		}
628 
629 		nwrite = write(direction == SRC_TO_TARGET ? target_fd : src_fd, buf, nread);
630 		if (nwrite != nread) {
631 			log_err("write failed");
632 			return 1;
633 		}
634 	}
635 }
636 
test_tc_redirect_peer_l3(struct netns_setup_result * setup_result)637 static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
638 {
639 	struct test_tc_peer *skel = NULL;
640 	struct nstoken *nstoken = NULL;
641 	int err;
642 	int tunnel_pid = -1;
643 	int src_fd, target_fd;
644 	int ifindex;
645 
646 	/* Start a L3 TUN/TAP tunnel between the src and dst namespaces.
647 	 * This test is using TUN/TAP instead of e.g. IPIP or GRE tunnel as those
648 	 * expose the L2 headers encapsulating the IP packet to BPF and hence
649 	 * don't have skb in suitable state for this test. Alternative to TUN/TAP
650 	 * would be e.g. Wireguard which would appear as a pure L3 device to BPF,
651 	 * but that requires much more complicated setup.
652 	 */
653 	nstoken = open_netns(NS_SRC);
654 	if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC))
655 		return;
656 
657 	src_fd = tun_open("tun_src");
658 	if (!ASSERT_GE(src_fd, 0, "tun_open tun_src"))
659 		goto fail;
660 
661 	close_netns(nstoken);
662 
663 	nstoken = open_netns(NS_FWD);
664 	if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD))
665 		goto fail;
666 
667 	target_fd = tun_open("tun_fwd");
668 	if (!ASSERT_GE(target_fd, 0, "tun_open tun_fwd"))
669 		goto fail;
670 
671 	tunnel_pid = fork();
672 	if (!ASSERT_GE(tunnel_pid, 0, "fork tun_relay_loop"))
673 		goto fail;
674 
675 	if (tunnel_pid == 0)
676 		exit(tun_relay_loop(src_fd, target_fd));
677 
678 	skel = test_tc_peer__open();
679 	if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
680 		goto fail;
681 
682 	ifindex = get_ifindex("tun_fwd");
683 	if (!ASSERT_GE(ifindex, 0, "get_ifindex tun_fwd"))
684 		goto fail;
685 
686 	skel->rodata->IFINDEX_SRC = ifindex;
687 	skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
688 
689 	err = test_tc_peer__load(skel);
690 	if (!ASSERT_OK(err, "test_tc_peer__load"))
691 		goto fail;
692 
693 	err = bpf_program__pin(skel->progs.tc_src_l3, SRC_PROG_PIN_FILE);
694 	if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
695 		goto fail;
696 
697 	err = bpf_program__pin(skel->progs.tc_dst_l3, DST_PROG_PIN_FILE);
698 	if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE))
699 		goto fail;
700 
701 	err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
702 	if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE))
703 		goto fail;
704 
705 	/* Load "tc_src_l3" to the tun_fwd interface to redirect packets
706 	 * towards dst, and "tc_dst" to redirect packets
707 	 * and "tc_chk" on veth_dst_fwd to drop non-redirected packets.
708 	 */
709 	SYS("tc qdisc add dev tun_fwd clsact");
710 	SYS("tc filter add dev tun_fwd ingress bpf da object-pinned "
711 	    SRC_PROG_PIN_FILE);
712 
713 	SYS("tc qdisc add dev veth_dst_fwd clsact");
714 	SYS("tc filter add dev veth_dst_fwd ingress bpf da object-pinned "
715 	    DST_PROG_PIN_FILE);
716 	SYS("tc filter add dev veth_dst_fwd egress bpf da object-pinned "
717 	    CHK_PROG_PIN_FILE);
718 
719 	/* Setup route and neigh tables */
720 	SYS("ip -netns " NS_SRC " addr add dev tun_src " IP4_TUN_SRC "/24");
721 	SYS("ip -netns " NS_FWD " addr add dev tun_fwd " IP4_TUN_FWD "/24");
722 
723 	SYS("ip -netns " NS_SRC " addr add dev tun_src " IP6_TUN_SRC "/64 nodad");
724 	SYS("ip -netns " NS_FWD " addr add dev tun_fwd " IP6_TUN_FWD "/64 nodad");
725 
726 	SYS("ip -netns " NS_SRC " route del " IP4_DST "/32 dev veth_src scope global");
727 	SYS("ip -netns " NS_SRC " route add " IP4_DST "/32 via " IP4_TUN_FWD
728 	    " dev tun_src scope global");
729 	SYS("ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev veth_dst scope global");
730 	SYS("ip -netns " NS_SRC " route del " IP6_DST "/128 dev veth_src scope global");
731 	SYS("ip -netns " NS_SRC " route add " IP6_DST "/128 via " IP6_TUN_FWD
732 	    " dev tun_src scope global");
733 	SYS("ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev veth_dst scope global");
734 
735 	SYS("ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD);
736 	SYS("ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD);
737 
738 	if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
739 		goto fail;
740 
741 	test_connectivity();
742 
743 fail:
744 	if (tunnel_pid > 0) {
745 		kill(tunnel_pid, SIGTERM);
746 		waitpid(tunnel_pid, NULL, 0);
747 	}
748 	if (src_fd >= 0)
749 		close(src_fd);
750 	if (target_fd >= 0)
751 		close(target_fd);
752 	if (skel)
753 		test_tc_peer__destroy(skel);
754 	if (nstoken)
755 		close_netns(nstoken);
756 }
757 
/* If subtest <name> is selected: create the namespaces, set up links and
 * routes, call test_<name>() with the setup result, then delete the
 * namespaces again. The namespaces are deleted even if link setup failed.
 */
#define RUN_TEST(name)								\
	({									\
		struct netns_setup_result setup_result;				\
										\
		if (test__start_subtest(#name) &&				\
		    ASSERT_OK(netns_setup_namespaces("add"),			\
			      "setup namespaces")) {				\
			if (ASSERT_OK(netns_setup_links_and_routes(&setup_result), \
				      "setup links and routes")) {		\
				test_ ## name(&setup_result);			\
			}							\
			netns_setup_namespaces("delete");			\
		}								\
	})
769 
test_tc_redirect_run_tests(void * arg)770 static void *test_tc_redirect_run_tests(void *arg)
771 {
772 	RUN_TEST(tc_redirect_peer);
773 	RUN_TEST(tc_redirect_peer_l3);
774 	RUN_TEST(tc_redirect_neigh);
775 	RUN_TEST(tc_redirect_neigh_fib);
776 	return NULL;
777 }
778 
test_tc_redirect(void)779 void test_tc_redirect(void)
780 {
781 	pthread_t test_thread;
782 	int err;
783 
784 	/* Run the tests in their own thread to isolate the namespace changes
785 	 * so they do not affect the environment of other tests.
786 	 * (specifically needed because of unshare(CLONE_NEWNS) in open_netns())
787 	 */
788 	err = pthread_create(&test_thread, NULL, &test_tc_redirect_run_tests, NULL);
789 	if (ASSERT_OK(err, "pthread_create"))
790 		ASSERT_OK(pthread_join(test_thread, NULL), "pthread_join");
791 }
792