• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
2 
3 /*
4  * This test sets up 3 netns (src <-> fwd <-> dst). There is no direct veth link
5  * between src and dst. The netns fwd has veth links to each src and dst. The
6  * client is in src and server in dst. The test installs a TC BPF program to each
7  * host facing veth in fwd which calls into i) bpf_redirect_neigh() to perform the
8  * neigh addr population and redirect or ii) bpf_redirect_peer() for namespace
9  * switch from ingress side; it also installs a checker prog on the egress side
10  * to drop unexpected traffic.
11  */
12 
13 #include <arpa/inet.h>
14 #include <linux/if_tun.h>
15 #include <linux/limits.h>
16 #include <linux/sysctl.h>
17 #include <linux/time_types.h>
18 #include <linux/net_tstamp.h>
19 #include <net/if.h>
20 #include <stdbool.h>
21 #include <stdio.h>
22 #include <sys/stat.h>
23 #include <unistd.h>
24 
25 #include "test_progs.h"
26 #include "network_helpers.h"
27 #include "netlink_helpers.h"
28 #include "test_tc_neigh_fib.skel.h"
29 #include "test_tc_neigh.skel.h"
30 #include "test_tc_peer.skel.h"
31 #include "test_tc_dtime.skel.h"
32 
33 #ifndef TCP_TX_DELAY
34 #define TCP_TX_DELAY 37
35 #endif
36 
37 #define NS_SRC "ns_src"
38 #define NS_FWD "ns_fwd"
39 #define NS_DST "ns_dst"
40 
41 #define IP4_SRC "172.16.1.100"
42 #define IP4_DST "172.16.2.100"
43 #define IP4_TUN_SRC "172.17.1.100"
44 #define IP4_TUN_FWD "172.17.1.200"
45 #define IP4_PORT 9004
46 
47 #define IP6_SRC "0::1:dead:beef:cafe"
48 #define IP6_DST "0::2:dead:beef:cafe"
49 #define IP6_TUN_SRC "1::1:dead:beef:cafe"
50 #define IP6_TUN_FWD "1::2:dead:beef:cafe"
51 #define IP6_PORT 9006
52 
53 #define IP4_SLL "169.254.0.1"
54 #define IP4_DLL "169.254.0.2"
55 #define IP4_NET "169.254.0.0"
56 
57 #define MAC_DST_FWD "00:11:22:33:44:55"
58 #define MAC_DST "00:22:33:44:55:66"
59 
60 #define IFADDR_STR_LEN 18
61 #define PING_ARGS "-i 0.2 -c 3 -w 10 -q"
62 
63 #define TIMEOUT_MILLIS 10000
64 #define NSEC_PER_SEC 1000000000ULL
65 
/*
 * Print MSG (a printf-style format literal with optional arguments) to
 * stderr, prefixed with the source file, the line number and
 * strerror(errno), and terminated with a newline.
 */
#define log_err(MSG, ...) \
	fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
		__FILE__, __LINE__, strerror(errno), ##__VA_ARGS__)
69 
/* NULL-terminated list of the network namespace names used by this test. */
static const char * const namespaces[] = {NS_SRC, NS_FWD, NS_DST, NULL};
71 
/*
 * Write the string @newval at the start of the existing file @path
 * (for example the /proc/sys entries written by set_forwarding()).
 *
 * The file is opened with "r+", so it must already exist.
 *
 * Returns 0 on success, -1 if the file cannot be opened, the data cannot be
 * written, or the stream cannot be flushed and closed.
 */
static int write_file(const char *path, const char *newval)
{
	int ret = 0;
	FILE *f;

	f = fopen(path, "r+");
	if (!f)
		return -1;

	if (fwrite(newval, strlen(newval), 1, f) != 1) {
		log_err("writing to %s failed", path);
		ret = -1;
	}

	/* stdio buffers the data, so a write that is rejected by the
	 * underlying file may only be reported when the stream is flushed
	 * on close. Always close, but only log the first failure.
	 */
	if (fclose(f) && !ret) {
		log_err("closing %s failed", path);
		ret = -1;
	}

	return ret;
}
87 
netns_setup_namespaces(const char * verb)88 static int netns_setup_namespaces(const char *verb)
89 {
90 	const char * const *ns = namespaces;
91 	char cmd[128];
92 
93 	while (*ns) {
94 		snprintf(cmd, sizeof(cmd), "ip netns %s %s", verb, *ns);
95 		if (!ASSERT_OK(system(cmd), cmd))
96 			return -1;
97 		ns++;
98 	}
99 	return 0;
100 }
101 
netns_setup_namespaces_nofail(const char * verb)102 static void netns_setup_namespaces_nofail(const char *verb)
103 {
104 	const char * const *ns = namespaces;
105 	char cmd[128];
106 
107 	while (*ns) {
108 		snprintf(cmd, sizeof(cmd), "ip netns %s %s > /dev/null 2>&1", verb, *ns);
109 		system(cmd);
110 		ns++;
111 	}
112 }
113 
/*
 * Kind of device pair that netns_setup_links_and_routes() creates between
 * the namespaces.
 */
enum dev_mode {
	MODE_VETH,	/* pairs created with "ip link add ... type veth" */
	MODE_NETKIT,	/* pairs created with create_netkit() */
};
118 
/*
 * Input/output of netns_setup_links_and_routes(): the caller chooses
 * dev_mode, the setup code fills in the interface indexes.
 */
struct netns_setup_result {
	enum dev_mode dev_mode;	/* which kind of device pair to create */
	int ifindex_src;	/* if_nametoindex("src") */
	int ifindex_src_fwd;	/* if_nametoindex("src_fwd") */
	int ifindex_dst;	/* if_nametoindex("dst") */
	int ifindex_dst_fwd;	/* if_nametoindex("dst_fwd") */
};
126 
get_ifaddr(const char * name,char * ifaddr)127 static int get_ifaddr(const char *name, char *ifaddr)
128 {
129 	char path[PATH_MAX];
130 	FILE *f;
131 	int ret;
132 
133 	snprintf(path, PATH_MAX, "/sys/class/net/%s/address", name);
134 	f = fopen(path, "r");
135 	if (!ASSERT_OK_PTR(f, path))
136 		return -1;
137 
138 	ret = fread(ifaddr, 1, IFADDR_STR_LEN, f);
139 	if (!ASSERT_EQ(ret, IFADDR_STR_LEN, "fread ifaddr")) {
140 		fclose(f);
141 		return -1;
142 	}
143 	fclose(f);
144 	return 0;
145 }
146 
/*
 * Create a "netkit" link named @prim with a peer named @peer by sending a
 * single RTM_NEWLINK request over rtnetlink.
 *
 * @mode is passed through as the IFLA_NETKIT_MODE attribute.
 *
 * Returns 0 on success, otherwise the error returned by rtnl_open() or
 * rtnl_talk() (both are also recorded through ASSERT_OK).
 */
static int create_netkit(int mode, char *prim, char *peer)
{
	struct rtattr *linkinfo, *data, *peer_info;
	struct rtnl_handle rth = { .fd = -1 };
	const char *type = "netkit";
	struct {
		struct nlmsghdr n;
		struct ifinfomsg i;
		char buf[1024];
	} req = {};
	int err;

	err = rtnl_open(&rth, 0);
	if (!ASSERT_OK(err, "open_rtnetlink"))
		return err;

	/* Netlink header + ifinfomsg; attributes are appended after them. */
	memset(&req, 0, sizeof(req));
	req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
	req.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
	req.n.nlmsg_type = RTM_NEWLINK;
	req.i.ifi_family = AF_UNSPEC;

	/* Attribute layout:
	 *   IFLA_IFNAME = prim
	 *   IFLA_LINKINFO
	 *     IFLA_INFO_KIND = "netkit"
	 *     IFLA_INFO_DATA
	 *       IFLA_NETKIT_MODE = mode
	 *       IFLA_NETKIT_PEER_INFO
	 *         <zeroed struct ifinfomsg>, IFLA_IFNAME = peer
	 */
	addattr_l(&req.n, sizeof(req), IFLA_IFNAME, prim, strlen(prim));
	linkinfo = addattr_nest(&req.n, sizeof(req), IFLA_LINKINFO);
	addattr_l(&req.n, sizeof(req), IFLA_INFO_KIND, type, strlen(type));
	data = addattr_nest(&req.n, sizeof(req), IFLA_INFO_DATA);
	addattr32(&req.n, sizeof(req), IFLA_NETKIT_MODE, mode);
	peer_info = addattr_nest(&req.n, sizeof(req), IFLA_NETKIT_PEER_INFO);
	/* Leave room for a struct ifinfomsg at the start of the peer nest;
	 * it stays all-zero from the memset() above.
	 * NOTE(review): presumably the kernel expects the peer's ifinfomsg
	 * here before the peer attributes - confirm against the netkit
	 * netlink handling.
	 */
	req.n.nlmsg_len += sizeof(struct ifinfomsg);
	addattr_l(&req.n, sizeof(req), IFLA_IFNAME, peer, strlen(peer));
	addattr_nest_end(&req.n, peer_info);
	addattr_nest_end(&req.n, data);
	addattr_nest_end(&req.n, linkinfo);

	err = rtnl_talk(&rth, &req.n, NULL);
	ASSERT_OK(err, "talk_rtnetlink");
	rtnl_close(&rth);
	return err;
}
186 
/*
 * Create the src<->src_fwd and dst<->dst_fwd device pairs (veth or netkit,
 * selected by result->dev_mode), move them into NS_SRC / NS_FWD / NS_DST and
 * configure addresses, routes and (veth mode only) static neighbour entries
 * in each namespace.
 *
 * On success the four ifindex_* fields of @result are filled in.
 *
 * Returns 0 on success, -1 on the first failing step. Any namespace token
 * that is still open is closed before returning.
 */
static int netns_setup_links_and_routes(struct netns_setup_result *result)
{
	struct nstoken *nstoken = NULL;
	char src_fwd_addr[IFADDR_STR_LEN+1] = {};
	char src_addr[IFADDR_STR_LEN + 1] = {};
	int err;

	/* Create both device pairs in the current namespace. */
	if (result->dev_mode == MODE_VETH) {
		SYS(fail, "ip link add src type veth peer name src_fwd");
		SYS(fail, "ip link add dst type veth peer name dst_fwd");

		/* Fixed MACs so they can be used in the static neighbour entries below. */
		SYS(fail, "ip link set dst_fwd address " MAC_DST_FWD);
		SYS(fail, "ip link set dst address " MAC_DST);
	} else if (result->dev_mode == MODE_NETKIT) {
		err = create_netkit(NETKIT_L3, "src", "src_fwd");
		if (!ASSERT_OK(err, "create_ifindex_src"))
			goto fail;
		err = create_netkit(NETKIT_L3, "dst", "dst_fwd");
		if (!ASSERT_OK(err, "create_ifindex_dst"))
			goto fail;
	}

	/* Link-layer addresses of src_fwd/src, read from sysfs; used for the
	 * veth-mode neighbour entries below.
	 */
	if (get_ifaddr("src_fwd", src_fwd_addr))
		goto fail;

	if (get_ifaddr("src", src_addr))
		goto fail;

	/* Record the ifindexes before the devices are moved to their namespaces. */
	result->ifindex_src = if_nametoindex("src");
	if (!ASSERT_GT(result->ifindex_src, 0, "ifindex_src"))
		goto fail;

	result->ifindex_src_fwd = if_nametoindex("src_fwd");
	if (!ASSERT_GT(result->ifindex_src_fwd, 0, "ifindex_src_fwd"))
		goto fail;

	result->ifindex_dst = if_nametoindex("dst");
	if (!ASSERT_GT(result->ifindex_dst, 0, "ifindex_dst"))
		goto fail;

	result->ifindex_dst_fwd = if_nametoindex("dst_fwd");
	if (!ASSERT_GT(result->ifindex_dst_fwd, 0, "ifindex_dst_fwd"))
		goto fail;

	/* src -> NS_SRC, src_fwd and dst_fwd -> NS_FWD, dst -> NS_DST */
	SYS(fail, "ip link set src netns " NS_SRC);
	SYS(fail, "ip link set src_fwd netns " NS_FWD);
	SYS(fail, "ip link set dst_fwd netns " NS_FWD);
	SYS(fail, "ip link set dst netns " NS_DST);

	/* setup in 'src' namespace */
	nstoken = open_netns(NS_SRC);
	if (!ASSERT_OK_PTR(nstoken, "setns src"))
		goto fail;

	SYS(fail, "ip addr add " IP4_SRC "/32 dev src");
	SYS(fail, "ip addr add " IP6_SRC "/128 dev src nodad");
	SYS(fail, "ip link set dev src up");

	SYS(fail, "ip route add " IP4_DST "/32 dev src scope global");
	SYS(fail, "ip route add " IP4_NET "/16 dev src scope global");
	SYS(fail, "ip route add " IP6_DST "/128 dev src scope global");

	/* Static neighbour entries are only installed in veth mode. */
	if (result->dev_mode == MODE_VETH) {
		SYS(fail, "ip neigh add " IP4_DST " dev src lladdr %s",
		    src_fwd_addr);
		SYS(fail, "ip neigh add " IP6_DST " dev src lladdr %s",
		    src_fwd_addr);
	}

	close_netns(nstoken);

	/* setup in 'fwd' namespace */
	nstoken = open_netns(NS_FWD);
	if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
		goto fail;

	/* The fwd netns automatically gets a v6 LL address / routes, but also
	 * needs v4 one in order to start ARP probing. IP4_NET route is added
	 * to the endpoints so that the ARP processing will reply.
	 */
	SYS(fail, "ip addr add " IP4_SLL "/32 dev src_fwd");
	SYS(fail, "ip addr add " IP4_DLL "/32 dev dst_fwd");
	SYS(fail, "ip link set dev src_fwd up");
	SYS(fail, "ip link set dev dst_fwd up");

	SYS(fail, "ip route add " IP4_SRC "/32 dev src_fwd scope global");
	SYS(fail, "ip route add " IP6_SRC "/128 dev src_fwd scope global");
	SYS(fail, "ip route add " IP4_DST "/32 dev dst_fwd scope global");
	SYS(fail, "ip route add " IP6_DST "/128 dev dst_fwd scope global");

	if (result->dev_mode == MODE_VETH) {
		SYS(fail, "ip neigh add " IP4_SRC " dev src_fwd lladdr %s", src_addr);
		SYS(fail, "ip neigh add " IP6_SRC " dev src_fwd lladdr %s", src_addr);
		SYS(fail, "ip neigh add " IP4_DST " dev dst_fwd lladdr %s", MAC_DST);
		SYS(fail, "ip neigh add " IP6_DST " dev dst_fwd lladdr %s", MAC_DST);
	}

	close_netns(nstoken);

	/* setup in 'dst' namespace */
	nstoken = open_netns(NS_DST);
	if (!ASSERT_OK_PTR(nstoken, "setns dst"))
		goto fail;

	SYS(fail, "ip addr add " IP4_DST "/32 dev dst");
	SYS(fail, "ip addr add " IP6_DST "/128 dev dst nodad");
	SYS(fail, "ip link set dev dst up");
	SYS(fail, "ip link set dev lo up");

	SYS(fail, "ip route add " IP4_SRC "/32 dev dst scope global");
	SYS(fail, "ip route add " IP4_NET "/16 dev dst scope global");
	SYS(fail, "ip route add " IP6_SRC "/128 dev dst scope global");

	if (result->dev_mode == MODE_VETH) {
		SYS(fail, "ip neigh add " IP4_SRC " dev dst lladdr " MAC_DST_FWD);
		SYS(fail, "ip neigh add " IP6_SRC " dev dst lladdr " MAC_DST_FWD);
	}

	close_netns(nstoken);

	return 0;
fail:
	/* nstoken is NULL when the failure happened outside any namespace
	 * or when open_netns() itself failed.
	 */
	if (nstoken)
		close_netns(nstoken);
	return -1;
}
313 
qdisc_clsact_create(struct bpf_tc_hook * qdisc_hook,int ifindex)314 static int qdisc_clsact_create(struct bpf_tc_hook *qdisc_hook, int ifindex)
315 {
316 	char err_str[128], ifname[16];
317 	int err;
318 
319 	qdisc_hook->ifindex = ifindex;
320 	qdisc_hook->attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS;
321 	err = bpf_tc_hook_create(qdisc_hook);
322 	snprintf(err_str, sizeof(err_str),
323 		 "qdisc add dev %s clsact",
324 		 if_indextoname(qdisc_hook->ifindex, ifname) ? : "<unknown_iface>");
325 	err_str[sizeof(err_str) - 1] = 0;
326 	ASSERT_OK(err, err_str);
327 
328 	return err;
329 }
330 
xgress_filter_add(struct bpf_tc_hook * qdisc_hook,enum bpf_tc_attach_point xgress,const struct bpf_program * prog,int priority)331 static int xgress_filter_add(struct bpf_tc_hook *qdisc_hook,
332 			     enum bpf_tc_attach_point xgress,
333 			     const struct bpf_program *prog, int priority)
334 {
335 	LIBBPF_OPTS(bpf_tc_opts, tc_attach);
336 	char err_str[128], ifname[16];
337 	int err;
338 
339 	qdisc_hook->attach_point = xgress;
340 	tc_attach.prog_fd = bpf_program__fd(prog);
341 	tc_attach.priority = priority;
342 	err = bpf_tc_attach(qdisc_hook, &tc_attach);
343 	snprintf(err_str, sizeof(err_str),
344 		 "filter add dev %s %s prio %d bpf da %s",
345 		 if_indextoname(qdisc_hook->ifindex, ifname) ? : "<unknown_iface>",
346 		 xgress == BPF_TC_INGRESS ? "ingress" : "egress",
347 		 priority, bpf_program__name(prog));
348 	err_str[sizeof(err_str) - 1] = 0;
349 	ASSERT_OK(err, err_str);
350 
351 	return err;
352 }
353 
/*
 * Call qdisc_clsact_create() and jump to the caller's "fail" label when it
 * returns non-zero. The enclosing function must declare an "int err"
 * (which receives the result) and provide a "fail" label.
 */
#define QDISC_CLSACT_CREATE(qdisc_hook, ifindex) ({		\
	if ((err = qdisc_clsact_create(qdisc_hook, ifindex)))	\
		goto fail;					\
})
358 
/*
 * Call xgress_filter_add() and jump to the caller's "fail" label when it
 * returns non-zero. The enclosing function must declare an "int err"
 * (which receives the result) and provide a "fail" label.
 */
#define XGRESS_FILTER_ADD(qdisc_hook, xgress, prog, priority) ({		\
	if ((err = xgress_filter_add(qdisc_hook, xgress, prog, priority)))	\
		goto fail;							\
})
363 
netns_load_bpf(const struct bpf_program * src_prog,const struct bpf_program * dst_prog,const struct bpf_program * chk_prog,const struct netns_setup_result * setup_result)364 static int netns_load_bpf(const struct bpf_program *src_prog,
365 			  const struct bpf_program *dst_prog,
366 			  const struct bpf_program *chk_prog,
367 			  const struct netns_setup_result *setup_result)
368 {
369 	LIBBPF_OPTS(bpf_tc_hook, qdisc_src_fwd);
370 	LIBBPF_OPTS(bpf_tc_hook, qdisc_dst_fwd);
371 	int err;
372 
373 	/* tc qdisc add dev src_fwd clsact */
374 	QDISC_CLSACT_CREATE(&qdisc_src_fwd, setup_result->ifindex_src_fwd);
375 	/* tc filter add dev src_fwd ingress bpf da src_prog */
376 	XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS, src_prog, 0);
377 	/* tc filter add dev src_fwd egress bpf da chk_prog */
378 	XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_EGRESS, chk_prog, 0);
379 
380 	/* tc qdisc add dev dst_fwd clsact */
381 	QDISC_CLSACT_CREATE(&qdisc_dst_fwd, setup_result->ifindex_dst_fwd);
382 	/* tc filter add dev dst_fwd ingress bpf da dst_prog */
383 	XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS, dst_prog, 0);
384 	/* tc filter add dev dst_fwd egress bpf da chk_prog */
385 	XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS, chk_prog, 0);
386 
387 	return 0;
388 fail:
389 	return -1;
390 }
391 
/*
 * TCP round trip between the namespaces: start a listening socket on
 * @addr:@port inside NS_DST, connect to it from inside NS_SRC, send a short
 * message and check that the accepted socket receives all of it.
 *
 * Every step is checked with an ASSERT_* macro; the first failure skips the
 * remaining steps and goes straight to cleanup.
 */
static void test_tcp(int family, const char *addr, __u16 port)
{
	int srv_fd = -1, conn_fd = -1, cli_fd = -1;
	char payload[] = "testing testing";
	struct nstoken *ns;
	int nbytes;

	/* Server side lives in the dst namespace. */
	ns = open_netns(NS_DST);
	if (!ASSERT_OK_PTR(ns, "setns dst"))
		return;

	srv_fd = start_server(family, SOCK_STREAM, addr, port, 0);
	if (!ASSERT_GE(srv_fd, 0, "listen"))
		goto cleanup;

	/* Switch to the src namespace for the client side. */
	close_netns(ns);
	ns = open_netns(NS_SRC);
	if (!ASSERT_OK_PTR(ns, "setns src"))
		goto cleanup;

	cli_fd = connect_to_fd(srv_fd, TIMEOUT_MILLIS);
	if (!ASSERT_GE(cli_fd, 0, "connect_to_fd"))
		goto cleanup;

	conn_fd = accept(srv_fd, NULL, NULL);
	if (!ASSERT_GE(conn_fd, 0, "accept"))
		goto cleanup;

	if (!ASSERT_OK(settimeo(conn_fd, TIMEOUT_MILLIS), "settimeo"))
		goto cleanup;

	nbytes = write(cli_fd, payload, sizeof(payload));
	if (!ASSERT_EQ(nbytes, sizeof(payload), "send to server"))
		goto cleanup;

	nbytes = read(conn_fd, payload, sizeof(payload));
	ASSERT_EQ(nbytes, sizeof(payload), "recv from server");

cleanup:
	if (ns)
		close_netns(ns);
	if (srv_fd >= 0)
		close(srv_fd);
	if (conn_fd >= 0)
		close(conn_fd);
	if (cli_fd >= 0)
		close(cli_fd);
}
440 
/*
 * Run the ping command selected by ping_command(@family) inside NS_SRC
 * against @addr, with PING_ARGS and stdout discarded.
 *
 * Returns 0 on success, -1 when SYS() jumps to the fail label.
 */
static int test_ping(int family, const char *addr)
{
	SYS(fail, "ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping_command(family), addr);
	return 0;
fail:
	return -1;
}
448 
test_connectivity(void)449 static void test_connectivity(void)
450 {
451 	test_tcp(AF_INET, IP4_DST, IP4_PORT);
452 	test_ping(AF_INET, IP4_DST);
453 	test_tcp(AF_INET6, IP6_DST, IP6_PORT);
454 	test_ping(AF_INET6, IP6_DST);
455 }
456 
/*
 * Enable or disable IPv4 and IPv6 forwarding by writing "1" or "0" to
 * /proc/sys/net/ipv4/ip_forward and /proc/sys/net/ipv6/conf/all/forwarding.
 *
 * The files are opened by path from the calling process; callers in this
 * file enter the target namespace with open_netns() first.
 *
 * Returns 0 on success, otherwise the non-zero result of the first
 * write_file() call that failed (also recorded through ASSERT_OK).
 */
static int set_forwarding(bool enable)
{
	const char *val = enable ? "1" : "0";
	int err;

	/* The assertion labels name the value actually written, so that a
	 * failure to enable forwarding is not reported as "=0".
	 */
	err = write_file("/proc/sys/net/ipv4/ip_forward", val);
	if (!ASSERT_OK(err, enable ? "set ipv4.ip_forward=1" :
				     "set ipv4.ip_forward=0"))
		return err;

	err = write_file("/proc/sys/net/ipv6/conf/all/forwarding", val);
	if (!ASSERT_OK(err, enable ? "set ipv6.forwarding=1" :
				     "set ipv6.forwarding=0"))
		return err;

	return 0;
}
471 
/*
 * Receive one message on @fd, check that it is @s bytes long and starts
 * with @expected, and extract the SO_TIMESTAMPNS_NEW receive timestamp from
 * the first control message (0 when no such control message is present).
 *
 * When @tstamp is non-NULL the timestamp in nanoseconds is stored there and
 * validation is left to the caller. Otherwise the timestamp is asserted to
 * be non-zero, not later than CLOCK_REALTIME "now" and less than 5 seconds
 * old.
 *
 * Returns -1 when recvmsg() did not return @s bytes, 0 otherwise.
 */
static int __rcv_tstamp(int fd, const char *expected, size_t s, __u64 *tstamp)
{
	struct __kernel_timespec rx_ts = {};
	char cbuf[CMSG_SPACE(sizeof(rx_ts))];
	char payload[32];
	struct iovec vec = {
		.iov_base = payload,
		.iov_len = sizeof(payload),
	};
	struct msghdr hdr = {
		.msg_iov = &vec,
		.msg_iovlen = 1,
		.msg_control = cbuf,
		.msg_controllen = sizeof(cbuf),
	};
	struct timespec now;
	struct cmsghdr *cm;
	__u64 now_ns, rx_ns;
	bool has_ts;
	int ret;

	ret = recvmsg(fd, &hdr, 0);
	if (!ASSERT_EQ(ret, s, "recvmsg"))
		return -1;
	ASSERT_STRNEQ(payload, expected, s, "expected rcv data");

	/* Only the first control message is inspected. */
	cm = CMSG_FIRSTHDR(&hdr);
	has_ts = cm && cm->cmsg_level == SOL_SOCKET &&
		 cm->cmsg_type == SO_TIMESTAMPNS_NEW;
	if (has_ts)
		memcpy(&rx_ts, CMSG_DATA(cm), sizeof(rx_ts));

	rx_ns = rx_ts.tv_sec * NSEC_PER_SEC + rx_ts.tv_nsec;
	if (tstamp) {
		/* caller will check the tstamp itself */
		*tstamp = rx_ns;
		return 0;
	}

	ASSERT_NEQ(rx_ns, 0, "pkt rcv tstamp");

	ret = clock_gettime(CLOCK_REALTIME, &now);
	ASSERT_OK(ret, "clock_gettime");
	now_ns = now.tv_sec * NSEC_PER_SEC + now.tv_nsec;

	if (ASSERT_GE(now_ns, rx_ns, "check rcv tstamp"))
		ASSERT_LT(now_ns - rx_ns, 5 * NSEC_PER_SEC,
			  "check rcv tstamp");
	return 0;
}
519 
/*
 * Receive one message on @fd and let __rcv_tstamp() validate both the
 * payload and the receive timestamp (NULL tstamp argument). The return
 * value is ignored; failures are recorded by the assertions inside.
 */
static void rcv_tstamp(int fd, const char *expected, size_t s)
{
	__rcv_tstamp(fd, expected, s, NULL);
}
524 
wait_netstamp_needed_key(void)525 static int wait_netstamp_needed_key(void)
526 {
527 	int opt = 1, srv_fd = -1, cli_fd = -1, nretries = 0, err, n;
528 	char buf[] = "testing testing";
529 	struct nstoken *nstoken;
530 	__u64 tstamp = 0;
531 
532 	nstoken = open_netns(NS_DST);
533 	if (!nstoken)
534 		return -1;
535 
536 	srv_fd = start_server(AF_INET6, SOCK_DGRAM, "::1", 0, 0);
537 	if (!ASSERT_GE(srv_fd, 0, "start_server"))
538 		goto done;
539 
540 	err = setsockopt(srv_fd, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
541 			 &opt, sizeof(opt));
542 	if (!ASSERT_OK(err, "setsockopt(SO_TIMESTAMPNS_NEW)"))
543 		goto done;
544 
545 	cli_fd = connect_to_fd(srv_fd, TIMEOUT_MILLIS);
546 	if (!ASSERT_GE(cli_fd, 0, "connect_to_fd"))
547 		goto done;
548 
549 again:
550 	n = write(cli_fd, buf, sizeof(buf));
551 	if (!ASSERT_EQ(n, sizeof(buf), "send to server"))
552 		goto done;
553 	err = __rcv_tstamp(srv_fd, buf, sizeof(buf), &tstamp);
554 	if (!ASSERT_OK(err, "__rcv_tstamp"))
555 		goto done;
556 	if (!tstamp && nretries++ < 5) {
557 		sleep(1);
558 		printf("netstamp_needed_key retry#%d\n", nretries);
559 		goto again;
560 	}
561 
562 done:
563 	if (!tstamp && srv_fd != -1) {
564 		close(srv_fd);
565 		srv_fd = -1;
566 	}
567 	if (cli_fd != -1)
568 		close(cli_fd);
569 	close_netns(nstoken);
570 	return srv_fd;
571 }
572 
snd_tstamp(int fd,char * b,size_t s)573 static void snd_tstamp(int fd, char *b, size_t s)
574 {
575 	struct sock_txtime opt = { .clockid = CLOCK_TAI };
576 	char ctl[CMSG_SPACE(sizeof(__u64))];
577 	struct timespec now_ts;
578 	struct msghdr msg = {};
579 	struct cmsghdr *cmsg;
580 	struct iovec iov;
581 	__u64 now_ns;
582 	int ret;
583 
584 	ret = clock_gettime(CLOCK_TAI, &now_ts);
585 	ASSERT_OK(ret, "clock_get_time(CLOCK_TAI)");
586 	now_ns = now_ts.tv_sec * NSEC_PER_SEC + now_ts.tv_nsec;
587 
588 	iov.iov_base = b;
589 	iov.iov_len = s;
590 	msg.msg_iov = &iov;
591 	msg.msg_iovlen = 1;
592 	msg.msg_control = &ctl;
593 	msg.msg_controllen = sizeof(ctl);
594 
595 	cmsg = CMSG_FIRSTHDR(&msg);
596 	cmsg->cmsg_level = SOL_SOCKET;
597 	cmsg->cmsg_type = SCM_TXTIME;
598 	cmsg->cmsg_len = CMSG_LEN(sizeof(now_ns));
599 	*(__u64 *)CMSG_DATA(cmsg) = now_ns;
600 
601 	ret = setsockopt(fd, SOL_SOCKET, SO_TXTIME, &opt, sizeof(opt));
602 	ASSERT_OK(ret, "setsockopt(SO_TXTIME)");
603 
604 	ret = sendmsg(fd, &msg, 0);
605 	ASSERT_EQ(ret, s, "sendmsg");
606 }
607 
/*
 * Send one message from NS_SRC to a server socket created in NS_DST and
 * check the receive timestamp on the server side.
 *
 * @family, @type, @addr and @port are passed to start_server(). For
 * SOCK_STREAM the message is written on the connected client socket and
 * read from the accepted socket; for any other @type it is sent with
 * snd_tstamp() and read from the server socket itself.
 *
 * Failures are recorded through the ASSERT_* macros.
 */
static void test_inet_dtime(int family, int type, const char *addr, __u16 port)
{
	int opt = 1, accept_fd = -1, client_fd = -1, listen_fd, err;
	char buf[] = "testing testing";
	struct nstoken *nstoken;

	/* The server socket is created while inside the dst namespace. */
	nstoken = open_netns(NS_DST);
	if (!ASSERT_OK_PTR(nstoken, "setns dst"))
		return;
	listen_fd = start_server(family, type, addr, port, 0);
	close_netns(nstoken);

	if (!ASSERT_GE(listen_fd, 0, "listen"))
		return;

	/* Ensure the kernel puts the (rcv) timestamp for all skb */
	err = setsockopt(listen_fd, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
			 &opt, sizeof(opt));
	if (!ASSERT_OK(err, "setsockopt(SO_TIMESTAMPNS_NEW)"))
		goto done;

	if (type == SOCK_STREAM) {
		/* Ensure the kernel set EDT when sending out rst/ack
		 * from the kernel's ctl_sk.
		 */
		err = setsockopt(listen_fd, SOL_TCP, TCP_TX_DELAY, &opt,
				 sizeof(opt));
		if (!ASSERT_OK(err, "setsockopt(TCP_TX_DELAY)"))
			goto done;
	}

	/* The client socket is created while inside the src namespace. */
	nstoken = open_netns(NS_SRC);
	if (!ASSERT_OK_PTR(nstoken, "setns src"))
		goto done;
	client_fd = connect_to_fd(listen_fd, TIMEOUT_MILLIS);
	close_netns(nstoken);

	if (!ASSERT_GE(client_fd, 0, "connect_to_fd"))
		goto done;

	if (type == SOCK_STREAM) {
		int n;

		accept_fd = accept(listen_fd, NULL, NULL);
		if (!ASSERT_GE(accept_fd, 0, "accept"))
			goto done;

		n = write(client_fd, buf, sizeof(buf));
		if (!ASSERT_EQ(n, sizeof(buf), "send to server"))
			goto done;
		rcv_tstamp(accept_fd, buf, sizeof(buf));
	} else {
		snd_tstamp(client_fd, buf, sizeof(buf));
		rcv_tstamp(listen_fd, buf, sizeof(buf));
	}

done:
	/* listen_fd is always valid here; the other two may still be -1. */
	close(listen_fd);
	if (accept_fd != -1)
		close(accept_fd);
	if (client_fd != -1)
		close(client_fd);
}
671 
/*
 * Attach the test_tc_dtime programs in all three namespaces:
 *
 *   NS_SRC / NS_DST: ingress_host and egress_host on dev src / dst (prio 0)
 *   NS_FWD:          ingress_fwdns_prio100/101 and egress_fwdns_prio100/101
 *                    on both dst_fwd and src_fwd (prio 100 and 101)
 *
 * Returns 0 on success, -1 when a namespace cannot be entered, and the
 * error from the failing qdisc/filter step otherwise. The namespace token
 * is closed on every path where it was successfully opened.
 */
static int netns_load_dtime_bpf(struct test_tc_dtime *skel,
				const struct netns_setup_result *setup_result)
{
	LIBBPF_OPTS(bpf_tc_hook, qdisc_src_fwd);
	LIBBPF_OPTS(bpf_tc_hook, qdisc_dst_fwd);
	LIBBPF_OPTS(bpf_tc_hook, qdisc_src);
	LIBBPF_OPTS(bpf_tc_hook, qdisc_dst);
	struct nstoken *nstoken;
	int err;

	/* setup ns_src tc progs */
	nstoken = open_netns(NS_SRC);
	if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC))
		return -1;
	/* tc qdisc add dev src clsact */
	QDISC_CLSACT_CREATE(&qdisc_src, setup_result->ifindex_src);
	/* tc filter add dev src ingress bpf da ingress_host */
	XGRESS_FILTER_ADD(&qdisc_src, BPF_TC_INGRESS, skel->progs.ingress_host, 0);
	/* tc filter add dev src egress bpf da egress_host */
	XGRESS_FILTER_ADD(&qdisc_src, BPF_TC_EGRESS, skel->progs.egress_host, 0);
	close_netns(nstoken);

	/* setup ns_dst tc progs */
	nstoken = open_netns(NS_DST);
	if (!ASSERT_OK_PTR(nstoken, "setns " NS_DST))
		return -1;
	/* tc qdisc add dev dst clsact */
	QDISC_CLSACT_CREATE(&qdisc_dst, setup_result->ifindex_dst);
	/* tc filter add dev dst ingress bpf da ingress_host */
	XGRESS_FILTER_ADD(&qdisc_dst, BPF_TC_INGRESS, skel->progs.ingress_host, 0);
	/* tc filter add dev dst egress bpf da egress_host */
	XGRESS_FILTER_ADD(&qdisc_dst, BPF_TC_EGRESS, skel->progs.egress_host, 0);
	close_netns(nstoken);

	/* setup ns_fwd tc progs */
	nstoken = open_netns(NS_FWD);
	if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD))
		return -1;
	/* tc qdisc add dev dst_fwd clsact */
	QDISC_CLSACT_CREATE(&qdisc_dst_fwd, setup_result->ifindex_dst_fwd);
	/* tc filter add dev dst_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */
	XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS,
			  skel->progs.ingress_fwdns_prio100, 100);
	/* tc filter add dev dst_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */
	XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS,
			  skel->progs.ingress_fwdns_prio101, 101);
	/* tc filter add dev dst_fwd egress prio 100 bpf da egress_fwdns_prio100 */
	XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS,
			  skel->progs.egress_fwdns_prio100, 100);
	/* tc filter add dev dst_fwd egress prio 101 bpf da egress_fwdns_prio101 */
	XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS,
			  skel->progs.egress_fwdns_prio101, 101);

	/* tc qdisc add dev src_fwd clsact */
	QDISC_CLSACT_CREATE(&qdisc_src_fwd, setup_result->ifindex_src_fwd);
	/* tc filter add dev src_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */
	XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS,
			  skel->progs.ingress_fwdns_prio100, 100);
	/* tc filter add dev src_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */
	XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS,
			  skel->progs.ingress_fwdns_prio101, 101);
	/* tc filter add dev src_fwd egress prio 100 bpf da egress_fwdns_prio100 */
	XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_EGRESS,
			  skel->progs.egress_fwdns_prio100, 100);
	/* tc filter add dev src_fwd egress prio 101 bpf da egress_fwdns_prio101 */
	XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_EGRESS,
			  skel->progs.egress_fwdns_prio101, 101);
	close_netns(nstoken);
	return 0;

fail:
	/* Reached from the macros above while a namespace token is open. */
	close_netns(nstoken);
	return err;
}
746 
/*
 * Indexes into the per-test dtimes[] and errs[] counter rows of the
 * test_tc_dtime skeleton; cnt_names[] holds the matching labels.
 */
enum {
	INGRESS_FWDNS_P100,
	INGRESS_FWDNS_P101,
	EGRESS_FWDNS_P100,
	EGRESS_FWDNS_P101,
	INGRESS_ENDHOST,
	EGRESS_ENDHOST,
	SET_DTIME,
	__MAX_CNT,	/* number of counters, not a counter itself */
};
757 
/*
 * Labels for the counter indexes INGRESS_FWDNS_P100 .. SET_DTIME, in enum
 * order; used to build assertion names.
 */
const char *cnt_names[] = {
	"ingress_fwdns_p100",
	"ingress_fwdns_p101",
	"egress_fwdns_p100",
	"egress_fwdns_p101",
	"ingress_endhost",
	"egress_endhost",
	"set_dtime",
};
767 
/*
 * Identifiers of the dtime sub-tests. The value is stored in
 * skel->bss->test and selects the row of dtimes[] / errs[]; the *_RT_FWD
 * variants are used when bpf_fwd is false. test_names[] holds the labels.
 */
enum {
	TCP_IP6_CLEAR_DTIME,
	TCP_IP4,
	TCP_IP6,
	UDP_IP4,
	UDP_IP6,
	TCP_IP4_RT_FWD,
	TCP_IP6_RT_FWD,
	UDP_IP4_RT_FWD,
	UDP_IP6_RT_FWD,
	UKN_TEST,
	__NR_TESTS,
};
781 
/*
 * Labels for the test identifiers TCP_IP6_CLEAR_DTIME .. UDP_IP6_RT_FWD, in
 * enum order. There is no entry for UKN_TEST, so that value must not be
 * used as an index here.
 */
const char *test_names[] = {
	"tcp ip6 clear dtime",
	"tcp ip4",
	"tcp ip6",
	"udp ip4",
	"udp ip6",
	"tcp ip4 rt fwd",
	"tcp ip6 rt fwd",
	"udp ip4 rt fwd",
	"udp ip6 rt fwd",
};
793 
dtime_cnt_str(int test,int cnt)794 static const char *dtime_cnt_str(int test, int cnt)
795 {
796 	static char name[64];
797 
798 	snprintf(name, sizeof(name), "%s %s", test_names[test], cnt_names[cnt]);
799 
800 	return name;
801 }
802 
dtime_err_str(int test,int cnt)803 static const char *dtime_err_str(int test, int cnt)
804 {
805 	static char name[64];
806 
807 	snprintf(name, sizeof(name), "%s %s errs", test_names[test],
808 		 cnt_names[cnt]);
809 
810 	return name;
811 }
812 
test_tcp_clear_dtime(struct test_tc_dtime * skel)813 static void test_tcp_clear_dtime(struct test_tc_dtime *skel)
814 {
815 	int i, t = TCP_IP6_CLEAR_DTIME;
816 	__u32 *dtimes = skel->bss->dtimes[t];
817 	__u32 *errs = skel->bss->errs[t];
818 
819 	skel->bss->test = t;
820 	test_inet_dtime(AF_INET6, SOCK_STREAM, IP6_DST, 50000 + t);
821 
822 	ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0,
823 		  dtime_cnt_str(t, INGRESS_FWDNS_P100));
824 	ASSERT_EQ(dtimes[INGRESS_FWDNS_P101], 0,
825 		  dtime_cnt_str(t, INGRESS_FWDNS_P101));
826 	ASSERT_GT(dtimes[EGRESS_FWDNS_P100], 0,
827 		  dtime_cnt_str(t, EGRESS_FWDNS_P100));
828 	ASSERT_EQ(dtimes[EGRESS_FWDNS_P101], 0,
829 		  dtime_cnt_str(t, EGRESS_FWDNS_P101));
830 	ASSERT_GT(dtimes[EGRESS_ENDHOST], 0,
831 		  dtime_cnt_str(t, EGRESS_ENDHOST));
832 	ASSERT_GT(dtimes[INGRESS_ENDHOST], 0,
833 		  dtime_cnt_str(t, INGRESS_ENDHOST));
834 
835 	for (i = INGRESS_FWDNS_P100; i < __MAX_CNT; i++)
836 		ASSERT_EQ(errs[i], 0, dtime_err_str(t, i));
837 }
838 
test_tcp_dtime(struct test_tc_dtime * skel,int family,bool bpf_fwd)839 static void test_tcp_dtime(struct test_tc_dtime *skel, int family, bool bpf_fwd)
840 {
841 	__u32 *dtimes, *errs;
842 	const char *addr;
843 	int i, t;
844 
845 	if (family == AF_INET) {
846 		t = bpf_fwd ? TCP_IP4 : TCP_IP4_RT_FWD;
847 		addr = IP4_DST;
848 	} else {
849 		t = bpf_fwd ? TCP_IP6 : TCP_IP6_RT_FWD;
850 		addr = IP6_DST;
851 	}
852 
853 	dtimes = skel->bss->dtimes[t];
854 	errs = skel->bss->errs[t];
855 
856 	skel->bss->test = t;
857 	test_inet_dtime(family, SOCK_STREAM, addr, 50000 + t);
858 
859 	/* fwdns_prio100 prog does not read delivery_time_type, so
860 	 * kernel puts the (rcv) timetamp in __sk_buff->tstamp
861 	 */
862 	ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0,
863 		  dtime_cnt_str(t, INGRESS_FWDNS_P100));
864 	for (i = INGRESS_FWDNS_P101; i < SET_DTIME; i++)
865 		ASSERT_GT(dtimes[i], 0, dtime_cnt_str(t, i));
866 
867 	for (i = INGRESS_FWDNS_P100; i < __MAX_CNT; i++)
868 		ASSERT_EQ(errs[i], 0, dtime_err_str(t, i));
869 }
870 
test_udp_dtime(struct test_tc_dtime * skel,int family,bool bpf_fwd)871 static void test_udp_dtime(struct test_tc_dtime *skel, int family, bool bpf_fwd)
872 {
873 	__u32 *dtimes, *errs;
874 	const char *addr;
875 	int i, t;
876 
877 	if (family == AF_INET) {
878 		t = bpf_fwd ? UDP_IP4 : UDP_IP4_RT_FWD;
879 		addr = IP4_DST;
880 	} else {
881 		t = bpf_fwd ? UDP_IP6 : UDP_IP6_RT_FWD;
882 		addr = IP6_DST;
883 	}
884 
885 	dtimes = skel->bss->dtimes[t];
886 	errs = skel->bss->errs[t];
887 
888 	skel->bss->test = t;
889 	test_inet_dtime(family, SOCK_DGRAM, addr, 50000 + t);
890 
891 	ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0,
892 		  dtime_cnt_str(t, INGRESS_FWDNS_P100));
893 	/* non mono delivery time is not forwarded */
894 	ASSERT_EQ(dtimes[INGRESS_FWDNS_P101], 0,
895 		  dtime_cnt_str(t, INGRESS_FWDNS_P101));
896 	for (i = EGRESS_FWDNS_P100; i < SET_DTIME; i++)
897 		ASSERT_GT(dtimes[i], 0, dtime_cnt_str(t, i));
898 
899 	for (i = INGRESS_FWDNS_P100; i < __MAX_CNT; i++)
900 		ASSERT_EQ(errs[i], 0, dtime_err_str(t, i));
901 }
902 
/*
 * Delivery-time test driver: load the test_tc_dtime skeleton, attach its
 * programs in all three namespaces, then run the TCP/UDP dtime sub-tests
 * first with forwarding disabled in NS_FWD (bpf_fwd == true) and then with
 * forwarding enabled (bpf_fwd == false).
 *
 * A socket returned by wait_netstamp_needed_key() is held open for the
 * whole run and closed at the end.
 */
static void test_tc_redirect_dtime(struct netns_setup_result *setup_result)
{
	struct test_tc_dtime *skel;
	struct nstoken *nstoken;
	int hold_tstamp_fd, err;

	/* Hold a sk with the SOCK_TIMESTAMP set to ensure there
	 * is no delay in the kernel net_enable_timestamp().
	 * This ensures the following tests must have
	 * non zero rcv tstamp in the recvmsg().
	 */
	hold_tstamp_fd = wait_netstamp_needed_key();
	if (!ASSERT_GE(hold_tstamp_fd, 0, "wait_netstamp_needed_key"))
		return;

	skel = test_tc_dtime__open();
	if (!ASSERT_OK_PTR(skel, "test_tc_dtime__open"))
		goto done;

	/* The read-only ifindex constants are filled in before the load. */
	skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd;
	skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd;

	err = test_tc_dtime__load(skel);
	if (!ASSERT_OK(err, "test_tc_dtime__load"))
		goto done;

	if (netns_load_dtime_bpf(skel, setup_result))
		goto done;

	/* Forwarding is switched off from inside the fwd namespace. */
	nstoken = open_netns(NS_FWD);
	if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
		goto done;
	err = set_forwarding(false);
	close_netns(nstoken);
	if (!ASSERT_OK(err, "disable forwarding"))
		goto done;

	test_tcp_clear_dtime(skel);

	test_tcp_dtime(skel, AF_INET, true);
	test_tcp_dtime(skel, AF_INET6, true);
	test_udp_dtime(skel, AF_INET, true);
	test_udp_dtime(skel, AF_INET6, true);

	/* Test the kernel ip[6]_forward path instead
	 * of bpf_redirect_neigh().
	 */
	nstoken = open_netns(NS_FWD);
	if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
		goto done;
	err = set_forwarding(true);
	close_netns(nstoken);
	if (!ASSERT_OK(err, "enable forwarding"))
		goto done;

	test_tcp_dtime(skel, AF_INET, false);
	test_tcp_dtime(skel, AF_INET6, false);
	test_udp_dtime(skel, AF_INET, false);
	test_udp_dtime(skel, AF_INET6, false);

done:
	/* skel is whatever test_tc_dtime__open() returned, possibly an
	 * error value when the open itself failed.
	 * NOTE(review): relies on test_tc_dtime__destroy() tolerating that -
	 * confirm against the generated skeleton.
	 */
	test_tc_dtime__destroy(skel);
	close(hold_tstamp_fd);
}
967 
test_tc_redirect_neigh_fib(struct netns_setup_result * setup_result)968 static void test_tc_redirect_neigh_fib(struct netns_setup_result *setup_result)
969 {
970 	struct nstoken *nstoken = NULL;
971 	struct test_tc_neigh_fib *skel = NULL;
972 
973 	nstoken = open_netns(NS_FWD);
974 	if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
975 		return;
976 
977 	skel = test_tc_neigh_fib__open();
978 	if (!ASSERT_OK_PTR(skel, "test_tc_neigh_fib__open"))
979 		goto done;
980 
981 	if (!ASSERT_OK(test_tc_neigh_fib__load(skel), "test_tc_neigh_fib__load"))
982 		goto done;
983 
984 	if (netns_load_bpf(skel->progs.tc_src, skel->progs.tc_dst,
985 			   skel->progs.tc_chk, setup_result))
986 		goto done;
987 
988 	/* bpf_fib_lookup() checks if forwarding is enabled */
989 	if (!ASSERT_OK(set_forwarding(true), "enable forwarding"))
990 		goto done;
991 
992 	test_connectivity();
993 
994 done:
995 	if (skel)
996 		test_tc_neigh_fib__destroy(skel);
997 	close_netns(nstoken);
998 }
999 
test_tc_redirect_neigh(struct netns_setup_result * setup_result)1000 static void test_tc_redirect_neigh(struct netns_setup_result *setup_result)
1001 {
1002 	struct nstoken *nstoken = NULL;
1003 	struct test_tc_neigh *skel = NULL;
1004 	int err;
1005 
1006 	nstoken = open_netns(NS_FWD);
1007 	if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
1008 		return;
1009 
1010 	skel = test_tc_neigh__open();
1011 	if (!ASSERT_OK_PTR(skel, "test_tc_neigh__open"))
1012 		goto done;
1013 
1014 	skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd;
1015 	skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd;
1016 
1017 	err = test_tc_neigh__load(skel);
1018 	if (!ASSERT_OK(err, "test_tc_neigh__load"))
1019 		goto done;
1020 
1021 	if (netns_load_bpf(skel->progs.tc_src, skel->progs.tc_dst,
1022 			   skel->progs.tc_chk, setup_result))
1023 		goto done;
1024 
1025 	if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
1026 		goto done;
1027 
1028 	test_connectivity();
1029 
1030 done:
1031 	if (skel)
1032 		test_tc_neigh__destroy(skel);
1033 	close_netns(nstoken);
1034 }
1035 
test_tc_redirect_peer(struct netns_setup_result * setup_result)1036 static void test_tc_redirect_peer(struct netns_setup_result *setup_result)
1037 {
1038 	struct nstoken *nstoken;
1039 	struct test_tc_peer *skel;
1040 	int err;
1041 
1042 	nstoken = open_netns(NS_FWD);
1043 	if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
1044 		return;
1045 
1046 	skel = test_tc_peer__open();
1047 	if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
1048 		goto done;
1049 
1050 	skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd;
1051 	skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd;
1052 
1053 	err = test_tc_peer__load(skel);
1054 	if (!ASSERT_OK(err, "test_tc_peer__load"))
1055 		goto done;
1056 
1057 	if (netns_load_bpf(skel->progs.tc_src, skel->progs.tc_dst,
1058 			   skel->progs.tc_chk, setup_result))
1059 		goto done;
1060 
1061 	if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
1062 		goto done;
1063 
1064 	test_connectivity();
1065 
1066 done:
1067 	if (skel)
1068 		test_tc_peer__destroy(skel);
1069 	close_netns(nstoken);
1070 }
1071 
tun_open(char * name)1072 static int tun_open(char *name)
1073 {
1074 	struct ifreq ifr;
1075 	int fd, err;
1076 
1077 	fd = open("/dev/net/tun", O_RDWR);
1078 	if (!ASSERT_GE(fd, 0, "open /dev/net/tun"))
1079 		return -1;
1080 
1081 	memset(&ifr, 0, sizeof(ifr));
1082 
1083 	ifr.ifr_flags = IFF_TUN | IFF_NO_PI;
1084 	if (*name)
1085 		strncpy(ifr.ifr_name, name, IFNAMSIZ);
1086 
1087 	err = ioctl(fd, TUNSETIFF, &ifr);
1088 	if (!ASSERT_OK(err, "ioctl TUNSETIFF"))
1089 		goto fail;
1090 
1091 	SYS(fail, "ip link set dev %s up", name);
1092 
1093 	return fd;
1094 fail:
1095 	close(fd);
1096 	return -1;
1097 }
1098 
/* Which way tun_relay_loop() copies the data it reads in one pass. */
enum {
	SRC_TO_TARGET,	/* read from src_fd, write to target_fd */
	TARGET_TO_SRC,	/* read from target_fd, write to src_fd */
};
1103 
tun_relay_loop(int src_fd,int target_fd)1104 static int tun_relay_loop(int src_fd, int target_fd)
1105 {
1106 	fd_set rfds, wfds;
1107 
1108 	FD_ZERO(&rfds);
1109 	FD_ZERO(&wfds);
1110 
1111 	for (;;) {
1112 		char buf[1500];
1113 		int direction, nread, nwrite;
1114 
1115 		FD_SET(src_fd, &rfds);
1116 		FD_SET(target_fd, &rfds);
1117 
1118 		if (select(1 + MAX(src_fd, target_fd), &rfds, NULL, NULL, NULL) < 0) {
1119 			log_err("select failed");
1120 			return 1;
1121 		}
1122 
1123 		direction = FD_ISSET(src_fd, &rfds) ? SRC_TO_TARGET : TARGET_TO_SRC;
1124 
1125 		nread = read(direction == SRC_TO_TARGET ? src_fd : target_fd, buf, sizeof(buf));
1126 		if (nread < 0) {
1127 			log_err("read failed");
1128 			return 1;
1129 		}
1130 
1131 		nwrite = write(direction == SRC_TO_TARGET ? target_fd : src_fd, buf, nread);
1132 		if (nwrite != nread) {
1133 			log_err("write failed");
1134 			return 1;
1135 		}
1136 	}
1137 }
1138 
test_tc_redirect_peer_l3(struct netns_setup_result * setup_result)1139 static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
1140 {
1141 	LIBBPF_OPTS(bpf_tc_hook, qdisc_tun_fwd);
1142 	LIBBPF_OPTS(bpf_tc_hook, qdisc_dst_fwd);
1143 	struct test_tc_peer *skel = NULL;
1144 	struct nstoken *nstoken = NULL;
1145 	int err;
1146 	int tunnel_pid = -1;
1147 	int src_fd, target_fd = -1;
1148 	int ifindex;
1149 
1150 	/* Start a L3 TUN/TAP tunnel between the src and dst namespaces.
1151 	 * This test is using TUN/TAP instead of e.g. IPIP or GRE tunnel as those
1152 	 * expose the L2 headers encapsulating the IP packet to BPF and hence
1153 	 * don't have skb in suitable state for this test. Alternative to TUN/TAP
1154 	 * would be e.g. Wireguard which would appear as a pure L3 device to BPF,
1155 	 * but that requires much more complicated setup.
1156 	 */
1157 	nstoken = open_netns(NS_SRC);
1158 	if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC))
1159 		return;
1160 
1161 	src_fd = tun_open("tun_src");
1162 	if (!ASSERT_GE(src_fd, 0, "tun_open tun_src"))
1163 		goto fail;
1164 
1165 	close_netns(nstoken);
1166 
1167 	nstoken = open_netns(NS_FWD);
1168 	if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD))
1169 		goto fail;
1170 
1171 	target_fd = tun_open("tun_fwd");
1172 	if (!ASSERT_GE(target_fd, 0, "tun_open tun_fwd"))
1173 		goto fail;
1174 
1175 	tunnel_pid = fork();
1176 	if (!ASSERT_GE(tunnel_pid, 0, "fork tun_relay_loop"))
1177 		goto fail;
1178 
1179 	if (tunnel_pid == 0)
1180 		exit(tun_relay_loop(src_fd, target_fd));
1181 
1182 	skel = test_tc_peer__open();
1183 	if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
1184 		goto fail;
1185 
1186 	ifindex = if_nametoindex("tun_fwd");
1187 	if (!ASSERT_GT(ifindex, 0, "if_indextoname tun_fwd"))
1188 		goto fail;
1189 
1190 	skel->rodata->IFINDEX_SRC = ifindex;
1191 	skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd;
1192 
1193 	err = test_tc_peer__load(skel);
1194 	if (!ASSERT_OK(err, "test_tc_peer__load"))
1195 		goto fail;
1196 
1197 	/* Load "tc_src_l3" to the tun_fwd interface to redirect packets
1198 	 * towards dst, and "tc_dst" to redirect packets
1199 	 * and "tc_chk" on dst_fwd to drop non-redirected packets.
1200 	 */
1201 	/* tc qdisc add dev tun_fwd clsact */
1202 	QDISC_CLSACT_CREATE(&qdisc_tun_fwd, ifindex);
1203 	/* tc filter add dev tun_fwd ingress bpf da tc_src_l3 */
1204 	XGRESS_FILTER_ADD(&qdisc_tun_fwd, BPF_TC_INGRESS, skel->progs.tc_src_l3, 0);
1205 
1206 	/* tc qdisc add dev dst_fwd clsact */
1207 	QDISC_CLSACT_CREATE(&qdisc_dst_fwd, setup_result->ifindex_dst_fwd);
1208 	/* tc filter add dev dst_fwd ingress bpf da tc_dst_l3 */
1209 	XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS, skel->progs.tc_dst_l3, 0);
1210 	/* tc filter add dev dst_fwd egress bpf da tc_chk */
1211 	XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS, skel->progs.tc_chk, 0);
1212 
1213 	/* Setup route and neigh tables */
1214 	SYS(fail, "ip -netns " NS_SRC " addr add dev tun_src " IP4_TUN_SRC "/24");
1215 	SYS(fail, "ip -netns " NS_FWD " addr add dev tun_fwd " IP4_TUN_FWD "/24");
1216 
1217 	SYS(fail, "ip -netns " NS_SRC " addr add dev tun_src " IP6_TUN_SRC "/64 nodad");
1218 	SYS(fail, "ip -netns " NS_FWD " addr add dev tun_fwd " IP6_TUN_FWD "/64 nodad");
1219 
1220 	SYS(fail, "ip -netns " NS_SRC " route del " IP4_DST "/32 dev src scope global");
1221 	SYS(fail, "ip -netns " NS_SRC " route add " IP4_DST "/32 via " IP4_TUN_FWD
1222 	    " dev tun_src scope global");
1223 	SYS(fail, "ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev dst scope global");
1224 	SYS(fail, "ip -netns " NS_SRC " route del " IP6_DST "/128 dev src scope global");
1225 	SYS(fail, "ip -netns " NS_SRC " route add " IP6_DST "/128 via " IP6_TUN_FWD
1226 	    " dev tun_src scope global");
1227 	SYS(fail, "ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev dst scope global");
1228 
1229 	SYS(fail, "ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev dst lladdr " MAC_DST_FWD);
1230 	SYS(fail, "ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev dst lladdr " MAC_DST_FWD);
1231 
1232 	if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
1233 		goto fail;
1234 
1235 	test_connectivity();
1236 
1237 fail:
1238 	if (tunnel_pid > 0) {
1239 		kill(tunnel_pid, SIGTERM);
1240 		waitpid(tunnel_pid, NULL, 0);
1241 	}
1242 	if (src_fd >= 0)
1243 		close(src_fd);
1244 	if (target_fd >= 0)
1245 		close(target_fd);
1246 	if (skel)
1247 		test_tc_peer__destroy(skel);
1248 	if (nstoken)
1249 		close_netns(nstoken);
1250 }
1251 
/*
 * Run test_<name>() as the subtest called "<name>".
 *
 * When the subtest is selected, netns_setup_namespaces("add") is called; if
 * that and netns_setup_links_and_routes() both succeed, test_<name>() is
 * invoked with a setup_result whose dev_mode is @mode. Once the "add" step
 * has succeeded, netns_setup_namespaces("delete") is always called, whether
 * or not the link setup or the test itself passed.
 */
#define RUN_TEST(name, mode)							\
	({									\
		struct netns_setup_result setup_result = { .dev_mode = mode, };	\
		if (test__start_subtest(#name) &&				\
		    ASSERT_OK(netns_setup_namespaces("add"),			\
			      "setup namespaces")) {				\
			if (ASSERT_OK(netns_setup_links_and_routes(&setup_result), \
				      "setup links and routes"))		\
				test_ ## name(&setup_result);			\
			netns_setup_namespaces("delete");			\
		}								\
	})
1263 
test_tc_redirect_run_tests(void * arg)1264 static void *test_tc_redirect_run_tests(void *arg)
1265 {
1266 	netns_setup_namespaces_nofail("delete");
1267 
1268 	RUN_TEST(tc_redirect_peer, MODE_VETH);
1269 	RUN_TEST(tc_redirect_peer, MODE_NETKIT);
1270 	RUN_TEST(tc_redirect_peer_l3, MODE_VETH);
1271 	RUN_TEST(tc_redirect_peer_l3, MODE_NETKIT);
1272 	RUN_TEST(tc_redirect_neigh, MODE_VETH);
1273 	RUN_TEST(tc_redirect_neigh_fib, MODE_VETH);
1274 	RUN_TEST(tc_redirect_dtime, MODE_VETH);
1275 	return NULL;
1276 }
1277 
test_tc_redirect(void)1278 void test_tc_redirect(void)
1279 {
1280 	pthread_t test_thread;
1281 	int err;
1282 
1283 	/* Run the tests in their own thread to isolate the namespace changes
1284 	 * so they do not affect the environment of other tests.
1285 	 * (specifically needed because of unshare(CLONE_NEWNS) in open_netns())
1286 	 */
1287 	err = pthread_create(&test_thread, NULL, &test_tc_redirect_run_tests, NULL);
1288 	if (ASSERT_OK(err, "pthread_create"))
1289 		ASSERT_OK(pthread_join(test_thread, NULL), "pthread_join");
1290 }
1291