1 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
2
3 /*
4 * This test sets up 3 netns (src <-> fwd <-> dst). There is no direct veth link
5 * between src and dst. The netns fwd has veth links to each src and dst. The
6 * client is in src and server in dst. The test installs a TC BPF program to each
7 * host facing veth in fwd which calls into i) bpf_redirect_neigh() to perform the
8 * neigh addr population and redirect or ii) bpf_redirect_peer() for namespace
9 * switch from ingress side; it also installs a checker prog on the egress side
10 * to drop unexpected traffic.
11 */
12
13 #include <arpa/inet.h>
14 #include <linux/if_tun.h>
15 #include <linux/limits.h>
16 #include <linux/sysctl.h>
17 #include <linux/time_types.h>
18 #include <linux/net_tstamp.h>
19 #include <net/if.h>
20 #include <stdbool.h>
21 #include <stdio.h>
22 #include <sys/stat.h>
23 #include <unistd.h>
24
25 #include "test_progs.h"
26 #include "network_helpers.h"
27 #include "netlink_helpers.h"
28 #include "test_tc_neigh_fib.skel.h"
29 #include "test_tc_neigh.skel.h"
30 #include "test_tc_peer.skel.h"
31 #include "test_tc_dtime.skel.h"
32
/* Fallback for build environments whose headers do not provide TCP_TX_DELAY. */
#ifndef TCP_TX_DELAY
#define TCP_TX_DELAY 37
#endif

/* Names of the three network namespaces (src <-> fwd <-> dst). */
#define NS_SRC "ns_src"
#define NS_FWD "ns_fwd"
#define NS_DST "ns_dst"

/* IPv4 addresses of the "src" and "dst" devices and the TCP port used by
 * test_connectivity(). The IP4_TUN_* addresses are not referenced in the
 * part of the file reviewed here.
 */
#define IP4_SRC "172.16.1.100"
#define IP4_DST "172.16.2.100"
#define IP4_TUN_SRC "172.17.1.100"
#define IP4_TUN_FWD "172.17.1.200"
#define IP4_PORT 9004

/* IPv6 counterparts of the addresses and port above. */
#define IP6_SRC "0::1:dead:beef:cafe"
#define IP6_DST "0::2:dead:beef:cafe"
#define IP6_TUN_SRC "1::1:dead:beef:cafe"
#define IP6_TUN_FWD "1::2:dead:beef:cafe"
#define IP6_PORT 9006

/* IPv4 addresses put on src_fwd (IP4_SLL) and dst_fwd (IP4_DLL) inside the
 * fwd namespace; IP4_NET/16 is routed from the src and dst namespaces.
 */
#define IP4_SLL "169.254.0.1"
#define IP4_DLL "169.254.0.2"
#define IP4_NET "169.254.0.0"

/* Fixed MAC addresses assigned to dst_fwd and dst in veth mode. */
#define MAC_DST_FWD "00:11:22:33:44:55"
#define MAC_DST "00:22:33:44:55:66"

/* Number of bytes get_ifaddr() reads from /sys/class/net/<dev>/address. */
#define IFADDR_STR_LEN 18
/* Options passed to the ping command by test_ping(). */
#define PING_ARGS "-i 0.2 -c 3 -w 10 -q"

/* Timeout handed to connect_to_fd() and settimeo(). */
#define TIMEOUT_MILLIS 10000
#define NSEC_PER_SEC 1000000000ULL
65
/* Print a printf-style message to stderr, prefixed with the source location
 * and the strerror() text for the current errno.
 */
#define log_err(MSG, ...) \
	fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
		__FILE__, __LINE__, strerror(errno), ##__VA_ARGS__)
69
/* NULL-terminated list of the namespaces the test creates and deletes. */
static const char * const namespaces[] = {NS_SRC, NS_FWD, NS_DST, NULL};
71
/*
 * Write the string @newval to the start of the existing file @path
 * (used for sysctl knobs under /proc/sys).
 *
 * The file is opened with "r+", so it is never created or truncated.
 *
 * Returns 0 on success and -1 if the file cannot be opened or the data
 * cannot be written.
 */
static int write_file(const char *path, const char *newval)
{
	FILE *f;

	f = fopen(path, "r+");
	if (!f)
		return -1;
	if (fwrite(newval, strlen(newval), 1, f) != 1) {
		log_err("writing to %s failed", path);
		fclose(f);
		return -1;
	}
	/* stdio buffers the data, so the underlying write may only happen
	 * when the stream is flushed by fclose(). A rejected value would
	 * otherwise go unnoticed.
	 */
	if (fclose(f) != 0) {
		log_err("writing to %s failed", path);
		return -1;
	}
	return 0;
}
87
netns_setup_namespaces(const char * verb)88 static int netns_setup_namespaces(const char *verb)
89 {
90 const char * const *ns = namespaces;
91 char cmd[128];
92
93 while (*ns) {
94 snprintf(cmd, sizeof(cmd), "ip netns %s %s", verb, *ns);
95 if (!ASSERT_OK(system(cmd), cmd))
96 return -1;
97 ns++;
98 }
99 return 0;
100 }
101
netns_setup_namespaces_nofail(const char * verb)102 static void netns_setup_namespaces_nofail(const char *verb)
103 {
104 const char * const *ns = namespaces;
105 char cmd[128];
106
107 while (*ns) {
108 snprintf(cmd, sizeof(cmd), "ip netns %s %s > /dev/null 2>&1", verb, *ns);
109 system(cmd);
110 ns++;
111 }
112 }
113
/* Kind of device pair netns_setup_links_and_routes() creates. */
enum dev_mode {
	MODE_VETH,	/* "ip link add ... type veth" pairs */
	MODE_NETKIT,	/* netkit pairs created through create_netkit() */
};
118
/*
 * Input and output of netns_setup_links_and_routes(): the caller selects
 * dev_mode, the setup code fills in the interface indexes of the four
 * devices it created.
 */
struct netns_setup_result {
	enum dev_mode dev_mode;
	int ifindex_src;	/* "src" device */
	int ifindex_src_fwd;	/* "src_fwd" device */
	int ifindex_dst;	/* "dst" device */
	int ifindex_dst_fwd;	/* "dst_fwd" device */
};
126
get_ifaddr(const char * name,char * ifaddr)127 static int get_ifaddr(const char *name, char *ifaddr)
128 {
129 char path[PATH_MAX];
130 FILE *f;
131 int ret;
132
133 snprintf(path, PATH_MAX, "/sys/class/net/%s/address", name);
134 f = fopen(path, "r");
135 if (!ASSERT_OK_PTR(f, path))
136 return -1;
137
138 ret = fread(ifaddr, 1, IFADDR_STR_LEN, f);
139 if (!ASSERT_EQ(ret, IFADDR_STR_LEN, "fread ifaddr")) {
140 fclose(f);
141 return -1;
142 }
143 fclose(f);
144 return 0;
145 }
146
/*
 * Create a netkit device pair via rtnetlink.
 *
 * @mode: value stored in the IFLA_NETKIT_MODE attribute
 * @prim: name of the primary device
 * @peer: name of the peer device
 *
 * Builds one RTM_NEWLINK request (create + exclusive) whose link-info
 * data carries the mode and a nested peer description, then sends it.
 *
 * Returns 0 on success, otherwise the error from rtnl_open() or
 * rtnl_talk().
 */
static int create_netkit(int mode, char *prim, char *peer)
{
	struct rtattr *linkinfo, *data, *peer_info;
	struct rtnl_handle rth = { .fd = -1 };
	const char *type = "netkit";
	struct {
		struct nlmsghdr n;
		struct ifinfomsg i;
		char buf[1024];
	} req = {};
	int err;

	err = rtnl_open(&rth, 0);
	if (!ASSERT_OK(err, "open_rtnetlink"))
		return err;

	memset(&req, 0, sizeof(req));
	req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
	req.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
	req.n.nlmsg_type = RTM_NEWLINK;
	req.i.ifi_family = AF_UNSPEC;

	addattr_l(&req.n, sizeof(req), IFLA_IFNAME, prim, strlen(prim));
	linkinfo = addattr_nest(&req.n, sizeof(req), IFLA_LINKINFO);
	addattr_l(&req.n, sizeof(req), IFLA_INFO_KIND, type, strlen(type));
	data = addattr_nest(&req.n, sizeof(req), IFLA_INFO_DATA);
	addattr32(&req.n, sizeof(req), IFLA_NETKIT_MODE, mode);
	peer_info = addattr_nest(&req.n, sizeof(req), IFLA_NETKIT_PEER_INFO);
	/* Skip sizeof(struct ifinfomsg) zeroed bytes at the start of the
	 * peer nest before its attributes - presumably the peer's own
	 * ifinfomsg header expected by the kernel; confirm against netkit.
	 */
	req.n.nlmsg_len += sizeof(struct ifinfomsg);
	addattr_l(&req.n, sizeof(req), IFLA_IFNAME, peer, strlen(peer));
	/* Close the nests innermost first. */
	addattr_nest_end(&req.n, peer_info);
	addattr_nest_end(&req.n, data);
	addattr_nest_end(&req.n, linkinfo);

	err = rtnl_talk(&rth, &req.n, NULL);
	ASSERT_OK(err, "talk_rtnetlink");
	rtnl_close(&rth);
	return err;
}
186
/*
 * Create the src/src_fwd and dst/dst_fwd device pairs (veth or netkit,
 * selected by result->dev_mode), move them into their namespaces and
 * configure addresses, routes and - in veth mode only - static
 * neighbour entries.
 *
 * The interface indexes of all four devices are stored in @result.
 *
 * Returns 0 on success, -1 on the first failing step.
 */
static int netns_setup_links_and_routes(struct netns_setup_result *result)
{
	struct nstoken *nstoken = NULL;
	/* One byte larger than what get_ifaddr() fills in and zeroed, so
	 * the buffers are always NUL-terminated strings.
	 */
	char src_fwd_addr[IFADDR_STR_LEN+1] = {};
	char src_addr[IFADDR_STR_LEN + 1] = {};
	int err;

	if (result->dev_mode == MODE_VETH) {
		SYS(fail, "ip link add src type veth peer name src_fwd");
		SYS(fail, "ip link add dst type veth peer name dst_fwd");

		SYS(fail, "ip link set dst_fwd address " MAC_DST_FWD);
		SYS(fail, "ip link set dst address " MAC_DST);
	} else if (result->dev_mode == MODE_NETKIT) {
		err = create_netkit(NETKIT_L3, "src", "src_fwd");
		if (!ASSERT_OK(err, "create_ifindex_src"))
			goto fail;
		err = create_netkit(NETKIT_L3, "dst", "dst_fwd");
		if (!ASSERT_OK(err, "create_ifindex_dst"))
			goto fail;
	}

	/* Addresses and ifindexes are looked up by name here, before the
	 * devices are moved out of the current namespace below.
	 */
	if (get_ifaddr("src_fwd", src_fwd_addr))
		goto fail;

	if (get_ifaddr("src", src_addr))
		goto fail;

	result->ifindex_src = if_nametoindex("src");
	if (!ASSERT_GT(result->ifindex_src, 0, "ifindex_src"))
		goto fail;

	result->ifindex_src_fwd = if_nametoindex("src_fwd");
	if (!ASSERT_GT(result->ifindex_src_fwd, 0, "ifindex_src_fwd"))
		goto fail;

	result->ifindex_dst = if_nametoindex("dst");
	if (!ASSERT_GT(result->ifindex_dst, 0, "ifindex_dst"))
		goto fail;

	result->ifindex_dst_fwd = if_nametoindex("dst_fwd");
	if (!ASSERT_GT(result->ifindex_dst_fwd, 0, "ifindex_dst_fwd"))
		goto fail;

	SYS(fail, "ip link set src netns " NS_SRC);
	SYS(fail, "ip link set src_fwd netns " NS_FWD);
	SYS(fail, "ip link set dst_fwd netns " NS_FWD);
	SYS(fail, "ip link set dst netns " NS_DST);

	/** setup in 'src' namespace */
	nstoken = open_netns(NS_SRC);
	if (!ASSERT_OK_PTR(nstoken, "setns src"))
		goto fail;

	SYS(fail, "ip addr add " IP4_SRC "/32 dev src");
	SYS(fail, "ip addr add " IP6_SRC "/128 dev src nodad");
	SYS(fail, "ip link set dev src up");

	SYS(fail, "ip route add " IP4_DST "/32 dev src scope global");
	SYS(fail, "ip route add " IP4_NET "/16 dev src scope global");
	SYS(fail, "ip route add " IP6_DST "/128 dev src scope global");

	/* Static neighbour entries are only installed for veth devices. */
	if (result->dev_mode == MODE_VETH) {
		SYS(fail, "ip neigh add " IP4_DST " dev src lladdr %s",
		    src_fwd_addr);
		SYS(fail, "ip neigh add " IP6_DST " dev src lladdr %s",
		    src_fwd_addr);
	}

	close_netns(nstoken);

	/** setup in 'fwd' namespace */
	nstoken = open_netns(NS_FWD);
	if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
		goto fail;

	/* The fwd netns automatically gets a v6 LL address / routes, but also
	 * needs v4 one in order to start ARP probing. IP4_NET route is added
	 * to the endpoints so that the ARP processing will reply.
	 */
	SYS(fail, "ip addr add " IP4_SLL "/32 dev src_fwd");
	SYS(fail, "ip addr add " IP4_DLL "/32 dev dst_fwd");
	SYS(fail, "ip link set dev src_fwd up");
	SYS(fail, "ip link set dev dst_fwd up");

	SYS(fail, "ip route add " IP4_SRC "/32 dev src_fwd scope global");
	SYS(fail, "ip route add " IP6_SRC "/128 dev src_fwd scope global");
	SYS(fail, "ip route add " IP4_DST "/32 dev dst_fwd scope global");
	SYS(fail, "ip route add " IP6_DST "/128 dev dst_fwd scope global");

	if (result->dev_mode == MODE_VETH) {
		SYS(fail, "ip neigh add " IP4_SRC " dev src_fwd lladdr %s", src_addr);
		SYS(fail, "ip neigh add " IP6_SRC " dev src_fwd lladdr %s", src_addr);
		SYS(fail, "ip neigh add " IP4_DST " dev dst_fwd lladdr %s", MAC_DST);
		SYS(fail, "ip neigh add " IP6_DST " dev dst_fwd lladdr %s", MAC_DST);
	}

	close_netns(nstoken);

	/** setup in 'dst' namespace */
	nstoken = open_netns(NS_DST);
	if (!ASSERT_OK_PTR(nstoken, "setns dst"))
		goto fail;

	SYS(fail, "ip addr add " IP4_DST "/32 dev dst");
	SYS(fail, "ip addr add " IP6_DST "/128 dev dst nodad");
	SYS(fail, "ip link set dev dst up");
	SYS(fail, "ip link set dev lo up");

	SYS(fail, "ip route add " IP4_SRC "/32 dev dst scope global");
	SYS(fail, "ip route add " IP4_NET "/16 dev dst scope global");
	SYS(fail, "ip route add " IP6_SRC "/128 dev dst scope global");

	if (result->dev_mode == MODE_VETH) {
		SYS(fail, "ip neigh add " IP4_SRC " dev dst lladdr " MAC_DST_FWD);
		SYS(fail, "ip neigh add " IP6_SRC " dev dst lladdr " MAC_DST_FWD);
	}

	close_netns(nstoken);

	return 0;
fail:
	/* nstoken is NULL unless a namespace is currently entered: every
	 * close_netns() above is directly followed by a new open_netns()
	 * assignment or by the successful return.
	 */
	if (nstoken)
		close_netns(nstoken);
	return -1;
}
313
qdisc_clsact_create(struct bpf_tc_hook * qdisc_hook,int ifindex)314 static int qdisc_clsact_create(struct bpf_tc_hook *qdisc_hook, int ifindex)
315 {
316 char err_str[128], ifname[16];
317 int err;
318
319 qdisc_hook->ifindex = ifindex;
320 qdisc_hook->attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS;
321 err = bpf_tc_hook_create(qdisc_hook);
322 snprintf(err_str, sizeof(err_str),
323 "qdisc add dev %s clsact",
324 if_indextoname(qdisc_hook->ifindex, ifname) ? : "<unknown_iface>");
325 err_str[sizeof(err_str) - 1] = 0;
326 ASSERT_OK(err, err_str);
327
328 return err;
329 }
330
xgress_filter_add(struct bpf_tc_hook * qdisc_hook,enum bpf_tc_attach_point xgress,const struct bpf_program * prog,int priority)331 static int xgress_filter_add(struct bpf_tc_hook *qdisc_hook,
332 enum bpf_tc_attach_point xgress,
333 const struct bpf_program *prog, int priority)
334 {
335 LIBBPF_OPTS(bpf_tc_opts, tc_attach);
336 char err_str[128], ifname[16];
337 int err;
338
339 qdisc_hook->attach_point = xgress;
340 tc_attach.prog_fd = bpf_program__fd(prog);
341 tc_attach.priority = priority;
342 err = bpf_tc_attach(qdisc_hook, &tc_attach);
343 snprintf(err_str, sizeof(err_str),
344 "filter add dev %s %s prio %d bpf da %s",
345 if_indextoname(qdisc_hook->ifindex, ifname) ? : "<unknown_iface>",
346 xgress == BPF_TC_INGRESS ? "ingress" : "egress",
347 priority, bpf_program__name(prog));
348 err_str[sizeof(err_str) - 1] = 0;
349 ASSERT_OK(err, err_str);
350
351 return err;
352 }
353
/*
 * Convenience wrappers around qdisc_clsact_create() and
 * xgress_filter_add(). Both require the calling function to declare an
 * "int err" variable and a "fail" label: the helper's result is stored in
 * err and control jumps to fail when it is non-zero.
 */
#define QDISC_CLSACT_CREATE(qdisc_hook, ifindex) ({ \
	if ((err = qdisc_clsact_create(qdisc_hook, ifindex))) \
		goto fail; \
})

#define XGRESS_FILTER_ADD(qdisc_hook, xgress, prog, priority) ({ \
	if ((err = xgress_filter_add(qdisc_hook, xgress, prog, priority))) \
		goto fail; \
})
363
netns_load_bpf(const struct bpf_program * src_prog,const struct bpf_program * dst_prog,const struct bpf_program * chk_prog,const struct netns_setup_result * setup_result)364 static int netns_load_bpf(const struct bpf_program *src_prog,
365 const struct bpf_program *dst_prog,
366 const struct bpf_program *chk_prog,
367 const struct netns_setup_result *setup_result)
368 {
369 LIBBPF_OPTS(bpf_tc_hook, qdisc_src_fwd);
370 LIBBPF_OPTS(bpf_tc_hook, qdisc_dst_fwd);
371 int err;
372
373 /* tc qdisc add dev src_fwd clsact */
374 QDISC_CLSACT_CREATE(&qdisc_src_fwd, setup_result->ifindex_src_fwd);
375 /* tc filter add dev src_fwd ingress bpf da src_prog */
376 XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS, src_prog, 0);
377 /* tc filter add dev src_fwd egress bpf da chk_prog */
378 XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_EGRESS, chk_prog, 0);
379
380 /* tc qdisc add dev dst_fwd clsact */
381 QDISC_CLSACT_CREATE(&qdisc_dst_fwd, setup_result->ifindex_dst_fwd);
382 /* tc filter add dev dst_fwd ingress bpf da dst_prog */
383 XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS, dst_prog, 0);
384 /* tc filter add dev dst_fwd egress bpf da chk_prog */
385 XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS, chk_prog, 0);
386
387 return 0;
388 fail:
389 return -1;
390 }
391
/*
 * TCP round trip between the namespaces: start a server on @addr:@port in
 * the dst namespace, connect to it from the src namespace, send one
 * message from the client and read it back on the accepted socket.
 */
static void test_tcp(int family, const char *addr, __u16 port)
{
	int srv_fd = -1, conn_fd = -1, cli_fd = -1;
	char payload[] = "testing testing";
	struct nstoken *tok;
	int nbytes;

	/* The listening socket is created in the destination namespace. */
	tok = open_netns(NS_DST);
	if (!ASSERT_OK_PTR(tok, "setns dst"))
		return;

	srv_fd = start_server(family, SOCK_STREAM, addr, port, 0);
	if (!ASSERT_GE(srv_fd, 0, "listen"))
		goto out;

	/* Switch to the source namespace for the client side. */
	close_netns(tok);
	tok = open_netns(NS_SRC);
	if (!ASSERT_OK_PTR(tok, "setns src"))
		goto out;

	cli_fd = connect_to_fd(srv_fd, TIMEOUT_MILLIS);
	if (!ASSERT_GE(cli_fd, 0, "connect_to_fd"))
		goto out;

	conn_fd = accept(srv_fd, NULL, NULL);
	if (!ASSERT_GE(conn_fd, 0, "accept"))
		goto out;

	if (!ASSERT_OK(settimeo(conn_fd, TIMEOUT_MILLIS), "settimeo"))
		goto out;

	nbytes = write(cli_fd, payload, sizeof(payload));
	if (!ASSERT_EQ(nbytes, sizeof(payload), "send to server"))
		goto out;

	nbytes = read(conn_fd, payload, sizeof(payload));
	ASSERT_EQ(nbytes, sizeof(payload), "recv from server");

out:
	if (tok)
		close_netns(tok);
	if (srv_fd >= 0)
		close(srv_fd);
	if (conn_fd >= 0)
		close(conn_fd);
	if (cli_fd >= 0)
		close(cli_fd);
}
440
test_ping(int family,const char * addr)441 static int test_ping(int family, const char *addr)
442 {
443 SYS(fail, "ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping_command(family), addr);
444 return 0;
445 fail:
446 return -1;
447 }
448
test_connectivity(void)449 static void test_connectivity(void)
450 {
451 test_tcp(AF_INET, IP4_DST, IP4_PORT);
452 test_ping(AF_INET, IP4_DST);
453 test_tcp(AF_INET6, IP6_DST, IP6_PORT);
454 test_ping(AF_INET6, IP6_DST);
455 }
456
/*
 * Enable or disable IPv4 and IPv6 forwarding by writing "1" or "0" to the
 * ip_forward / forwarding sysctl files. The files are opened relative to
 * the calling thread's current network namespace view of /proc/sys.
 *
 * Returns 0 on success, or the write_file() error of the first sysctl that
 * could not be written.
 */
static int set_forwarding(bool enable)
{
	const char *val = enable ? "1" : "0";
	int err;

	/* The assertion tag names the value actually written, so a failure
	 * while enabling forwarding is not reported as "=0".
	 */
	err = write_file("/proc/sys/net/ipv4/ip_forward", val);
	if (!ASSERT_OK(err, enable ? "set ipv4.ip_forward=1" :
				     "set ipv4.ip_forward=0"))
		return err;

	err = write_file("/proc/sys/net/ipv6/conf/all/forwarding", val);
	if (!ASSERT_OK(err, enable ? "set ipv6.forwarding=1" :
				     "set ipv6.forwarding=0"))
		return err;

	return 0;
}
471
/*
 * Receive one message of exactly @s bytes from @fd, compare it against
 * @expected and extract the SO_TIMESTAMPNS_NEW receive timestamp from the
 * first control message (0 if there is none).
 *
 * If @tstamp is non-NULL the timestamp (in ns) is stored there and no
 * further checks are made. Otherwise the timestamp must be non-zero, not
 * in the future and less than 5 seconds older than CLOCK_REALTIME.
 *
 * Returns -1 if recvmsg() did not return @s bytes, 0 otherwise.
 */
static int __rcv_tstamp(int fd, const char *expected, size_t s, __u64 *tstamp)
{
	struct __kernel_timespec rx_ts = {};
	char cbuf[CMSG_SPACE(sizeof(rx_ts))];
	char payload[32];
	struct iovec vec = {
		.iov_base = payload,
		.iov_len = sizeof(payload),
	};
	struct msghdr hdr = {
		.msg_iov = &vec,
		.msg_iovlen = 1,
		.msg_control = &cbuf,
		.msg_controllen = sizeof(cbuf),
	};
	struct timespec wall_ts;
	struct cmsghdr *cm;
	__u64 wall_ns, rx_ns;
	int ret;

	ret = recvmsg(fd, &hdr, 0);
	if (!ASSERT_EQ(ret, s, "recvmsg"))
		return -1;
	ASSERT_STRNEQ(payload, expected, s, "expected rcv data");

	/* Only the first control message is inspected. */
	cm = CMSG_FIRSTHDR(&hdr);
	if (cm && cm->cmsg_level == SOL_SOCKET &&
	    cm->cmsg_type == SO_TIMESTAMPNS_NEW)
		memcpy(&rx_ts, CMSG_DATA(cm), sizeof(rx_ts));

	rx_ns = rx_ts.tv_sec * NSEC_PER_SEC + rx_ts.tv_nsec;
	if (tstamp) {
		/* the caller validates the timestamp on its own */
		*tstamp = rx_ns;
		return 0;
	}

	ASSERT_NEQ(rx_ns, 0, "pkt rcv tstamp");

	ret = clock_gettime(CLOCK_REALTIME, &wall_ts);
	ASSERT_OK(ret, "clock_gettime");
	wall_ns = wall_ts.tv_sec * NSEC_PER_SEC + wall_ts.tv_nsec;

	if (ASSERT_GE(wall_ns, rx_ns, "check rcv tstamp"))
		ASSERT_LT(wall_ns - rx_ns, 5 * NSEC_PER_SEC,
			  "check rcv tstamp");
	return 0;
}
519
/*
 * Receive @s bytes from @fd and let __rcv_tstamp() itself validate both
 * the payload and the receive timestamp (no tstamp out-parameter is
 * passed). The return value of __rcv_tstamp() is dropped.
 */
static void rcv_tstamp(int fd, const char *expected, size_t s)
{
	__rcv_tstamp(fd, expected, s, NULL);
}
524
wait_netstamp_needed_key(void)525 static int wait_netstamp_needed_key(void)
526 {
527 int opt = 1, srv_fd = -1, cli_fd = -1, nretries = 0, err, n;
528 char buf[] = "testing testing";
529 struct nstoken *nstoken;
530 __u64 tstamp = 0;
531
532 nstoken = open_netns(NS_DST);
533 if (!nstoken)
534 return -1;
535
536 srv_fd = start_server(AF_INET6, SOCK_DGRAM, "::1", 0, 0);
537 if (!ASSERT_GE(srv_fd, 0, "start_server"))
538 goto done;
539
540 err = setsockopt(srv_fd, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
541 &opt, sizeof(opt));
542 if (!ASSERT_OK(err, "setsockopt(SO_TIMESTAMPNS_NEW)"))
543 goto done;
544
545 cli_fd = connect_to_fd(srv_fd, TIMEOUT_MILLIS);
546 if (!ASSERT_GE(cli_fd, 0, "connect_to_fd"))
547 goto done;
548
549 again:
550 n = write(cli_fd, buf, sizeof(buf));
551 if (!ASSERT_EQ(n, sizeof(buf), "send to server"))
552 goto done;
553 err = __rcv_tstamp(srv_fd, buf, sizeof(buf), &tstamp);
554 if (!ASSERT_OK(err, "__rcv_tstamp"))
555 goto done;
556 if (!tstamp && nretries++ < 5) {
557 sleep(1);
558 printf("netstamp_needed_key retry#%d\n", nretries);
559 goto again;
560 }
561
562 done:
563 if (!tstamp && srv_fd != -1) {
564 close(srv_fd);
565 srv_fd = -1;
566 }
567 if (cli_fd != -1)
568 close(cli_fd);
569 close_netns(nstoken);
570 return srv_fd;
571 }
572
snd_tstamp(int fd,char * b,size_t s)573 static void snd_tstamp(int fd, char *b, size_t s)
574 {
575 struct sock_txtime opt = { .clockid = CLOCK_TAI };
576 char ctl[CMSG_SPACE(sizeof(__u64))];
577 struct timespec now_ts;
578 struct msghdr msg = {};
579 struct cmsghdr *cmsg;
580 struct iovec iov;
581 __u64 now_ns;
582 int ret;
583
584 ret = clock_gettime(CLOCK_TAI, &now_ts);
585 ASSERT_OK(ret, "clock_get_time(CLOCK_TAI)");
586 now_ns = now_ts.tv_sec * NSEC_PER_SEC + now_ts.tv_nsec;
587
588 iov.iov_base = b;
589 iov.iov_len = s;
590 msg.msg_iov = &iov;
591 msg.msg_iovlen = 1;
592 msg.msg_control = &ctl;
593 msg.msg_controllen = sizeof(ctl);
594
595 cmsg = CMSG_FIRSTHDR(&msg);
596 cmsg->cmsg_level = SOL_SOCKET;
597 cmsg->cmsg_type = SCM_TXTIME;
598 cmsg->cmsg_len = CMSG_LEN(sizeof(now_ns));
599 *(__u64 *)CMSG_DATA(cmsg) = now_ns;
600
601 ret = setsockopt(fd, SOL_SOCKET, SO_TXTIME, &opt, sizeof(opt));
602 ASSERT_OK(ret, "setsockopt(SO_TXTIME)");
603
604 ret = sendmsg(fd, &msg, 0);
605 ASSERT_EQ(ret, s, "sendmsg");
606 }
607
/*
 * Send one message from the src namespace to a server socket created in
 * the dst namespace and validate the receive timestamp.
 *
 * For SOCK_STREAM the message is written on the connected client socket
 * and read from the accepted socket; for any other @type it is sent with
 * snd_tstamp() and read directly from the server socket.
 */
static void test_inet_dtime(int family, int type, const char *addr, __u16 port)
{
	int enable = 1, conn_fd = -1, cli_fd = -1, srv_fd, ret;
	char payload[] = "testing testing";
	struct nstoken *tok;
	int nbytes;

	tok = open_netns(NS_DST);
	if (!ASSERT_OK_PTR(tok, "setns dst"))
		return;
	srv_fd = start_server(family, type, addr, port, 0);
	close_netns(tok);

	if (!ASSERT_GE(srv_fd, 0, "listen"))
		return;

	/* Ask the kernel for a receive timestamp on every skb. */
	ret = setsockopt(srv_fd, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
			 &enable, sizeof(enable));
	if (!ASSERT_OK(ret, "setsockopt(SO_TIMESTAMPNS_NEW)"))
		goto out;

	if (type == SOCK_STREAM) {
		/* Make sure the kernel sets an EDT when its ctl_sk sends
		 * out rst/ack.
		 */
		ret = setsockopt(srv_fd, SOL_TCP, TCP_TX_DELAY, &enable,
				 sizeof(enable));
		if (!ASSERT_OK(ret, "setsockopt(TCP_TX_DELAY)"))
			goto out;
	}

	tok = open_netns(NS_SRC);
	if (!ASSERT_OK_PTR(tok, "setns src"))
		goto out;
	cli_fd = connect_to_fd(srv_fd, TIMEOUT_MILLIS);
	close_netns(tok);

	if (!ASSERT_GE(cli_fd, 0, "connect_to_fd"))
		goto out;

	if (type != SOCK_STREAM) {
		snd_tstamp(cli_fd, payload, sizeof(payload));
		rcv_tstamp(srv_fd, payload, sizeof(payload));
		goto out;
	}

	conn_fd = accept(srv_fd, NULL, NULL);
	if (!ASSERT_GE(conn_fd, 0, "accept"))
		goto out;

	nbytes = write(cli_fd, payload, sizeof(payload));
	if (!ASSERT_EQ(nbytes, sizeof(payload), "send to server"))
		goto out;
	rcv_tstamp(conn_fd, payload, sizeof(payload));

out:
	close(srv_fd);
	if (conn_fd != -1)
		close(conn_fd);
	if (cli_fd != -1)
		close(cli_fd);
}
671
/*
 * Attach the test_tc_dtime programs in all three namespaces:
 *
 *   ns_src / ns_dst: ingress_host and egress_host on "src" / "dst"
 *   ns_fwd:          the prio 100 and prio 101 ingress/egress programs on
 *                    both "dst_fwd" and "src_fwd"
 *
 * Returns 0 on success, -1 if a namespace cannot be entered, or the error
 * stored in err by QDISC_CLSACT_CREATE()/XGRESS_FILTER_ADD() when creating
 * a hook or attaching a filter fails.
 */
static int netns_load_dtime_bpf(struct test_tc_dtime *skel,
				const struct netns_setup_result *setup_result)
{
	LIBBPF_OPTS(bpf_tc_hook, qdisc_src_fwd);
	LIBBPF_OPTS(bpf_tc_hook, qdisc_dst_fwd);
	LIBBPF_OPTS(bpf_tc_hook, qdisc_src);
	LIBBPF_OPTS(bpf_tc_hook, qdisc_dst);
	struct nstoken *nstoken;
	int err;

	/* setup ns_src tc progs */
	nstoken = open_netns(NS_SRC);
	if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC))
		return -1;
	/* tc qdisc add dev src clsact */
	QDISC_CLSACT_CREATE(&qdisc_src, setup_result->ifindex_src);
	/* tc filter add dev src ingress bpf da ingress_host */
	XGRESS_FILTER_ADD(&qdisc_src, BPF_TC_INGRESS, skel->progs.ingress_host, 0);
	/* tc filter add dev src egress bpf da egress_host */
	XGRESS_FILTER_ADD(&qdisc_src, BPF_TC_EGRESS, skel->progs.egress_host, 0);
	close_netns(nstoken);

	/* setup ns_dst tc progs */
	nstoken = open_netns(NS_DST);
	if (!ASSERT_OK_PTR(nstoken, "setns " NS_DST))
		return -1;
	/* tc qdisc add dev dst clsact */
	QDISC_CLSACT_CREATE(&qdisc_dst, setup_result->ifindex_dst);
	/* tc filter add dev dst ingress bpf da ingress_host */
	XGRESS_FILTER_ADD(&qdisc_dst, BPF_TC_INGRESS, skel->progs.ingress_host, 0);
	/* tc filter add dev dst egress bpf da egress_host */
	XGRESS_FILTER_ADD(&qdisc_dst, BPF_TC_EGRESS, skel->progs.egress_host, 0);
	close_netns(nstoken);

	/* setup ns_fwd tc progs */
	nstoken = open_netns(NS_FWD);
	if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD))
		return -1;
	/* tc qdisc add dev dst_fwd clsact */
	QDISC_CLSACT_CREATE(&qdisc_dst_fwd, setup_result->ifindex_dst_fwd);
	/* tc filter add dev dst_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */
	XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS,
			  skel->progs.ingress_fwdns_prio100, 100);
	/* tc filter add dev dst_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */
	XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS,
			  skel->progs.ingress_fwdns_prio101, 101);
	/* tc filter add dev dst_fwd egress prio 100 bpf da egress_fwdns_prio100 */
	XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS,
			  skel->progs.egress_fwdns_prio100, 100);
	/* tc filter add dev dst_fwd egress prio 101 bpf da egress_fwdns_prio101 */
	XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS,
			  skel->progs.egress_fwdns_prio101, 101);

	/* tc qdisc add dev src_fwd clsact */
	QDISC_CLSACT_CREATE(&qdisc_src_fwd, setup_result->ifindex_src_fwd);
	/* tc filter add dev src_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */
	XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS,
			  skel->progs.ingress_fwdns_prio100, 100);
	/* tc filter add dev src_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */
	XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS,
			  skel->progs.ingress_fwdns_prio101, 101);
	/* tc filter add dev src_fwd egress prio 100 bpf da egress_fwdns_prio100 */
	XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_EGRESS,
			  skel->progs.egress_fwdns_prio100, 100);
	/* tc filter add dev src_fwd egress prio 101 bpf da egress_fwdns_prio101 */
	XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_EGRESS,
			  skel->progs.egress_fwdns_prio101, 101);
	close_netns(nstoken);
	return 0;

fail:
	/* Only reached from the macros above, i.e. while a namespace is
	 * entered, so nstoken is always a live token here.
	 */
	close_netns(nstoken);
	return err;
}
746
/* Index of each per-test counter kept by the test_tc_dtime programs. */
enum {
	INGRESS_FWDNS_P100,
	INGRESS_FWDNS_P101,
	EGRESS_FWDNS_P100,
	EGRESS_FWDNS_P101,
	INGRESS_ENDHOST,
	EGRESS_ENDHOST,
	SET_DTIME,
	__MAX_CNT,
};

/* Printable name of each counter, indexed by the enum above. */
const char *cnt_names[] = {
	[INGRESS_FWDNS_P100]	= "ingress_fwdns_p100",
	[INGRESS_FWDNS_P101]	= "ingress_fwdns_p101",
	[EGRESS_FWDNS_P100]	= "egress_fwdns_p100",
	[EGRESS_FWDNS_P101]	= "egress_fwdns_p101",
	[INGRESS_ENDHOST]	= "ingress_endhost",
	[EGRESS_ENDHOST]	= "egress_endhost",
	[SET_DTIME]		= "set_dtime",
};
767
/* Identifier of each dtime sub-test; used as the first index into the
 * skeleton's dtimes[] and errs[] arrays.
 */
enum {
	TCP_IP6_CLEAR_DTIME,
	TCP_IP4,
	TCP_IP6,
	UDP_IP4,
	UDP_IP6,
	TCP_IP4_RT_FWD,
	TCP_IP6_RT_FWD,
	UDP_IP4_RT_FWD,
	UDP_IP6_RT_FWD,
	UKN_TEST,
	__NR_TESTS,
};

/* Printable name of each sub-test. There is no entry for UKN_TEST, so the
 * array must not be indexed with it.
 */
const char *test_names[] = {
	[TCP_IP6_CLEAR_DTIME]	= "tcp ip6 clear dtime",
	[TCP_IP4]		= "tcp ip4",
	[TCP_IP6]		= "tcp ip6",
	[UDP_IP4]		= "udp ip4",
	[UDP_IP6]		= "udp ip6",
	[TCP_IP4_RT_FWD]	= "tcp ip4 rt fwd",
	[TCP_IP6_RT_FWD]	= "tcp ip6 rt fwd",
	[UDP_IP4_RT_FWD]	= "udp ip4 rt fwd",
	[UDP_IP6_RT_FWD]	= "udp ip6 rt fwd",
};
793
dtime_cnt_str(int test,int cnt)794 static const char *dtime_cnt_str(int test, int cnt)
795 {
796 static char name[64];
797
798 snprintf(name, sizeof(name), "%s %s", test_names[test], cnt_names[cnt]);
799
800 return name;
801 }
802
dtime_err_str(int test,int cnt)803 static const char *dtime_err_str(int test, int cnt)
804 {
805 static char name[64];
806
807 snprintf(name, sizeof(name), "%s %s errs", test_names[test],
808 cnt_names[cnt]);
809
810 return name;
811 }
812
test_tcp_clear_dtime(struct test_tc_dtime * skel)813 static void test_tcp_clear_dtime(struct test_tc_dtime *skel)
814 {
815 int i, t = TCP_IP6_CLEAR_DTIME;
816 __u32 *dtimes = skel->bss->dtimes[t];
817 __u32 *errs = skel->bss->errs[t];
818
819 skel->bss->test = t;
820 test_inet_dtime(AF_INET6, SOCK_STREAM, IP6_DST, 50000 + t);
821
822 ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0,
823 dtime_cnt_str(t, INGRESS_FWDNS_P100));
824 ASSERT_EQ(dtimes[INGRESS_FWDNS_P101], 0,
825 dtime_cnt_str(t, INGRESS_FWDNS_P101));
826 ASSERT_GT(dtimes[EGRESS_FWDNS_P100], 0,
827 dtime_cnt_str(t, EGRESS_FWDNS_P100));
828 ASSERT_EQ(dtimes[EGRESS_FWDNS_P101], 0,
829 dtime_cnt_str(t, EGRESS_FWDNS_P101));
830 ASSERT_GT(dtimes[EGRESS_ENDHOST], 0,
831 dtime_cnt_str(t, EGRESS_ENDHOST));
832 ASSERT_GT(dtimes[INGRESS_ENDHOST], 0,
833 dtime_cnt_str(t, INGRESS_ENDHOST));
834
835 for (i = INGRESS_FWDNS_P100; i < __MAX_CNT; i++)
836 ASSERT_EQ(errs[i], 0, dtime_err_str(t, i));
837 }
838
test_tcp_dtime(struct test_tc_dtime * skel,int family,bool bpf_fwd)839 static void test_tcp_dtime(struct test_tc_dtime *skel, int family, bool bpf_fwd)
840 {
841 __u32 *dtimes, *errs;
842 const char *addr;
843 int i, t;
844
845 if (family == AF_INET) {
846 t = bpf_fwd ? TCP_IP4 : TCP_IP4_RT_FWD;
847 addr = IP4_DST;
848 } else {
849 t = bpf_fwd ? TCP_IP6 : TCP_IP6_RT_FWD;
850 addr = IP6_DST;
851 }
852
853 dtimes = skel->bss->dtimes[t];
854 errs = skel->bss->errs[t];
855
856 skel->bss->test = t;
857 test_inet_dtime(family, SOCK_STREAM, addr, 50000 + t);
858
859 /* fwdns_prio100 prog does not read delivery_time_type, so
860 * kernel puts the (rcv) timetamp in __sk_buff->tstamp
861 */
862 ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0,
863 dtime_cnt_str(t, INGRESS_FWDNS_P100));
864 for (i = INGRESS_FWDNS_P101; i < SET_DTIME; i++)
865 ASSERT_GT(dtimes[i], 0, dtime_cnt_str(t, i));
866
867 for (i = INGRESS_FWDNS_P100; i < __MAX_CNT; i++)
868 ASSERT_EQ(errs[i], 0, dtime_err_str(t, i));
869 }
870
test_udp_dtime(struct test_tc_dtime * skel,int family,bool bpf_fwd)871 static void test_udp_dtime(struct test_tc_dtime *skel, int family, bool bpf_fwd)
872 {
873 __u32 *dtimes, *errs;
874 const char *addr;
875 int i, t;
876
877 if (family == AF_INET) {
878 t = bpf_fwd ? UDP_IP4 : UDP_IP4_RT_FWD;
879 addr = IP4_DST;
880 } else {
881 t = bpf_fwd ? UDP_IP6 : UDP_IP6_RT_FWD;
882 addr = IP6_DST;
883 }
884
885 dtimes = skel->bss->dtimes[t];
886 errs = skel->bss->errs[t];
887
888 skel->bss->test = t;
889 test_inet_dtime(family, SOCK_DGRAM, addr, 50000 + t);
890
891 ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0,
892 dtime_cnt_str(t, INGRESS_FWDNS_P100));
893 /* non mono delivery time is not forwarded */
894 ASSERT_EQ(dtimes[INGRESS_FWDNS_P101], 0,
895 dtime_cnt_str(t, INGRESS_FWDNS_P101));
896 for (i = EGRESS_FWDNS_P100; i < SET_DTIME; i++)
897 ASSERT_GT(dtimes[i], 0, dtime_cnt_str(t, i));
898
899 for (i = INGRESS_FWDNS_P100; i < __MAX_CNT; i++)
900 ASSERT_EQ(errs[i], 0, dtime_err_str(t, i));
901 }
902
test_tc_redirect_dtime(struct netns_setup_result * setup_result)903 static void test_tc_redirect_dtime(struct netns_setup_result *setup_result)
904 {
905 struct test_tc_dtime *skel;
906 struct nstoken *nstoken;
907 int hold_tstamp_fd, err;
908
909 /* Hold a sk with the SOCK_TIMESTAMP set to ensure there
910 * is no delay in the kernel net_enable_timestamp().
911 * This ensures the following tests must have
912 * non zero rcv tstamp in the recvmsg().
913 */
914 hold_tstamp_fd = wait_netstamp_needed_key();
915 if (!ASSERT_GE(hold_tstamp_fd, 0, "wait_netstamp_needed_key"))
916 return;
917
918 skel = test_tc_dtime__open();
919 if (!ASSERT_OK_PTR(skel, "test_tc_dtime__open"))
920 goto done;
921
922 skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd;
923 skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd;
924
925 err = test_tc_dtime__load(skel);
926 if (!ASSERT_OK(err, "test_tc_dtime__load"))
927 goto done;
928
929 if (netns_load_dtime_bpf(skel, setup_result))
930 goto done;
931
932 nstoken = open_netns(NS_FWD);
933 if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
934 goto done;
935 err = set_forwarding(false);
936 close_netns(nstoken);
937 if (!ASSERT_OK(err, "disable forwarding"))
938 goto done;
939
940 test_tcp_clear_dtime(skel);
941
942 test_tcp_dtime(skel, AF_INET, true);
943 test_tcp_dtime(skel, AF_INET6, true);
944 test_udp_dtime(skel, AF_INET, true);
945 test_udp_dtime(skel, AF_INET6, true);
946
947 /* Test the kernel ip[6]_forward path instead
948 * of bpf_redirect_neigh().
949 */
950 nstoken = open_netns(NS_FWD);
951 if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
952 goto done;
953 err = set_forwarding(true);
954 close_netns(nstoken);
955 if (!ASSERT_OK(err, "enable forwarding"))
956 goto done;
957
958 test_tcp_dtime(skel, AF_INET, false);
959 test_tcp_dtime(skel, AF_INET6, false);
960 test_udp_dtime(skel, AF_INET, false);
961 test_udp_dtime(skel, AF_INET6, false);
962
963 done:
964 test_tc_dtime__destroy(skel);
965 close(hold_tstamp_fd);
966 }
967
test_tc_redirect_neigh_fib(struct netns_setup_result * setup_result)968 static void test_tc_redirect_neigh_fib(struct netns_setup_result *setup_result)
969 {
970 struct nstoken *nstoken = NULL;
971 struct test_tc_neigh_fib *skel = NULL;
972
973 nstoken = open_netns(NS_FWD);
974 if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
975 return;
976
977 skel = test_tc_neigh_fib__open();
978 if (!ASSERT_OK_PTR(skel, "test_tc_neigh_fib__open"))
979 goto done;
980
981 if (!ASSERT_OK(test_tc_neigh_fib__load(skel), "test_tc_neigh_fib__load"))
982 goto done;
983
984 if (netns_load_bpf(skel->progs.tc_src, skel->progs.tc_dst,
985 skel->progs.tc_chk, setup_result))
986 goto done;
987
988 /* bpf_fib_lookup() checks if forwarding is enabled */
989 if (!ASSERT_OK(set_forwarding(true), "enable forwarding"))
990 goto done;
991
992 test_connectivity();
993
994 done:
995 if (skel)
996 test_tc_neigh_fib__destroy(skel);
997 close_netns(nstoken);
998 }
999
test_tc_redirect_neigh(struct netns_setup_result * setup_result)1000 static void test_tc_redirect_neigh(struct netns_setup_result *setup_result)
1001 {
1002 struct nstoken *nstoken = NULL;
1003 struct test_tc_neigh *skel = NULL;
1004 int err;
1005
1006 nstoken = open_netns(NS_FWD);
1007 if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
1008 return;
1009
1010 skel = test_tc_neigh__open();
1011 if (!ASSERT_OK_PTR(skel, "test_tc_neigh__open"))
1012 goto done;
1013
1014 skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd;
1015 skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd;
1016
1017 err = test_tc_neigh__load(skel);
1018 if (!ASSERT_OK(err, "test_tc_neigh__load"))
1019 goto done;
1020
1021 if (netns_load_bpf(skel->progs.tc_src, skel->progs.tc_dst,
1022 skel->progs.tc_chk, setup_result))
1023 goto done;
1024
1025 if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
1026 goto done;
1027
1028 test_connectivity();
1029
1030 done:
1031 if (skel)
1032 test_tc_neigh__destroy(skel);
1033 close_netns(nstoken);
1034 }
1035
test_tc_redirect_peer(struct netns_setup_result * setup_result)1036 static void test_tc_redirect_peer(struct netns_setup_result *setup_result)
1037 {
1038 struct nstoken *nstoken;
1039 struct test_tc_peer *skel;
1040 int err;
1041
1042 nstoken = open_netns(NS_FWD);
1043 if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
1044 return;
1045
1046 skel = test_tc_peer__open();
1047 if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
1048 goto done;
1049
1050 skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd;
1051 skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd;
1052
1053 err = test_tc_peer__load(skel);
1054 if (!ASSERT_OK(err, "test_tc_peer__load"))
1055 goto done;
1056
1057 if (netns_load_bpf(skel->progs.tc_src, skel->progs.tc_dst,
1058 skel->progs.tc_chk, setup_result))
1059 goto done;
1060
1061 if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
1062 goto done;
1063
1064 test_connectivity();
1065
1066 done:
1067 if (skel)
1068 test_tc_peer__destroy(skel);
1069 close_netns(nstoken);
1070 }
1071
tun_open(char * name)1072 static int tun_open(char *name)
1073 {
1074 struct ifreq ifr;
1075 int fd, err;
1076
1077 fd = open("/dev/net/tun", O_RDWR);
1078 if (!ASSERT_GE(fd, 0, "open /dev/net/tun"))
1079 return -1;
1080
1081 memset(&ifr, 0, sizeof(ifr));
1082
1083 ifr.ifr_flags = IFF_TUN | IFF_NO_PI;
1084 if (*name)
1085 strncpy(ifr.ifr_name, name, IFNAMSIZ);
1086
1087 err = ioctl(fd, TUNSETIFF, &ifr);
1088 if (!ASSERT_OK(err, "ioctl TUNSETIFF"))
1089 goto fail;
1090
1091 SYS(fail, "ip link set dev %s up", name);
1092
1093 return fd;
1094 fail:
1095 close(fd);
1096 return -1;
1097 }
1098
/* Which way tun_relay_loop() copies a packet on a given iteration. */
enum {
	SRC_TO_TARGET,	/* 0: read from src_fd, write to target_fd */
	TARGET_TO_SRC,	/* 1: read from target_fd, write to src_fd */
};
1103
tun_relay_loop(int src_fd,int target_fd)1104 static int tun_relay_loop(int src_fd, int target_fd)
1105 {
1106 fd_set rfds, wfds;
1107
1108 FD_ZERO(&rfds);
1109 FD_ZERO(&wfds);
1110
1111 for (;;) {
1112 char buf[1500];
1113 int direction, nread, nwrite;
1114
1115 FD_SET(src_fd, &rfds);
1116 FD_SET(target_fd, &rfds);
1117
1118 if (select(1 + MAX(src_fd, target_fd), &rfds, NULL, NULL, NULL) < 0) {
1119 log_err("select failed");
1120 return 1;
1121 }
1122
1123 direction = FD_ISSET(src_fd, &rfds) ? SRC_TO_TARGET : TARGET_TO_SRC;
1124
1125 nread = read(direction == SRC_TO_TARGET ? src_fd : target_fd, buf, sizeof(buf));
1126 if (nread < 0) {
1127 log_err("read failed");
1128 return 1;
1129 }
1130
1131 nwrite = write(direction == SRC_TO_TARGET ? target_fd : src_fd, buf, nread);
1132 if (nwrite != nread) {
1133 log_err("write failed");
1134 return 1;
1135 }
1136 }
1137 }
1138
test_tc_redirect_peer_l3(struct netns_setup_result * setup_result)1139 static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
1140 {
1141 LIBBPF_OPTS(bpf_tc_hook, qdisc_tun_fwd);
1142 LIBBPF_OPTS(bpf_tc_hook, qdisc_dst_fwd);
1143 struct test_tc_peer *skel = NULL;
1144 struct nstoken *nstoken = NULL;
1145 int err;
1146 int tunnel_pid = -1;
1147 int src_fd, target_fd = -1;
1148 int ifindex;
1149
1150 /* Start a L3 TUN/TAP tunnel between the src and dst namespaces.
1151 * This test is using TUN/TAP instead of e.g. IPIP or GRE tunnel as those
1152 * expose the L2 headers encapsulating the IP packet to BPF and hence
1153 * don't have skb in suitable state for this test. Alternative to TUN/TAP
1154 * would be e.g. Wireguard which would appear as a pure L3 device to BPF,
1155 * but that requires much more complicated setup.
1156 */
1157 nstoken = open_netns(NS_SRC);
1158 if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC))
1159 return;
1160
1161 src_fd = tun_open("tun_src");
1162 if (!ASSERT_GE(src_fd, 0, "tun_open tun_src"))
1163 goto fail;
1164
1165 close_netns(nstoken);
1166
1167 nstoken = open_netns(NS_FWD);
1168 if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD))
1169 goto fail;
1170
1171 target_fd = tun_open("tun_fwd");
1172 if (!ASSERT_GE(target_fd, 0, "tun_open tun_fwd"))
1173 goto fail;
1174
1175 tunnel_pid = fork();
1176 if (!ASSERT_GE(tunnel_pid, 0, "fork tun_relay_loop"))
1177 goto fail;
1178
1179 if (tunnel_pid == 0)
1180 exit(tun_relay_loop(src_fd, target_fd));
1181
1182 skel = test_tc_peer__open();
1183 if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
1184 goto fail;
1185
1186 ifindex = if_nametoindex("tun_fwd");
1187 if (!ASSERT_GT(ifindex, 0, "if_indextoname tun_fwd"))
1188 goto fail;
1189
1190 skel->rodata->IFINDEX_SRC = ifindex;
1191 skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd;
1192
1193 err = test_tc_peer__load(skel);
1194 if (!ASSERT_OK(err, "test_tc_peer__load"))
1195 goto fail;
1196
1197 /* Load "tc_src_l3" to the tun_fwd interface to redirect packets
1198 * towards dst, and "tc_dst" to redirect packets
1199 * and "tc_chk" on dst_fwd to drop non-redirected packets.
1200 */
1201 /* tc qdisc add dev tun_fwd clsact */
1202 QDISC_CLSACT_CREATE(&qdisc_tun_fwd, ifindex);
1203 /* tc filter add dev tun_fwd ingress bpf da tc_src_l3 */
1204 XGRESS_FILTER_ADD(&qdisc_tun_fwd, BPF_TC_INGRESS, skel->progs.tc_src_l3, 0);
1205
1206 /* tc qdisc add dev dst_fwd clsact */
1207 QDISC_CLSACT_CREATE(&qdisc_dst_fwd, setup_result->ifindex_dst_fwd);
1208 /* tc filter add dev dst_fwd ingress bpf da tc_dst_l3 */
1209 XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS, skel->progs.tc_dst_l3, 0);
1210 /* tc filter add dev dst_fwd egress bpf da tc_chk */
1211 XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS, skel->progs.tc_chk, 0);
1212
1213 /* Setup route and neigh tables */
1214 SYS(fail, "ip -netns " NS_SRC " addr add dev tun_src " IP4_TUN_SRC "/24");
1215 SYS(fail, "ip -netns " NS_FWD " addr add dev tun_fwd " IP4_TUN_FWD "/24");
1216
1217 SYS(fail, "ip -netns " NS_SRC " addr add dev tun_src " IP6_TUN_SRC "/64 nodad");
1218 SYS(fail, "ip -netns " NS_FWD " addr add dev tun_fwd " IP6_TUN_FWD "/64 nodad");
1219
1220 SYS(fail, "ip -netns " NS_SRC " route del " IP4_DST "/32 dev src scope global");
1221 SYS(fail, "ip -netns " NS_SRC " route add " IP4_DST "/32 via " IP4_TUN_FWD
1222 " dev tun_src scope global");
1223 SYS(fail, "ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev dst scope global");
1224 SYS(fail, "ip -netns " NS_SRC " route del " IP6_DST "/128 dev src scope global");
1225 SYS(fail, "ip -netns " NS_SRC " route add " IP6_DST "/128 via " IP6_TUN_FWD
1226 " dev tun_src scope global");
1227 SYS(fail, "ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev dst scope global");
1228
1229 SYS(fail, "ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev dst lladdr " MAC_DST_FWD);
1230 SYS(fail, "ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev dst lladdr " MAC_DST_FWD);
1231
1232 if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
1233 goto fail;
1234
1235 test_connectivity();
1236
1237 fail:
1238 if (tunnel_pid > 0) {
1239 kill(tunnel_pid, SIGTERM);
1240 waitpid(tunnel_pid, NULL, 0);
1241 }
1242 if (src_fd >= 0)
1243 close(src_fd);
1244 if (target_fd >= 0)
1245 close(target_fd);
1246 if (skel)
1247 test_tc_peer__destroy(skel);
1248 if (nstoken)
1249 close_netns(nstoken);
1250 }
1251
/*
 * Run test_<name>() as the subtest "<name>" with links created in @mode.
 *
 * If the subtest is selected, the namespaces are added, links and routes are
 * set up and, when that succeeds, the test body runs. The namespaces are
 * deleted again whenever adding them succeeded, even if link setup failed.
 */
#define RUN_TEST(name, mode)								\
	({										\
		struct netns_setup_result setup_result = { .dev_mode = mode, };	\
		if (test__start_subtest(#name) &&					\
		    ASSERT_OK(netns_setup_namespaces("add"), "setup namespaces")) {	\
			if (ASSERT_OK(netns_setup_links_and_routes(&setup_result),	\
				      "setup links and routes")) {			\
				test_ ## name(&setup_result);				\
			}								\
			netns_setup_namespaces("delete");				\
		}									\
	})
1263
test_tc_redirect_run_tests(void * arg)1264 static void *test_tc_redirect_run_tests(void *arg)
1265 {
1266 netns_setup_namespaces_nofail("delete");
1267
1268 RUN_TEST(tc_redirect_peer, MODE_VETH);
1269 RUN_TEST(tc_redirect_peer, MODE_NETKIT);
1270 RUN_TEST(tc_redirect_peer_l3, MODE_VETH);
1271 RUN_TEST(tc_redirect_peer_l3, MODE_NETKIT);
1272 RUN_TEST(tc_redirect_neigh, MODE_VETH);
1273 RUN_TEST(tc_redirect_neigh_fib, MODE_VETH);
1274 RUN_TEST(tc_redirect_dtime, MODE_VETH);
1275 return NULL;
1276 }
1277
test_tc_redirect(void)1278 void test_tc_redirect(void)
1279 {
1280 pthread_t test_thread;
1281 int err;
1282
1283 /* Run the tests in their own thread to isolate the namespace changes
1284 * so they do not affect the environment of other tests.
1285 * (specifically needed because of unshare(CLONE_NEWNS) in open_netns())
1286 */
1287 err = pthread_create(&test_thread, NULL, &test_tc_redirect_run_tests, NULL);
1288 if (ASSERT_OK(err, "pthread_create"))
1289 ASSERT_OK(pthread_join(test_thread, NULL), "pthread_join");
1290 }
1291