#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# This tests basic flowtable functionality.
# Creates the following default topology:
#
# Originator (MTU 9000) <-Router1-> MTU 1500 <-Router2-> Responder (MTU 2000)
# Router1 is the one doing flow offloading, Router2 has no special
# purpose other than having a link whose MTU is smaller than that of both
# the Originator and the Responder, i.e. TCPMSS announced values are too
# large and will still result in fragmentation and/or PMTU discovery.
#
# You can check with different Originator/Link/Responder MTU, e.g.:
# nft_flowtable.sh -o8000 -l1500 -r2000
#
16
# Random 8-character suffix shared by all namespace names of this run.
sfx=$(mktemp -u "XXXXXXXX")
ns1="ns1-${sfx}"
ns2="ns2-${sfx}"
nsr1="nsr1-${sfx}"
nsr2="nsr2-${sfx}"

# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4
# Overall exit status of the script.
ret=0

# Paths of the transfer payload/result files; start out empty and are
# assigned once the temp files are created further down.
ns1in=""
ns2in=""
ns1out=""
ns2out=""

# Remember the current nf_log_all_netns value so cleanup can restore it.
log_netns=$(sysctl -n net.netfilter.nf_log_all_netns)
33
# Run a probe command and skip the whole test when it fails.
#   $1 - command line to run; expanded unquoted on purpose so that it is
#        split into command and arguments. Its output is discarded.
#   $2 - text appended to "SKIP: Could not " if the command fails.
# Exits the script with $ksft_skip on failure, returns 0 otherwise.
checktool ()
{
	$1 > /dev/null 2>&1 && return 0

	echo "SKIP: Could not $2"
	exit $ksft_skip
}
40
# Required tools; a failing probe ends the run with the kselftest SKIP code.
checktool "nft --version" "run test without nft tool"
checktool "ip -Version" "run test without ip tool"
checktool "which nc" "run test without nc (netcat)"
# This probe doubles as the creation of the first router namespace.
checktool "ip netns add $nsr1" "create net namespace $nsr1"

# Remaining namespaces: originator, responder and second router.
for ns in "$ns1" "$ns2" "$nsr2"; do
	ip netns add "$ns"
done
49
# EXIT trap handler: undo what the test set up.
# Deletes the four network namespaces, removes the four transfer temp
# files and, if nf_log_all_netns was 0 when the script started, writes
# that value back.
# Globals read: ns1 ns2 nsr1 nsr2 ns1in ns1out ns2in ns2out log_netns
cleanup() {
	local ns

	for ns in "$ns1" "$ns2" "$nsr1" "$nsr2"; do
		ip netns del "$ns"
	done

	rm -f "$ns1in" "$ns1out"
	rm -f "$ns2in" "$ns2out"

	# log_netns is empty when the initial sysctl read produced nothing;
	# a quoted string comparison keeps '[' from failing with a syntax
	# error in that case.
	if [ "$log_netns" = "0" ]; then
		sysctl -q net.netfilter.nf_log_all_netns="$log_netns"
	fi
}
61
# From here on, every exit path tears the namespaces down again.
trap cleanup EXIT

sysctl -q net.netfilter.nf_log_all_netns=1

# Three veth pairs: ns1 <-> nsr1, nsr1 <-> nsr2, nsr2 <-> ns2.
ip link add veth0 netns "$nsr1" type veth peer name eth0 netns "$ns1"
ip link add veth1 netns "$nsr1" type veth peer name veth0 netns "$nsr2"
ip link add veth1 netns "$nsr2" type veth peer name eth0 netns "$ns2"

# Bring up loopback and both veth ends in each router.
for dev in lo veth0 veth1; do
	for router in "$nsr1" "$nsr2"; do
		ip -net "$router" link set "$dev" up
	done
done

# Router1 address on the originator-facing link.
ip -net "$nsr1" addr add 10.0.1.1/24 dev veth0
ip -net "$nsr1" addr add dead:1::1/64 dev veth0

# Router2 address on the responder-facing link.
ip -net "$nsr2" addr add 10.0.2.1/24 dev veth1
ip -net "$nsr2" addr add dead:2::1/64 dev veth1
81
# Set different MTUs so that packets coming from ns1 (large MTU) and going
# to ns2 (smaller MTU) have to be pushed to the stack, either to perform
# fragmentation (ip_no_pmtu_disc=1) or to do PMTU discovery (send an ICMP
# error back to the originator).
# ns2 is reached via nsr2 over a link with a smaller MTU, so that the TCPMSS
# announced by both peers is NOT the lowest link MTU.

# Defaults; each can be overridden on the command line (-o / -l / -r).
omtu=9000	# originator
lmtu=1500	# link between the routers
rmtu=2000	# responder
91
# Print the command line help to stdout and exit with status 1.
usage(){
	printf '%s\n' \
		"nft_flowtable.sh [OPTIONS]" \
		"" \
		"MTU options" \
		"   -o originator" \
		"   -l link" \
		"   -r responder"
	exit 1
}
101
# Command line: -o/-l/-r each take an MTU value; anything else prints
# the usage text (which exits).
while getopts "o:l:r:" opt; do
	case "$opt" in
	o)
		omtu=$OPTARG
		;;
	l)
		lmtu=$OPTARG
		;;
	r)
		rmtu=$OPTARG
		;;
	*)
		usage
		;;
	esac
done
111
# Originator side: the router end is checked, so a value that 'ip'
# rejects aborts the test right here.
ip -net "$nsr1" link set veth0 mtu "$omtu" || exit 1
ip -net "$ns1" link set eth0 mtu "$omtu"

# Responder side, same pattern.
ip -net "$nsr2" link set veth1 mtu "$rmtu" || exit 1
ip -net "$ns2" link set eth0 mtu "$rmtu"
123
# Transfer network between nsr1 and nsr2.
# These addresses are not used as connection endpoints.
ip -net "$nsr1" addr add 192.168.10.1/24 dev veth1
ip -net "$nsr1" addr add fee1:2::1/64 dev veth1

ip -net "$nsr2" addr add 192.168.10.2/24 dev veth0
ip -net "$nsr2" addr add fee1:2::2/64 dev veth0

# Enable IPv4 forwarding on both veth devices of both routers.
for dev in veth0 veth1; do
	for router in "$nsr1" "$nsr2"; do
		ip netns exec "$router" sysctl "net.ipv4.conf.$dev.forwarding=1" > /dev/null
	done
done
136
# Per-endpoint setup for originator and responder.
for ns in "$ns1" "$ns2"; do
	ip -net "$ns" link set lo up
	ip -net "$ns" link set eth0 up

	ip netns exec "$ns" sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null || {
		echo "ERROR: Check Originator/Responder values (problem during address addition)"
		exit 1
	}

	# don't set ip DF bit for first two tests
	ip netns exec "$ns" sysctl net.ipv4.ip_no_pmtu_disc=1 > /dev/null
done
148
# Originator: addresses on eth0, default routes via Router1.
ip -net "$ns1" addr add 10.0.1.99/24 dev eth0
ip -net "$ns1" route add default via 10.0.1.1
ip -net "$ns1" addr add dead:1::99/64 dev eth0
ip -net "$ns1" route add default via dead:1::1

# Responder: addresses on eth0, default routes via Router2.
ip -net "$ns2" addr add 10.0.2.99/24 dev eth0
ip -net "$ns2" route add default via 10.0.2.1
ip -net "$ns2" addr add dead:2::99/64 dev eth0
ip -net "$ns2" route add default via dead:2::1

# The routers point at each other across the transfer network.
ip -net "$nsr1" route add default via 192.168.10.2
ip -net "$nsr2" route add default via 192.168.10.1
160
# Install the flowtable and forward chain in Router1. If nft rejects the
# ruleset the test is skipped rather than failed.
# $lmtu is expanded by the shell inside the here-document.
if ! ip netns exec "$nsr1" nft -f - <<EOF
table inet filter {
  flowtable f1 {
     hook ingress priority 0
     devices = { veth0, veth1 }
   }

   chain forward {
      type filter hook forward priority 0; policy drop;

      # flow offloaded? Tag ct with mark 1, so we can detect when it fails.
      meta oif "veth1" tcp dport 12345 flow offload @f1 counter

      # use packet size to trigger 'should be offloaded by now'.
      # otherwise, if 'flow offload' expression never offloads, the
      # test will pass.
      tcp dport 12345 meta length gt 200 ct mark set 1 counter

      # this turns off flow offloading internally, so expect packets again
      tcp flags fin,rst ct mark set 0 accept

      # this allows large packets from responder, we need this as long
      # as PMTUd is off.
      # This rule is deleted for the last test, when we expect PMTUd
      # to kick in and ensure all packets meet mtu requirements.
      meta length gt $lmtu accept comment something-to-grep-for

      # next line blocks connection w.o. working offload.
      # we only do this for reverse dir, because we expect packets to
      # enter slow path due to MTU mismatch of veth0 and veth1.
      tcp sport 12345 ct mark 1 counter log prefix "mark failure " drop

      ct state established,related accept

      # for packets that we can't offload yet, i.e. SYN (any ct that is not confirmed)
      meta length lt 200 oif "veth1" tcp dport 12345 counter accept

      meta nfproto ipv4 meta l4proto icmp accept
      meta nfproto ipv6 meta l4proto icmpv6 accept
   }
}
EOF
then
	echo "SKIP: Could not load nft ruleset"
	exit $ksft_skip
fi
208
# Test basic connectivity in both directions with a single ping each;
# without it none of the transfer tests below can work, so bail out.
if ! ip netns exec "$ns1" ping -c 1 -q 10.0.2.99 > /dev/null; then
	# report the real namespace name of the peer, not the literal "ns2"
	echo "ERROR: $ns1 cannot reach $ns2" 1>&2
	exit 1
fi

if ! ip netns exec "$ns2" ping -c 1 -q 10.0.1.99 > /dev/null; then
	echo "ERROR: $ns2 cannot reach $ns1" 1>&2
	exit 1
fi

if [ "$ret" -eq 0 ]; then
	echo "PASS: netns routing/connectivity: $ns1 can reach $ns2"
fi
223
# One temp file per direction and role: what each side sends ("in")
# and what it receives ("out").
for tmpvar in ns1in ns1out ns2in ns2out; do
	printf -v "$tmpvar" '%s' "$(mktemp)"
done
228
# Fill a file with random data of random size.
#   $1 - path of the file to (over)write
# The file gets between 0 and 8191 KiB from /dev/urandom followed by a
# tail of 128..1151 single bytes, so it is never empty and its size is
# not a multiple of 1024 most of the time.
# Globals written: SIZE (size of the last chunk written), TSIZE (total
# file size in bytes).
make_file()
{
	local name=$1

	SIZE=$((RANDOM % (1024 * 8)))
	TSIZE=$((SIZE * 1024))

	dd if=/dev/urandom of="$name" bs=1024 count=$SIZE 2> /dev/null

	SIZE=$((RANDOM % 1024))
	SIZE=$((SIZE + 128))
	TSIZE=$((TSIZE + SIZE))
	# Append the tail via the shell redirection. The previous form used the
	# misspelled operand "conf=notrunc", which made dd bail out (silently,
	# stderr is discarded) so the tail was never written.
	dd if=/dev/urandom bs=1 count=$SIZE 2> /dev/null >> "$name"
}
243
# Compare a sent file with the file received on the other side.
#   $1 - file that was sent
#   $2 - file that was received
#   $3 - label used in the failure message
# Returns 0 if both files are byte-identical. Otherwise prints a FAIL line
# to stderr, lists both files with 'ls -l' on stdout and returns 1.
check_transfer()
{
	in=$1
	out=$2
	what=$3

	cmp "$in" "$out" > /dev/null 2>&1 && return 0

	echo "FAIL: file mismatch for $what" 1>&2
	ls -l "$in"
	ls -l "$out"
	return 1
}
259
# Run one bidirectional nc transfer between two namespaces and verify it.
#   $1 - namespace of the connecting side; sends $ns1in, stores what it
#        receives in $ns1out
#   $2 - namespace of the listening side (always port 12345); sends
#        $ns2in, stores what it receives in $ns2out
#   $3 - address the client connects to
#   $4 - port the client connects to
# Returns 0 if both received files match what the peer sent, 1 otherwise.
# Globals written: lpid, cpid (PIDs of the listener and client jobs).
test_tcp_forwarding_ip()
{
	local client_ns=$1
	local server_ns=$2
	local addr=$3
	local port=$4
	local rc=0
	local pid

	ip netns exec "$server_ns" nc -w 5 -l -p 12345 < "$ns2in" > "$ns2out" &
	lpid=$!

	# the listener is started first; the client follows one second later
	sleep 1
	ip netns exec "$client_ns" nc -w 4 "$addr" "$port" < "$ns1in" > "$ns1out" &
	cpid=$!

	sleep 3

	# kill whichever of the two nc jobs is still around, listener first
	for pid in "$lpid" "$cpid"; do
		if ps -p "$pid" > /dev/null; then
			kill "$pid"
		fi
	done

	wait

	check_transfer "$ns1in" "$ns2out" "ns1 -> ns2" || rc=1
	check_transfer "$ns2in" "$ns1out" "ns1 <- ns2" || rc=1

	return $rc
}
297
# Transfer test against the responder's real address, 10.0.2.99:12345.
#   $1 - client namespace
#   $2 - listener namespace
# Returns the status of test_tcp_forwarding_ip.
test_tcp_forwarding()
{
	local rc=0

	test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345 || rc=$?

	return $rc
}
304
# Transfer test for the NAT setups: first against the responder's real
# address (10.0.2.99:12345) and, only if that passed, against
# 10.6.6.6:1666.
#   $1 - client namespace
#   $2 - listener namespace
# Returns the status of the first failing transfer, 0 if both passed.
test_tcp_forwarding_nat()
{
	test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345 || return $?

	test_tcp_forwarding_ip "$1" "$2" 10.6.6.6 1666
}
319
# Random payloads, one per direction.
for payload in "$ns1in" "$ns2in"; do
	make_file "$payload"
done

# First test:
# No PMTU discovery, nsr1 is expected to fragment packets from ns1 to ns2 as needed.
if ! test_tcp_forwarding "$ns1" "$ns2"; then
	echo "FAIL: flow offload for ns1/ns2:" 1>&2
	ip netns exec "$nsr1" nft list ruleset
	ret=1
else
	echo "PASS: flow offloaded for ns1/ns2"
fi
332
# Delete the default routes in ns2, i.e. ns2 won't be able to reach ns1
# and will depend on ns1 being masqueraded in nsr1.
# Expect ns1 to show up with the nsr1 address.
ip -net "$ns2" route del default via 10.0.2.1
ip -net "$ns2" route del default via dead:2::1
ip -net "$ns2" route add 192.168.10.1 via 10.0.2.1

# Second test:
# Same, but with NAT enabled.
ip netns exec "$nsr1" nft -f - <<EOF
table ip nat {
   chain prerouting {
      type nat hook prerouting priority 0; policy accept;
      meta iif "veth0" ip daddr 10.6.6.6 tcp dport 1666 counter dnat ip to 10.0.2.99:12345
   }

   chain postrouting {
      type nat hook postrouting priority 0; policy accept;
      meta oifname "veth1" counter masquerade
   }
}
EOF

if ! test_tcp_forwarding_nat "$ns1" "$ns2"; then
	echo "FAIL: flow offload for ns1/ns2 with NAT" 1>&2
	ip netns exec "$nsr1" nft list ruleset
	ret=1
else
	echo "PASS: flow offloaded for ns1/ns2 with NAT"
fi
363
# Third test:
# Same as second test, but with PMTU discovery enabled.

# Look up the rule that accepts oversized packets by its comment. What
# follows the '#' in the 'nft -a' listing is the "handle <n>" pair.
handle=$(ip netns exec "$nsr1" nft -a list table inet filter | grep something-to-grep-for | cut -d \# -f 2)

# $handle is left unquoted on purpose: it has to split into the two
# words "handle" and "<n>".
if ! ip netns exec "$nsr1" nft delete rule inet filter forward $handle; then
	echo "FAIL: Could not delete large-packet accept rule"
	exit 1
fi

ip netns exec "$ns1" sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
ip netns exec "$ns2" sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null

if test_tcp_forwarding_nat "$ns1" "$ns2"; then
	echo "PASS: flow offloaded for ns1/ns2 with NAT and pmtu discovery"
else
	echo "FAIL: flow offload for ns1/ns2 with NAT and pmtu discovery" 1>&2
	ip netns exec "$nsr1" nft list ruleset
	# record the failure like the other tests do, so that the script
	# does not exit 0 after printing FAIL
	ret=1
fi
382
# Another test:
# Add bridge interface br0 to Router1, with NAT enabled.
# veth0 becomes a port of br0 and its addresses move to the bridge.
ip -net "$nsr1" link add name br0 type bridge
ip -net "$nsr1" addr flush dev veth0
ip -net "$nsr1" link set up dev veth0
ip -net "$nsr1" link set veth0 master br0
ip -net "$nsr1" addr add 10.0.1.1/24 dev br0
ip -net "$nsr1" addr add dead:1::1/64 dev br0
ip -net "$nsr1" link set up dev br0

ip netns exec "$nsr1" sysctl net.ipv4.conf.br0.forwarding=1 > /dev/null
394
# br0 with NAT enabled: same NAT table as before, but the DNAT rule now
# matches on br0 as input interface.
ip netns exec "$nsr1" nft -f - <<EOF
flush table ip nat
table ip nat {
   chain prerouting {
      type nat hook prerouting priority 0; policy accept;
      meta iif "br0" ip daddr 10.6.6.6 tcp dport 1666 counter dnat ip to 10.0.2.99:12345
   }

   chain postrouting {
      type nat hook postrouting priority 0; policy accept;
      meta oifname "veth1" counter masquerade
   }
}
EOF

if ! test_tcp_forwarding_nat "$ns1" "$ns2"; then
	echo "FAIL: flow offload for ns1/ns2 with bridge NAT" 1>&2
	ip netns exec "$nsr1" nft list ruleset
	ret=1
else
	echo "PASS: flow offloaded for ns1/ns2 with bridge NAT"
fi
418
# Another test:
# Add bridge interface br0 to Router1, with NAT and VLAN.

# Router1: replace veth0 as bridge port by a VLAN 10 device on top of it.
ip -net "$nsr1" link set veth0 nomaster
ip -net "$nsr1" link set down dev veth0
ip -net "$nsr1" link add link veth0 name veth0.10 type vlan id 10
ip -net "$nsr1" link set up dev veth0
ip -net "$nsr1" link set up dev veth0.10
ip -net "$nsr1" link set veth0.10 master br0

# Originator: move the addresses from eth0 to a matching VLAN 10 device.
ip -net "$ns1" addr flush dev eth0
ip -net "$ns1" link add link eth0 name eth0.10 type vlan id 10
ip -net "$ns1" link set eth0 up
ip -net "$ns1" link set eth0.10 up
ip -net "$ns1" addr add 10.0.1.99/24 dev eth0.10
ip -net "$ns1" route add default via 10.0.1.1
ip -net "$ns1" addr add dead:1::99/64 dev eth0.10

if ! test_tcp_forwarding_nat "$ns1" "$ns2"; then
	echo "FAIL: flow offload for ns1/ns2 with bridge NAT and VLAN" 1>&2
	ip netns exec "$nsr1" nft list ruleset
	ret=1
else
	echo "PASS: flow offloaded for ns1/ns2 with bridge NAT and VLAN"
fi
443
# Restore test topology (remove bridge and VLAN).
# Router1 side first ...
ip -net "$nsr1" link set veth0 nomaster
ip -net "$nsr1" link set veth0 down
ip -net "$nsr1" link set veth0.10 down
ip -net "$nsr1" link delete veth0.10 type vlan
ip -net "$nsr1" link delete br0 type bridge
# ... then the originator.
ip -net "$ns1" addr flush dev eth0.10
ip -net "$ns1" link set eth0.10 down
ip -net "$ns1" link set eth0 down
ip -net "$ns1" link delete eth0.10 type vlan

# Restore the addresses in ns1 and nsr1 on the plain veth devices.
ip -net "$ns1" link set eth0 up
ip -net "$ns1" addr add 10.0.1.99/24 dev eth0
ip -net "$ns1" route add default via 10.0.1.1
ip -net "$ns1" addr add dead:1::99/64 dev eth0
ip -net "$ns1" route add default via dead:1::1
ip -net "$nsr1" addr add 10.0.1.1/24 dev veth0
ip -net "$nsr1" addr add dead:1::1/64 dev veth0
ip -net "$nsr1" link set up dev veth0
464
# Throw-away key material for the IPsec test: hex digests of the current
# process listing, prefixed with "0x".
KEY_SHA="0x$(ps -xaf | sha1sum | awk '{ print $1 }')"
KEY_AES="0x$(ps -xaf | md5sum | awk '{ print $1 }')"

# Two SPIs, one per direction; they must differ.
SPI1=$RANDOM
SPI2=$RANDOM

if ((SPI1 == SPI2)); then
	SPI2=$((SPI2 + 1))
fi
473
# Configure one end of an ESP tunnel-mode connection in a namespace.
#   $1 - namespace to configure
#   $2 - local tunnel endpoint address
#   $3 - remote tunnel endpoint address
#   $4 - local network (selector)
#   $5 - remote network (selector)
#   $6 - SPI for outgoing traffic
#   $7 - SPI for incoming traffic
# Globals read: KEY_AES, KEY_SHA
# Returns the status of the last 'ip xfrm' command.
do_esp() {
	local netns=$1
	local local_ep=$2
	local remote_ep=$3
	local local_net=$4
	local remote_net=$5
	local out_spi=$6
	local in_spi=$7

	# state for the incoming direction (remote -> local)
	ip -net "$netns" xfrm state add src "$remote_ep" dst "$local_ep" \
		proto esp spi "$in_spi" enc aes "$KEY_AES" auth sha1 "$KEY_SHA" \
		mode tunnel sel src "$remote_net" dst "$local_net"
	# state for the outgoing direction (local -> remote)
	ip -net "$netns" xfrm state add src "$local_ep" dst "$remote_ep" \
		proto esp spi "$out_spi" enc aes "$KEY_AES" auth sha1 "$KEY_SHA" \
		mode tunnel sel src "$local_net" dst "$remote_net"

	# to encrypt packets as they go out (includes forwarded packets that need encapsulation)
	ip -net "$netns" xfrm policy add src "$local_net" dst "$remote_net" dir out \
		tmpl src "$local_ep" dst "$remote_ep" proto esp mode tunnel \
		priority 1 action allow
	# to fwd decrypted packets after esp processing:
	ip -net "$netns" xfrm policy add src "$remote_net" dst "$local_net" dir fwd \
		tmpl src "$remote_ep" dst "$local_ep" proto esp mode tunnel \
		priority 1 action allow
}
492
# ESP tunnel between the routers over the transfer network; the SPIs are
# mirrored so that one side's outgoing SPI is the other side's incoming.
do_esp "$nsr1" 192.168.10.1 192.168.10.2 10.0.1.0/24 10.0.2.0/24 "$SPI1" "$SPI2"
do_esp "$nsr2" 192.168.10.2 192.168.10.1 10.0.2.0/24 10.0.1.0/24 "$SPI2" "$SPI1"

# NAT is not part of the IPsec test.
ip netns exec "$nsr1" nft delete table ip nat

# Restore the default routes in ns2 that were removed for the NAT tests.
ip -net "$ns2" route del 192.168.10.1 via 10.0.2.1
ip -net "$ns2" route add default via 10.0.2.1
ip -net "$ns2" route add default via dead:2::1
503
# Final test: plain transfer, now carried through the ESP tunnel between
# the routers.
if test_tcp_forwarding "$ns1" "$ns2"; then
	echo "PASS: ipsec tunnel mode for ns1/ns2"
else
	echo "FAIL: ipsec tunnel mode for ns1/ns2" 1>&2
	ip netns exec "$nsr1" nft list ruleset 1>&2
	ip netns exec "$nsr1" cat /proc/net/xfrm_stat 1>&2
	# record the failure like the other tests do, so that the script
	# does not exit 0 after printing FAIL
	ret=1
fi

exit $ret