#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# This tests basic flowtable functionality.
# Creates following default topology:
#
# Originator (MTU 9000) <-Router1-> MTU 1500 <-Router2-> Responder (MTU 2000)
# Router1 is the one doing flow offloading, Router2 has no special
# purpose other than having a link that is smaller than either Originator
# and responder, i.e. TCPMSS announced values are too large and will still
# result in fragmentation and/or PMTU discovery.
#
# You can check with different Originator/Link/Responder MTU eg:
# nft_flowtable.sh -o8000 -l1500 -r2000
#

# Random suffix so namespace names do not clash with existing ones.
sfx=$(mktemp -u "XXXXXXXX")
ns1="ns1-$sfx"
ns2="ns2-$sfx"
nsr1="nsr1-$sfx"
nsr2="nsr2-$sfx"

# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4
ret=0

# Payload files (input sent / output received per endpoint). They are created
# with mktemp further down; start empty so cleanup() can run at any time.
ns1in=""
ns2in=""
ns1out=""
ns2out=""

# Remember the current value so cleanup() can restore it.
log_netns=$(sysctl -n net.netfilter.nf_log_all_netns)

# checktool CMD DESCRIPTION
# Runs CMD (a command line given as ONE string) with output discarded; if it
# fails, prints "SKIP: Could not DESCRIPTION" and exits with the kselftest
# skip code.
checktool (){
	# $1 is deliberately unquoted: it must word-split into command + args.
	# shellcheck disable=SC2086
	if ! $1 > /dev/null 2>&1; then
		echo "SKIP: Could not $2"
		exit $ksft_skip
	fi
}

checktool "nft --version" "run test without nft tool"
checktool "ip -Version" "run test without ip tool"
# 'command -v' is a shell builtin, so this check does not depend on 'which'.
checktool "command -v nc" "run test without nc (netcat)"
# Note: on success this check has created the first router namespace.
checktool "ip netns add $nsr1" "create net namespace $nsr1"

ip netns add "$ns1"
ip netns add "$ns2"
ip netns add "$nsr2"

# EXIT handler: remove all namespaces and payload files and restore the
# nf_log_all_netns sysctl if this script changed it.
cleanup() {
	ip netns del "$ns1"
	ip netns del "$ns2"
	ip netns del "$nsr1"
	ip netns del "$nsr2"

	rm -f "$ns1in" "$ns1out"
	rm -f "$ns2in" "$ns2out"

	# Only restore when the original value was 0 (the script sets it to 1).
	# String comparison keeps this safe if the sysctl could not be read.
	if [ "$log_netns" = "0" ]; then
		sysctl -q net.netfilter.nf_log_all_netns="$log_netns"
	fi
}

trap cleanup EXIT

sysctl -q net.netfilter.nf_log_all_netns=1

ip link add veth0 netns "$nsr1" type veth peer name eth0 netns "$ns1"
ip link add veth1 netns "$nsr1" type veth peer name veth0 netns "$nsr2"

ip link add veth1 netns "$nsr2" type veth peer name eth0 netns "$ns2"

for dev in lo veth0 veth1; do
	ip -net "$nsr1" link set "$dev" up
	ip -net "$nsr2" link set "$dev" up
done

ip -net "$nsr1" addr add 10.0.1.1/24 dev veth0
ip -net "$nsr1" addr add dead:1::1/64 dev veth0

ip -net "$nsr2" addr add 10.0.2.1/24 dev veth1
ip -net "$nsr2" addr add dead:2::1/64 dev veth1

# set different MTUs so we need to push packets coming from ns1 (large MTU)
# to ns2 (smaller MTU) to stack either to perform fragmentation (ip_no_pmtu_disc=1),
# or to do PMTU discovery (send ICMP error back to originator).
# ns2 is going via nsr2 with a smaller mtu, so that TCPMSS announced by both peers
# is NOT the lowest link mtu.

omtu=9000
lmtu=1500
rmtu=2000

# Print the option summary and exit with status 1.
usage(){
	echo "nft_flowtable.sh [OPTIONS]"
	echo
	echo "MTU options"
	echo "   -o originator"
	echo "   -l link"
	echo "   -r responder"
	exit 1
}

while getopts "o:l:r:" o
do
	case $o in
		o) omtu=$OPTARG;;
		l) lmtu=$OPTARG;;
		r) rmtu=$OPTARG;;
		*) usage;;
	esac
done

if ! ip -net "$nsr1" link set veth0 mtu "$omtu"; then
	exit 1
fi

ip -net "$ns1" link set eth0 mtu "$omtu"

if ! ip -net "$nsr2" link set veth1 mtu "$rmtu"; then
	exit 1
fi

ip -net "$ns2" link set eth0 mtu "$rmtu"

# Apply the link MTU (-l) to both ends of the nsr1 <-> nsr2 transfer link.
# Previously the option only influenced the nft 'meta length' rule below and
# the link itself always kept its default MTU.
if ! ip -net "$nsr1" link set veth1 mtu "$lmtu"; then
	exit 1
fi

if ! ip -net "$nsr2" link set veth0 mtu "$lmtu"; then
	exit 1
fi

# transfer-net between nsr1 and nsr2.
# these addresses are not used for connections.
ip -net "$nsr1" addr add 192.168.10.1/24 dev veth1
ip -net "$nsr1" addr add fee1:2::1/64 dev veth1

ip -net "$nsr2" addr add 192.168.10.2/24 dev veth0
ip -net "$nsr2" addr add fee1:2::2/64 dev veth0

for i in 0 1; do
	ip netns exec "$nsr1" sysctl "net.ipv4.conf.veth$i.forwarding=1" > /dev/null
	ip netns exec "$nsr2" sysctl "net.ipv4.conf.veth$i.forwarding=1" > /dev/null
done

for ns in "$ns1" "$ns2"; do
	ip -net "$ns" link set lo up
	ip -net "$ns" link set eth0 up

	if ! ip netns exec "$ns" sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null; then
		echo "ERROR: Check Originator/Responder values (problem during address addition)" 1>&2
		exit 1
	fi
	# don't set ip DF bit for first two tests
	ip netns exec "$ns" sysctl net.ipv4.ip_no_pmtu_disc=1 > /dev/null
done

ip -net "$ns1" addr add 10.0.1.99/24 dev eth0
ip -net "$ns2" addr add 10.0.2.99/24 dev eth0
ip -net "$ns1" route add default via 10.0.1.1
ip -net "$ns2" route add default via 10.0.2.1
ip -net "$ns1" addr add dead:1::99/64 dev eth0
ip -net "$ns2" addr add dead:2::99/64 dev eth0
ip -net "$ns1" route add default via dead:1::1
ip -net "$ns2" route add default via dead:2::1

ip -net "$nsr1" route add default via 192.168.10.2
ip -net "$nsr2" route add default via 192.168.10.1

# The heredoc delimiter is unquoted on purpose: $lmtu is expanded by the shell.
ip netns exec "$nsr1" nft -f - <<EOF
table inet filter {
  flowtable f1 {
     hook ingress priority 0
     devices = { veth0, veth1 }
   }

   chain forward {
      type filter hook forward priority 0; policy drop;

      # flow offloaded? Tag ct with mark 1, so we can detect when it fails.
      meta oif "veth1" tcp dport 12345 flow offload @f1 counter

      # use packet size to trigger 'should be offloaded by now'.
      # otherwise, if 'flow offload' expression never offloads, the
      # test will pass.
      tcp dport 12345 meta length gt 200 ct mark set 1 counter

      # this turns off flow offloading internally, so expect packets again
      tcp flags fin,rst ct mark set 0 accept

      # this allows large packets from responder, we need this as long
      # as PMTUd is off.
      # This rule is deleted for the last test, when we expect PMTUd
      # to kick in and ensure all packets meet mtu requirements.
      meta length gt $lmtu accept comment something-to-grep-for

      # next line blocks connection w.o. working offload.
      # we only do this for reverse dir, because we expect packets to
      # enter slow path due to MTU mismatch of veth0 and veth1.
      tcp sport 12345 ct mark 1 counter log prefix "mark failure " drop

      ct state established,related accept

      # for packets that we can't offload yet, i.e. SYN (any ct that is not confirmed)
      meta length lt 200 oif "veth1" tcp dport 12345 counter accept

      meta nfproto ipv4 meta l4proto icmp accept
      meta nfproto ipv6 meta l4proto icmpv6 accept
   }
}
EOF

if [ $? -ne 0 ]; then
	echo "SKIP: Could not load nft ruleset"
	exit $ksft_skip
fi

# test basic connectivity
if ! ip netns exec "$ns1" ping -c 1 -q 10.0.2.99 > /dev/null; then
	echo "ERROR: $ns1 cannot reach $ns2" 1>&2
	exit 1
fi

if ! ip netns exec "$ns2" ping -c 1 -q 10.0.1.99 > /dev/null; then
	echo "ERROR: $ns2 cannot reach $ns1" 1>&2
	exit 1
fi

if [ "$ret" -eq 0 ]; then
	echo "PASS: netns routing/connectivity: $ns1 can reach $ns2"
fi

ns1in=$(mktemp)
ns1out=$(mktemp)
ns2in=$(mktemp)
ns2out=$(mktemp)

# make_file PATH
# Fills PATH with random data: a random number (0..8191) of 1 KiB blocks
# followed by a random 128..1151 byte tail, so the size is normally not a
# multiple of the block size and the file is never empty.
make_file()
{
	local name=$1
	local kblocks tail_bytes

	kblocks=$((RANDOM % (1024 * 8)))
	dd if=/dev/urandom of="$name" bs=1024 count="$kblocks" 2> /dev/null

	tail_bytes=$((RANDOM % 1024 + 128))
	# Append via '>>'. The previous 'conf=notrunc' was not a valid dd
	# operand, so dd failed (silently, because of 2>/dev/null) and the
	# tail was never written.
	dd if=/dev/urandom bs=1 count="$tail_bytes" 2> /dev/null >> "$name"
}

# check_transfer SENT RECEIVED LABEL
# Compares the two files byte for byte. On mismatch prints a FAIL line
# mentioning LABEL, lists both files and returns 1; returns 0 otherwise.
check_transfer()
{
	local sent=$1
	local received=$2
	local what=$3

	if ! cmp "$sent" "$received" > /dev/null 2>&1; then
		echo "FAIL: file mismatch for $what" 1>&2
		ls -l "$sent"
		ls -l "$received"
		return 1
	fi

	return 0
}

# test_tcp_forwarding_ip CLIENT_NS SERVER_NS DSTIP DSTPORT
# Starts an nc listener on port 12345 in SERVER_NS and an nc client in
# CLIENT_NS connecting to DSTIP:DSTPORT, exchanging $ns1in / $ns2in in both
# directions. The listener port is fixed because DSTPORT may be a DNAT'ed
# port that is rewritten to 12345 on the router.
# Returns 0 if both directions were transferred intact, 1 otherwise.
test_tcp_forwarding_ip()
{
	local nsa=$1
	local nsb=$2
	local dstip=$3
	local dstport=$4
	local lret=0
	local lpid cpid

	ip netns exec "$nsb" nc -w 5 -l -p 12345 < "$ns2in" > "$ns2out" &
	lpid=$!

	# give the listener time to bind before the client connects
	sleep 1
	ip netns exec "$nsa" nc -w 4 "$dstip" "$dstport" < "$ns1in" > "$ns1out" &
	cpid=$!

	sleep 3

	# stop whichever side is still running after the grace period
	if ps -p "$lpid" > /dev/null; then
		kill "$lpid"
	fi

	if ps -p "$cpid" > /dev/null; then
		kill "$cpid"
	fi

	wait

	if ! check_transfer "$ns1in" "$ns2out" "ns1 -> ns2"; then
		lret=1
	fi

	if ! check_transfer "$ns2in" "$ns1out" "ns1 <- ns2"; then
		lret=1
	fi

	return $lret
}

# test_tcp_forwarding CLIENT_NS SERVER_NS
# Transfer test against the responder's real address (10.0.2.99:12345).
test_tcp_forwarding()
{
	test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345

	return $?
}

# test_tcp_forwarding_nat CLIENT_NS SERVER_NS
# Transfer test against the real address and, if that passed, against the
# DNAT address 10.6.6.6:1666. Returns the status of the last test run.
test_tcp_forwarding_nat()
{
	local lret

	test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
	lret=$?

	if [ $lret -eq 0 ] ; then
		test_tcp_forwarding_ip "$1" "$2" 10.6.6.6 1666
		lret=$?
	fi

	return $lret
}

make_file "$ns1in"
make_file "$ns2in"

# First test:
# No PMTU discovery, nsr1 is expected to fragment packets from ns1 to ns2 as needed.
if test_tcp_forwarding "$ns1" "$ns2"; then
	echo "PASS: flow offloaded for ns1/ns2"
else
	echo "FAIL: flow offload for ns1/ns2:" 1>&2
	ip netns exec "$nsr1" nft list ruleset
	ret=1
fi

# delete default route, i.e. ns2 won't be able to reach ns1 and
# will depend on ns1 being masqueraded in nsr1.
# expect ns1 has nsr1 address.
ip -net "$ns2" route del default via 10.0.2.1
ip -net "$ns2" route del default via dead:2::1
ip -net "$ns2" route add 192.168.10.1 via 10.0.2.1

# Second test:
# Same, but with NAT enabled.
# Router1 NAT ruleset: DNAT 10.6.6.6:1666 arriving on veth0 to the responder
# (10.0.2.99:12345) and masquerade everything leaving via veth1.
ip netns exec "$nsr1" nft -f - <<EOF
table ip nat {
   chain prerouting {
      type nat hook prerouting priority 0; policy accept;
      meta iif "veth0" ip daddr 10.6.6.6 tcp dport 1666 counter dnat ip to 10.0.2.99:12345
   }

   chain postrouting {
      type nat hook postrouting priority 0; policy accept;
      meta oifname "veth1" counter masquerade
   }
}
EOF

if test_tcp_forwarding_nat "$ns1" "$ns2"; then
	echo "PASS: flow offloaded for ns1/ns2 with NAT"
else
	echo "FAIL: flow offload for ns1/ns2 with NAT" 1>&2
	ip netns exec "$nsr1" nft list ruleset
	ret=1
fi

# Third test:
# Same as second test, but with PMTU discovery enabled.
# Find the large-packet accept rule by its comment; the text after '#' in
# 'nft -a' output is " handle <N>".
handle=$(ip netns exec "$nsr1" nft -a list table inet filter | grep something-to-grep-for | cut -d \# -f 2)

# $handle is deliberately unquoted: it must split into "handle" and "<N>".
# shellcheck disable=SC2086
if ! ip netns exec "$nsr1" nft delete rule inet filter forward $handle; then
	echo "FAIL: Could not delete large-packet accept rule" 1>&2
	exit 1
fi

ip netns exec "$ns1" sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
ip netns exec "$ns2" sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null

if test_tcp_forwarding_nat "$ns1" "$ns2"; then
	echo "PASS: flow offloaded for ns1/ns2 with NAT and pmtu discovery"
else
	echo "FAIL: flow offload for ns1/ns2 with NAT and pmtu discovery" 1>&2
	ip netns exec "$nsr1" nft list ruleset
	# Record the failure; previously a FAIL here still let the script exit 0.
	ret=1
fi

# Another test:
# Add bridge interface br0 to Router1, with NAT enabled.
ip -net "$nsr1" link add name br0 type bridge
ip -net "$nsr1" addr flush dev veth0
ip -net "$nsr1" link set up dev veth0
ip -net "$nsr1" link set veth0 master br0
# the router addresses move from veth0 to the bridge
ip -net "$nsr1" addr add 10.0.1.1/24 dev br0
ip -net "$nsr1" addr add dead:1::1/64 dev br0
ip -net "$nsr1" link set up dev br0

ip netns exec "$nsr1" sysctl net.ipv4.conf.br0.forwarding=1 > /dev/null

# br0 with NAT enabled.
# Shorthands for running ip(8) inside Router1 / inside the originator netns.
r1_ip() { ip -net "$nsr1" "$@"; }
orig_ip() { ip -net "$ns1" "$@"; }

# Reload Router1's NAT table so that the DNAT rule matches traffic entering
# through the bridge (br0) rather than veth0, which is now a bridge port.
ip netns exec "$nsr1" nft -f - <<EOF
flush table ip nat
table ip nat {
   chain prerouting {
      type nat hook prerouting priority 0; policy accept;
      meta iif "br0" ip daddr 10.6.6.6 tcp dport 1666 counter dnat ip to 10.0.2.99:12345
   }

   chain postrouting {
      type nat hook postrouting priority 0; policy accept;
      meta oifname "veth1" counter masquerade
   }
}
EOF

if ! test_tcp_forwarding_nat "$ns1" "$ns2"; then
	echo "FAIL: flow offload for ns1/ns2 with bridge NAT" 1>&2
	ip netns exec "$nsr1" nft list ruleset
	ret=1
else
	echo "PASS: flow offloaded for ns1/ns2 with bridge NAT"
fi

# Another test:
# Add bridge interface br0 to Router1, with NAT and VLAN.
# Router1 side: take veth0 out of the bridge and enslave a VLAN 10
# sub-interface of it instead.
r1_ip link set veth0 nomaster
r1_ip link set down dev veth0
r1_ip link add link veth0 name veth0.10 type vlan id 10
for dev in veth0 veth0.10; do
	r1_ip link set up dev "$dev"
done
r1_ip link set veth0.10 master br0

# Originator side: move the addresses from eth0 to a matching VLAN 10
# sub-interface.
orig_ip addr flush dev eth0
orig_ip link add link eth0 name eth0.10 type vlan id 10
for dev in eth0 eth0.10; do
	orig_ip link set "$dev" up
done
orig_ip addr add 10.0.1.99/24 dev eth0.10
orig_ip route add default via 10.0.1.1
orig_ip addr add dead:1::99/64 dev eth0.10

if ! test_tcp_forwarding_nat "$ns1" "$ns2"; then
	echo "FAIL: flow offload for ns1/ns2 with bridge NAT and VLAN" 1>&2
	ip netns exec "$nsr1" nft list ruleset
	ret=1
else
	echo "PASS: flow offloaded for ns1/ns2 with bridge NAT and VLAN"
fi

# restore test topology (remove bridge and VLAN)
r1_ip link set veth0 nomaster
for dev in veth0 veth0.10; do
	r1_ip link set "$dev" down
done
r1_ip link delete veth0.10 type vlan
r1_ip link delete br0 type bridge
orig_ip addr flush dev eth0.10
for dev in eth0.10 eth0; do
	orig_ip link set "$dev" down
done
orig_ip link delete eth0.10 type vlan
# restore address in ns1 and nsr1
ip -net "$ns1" link set eth0 up
ip -net "$ns1" addr add 10.0.1.99/24 dev eth0
ip -net "$ns1" route add default via 10.0.1.1
ip -net "$ns1" addr add dead:1::99/64 dev eth0
ip -net "$ns1" route add default via dead:1::1
ip -net "$nsr1" addr add 10.0.1.1/24 dev veth0
ip -net "$nsr1" addr add dead:1::1/64 dev veth0
ip -net "$nsr1" link set up dev veth0

# Throw-away keys for the test tunnel, derived by hashing the process list:
# sha1sum yields a 160-bit key, md5sum a 128-bit key.
KEY_SHA="0x"$(ps -xaf | sha1sum | cut -d " " -f 1)
KEY_AES="0x"$(ps -xaf | md5sum | cut -d " " -f 1)

# SPI 0 must not be used on the wire and 1..255 are reserved (RFC 4303), so
# shift the random value above that range. The two SPIs must also differ.
SPI1=$((RANDOM + 256))
SPI2=$((RANDOM + 256))

if [ "$SPI1" -eq "$SPI2" ]; then
	SPI2=$((SPI2+1))
fi

# do_esp NS ME REMOTE LNET RNET SPI_OUT SPI_IN
# Configures one end of an ESP tunnel-mode association inside namespace NS:
#   ME / REMOTE      - tunnel endpoint addresses (local / peer)
#   LNET / RNET      - protected subnets behind the local / peer endpoint
#   SPI_OUT / SPI_IN - SPI for traffic sent to / received from the peer
# Installs both xfrm states plus the 'out' and 'fwd' policies.
do_esp() {
	local ns=$1
	local me=$2
	local remote=$3
	local lnet=$4
	local rnet=$5
	local spi_out=$6
	local spi_in=$7

	ip -net "$ns" xfrm state add src "$remote" dst "$me" proto esp spi "$spi_in"  enc aes "$KEY_AES"  auth sha1 "$KEY_SHA" mode tunnel sel src "$rnet" dst "$lnet"
	ip -net "$ns" xfrm state add src "$me"  dst "$remote" proto esp spi "$spi_out" enc aes "$KEY_AES" auth sha1 "$KEY_SHA" mode tunnel sel src "$lnet" dst "$rnet"

	# to encrypt packets as they go out (includes forwarded packets that need encapsulation)
	ip -net "$ns" xfrm policy add src "$lnet" dst "$rnet" dir out tmpl src "$me" dst "$remote" proto esp mode tunnel priority 1 action allow
	# to fwd decrypted packets after esp processing:
	ip -net "$ns" xfrm policy add src "$rnet" dst "$lnet" dir fwd tmpl src "$remote" dst "$me" proto esp mode tunnel priority 1 action allow

}

# Mirror-image configuration on the two routers: each side's outbound SPI is
# the other side's inbound SPI.
do_esp "$nsr1" 192.168.10.1 192.168.10.2 10.0.1.0/24 10.0.2.0/24 "$SPI1" "$SPI2"

do_esp "$nsr2" 192.168.10.2 192.168.10.1 10.0.2.0/24 10.0.1.0/24 "$SPI2" "$SPI1"

ip netns exec "$nsr1" nft delete table ip nat

# restore default routes
ip -net "$ns2" route del 192.168.10.1 via 10.0.2.1
ip -net "$ns2" route add default via 10.0.2.1
ip -net "$ns2" route add default via dead:2::1

if test_tcp_forwarding "$ns1" "$ns2"; then
	echo "PASS: ipsec tunnel mode for ns1/ns2"
else
	echo "FAIL: ipsec tunnel mode for ns1/ns2" 1>&2
	ip netns exec "$nsr1" nft list ruleset 1>&2
	ip netns exec "$nsr1" cat /proc/net/xfrm_stat 1>&2
	# Record the failure; previously a FAIL here still let the script exit 0.
	ret=1
fi

exit $ret