#!/bin/bash

# This script demonstrates interaction of conntrack and vrf.
# The vrf driver calls the netfilter hooks again, with oif/iif
# pointing at the VRF device.
#
# For ingress, this means first iteration has iifname of lower/real
# device.  In this script, that's veth0.
# Second iteration is iifname set to vrf device, tvrf in this script.
#
# For egress, this is reversed: first iteration has the vrf device,
# second iteration is done with the lower/real/veth0 device.
#
# test_ct_zone_in demonstrates unexpected change of nftables
# behavior caused by commit 09e856d54bda5f28 "vrf: Reset skb conntrack
# connection on VRF rcv"
#
# It was possible to assign conntrack zone to a packet (or mark it for
# `notracking`) in the prerouting chain before conntrack, based on real iif.
#
# After the change, the zone assignment is lost.  Instead, the zone is
# assigned based on the `iif` matching the VRF master interface (in case
# such a rule exists).
# Thus it is impossible to distinguish packets based on the original
# interface.
#
# test_masquerade_vrf and test_masquerade_veth demonstrate the problem
# that was supposed to be fixed by the commit mentioned above to make sure
# that any fix to test case 1 won't break masquerade again.
source lib.sh

# Addresses used on the veth pair (IP0 in ns0, IP1 in ns1).
IP0=172.30.30.1
IP1=172.30.30.2
# First three octets of the network behind dummy0; hosts are $DUMMYNET.N.
DUMMYNET=10.9.9
# Prefix length used for every address added below.
PFXL=30
# Script exit status; the test functions set it to 1 on failure.
ret=0

# EXIT trap handler: kill whatever still runs inside both test namespaces,
# then call cleanup_all_ns (not defined in this file; presumably lib.sh).
cleanup()
{
	# -r: do not invoke kill at all when a namespace has no processes left.
	ip netns pids "$ns0" | xargs -r kill 2>/dev/null
	ip netns pids "$ns1" | xargs -r kill 2>/dev/null

	cleanup_all_ns
}

checktool "nft --version" "run test without nft"
checktool "conntrack --version" "run test without conntrack"
checktool "socat -h" "run test without socat"

trap cleanup EXIT

setup_ns ns0 ns1

# ns0: turn off reverse path filtering and enable IPv4 forwarding.
ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.default.rp_filter=0
ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.all.rp_filter=0
ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.all.forwarding=1

if ! ip link add veth0 netns "$ns0" type veth peer name veth0 netns "$ns1" > /dev/null 2>&1; then
	echo "SKIP: Could not add veth device"
	exit "$ksft_skip"
fi

if ! ip -net "$ns0" li add tvrf type vrf table 9876; then
	echo "SKIP: Could not add vrf device"
	exit "$ksft_skip"
fi

ip -net "$ns0" link add dummy0 type dummy

# Enslave both ns0 devices to the VRF, then bring everything up.
ip -net "$ns0" li set veth0 master tvrf
ip -net "$ns0" li set dummy0 master tvrf
ip -net "$ns0" li set tvrf up
ip -net "$ns0" li set veth0 up
ip -net "$ns0" li set dummy0 up
ip -net "$ns1" li set veth0 up

ip -net "$ns0" addr add "$IP0/$PFXL" dev veth0
ip -net "$ns1" addr add "$IP1/$PFXL" dev veth0
ip -net "$ns0" addr add "$DUMMYNET.1/$PFXL" dev dummy0

# Succeeds once ss reports a listening TCP socket with source port 55555
# in the namespace given as $1.
listener_ready()
{
	local ns="$1"

	ss -N "$ns" -l -n -t -o "sport = :55555" | grep -q "55555"
}

# Forking TCP listener in ns1 on port 55555; received data goes to /dev/null.
ip netns exec "$ns1" socat -u -4 TCP-LISTEN:55555,reuseaddr,fork STDOUT > /dev/null &
# busywait/BUSYWAIT_TIMEOUT are not defined in this file (presumably lib.sh).
busywait "$BUSYWAIT_TIMEOUT" listener_ready "$ns1"

# test vrf ingress handling.
# The incoming connection should be placed in conntrack zone 1,
# as decided by the first iteration of the ruleset.
test_ct_zone_in()
{
	# Loads a raw-priority ruleset into ns0 that sets ct zone 1 for veth0
	# and ct zone 2 for tvrf, sends one ping from ns1 to IP0 and checks in
	# which zone the ICMP conntrack entry ended up.
	#
	# Globals: ns0, ns1, IP0, IP1 (read); ret (set to 1 on failure).
	local count

ip netns exec "$ns0" nft -f - <<EOF
table testct {
	chain rawpre {
		type filter hook prerouting priority raw;

		iif { veth0, tvrf } counter meta nftrace set 1
		iif veth0 counter ct zone set 1 counter return
		iif tvrf counter ct zone set 2 counter return
		ip protocol icmp counter
		notrack counter
	}

	chain rawout {
		type filter hook output priority raw;

		oif veth0 counter ct zone set 1 counter return
		oif tvrf counter ct zone set 2 counter return
		notrack counter
	}
}
EOF
	# Exit status of ping is not checked; the conntrack table is the verdict.
	ip netns exec "$ns1" ping -W 1 -c 1 -I veth0 "$IP0" > /dev/null

	# should be in zone 1, not zone 2
	count=$(ip netns exec "$ns0" conntrack -L -s "$IP1" -d "$IP0" -p icmp --zone 1 2>/dev/null | wc -l)
	if [ "$count" -eq 1 ]; then
		echo "PASS: entry found in conntrack zone 1"
		return
	fi

	echo "FAIL: entry not found in conntrack zone 1"
	# Propagate the failure to the script exit status, like the other tests.
	ret=1

	count=$(ip netns exec "$ns0" conntrack -L -s "$IP1" -d "$IP0" -p icmp --zone 2 2> /dev/null | wc -l)
	if [ "$count" -eq 1 ]; then
		echo "FAIL: entry found in zone 2 instead"
	else
		echo "FAIL: entry not in zone 1 or 2, dumping table"
		ip netns exec "$ns0" conntrack -L
		ip netns exec "$ns0" nft list ruleset
	fi
}

# add masq rule that gets evaluated w. outif set to vrf device.
# This tests the first iteration of the packet through conntrack,
# oifname is the vrf device.
test_masquerade_vrf()
{
	# $1: qdisc to install as root qdisc on tvrf for the duration of the
	#     test, or "default" to leave the device's qdisc untouched.
	#
	# Globals: ns0, IP1 (read); ret (set to 1 on failure).
	local qdisc=$1
	local ruleset

	if [ "$qdisc" != "default" ]; then
		tc -net "$ns0" qdisc add dev tvrf root "$qdisc"
	fi

	ip netns exec "$ns0" conntrack -F 2>/dev/null

ip netns exec "$ns0" nft -f - <<EOF
flush ruleset
table ip nat {
	chain rawout {
		type filter hook output priority raw;

		oif tvrf ct state untracked counter
	}
	chain postrouting2 {
		type filter hook postrouting priority mangle;

		oif tvrf ct state untracked counter
	}
	chain postrouting {
		type nat hook postrouting priority 0;
		# NB: masquerade should always be combined with 'oif(name) bla',
		# lack of this is intentional here, we want to exercise double-snat.
		ip saddr 172.30.30.0/30 counter masquerade random
	}
}
EOF
	if ! ip netns exec "$ns0" ip vrf exec tvrf socat -u -4 STDIN TCP:"$IP1":55555 < /dev/null > /dev/null;then
		echo "FAIL: connect failure with masquerade + sport rewrite on vrf device"
		ret=1
	else
		# must also check that nat table was evaluated on second (lower device) iteration.
		# List the table once and run both checks against the same snapshot.
		ruleset=$(ip netns exec "$ns0" nft list table ip nat)
		if grep -q 'counter packets 1' <<< "$ruleset" &&
		   grep -q 'untracked counter packets [1-9]' <<< "$ruleset"; then
			echo "PASS: connect with masquerade + sport rewrite on vrf device ($qdisc qdisc)"
		else
			echo "FAIL: vrf rules have unexpected counter value"
			ret=1
		fi
	fi

	# Remove the test qdisc on every path, including connect failure, so it
	# cannot leak into the tests that run afterwards.
	if [ "$qdisc" != "default" ]; then
		tc -net "$ns0" qdisc del dev tvrf root
	fi
}

# add masq rule that gets evaluated w. outif set to veth device.
# This tests the 2nd iteration of the packet through conntrack,
# oifname is the lower device (veth0 in this case).
test_masquerade_veth()
{
	# Replaces the ruleset in ns0 with a single masquerade rule bound to
	# oif veth0, connects to the listener on IP1:55555 from inside the vrf
	# and checks that the rule's counter shows one packet.
	#
	# Globals: ns0, IP1 (read); ret (set to 1 on failure).
	ip netns exec "$ns0" conntrack -F 2>/dev/null
ip netns exec "$ns0" nft -f - <<EOF
flush ruleset
table ip nat {
	chain postrouting {
		type nat hook postrouting priority 0;
		meta oif veth0 ip saddr 172.30.30.0/30 counter masquerade random
	}
}
EOF
	if ! ip netns exec "$ns0" ip vrf exec tvrf socat -u -4 STDIN TCP:"$IP1":55555 < /dev/null > /dev/null;then
		echo "FAIL: connect failure with masquerade + sport rewrite on veth device"
		ret=1
		return
	fi

	# must also check that nat table was evaluated on second (lower device) iteration.
	if ip netns exec "$ns0" nft list table ip nat |grep -q 'counter packets 1'; then
		echo "PASS: connect with masquerade + sport rewrite on veth device"
	else
		echo "FAIL: vrf masq rule has unexpected counter value"
		ret=1
	fi
}

test_fib()
{
	# Installs a prerouting rule in ns0 that counts (and notracks) packets
	# arriving on veth0 for $DUMMYNET.2 whose fib lookup yields dummy0 as
	# output interface, sends one ping from ns1 and checks the counter.
	#
	# Globals: ns0, ns1, IP0, DUMMYNET (read); ret (set to 1 on failure).
ip netns exec "$ns0" nft -f - <<EOF
flush ruleset
table ip t {
	counter fibcount { }

	chain prerouting {
		type filter hook prerouting priority 0;
		meta iifname veth0 ip daddr $DUMMYNET.2 fib daddr oif dummy0 counter name fibcount notrack
	}
}
EOF
	# Derive the routed network from DUMMYNET so the route, the nft rule
	# and the ping target cannot drift apart.
	ip -net "$ns1" route add "$DUMMYNET.0/24" via "$IP0" dev veth0
	# The ping exit status is not checked; the verdict comes from the counter.
	ip netns exec "$ns1" ping -q -w 1 -c 1 "$DUMMYNET".2 > /dev/null

	if ip netns exec "$ns0" nft list counter t fibcount | grep -q "packets 1"; then
		echo "PASS: fib lookup returned exepected output interface"
	else
		echo "FAIL: fib lookup did not return exepected output interface"
		ret=1
	fi
}

test_ct_zone_in
test_masquerade_vrf "default"
test_masquerade_vrf "pfifo"
test_masquerade_veth
test_fib

exit "$ret"