• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/bin/bash
2
3# This script demonstrates interaction of conntrack and vrf.
4# The vrf driver calls the netfilter hooks again, with oif/iif
5# pointing at the VRF device.
6#
# For ingress, this means the first iteration has iifname of the lower/real
# device.  In this script, that's veth0.
# The second iteration has iifname set to the vrf device, tvrf in this script.
10#
11# For egress, this is reversed: first iteration has the vrf device,
12# second iteration is done with the lower/real/veth0 device.
13#
# test_ct_zone_in demonstrates an unexpected change of nftables
# behavior caused by commit 09e856d54bda5f28 "vrf: Reset skb conntrack
# connection on VRF rcv"
17#
18# It was possible to assign conntrack zone to a packet (or mark it for
19# `notracking`) in the prerouting chain before conntrack, based on real iif.
20#
# After the change, the zone assignment is lost.  Instead, the zone is assigned
# based on the `iif` matching the VRF master interface (in case such a rule
# exists).  Thus it is impossible to distinguish packets based on the original
# interface.
26#
# test_masquerade_vrf and test_masquerade_veth demonstrate the problem
# that was supposed to be fixed by the commit mentioned above; they make sure
# that any fix to test case 1 won't break masquerade again.
30
31source lib.sh
32
# Address added to veth0 inside ns0.
IP0="172.30.30.1"
# Address added to veth0 inside ns1.
IP1="172.30.30.2"
# First three octets of the network whose .1 address is put on dummy0 in ns0.
DUMMYNET="10.9.9"
# Prefix length used for every address this script adds.
PFXL="30"
# Exit status of the script; test cases set it to 1 when they fail.
ret="0"
38
# EXIT handler: kill every process still running inside the two test
# namespaces, then call cleanup_all_ns (not defined in this file; presumably
# it removes the namespaces created by setup_ns -- confirm in lib.sh).
# Globals: ns0, ns1 (read)
cleanup()
{
	# -r keeps xargs from running "kill" without arguments when a namespace
	# has no processes left.  kill errors are still discarded on purpose.
	ip netns pids "$ns0" | xargs -r kill 2>/dev/null
	ip netns pids "$ns1" | xargs -r kill 2>/dev/null

	cleanup_all_ns
}
46
# Required tools.  checktool is not defined in this file (presumably lib.sh);
# judging by the messages it ends the run when a tool is missing.
checktool "nft --version" "run test without nft"
checktool "conntrack --version" "run test without conntrack"
checktool "socat -h" "run test without socat"

# From here on, every exit path runs cleanup().
trap cleanup EXIT

# setup_ns is not defined in this file; the rest of the script relies on it
# to provide the namespace names in $ns0 and $ns1.
setup_ns ns0 ns1

# ns0 settings: reverse-path filtering off, IPv4 forwarding on.
# Each setting is written once.
for sysctl_setting in \
	net.ipv4.conf.default.rp_filter=0 \
	net.ipv4.conf.all.rp_filter=0 \
	net.ipv4.conf.all.forwarding=1
do
	ip netns exec "$ns0" sysctl -q -w "$sysctl_setting"
done
59
# Topology: a veth pair (both ends named veth0) connects ns0 and ns1.
# In ns0, veth0 and dummy0 are enslaved to the VRF device tvrf (table 9876).
# The test is skipped when any of the devices cannot be created.
if ! ip link add veth0 netns "$ns0" type veth peer name veth0 netns "$ns1" > /dev/null 2>&1; then
	echo "SKIP: Could not add veth device"
	exit "$ksft_skip"
fi

if ! ip -net "$ns0" link add tvrf type vrf table 9876; then
	echo "SKIP: Could not add vrf device"
	exit "$ksft_skip"
fi

# Same handling as for veth and vrf: without dummy0 the fib test cannot work.
if ! ip -net "$ns0" link add dummy0 type dummy; then
	echo "SKIP: Could not add dummy device"
	exit "$ksft_skip"
fi

ip -net "$ns0" link set veth0 master tvrf
ip -net "$ns0" link set dummy0 master tvrf
ip -net "$ns0" link set tvrf up
ip -net "$ns0" link set veth0 up
ip -net "$ns0" link set dummy0 up
ip -net "$ns1" link set veth0 up

ip -net "$ns0" addr add "$IP0/$PFXL" dev veth0
ip -net "$ns1" addr add "$IP1/$PFXL" dev veth0
ip -net "$ns0" addr add "$DUMMYNET.1/$PFXL" dev dummy0
82
# Check whether a listening TCP socket is bound to a port inside a namespace.
# Arguments: $1 - network namespace name (passed to ss -N)
#            $2 - TCP port to look for (optional, defaults to 55555)
# Returns:   0 when the ss output mentions the port, non-zero otherwise
listener_ready()
{
	local ns="$1"
	local port="${2:-55555}"

	ss -N "$ns" -l -n -t -o "sport = :$port" | grep -q -- "$port"
}
89
# Background TCP listener in ns1 on port 55555; socat forks per connection
# and its output is discarded.  The masquerade tests connect to it.
ip netns exec "$ns1" socat -u -4 TCP-LISTEN:55555,reuseaddr,fork STDOUT > /dev/null &
# busywait is not defined in this file; it is handed the timeout and the
# readiness check to run against ns1.
busywait "$BUSYWAIT_TIMEOUT" listener_ready "$ns1"
92
# Test vrf ingress handling.
# ns1 sends a single ping to $IP0.  The raw prerouting chain in ns0 sets
# conntrack zone 1 for iif veth0 and zone 2 for iif tvrf.  The incoming
# connection should be placed in conntrack zone 1, as decided by the first
# iteration of the ruleset.
# Globals: ns0, ns1, IP0, IP1 (read); ret (set to 1 on failure)
# Outputs: PASS/FAIL line on stdout; on an unexpected result also the
#          conntrack table and the ruleset
test_ct_zone_in()
{
	local count

ip netns exec "$ns0" nft -f - <<EOF
table testct {
	chain rawpre {
		type filter hook prerouting priority raw;

		iif { veth0, tvrf } counter meta nftrace set 1
		iif veth0 counter ct zone set 1 counter return
		iif tvrf counter ct zone set 2 counter return
		ip protocol icmp counter
		notrack counter
	}

	chain rawout {
		type filter hook output priority raw;

		oif veth0 counter ct zone set 1 counter return
		oif tvrf counter ct zone set 2 counter return
		notrack counter
	}
}
EOF
	ip netns exec "$ns1" ping -W 1 -c 1 -I veth0 "$IP0" > /dev/null

	# should be in zone 1, not zone 2
	count=$(ip netns exec "$ns0" conntrack -L -s "$IP1" -d "$IP0" -p icmp --zone 1 2>/dev/null | wc -l)
	if [ "$count" -eq 1 ]; then
		echo "PASS: entry found in conntrack zone 1"
		return
	fi

	echo "FAIL: entry not found in conntrack zone 1"
	# Record the failure so the script's exit status reflects it, like the
	# other test cases do.
	ret=1

	count=$(ip netns exec "$ns0" conntrack -L -s "$IP1" -d "$IP0" -p icmp --zone 2 2> /dev/null | wc -l)
	if [ "$count" -eq 1 ]; then
		echo "FAIL: entry found in zone 2 instead"
	else
		echo "FAIL: entry not in zone 1 or 2, dumping table"
		ip netns exec "$ns0" conntrack -L
		ip netns exec "$ns0" nft list ruleset
	fi
}
137
# Add a masquerade rule that gets evaluated with outif set to the vrf device.
# This tests the first iteration of the packet through conntrack,
# oifname is the vrf device.
# Arguments: $1 - "default" to leave the qdisc of tvrf alone, otherwise the
#                 qdisc to install as root on tvrf for the duration of the test
# Globals:   ns0, IP1 (read); ret (set to 1 on failure)
# Outputs:   PASS/FAIL line on stdout
test_masquerade_vrf()
{
	local qdisc=$1
	local nat_rules

	if [ "$qdisc" != "default" ]; then
		tc -net "$ns0" qdisc add dev tvrf root "$qdisc"
	fi

	ip netns exec "$ns0" conntrack -F 2>/dev/null

ip netns exec "$ns0" nft -f - <<EOF
flush ruleset
table ip nat {
	chain rawout {
		type filter hook output priority raw;

		oif tvrf ct state untracked counter
	}
	chain postrouting2 {
		type filter hook postrouting priority mangle;

		oif tvrf ct state untracked counter
	}
	chain postrouting {
		type nat hook postrouting priority 0;
		# NB: masquerade should always be combined with 'oif(name) bla',
		# lack of this is intentional here, we want to exercise double-snat.
		ip saddr 172.30.30.0/30 counter masquerade random
	}
}
EOF
	if ! ip netns exec "$ns0" ip vrf exec tvrf socat -u -4 STDIN TCP:"$IP1":55555 < /dev/null > /dev/null; then
		echo "FAIL: connect failure with masquerade + sport rewrite on vrf device"
		ret=1
	else
		# must also check that nat table was evaluated on second (lower device) iteration.
		# Both patterns are checked against one listing of the table.
		nat_rules=$(ip netns exec "$ns0" nft list table ip nat)
		if grep -q 'counter packets 1' <<<"$nat_rules" &&
		   grep -q 'untracked counter packets [1-9]' <<<"$nat_rules"; then
			echo "PASS: connect with masquerade + sport rewrite on vrf device ($qdisc qdisc)"
		else
			echo "FAIL: vrf rules have unexpected counter value"
			ret=1
		fi
	fi

	# Runs on every path, including connect failure, so the test qdisc never
	# stays on tvrf for the test cases that follow.
	if [ "$qdisc" != "default" ]; then
		tc -net "$ns0" qdisc del dev tvrf root
	fi
}
191
# Masquerade rule restricted to the veth device as output interface.
# This covers the 2nd iteration of the packet through conntrack, where
# oifname is the lower device (veth0 in this case).
# Globals: ns0, IP1 (read); ret (set to 1 on failure)
# Outputs: PASS/FAIL line on stdout
test_masquerade_veth()
{
	local nat_dump

	ip netns exec "$ns0" conntrack -F 2>/dev/null
ip netns exec "$ns0" nft -f - <<EOF
flush ruleset
table ip nat {
	chain postrouting {
		type nat hook postrouting priority 0;
		meta oif veth0 ip saddr 172.30.30.0/30 counter masquerade random
	}
}
EOF
	# Connect from inside the vrf to the listener in the peer namespace.
	ip netns exec "$ns0" ip vrf exec tvrf socat -u -4 STDIN TCP:"$IP1":55555 < /dev/null > /dev/null || {
		echo "FAIL: connect failure with masquerade + sport rewrite on veth device"
		ret=1
		return
	}

	# The nat rule must have been hit on the second (lower device) iteration.
	nat_dump=$(ip netns exec "$ns0" nft list table ip nat)
	case "$nat_dump" in
	*'counter packets 1'*)
		echo "PASS: connect with masquerade + sport rewrite on veth device"
		;;
	*)
		echo "FAIL: vrf masq rule has unexpected counter value"
		ret=1
		;;
	esac
}
221
# Check the nft fib expression on traffic that enters through the vrf slave.
# The prerouting rule matches packets arriving on veth0 for $DUMMYNET.2 whose
# "fib daddr oif" lookup yields dummy0; it bumps the named counter fibcount
# and marks the packet notrack.  ns1 gets a route to the dummy network via
# $IP0 and sends one ping; the test passes when the counter listing shows
# "packets 1".
# Globals: ns0, ns1, IP0, DUMMYNET (read); ret (set to 1 on failure)
# Outputs: PASS/FAIL line on stdout
test_fib()
{
ip netns exec "$ns0" nft -f - <<EOF
flush ruleset
table ip t {
	counter fibcount { }

	chain prerouting {
		type filter hook prerouting priority 0;
		meta iifname veth0 ip daddr $DUMMYNET.2 fib daddr oif dummy0 counter name fibcount notrack
	}
}
EOF
	# Route is derived from DUMMYNET so it follows the address on dummy0.
	ip -net "$ns1" route add "$DUMMYNET.0/24" via "$IP0" dev veth0
	ip netns exec "$ns1" ping -q -w 1 -c 1 "$DUMMYNET.2" > /dev/null

	if ip netns exec "$ns0" nft list counter t fibcount | grep -q "packets 1"; then
		echo "PASS: fib lookup returned expected output interface"
	else
		echo "FAIL: fib lookup did not return expected output interface"
		ret=1
	fi
}
246
# Run the test cases in order.  A test case that detects a failure sets the
# global ret to 1, which becomes the exit status of the script.
test_ct_zone_in

# The vrf masquerade test runs with the default qdisc and with pfifo on tvrf.
for qdisc in "default" "pfifo"; do
	test_masquerade_vrf "$qdisc"
done

test_masquerade_veth
test_fib

exit "$ret"
254