#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# A test for switch behavior under MC overload. An issue in Spectrum chips
# causes throughput of UC traffic to drop severely when a switch is under heavy
# MC load. This issue can be overcome by putting the switch to MC-aware mode.
# This test verifies that UC performance stays intact even as the switch is
# under MC flood, and therefore that the MC-aware mode is enabled and correctly
# configured.
#
# Because mlxsw throttles CPU port, the traffic can't actually reach userspace
# at full speed. That makes it impossible to use iperf3 to simply measure the
# throughput, because many packets (that reach $h3) don't get to the kernel at
# all even in UDP mode (the situation is even worse in TCP mode, where one can't
# hope to see more than a couple Mbps).
#
# So instead we send traffic with mausezahn and use RX ethtool counters at $h3.
# Multicast traffic is untagged, unicast traffic is tagged with PCP 1. Therefore
# each gets a different priority and we can use per-prio ethtool counters to
# measure the throughput. In order to avoid prioritizing unicast traffic, prio
# qdisc is installed on $swp3 and maps all priorities to the same band #7 (and
# thus TC 0).
#
# Mausezahn can't actually saturate the links unless it's using large frames.
# Thus we set MTU to 10K on all involved interfaces. Then both unicast and
# multicast traffic uses 8K frames.
#
# +---------------------------+            +----------------------------------+
# | H1                        |            |                               H2 |
# |                           |            |  unicast --> + $h2.111           |
# |                 multicast |            |  traffic     | 192.0.2.129/28    |
# |                 traffic   |            |              | e-qos-map 0:1     |
# |           $h1 + <-----    |            |              |                   |
# | 192.0.2.65/28 |           |            |              + $h2               |
# +---------------|-----------+            +--------------|-------------------+
#                 |                                       |
# +---------------|---------------------------------------|-------------------+
# |         $swp1 +                                       + $swp2             |
# |        >1Gbps |                                       | >1Gbps            |
# | +-------------|------+                     +----------|----------------+  |
# | |     $swp1.1 +      |                     |          + $swp2.111      |  |
# | |                BR1 |             SW      | BR111                     |  |
# | |     $swp3.1 +      |                     |          + $swp3.111      |  |
# | +-------------|------+                     +----------|----------------+  |
# |               \_______________________________________/                   |
# |                                    |                                      |
# |                                    + $swp3                                |
# |                                    | 1Gbps bottleneck                     |
# |                                    | prio qdisc: {0..7} -> 7              |
# +------------------------------------|--------------------------------------+
#                                      |
#                                   +--|-----------------+
#                                   |  + $h3          H3 |
#                                   |  | 192.0.2.66/28   |
#                                   |  |                 |
#                                   |  + $h3.111         |
#                                   |    192.0.2.130/28  |
#                                   +--------------------+

ALL_TESTS="
	ping_ipv4
	test_mc_aware
	test_uc_aware
"

lib_dir=$(dirname "$0")/../../../net/forwarding

NUM_NETIFS=6
source "$lib_dir/lib.sh"

# Bring up $h1 with its IPv4 address and raise its MTU to 10000.
h1_create()
{
	simple_if_init $h1 192.0.2.65/28
	mtu_set $h1 10000
}

# Undo h1_create, in reverse order.
h1_destroy()
{
	mtu_restore $h1
	simple_if_fini $h1 192.0.2.65/28
}

# Bring up $h2 (no address on the port itself), raise its MTU and create
# $h2.111, which carries the unicast source address. The egress-qos-map 0:1
# setting makes traffic leaving $h2.111 carry PCP 1.
h2_create()
{
	simple_if_init $h2
	mtu_set $h2 10000

	vlan_create $h2 111 v$h2 192.0.2.129/28
	ip link set dev $h2.111 type vlan egress-qos-map 0:1
}

# Undo h2_create, in reverse order.
h2_destroy()
{
	vlan_destroy $h2 111

	mtu_restore $h2
	simple_if_fini $h2
}

# Bring up $h3 with its untagged address, raise its MTU and create $h3.111
# with the unicast destination address.
h3_create()
{
	simple_if_init $h3 192.0.2.66/28
	mtu_set $h3 10000

	vlan_create $h3 111 v$h3 192.0.2.130/28
}

# Undo h3_create, in reverse order.
h3_destroy()
{
	vlan_destroy $h3 111

	mtu_restore $h3
	simple_if_fini $h3 192.0.2.66/28
}

# Configure the switch side of the topology: bring up the three ports with a
# 10000 MTU, create the VLAN 111 uppers, force $swp3 down to 1Gbps, install the
# all-to-band-7 prio qdisc on it, and build the two bridges (br1 for untagged
# traffic, br111 for VLAN 111).
switch_create()
{
	ip link set dev $swp1 up
	mtu_set $swp1 10000

	ip link set dev $swp2 up
	mtu_set $swp2 10000

	ip link set dev $swp3 up
	mtu_set $swp3 10000

	vlan_create $swp2 111
	vlan_create $swp3 111

	ethtool -s $swp3 speed 1000 autoneg off
	tc qdisc replace dev $swp3 root handle 3: \
	   prio bands 8 priomap 7 7 7 7 7 7 7 7

	ip link add name br1 type bridge vlan_filtering 0
	ip link set dev br1 up
	ip link set dev $swp1 master br1
	ip link set dev $swp3 master br1

	ip link add name br111 type bridge vlan_filtering 0
	ip link set dev br111 up
	ip link set dev $swp2.111 master br111
	ip link set dev $swp3.111 master br111
}

# Tear down everything switch_create set up, in reverse order.
switch_destroy()
{
	ip link del dev br111
	ip link del dev br1

	tc qdisc del dev $swp3 root handle 3:
	ethtool -s $swp3 autoneg on

	vlan_destroy $swp3 111
	vlan_destroy $swp2 111

	mtu_restore $swp3
	ip link set dev $swp3 down

	mtu_restore $swp2
	ip link set dev $swp2 down

	mtu_restore $swp1
	ip link set dev $swp1 down
}

# Map the six NETIFS entries (presumably populated by lib.sh) to the
# host/switch port names used throughout the test, record $h3's MAC address for
# use as the unicast destination, and build the topology.
setup_prepare()
{
	h1=${NETIFS[p1]}
	swp1=${NETIFS[p2]}

	swp2=${NETIFS[p3]}
	h2=${NETIFS[p4]}

	swp3=${NETIFS[p5]}
	h3=${NETIFS[p6]}

	h3mac=$(mac_get $h3)

	vrf_prepare

	h1_create
	h2_create
	h3_create
	switch_create
}

# EXIT trap handler: dismantle the topology in the reverse order of
# setup_prepare.
cleanup()
{
	pre_cleanup

	switch_destroy
	h3_destroy
	h2_destroy
	h1_destroy

	vrf_cleanup
}

# Sanity check: $h2 can reach $h3.111's address over VLAN 111.
ping_ipv4()
{
	ping_test $h2 192.0.2.130
}

# Print a bits-per-second figure scaled by 1024 to the largest fitting unit.
# Arguments: $1 - speed in bps
# Outputs:   "<value><unit>" on stdout, unit one of bps, Kbps, Mbps, Gbps
humanize()
{
	local speed=$1; shift
	local unit

	for unit in bps Kbps Mbps Gbps; do
		# Gbps is the largest unit on offer. Stop there even when the
		# value is still >= 1024, otherwise the value would be scaled
		# down once more while keeping the Gbps label.
		if [[ $unit == Gbps ]] ||
		   (($(echo "$speed < 1024" | bc))); then
			break
		fi

		speed=$(echo "scale=1; $speed / 1024" | bc)
	done

	echo "$speed${unit}"
}

# Convert two octet-counter samples into a rate in bits per second.
# Arguments: $1 - counter at start, $2 - counter at end,
#            $3 - seconds between the samples (must be non-zero)
# Outputs:   integer rate on stdout
rate()
{
	local t0=$1; shift
	local t1=$1; shift
	local interval=$1; shift

	echo $((8 * (t1 - t0) / interval))
}

# Check that a measured rate is above a threshold.
# Arguments: $1 - measured rate in bps, $2 - minimum rate in bps,
#            $3 - description used in the diagnostic
# Outputs:   diagnostic on stderr when the rate is too low
# Returns:   0 if rate > min, 1 otherwise
check_rate()
{
	local rate=$1; shift
	local min=$1; shift
	local what=$1; shift

	if ((rate > min)); then
		return 0
	fi

	# Report this function's own arguments rather than whatever variables
	# the caller happens to have in scope.
	echo "$what $(humanize $rate) < $(humanize $min)" >&2
	return 1
}

# Send unicast traffic from $h2.111 to $h3.111 and measure it.
# Arguments: $1 - description used in diagnostics
# Outputs:   "<ingress bps> <egress bps>" on stdout, where ingress is measured
#            at $swp2 and egress at $h3, both on the prio 1 octet counter
# Returns:   0 if an ingress rate above 2Gbps was observed within six
#            10-second attempts, 1 otherwise
# Intended to be run inside $(...): it starts and kills its own background
# mausezahn and then waits for it.
measure_uc_rate()
{
	local what=$1; shift

	local interval=10
	local i
	local ret=0

	# Dips in performance might cause momentary ingress rate to drop below
	# 1Gbps. That wouldn't saturate egress and MC would thus get through,
	# seemingly winning bandwidth on account of UC. Demand at least 2Gbps
	# average ingress rate to somewhat mitigate this.
	local min_ingress=2147483648

	$MZ $h2.111 -p 8000 -A 192.0.2.129 -B 192.0.2.130 -c 0 \
		-a own -b $h3mac -t udp -q &
	sleep 1

	for i in {5..0}; do
		local t0=$(ethtool_stats_get $h3 rx_octets_prio_1)
		local u0=$(ethtool_stats_get $swp2 rx_octets_prio_1)
		sleep $interval
		local t1=$(ethtool_stats_get $h3 rx_octets_prio_1)
		local u1=$(ethtool_stats_get $swp2 rx_octets_prio_1)

		local ir=$(rate $u0 $u1 $interval)
		local er=$(rate $t0 $t1 $interval)

		if check_rate $ir $min_ingress "$what ingress rate"; then
			break
		fi

		# Fail the test if we can't get the throughput.
		if ((i == 0)); then
			ret=1
		fi
	done

	# Suppress noise from killing mausezahn.
	{ kill %% && wait; } 2>/dev/null

	echo $ir $er
	# Use return, not exit: the status reaches a $(...) caller either way,
	# and a direct call no longer terminates the whole test script.
	return $ret
}

# Verify that unicast throughput towards $h3 does not degrade by more than 25%
# when $h1 additionally floods broadcast traffic through the same 1Gbps
# bottleneck. Prints a throughput report after the verdict.
test_mc_aware()
{
	RET=0

	local -a uc_rate
	uc_rate=($(measure_uc_rate "UC-only"))
	check_err $? "Could not get high enough UC-only ingress rate"
	local ucth1=${uc_rate[1]}

	$MZ $h1 -p 8000 -c 0 -a own -b bc -t udp -q &

	local d0=$(date +%s)
	local t0=$(ethtool_stats_get $h3 rx_octets_prio_0)
	local u0=$(ethtool_stats_get $swp1 rx_octets_prio_0)

	local -a uc_rate_2
	uc_rate_2=($(measure_uc_rate "UC+MC"))
	check_err $? "Could not get high enough UC+MC ingress rate"
	local ucth2=${uc_rate_2[1]}

	local d1=$(date +%s)
	local t1=$(ethtool_stats_get $h3 rx_octets_prio_0)
	local u1=$(ethtool_stats_get $swp1 rx_octets_prio_0)

	local deg
	if ((ucth1 > 0)); then
		deg=$(bc <<< "
			scale=2
			ret = 100 * ($ucth1 - $ucth2) / $ucth1
			if (ret > 0) { ret } else { 0 }
		    ")
	else
		# Without a UC-only baseline the ratio is undefined (bc would
		# divide by zero and leave the check below without input).
		# Count it as full degradation so the test fails.
		deg=100
	fi
	check_err $(bc <<< "$deg > 25") "UC throughput degraded by more than 25%"

	local interval=$((d1 - d0))
	local mc_ir=$(rate $u0 $u1 $interval)
	local mc_er=$(rate $t0 $t1 $interval)

	# Suppress noise from killing mausezahn.
	{ kill %% && wait; } 2>/dev/null

	log_test "UC performace under MC overload"

	echo "UC-only throughput  $(humanize $ucth1)"
	echo "UC+MC throughput    $(humanize $ucth2)"
	echo "Degradation         $deg %"
	echo
	echo "Full report:"
	echo "  UC only:"
	echo "    ingress UC throughput $(humanize ${uc_rate[0]})"
	echo "    egress UC throughput  $(humanize ${uc_rate[1]})"
	echo "  UC+MC:"
	echo "    ingress UC throughput $(humanize ${uc_rate_2[0]})"
	echo "    egress UC throughput  $(humanize ${uc_rate_2[1]})"
	echo "    ingress MC throughput $(humanize $mc_ir)"
	echo "    egress MC throughput  $(humanize $mc_er)"
	echo
}

# Verify the opposite direction: while unicast traffic from $h2.111 is running,
# every one of 50 broadcast ARP requests sent from $h1 for $h3's address must
# get a response.
test_uc_aware()
{
	RET=0

	$MZ $h2.111 -p 8000 -A 192.0.2.129 -B 192.0.2.130 -c 0 \
		-a own -b $h3mac -t udp -q &

	local d0=$(date +%s)
	local t0=$(ethtool_stats_get $h3 rx_octets_prio_1)
	local u0=$(ethtool_stats_get $swp2 rx_octets_prio_1)
	sleep 1

	local attempts=50
	local passes=0
	local i

	for ((i = 0; i < attempts; ++i)); do
		if $ARPING -c 1 -I $h1 -b 192.0.2.66 -q -w 0.1; then
			((passes++))
		fi

		sleep 0.1
	done

	local d1=$(date +%s)
	local t1=$(ethtool_stats_get $h3 rx_octets_prio_1)
	local u1=$(ethtool_stats_get $swp2 rx_octets_prio_1)

	local interval=$((d1 - d0))
	local uc_ir=$(rate $u0 $u1 $interval)
	local uc_er=$(rate $t0 $t1 $interval)

	((attempts == passes))
	check_err $? "Only $passes out of $attempts BC ARPs were answered"

	# Suppress noise from killing mausezahn.
	{ kill %% && wait; } 2>/dev/null

	log_test "MC performace under UC overload"
	echo "    ingress UC throughput $(humanize ${uc_ir})"
	echo "    egress UC throughput  $(humanize ${uc_er})"
	echo "    sent $attempts BC ARPs, got $passes responses"
}

trap cleanup EXIT

setup_prepare
setup_wait

tests_run

exit $EXIT_STATUS