#!/bin/bash
# this is a quick and dirty migration of runemomniagg2.sh to the
# --enable-demo mode of aggregate testing

# Ask the running netperfs to stop and wait until none are left.
#
# Sends SIGALRM to every process named netperf, then polls once a
# second for as long as pgrep still finds a process whose command line
# contains "netperf" and whose parent is PID 1.
kill_netperfs() {
  pkill -ALRM netperf

  while pgrep -P 1 -f netperf > /dev/null; do
    sleep 1
  done
}

# Start MAX_INSTANCES netperfs in parallel, spread round-robin over the
# remote hosts, with a measurement pause at each load level.
#
# A pause of DURATION seconds is taken whenever the number of started
# netperfs reaches the current pause point (1, 2, 4, 8, ...), except
# when that number is already MAX_INSTANCES; the final DURATION wait
# covers that load level.  Timestamped progress lines go to stdout and
# to TESTLOG (which is truncated at the start of the run).
#
# Globals read: TEST, TESTLOG, MAX_INSTANCES, REMOTE_HOSTS,
#               NUM_REMOTE_HOSTS, NETPERF, NETPERF_CMD, DURATION
# Outputs:      netperf_<TEST>_<id>_to_<host>.out for each instance,
#               holding both stdout and stderr of that netperf
run_cmd() {
  local now target id
  local i=0
  # the starting point for our load level pauses
  local pause_at=1

  now=$(date +%s.%N)
  echo "Starting netperfs at $now for $TEST" | tee "$TESTLOG"

  while (( i < MAX_INSTANCES )); do
    target=${REMOTE_HOSTS[i % NUM_REMOTE_HOSTS]}
    echo "Starting netperfs on localhost targeting ${target} for $TEST" | tee -a "$TESTLOG"
    printf -v id '%.5d' "$i"
    # NETPERF and NETPERF_CMD are left unquoted on purpose: they carry
    # a command plus its options and must be split into words.
    # stdout is redirected first so that the following 2>&1 sends
    # stderr into the same file.
    # shellcheck disable=SC2086
    $NETPERF -H "$target" $NETPERF_CMD > "netperf_${TEST}_${id}_to_${target}.out" 2>&1 &

    # give it a moment to get going
    sleep 1

    i=$(( i + 1 ))

    if (( i == pause_at && i != MAX_INSTANCES )); then
      now=$(date +%s.%N)
      echo "Pausing for $DURATION seconds at $now with $i netperfs running for $TEST" | tee -a "$TESTLOG"
      sleep "$DURATION"
      pause_at=$(( pause_at * 2 ))
      now=$(date +%s.%N)
      echo "Resuming at $now for $TEST" | tee -a "$TESTLOG"
    fi
  done

  now=$(date +%s.%N)
  echo "Netperfs started by $now for $TEST" | tee -a "$TESTLOG"

  # wait for our test duration
  sleep "$DURATION"

  # kludgey but this sleep should mean that another interim result will be emitted
  sleep 3

  # stop all the netperfs
  now=$(date +%s.%N)
  echo "Netperfs stopping $now for $TEST" | tee -a "$TESTLOG"
  kill_netperfs

  now=$(date +%s.%N)
  echo "Netperfs stopped $now for $TEST" | tee -a "$TESTLOG"
}

# very much like run_cmd, but it runs the tests one at a time rather
# than in parallel.
# We keep the same logging strings to be compatible
# (hopefully) with the post processing script, even though they don't
# make all that much sense :)
#
# Runs one netperf against each of the first NUM_REMOTE_HOSTS entries
# of REMOTE_HOSTS, one at a time: start it, let it run for DURATION + 1
# seconds, stop it with kill_netperfs, then move on to the next host.
#
# Globals read: TEST, TESTLOG, REMOTE_HOSTS, NUM_REMOTE_HOSTS, NETPERF,
#               NETPERF_CMD, DURATION
# Outputs:      progress lines on stdout and in TESTLOG (truncated at
#               the start); netperf_<TEST>_<id>_to_<host>.out for each
#               host, holding both stdout and stderr of that netperf

run_cmd_serial() {
  local now resumed_at target id
  local i=0

  now=$(date +%s.%N)
  echo "Starting netperfs at $now for $TEST" | tee "$TESTLOG"

  while (( i < NUM_REMOTE_HOSTS )); do
    target=${REMOTE_HOSTS[i]}
    echo "Starting netperfs on localhost targeting ${target} for $TEST" | tee -a "$TESTLOG"
    printf -v id '%.5d' "$i"
    # NETPERF and NETPERF_CMD are left unquoted on purpose: they carry
    # a command plus its options and must be split into words.
    # stdout is redirected first so that the following 2>&1 sends
    # stderr into the same file.
    # shellcheck disable=SC2086
    $NETPERF -H "$target" $NETPERF_CMD > "netperf_${TEST}_${id}_to_${target}.out" 2>&1 &

    # give it a moment to get going
    sleep 1

    i=$(( i + 1 ))

    now=$(date +%s.%N)
    echo "Pausing for $DURATION seconds at $now with $i netperfs running for $TEST" | tee -a "$TESTLOG"
    # the plus one is to make sure we have a full set of interim
    # results. probably not necessary here but we want to be
    # certain
    sleep $(( DURATION + 1 ))
    kill_netperfs
    now=$(date +%s.%N)
    # Log the resume time one second in the past (presumably to
    # discount the extra second slept above - confirm against the post
    # processing script).  The fractional part is carried over as text
    # so that its leading zeros are preserved.
    resumed_at="$(( ${now%%.*} - 1 )).${now#*.}"
    echo "Resuming at $resumed_at for $TEST" | tee -a "$TESTLOG"

  done

  now=$(date +%s.%N)
  echo "Netperfs started by $now for $TEST" | tee -a "$TESTLOG"

  # stop all the netperfs - of course actually they have all been
  # stopped already, we just want the log entries
  now=$(date +%s.%N)
  echo "Netperfs stopping $now for $TEST" | tee -a "$TESTLOG"
  kill_netperfs
  now=$(date +%s.%N)
  echo "Netperfs stopped $now for $TEST" | tee -a "$TESTLOG"
}

# here then is the "main" part

# remote_hosts is sourced below; the rest of the script reads
# REMOTE_HOSTS and NUM_REMOTE_HOSTS, which it is expected to define.
if [ ! -f ./remote_hosts ]; then
  echo "This script requires a remote_hosts file"
  # exit statuses are limited to 0-255
  exit 255
fi
. ./remote_hosts
# how many processors are there on this system
NUM_CPUS=$(grep -c processor /proc/cpuinfo)
# grep prints nothing at all when /proc/cpuinfo cannot be read
NUM_CPUS=${NUM_CPUS:-0}

# the number of netperf instances we will run will be up to 2x the
# number of CPUs
MAX_INSTANCES=$(( NUM_CPUS * 2 ))

# but at least as many as there are entries in remote_hosts
if [ "$MAX_INSTANCES" -lt "${NUM_REMOTE_HOSTS:-0}" ]; then
  MAX_INSTANCES=$NUM_REMOTE_HOSTS
fi

# allow the netperf binary to be used to be overridden
: "${NETPERF:=netperf}"

# An unset or empty NUM_REMOTE_HOSTS is treated as zero hosts so that
# it is rejected here instead of slipping past a failed numeric test.
if [ "${NUM_REMOTE_HOSTS:-0}" -lt 2 ]; then
  echo "The list of remote hosts is too short.  There must be at least 2."
  # exit statuses are limited to 0-255
  exit 255
fi

# we assume that netservers are already running on all the load generators

DURATION=120
# do not have a uuidgen? then use the one in netperf
MY_UUID=$(uuidgen)
# with top-of-trunk we could make this 0 and run forever
# but two hours is something of a failsafe if the signals
# get lost
LENGTH="-l 7200"
OUTPUT="-o all"

DO_STREAM=1
DO_MAERTS=1
# NOTE! The Bidir test depends on being able to set a socket buffer
# size greater than 13 * 64KB or 832 KB or there is a risk of the test
# hanging.  If you are running linux, make certain that
# net.core.[r|w]mem_max are sufficiently large
DO_BIDIR=1
DO_RRAGG=1
DO_RR=1
DO_ANCILLARY=1

# UDP_RR for TPC/PPS using single-byte transactions. we do not use
# TCP_RR any longer because any packet losses or other matters
# affecting the congestion window will break our desire that there be
# a one to one correspondence between requests/responses and packets.
# Aggregate transactions-per-second test: single-byte UDP
# request/response with the burst size reported by find_max_burst.sh.
if [ "$DO_RRAGG" -eq 1 ]; then
  BURST=$(find_max_burst.sh "${REMOTE_HOSTS[0]}")
  if ! [[ "$BURST" =~ ^-?[0-9]+$ ]]; then
    # No output, or something that is not an integer: without this
    # fallback the -b option below would swallow the option after it.
    BURST=9
    echo "find_max_burst.sh gave no usable burst size so picking a burst of $BURST"
  elif [ "$BURST" -eq -1 ]; then
    # use a value that find_max_burst will not have picked
    BURST=9
    echo "find_max_burst.sh returned -1 so picking a burst of $BURST"
  fi
  TEST="tps"
  TESTLOG="netperf_tps.log"
  NETPERF_CMD="-D 0.5 -c -C -f x -P 0 -t omni $LENGTH -v 2 -- -r 1 -b $BURST -e 1 -T udp -u $MY_UUID $OUTPUT"
  run_cmd
fi

# Bidirectional using burst-mode TCP_RR and large request/response size
if [ "$DO_BIDIR" -eq 1 ]; then
  TEST="bidirectional"
  TESTLOG="netperf_bidirectional.log"
  NETPERF_CMD="-D 0.5 -c -C -f m -P 0 -t omni $LENGTH -v 2 -- -r 64K -s 1M -S 1M -b 12 -u $MY_UUID $OUTPUT"
  run_cmd
fi

# TCP_STREAM aka outbound with a 64K send size
# the netperf command is everything but netperf -H mumble
if [ "$DO_STREAM" -eq 1 ]; then
  TEST="outbound"
  TESTLOG="netperf_outbound.log"
  NETPERF_CMD="-D 0.5 -c -C -f m -P 0 -t omni $LENGTH -v 2 -- -m 64K -u $MY_UUID $OUTPUT"
  run_cmd
fi

# TCP_MAERTS aka inbound with a 64K send size - why is this one last?
# because presently when I pkill the netperf of a "MAERTS" test, the
# netserver does not behave well and it may not be possible to get it
# to behave well.  but we will still have all the interim results even
# if we don't get the final results, the useful parts of which will be
# the same as the other tests anyway
if [ "$DO_MAERTS" -eq 1 ]; then
  TEST="inbound"
  TESTLOG="netperf_inbound.log"
  NETPERF_CMD="-D 0.5 -c -C -f m -P 0 -t omni $LENGTH -v 2 -- -m ,64K -u $MY_UUID $OUTPUT"
  run_cmd
fi

# A single-stream of synchronous, no-burst TCP_RR in an "aggregate"
# script?
# Yes, because the way the aggregate tests work, while there
# is a way to see what the performance of a single bulk transfer was,
# there is no way to see a basic latency - by the time
# find_max_burst.sh has completed, we are past a burst size of 0
if [ "$DO_RR" -eq 1 ]; then
  # this test always runs for at least a minute per host
  if [ "$DURATION" -lt 60 ]; then
    DURATION=60
  fi
  TEST="sync_tps"
  TESTLOG="netperf_sync_tps.log"
  NETPERF_CMD="-D 0.5 -c -C -f x -P 0 -t omni $LENGTH -v 2 -- -r 1 -u $MY_UUID $OUTPUT"
  run_cmd_serial
fi


# now some ancillary things which may or may not work on your platform
if [ "$DO_ANCILLARY" -eq 1 ]; then
  # stdout is redirected before 2>&1 so that each file also records
  # the error output of a command that is missing or fails
  dmidecode > dmidecode.txt 2>&1
  uname -a > uname.txt 2>&1
  cat /proc/cpuinfo > cpuinfo.txt 2>&1
  cat /proc/meminfo > meminfo.txt 2>&1
  ifconfig -a > ifconfig.txt 2>&1
  netstat -rn > netstat.txt 2>&1
  dpkg -l > dpkg.txt 2>&1
  rpm -qa > rpm.txt 2>&1
  cat /proc/interrupts > interrupts.txt 2>&1
  # one traceroute per remote host, indices 0 .. NUM_REMOTE_HOSTS - 1
  i=0
  while [ "$i" -lt "$NUM_REMOTE_HOSTS" ]; do
    traceroute "${REMOTE_HOSTS[$i]}" > "traceroute_${REMOTE_HOSTS[$i]}.txt"
    i=$(( i + 1 ))
  done
fi