# this is a quick and dirty migration of runemomniagg2.sh to the
# --enable-demo mode of aggregate testing
# Signal every netperf to stop and wait until they have gone away.
#
# pkill delivers SIGALRM to each process named "netperf" (presumably
# so that netperf ends its run as if its test timer had expired -
# confirm against the netperf documentation).  We then poll once a
# second until pgrep can no longer find a process whose parent is
# PID 1 and whose command line contains "netperf".
#
# Arguments: none
function kill_netperfs {
    pkill -ALRM netperf

    # pgrep exits 0 for as long as at least one process still matches
    while pgrep -P 1 -f netperf > /dev/null
    do
        sleep 1
    done
}

# Launch MAX_INSTANCES concurrent netperfs, ramping the load up in
# steps, then let them run for DURATION seconds and stop them.
#
# The instances are spread round-robin over REMOTE_HOSTS.  After the
# 1st, 2nd, 4th, 8th, ... instance has been started (but not after the
# final one) the ramp-up pauses for DURATION seconds so that each load
# level is held for a while.  Progress messages are written to stdout
# and to TESTLOG, which is truncated at the start of the run.
#
# Globals read:    TEST TESTLOG MAX_INSTANCES REMOTE_HOSTS
#                  NUM_REMOTE_HOSTS NETPERF NETPERF_CMD DURATION
# Globals written: NOW i PAUSE_AT TARGET id
# Arguments:       none
# Outputs:         one netperf_<TEST>_<id>_to_<TARGET>.out file per
#                  instance, holding that netperf's stdout and stderr
function run_cmd {

    NOW=$(date +%s.%N)
    echo "Starting netperfs at $NOW for $TEST" | tee "$TESTLOG"
    i=0

    # the starting point for our load level pauses
    PAUSE_AT=1

    while [ "$i" -lt "$MAX_INSTANCES" ]
    do
        TARGET=${REMOTE_HOSTS[$(( i % NUM_REMOTE_HOSTS ))]}
        echo "Starting netperfs on localhost targeting ${TARGET} for $TEST" | tee -a "$TESTLOG"
        id=$(printf "%.5d" "$i")
        # NETPERF and NETPERF_CMD are left unquoted on purpose: they
        # hold a command and an option string that must be word-split.
        # The file redirection has to come before 2>&1, otherwise
        # stderr is not captured in the .out file.
        $NETPERF -H "$TARGET" $NETPERF_CMD > "netperf_${TEST}_${id}_to_${TARGET}.out" 2>&1 &

        # give it a moment to get going
        sleep 1

        i=$(( i + 1 ))

        if [ "$i" -eq "$PAUSE_AT" ] && [ "$i" -ne "$MAX_INSTANCES" ]
        then
            NOW=$(date +%s.%N)
            echo "Pausing for $DURATION seconds at $NOW with $i netperfs running for $TEST" | tee -a "$TESTLOG"
            sleep "$DURATION"
            # the next pause comes once the instance count has doubled
            PAUSE_AT=$(( PAUSE_AT * 2 ))
            NOW=$(date +%s.%N)
            echo "Resuming at $NOW for $TEST" | tee -a "$TESTLOG"
        fi
    done

    NOW=$(date +%s.%N)
    echo "Netperfs started by $NOW for $TEST" | tee -a "$TESTLOG"

    # wait for our test duration
    sleep "$DURATION"

    # kludgey but this sleep should mean that another interim result
    # will be emitted
    sleep 3

    # stop all the netperfs
    NOW=$(date +%s.%N)
    echo "Netperfs stopping $NOW for $TEST" | tee -a "$TESTLOG"
    kill_netperfs

    NOW=$(date +%s.%N)
    echo "Netperfs stopped $NOW for $TEST" | tee -a "$TESTLOG"

}

# very much like run_cmd, but it runs the tests one at a time rather
# than in parallel.  We keep the same logging strings to be compatible
# (hopefully) with the post processing script, even though they don't
# make all that much sense :)
#
# One netperf is run against each of the first NUM_REMOTE_HOSTS
# entries of REMOTE_HOSTS in turn; each is left running for
# DURATION + 1 seconds and then stopped with kill_netperfs before the
# next one is started.
#
# Globals read:    TEST TESTLOG REMOTE_HOSTS NUM_REMOTE_HOSTS NETPERF
#                  NETPERF_CMD DURATION
# Globals written: NOW THEN i TARGET id
# Arguments:       none
# Outputs:         one netperf_<TEST>_<id>_to_<TARGET>.out file per
#                  host, holding that netperf's stdout and stderr

function run_cmd_serial {

    NOW=$(date +%s.%N)
    echo "Starting netperfs at $NOW for $TEST" | tee "$TESTLOG"
    i=0

    while [ "$i" -lt "$NUM_REMOTE_HOSTS" ]
    do
        TARGET=${REMOTE_HOSTS[$i]}
        echo "Starting netperfs on localhost targeting ${TARGET} for $TEST" | tee -a "$TESTLOG"
        id=$(printf "%.5d" "$i")
        # NETPERF and NETPERF_CMD are left unquoted on purpose: they
        # hold a command and an option string that must be word-split.
        # The file redirection has to come before 2>&1, otherwise
        # stderr is not captured in the .out file.
        $NETPERF -H "$TARGET" $NETPERF_CMD > "netperf_${TEST}_${id}_to_${TARGET}.out" 2>&1 &

        # give it a moment to get going
        sleep 1

        i=$(( i + 1 ))

        NOW=$(date +%s.%N)
        echo "Pausing for $DURATION seconds at $NOW with $i netperfs running for $TEST" | tee -a "$TESTLOG"
        # the plus one is to make sure we have a full set of interim
        # results.  probably not necessary here but we want to be
        # certain
        sleep $(( DURATION + 1 ))
        kill_netperfs
        NOW=$(date +%s.%N)
        # log a "resume" time one second before now.  The fractional
        # part is copied as text so that its leading zeros survive;
        # formatting it as a number would turn .012345678 into
        # .12345678 and shift the timestamp.
        THEN="$(( ${NOW%%.*} - 1 )).${NOW#*.}"
        echo "Resuming at $THEN for $TEST" | tee -a "$TESTLOG"

    done

    NOW=$(date +%s.%N)
    echo "Netperfs started by $NOW for $TEST" | tee -a "$TESTLOG"

    # stop all the netperfs - of course actually they have all been
    # stopped already, we just want the log entries
    NOW=$(date +%s.%N)
    echo "Netperfs stopping $NOW for $TEST" | tee -a "$TESTLOG"
    kill_netperfs
    NOW=$(date +%s.%N)
    echo "Netperfs stopped $NOW for $TEST" | tee -a "$TESTLOG"
}

# here then is the "main" part

# remote_hosts is sourced rather than parsed.  It is presumably where
# the REMOTE_HOSTS array and the NUM_REMOTE_HOSTS count used by the
# rest of this script come from - nothing else here sets them.
if [ ! -f ./remote_hosts ]
then
    echo "This script requires a remote_hosts file" >&2
    exit 1
fi
. ./remote_hosts

# validate the host count before it is used in any comparison or
# arithmetic below
case "$NUM_REMOTE_HOSTS" in
    ''|*[!0-9]*)
        echo "The remote_hosts file must set NUM_REMOTE_HOSTS to a number." >&2
        exit 1
        ;;
esac

if [ "$NUM_REMOTE_HOSTS" -lt 2 ]
then
    echo "The list of remote hosts is too short.  There must be at least 2." >&2
    exit 1
fi

# how many processors are there on this system.  the pattern is
# anchored so that only the per-CPU "processor" lines are counted and
# not other lines which merely mention the word
NUM_CPUS=$(grep -c '^processor' /proc/cpuinfo)
NUM_CPUS=${NUM_CPUS:-0}

# the number of netperf instances we will run will be up to 2x the
# number of CPUs
MAX_INSTANCES=$(( NUM_CPUS * 2 ))

# but at least as many as there are entries in remote_hosts
if [ "$MAX_INSTANCES" -lt "$NUM_REMOTE_HOSTS" ]
then
    MAX_INSTANCES=$NUM_REMOTE_HOSTS
fi

# allow the netperf binary to be used to be overridden
: "${NETPERF:=netperf}"

# we assume that netservers are already running on all the load generators

# how long, in seconds, each load level is held
DURATION=120

# every netperf started by this script is passed the same UUID via -u.
# do not have a uuidgen? then use the one in netperf (its UUID test -
# confirm that your netperf build supports "-t UUID").
MY_UUID=$(uuidgen 2>/dev/null)
if [ -z "$MY_UUID" ]
then
    # NETPERF is left unquoted on purpose so that an override holding
    # a command plus arguments still works
    MY_UUID=$(${NETPERF:-netperf} -t UUID)
fi
if [ -z "$MY_UUID" ]
then
    # without a value the -u option in NETPERF_CMD would swallow the
    # option which follows it
    echo "Warning: unable to generate a UUID for this run" >&2
fi

# with top-of-trunk we could make this 0 and run forever
# but two hours is something of a failsafe if the signals
# get lost
LENGTH="-l 7200"
OUTPUT="-o all"

# which of the tests below to run: 1 enables a test, anything else
# skips it
DO_STREAM=1
DO_MAERTS=1
# NOTE!  The Bidir test depends on being able to set a socket buffer
# size greater than 13 * 64KB or 832 KB or there is a risk of the test
# hanging.  If you are running linux, make certain that
# net.core.[r|w]mem_max are sufficiently large
DO_BIDIR=1
DO_RRAGG=1
DO_RR=1
DO_ANCILLARY=1

# UDP_RR for TPS/PPS using single-byte transactions. we do not use
# TCP_RR any longer because any packet losses or other matters
# affecting the congestion window will break our desire that there be
# a one to one correspondence between requests/responses and packets.
if [ "${DO_RRAGG:-0}" -eq 1 ]; then
    BURST=$(find_max_burst.sh "${REMOTE_HOSTS[0]}")
    case "$BURST" in
        -1)
            # use a value that find_max_burst will not have picked
            BURST=9
            echo "find_max_burst.sh returned -1 so picking a burst of $BURST"
            ;;
        ''|*[!0-9]*)
            # no output at all (for example the helper is not on the
            # PATH) or something that is not a number: fall back to the
            # same value rather than hand netperf a broken -b option
            echo "find_max_burst.sh returned unusable output '$BURST' so picking a burst of 9" >&2
            BURST=9
            ;;
    esac
    TEST="tps"
    TESTLOG="netperf_tps.log"
    NETPERF_CMD="-D 0.5 -c -C -f x -P 0 -t omni $LENGTH -v 2 -- -r 1 -b $BURST -e 1 -T udp -u $MY_UUID $OUTPUT"
    run_cmd
fi

# Bidirectional using burst-mode TCP_RR and large request/response size
if [ "${DO_BIDIR:-0}" -eq 1 ]; then
    TEST="bidirectional"
    TESTLOG="netperf_bidirectional.log"
    NETPERF_CMD="-D 0.5 -c -C -f m -P 0 -t omni $LENGTH -v 2 -- -r 64K -s 1M -S 1M -b 12 -u $MY_UUID $OUTPUT"
    run_cmd
fi

# TCP_STREAM aka outbound with a 64K send size
# the netperf command is everything but netperf -H mumble
if [ "${DO_STREAM:-0}" -eq 1 ]; then
    TEST="outbound"
    TESTLOG="netperf_outbound.log"
    NETPERF_CMD="-D 0.5 -c -C -f m -P 0 -t omni $LENGTH -v 2 -- -m 64K -u $MY_UUID $OUTPUT"
    run_cmd
fi

# TCP_MAERTS aka inbound with a 64K send size - why is this one last?
# because presently when I pkill the netperf of a "MAERTS" test, the
# netserver does not behave well and it may not be possible to get it
# to behave well.  but we will still have all the interim results even
# if we don't get the final results, the useful parts of which will be
# the same as the other tests anyway
if [ "${DO_MAERTS:-0}" -eq 1 ]; then
    TEST="inbound"
    TESTLOG="netperf_inbound.log"
    NETPERF_CMD="-D 0.5 -c -C -f m -P 0 -t omni $LENGTH -v 2 -- -m ,64K -u $MY_UUID $OUTPUT"
    run_cmd
fi

# A single-stream of synchronous, no-burst TCP_RR in an "aggregate"
# script?  Yes, because the way the aggregate tests work, while there
# is a way to see what the performance of a single bulk transfer was,
# there is no way to see a basic latency - by the time
# find_max_burst.sh has completed, we are past a burst size of 0
if [ "${DO_RR:-0}" -eq 1 ]; then
    # each serial run is held for at least a minute.  note that this
    # raises DURATION for anything which runs after this point too
    if [ "$DURATION" -lt 60 ]; then
        DURATION=60
    fi
    TEST="sync_tps"
    TESTLOG="netperf_sync_tps.log"
    NETPERF_CMD="-D 0.5 -c -C -f x -P 0 -t omni $LENGTH -v 2 -- -r 1 -u $MY_UUID $OUTPUT"
    run_cmd_serial
fi

# now some ancillary things which may or may not work on your platform
if [ "${DO_ANCILLARY:-0}" -eq 1 ]; then
    # the file redirection has to come before 2>&1 so that each file
    # really holds both stdout and stderr - including the error from
    # a tool which does not exist on this platform
    dmidecode > dmidecode.txt 2>&1
    uname -a > uname.txt 2>&1
    cat /proc/cpuinfo > cpuinfo.txt 2>&1
    cat /proc/meminfo > meminfo.txt 2>&1
    ifconfig -a > ifconfig.txt 2>&1
    netstat -rn > netstat.txt 2>&1
    dpkg -l > dpkg.txt 2>&1
    rpm -qa > rpm.txt 2>&1
    cat /proc/interrupts > interrupts.txt 2>&1

    # record the route to every remote host, indices 0 through
    # NUM_REMOTE_HOSTS - 1 inclusive, so the last one is not skipped
    i=0
    while [ "$i" -lt "$NUM_REMOTE_HOSTS" ]
    do
        traceroute "${REMOTE_HOSTS[$i]}" > "traceroute_${REMOTE_HOSTS[$i]}.txt"
        i=$(( i + 1 ))
    done
fi
