#!/usr/bin/env python2
# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Script to compare the performance of two different chromeOS builds.

This script is meant to be used when the performance impact of a change in
chromeOS needs to be analyzed. It requires that you have already created two
chromeOS test images (one with the change, and one without), and that you have
at least one device available on which to run performance tests.

This script is a light-weight wrapper around crosperf, a tool for automatically
imaging one or more chromeOS devices with particular builds, running a set of
tests on those builds, and then notifying the user of test results (along with
some statistical analysis of perf keyvals). This wrapper script performs the
following tasks:

1) Creates a crosperf "experiment" file to be consumed by crosperf.
2) Invokes crosperf using the created experiment file. Crosperf produces two
outputs: an e-mail sent to the user who invoked it, and an output folder named
after the given --experiment-name, created in the directory from which this
script was run.
3) Parses the results of crosperf and outputs a summary of relevant data. This
script produces output in a CSV file, as well as on stdout.

Before running this script for the first time, you should set up your system to
run sudo without prompting for a password (otherwise, crosperf prompts for a
sudo password). You should only have to do that once per host machine.

Once you're set up with passwordless sudo, you can run the script (preferably
from an empty directory, since several output files are produced):

> python perf_compare.py --crosperf=CROSPERF_EXE --image-1=IMAGE_1 \
    --image-2=IMAGE_2 --board-1=BOARD_1 --board-2=BOARD_2 --remote-1=REMOTE_1 \
    --remote-2=REMOTE_2

You'll need to specify the following inputs: the full path to the crosperf
executable; the absolute paths to the 2 locally-built chromeOS images (which
must reside in the "typical location" relative to the chroot, as required by
crosperf); the names of the boards associated with the 2 images (if both images
have the same board, you can specify that single board with --board=BOARD); and
the IP addresses of the 2 remote devices on which to run crosperf (if you have
only a single device available, specify it with --remote=REMOTE). Run with -h
to see the full set of accepted command-line arguments.

Notes:

1) When you run this script, it deletes any previously-created crosperf output
directories and CSV files based on the specified --experiment-name. If you
don't want to lose any old crosperf/CSV data, either move it to another
location, or run this script with a different --experiment-name.
2) This script only runs the benchmarks and processes the perf keys specified
in the file "perf_benchmarks.json". Some benchmarks output more perf keys than
those specified in perf_benchmarks.json; the extra keys appear in the crosperf
outputs, but not in the outputs produced specifically by this script.
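
For reference, perf_benchmarks.json is a JSON list in which each entry names a
benchmark, the autotest that runs it, optional autotest arguments, and the perf
keys it produces (these are the fields this script reads). The values below are
illustrative only:

[
  {
    "benchmark": "example_benchmark",
    "autotest_name": "example_AutotestName",
    "autotest_args": "",
    "perf_keys": ["example_perf_key_1", "example_perf_key_2"]
  }
]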
"""


from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import json
import logging
import math
import optparse
import os
import re
import shutil
import subprocess
import sys
from six.moves import input


_ITERATIONS = 5
_IMAGE_1_NAME = 'Image1'
_IMAGE_2_NAME = 'Image2'
_DEFAULT_EXPERIMENT_NAME = 'perf_comparison'
_ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
_BENCHMARK_INFO_FILE_NAME = os.path.join(_ROOT_DIR, 'perf_benchmarks.json')
_CROSPERF_REPORT_LINE_DELIMITER = '\t'
_EXPERIMENT_FILE_NAME = 'experiment.txt'

_BENCHMARK_INFO_TEMPLATE = """
benchmark: {benchmark} {{
    autotest_name: {autotest_name}
    autotest_args: --use_emerged {autotest_args}
    iterations: {iterations}
}}
"""

_IMAGE_INFO_TEMPLATE = """
label: {label} {{
    chromeos_image: {image}
    board: {board}
    remote: {remote}
}}
"""


def identify_benchmarks_to_run(benchmark_info, iteration_nums, perf_keys):
    """Identifies which benchmarks to run, and for how many iterations.

    @param benchmark_info: A list of dictionaries containing information about
        the complete set of default perf benchmarks to run.
    @param iteration_nums: See output_benchmarks_info().
    @param perf_keys: See output_benchmarks_info().

    @return A tuple (X, Y), where X is a list of dictionaries containing
        information about the set of benchmarks to run, and Y is the set of
        perf keys requested to be run.
    """
    perf_keys_requested = set()
    benchmarks_to_run = []
    if not perf_keys:
        # Run every benchmark for the specified number of iterations.
        benchmarks_to_run = benchmark_info
        for benchmark in benchmarks_to_run:
            benchmark['iterations'] = iteration_nums[0]
            for perf_key in benchmark['perf_keys']:
                perf_keys_requested.add(perf_key)
    else:
        # Identify which benchmarks to run, and for how many iterations.
        identified_benchmarks = {}
        for i, perf_key in enumerate(perf_keys):
            perf_keys_requested.add(perf_key)
            benchmarks = [benchmark for benchmark in benchmark_info
                          if perf_key in benchmark['perf_keys']]
            if not benchmarks:
                logging.error('Perf key "%s" isn\'t associated with a known '
                              'benchmark.', perf_key)
                sys.exit(1)
            elif len(benchmarks) > 1:
                logging.error('Perf key "%s" is associated with more than one '
                              'benchmark, but should be unique.', perf_key)
                sys.exit(1)
            benchmark_to_add = benchmarks[0]
            benchmark_to_add = identified_benchmarks.setdefault(
                benchmark_to_add['benchmark'], benchmark_to_add)
            if len(iteration_nums) == 1:
                # If only a single iteration number is specified, we assume
                # that applies to every benchmark.
                benchmark_to_add['iterations'] = iteration_nums[0]
            else:
                # The user must have specified a separate iteration number for
                # each perf key. If the benchmark associated with the current
                # perf key already has an iteration number associated with it,
                # choose the maximum of the two.
                iter_num = iteration_nums[i]
                if 'iterations' in benchmark_to_add:
                    benchmark_to_add['iterations'] = (
                        iter_num if iter_num > benchmark_to_add['iterations']
                        else benchmark_to_add['iterations'])
                else:
                    benchmark_to_add['iterations'] = iter_num
        benchmarks_to_run = list(identified_benchmarks.values())

    return benchmarks_to_run, perf_keys_requested


def output_benchmarks_info(f, iteration_nums, perf_keys):
    """Identifies details of benchmarks to run, and writes that info to a file.

    @param f: A file object that is writeable.
    @param iteration_nums: A list of one or more integers representing the
        number of iterations to run for one or more benchmarks.
    @param perf_keys: A list of one or more string perf keys we need to run,
        or None if we should use the complete set of default perf keys.

    @return Set of perf keys actually requested to be run in the output file.
    """
    benchmark_info = []
    with open(_BENCHMARK_INFO_FILE_NAME, 'r') as f_bench:
        benchmark_info = json.load(f_bench)

    benchmarks_to_run, perf_keys_requested = identify_benchmarks_to_run(
        benchmark_info, iteration_nums, perf_keys)

    for benchmark in benchmarks_to_run:
        f.write(_BENCHMARK_INFO_TEMPLATE.format(
            benchmark=benchmark['benchmark'],
            autotest_name=benchmark['autotest_name'],
            autotest_args=benchmark.get('autotest_args', ''),
            iterations=benchmark['iterations']))

    return perf_keys_requested


def output_image_info(f, label, image, board, remote):
    """Writes information about a given image to an output file.

    @param f: A file object that is writeable.
    @param label: A string label for the given image.
    @param image: The string path to the image on disk.
    @param board: The string board associated with the image.
    @param remote: The string IP address on which to install the image.
    """
    f.write(_IMAGE_INFO_TEMPLATE.format(
        label=label, image=image, board=board, remote=remote))


def invoke_crosperf(crosperf_exe, result_dir, experiment_name, board_1,
                    board_2, remote_1, remote_2, iteration_nums, perf_keys,
                    image_1, image_2, image_1_name, image_2_name):
    """Invokes crosperf with a set of benchmarks and waits for it to complete.

    @param crosperf_exe: The string path to a crosperf executable.
    @param result_dir: The string name of the directory in which crosperf is
        expected to write its output.
    @param experiment_name: A string name to give the crosperf invocation.
    @param board_1: The string board associated with the first image.
    @param board_2: The string board associated with the second image.
    @param remote_1: The string IP address/name of the first remote device.
    @param remote_2: The string IP address/name of the second remote device.
    @param iteration_nums: A list of integers representing the number of
        iterations to run for the different benchmarks.
    @param perf_keys: A list of perf keys to run, or None to run the full set
        of default perf benchmarks.
    @param image_1: The string path to the first image.
    @param image_2: The string path to the second image.
    @param image_1_name: A string label to give the first image.
    @param image_2_name: A string label to give the second image.

    @return A tuple (X, Y), where X is the path to the created crosperf report
        file, and Y is the set of perf keys actually requested to be run.
    """
    # Create experiment file for crosperf.
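    # The experiment file written below is plain text in crosperf's expected
    # format: a "name:" line, followed by one "benchmark:" block per selected
    # benchmark (rendered from _BENCHMARK_INFO_TEMPLATE) and one "label:"
    # block per image (rendered from _IMAGE_INFO_TEMPLATE). A rough sketch,
    # with placeholder values:
    #
    #   name: perf_comparison
    #   benchmark: some_benchmark {
    #       autotest_name: some_autotest
    #       autotest_args: --use_emerged
    #       iterations: 5
    #   }
    #   label: Image1 {
    #       chromeos_image: /path/to/chromiumos_test_image.bin
    #       board: some_board
    #       remote: 192.168.0.2
    #   }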
    with open(_EXPERIMENT_FILE_NAME, 'w') as f:
        f.write('name: {name}\n'.format(name=experiment_name))
        perf_keys_requested = output_benchmarks_info(
            f, iteration_nums, perf_keys)
        output_image_info(f, image_1_name, image_1, board_1, remote_1)
        output_image_info(f, image_2_name, image_2, board_2, remote_2)

    # Invoke crosperf with the experiment file.
    logging.info('Invoking crosperf with created experiment file...')
    p = subprocess.Popen([crosperf_exe, _EXPERIMENT_FILE_NAME],
                         stdout=subprocess.PIPE, stderr=subprocess.STDOUT)

    # Pass through crosperf output as debug messages until crosperf run is
    # complete.
    while True:
        next_line = p.stdout.readline().strip()
        if not next_line and p.poll() is not None:
            break
        logging.debug(next_line)
        sys.stdout.flush()
    p.communicate()
    exit_code = p.returncode

    if exit_code:
        logging.error('Crosperf returned exit code %s', exit_code)
        sys.exit(1)

    report_file = os.path.join(result_dir, 'results.html')
    if not os.path.exists(report_file):
        logging.error('Crosperf report file missing, cannot proceed.')
        sys.exit(1)

    logging.info('Crosperf run complete.')
    logging.info('Crosperf results available in "%s"', result_dir)
    return report_file, perf_keys_requested


def parse_crosperf_report_file(report_file, perf_keys_requested):
    """Reads in and parses a crosperf report file for relevant perf data.

    @param report_file: See generate_results().
    @param perf_keys_requested: See generate_results().

    @return A dictionary containing perf information extracted from the
        crosperf report file.
    """
    results = {}
    with open(report_file, 'r') as f:
        contents = f.read()

    match = re.search(r'summary-tsv.+?/pre', contents, flags=re.DOTALL)
    contents = match.group(0)

    curr_benchmark = None
    for line in contents.splitlines():
        delimiter = r'\s+?'
        match = re.search(
            r'Benchmark:%s(?P<benchmark>\w+?);%sIterations:%s'
            r'(?P<iterations>\w+?)\s' % (delimiter, delimiter, delimiter),
            line)
        if match:
            curr_benchmark = match.group('benchmark')
            iterations = match.group('iterations')
            results[curr_benchmark] = {'iterations': iterations,
                                       'p_values': []}
            continue
        split = line.strip().split(_CROSPERF_REPORT_LINE_DELIMITER)
        if (len(split) == 12 and split[-2] == '--' and
                split[0] not in ['retval', 'iterations'] and
                split[0] in perf_keys_requested):
            results[curr_benchmark]['p_values'].append(
                (split[0], split[-1]))

    return results


def generate_results(report_file, result_file, perf_keys_requested):
    """Outputs relevant crosperf results to a CSV file, and to stdout.

    This code parses the "results.html" output file of crosperf. It then
    creates a CSV file that has the following format per line:

    benchmark_name,num_iterations,perf_key,p_value[,perf_key,p_value]

    @param report_file: The string name of the report file created by crosperf.
    @param result_file: A string name for the CSV file to output.
    @param perf_keys_requested: The set of perf keys originally requested to be
        run.
    """
    results = parse_crosperf_report_file(report_file, perf_keys_requested)

    # Output p-value data to a CSV file.
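    # Each row follows the format documented in the docstring above, e.g.
    # (benchmark and perf-key names here are purely illustrative):
    #   some_benchmark,5,some_perf_key,0.031,another_perf_key,NaN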
    with open(result_file, 'w') as f:
        for bench in results:
            perf_key_substring = ','.join(
                ['%s,%s' % (x[0], x[1]) for x in results[bench]['p_values']])
            f.write('%s,%s,%s\n' % (
                bench, results[bench]['iterations'], perf_key_substring))

    logging.info('P-value results available in "%s"', result_file)

    # Collect and output some additional summary results to stdout.
    small_p_value = []
    nan_p_value = []
    perf_keys_obtained = set()
    for benchmark in results:
        p_values = results[benchmark]['p_values']
        for key, p_val in p_values:
            perf_keys_obtained.add(key)
            if float(p_val) <= 0.05:
                small_p_value.append((benchmark, key, p_val))
            elif math.isnan(float(p_val)):
                nan_p_value.append((benchmark, key, p_val))

    if small_p_value:
        logging.info('The following perf keys showed statistically '
                     'significant result differences (p-value <= 0.05):')
        for item in small_p_value:
            logging.info('* [%s] %s (p-value %s)', item[0], item[1], item[2])
    else:
        logging.info('No perf keys showed statistically significant result '
                     'differences (p-value <= 0.05)')

    if nan_p_value:
        logging.info('The following perf keys had "NaN" p-values:')
        for item in nan_p_value:
            logging.info('* [%s] %s (p-value %s)', item[0], item[1], item[2])

    # Check if any perf keys are missing from what was requested, and notify
    # the user if so.
    for key_requested in perf_keys_requested:
        if key_requested not in perf_keys_obtained:
            logging.warning('Could not find results for requested perf key '
                            '"%s".', key_requested)


def parse_options():
    """Parses command-line arguments."""
    parser = optparse.OptionParser()

    parser.add_option('--crosperf', metavar='PATH', type='string',
                      default=None,
                      help='Absolute path to the crosperf executable '
                           '(required).')
    parser.add_option('--image-1', metavar='PATH', type='string', default=None,
                      help='Absolute path to the first image .bin file '
                           '(required).')
    parser.add_option('--image-2', metavar='PATH', type='string', default=None,
                      help='Absolute path to the second image .bin file '
                           '(required).')

    board_group = optparse.OptionGroup(
        parser, 'Specifying the boards (required)')
    board_group.add_option('--board', metavar='BOARD', type='string',
                           default=None,
                           help='Name of the board associated with the images, '
                                'if both images have the same board. If each '
                                'image has a different board, use options '
                                '--board-1 and --board-2 instead.')
    board_group.add_option('--board-1', metavar='BOARD', type='string',
                           default=None,
                           help='Board associated with the first image.')
    board_group.add_option('--board-2', metavar='BOARD', type='string',
                           default=None,
                           help='Board associated with the second image.')
    parser.add_option_group(board_group)

    remote_group = optparse.OptionGroup(
        parser, 'Specifying the remote devices (required)')
    remote_group.add_option('--remote', metavar='IP', type='string',
                            default=None,
                            help='IP address/name of remote device to use, if '
                                 'only one physical device is to be used. If '
                                 'using two devices, use options --remote-1 '
                                 'and --remote-2 instead.')
    remote_group.add_option('--remote-1', metavar='IP', type='string',
                            default=None,
                            help='IP address/name of first device to use.')
    remote_group.add_option('--remote-2', metavar='IP', type='string',
                            default=None,
                            help='IP address/name of second device to use.')
    parser.add_option_group(remote_group)

    optional_group = optparse.OptionGroup(parser, 'Optional settings')
    optional_group.add_option('--image-1-name', metavar='NAME', type='string',
                              default=_IMAGE_1_NAME,
                              help='Descriptive name for the first image. '
                                   'Defaults to "%default".')
    optional_group.add_option('--image-2-name', metavar='NAME', type='string',
                              default=_IMAGE_2_NAME,
                              help='Descriptive name for the second image. '
                                   'Defaults to "%default".')
    optional_group.add_option('--experiment-name', metavar='NAME',
                              type='string', default=_DEFAULT_EXPERIMENT_NAME,
                              help='A descriptive name for the performance '
                                   'comparison experiment to run. Defaults to '
                                   '"%default".')
    optional_group.add_option('--perf-keys', metavar='KEY1[,KEY2...]',
                              type='string', default=None,
                              help='Comma-separated list of perf keys to '
                                   'evaluate, if you do not want to run the '
                                   'complete set. By default, will evaluate '
                                   'with the complete set of perf keys.')
    optional_group.add_option('--iterations', metavar='N1[,N2...]',
                              type='string', default=str(_ITERATIONS),
                              help='Number of iterations to use to evaluate '
                                   'each perf key (defaults to %default). If '
                                   'specifying a custom list of perf keys '
                                   '(with --perf-keys) and you want to have a '
                                   'different number of iterations for each '
                                   'perf key, specify a comma-separated list '
                                   'of iteration numbers where N1 corresponds '
                                   'to KEY1, N2 corresponds to KEY2, etc.')
    optional_group.add_option('-v', '--verbose', action='store_true',
                              default=False, help='Use verbose logging.')
    parser.add_option_group(optional_group)

    options, _ = parser.parse_args()
    return options


def verify_command_line_options(options, iteration_nums, perf_keys):
    """Verifies there are no errors in the specified command-line options.

    @param options: An optparse.Values object of parsed command-line options.
    @param iteration_nums: An array of numbers representing the number of
        iterations to perform to evaluate each perf key.
    @param perf_keys: A list of strings representing perf keys to evaluate, or
        None if no particular perf keys are specified.

    @return True, if there were no errors in the command-line options, or
        False if any error was detected.
    """
    success = True
    if not options.crosperf:
        logging.error('You must specify the path to a crosperf executable.')
        success = False
    if options.crosperf and not os.path.isfile(options.crosperf):
        logging.error('Could not locate crosperf executable "%s".',
                      options.crosperf)
        if options.crosperf.startswith('/google'):
            logging.error('Did you remember to run prodaccess?')
        success = False
    if not options.image_1 or not options.image_2:
        logging.error('You must specify the paths for 2 image .bin files.')
        success = False
    if not options.board and (not options.board_1 or not options.board_2):
        logging.error('You must specify the board name(s): either a single '
                      'board with --board, or else two board names with '
                      '--board-1 and --board-2.')
        success = False
    if options.board and options.board_1 and options.board_2:
        logging.error('Specify either one board with --board, or two boards '
                      'with --board-1 and --board-2, but not both.')
        success = False
    if not options.remote and (not options.remote_1 or not options.remote_2):
        logging.error('You must specify the remote device(s) to use: either a '
                      'single device with --remote, or else two devices with '
                      '--remote-1 and --remote-2.')
        success = False
    if options.remote and options.remote_1 and options.remote_2:
        logging.error('Specify either one remote device with --remote, or two '
                      'devices with --remote-1 and --remote-2, but not both.')
        success = False
    if len(iteration_nums) > 1 and not perf_keys:
        logging.error('You should only specify multiple iteration numbers '
                      'if you\'re specifying a custom list of perf keys to '
                      'evaluate.')
        success = False
    if (options.perf_keys and len(iteration_nums) > 1 and
            len(options.perf_keys.split(',')) > len(iteration_nums)):
        logging.error('You specified %d custom perf keys, but only %d '
                      'iteration numbers.', len(options.perf_keys.split(',')),
                      len(iteration_nums))
        success = False
    return success


def main():
    """Main script logic."""
    options = parse_options()

    log_level = logging.DEBUG if options.verbose else logging.INFO
    logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s',
                        level=log_level)

    iteration_nums = [int(i) for i in options.iterations.split(',')]
    perf_keys = options.perf_keys.split(',') if options.perf_keys else None

    # Verify there are no errors in the specified command-line options.
    if not verify_command_line_options(options, iteration_nums, perf_keys):
        return 1

    # Clean up any old results that will be overwritten.
    result_dir = options.experiment_name + '_results'
    if os.path.isdir(result_dir):
        shutil.rmtree(result_dir)
    result_file = options.experiment_name + '_results.csv'
    if os.path.isfile(result_file):
        os.remove(result_file)

    if options.remote:
        remote_1, remote_2 = options.remote, options.remote
    else:
        remote_1, remote_2 = options.remote_1, options.remote_2

    if options.board:
        board_1, board_2 = options.board, options.board
    else:
        board_1, board_2 = options.board_1, options.board_2

    report_file, perf_keys_requested = invoke_crosperf(
        options.crosperf, result_dir, options.experiment_name, board_1,
        board_2, remote_1, remote_2, iteration_nums, perf_keys,
        options.image_1, options.image_2, options.image_1_name,
        options.image_2_name)
    generate_results(report_file, result_file, perf_keys_requested)

    return 0


if __name__ == '__main__':
    sys.exit(main())