#! /usr/bin/env vpython3
#
# Copyright 2021 The ANGLE Project Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#
# run_perf_test.py:
#   Runs ANGLE perf tests using some statistical averaging.

import argparse
import fnmatch
import importlib
import io
import json
import logging
import time
import os
import re
import subprocess
import sys

# Add //src/testing into sys.path for importing xvfb and test_env, and
# //src/testing/scripts for importing common.
d = os.path.dirname
THIS_DIR = d(os.path.abspath(__file__))
ANGLE_DIR = d(d(THIS_DIR))
sys.path.append(os.path.join(ANGLE_DIR, 'testing'))
sys.path.append(os.path.join(ANGLE_DIR, 'testing', 'scripts'))

import common
import test_env
import xvfb

sys.path.append(os.path.join(ANGLE_DIR, 'third_party', 'catapult', 'tracing'))
from tracing.value import histogram
from tracing.value import histogram_set
from tracing.value import merge_histograms

DEFAULT_TEST_SUITE = 'angle_perftests'
DEFAULT_LOG = 'info'
DEFAULT_SAMPLES = 4
DEFAULT_TRIALS = 3
DEFAULT_MAX_ERRORS = 3
DEFAULT_WARMUP_LOOPS = 2
DEFAULT_CALIBRATION_TIME = 2

# Filters out stuff like: " I 72.572s run_tests_on_device(96071FFAZ00096) "
ANDROID_LOGGING_PREFIX = r'I +\d+.\d+s \w+\(\w+\) '

# Test expectations
FAIL = 'FAIL'
PASS = 'PASS'
SKIP = 'SKIP'

EXIT_FAILURE = 1
EXIT_SUCCESS = 0


def is_windows():
    return sys.platform == 'cygwin' or sys.platform.startswith('win')


def get_binary_name(binary):
    if is_windows():
        return '.\\%s.exe' % binary
    else:
        return './%s' % binary


def _popen(*args, **kwargs):
    assert 'creationflags' not in kwargs
    if sys.platform == 'win32':
        # Necessary for signal handling. See crbug.com/733612#c6.
        kwargs['creationflags'] = subprocess.CREATE_NEW_PROCESS_GROUP
    return subprocess.Popen(*args, **kwargs)


def run_command_with_output(argv, stdoutfile, env=None, cwd=None, log=True):
    assert stdoutfile
    with io.open(stdoutfile, 'wb') as writer:
        process = _popen(argv, env=env, cwd=cwd, stdout=writer, stderr=subprocess.STDOUT)
        test_env.forward_signals([process])
        while process.poll() is None:
            # This sleep is needed for signal propagation. See the
            # wait_with_signals() docstring.
            time.sleep(0.1)
        return process.returncode


def _run_and_get_output(args, cmd, env):
    lines = []
    logging.debug(' '.join(cmd))
    with common.temporary_file() as tempfile_path:
        if args.xvfb:
            exit_code = xvfb.run_executable(cmd, env, stdoutfile=tempfile_path)
        else:
            exit_code = run_command_with_output(cmd, env=env, stdoutfile=tempfile_path, log=False)
        with open(tempfile_path) as f:
            for line in f:
                lines.append(line.strip())
    return exit_code, lines


def _filter_tests(tests, pattern):
    return [test for test in tests if fnmatch.fnmatch(test, pattern)]


def _shard_tests(tests, shard_count, shard_index):
    return [tests[index] for index in range(shard_index, len(tests), shard_count)]


def _get_results_from_output(output, result):
    output = '\n'.join(output)
    m = re.search(r'Running (\d+) tests', output)
    if m and int(m.group(1)) > 1:
        raise Exception('Found more than one test result in output')

    # Results are reported in the format:
    # name_backend.result: story= value units.
    pattern = r'\.' + result + r':.*= ([0-9.]+)'
    logging.debug('Searching for %s in output' % pattern)
    m = re.findall(pattern, output)
    if not m:
        logging.warning('Did not find the result "%s" in the test output:\n%s' % (result, output))
        return None

    return [float(value) for value in m]
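

# For illustration only (hypothetical output line, not from a real run): with
# result='wall_time', the pattern above becomes r'\.wall_time:.*= ([0-9.]+)', so a
# line such as "FooPerfTest_vulkan.wall_time: story= 0.25 ns" would yield a single
# captured value of 0.25.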


def _get_tests_from_output(lines):
    seen_start_of_tests = False
    tests = []
    android_prefix = re.compile(ANDROID_LOGGING_PREFIX)
    logging.debug('Read %d lines from test output.' % len(lines))
    for line in lines:
        line = android_prefix.sub('', line.strip())
        if line == 'Tests list:':
            seen_start_of_tests = True
        elif line == 'End tests list.':
            break
        elif seen_start_of_tests:
            tests.append(line)
    if not seen_start_of_tests:
        raise Exception('Did not find test list in test output!')
    logging.debug('Found %d tests from test output.' % len(tests))
    return tests


def _truncated_list(data, n):
    """Compute a truncated list, n is truncation size"""
    if len(data) < n * 2:
        raise ValueError('list not large enough to truncate')
    return sorted(data)[n:-n]


def _mean(data):
    """Return the sample arithmetic mean of data."""
    n = len(data)
    if n < 1:
        raise ValueError('mean requires at least one data point')
    return float(sum(data)) / float(n)  # in Python 2 use sum(data)/float(n)


def _sum_of_square_deviations(data, c):
    """Return sum of square deviations of sequence data."""
    ss = sum((float(x) - c)**2 for x in data)
    return ss


def _coefficient_of_variation(data):
    """Calculates the population coefficient of variation."""
    n = len(data)
    if n < 2:
        raise ValueError('variance requires at least two data points')
    c = _mean(data)
    ss = _sum_of_square_deviations(data, c)
    pvar = ss / n  # the population variance
    stddev = (pvar**0.5)  # population standard deviation
    return stddev / c
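

# Worked example (for illustration only): _truncated_list([5, 1, 9, 3, 7, 2, 8, 4], 2)
# sorts the data and drops the two smallest and two largest values, returning
# [3, 4, 5, 7]. For that truncated list, _mean() is 4.75 and
# _coefficient_of_variation() is roughly 0.31 (population stddev ~1.48 / mean 4.75).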


def _save_extra_output_files(args, results, histograms):
    isolated_out_dir = os.path.dirname(args.isolated_script_test_output)
    if not os.path.isdir(isolated_out_dir):
        return
    benchmark_path = os.path.join(isolated_out_dir, args.test_suite)
    if not os.path.isdir(benchmark_path):
        os.makedirs(benchmark_path)
    test_output_path = os.path.join(benchmark_path, 'test_results.json')
    results.save_to_json_file(test_output_path)
    perf_output_path = os.path.join(benchmark_path, 'perf_results.json')
    logging.info('Saving perf histograms to %s.' % perf_output_path)
    with open(perf_output_path, 'w') as out_file:
        out_file.write(json.dumps(histograms.AsDicts(), indent=2))


class Results:

    def __init__(self):
        self._results = {
            'tests': {},
            'interrupted': False,
            'seconds_since_epoch': time.time(),
            'path_delimiter': '.',
            'version': 3,
            'num_failures_by_type': {
                FAIL: 0,
                PASS: 0,
                SKIP: 0,
            },
        }
        self._test_results = {}

    def has_result(self, test):
        return test in self._test_results

    def result_skip(self, test):
        self._test_results[test] = {'expected': SKIP, 'actual': SKIP}
        self._results['num_failures_by_type'][SKIP] += 1

    def result_pass(self, test):
        self._test_results[test] = {'expected': PASS, 'actual': PASS}
        self._results['num_failures_by_type'][PASS] += 1

    def result_fail(self, test):
        self._test_results[test] = {'expected': PASS, 'actual': FAIL, 'is_unexpected': True}
        self._results['num_failures_by_type'][FAIL] += 1

    def save_to_output_file(self, test_suite, fname):
        self._update_results(test_suite)
        with open(fname, 'w') as out_file:
            out_file.write(json.dumps(self._results, indent=2))

    def save_to_json_file(self, fname):
        logging.info('Saving test results to %s.' % fname)
        with open(fname, 'w') as out_file:
            out_file.write(json.dumps(self._results, indent=2))

    def _update_results(self, test_suite):
        if self._test_results:
            self._results['tests'][test_suite] = self._test_results
            self._test_results = {}
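

# Illustrative sketch (not actual output) of the JSON written by Results for a
# suite with one passing test, using the version-3 fields populated above
# ("SomeTest" and the timestamp are hypothetical):
#   {
#     "tests": {"angle_perftests": {"SomeTest": {"expected": "PASS", "actual": "PASS"}}},
#     "interrupted": false,
#     "seconds_since_epoch": 1234567890.0,
#     "path_delimiter": ".",
#     "version": 3,
#     "num_failures_by_type": {"FAIL": 0, "PASS": 1, "SKIP": 0}
#   }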


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--isolated-script-test-output', type=str)
    parser.add_argument('--isolated-script-test-perf-output', type=str)
    parser.add_argument(
        '-f', '--filter', '--isolated-script-test-filter', type=str, help='Test filter.')
    parser.add_argument('--test-suite', help='Test suite to run.', default=DEFAULT_TEST_SUITE)
    parser.add_argument('--xvfb', help='Use xvfb.', action='store_true')
    parser.add_argument(
        '--shard-count',
        help='Number of shards for test splitting. Default is 1.',
        type=int,
        default=1)
    parser.add_argument(
        '--shard-index',
        help='Index of the current shard for test splitting. Default is 0.',
        type=int,
        default=0)
    parser.add_argument(
        '-l', '--log', help='Log output level. Default is %s.' % DEFAULT_LOG, default=DEFAULT_LOG)
    parser.add_argument(
        '-s',
        '--samples-per-test',
        help='Number of samples to run per test. Default is %d.' % DEFAULT_SAMPLES,
        type=int,
        default=DEFAULT_SAMPLES)
    parser.add_argument(
        '-t',
        '--trials-per-sample',
        help='Number of trials to run per sample. Default is %d.' % DEFAULT_TRIALS,
        type=int,
        default=DEFAULT_TRIALS)
    parser.add_argument(
        '--steps-per-trial', help='Fixed number of steps to run per trial.', type=int)
    parser.add_argument(
        '--max-errors',
        help='After this many errors, abort the run. Default is %d.' % DEFAULT_MAX_ERRORS,
        type=int,
        default=DEFAULT_MAX_ERRORS)
    parser.add_argument(
        '--smoke-test-mode', help='Do a quick run to validate correctness.', action='store_true')
    parser.add_argument(
        '--warmup-loops',
        help='Number of warmup loops to run in the perf test. Default is %d.' %
        DEFAULT_WARMUP_LOOPS,
        type=int,
        default=DEFAULT_WARMUP_LOOPS)
    parser.add_argument(
        '--calibration-time',
        help='Amount of time to spend each loop in calibration and warmup. Default is %d seconds.'
        % DEFAULT_CALIBRATION_TIME,
        type=int,
        default=DEFAULT_CALIBRATION_TIME)

    args, extra_flags = parser.parse_known_args()

    importlib.reload(logging)
    logging.basicConfig(level=args.log.upper())

    start_time = time.time()

    # Use fast execution for smoke test mode.
    if args.smoke_test_mode:
        args.steps_per_trial = 1
        args.trials_per_sample = 1
        args.samples_per_test = 1

    env = os.environ.copy()

    # Get sharding args
    if 'GTEST_TOTAL_SHARDS' in env and int(env['GTEST_TOTAL_SHARDS']) != 1:
        if 'GTEST_SHARD_INDEX' not in env:
            logging.error('Sharding params must be specified together.')
            sys.exit(1)
        args.shard_count = int(env.pop('GTEST_TOTAL_SHARDS'))
        args.shard_index = int(env.pop('GTEST_SHARD_INDEX'))

    # Get test list
    cmd = [get_binary_name(args.test_suite), '--list-tests', '--verbose']
    exit_code, lines = _run_and_get_output(args, cmd, env)
    if exit_code != EXIT_SUCCESS:
        logging.fatal('Could not find test list from test output:\n%s' % '\n'.join(lines))
    tests = _get_tests_from_output(lines)

    if args.filter:
        tests = _filter_tests(tests, args.filter)

    # Get tests for this shard (if using sharding args)
    tests = _shard_tests(tests, args.shard_count, args.shard_index)

    num_tests = len(tests)
    if num_tests == 0:
        logging.error('No tests to run.')
        return EXIT_FAILURE

    logging.info('Running %d test%s' % (num_tests, 's' if num_tests > 1 else ' '))

    # Run tests
    results = Results()

    histograms = histogram_set.HistogramSet()
    total_errors = 0

    for test_index in range(num_tests):
        test = tests[test_index]
        cmd = [
            get_binary_name(args.test_suite),
            '--gtest_filter=%s' % test,
            '--extract-test-list-from-filter',
            '--enable-device-cache',
            '--skip-clear-data',
            '--use-existing-test-data',
            '--verbose',
            '--calibration-time',
            str(args.calibration_time),
        ]
        if args.steps_per_trial:
            steps_per_trial = args.steps_per_trial
        else:
            cmd_calibrate = cmd + [
                '--calibration',
                '--warmup-loops',
                str(args.warmup_loops),
            ]
            exit_code, calibrate_output = _run_and_get_output(args, cmd_calibrate, env)
            if exit_code != EXIT_SUCCESS:
                logging.fatal('%s failed. Output:\n%s' %
                              (cmd_calibrate[0], '\n'.join(calibrate_output)))
                total_errors += 1
                results.result_fail(test)
                continue
            steps_per_trial = _get_results_from_output(calibrate_output, 'steps_to_run')
            if not steps_per_trial:
                logging.warning('Skipping test %s' % test)
                continue
            assert (len(steps_per_trial) == 1)
            steps_per_trial = int(steps_per_trial[0])
        logging.info('Test %d/%d: %s (samples=%d trials_per_sample=%d steps_per_trial=%d)' %
                     (test_index + 1, num_tests, test, args.samples_per_test,
                      args.trials_per_sample, steps_per_trial))
        wall_times = []
        test_histogram_set = histogram_set.HistogramSet()
        for sample in range(args.samples_per_test):
            if total_errors >= args.max_errors:
                logging.error('Error count exceeded max errors (%d). Aborting.' % args.max_errors)
                return EXIT_FAILURE

            cmd_run = cmd + [
                '--steps-per-trial',
                str(steps_per_trial),
                '--trials',
                str(args.trials_per_sample),
            ]
            if args.smoke_test_mode:
                cmd_run += ['--no-warmup']
            else:
                cmd_run += ['--warmup-loops', str(args.warmup_loops)]
            with common.temporary_file() as histogram_file_path:
                cmd_run += ['--isolated-script-test-perf-output=%s' % histogram_file_path]
                exit_code, output = _run_and_get_output(args, cmd_run, env)
                if exit_code != EXIT_SUCCESS:
                    logging.error('%s failed. Output:\n%s' % (cmd_run[0], '\n'.join(output)))
                    results.result_fail(test)
                    total_errors += 1
                    break

                sample_wall_times = _get_results_from_output(output, 'wall_time')
                if not sample_wall_times:
                    # This can be intentional for skipped tests. They are handled below.
                    logging.warning('Test %s failed to produce a sample output' % test)
                    break
                logging.info('Test %d/%d Sample %d/%d wall_times: %s' %
                             (test_index + 1, num_tests, sample + 1, args.samples_per_test,
                              str(sample_wall_times)))
                wall_times += sample_wall_times
                with open(histogram_file_path) as histogram_file:
                    sample_json = json.load(histogram_file)
                    sample_histogram = histogram_set.HistogramSet()
                    sample_histogram.ImportDicts(sample_json)
                    test_histogram_set.Merge(sample_histogram)

        if not results.has_result(test):
            if not wall_times:
                logging.warning('Skipping test %s. Assuming this is intentional.' % test)
                results.result_skip(test)
            elif len(wall_times) == (args.samples_per_test * args.trials_per_sample):
                if len(wall_times) > 7:
                    truncation_n = len(wall_times) >> 3
                    logging.debug(
                        'Truncation: Removing the %d highest and lowest times from wall_times.' %
                        truncation_n)
                    wall_times = _truncated_list(wall_times, truncation_n)

                if len(wall_times) > 1:
                    logging.info('Test %d/%d: %s: truncated mean wall_time = %.2f, cov = %.2f%%' %
                                 (test_index + 1, num_tests, test, _mean(wall_times),
                                  (_coefficient_of_variation(wall_times) * 100.0)))
                results.result_pass(test)

                # Merge the histogram set into one histogram
                with common.temporary_file() as merge_histogram_path:
                    logging.info('Writing merged histograms to %s.' % merge_histogram_path)
                    with open(merge_histogram_path, 'w') as merge_histogram_file:
                        json.dump(test_histogram_set.AsDicts(), merge_histogram_file)
                        merge_histogram_file.close()
                    merged_dicts = merge_histograms.MergeHistograms(
                        merge_histogram_path, groupby=['name'])
                    merged_histogram = histogram_set.HistogramSet()
                    merged_histogram.ImportDicts(merged_dicts)
                    histograms.Merge(merged_histogram)
            else:
                logging.error('Test %s failed to record some samples' % test)

    if args.isolated_script_test_output:
        results.save_to_output_file(args.test_suite, args.isolated_script_test_output)

        # Uses special output files to match the merge script.
        _save_extra_output_files(args, results, histograms)

    if args.isolated_script_test_perf_output:
        with open(args.isolated_script_test_perf_output, 'w') as out_file:
            out_file.write(json.dumps(histograms.AsDicts(), indent=2))

    end_time = time.time()
    logging.info('Elapsed time: %.2lf seconds.' % (end_time - start_time))

    return EXIT_SUCCESS


# This is not really a "script test" so does not need to manually add
# any additional compile targets.
def main_compile_targets(args):
    json.dump([], args.output)


if __name__ == '__main__':
    # Conform minimally to the protocol defined by ScriptTest.
    if 'compile_targets' in sys.argv:
        funcs = {
            'run': None,
            'compile_targets': main_compile_targets,
        }
        sys.exit(common.run_script(sys.argv[1:], funcs))
    sys.exit(main())