#! /usr/bin/env vpython3
#
# Copyright 2021 The ANGLE Project Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#
# run_perf_test.py:
#   Runs ANGLE perf tests using some statistical averaging.

import argparse
import fnmatch
import importlib
import io
import json
import logging
import time
import os
import re
import subprocess
import sys

# Add //src/testing into sys.path for importing xvfb and test_env, and
# //src/testing/scripts for importing common.
d = os.path.dirname
THIS_DIR = d(os.path.abspath(__file__))
ANGLE_DIR = d(d(THIS_DIR))
sys.path.append(os.path.join(ANGLE_DIR, 'testing'))
sys.path.append(os.path.join(ANGLE_DIR, 'testing', 'scripts'))

import common
import test_env
import xvfb

sys.path.append(os.path.join(ANGLE_DIR, 'third_party', 'catapult', 'tracing'))
from tracing.value import histogram
from tracing.value import histogram_set
from tracing.value import merge_histograms

DEFAULT_TEST_SUITE = 'angle_perftests'
DEFAULT_LOG = 'info'
DEFAULT_SAMPLES = 4
DEFAULT_TRIALS = 3
DEFAULT_MAX_ERRORS = 3
DEFAULT_WARMUP_LOOPS = 2
DEFAULT_CALIBRATION_TIME = 2

# Filters out stuff like: " I 72.572s run_tests_on_device(96071FFAZ00096) "
ANDROID_LOGGING_PREFIX = r'I +\d+.\d+s \w+\(\w+\) '

# Test expectations
FAIL = 'FAIL'
PASS = 'PASS'
SKIP = 'SKIP'

EXIT_FAILURE = 1
EXIT_SUCCESS = 0


def is_windows():
    return sys.platform == 'cygwin' or sys.platform.startswith('win')


def get_binary_name(binary):
    if is_windows():
        return '.\\%s.exe' % binary
    else:
        return './%s' % binary


def _popen(*args, **kwargs):
    assert 'creationflags' not in kwargs
    if sys.platform == 'win32':
        # Necessary for signal handling. See crbug.com/733612#c6.
        kwargs['creationflags'] = subprocess.CREATE_NEW_PROCESS_GROUP
    return subprocess.Popen(*args, **kwargs)


def run_command_with_output(argv, stdoutfile, env=None, cwd=None, log=True):
    assert stdoutfile
    with io.open(stdoutfile, 'wb') as writer, \
            io.open(stdoutfile, 'rb') as reader:
        process = _popen(argv, env=env, cwd=cwd, stdout=writer, stderr=subprocess.STDOUT)
        test_env.forward_signals([process])
        while process.poll() is None:
            if log:
                sys.stdout.write(reader.read().decode('utf-8'))
            # This sleep is needed for signal propagation. See the
            # wait_with_signals() docstring.
            time.sleep(0.1)
        if log:
            sys.stdout.write(reader.read().decode('utf-8'))
        return process.returncode


def _run_and_get_output(args, cmd, env):
    lines = []
    logging.debug(' '.join(cmd))
    with common.temporary_file() as tempfile_path:
        if args.xvfb:
            exit_code = xvfb.run_executable(cmd, env, stdoutfile=tempfile_path)
        else:
            exit_code = run_command_with_output(
                cmd, env=env, stdoutfile=tempfile_path, log=args.show_test_stdout)
        with open(tempfile_path) as f:
            for line in f:
                lines.append(line.strip())
    return exit_code, lines


def _filter_tests(tests, pattern):
    return [test for test in tests if fnmatch.fnmatch(test, pattern)]


def _shard_tests(tests, shard_count, shard_index):
    return [tests[index] for index in range(shard_index, len(tests), shard_count)]


def _get_results_from_output(output, result):
    output = '\n'.join(output)
    m = re.search(r'Running (\d+) tests', output)
    if m and int(m.group(1)) > 1:
        raise Exception('Found more than one test result in output')

    # Results are reported in the format:
    # name_backend.result: story= value units.
    pattern = r'\.' + result + r':.*= ([0-9.]+)'
    logging.debug('Searching for %s in output' % pattern)
    m = re.findall(pattern, output)
    if not m:
        logging.warning('Did not find the result "%s" in the test output:\n%s' % (result, output))
        return None

    return [float(value) for value in m]


def _get_tests_from_output(lines):
    seen_start_of_tests = False
    tests = []
    android_prefix = re.compile(ANDROID_LOGGING_PREFIX)
    logging.debug('Read %d lines from test output.' % len(lines))
    for line in lines:
        line = android_prefix.sub('', line.strip())
        if line == 'Tests list:':
            seen_start_of_tests = True
        elif line == 'End tests list.':
            break
        elif seen_start_of_tests:
            tests.append(line)
    if not seen_start_of_tests:
        raise Exception('Did not find test list in test output!')
    logging.debug('Found %d tests from test output.' % len(tests))
    return tests


def _truncated_list(data, n):
    """Compute a truncated list, n is truncation size"""
    if len(data) < n * 2:
        raise ValueError('list not large enough to truncate')
    return sorted(data)[n:-n]


def _mean(data):
    """Return the sample arithmetic mean of data."""
    n = len(data)
    if n < 1:
        raise ValueError('mean requires at least one data point')
    return float(sum(data)) / float(n)  # in Python 2 use sum(data)/float(n)


def _sum_of_square_deviations(data, c):
    """Return sum of square deviations of sequence data."""
    ss = sum((float(x) - c)**2 for x in data)
    return ss


def _coefficient_of_variation(data):
    """Calculates the population coefficient of variation."""
    n = len(data)
    if n < 2:
        raise ValueError('variance requires at least two data points')
    c = _mean(data)
    ss = _sum_of_square_deviations(data, c)
    pvar = ss / n  # the population variance
    stddev = (pvar**0.5)  # population standard deviation
    return stddev / c


def _save_extra_output_files(args, results, histograms):
    isolated_out_dir = os.path.dirname(args.isolated_script_test_output)
    if not os.path.isdir(isolated_out_dir):
        return
    benchmark_path = os.path.join(isolated_out_dir, args.test_suite)
    if not os.path.isdir(benchmark_path):
        os.makedirs(benchmark_path)
    test_output_path = os.path.join(benchmark_path, 'test_results.json')
    results.save_to_json_file(test_output_path)
    perf_output_path = os.path.join(benchmark_path, 'perf_results.json')
    logging.info('Saving perf histograms to %s.' % perf_output_path)
    with open(perf_output_path, 'w') as out_file:
        out_file.write(json.dumps(histograms.AsDicts(), indent=2))


class Results:

    def __init__(self):
        self._results = {
            'tests': {},
            'interrupted': False,
            'seconds_since_epoch': time.time(),
            'path_delimiter': '.',
            'version': 3,
            'num_failures_by_type': {
                FAIL: 0,
                PASS: 0,
                SKIP: 0,
            },
        }
        self._test_results = {}

    def has_result(self, test):
        return test in self._test_results

    def result_skip(self, test):
        self._test_results[test] = {'expected': SKIP, 'actual': SKIP}
        self._results['num_failures_by_type'][SKIP] += 1

    def result_pass(self, test):
        self._test_results[test] = {'expected': PASS, 'actual': PASS}
        self._results['num_failures_by_type'][PASS] += 1

    def result_fail(self, test):
        self._test_results[test] = {'expected': PASS, 'actual': FAIL, 'is_unexpected': True}
        self._results['num_failures_by_type'][FAIL] += 1

    def save_to_output_file(self, test_suite, fname):
        self._update_results(test_suite)
        with open(fname, 'w') as out_file:
            out_file.write(json.dumps(self._results, indent=2))

    def save_to_json_file(self, fname):
        logging.info('Saving test results to %s.' % fname)
        with open(fname, 'w') as out_file:
            out_file.write(json.dumps(self._results, indent=2))

    def _update_results(self, test_suite):
        if self._test_results:
            self._results['tests'][test_suite] = self._test_results
            self._test_results = {}


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--isolated-script-test-output', type=str)
    parser.add_argument('--isolated-script-test-perf-output', type=str)
    parser.add_argument(
        '-f', '--filter', '--isolated-script-test-filter', type=str, help='Test filter.')
    parser.add_argument('--test-suite', help='Test suite to run.', default=DEFAULT_TEST_SUITE)
    parser.add_argument('--xvfb', help='Use xvfb.', action='store_true')
    parser.add_argument(
        '--shard-count',
        help='Number of shards for test splitting. Default is 1.',
        type=int,
        default=1)
    parser.add_argument(
        '--shard-index',
        help='Index of the current shard for test splitting. Default is 0.',
        type=int,
        default=0)
    parser.add_argument(
        '-l', '--log', help='Log output level. Default is %s.' % DEFAULT_LOG, default=DEFAULT_LOG)
    parser.add_argument(
        '-s',
        '--samples-per-test',
        help='Number of samples to run per test. Default is %d.' % DEFAULT_SAMPLES,
        type=int,
        default=DEFAULT_SAMPLES)
    parser.add_argument(
        '-t',
        '--trials-per-sample',
        help='Number of trials to run per sample. Default is %d.' % DEFAULT_TRIALS,
        type=int,
        default=DEFAULT_TRIALS)
    parser.add_argument(
        '--steps-per-trial', help='Fixed number of steps to run per trial.', type=int)
    parser.add_argument(
        '--max-errors',
        help='After this many errors, abort the run. Default is %d.' % DEFAULT_MAX_ERRORS,
        type=int,
        default=DEFAULT_MAX_ERRORS)
    parser.add_argument(
        '--smoke-test-mode', help='Do a quick run to validate correctness.', action='store_true')
    parser.add_argument(
        '--warmup-loops',
        help='Number of warmup loops to run in the perf test. Default is %d.' %
        DEFAULT_WARMUP_LOOPS,
        type=int,
        default=DEFAULT_WARMUP_LOOPS)
    parser.add_argument(
        '--calibration-time',
        help='Amount of time to spend each loop in calibration and warmup. Default is %d seconds.'
        % DEFAULT_CALIBRATION_TIME,
        type=int,
        default=DEFAULT_CALIBRATION_TIME)
    parser.add_argument(
        '--show-test-stdout', help='Prints all test stdout during execution.', action='store_true')
    parser.add_argument(
        '--perf-counters', help='Colon-separated list of extra perf counter metrics.')

    args, extra_flags = parser.parse_known_args()

    importlib.reload(logging)
    logging.basicConfig(level=args.log.upper())

    start_time = time.time()

    # Use fast execution for smoke test mode.
    if args.smoke_test_mode:
        args.steps_per_trial = 1
        args.trials_per_sample = 1
        args.samples_per_test = 1

    env = os.environ.copy()

    # Get sharding args
    if 'GTEST_TOTAL_SHARDS' in env and int(env['GTEST_TOTAL_SHARDS']) != 1:
        if 'GTEST_SHARD_INDEX' not in env:
            logging.error('Sharding params must be specified together.')
            sys.exit(1)
        args.shard_count = int(env.pop('GTEST_TOTAL_SHARDS'))
        args.shard_index = int(env.pop('GTEST_SHARD_INDEX'))

    # The harness currently uploads all traces in a batch, which is very slow.
    # TODO: Reduce lag from trace uploads and remove this. http://anglebug.com/6854
    env['DEVICE_TIMEOUT_MULTIPLIER'] = '20'

    # Get test list
    cmd = [get_binary_name(args.test_suite), '--list-tests', '--verbose']
    exit_code, lines = _run_and_get_output(args, cmd, env)
    if exit_code != EXIT_SUCCESS:
        logging.fatal('Could not find test list from test output:\n%s' % '\n'.join(lines))
    tests = _get_tests_from_output(lines)

    if args.filter:
        tests = _filter_tests(tests, args.filter)

    # Get tests for this shard (if using sharding args)
    tests = _shard_tests(tests, args.shard_count, args.shard_index)

    num_tests = len(tests)
    if num_tests == 0:
        logging.error('No tests to run.')
        return EXIT_FAILURE

    logging.info('Running %d test%s' % (num_tests, 's' if num_tests > 1 else ' '))

    # Run tests
    results = Results()

    histograms = histogram_set.HistogramSet()
    total_errors = 0

    for test_index in range(num_tests):
        test = tests[test_index]
        cmd = [
            get_binary_name(args.test_suite),
            '--gtest_filter=%s' % test,
            '--extract-test-list-from-filter',
            '--enable-device-cache',
            '--skip-clear-data',
            '--use-existing-test-data',
            '--verbose',
            '--calibration-time',
            str(args.calibration_time),
        ]
        if args.steps_per_trial:
            steps_per_trial = args.steps_per_trial
        else:
            cmd_calibrate = cmd + [
                '--calibration',
                '--warmup-loops',
                str(args.warmup_loops),
            ]
            exit_code, calibrate_output = _run_and_get_output(args, cmd_calibrate, env)
            if exit_code != EXIT_SUCCESS:
                logging.fatal('%s failed. Output:\n%s' %
                              (cmd_calibrate[0], '\n'.join(calibrate_output)))
                total_errors += 1
                results.result_fail(test)
                continue
            steps_per_trial = _get_results_from_output(calibrate_output, 'steps_to_run')
            if not steps_per_trial:
                logging.warning('Skipping test %s' % test)
                continue
            assert (len(steps_per_trial) == 1)
            steps_per_trial = int(steps_per_trial[0])
        logging.info('Test %d/%d: %s (samples=%d trials_per_sample=%d steps_per_trial=%d)' %
                     (test_index + 1, num_tests, test, args.samples_per_test,
                      args.trials_per_sample, steps_per_trial))
        wall_times = []
        test_histogram_set = histogram_set.HistogramSet()
        for sample in range(args.samples_per_test):
            if total_errors >= args.max_errors:
                logging.error('Error count exceeded max errors (%d). Aborting.' % args.max_errors)
                return EXIT_FAILURE

            cmd_run = cmd + [
                '--steps-per-trial',
                str(steps_per_trial),
                '--trials',
                str(args.trials_per_sample),
            ]

            if args.smoke_test_mode:
                cmd_run += ['--no-warmup']
            else:
                cmd_run += ['--warmup-loops', str(args.warmup_loops)]

            if args.perf_counters:
                cmd_run += ['--perf-counters', args.perf_counters]

            with common.temporary_file() as histogram_file_path:
                cmd_run += ['--isolated-script-test-perf-output=%s' % histogram_file_path]
                exit_code, output = _run_and_get_output(args, cmd_run, env)
                if exit_code != EXIT_SUCCESS:
                    logging.error('%s failed. Output:\n%s' % (cmd_run[0], '\n'.join(output)))
                    results.result_fail(test)
                    total_errors += 1
                    break

                sample_wall_times = _get_results_from_output(output, 'wall_time')
                if not sample_wall_times:
                    # This can be intentional for skipped tests. They are handled below.
                    logging.warning('Test %s failed to produce a sample output' % test)
                    break
                logging.info('Test %d/%d Sample %d/%d wall_times: %s' %
                             (test_index + 1, num_tests, sample + 1, args.samples_per_test,
                              str(sample_wall_times)))
                wall_times += sample_wall_times
                with open(histogram_file_path) as histogram_file:
                    sample_json = json.load(histogram_file)
                    sample_histogram = histogram_set.HistogramSet()
                    sample_histogram.ImportDicts(sample_json)
                    test_histogram_set.Merge(sample_histogram)

        if not results.has_result(test):
            if not wall_times:
                logging.warning('Skipping test %s. Assuming this is intentional.' % test)
                results.result_skip(test)
            elif len(wall_times) == (args.samples_per_test * args.trials_per_sample):
                if len(wall_times) > 7:
                    truncation_n = len(wall_times) >> 3
                    logging.debug(
                        'Truncation: Removing the %d highest and lowest times from wall_times.' %
                        truncation_n)
                    wall_times = _truncated_list(wall_times, truncation_n)

                if len(wall_times) > 1:
                    logging.info('Test %d/%d: %s: truncated mean wall_time = %.2f, cov = %.2f%%' %
                                 (test_index + 1, num_tests, test, _mean(wall_times),
                                  (_coefficient_of_variation(wall_times) * 100.0)))
                results.result_pass(test)

                # Merge the histogram set into one histogram
                with common.temporary_file() as merge_histogram_path:
                    logging.info('Writing merged histograms to %s.' % merge_histogram_path)
                    with open(merge_histogram_path, 'w') as merge_histogram_file:
                        json.dump(test_histogram_set.AsDicts(), merge_histogram_file)
                        merge_histogram_file.close()
                    merged_dicts = merge_histograms.MergeHistograms(
                        merge_histogram_path, groupby=['name'])
                    merged_histogram = histogram_set.HistogramSet()
                    merged_histogram.ImportDicts(merged_dicts)
                    histograms.Merge(merged_histogram)
            else:
                logging.error('Test %s failed to record some samples' % test)

    if args.isolated_script_test_output:
        results.save_to_output_file(args.test_suite, args.isolated_script_test_output)

        # Uses special output files to match the merge script.
        _save_extra_output_files(args, results, histograms)

    if args.isolated_script_test_perf_output:
        with open(args.isolated_script_test_perf_output, 'w') as out_file:
            out_file.write(json.dumps(histograms.AsDicts(), indent=2))

    end_time = time.time()
    logging.info('Elapsed time: %.2lf seconds.' % (end_time - start_time))

    return EXIT_SUCCESS


# This is not really a "script test" so does not need to manually add
# any additional compile targets.
def main_compile_targets(args):
    json.dump([], args.output)


if __name__ == '__main__':
    # Conform minimally to the protocol defined by ScriptTest.
    if 'compile_targets' in sys.argv:
        funcs = {
            'run': None,
            'compile_targets': main_compile_targets,
        }
        sys.exit(common.run_script(sys.argv[1:], funcs))
    sys.exit(main())
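
# Example invocations (illustrative only; the flag names come from the argument
# parser above, but the filter pattern, sample/trial counts, and output path are
# hypothetical placeholders, not values mandated by this script):
#
#   vpython3 run_perf_test.py --test-suite=angle_perftests \
#       --filter='*DrawCallPerf*' --samples-per-test=4 --trials-per-sample=3 \
#       --isolated-script-test-output=/tmp/test_results.json
#
#   # Quick correctness pass: one sample, one trial, one step per test.
#   vpython3 run_perf_test.py --smoke-test-mode --show-test-stdout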