#! /usr/bin/env vpython3
#
# Copyright 2021 The ANGLE Project Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#
# run_perf_test.py:
#   Runs ANGLE perf tests using some statistical averaging.

import argparse
import contextlib
import glob
import importlib
import io
import json
import logging
import tempfile
import time
import os
import pathlib
import re
import subprocess
import shutil
import sys

SCRIPT_DIR = str(pathlib.Path(__file__).resolve().parent)
PY_UTILS = str(pathlib.Path(SCRIPT_DIR) / 'py_utils')
if PY_UTILS not in sys.path:
    os.stat(PY_UTILS) and sys.path.insert(0, PY_UTILS)
import android_helper
import angle_metrics
import angle_path_util
import angle_test_util

angle_path_util.AddDepsDirToPath('testing/scripts')
import common

angle_path_util.AddDepsDirToPath('third_party/catapult/tracing')
from tracing.value import histogram
from tracing.value import histogram_set
from tracing.value import merge_histograms

DEFAULT_TEST_SUITE = 'angle_perftests'
DEFAULT_LOG = 'info'
DEFAULT_SAMPLES = 10
DEFAULT_TRIALS = 4
DEFAULT_MAX_ERRORS = 3

# These parameters condition the test warmup to stabilize the scores across runs.
DEFAULT_WARMUP_TRIALS = 2
DEFAULT_TRIAL_TIME = 3

# Test expectations
FAIL = 'FAIL'
PASS = 'PASS'
SKIP = 'SKIP'

EXIT_FAILURE = 1
EXIT_SUCCESS = 0


@contextlib.contextmanager
def temporary_dir(prefix=''):
    path = tempfile.mkdtemp(prefix=prefix)
    try:
        yield path
    finally:
        shutil.rmtree(path)


def _shard_tests(tests, shard_count, shard_index):
    return [tests[index] for index in range(shard_index, len(tests), shard_count)]


def _get_results_from_output(output, result):
    m = re.search(r'Running (\d+) tests', output)
    if m and int(m.group(1)) > 1:
        raise Exception('Found more than one test result in output')

    # Results are reported in the format:
    # name_backend.result: story= value units.
    pattern = r'\.' + result + r':.*= ([0-9.]+)'
    logging.debug('Searching for %s in output' % pattern)
    m = re.findall(pattern, output)
    if not m:
        logging.warning('Did not find the result "%s" in the test output:\n%s' % (result, output))
        return None

    return [float(value) for value in m]
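
# Illustrative example of the match above (the test name is hypothetical; the
# line format is the one documented in the comment inside
# _get_results_from_output): with result='steps_to_run', the pattern becomes
# r'\.steps_to_run:.*= ([0-9.]+)', so an output line such as
#   SomeTest_vulkan.steps_to_run: story= 1280 count
# would yield [1280.0]. Units after the number are ignored.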


def _truncated_list(data, n):
    """Compute a truncated list, n is truncation size"""
    if len(data) < n * 2:
        raise ValueError('list not large enough to truncate')
    return sorted(data)[n:-n]


def _mean(data):
    """Return the sample arithmetic mean of data."""
    n = len(data)
    if n < 1:
        raise ValueError('mean requires at least one data point')
    return float(sum(data)) / float(n)  # in Python 2 use sum(data)/float(n)


def _sum_of_square_deviations(data, c):
    """Return sum of square deviations of sequence data."""
    ss = sum((float(x) - c)**2 for x in data)
    return ss


def _coefficient_of_variation(data):
    """Calculates the population coefficient of variation."""
    n = len(data)
    if n < 2:
        raise ValueError('variance requires at least two data points')
    c = _mean(data)
    ss = _sum_of_square_deviations(data, c)
    pvar = ss / n  # the population variance
    stddev = (pvar**0.5)  # population standard deviation
    return stddev / c
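
# A minimal sketch of how the helpers above combine (hypothetical numbers, not
# part of the test flow): the truncation drops outliers before the mean and
# coefficient of variation are computed, e.g.
#
#   >>> _truncated_list([10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 100.0], 1)
#   [11.0, 12.0, 13.0, 14.0, 15.0, 16.0]
#   >>> _mean([11.0, 12.0, 13.0, 14.0, 15.0, 16.0])
#   13.5
#   >>> round(_coefficient_of_variation([11.0, 12.0, 13.0, 14.0, 15.0, 16.0]), 2)
#   0.13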


def _save_extra_output_files(args, results, histograms, metrics):
    isolated_out_dir = os.path.dirname(args.isolated_script_test_output)
    if not os.path.isdir(isolated_out_dir):
        return
    benchmark_path = os.path.join(isolated_out_dir, args.test_suite)
    if not os.path.isdir(benchmark_path):
        os.makedirs(benchmark_path)
    test_output_path = os.path.join(benchmark_path, 'test_results.json')
    results.save_to_json_file(test_output_path)
    perf_output_path = os.path.join(benchmark_path, 'perf_results.json')
    logging.info('Saving perf histograms to %s.' % perf_output_path)
    with open(perf_output_path, 'w') as out_file:
        out_file.write(json.dumps(histograms.AsDicts(), indent=2))

    angle_metrics_path = os.path.join(benchmark_path, 'angle_metrics.json')
    with open(angle_metrics_path, 'w') as f:
        f.write(json.dumps(metrics, indent=2))

    # Calling here to catch errors earlier (fail shard instead of merge script)
    assert angle_metrics.ConvertToSkiaPerf([angle_metrics_path])


class Results:

    def __init__(self, suffix):
        self._results = {
            'tests': {},
            'interrupted': False,
            'seconds_since_epoch': time.time(),
            'path_delimiter': '.',
            'version': 3,
            'num_failures_by_type': {
                FAIL: 0,
                PASS: 0,
                SKIP: 0,
            },
        }
        self._test_results = {}
        self._suffix = suffix

    def _testname(self, name):
        return name + self._suffix

    def has_failures(self):
        return self._results['num_failures_by_type'][FAIL] > 0

    def has_result(self, test):
        return self._testname(test) in self._test_results

    def result_skip(self, test):
        self._test_results[self._testname(test)] = {'expected': SKIP, 'actual': SKIP}
        self._results['num_failures_by_type'][SKIP] += 1

    def result_pass(self, test):
        self._test_results[self._testname(test)] = {'expected': PASS, 'actual': PASS}
        self._results['num_failures_by_type'][PASS] += 1

    def result_fail(self, test):
        self._test_results[self._testname(test)] = {
            'expected': PASS,
            'actual': FAIL,
            'is_unexpected': True
        }
        self._results['num_failures_by_type'][FAIL] += 1

    def save_to_output_file(self, test_suite, fname):
        self._update_results(test_suite)
        with open(fname, 'w') as out_file:
            out_file.write(json.dumps(self._results, indent=2))

    def save_to_json_file(self, fname):
        logging.info('Saving test results to %s.' % fname)
        with open(fname, 'w') as out_file:
            out_file.write(json.dumps(self._results, indent=2))

    def _update_results(self, test_suite):
        if self._test_results:
            self._results['tests'][test_suite] = self._test_results
            self._test_results = {}


def _read_histogram(histogram_file_path):
    with open(histogram_file_path) as histogram_file:
        histogram = histogram_set.HistogramSet()
        histogram.ImportDicts(json.load(histogram_file))
        return histogram


def _read_metrics(metrics_file_path):
    try:
        with open(metrics_file_path) as f:
            return [json.loads(l) for l in f]
    except FileNotFoundError:
        return []
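
# Note on the metrics file shape: this is inferred from how _run_tests consumes
# the entries (m['metric'] and m['value']), so treat the exact keys shown here
# as an assumption rather than a spec. Each line of the 'angle_metrics' file is
# a standalone JSON object, roughly
#   {"metric": ".wall_time", "value": "0.125", ...}
# and _read_metrics() returns one dict per line, or [] if the suite never
# produced the file.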


def _merge_into_one_histogram(test_histogram_set):
    with common.temporary_file() as merge_histogram_path:
        logging.info('Writing merged histograms to %s.' % merge_histogram_path)
        with open(merge_histogram_path, 'w') as merge_histogram_file:
            json.dump(test_histogram_set.AsDicts(), merge_histogram_file)
            merge_histogram_file.close()
        merged_dicts = merge_histograms.MergeHistograms(merge_histogram_path, groupby=['name'])
        merged_histogram = histogram_set.HistogramSet()
        merged_histogram.ImportDicts(merged_dicts)
        return merged_histogram


def _wall_times_stats(wall_times):
    if len(wall_times) > 7:
        truncation_n = len(wall_times) >> 3
        logging.debug('Truncation: Removing the %d highest and lowest times from wall_times.' %
                      truncation_n)
        wall_times = _truncated_list(wall_times, truncation_n)

    if len(wall_times) > 1:
        return ('truncated mean wall_time = %.2f, cov = %.2f%%' %
                (_mean(wall_times), _coefficient_of_variation(wall_times) * 100.0))

    return None


def _run_test_suite(args, cmd_args, env):
    return angle_test_util.RunTestSuite(
        args.test_suite,
        cmd_args,
        env,
        use_xvfb=args.xvfb,
        show_test_stdout=args.show_test_stdout)


def _run_calibration(args, common_args, env):
    exit_code, calibrate_output, json_results = _run_test_suite(
        args, common_args + [
            '--calibration',
            '--warmup-trials',
            str(args.warmup_trials),
            '--calibration-time',
            str(args.trial_time),
        ], env)
    if exit_code != EXIT_SUCCESS:
        raise RuntimeError('%s failed. Output:\n%s' % (args.test_suite, calibrate_output))
    if SKIP in json_results['num_failures_by_type']:
        return SKIP, None

    steps_per_trial = _get_results_from_output(calibrate_output, 'steps_to_run')
    if not steps_per_trial:
        return FAIL, None

    assert (len(steps_per_trial) == 1)
    return PASS, int(steps_per_trial[0])


def _run_perf(args, common_args, env, steps_per_trial=None):
    run_args = common_args + [
        '--trials',
        str(args.trials_per_sample),
    ]

    if steps_per_trial:
        run_args += ['--steps-per-trial', str(steps_per_trial)]
    else:
        run_args += ['--trial-time', str(args.trial_time)]

    if args.smoke_test_mode:
        run_args += ['--no-warmup']
    else:
        run_args += ['--warmup-trials', str(args.warmup_trials)]

    if args.perf_counters:
        run_args += ['--perf-counters', args.perf_counters]

    with temporary_dir() as render_output_dir:
        histogram_file_path = os.path.join(render_output_dir, 'histogram')
        run_args += ['--isolated-script-test-perf-output=%s' % histogram_file_path]
        run_args += ['--render-test-output-dir=%s' % render_output_dir]

        exit_code, output, json_results = _run_test_suite(args, run_args, env)
        if exit_code != EXIT_SUCCESS:
            raise RuntimeError('%s failed. Output:\n%s' % (args.test_suite, output))
        if SKIP in json_results['num_failures_by_type']:
            return SKIP, None, None

        sample_metrics = _read_metrics(os.path.join(render_output_dir, 'angle_metrics'))

        if sample_metrics:
            sample_histogram = _read_histogram(histogram_file_path)
            return PASS, sample_metrics, sample_histogram

        return FAIL, None, None
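
# Illustrative sketch of the flags assembled by _run_perf (using the DEFAULT_*
# values above; the calibrated step count of 100 is hypothetical): with
# trials_per_sample=4 and steps_per_trial=100 the suite is invoked with
#   --trials 4 --steps-per-trial 100 --warmup-trials 2
# whereas without a fixed step count it falls back to
#   --trials 4 --trial-time 3 --warmup-trials 2
# (or --no-warmup in --smoke-test-mode), plus the histogram and render test
# output paths pointing into a temporary directory.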


class _MaxErrorsException(Exception):
    pass


def _skipped_or_glmark2(test, test_status):
    if test_status == SKIP:
        logging.info('Test skipped by suite: %s' % test)
        return True

    # GLMark2Benchmark logs .fps/.score instead of our perf metrics.
    if test.startswith('GLMark2Benchmark.Run/'):
        logging.info('GLMark2Benchmark missing metrics (as expected, skipping): %s' % test)
        return True

    return False


def _run_tests(tests, args, extra_flags, env):
    if args.split_shard_samples and args.shard_index is not None:
        test_suffix = '_shard%d' % args.shard_index
    else:
        test_suffix = ''

    results = Results(test_suffix)

    histograms = histogram_set.HistogramSet()
    metrics = []
    total_errors = 0
    prepared_traces = set()

    for test_index in range(len(tests)):
        if total_errors >= args.max_errors:
            raise _MaxErrorsException()

        test = tests[test_index]

        if angle_test_util.IsAndroid():
            trace = android_helper.GetTraceFromTestName(test)
            if trace and trace not in prepared_traces:
                android_helper.PrepareRestrictedTraces([trace])
                prepared_traces.add(trace)

        common_args = [
            '--gtest_filter=%s' % test,
            '--verbose',
        ] + extra_flags

        if args.steps_per_trial:
            steps_per_trial = args.steps_per_trial
            trial_limit = 'steps_per_trial=%d' % steps_per_trial
        elif args.calibrate_steps_per_trial:
            try:
                test_status, steps_per_trial = _run_calibration(args, common_args, env)
            except RuntimeError as e:
                logging.fatal(e)
                total_errors += 1
                results.result_fail(test)
                continue

            if _skipped_or_glmark2(test, test_status):
                results.result_skip(test)
                continue

            if not steps_per_trial:
                logging.error('Test %s missing steps_per_trial' % test)
                results.result_fail(test)
                continue
            trial_limit = 'steps_per_trial=%d' % steps_per_trial
        else:
            steps_per_trial = None
            trial_limit = 'trial_time=%d' % args.trial_time

        logging.info('Test %d/%d: %s (samples=%d trials_per_sample=%d %s)' %
                     (test_index + 1, len(tests), test, args.samples_per_test,
                      args.trials_per_sample, trial_limit))

        wall_times = []
        test_histogram_set = histogram_set.HistogramSet()
        for sample in range(args.samples_per_test):
            try:
                test_status, sample_metrics, sample_histogram = _run_perf(
                    args, common_args, env, steps_per_trial)
            except RuntimeError as e:
                logging.error(e)
                results.result_fail(test)
                total_errors += 1
                break

            if _skipped_or_glmark2(test, test_status):
                results.result_skip(test)
                break

            if not sample_metrics:
                logging.error('Test %s failed to produce a sample output' % test)
                results.result_fail(test)
                break

            sample_wall_times = [
                float(m['value']) for m in sample_metrics if m['metric'] == '.wall_time'
            ]

            logging.info('Test %d/%d Sample %d/%d wall_times: %s' %
                         (test_index + 1, len(tests), sample + 1, args.samples_per_test,
                          str(sample_wall_times)))

            if len(sample_wall_times) != args.trials_per_sample:
                logging.error('Test %s failed to record some wall_times (expected %d, got %d)' %
                              (test, args.trials_per_sample, len(sample_wall_times)))
                results.result_fail(test)
                break

            wall_times += sample_wall_times
            test_histogram_set.Merge(sample_histogram)
            metrics.append(sample_metrics)

        if not results.has_result(test):
            assert len(wall_times) == (args.samples_per_test * args.trials_per_sample)
            stats = _wall_times_stats(wall_times)
            if stats:
                logging.info('Test %d/%d: %s: %s' % (test_index + 1, len(tests), test, stats))
            histograms.Merge(_merge_into_one_histogram(test_histogram_set))
            results.result_pass(test)

    return results, histograms, metrics
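
# Illustrative note on the amount of data behind the summary above (using the
# DEFAULT_* values; command-line flags may override them): samples_per_test=10
# and trials_per_sample=4 give 40 wall_time values per test, so
# _wall_times_stats() drops the 5 (= 40 >> 3) highest and 5 lowest values
# before reporting the truncated mean and coefficient of variation.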


def _find_test_suite_directory(test_suite):
    if os.path.exists(angle_test_util.ExecutablePathInCurrentDir(test_suite)):
        return '.'

    if angle_test_util.IsWindows():
        test_suite += '.exe'

    # Find most recent binary in search paths.
    newest_binary = None
    newest_mtime = None

    for path in glob.glob('out/*'):
        binary_path = str(pathlib.Path(SCRIPT_DIR).parent.parent / path / test_suite)
        if os.path.exists(binary_path):
            binary_mtime = os.path.getmtime(binary_path)
            if (newest_binary is None) or (binary_mtime > newest_mtime):
                newest_binary = binary_path
                newest_mtime = binary_mtime

    if newest_binary:
        logging.info('Found %s in %s' % (test_suite, os.path.dirname(newest_binary)))
        return os.path.dirname(newest_binary)
    return None


def _split_shard_samples(tests, samples_per_test, shard_count, shard_index):
    test_samples = [(test, sample) for test in tests for sample in range(samples_per_test)]
    shard_test_samples = _shard_tests(test_samples, shard_count, shard_index)
    return [test for (test, sample) in shard_test_samples]
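
# Illustrative sketch of the two sharding modes (hypothetical test names): with
# tests = ['A', 'B', 'C', 'D'] and shard_count=2, _shard_tests() round-robins
# whole tests, giving shard 0 -> ['A', 'C'] and shard 1 -> ['B', 'D']. With
# --split-shard-samples, samples_per_test=2 and shard_count=2,
# _split_shard_samples() round-robins (test, sample) pairs instead, so each
# shard runs every test once: both shards get ['A', 'B', 'C', 'D'] and main()
# then sets samples_per_test to 1.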


def _should_lock_gpu_clocks():
    if not angle_test_util.IsWindows():
        return False

    try:
        gpu_info = subprocess.check_output(
            ['nvidia-smi', '--query-gpu=gpu_name', '--format=csv,noheader']).decode()
    except FileNotFoundError:
        # expected in some cases, e.g. non-nvidia bots
        return False

    logging.info('nvidia-smi --query-gpu=gpu_name output: %s' % gpu_info)

    return gpu_info.strip() == 'GeForce GTX 1660'


def _log_nvidia_gpu_temperature():
    t = subprocess.check_output(
        ['nvidia-smi', '--query-gpu=temperature.gpu', '--format=csv,noheader']).decode().strip()
    logging.info('Current GPU temperature: %s ' % t)


@contextlib.contextmanager
def _maybe_lock_gpu_clocks():
    if not _should_lock_gpu_clocks():
        yield
        return

    # Lock to 1410Mhz (`nvidia-smi --query-supported-clocks=gr --format=csv`)
    lgc_out = subprocess.check_output(['nvidia-smi', '--lock-gpu-clocks=1410,1410']).decode()
    logging.info('Lock GPU clocks output: %s' % lgc_out)
    _log_nvidia_gpu_temperature()
    try:
        yield
    finally:
        rgc_out = subprocess.check_output(['nvidia-smi', '--reset-gpu-clocks']).decode()
        logging.info('Reset GPU clocks output: %s' % rgc_out)
        _log_nvidia_gpu_temperature()


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--isolated-script-test-output', type=str)
    parser.add_argument('--isolated-script-test-perf-output', type=str)
    parser.add_argument(
        '-f', '--filter', '--isolated-script-test-filter', type=str, help='Test filter.')
    suite_group = parser.add_mutually_exclusive_group()
    suite_group.add_argument(
        '--test-suite', '--suite', help='Test suite to run.', default=DEFAULT_TEST_SUITE)
    suite_group.add_argument(
        '-T',
        '--trace-tests',
        help='Run with the angle_trace_tests test suite.',
        action='store_true')
    parser.add_argument('--xvfb', help='Use xvfb.', action='store_true')
    parser.add_argument(
        '--shard-count',
        help='Number of shards for test splitting. Default is 1.',
        type=int,
        default=1)
    parser.add_argument(
        '--shard-index',
        help='Index of the current shard for test splitting. Default is 0.',
        type=int,
        default=0)
    parser.add_argument(
        '-l', '--log', help='Log output level. Default is %s.' % DEFAULT_LOG, default=DEFAULT_LOG)
    parser.add_argument(
        '-s',
        '--samples-per-test',
        help='Number of samples to run per test. Default is %d.' % DEFAULT_SAMPLES,
        type=int,
        default=DEFAULT_SAMPLES)
    parser.add_argument(
        '-t',
        '--trials-per-sample',
        help='Number of trials to run per sample. Default is %d.' % DEFAULT_TRIALS,
        type=int,
        default=DEFAULT_TRIALS)
    trial_group = parser.add_mutually_exclusive_group()
    trial_group.add_argument(
        '--steps-per-trial', help='Fixed number of steps to run per trial.', type=int)
    trial_group.add_argument(
        '--trial-time',
        help='Number of seconds to run per trial. Default is %d.' % DEFAULT_TRIAL_TIME,
        type=int,
        default=DEFAULT_TRIAL_TIME)
    trial_group.add_argument(
        '--calibrate-steps-per-trial',
        help='Automatically determine a number of steps per trial.',
        action='store_true')
    parser.add_argument(
        '--max-errors',
        help='After this many errors, abort the run. Default is %d.' % DEFAULT_MAX_ERRORS,
        type=int,
        default=DEFAULT_MAX_ERRORS)
    parser.add_argument(
        '--smoke-test-mode', help='Do a quick run to validate correctness.', action='store_true')
    parser.add_argument(
        '--warmup-trials',
        help='Number of warmup trials to run in the perf test. Default is %d.' %
        DEFAULT_WARMUP_TRIALS,
        type=int,
        default=DEFAULT_WARMUP_TRIALS)
    parser.add_argument(
        '--show-test-stdout', help='Prints all test stdout during execution.', action='store_true')
    parser.add_argument(
        '--perf-counters', help='Colon-separated list of extra perf counter metrics.')
    parser.add_argument(
        '-a',
        '--auto-dir',
        help='Run with the most recent test suite found in the build directories.',
        action='store_true')
    parser.add_argument(
        '--split-shard-samples',
        help='Attempt to mitigate variance between machines by splitting samples between shards.',
        action='store_true')

    args, extra_flags = parser.parse_known_args()

    if args.trace_tests:
        args.test_suite = angle_test_util.ANGLE_TRACE_TEST_SUITE

    angle_test_util.SetupLogging(args.log.upper())

    start_time = time.time()

    # Use fast execution for smoke test mode.
    if args.smoke_test_mode:
        args.steps_per_trial = 1
        args.trials_per_sample = 1
        args.samples_per_test = 1

    env = os.environ.copy()

    if angle_test_util.HasGtestShardsAndIndex(env):
        args.shard_count, args.shard_index = angle_test_util.PopGtestShardsAndIndex(env)

    if args.auto_dir:
        test_suite_dir = _find_test_suite_directory(args.test_suite)
        if not test_suite_dir:
            logging.fatal('Could not find test suite: %s' % args.test_suite)
            return EXIT_FAILURE
        else:
            os.chdir(test_suite_dir)

    angle_test_util.Initialize(args.test_suite)

    # Get test list
    exit_code, output, _ = _run_test_suite(args, ['--list-tests', '--verbose'] + extra_flags, env)
    if exit_code != EXIT_SUCCESS:
        logging.fatal('Could not find test list from test output:\n%s' % output)
        sys.exit(EXIT_FAILURE)
    tests = angle_test_util.GetTestsFromOutput(output)

    if args.filter:
        tests = angle_test_util.FilterTests(tests, args.filter)

    # Get tests for this shard (if using sharding args)
    if args.split_shard_samples and args.shard_count >= args.samples_per_test:
        tests = _split_shard_samples(tests, args.samples_per_test, args.shard_count,
                                     args.shard_index)
        assert (len(set(tests)) == len(tests))
        args.samples_per_test = 1
    else:
        tests = _shard_tests(tests, args.shard_count, args.shard_index)

    if not tests:
        logging.error('No tests to run.')
        return EXIT_FAILURE

    if angle_test_util.IsAndroid() and args.test_suite == android_helper.ANGLE_TRACE_TEST_SUITE:
        android_helper.RunSmokeTest()

    logging.info('Running %d test%s' % (len(tests), 's' if len(tests) > 1 else ' '))

    try:
        with _maybe_lock_gpu_clocks():
            results, histograms, metrics = _run_tests(tests, args, extra_flags, env)
    except _MaxErrorsException:
        logging.error('Error count exceeded max errors (%d). Aborting.' % args.max_errors)
        return EXIT_FAILURE

    for test in tests:
        assert results.has_result(test)

    if args.isolated_script_test_output:
        results.save_to_output_file(args.test_suite, args.isolated_script_test_output)

        # Uses special output files to match the merge script.
        _save_extra_output_files(args, results, histograms, metrics)

    if args.isolated_script_test_perf_output:
        with open(args.isolated_script_test_perf_output, 'w') as out_file:
            out_file.write(json.dumps(histograms.AsDicts(), indent=2))

    end_time = time.time()
    logging.info('Elapsed time: %.2lf seconds.' % (end_time - start_time))

    if results.has_failures():
        return EXIT_FAILURE
    return EXIT_SUCCESS


if __name__ == '__main__':
    sys.exit(main())
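
# Example invocation (a sketch only; the filter pattern and output path are
# hypothetical, the flags are the ones defined in main() above):
#
#   vpython3 run_perf_test.py --test-suite angle_perftests \
#       --filter 'TracePerfTest*' --samples-per-test 10 --trials-per-sample 4 \
#       --isolated-script-test-output /tmp/perf/output.json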