#! /usr/bin/env vpython3
#
# Copyright 2021 The ANGLE Project Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#
# run_perf_test.py:
#   Runs ANGLE perf tests using some statistical averaging.

import argparse
import contextlib
import glob
import importlib
import io
import json
import logging
import tempfile
import time
import os
import pathlib
import re
import subprocess
import shutil
import sys

SCRIPT_DIR = str(pathlib.Path(__file__).resolve().parent)
PY_UTILS = str(pathlib.Path(SCRIPT_DIR) / 'py_utils')
if PY_UTILS not in sys.path:
    os.stat(PY_UTILS) and sys.path.insert(0, PY_UTILS)
import android_helper
import angle_metrics
import angle_path_util
import angle_test_util

angle_path_util.AddDepsDirToPath('testing/scripts')
import common

angle_path_util.AddDepsDirToPath('third_party/catapult/tracing')
from tracing.value import histogram
from tracing.value import histogram_set
from tracing.value import merge_histograms

DEFAULT_TEST_SUITE = 'angle_perftests'
DEFAULT_LOG = 'info'
DEFAULT_SAMPLES = 10
DEFAULT_TRIALS = 4
DEFAULT_MAX_ERRORS = 3

# These parameters control the test warmup, which stabilizes the scores across runs.
DEFAULT_WARMUP_TRIALS = 2
DEFAULT_TRIAL_TIME = 3

# Test expectations
FAIL = 'FAIL'
PASS = 'PASS'
SKIP = 'SKIP'

EXIT_FAILURE = 1
EXIT_SUCCESS = 0


@contextlib.contextmanager
def temporary_dir(prefix=''):
    path = tempfile.mkdtemp(prefix=prefix)
    try:
        yield path
    finally:
        shutil.rmtree(path)


def _shard_tests(tests, shard_count, shard_index):
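    """Round-robin shard selection, e.g. shard 1 of 3 gets tests[1], tests[4], tests[7], ..."""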
    return [tests[index] for index in range(shard_index, len(tests), shard_count)]


def _get_results_from_output(output, result):
    m = re.search(r'Running (\d+) tests', output)
    if m and int(m.group(1)) > 1:
        raise Exception('Found more than one test result in output')

    # Results are reported in the format:
    # name_backend.result: story= value units.
    pattern = r'\.' + result + r':.*= ([0-9.]+)'
    logging.debug('Searching for %s in output' % pattern)
    m = re.findall(pattern, output)
    if not m:
        logging.warning('Did not find the result "%s" in the test output:\n%s' % (result, output))
        return None

    return [float(value) for value in m]


def _truncated_list(data, n):
    """Compute a truncated list, n is truncation size"""
    if len(data) < n * 2:
        raise ValueError('list not large enough to truncate')
    return sorted(data)[n:-n]


def _mean(data):
    """Return the sample arithmetic mean of data."""
    n = len(data)
    if n < 1:
        raise ValueError('mean requires at least one data point')
    return float(sum(data)) / float(n)  # in Python 2 use sum(data)/float(n)


def _sum_of_square_deviations(data, c):
    """Return sum of square deviations of sequence data."""
    ss = sum((float(x) - c)**2 for x in data)
    return ss


def _coefficient_of_variation(data):
    """Calculates the population coefficient of variation."""
    n = len(data)
    if n < 2:
        raise ValueError('variance requires at least two data points')
    c = _mean(data)
    ss = _sum_of_square_deviations(data, c)
    pvar = ss / n  # the population variance
    stddev = (pvar**0.5)  # population standard deviation
    return stddev / c


def _save_extra_output_files(args, results, histograms, metrics):
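    """Writes test_results.json, perf_results.json and angle_metrics.json into a per-suite
    directory next to the isolated script output."""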
    isolated_out_dir = os.path.dirname(args.isolated_script_test_output)
    if not os.path.isdir(isolated_out_dir):
        return
    benchmark_path = os.path.join(isolated_out_dir, args.test_suite)
    if not os.path.isdir(benchmark_path):
        os.makedirs(benchmark_path)
    test_output_path = os.path.join(benchmark_path, 'test_results.json')
    results.save_to_json_file(test_output_path)
    perf_output_path = os.path.join(benchmark_path, 'perf_results.json')
    logging.info('Saving perf histograms to %s.' % perf_output_path)
    with open(perf_output_path, 'w') as out_file:
        out_file.write(json.dumps(histograms.AsDicts(), indent=2))

    angle_metrics_path = os.path.join(benchmark_path, 'angle_metrics.json')
    with open(angle_metrics_path, 'w') as f:
        f.write(json.dumps(metrics, indent=2))

    # Calling here to catch errors earlier (fail shard instead of merge script)
    assert angle_metrics.ConvertToSkiaPerf([angle_metrics_path])


class Results:

    def __init__(self, suffix):
        self._results = {
            'tests': {},
            'interrupted': False,
            'seconds_since_epoch': time.time(),
            'path_delimiter': '.',
            'version': 3,
            'num_failures_by_type': {
                FAIL: 0,
                PASS: 0,
                SKIP: 0,
            },
        }
        self._test_results = {}
        self._suffix = suffix

    def _testname(self, name):
        return name + self._suffix

    def has_failures(self):
        return self._results['num_failures_by_type'][FAIL] > 0

    def has_result(self, test):
        return self._testname(test) in self._test_results

    def result_skip(self, test):
        self._test_results[self._testname(test)] = {'expected': SKIP, 'actual': SKIP}
        self._results['num_failures_by_type'][SKIP] += 1

    def result_pass(self, test):
        self._test_results[self._testname(test)] = {'expected': PASS, 'actual': PASS}
        self._results['num_failures_by_type'][PASS] += 1

    def result_fail(self, test):
        self._test_results[self._testname(test)] = {
            'expected': PASS,
            'actual': FAIL,
            'is_unexpected': True
        }
        self._results['num_failures_by_type'][FAIL] += 1

    def save_to_output_file(self, test_suite, fname):
        self._update_results(test_suite)
        with open(fname, 'w') as out_file:
            out_file.write(json.dumps(self._results, indent=2))

    def save_to_json_file(self, fname):
        logging.info('Saving test results to %s.' % fname)
        with open(fname, 'w') as out_file:
            out_file.write(json.dumps(self._results, indent=2))

    def _update_results(self, test_suite):
        if self._test_results:
            self._results['tests'][test_suite] = self._test_results
            self._test_results = {}


def _read_histogram(histogram_file_path):
    with open(histogram_file_path) as histogram_file:
        histogram = histogram_set.HistogramSet()
        histogram.ImportDicts(json.load(histogram_file))
        return histogram


def _read_metrics(metrics_file_path):
    try:
        with open(metrics_file_path) as f:
            return [json.loads(l) for l in f]
    except FileNotFoundError:
        return []


def _merge_into_one_histogram(test_histogram_set):
    with common.temporary_file() as merge_histogram_path:
        logging.info('Writing merged histograms to %s.' % merge_histogram_path)
        with open(merge_histogram_path, 'w') as merge_histogram_file:
            json.dump(test_histogram_set.AsDicts(), merge_histogram_file)
            merge_histogram_file.close()
        merged_dicts = merge_histograms.MergeHistograms(merge_histogram_path, groupby=['name'])
        merged_histogram = histogram_set.HistogramSet()
        merged_histogram.ImportDicts(merged_dicts)
        return merged_histogram


def _wall_times_stats(wall_times):
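    """Returns a truncated-mean/coefficient-of-variation summary string for wall_times,
    or None when only a single time is available."""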
    if len(wall_times) > 7:
        truncation_n = len(wall_times) >> 3
        logging.debug('Truncation: Removing the %d highest and lowest times from wall_times.' %
                      truncation_n)
        wall_times = _truncated_list(wall_times, truncation_n)

    if len(wall_times) > 1:
        return ('truncated mean wall_time = %.2f, cov = %.2f%%' %
                (_mean(wall_times), _coefficient_of_variation(wall_times) * 100.0))

    return None


def _run_test_suite(args, cmd_args, env):
    return angle_test_util.RunTestSuite(
        args.test_suite,
        cmd_args,
        env,
        use_xvfb=args.xvfb,
        show_test_stdout=args.show_test_stdout)


def _run_calibration(args, common_args, env):
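    """Runs the suite in calibration mode and returns (status, estimated steps per trial)."""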
    exit_code, calibrate_output, json_results = _run_test_suite(
        args, common_args + [
            '--calibration',
            '--warmup-trials',
            str(args.warmup_trials),
            '--calibration-time',
            str(args.trial_time),
        ], env)
    if exit_code != EXIT_SUCCESS:
        raise RuntimeError('%s failed. Output:\n%s' % (args.test_suite, calibrate_output))
    if SKIP in json_results['num_failures_by_type']:
        return SKIP, None

    steps_per_trial = _get_results_from_output(calibrate_output, 'steps_to_run')
    if not steps_per_trial:
        return FAIL, None

    assert (len(steps_per_trial) == 1)
    return PASS, int(steps_per_trial[0])


def _run_perf(args, common_args, env, steps_per_trial=None):
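    """Runs one sample of a perf test and returns (status, angle_metrics list, histogram set)."""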
    run_args = common_args + [
        '--trials',
        str(args.trials_per_sample),
    ]

    if steps_per_trial:
        run_args += ['--steps-per-trial', str(steps_per_trial)]
    else:
        run_args += ['--trial-time', str(args.trial_time)]

    if args.smoke_test_mode:
        run_args += ['--no-warmup']
    else:
        run_args += ['--warmup-trials', str(args.warmup_trials)]

    if args.perf_counters:
        run_args += ['--perf-counters', args.perf_counters]

    with temporary_dir() as render_output_dir:
        histogram_file_path = os.path.join(render_output_dir, 'histogram')
        run_args += ['--isolated-script-test-perf-output=%s' % histogram_file_path]
        run_args += ['--render-test-output-dir=%s' % render_output_dir]

        exit_code, output, json_results = _run_test_suite(args, run_args, env)
        if exit_code != EXIT_SUCCESS:
            raise RuntimeError('%s failed. Output:\n%s' % (args.test_suite, output))
        if SKIP in json_results['num_failures_by_type']:
            return SKIP, None, None

        sample_metrics = _read_metrics(os.path.join(render_output_dir, 'angle_metrics'))

        if sample_metrics:
            sample_histogram = _read_histogram(histogram_file_path)
            return PASS, sample_metrics, sample_histogram

    return FAIL, None, None


class _MaxErrorsException(Exception):
    pass


def _skipped_or_glmark2(test, test_status):
    if test_status == SKIP:
        logging.info('Test skipped by suite: %s' % test)
        return True

    # GLMark2Benchmark logs .fps/.score instead of our perf metrics.
    if test.startswith('GLMark2Benchmark.Run/'):
        logging.info('GLMark2Benchmark missing metrics (as expected, skipping): %s' % test)
        return True

    return False


def _run_tests(tests, args, extra_flags, env):
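    """Runs all tests, accumulating wall_time samples, merged histograms and angle_metrics.
    Raises _MaxErrorsException once args.max_errors errors have been seen."""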
    if args.split_shard_samples and args.shard_index is not None:
        test_suffix = '_shard%d' % args.shard_index
    else:
        test_suffix = ''

    results = Results(test_suffix)

    histograms = histogram_set.HistogramSet()
    metrics = []
    total_errors = 0
    prepared_traces = set()

    for test_index in range(len(tests)):
        if total_errors >= args.max_errors:
            raise _MaxErrorsException()

        test = tests[test_index]

        if angle_test_util.IsAndroid():
            trace = android_helper.GetTraceFromTestName(test)
            if trace and trace not in prepared_traces:
                android_helper.PrepareRestrictedTraces([trace])
                prepared_traces.add(trace)

        common_args = [
            '--gtest_filter=%s' % test,
            '--verbose',
        ] + extra_flags

        if args.steps_per_trial:
            steps_per_trial = args.steps_per_trial
            trial_limit = 'steps_per_trial=%d' % steps_per_trial
        elif args.calibrate_steps_per_trial:
            try:
                test_status, steps_per_trial = _run_calibration(args, common_args, env)
            except RuntimeError as e:
                logging.error(e)
                total_errors += 1
                results.result_fail(test)
                continue

            if _skipped_or_glmark2(test, test_status):
                results.result_skip(test)
                continue

            if not steps_per_trial:
                logging.error('Test %s missing steps_per_trial' % test)
                results.result_fail(test)
                continue
            trial_limit = 'steps_per_trial=%d' % steps_per_trial
        else:
            steps_per_trial = None
            trial_limit = 'trial_time=%d' % args.trial_time

        logging.info('Test %d/%d: %s (samples=%d trials_per_sample=%d %s)' %
                     (test_index + 1, len(tests), test, args.samples_per_test,
                      args.trials_per_sample, trial_limit))

        wall_times = []
        test_histogram_set = histogram_set.HistogramSet()
        for sample in range(args.samples_per_test):
            try:
                test_status, sample_metrics, sample_histogram = _run_perf(
                    args, common_args, env, steps_per_trial)
            except RuntimeError as e:
                logging.error(e)
                results.result_fail(test)
                total_errors += 1
                break

            if _skipped_or_glmark2(test, test_status):
                results.result_skip(test)
                break

            if not sample_metrics:
                logging.error('Test %s failed to produce a sample output' % test)
                results.result_fail(test)
                break

            sample_wall_times = [
                float(m['value']) for m in sample_metrics if m['metric'] == '.wall_time'
            ]

            logging.info('Test %d/%d Sample %d/%d wall_times: %s' %
                         (test_index + 1, len(tests), sample + 1, args.samples_per_test,
                          str(sample_wall_times)))

            if len(sample_wall_times) != args.trials_per_sample:
                logging.error('Test %s failed to record some wall_times (expected %d, got %d)' %
                              (test, args.trials_per_sample, len(sample_wall_times)))
                results.result_fail(test)
                break

            wall_times += sample_wall_times
            test_histogram_set.Merge(sample_histogram)
            metrics.append(sample_metrics)

        if not results.has_result(test):
            assert len(wall_times) == (args.samples_per_test * args.trials_per_sample)
            stats = _wall_times_stats(wall_times)
            if stats:
                logging.info('Test %d/%d: %s: %s' % (test_index + 1, len(tests), test, stats))
            histograms.Merge(_merge_into_one_histogram(test_histogram_set))
            results.result_pass(test)

    return results, histograms, metrics


def _find_test_suite_directory(test_suite):
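    """Returns '.' if the suite binary is in the current directory, otherwise the out/
    subdirectory containing the most recently built binary, or None if none is found."""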
    if os.path.exists(angle_test_util.ExecutablePathInCurrentDir(test_suite)):
        return '.'

    if angle_test_util.IsWindows():
        test_suite += '.exe'

    # Find most recent binary in search paths.
    newest_binary = None
    newest_mtime = None

    for path in glob.glob('out/*'):
        binary_path = str(pathlib.Path(SCRIPT_DIR).parent.parent / path / test_suite)
        if os.path.exists(binary_path):
            binary_mtime = os.path.getmtime(binary_path)
            if (newest_binary is None) or (binary_mtime > newest_mtime):
                newest_binary = binary_path
                newest_mtime = binary_mtime

    if newest_binary:
        logging.info('Found %s in %s' % (test_suite, os.path.dirname(newest_binary)))
        return os.path.dirname(newest_binary)
    return None


def _split_shard_samples(tests, samples_per_test, shard_count, shard_index):
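    """Shards (test, sample) pairs instead of whole tests, so samples of the same test
    are spread across shards to reduce per-machine variance."""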
    test_samples = [(test, sample) for test in tests for sample in range(samples_per_test)]
    shard_test_samples = _shard_tests(test_samples, shard_count, shard_index)
    return [test for (test, sample) in shard_test_samples]


def _should_lock_gpu_clocks():
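    """Only lock GPU clocks on Windows machines reporting a GeForce GTX 1660 via nvidia-smi."""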
    if not angle_test_util.IsWindows():
        return False

    try:
        gpu_info = subprocess.check_output(
            ['nvidia-smi', '--query-gpu=gpu_name', '--format=csv,noheader']).decode()
    except FileNotFoundError:
        # Expected in some cases, e.g. non-NVIDIA bots.
        return False

    logging.info('nvidia-smi --query-gpu=gpu_name output: %s' % gpu_info)

    return gpu_info.strip() == 'GeForce GTX 1660'


def _log_nvidia_gpu_temperature():
    t = subprocess.check_output(
        ['nvidia-smi', '--query-gpu=temperature.gpu', '--format=csv,noheader']).decode().strip()
    logging.info('Current GPU temperature: %s' % t)


@contextlib.contextmanager
def _maybe_lock_gpu_clocks():
    if not _should_lock_gpu_clocks():
        yield
        return

    # Lock to 1410 MHz (see `nvidia-smi --query-supported-clocks=gr --format=csv`)
    lgc_out = subprocess.check_output(['nvidia-smi', '--lock-gpu-clocks=1410,1410']).decode()
    logging.info('Lock GPU clocks output: %s' % lgc_out)
    _log_nvidia_gpu_temperature()
    try:
        yield
    finally:
        rgc_out = subprocess.check_output(['nvidia-smi', '--reset-gpu-clocks']).decode()
        logging.info('Reset GPU clocks output: %s' % rgc_out)
        _log_nvidia_gpu_temperature()


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--isolated-script-test-output', type=str)
    parser.add_argument('--isolated-script-test-perf-output', type=str)
    parser.add_argument(
        '-f', '--filter', '--isolated-script-test-filter', type=str, help='Test filter.')
    suite_group = parser.add_mutually_exclusive_group()
    suite_group.add_argument(
        '--test-suite', '--suite', help='Test suite to run.', default=DEFAULT_TEST_SUITE)
    suite_group.add_argument(
        '-T',
        '--trace-tests',
        help='Run with the angle_trace_tests test suite.',
        action='store_true')
    parser.add_argument('--xvfb', help='Use xvfb.', action='store_true')
    parser.add_argument(
        '--shard-count',
        help='Number of shards for test splitting. Default is 1.',
        type=int,
        default=1)
    parser.add_argument(
        '--shard-index',
        help='Index of the current shard for test splitting. Default is 0.',
        type=int,
        default=0)
    parser.add_argument(
        '-l', '--log', help='Log output level. Default is %s.' % DEFAULT_LOG, default=DEFAULT_LOG)
    parser.add_argument(
        '-s',
        '--samples-per-test',
        help='Number of samples to run per test. Default is %d.' % DEFAULT_SAMPLES,
        type=int,
        default=DEFAULT_SAMPLES)
    parser.add_argument(
        '-t',
        '--trials-per-sample',
        help='Number of trials to run per sample. Default is %d.' % DEFAULT_TRIALS,
        type=int,
        default=DEFAULT_TRIALS)
    trial_group = parser.add_mutually_exclusive_group()
    trial_group.add_argument(
        '--steps-per-trial', help='Fixed number of steps to run per trial.', type=int)
    trial_group.add_argument(
        '--trial-time',
        help='Number of seconds to run per trial. Default is %d.' % DEFAULT_TRIAL_TIME,
        type=int,
        default=DEFAULT_TRIAL_TIME)
    trial_group.add_argument(
        '--calibrate-steps-per-trial',
        help='Automatically determine a number of steps per trial.',
        action='store_true')
    parser.add_argument(
        '--max-errors',
        help='After this many errors, abort the run. Default is %d.' % DEFAULT_MAX_ERRORS,
        type=int,
        default=DEFAULT_MAX_ERRORS)
    parser.add_argument(
        '--smoke-test-mode', help='Do a quick run to validate correctness.', action='store_true')
    parser.add_argument(
        '--warmup-trials',
        help='Number of warmup trials to run in the perf test. Default is %d.' %
        DEFAULT_WARMUP_TRIALS,
        type=int,
        default=DEFAULT_WARMUP_TRIALS)
    parser.add_argument(
        '--show-test-stdout', help='Prints all test stdout during execution.', action='store_true')
    parser.add_argument(
        '--perf-counters', help='Colon-separated list of extra perf counter metrics.')
    parser.add_argument(
        '-a',
        '--auto-dir',
        help='Run with the most recent test suite found in the build directories.',
        action='store_true')
    parser.add_argument(
        '--split-shard-samples',
        help='Attempt to mitigate variance between machines by splitting samples between shards.',
        action='store_true')

    args, extra_flags = parser.parse_known_args()

    if args.trace_tests:
        args.test_suite = angle_test_util.ANGLE_TRACE_TEST_SUITE

    angle_test_util.SetupLogging(args.log.upper())

    start_time = time.time()

    # Use fast execution for smoke test mode.
    if args.smoke_test_mode:
        args.steps_per_trial = 1
        args.trials_per_sample = 1
        args.samples_per_test = 1

    env = os.environ.copy()

    if angle_test_util.HasGtestShardsAndIndex(env):
        args.shard_count, args.shard_index = angle_test_util.PopGtestShardsAndIndex(env)

    if args.auto_dir:
        test_suite_dir = _find_test_suite_directory(args.test_suite)
        if not test_suite_dir:
            logging.fatal('Could not find test suite: %s' % args.test_suite)
            return EXIT_FAILURE
        else:
            os.chdir(test_suite_dir)

    angle_test_util.Initialize(args.test_suite)

    # Get test list
    exit_code, output, _ = _run_test_suite(args, ['--list-tests', '--verbose'] + extra_flags, env)
    if exit_code != EXIT_SUCCESS:
        logging.fatal('Could not find test list from test output:\n%s' % output)
        sys.exit(EXIT_FAILURE)
    tests = angle_test_util.GetTestsFromOutput(output)

    if args.filter:
        tests = angle_test_util.FilterTests(tests, args.filter)

    # Get tests for this shard (if using sharding args)
    if args.split_shard_samples and args.shard_count >= args.samples_per_test:
        tests = _split_shard_samples(tests, args.samples_per_test, args.shard_count,
                                     args.shard_index)
        assert (len(set(tests)) == len(tests))
        args.samples_per_test = 1
    else:
        tests = _shard_tests(tests, args.shard_count, args.shard_index)

    if not tests:
        logging.error('No tests to run.')
        return EXIT_FAILURE

    if angle_test_util.IsAndroid() and args.test_suite == android_helper.ANGLE_TRACE_TEST_SUITE:
        android_helper.RunSmokeTest()

    logging.info('Running %d test%s' % (len(tests), 's' if len(tests) > 1 else ''))

    try:
        with _maybe_lock_gpu_clocks():
            results, histograms, metrics = _run_tests(tests, args, extra_flags, env)
    except _MaxErrorsException:
        logging.error('Error count exceeded max errors (%d). Aborting.' % args.max_errors)
        return EXIT_FAILURE

    for test in tests:
        assert results.has_result(test)

    if args.isolated_script_test_output:
        results.save_to_output_file(args.test_suite, args.isolated_script_test_output)

        # Uses special output files to match the merge script.
        _save_extra_output_files(args, results, histograms, metrics)

    if args.isolated_script_test_perf_output:
        with open(args.isolated_script_test_perf_output, 'w') as out_file:
            out_file.write(json.dumps(histograms.AsDicts(), indent=2))

    end_time = time.time()
    logging.info('Elapsed time: %.2lf seconds.' % (end_time - start_time))

    if results.has_failures():
        return EXIT_FAILURE
    return EXIT_SUCCESS


if __name__ == '__main__':
    sys.exit(main())