#! /usr/bin/env vpython3
#
# Copyright 2021 The ANGLE Project Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#
# run_perf_test.py:
#   Runs ANGLE perf tests using some statistical averaging.

import argparse
import fnmatch
import importlib
import io
import json
import logging
import time
import os
import re
import subprocess
import sys

# Add //src/testing into sys.path for importing xvfb and test_env, and
# //src/testing/scripts for importing common.
d = os.path.dirname
THIS_DIR = d(os.path.abspath(__file__))
ANGLE_DIR = d(d(THIS_DIR))
sys.path.append(os.path.join(ANGLE_DIR, 'testing'))
sys.path.append(os.path.join(ANGLE_DIR, 'testing', 'scripts'))

import common
import test_env
import xvfb

sys.path.append(os.path.join(ANGLE_DIR, 'third_party', 'catapult', 'tracing'))
from tracing.value import histogram
from tracing.value import histogram_set
from tracing.value import merge_histograms

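# Test structure: each test runs for --samples-per-test samples; each sample
# runs --trials-per-sample trials; each trial executes a fixed
# (--steps-per-trial) or calibrated number of steps.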
DEFAULT_TEST_SUITE = 'angle_perftests'
DEFAULT_LOG = 'info'
DEFAULT_SAMPLES = 4
DEFAULT_TRIALS = 3
DEFAULT_MAX_ERRORS = 3
DEFAULT_WARMUP_LOOPS = 2
DEFAULT_CALIBRATION_TIME = 2

# Filters out stuff like: " I   72.572s run_tests_on_device(96071FFAZ00096) "
ANDROID_LOGGING_PREFIX = r'I +\d+.\d+s \w+\(\w+\)  '

# Test expectations
FAIL = 'FAIL'
PASS = 'PASS'
SKIP = 'SKIP'

EXIT_FAILURE = 1
EXIT_SUCCESS = 0


def is_windows():
    return sys.platform == 'cygwin' or sys.platform.startswith('win')


def get_binary_name(binary):
    if is_windows():
        return '.\\%s.exe' % binary
    else:
        return './%s' % binary


def _popen(*args, **kwargs):
    assert 'creationflags' not in kwargs
    if sys.platform == 'win32':
        # Necessary for signal handling. See crbug.com/733612#c6.
        kwargs['creationflags'] = subprocess.CREATE_NEW_PROCESS_GROUP
    return subprocess.Popen(*args, **kwargs)


def run_command_with_output(argv, stdoutfile, env=None, cwd=None, log=True):
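    """Runs argv as a subprocess, teeing combined stdout/stderr into stdoutfile
    and optionally echoing it to this script's stdout. Returns the exit code."""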
    assert stdoutfile
    with io.open(stdoutfile, 'wb') as writer, \
          io.open(stdoutfile, 'rb') as reader:
        process = _popen(argv, env=env, cwd=cwd, stdout=writer, stderr=subprocess.STDOUT)
        test_env.forward_signals([process])
        while process.poll() is None:
            if log:
                sys.stdout.write(reader.read().decode('utf-8'))
            # This sleep is needed for signal propagation. See the
            # wait_with_signals() docstring.
            time.sleep(0.1)
        if log:
            sys.stdout.write(reader.read().decode('utf-8'))
        return process.returncode


def _run_and_get_output(args, cmd, env):
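    """Runs cmd, under xvfb when --xvfb is given, capturing its output in a
    temporary file. Returns (exit_code, list of stripped output lines)."""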
    lines = []
    logging.debug(' '.join(cmd))
    with common.temporary_file() as tempfile_path:
        if args.xvfb:
            exit_code = xvfb.run_executable(cmd, env, stdoutfile=tempfile_path)
        else:
            exit_code = run_command_with_output(
                cmd, env=env, stdoutfile=tempfile_path, log=args.show_test_stdout)
        with open(tempfile_path) as f:
            for line in f:
                lines.append(line.strip())
    return exit_code, lines


def _filter_tests(tests, pattern):
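    """Returns the tests whose names match the fnmatch-style pattern."""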
    return [test for test in tests if fnmatch.fnmatch(test, pattern)]


def _shard_tests(tests, shard_count, shard_index):
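    """Selects this shard's tests by striding through the full list: test i
    belongs to shard (i % shard_count)."""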
    return [tests[index] for index in range(shard_index, len(tests), shard_count)]


def _get_results_from_output(output, result):
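    """Returns every value reported for the metric named by `result` (e.g.
    'wall_time' or 'steps_to_run') as a list of floats, or None if absent."""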
    output = '\n'.join(output)
    m = re.search(r'Running (\d+) tests', output)
    if m and int(m.group(1)) > 1:
        raise Exception('Found more than one test result in output')

    # Results are reported in the format:
    # name_backend.result: story= value units.
    pattern = r'\.' + result + r':.*= ([0-9.]+)'
    logging.debug('Searching for %s in output' % pattern)
    m = re.findall(pattern, output)
    if not m:
        logging.warning('Did not find the result "%s" in the test output:\n%s' % (result, output))
        return None

    return [float(value) for value in m]


def _get_tests_from_output(lines):
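    """Parses the test names printed between the 'Tests list:' and
    'End tests list.' markers, stripping any Android logging prefix."""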
    seen_start_of_tests = False
    tests = []
    android_prefix = re.compile(ANDROID_LOGGING_PREFIX)
    logging.debug('Read %d lines from test output.' % len(lines))
    for line in lines:
        line = android_prefix.sub('', line.strip())
        if line == 'Tests list:':
            seen_start_of_tests = True
        elif line == 'End tests list.':
            break
        elif seen_start_of_tests:
            tests.append(line)
    if not seen_start_of_tests:
        raise Exception('Did not find test list in test output!')
    logging.debug('Found %d tests from test output.' % len(tests))
    return tests


def _truncated_list(data, n):
    """Compute a truncated list, n is truncation size"""
    if len(data) < n * 2:
        raise ValueError('list not large enough to truncate')
    return sorted(data)[n:-n]


def _mean(data):
    """Return the sample arithmetic mean of data."""
    n = len(data)
    if n < 1:
        raise ValueError('mean requires at least one data point')
    return float(sum(data)) / float(n)  # in Python 2 use sum(data)/float(n)


def _sum_of_square_deviations(data, c):
    """Return sum of square deviations of sequence data."""
    ss = sum((float(x) - c)**2 for x in data)
    return ss


def _coefficient_of_variation(data):
    """Calculates the population coefficient of variation."""
    n = len(data)
    if n < 2:
        raise ValueError('variance requires at least two data points')
    c = _mean(data)
    ss = _sum_of_square_deviations(data, c)
    pvar = ss / n  # the population variance
    stddev = (pvar**0.5)  # population standard deviation
    return stddev / c


def _save_extra_output_files(args, results, histograms):
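    """Writes test_results.json and perf_results.json into a <test_suite>
    subdirectory of the isolated output directory for the merge script."""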
    isolated_out_dir = os.path.dirname(args.isolated_script_test_output)
    if not os.path.isdir(isolated_out_dir):
        return
    benchmark_path = os.path.join(isolated_out_dir, args.test_suite)
    if not os.path.isdir(benchmark_path):
        os.makedirs(benchmark_path)
    test_output_path = os.path.join(benchmark_path, 'test_results.json')
    results.save_to_json_file(test_output_path)
    perf_output_path = os.path.join(benchmark_path, 'perf_results.json')
    logging.info('Saving perf histograms to %s.' % perf_output_path)
    with open(perf_output_path, 'w') as out_file:
        out_file.write(json.dumps(histograms.AsDicts(), indent=2))


class Results:
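    """Collects per-test PASS/FAIL/SKIP outcomes in the JSON Test Results
    Format (version 3) layout used for --isolated-script-test-output."""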

    def __init__(self):
        self._results = {
            'tests': {},
            'interrupted': False,
            'seconds_since_epoch': time.time(),
            'path_delimiter': '.',
            'version': 3,
            'num_failures_by_type': {
                FAIL: 0,
                PASS: 0,
                SKIP: 0,
            },
        }
        self._test_results = {}

    def has_result(self, test):
        return test in self._test_results

    def result_skip(self, test):
        self._test_results[test] = {'expected': SKIP, 'actual': SKIP}
        self._results['num_failures_by_type'][SKIP] += 1

    def result_pass(self, test):
        self._test_results[test] = {'expected': PASS, 'actual': PASS}
        self._results['num_failures_by_type'][PASS] += 1

    def result_fail(self, test):
        self._test_results[test] = {'expected': PASS, 'actual': FAIL, 'is_unexpected': True}
        self._results['num_failures_by_type'][FAIL] += 1

    def save_to_output_file(self, test_suite, fname):
        self._update_results(test_suite)
        with open(fname, 'w') as out_file:
            out_file.write(json.dumps(self._results, indent=2))

    def save_to_json_file(self, fname):
        logging.info('Saving test results to %s.' % fname)
        with open(fname, 'w') as out_file:
            out_file.write(json.dumps(self._results, indent=2))

    def _update_results(self, test_suite):
        if self._test_results:
            self._results['tests'][test_suite] = self._test_results
            self._test_results = {}


def main():
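    """Lists the tests in --test-suite, applies filtering and sharding, then for
    each test calibrates steps-per-trial (unless fixed), runs the requested
    samples and trials, aggregates wall_time with a truncated mean, and writes
    JSON results and merged perf histograms."""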
    parser = argparse.ArgumentParser()
    parser.add_argument('--isolated-script-test-output', type=str)
    parser.add_argument('--isolated-script-test-perf-output', type=str)
    parser.add_argument(
        '-f', '--filter', '--isolated-script-test-filter', type=str, help='Test filter.')
    parser.add_argument('--test-suite', help='Test suite to run.', default=DEFAULT_TEST_SUITE)
    parser.add_argument('--xvfb', help='Use xvfb.', action='store_true')
    parser.add_argument(
        '--shard-count',
        help='Number of shards for test splitting. Default is 1.',
        type=int,
        default=1)
    parser.add_argument(
        '--shard-index',
        help='Index of the current shard for test splitting. Default is 0.',
        type=int,
        default=0)
    parser.add_argument(
        '-l', '--log', help='Log output level. Default is %s.' % DEFAULT_LOG, default=DEFAULT_LOG)
    parser.add_argument(
        '-s',
        '--samples-per-test',
        help='Number of samples to run per test. Default is %d.' % DEFAULT_SAMPLES,
        type=int,
        default=DEFAULT_SAMPLES)
    parser.add_argument(
        '-t',
        '--trials-per-sample',
        help='Number of trials to run per sample. Default is %d.' % DEFAULT_TRIALS,
        type=int,
        default=DEFAULT_TRIALS)
    parser.add_argument(
        '--steps-per-trial', help='Fixed number of steps to run per trial.', type=int)
    parser.add_argument(
        '--max-errors',
        help='After this many errors, abort the run. Default is %d.' % DEFAULT_MAX_ERRORS,
        type=int,
        default=DEFAULT_MAX_ERRORS)
    parser.add_argument(
        '--smoke-test-mode', help='Do a quick run to validate correctness.', action='store_true')
    parser.add_argument(
        '--warmup-loops',
        help='Number of warmup loops to run in the perf test. Default is %d.' %
        DEFAULT_WARMUP_LOOPS,
        type=int,
        default=DEFAULT_WARMUP_LOOPS)
    parser.add_argument(
        '--calibration-time',
        help='Amount of time to spend each loop in calibration and warmup. Default is %d seconds.'
        % DEFAULT_CALIBRATION_TIME,
        type=int,
        default=DEFAULT_CALIBRATION_TIME)
    parser.add_argument(
        '--show-test-stdout', help='Prints all test stdout during execution.', action='store_true')
    parser.add_argument(
        '--perf-counters', help='Colon-separated list of extra perf counter metrics.')

    args, extra_flags = parser.parse_known_args()

    importlib.reload(logging)
    logging.basicConfig(level=args.log.upper())

    start_time = time.time()

    # Use fast execution for smoke test mode.
    if args.smoke_test_mode:
        args.steps_per_trial = 1
        args.trials_per_sample = 1
        args.samples_per_test = 1

    env = os.environ.copy()

    # Get sharding args
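    # Consume the GTEST sharding variables (env.pop below) so they are not
    # passed on to the test binary; sharding is applied in this script via
    # _shard_tests.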
    if 'GTEST_TOTAL_SHARDS' in env and int(env['GTEST_TOTAL_SHARDS']) != 1:
        if 'GTEST_SHARD_INDEX' not in env:
            logging.error('Sharding params must be specified together.')
            sys.exit(1)
        args.shard_count = int(env.pop('GTEST_TOTAL_SHARDS'))
        args.shard_index = int(env.pop('GTEST_SHARD_INDEX'))

    # The harness currently uploads all traces in a batch, which is very slow.
    # TODO: Reduce lag from trace uploads and remove this. http://anglebug.com/6854
    env['DEVICE_TIMEOUT_MULTIPLIER'] = '20'

    # Get test list
    cmd = [get_binary_name(args.test_suite), '--list-tests', '--verbose']
    exit_code, lines = _run_and_get_output(args, cmd, env)
    if exit_code != EXIT_SUCCESS:
        logging.fatal('Could not find test list from test output:\n%s' % '\n'.join(lines))
    tests = _get_tests_from_output(lines)

    if args.filter:
        tests = _filter_tests(tests, args.filter)

    # Get tests for this shard (if using sharding args)
    tests = _shard_tests(tests, args.shard_count, args.shard_index)

    num_tests = len(tests)
    if num_tests == 0:
        logging.error('No tests to run.')
        return EXIT_FAILURE

    logging.info('Running %d test%s' % (num_tests, 's' if num_tests > 1 else ''))

    # Run tests
    results = Results()

    histograms = histogram_set.HistogramSet()
    total_errors = 0

    for test_index in range(num_tests):
        test = tests[test_index]
        cmd = [
            get_binary_name(args.test_suite),
            '--gtest_filter=%s' % test,
            '--extract-test-list-from-filter',
            '--enable-device-cache',
            '--skip-clear-data',
            '--use-existing-test-data',
            '--verbose',
            '--calibration-time',
            str(args.calibration_time),
        ]
        if args.steps_per_trial:
            steps_per_trial = args.steps_per_trial
        else:
            cmd_calibrate = cmd + [
                '--calibration',
                '--warmup-loops',
                str(args.warmup_loops),
            ]
            exit_code, calibrate_output = _run_and_get_output(args, cmd_calibrate, env)
            if exit_code != EXIT_SUCCESS:
                logging.fatal('%s failed. Output:\n%s' %
                              (cmd_calibrate[0], '\n'.join(calibrate_output)))
                total_errors += 1
                results.result_fail(test)
                continue
            steps_per_trial = _get_results_from_output(calibrate_output, 'steps_to_run')
            if not steps_per_trial:
                logging.warning('Skipping test %s' % test)
                continue
            assert (len(steps_per_trial) == 1)
            steps_per_trial = int(steps_per_trial[0])
        logging.info('Test %d/%d: %s (samples=%d trials_per_sample=%d steps_per_trial=%d)' %
                     (test_index + 1, num_tests, test, args.samples_per_test,
                      args.trials_per_sample, steps_per_trial))
        wall_times = []
        test_histogram_set = histogram_set.HistogramSet()
        for sample in range(args.samples_per_test):
            if total_errors >= args.max_errors:
                logging.error('Error count exceeded max errors (%d). Aborting.' % args.max_errors)
                return EXIT_FAILURE

            cmd_run = cmd + [
                '--steps-per-trial',
                str(steps_per_trial),
                '--trials',
                str(args.trials_per_sample),
            ]

            if args.smoke_test_mode:
                cmd_run += ['--no-warmup']
            else:
                cmd_run += ['--warmup-loops', str(args.warmup_loops)]

            if args.perf_counters:
                cmd_run += ['--perf-counters', args.perf_counters]

            with common.temporary_file() as histogram_file_path:
                cmd_run += ['--isolated-script-test-perf-output=%s' % histogram_file_path]
                exit_code, output = _run_and_get_output(args, cmd_run, env)
                if exit_code != EXIT_SUCCESS:
                    logging.error('%s failed. Output:\n%s' % (cmd_run[0], '\n'.join(output)))
                    results.result_fail(test)
                    total_errors += 1
                    break

                sample_wall_times = _get_results_from_output(output, 'wall_time')
                if not sample_wall_times:
                    # This can be intentional for skipped tests. They are handled below.
                    logging.warning('Test %s failed to produce a sample output' % test)
                    break
                logging.info('Test %d/%d Sample %d/%d wall_times: %s' %
                             (test_index + 1, num_tests, sample + 1, args.samples_per_test,
                              str(sample_wall_times)))
                wall_times += sample_wall_times
                with open(histogram_file_path) as histogram_file:
                    sample_json = json.load(histogram_file)
                    sample_histogram = histogram_set.HistogramSet()
                    sample_histogram.ImportDicts(sample_json)
                    test_histogram_set.Merge(sample_histogram)

        if not results.has_result(test):
            if not wall_times:
                logging.warning('Skipping test %s. Assuming this is intentional.' % test)
                results.result_skip(test)
            elif len(wall_times) == (args.samples_per_test * args.trials_per_sample):
                if len(wall_times) > 7:
                    truncation_n = len(wall_times) >> 3
                    logging.debug(
                        'Truncation: Removing the %d highest and lowest times from wall_times.' %
                        truncation_n)
                    wall_times = _truncated_list(wall_times, truncation_n)

                if len(wall_times) > 1:
                    logging.info('Test %d/%d: %s: truncated mean wall_time = %.2f, cov = %.2f%%' %
                                 (test_index + 1, num_tests, test, _mean(wall_times),
                                  (_coefficient_of_variation(wall_times) * 100.0)))
                results.result_pass(test)

                # Merge the histogram set into one histogram
                with common.temporary_file() as merge_histogram_path:
                    logging.info('Writing merged histograms to %s.' % merge_histogram_path)
                    with open(merge_histogram_path, 'w') as merge_histogram_file:
                        json.dump(test_histogram_set.AsDicts(), merge_histogram_file)
                        merge_histogram_file.close()
                    merged_dicts = merge_histograms.MergeHistograms(
                        merge_histogram_path, groupby=['name'])
                    merged_histogram = histogram_set.HistogramSet()
                    merged_histogram.ImportDicts(merged_dicts)
                    histograms.Merge(merged_histogram)
            else:
                logging.error('Test %s failed to record some samples' % test)

    if args.isolated_script_test_output:
        results.save_to_output_file(args.test_suite, args.isolated_script_test_output)

        # Uses special output files to match the merge script.
        _save_extra_output_files(args, results, histograms)

    if args.isolated_script_test_perf_output:
        with open(args.isolated_script_test_perf_output, 'w') as out_file:
            out_file.write(json.dumps(histograms.AsDicts(), indent=2))

    end_time = time.time()
    logging.info('Elapsed time: %.2lf seconds.' % (end_time - start_time))

    return EXIT_SUCCESS

# This is not really a "script test", so it does not need to manually add
# any additional compile targets.
def main_compile_targets(args):
    json.dump([], args.output)


if __name__ == '__main__':
    # Conform minimally to the protocol defined by ScriptTest.
    if 'compile_targets' in sys.argv:
        funcs = {
            'run': None,
            'compile_targets': main_compile_targets,
        }
        sys.exit(common.run_script(sys.argv[1:], funcs))
    sys.exit(main())