• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/python2
2#
3# Copyright (c) 2018 The Chromium Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
7import logging
8import os
9import json
10import math
11import re
12import numpy
13
14from autotest_lib.server import test
15from autotest_lib.server.cros import telemetry_runner
16from autotest_lib.client.common_lib import error
17
18# This test detects issues with low-throughput latency-sensitive workloads
19# caused by entering idle state.
20#
21# Such loads sleep regularly but also need to wake up and hit deadlines. We've
22# observed on some systems that if idle-state is enabled, we miss a lot of
23# deadlines (even though the compute capacity is sufficient).
24#
# This test runs selected pages of the rendering.desktop benchmark (see PAGES
# below) with idle-state both enabled and disabled, and looks for a
# discrepancy in the results. This workload is quite noisy, so we run multiple
# times and take N * stddev as the threshold for flagging an issue.
29#
30# In testing, this approach seemed quite robust, if the parameters (repetitions
31# and threshold) are set appropriately. Increasing page-set repetitions helped a
32# lot (reduces noise), as did selecting a good value for N (which trades off
33# false positives vs. false negatives).
34#
35# Based on testing, we found good results by using 5 indicative pages, setting
36# pageset-repetitions to 7, and taking the mean - 2 * stddev as the estimate
37# for "we can be confident that the true regression is not worse than this".
38#
39# This results in under-estimating the regression (typically by around 2 with
40# a healthy system), so false alarms should be rare or non-existent. In testing
41# 50 iterations with a good and bad system, this identified 100% of regressions
42# and non-regressions correctly (in fact mean - 1 * stddev would also have done
43# so, but this seems a bit marginal).
44
# Number of times each page is repeated within a single benchmark run.
PAGESET_REPEAT = 7

# Pages (stories) of the benchmark to run. Set to a subset of pages for a
# shorter test, or to None to run every page in rendering.desktop.
#
# Simpler pages emphasise the issue more, because the system is more likely to
# enter idle state while rendering them.
#
# The pages below were chosen by running all pages many times on a system
# which exhibits the issue, and keeping those with a high value of
# mean_regression - 2 * stddev, i.e. the pages which give the clearest
# indication of a regression. The selection mixes real pages
# (e.g. blogspot_2018) with synthetic ones
# (e.g. transform_transitions_js_block).
#
# For a longer test, 'twitter_2018' and 'wikipedia_2018' can be added.
PAGES = [
    'blogspot_2018',
    'transform_transitions_js_block',
    'throughput_scrolling_passive_handler',
]

# Name of the telemetry benchmark to run.
BENCHMARK = 'rendering.desktop'

# sysfs control file for disabling an idle state. The two placeholders are
# filled in with the cpu number and the idle state number, in that order.
DISABLE_PATH = '/sys/devices/system/cpu/cpu{}/cpuidle/state{}/disable'
67
class kernel_IdlePerf(test.test):
    """
    Server side regression test for performance impact of idle-state.

    This test runs some smoothness tests with and without sleep enabled, to
    check that the impact of enabling sleep is not significant.

    """
    version = 1

    # Set by _check_sysfs() just before it starts toggling idle states, so
    # that cleanup() knows the idle states have to be restored.
    _cleanup_required = False

    # A page passes if its idle-enabled mean score is no more than this many
    # combined standard deviations below its idle-disabled mean score.
    _STDDEV_MULTIPLIER = 2

    def _check_sysfs(self, host):
        """Verify that the DUT lets us toggle CPU idle states through sysfs.

        Populates self.states (the idle state numbers found under cpu0,
        excluding state0) and self.cpu_count, then performs a disable /
        re-enable round trip to confirm that the sysfs writes take effect.

        @param host: host we are running telemetry on.

        @raises error.TestNAError: if `uname -m` does not report aarch64.
        @raises error.TestError: if the sysfs control file is absent, or if
                the idle state cannot be disabled or re-enabled.

        """
        # First check that we are on a suitable DUT which offers the ability to
        # disable the idle state
        arch = host.run_output('uname -m')
        if arch != 'aarch64':
            # Idle states differ between CPU architectures, so this test would
            # need further development to support other platforms.
            raise error.TestNAError('Test only supports Arm aarch64 CPUs')
        if not host.path_exists(DISABLE_PATH.format(0, 1)):
            logging.error('sysfs path absent: cannot disable idle state')
            raise error.TestError('Cannot disable idle state')

        # Identify available idle states. state0 is running state; other states
        # should be disabled when disabling idle.
        self.states = []
        state_dirs = host.run_output(
            'ls -1 /sys/devices/system/cpu/cpu0/cpuidle/')
        for state in state_dirs.split('\n'):
            # Look for dirnames like 'state1' (but exclude 'state0')
            if re.match(r'state[1-9][0-9]*$', state):
                self.states.append(int(state[len('state'):]))
        logging.info('Found idle states: {}'.format(self.states))

        self.cpu_count = int(host.run_output('nproc --all'))
        logging.info('Found {} cpus'.format(self.cpu_count))
        logging.info('Idle enabled = {}'.format(self._is_idle_enabled(host)))

        # From this point on we expect the test to be able to run, so we will
        # need to ensure that the idle state is restored when the test exits
        self._cleanup_required = True
        self._enable_idle(host, False)
        if self._is_idle_enabled(host):
            logging.error('Failed to disable idle state')
            raise error.TestError('Cannot disable idle state')
        self._enable_idle(host, True)
        if not self._is_idle_enabled(host):
            logging.error('Failed to re-enable idle state')
            raise error.TestError('Cannot re-enable idle state')

    def _is_idle_enabled(self, host):
        """Report whether idle is enabled, as sampled from cpu0 / state1.

        Only the 'disable' file of state1 on cpu0 is read; other cpus and
        idle states are not inspected.

        @param host: host to query.

        @returns True if the sampled 'disable' file reads '0'.

        """
        return host.run_output('cat ' + DISABLE_PATH.format(0, 1)) == '0'

    def _enable_idle(self, host, enable):
        """Enable or disable every idle state in self.states on every cpu.

        Requires self.cpu_count and self.states, which _check_sysfs() sets.

        @param host: host to modify.
        @param enable: True to enable the idle states, False to disable them.

        """
        logging.info('Setting idle enabled to {}'.format(enable))
        # The sysfs file is a 'disable' switch, so the value is inverted.
        disable_value = '0' if enable else '1'
        for cpu in range(self.cpu_count):
            for state in self.states:
                path = DISABLE_PATH.format(cpu, state)
                host.run_output('echo {} > {}'.format(disable_value, path))

    def _parse_results_file(self, path):
        """Extract per-story smoothness scores from a histogram JSON file.

        Every entry named 'exp_percentage_smooth' contributes its first sample
        value to the story referenced by its 'stories' diagnostic. That
        diagnostic is a guid, which is resolved through the entries that carry
        both a 'guid' and a non-empty 'values' list.

        @param path: path of the histogram JSON file to read.

        @returns dict mapping story name to a dict with the keys
                'raw_exp_percentage_smooth_scores' (list of collected
                samples), 'exp_percentage_smooth' (their mean) and 'std'
                (their standard deviation).

        """
        with open(path) as fp:
            histogram_json = json.load(fp)

        # Map each guid to the first of the values it stands for.
        guids = {x["guid"]: x["values"][0] for x in histogram_json
                 if "guid" in x and "values" in x and len(x["values"]) > 0}

        scores = {}
        for e in histogram_json:
            if "name" in e and e["name"] == "exp_percentage_smooth":
                story_guid = e["diagnostics"]["stories"]
                story = guids[story_guid]
                scores.setdefault(story, []).append(e["sampleValues"][0])

        for story in scores:
            samples = scores[story]
            scores[story] = {
                'raw_exp_percentage_smooth_scores': samples,
                'exp_percentage_smooth': numpy.mean(samples),
                'std': numpy.std(samples)
            }

        return scores

    def _compare_results(self, idle_enabled, idle_disabled):
        """Compare per-page scores of the idle-enabled and -disabled runs.

        @param idle_enabled: dict as returned by _parse_results_file() for the
                run with idle enabled.
        @param idle_disabled: the same, for the run with idle disabled. It
                must contain every page present in idle_enabled.

        @returns dict with an overall 'passed' flag, plus one entry per page
                (keyed by the page name with non-word characters replaced by
                '_') holding both inputs, the difference of the means, the
                combined standard deviation and the per-page 'passed' flag.

        """
        results = {
            'passed': True
        }
        for page in idle_enabled:
            disabled = idle_disabled[page]
            enabled = idle_enabled[page]
            # Positive when enabling idle lowers the smoothness score.
            diff = (disabled['exp_percentage_smooth']
                    - enabled['exp_percentage_smooth'])
            # Combine the two standard deviations in quadrature.
            diff_std = math.sqrt(enabled['std'] ** 2 + disabled['std'] ** 2)
            passed = (enabled['exp_percentage_smooth'] >=
                      (disabled['exp_percentage_smooth']
                       - diff_std * self._STDDEV_MULTIPLIER))
            # Page names may contain non-word characters, so replace them
            # before using the name as a result key.
            key = re.sub(r'\W', '_', page)
            results[key] = {
                'idle_enabled': enabled,
                'idle_disabled': disabled,
                'difference': diff,
                'difference_std': diff_std,
                'passed': passed
            }
            results['passed'] = results['passed'] and passed
        return results

    def _run_telemetry(self, host, telemetry, enable):
        """Run the benchmark once with idle set as requested.

        After a successful run, 'histograms.json' in self.resultsdir is renamed
        to a name that depends on `enable`, and the renamed file is parsed.

        @param host: host we are running telemetry on.
        @param telemetry: object whose run_telemetry_benchmark() is used to
                run the benchmark (run_once() passes a TelemetryRunner).
        @param enable: True to run with idle enabled, False with it disabled.

        @returns the per-story scores, see _parse_results_file().

        @raises error.TestFail: if the benchmark status is not
                telemetry_runner.SUCCESS_STATUS.

        """
        logging.info('Running telemetry with idle enabled = {}'.format(enable))
        self._enable_idle(host, enable)

        args = ['--pageset-repeat={}'.format(PAGESET_REPEAT)]
        if PAGES:
            # One anchored (exact match) group per page, joined as
            # alternatives. The backslash escaping is kept exactly as is;
            # presumably it is needed for quoting on the way to telemetry -
            # confirm before changing it.
            stories = r'\|'.join(r'\(^' + p + r'$\)' for p in PAGES)
            story_filter = '--story-filter={}'.format(stories)
            args.append(story_filter)

        logging.info('Running telemetry with args: {}'.format(args))
        result = telemetry.run_telemetry_benchmark(
            BENCHMARK, self, *args)
        if result.status != telemetry_runner.SUCCESS_STATUS:
            raise error.TestFail('Failed to run benchmark')

        # ensure first run doesn't get overwritten by second run
        default_path = os.path.join(self.resultsdir, 'histograms.json')
        if enable:
            unique_name = 'results-histograms-idle-enabled.json'
        else:
            unique_name = 'results-histograms-idle-disabled.json'
        unique_path = os.path.join(self.resultsdir, unique_name)
        os.rename(default_path, unique_path)

        return self._parse_results_file(unique_path)

    def run_once(self, host=None, args=None):
        """Run the telemetry scrolling benchmark.

        The benchmark is run with idle enabled and then with idle disabled;
        the comparison of the two runs is written out as perf keyvals.

        @param host: host we are running telemetry on.
        @param args: dict of test arguments. The string 'True' under the key
                'local' makes the TelemetryRunner run locally. Defaults to no
                arguments.

        @raises error.TestFail: if the benchmark fails to run, or if any page
                scores significantly worse with idle enabled.

        """
        # Avoid a shared mutable default argument.
        if args is None:
            args = {}

        logging.info('Checking sysfs')
        self._check_sysfs(host)

        local = args.get('local') == 'True'
        telemetry = telemetry_runner.TelemetryRunner(
                        host, local, telemetry_on_dut=False)

        logging.info('Starting test')
        results_idle = self._run_telemetry(host, telemetry, True)
        results_noidle = self._run_telemetry(host, telemetry, False)

        # Score is the regression in percentage of smooth frames caused by
        # enabling CPU idle.
        logging.info('Processing results')
        results = self._compare_results(results_idle, results_noidle)

        self.write_perf_keyval(results)

        if not results['passed']:
            raise error.TestFail('enabling CPU idle significantly '
                                 'regresses scrolling performance')

    def cleanup(self, host):
        """Cleanup of the test.

        Re-enables the idle states if _check_sysfs() got as far as toggling
        them.

        @param host: host we are running telemetry on.

        """
        if self._cleanup_required:
            logging.info('Restoring idle to enabled')
            self._enable_idle(host, True)
241