• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/python
2#
3# Copyright (c) 2018 The Chromium Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
7import logging
8import os
9import json
10import math
11import re
12
13from autotest_lib.server import test
14from autotest_lib.server.cros import telemetry_runner
15from autotest_lib.client.common_lib import error
16
17# This test detects issues with low-throughput latency-sensitive workloads
18# caused by entering idle state.
19#
20# Such loads sleep regularly but also need to wake up and hit deadlines. We've
21# observed on some systems that if idle-state is enabled, we miss a lot of
22# deadlines (even though the compute capacity is sufficient).
23#
24# This test runs top_25_smooth with idle-state both enabled and disabled, and
25# looks for a discrepancy in the results. This workload is quite noisy, so
26# we run multiple times and take N * stdev as the threshold for flagging an
27# issue.
28#
29# In testing, this approach seemed quite robust, if the parameters (repetitions
30# and threshold) are set appropriately. Increasing page-set repetitions helped a
31# lot (reduces noise), as did selecting a good value for N (which trades off
32# false positives vs. false negatives).
33#
34# Based on testing, we found good results by using 5 indicative pages, setting
35# pageset-repetitions to 7, and taking the mean - 2 * stddev as the estimate
36# for "we can be confident that the true regression is not worse than this".
37#
38# This results in under-estimating the regression (typically by around 2 with
39# a healthy system), so false alarms should be rare or non-existent. In testing
40# 50 iterations with a good and bad system, this identified 100% of regressions
41# and non-regressions correctly (in fact mean - 1 * stddev would also have done
42# so, but this seems a bit marginal).
43
# Repeat each page the given number of times (reduces measurement noise; see
# rationale in the header comment above).
PAGESET_REPEAT = 7

# PAGES can be set to a subset of pages to run for a shorter test, or None to
# run all pages in top_25_smooth.
# Simpler pages emphasise the issue more, as the system is more likely to enter
# idle state.
#
# These were selected by running all pages many times (on a system which
# exhibits the issue), and choosing the 5 pages which have the highest values
# for mean_regression - 2 * stddev - i.e. give the clearest indication of a
# regression.
PAGES = ['games.yahoo', 'Blogger', 'LinkedIn', 'cats', 'booking']

# Path to sysfs control file for disabling idle state. Format with
# (cpu_index, state_index); writing '1' disables that idle state on that CPU,
# writing '0' re-enables it.
DISABLE_PATH = '/sys/devices/system/cpu/cpu{}/cpuidle/state{}/disable'
60
class kernel_IdlePerf(test.test):
    """
    Server side regression test for performance impact of idle-state.

    This test runs some smoothness tests with and without sleep enabled, to
    check that the impact of enabling sleep is not significant.

    """
    version = 1
    # Set True once sysfs has been touched, so cleanup() knows it must restore
    # the idle state.
    _cleanup_required = False

    def _check_sysfs(self, host):
        """Check the DUT supports idle-state control and discover topology.

        Populates self.states (the non-zero cpuidle state indices) and
        self.cpu_count, and verifies that idle states can actually be
        toggled via sysfs.

        @param host: host we are running telemetry on.
        @raises error.TestNAError: if the DUT is not Arm aarch64.
        @raises error.TestError: if idle states cannot be controlled.
        """
        # First check that we are on a suitable DUT which offers the ability to
        # disable the idle state
        arch = host.run_output('uname -m')
        if arch != 'aarch64':
            # Idle states differ between CPU architectures, so this test would
            # need further development to support other platforms.
            raise error.TestNAError('Test only supports Arm aarch64 CPUs')
        if not host.path_exists(DISABLE_PATH.format(0, 1)):
            logging.error('sysfs path absent: cannot disable idle state')
            raise error.TestError('Cannot disable idle state')

        # Identify available idle states. state0 is running state; other states
        # should be disabled when disabling idle.
        self.states = []
        state_dirs = host.run_output(
            'ls -1 /sys/devices/system/cpu/cpu0/cpuidle/')
        for state in state_dirs.split('\n'):
            # Look for dirnames like 'state1' (but exclude 'state0').
            # Raw string avoids the invalid '\'-escape warning.
            if re.match(r'state[1-9][0-9]*$', state):
                self.states.append(int(state[5:]))
        logging.info('Found idle states: {}'.format(self.states))

        self.cpu_count = int(host.run_output('nproc --all'))
        logging.info('Found {} cpus'.format(self.cpu_count))
        logging.info('Idle enabled = {}'.format(self._is_idle_enabled(host)))

        # From this point on we expect the test to be able to run, so we will
        # need to ensure that the idle state is restored when the test exits
        self._cleanup_required = True
        self._enable_idle(host, False)
        if self._is_idle_enabled(host):
            logging.error('Failed to disable idle state')
            raise error.TestError('Cannot disable idle state')
        self._enable_idle(host, True)
        if not self._is_idle_enabled(host):
            logging.error('Failed to re-enable idle state')
            # Bug fix: previous message incorrectly said 'Cannot disable'.
            raise error.TestError('Cannot re-enable idle state')

    def _is_idle_enabled(self, host):
        """Return True if idle state 1 on cpu0 is currently enabled.

        @param host: host we are running telemetry on.
        """
        return host.run_output('cat ' + DISABLE_PATH.format(0, 1)) == '0'

    def _enable_idle(self, host, enable):
        """Enable or disable every non-running idle state on every CPU.

        @param host: host we are running telemetry on.
        @param enable: True to enable idle states, False to disable them.
        """
        logging.info('Setting idle enabled to {}'.format(enable))
        # Writing '1' to the sysfs 'disable' node disables the state; '0'
        # re-enables it.
        x = '0' if enable else '1'
        for cpu in range(self.cpu_count):
            for state in self.states:
                path = DISABLE_PATH.format(cpu, state)
                host.run_output('echo {} > {}'.format(x, path))

    def _parse_results_file(self, path):
        """Extract per-page smoothness scores from a results-chart JSON file.

        @param path: path to a telemetry results-chart.json file.
        @return dict mapping page name to
                {'percentage_smooth': mean score, 'std': standard deviation}.
        """
        def _mean(values):
            # float() keeps true division under Python 2.
            return sum(values) / float(len(values))

        with open(path) as fp:
            histogram_json = json.load(fp)

        scores = {}
        # list of % smooth scores for each page and for each pageset-repetition
        for page in histogram_json['charts']['percentage_smooth']:
            if page == 'summary':
                continue
            page_result = histogram_json['charts']['percentage_smooth'][page]
            scores[page] = {'percentage_smooth': _mean(page_result['values']),
                            'std': page_result['std']
                           }
        return scores

    def _compare_results(self, idle_enabled, idle_disabled):
        """Compare smoothness scores from idle-enabled vs. idle-disabled runs.

        A page passes if its idle-enabled score is within 2 combined standard
        deviations of its idle-disabled score; the overall result passes only
        if every page passes.

        @param idle_enabled: per-page scores from the idle-enabled run.
        @param idle_disabled: per-page scores from the idle-disabled run.
        @return dict with one entry per page (keyval-safe key) plus an overall
                'passed' boolean.
        """
        results = {
            'passed': True
        }
        for page in idle_enabled:
            diff = (idle_disabled[page]['percentage_smooth']
                   - idle_enabled[page]['percentage_smooth'])
            # Std of the difference of two independent measurements.
            diff_std = (math.sqrt(idle_enabled[page]['std'] ** 2
                       + idle_disabled[page]['std'] ** 2))
            passed = (idle_enabled[page]['percentage_smooth'] >
                     (idle_disabled[page]['percentage_smooth'] - diff_std * 2))
            # Sanitize the page name into a keyval-safe key (raw string fixes
            # the invalid '\W' escape in the original).
            key = re.sub(r'\W', '_', page)
            results[key] = {
                'idle_enabled': idle_enabled[page],
                'idle_disabled': idle_disabled[page],
                'difference': diff,
                'difference_std': diff_std,
                'passed': passed
                }
            results['passed'] = results['passed'] and passed
        return results

    def _run_telemetry(self, host, telemetry, enable):
        """Run the smoothness benchmark with idle states enabled or disabled.

        @param host: host we are running telemetry on.
        @param telemetry: TelemetryRunner instance.
        @param enable: True runs with idle enabled, False with idle disabled.
        @return per-page scores (see _parse_results_file).
        @raises error.TestFail: if the benchmark fails to run.
        """
        logging.info('Running telemetry with idle enabled = {}'.format(enable))
        self._enable_idle(host, enable)

        args = ['--pageset-repeat={}'.format(PAGESET_REPEAT)]
        if PAGES:
            # Build an escaped \(page\)\|\(page\)... alternation for the
            # telemetry story filter.
            stories = r'\|'.join(r'\(' + p + r'\)' for p in PAGES)
            story_filter = '--story-filter={}'.format(stories)
            args.append(story_filter)

        logging.info('Running telemetry with args: {}'.format(args))
        result = telemetry.run_telemetry_benchmark(
            'smoothness.top_25_smooth', self, *args)
        if result.status != telemetry_runner.SUCCESS_STATUS:
            raise error.TestFail('Failed to run benchmark')

        # ensure first run doesn't get overwritten by second run
        default_path = os.path.join(self.resultsdir, 'results-chart.json')
        if enable:
            unique_path = os.path.join(self.resultsdir,
                                       'results-chart-idle-enabled.json')
        else:
            unique_path = os.path.join(self.resultsdir,
                                       'results-chart-idle-disabled.json')
        os.rename(default_path, unique_path)

        return self._parse_results_file(unique_path)

    def run_once(self, host=None, args=None):
        """Run the telemetry scrolling benchmark.

        @param host: host we are running telemetry on.
        @param args: optional dict of test arguments; args['local'] == 'True'
                     runs telemetry locally. Defaults to no arguments.

        """
        # Avoid a mutable default argument; None stands in for an empty dict.
        args = args or {}

        logging.info('Checking sysfs')
        self._check_sysfs(host)

        local = args.get('local') == 'True'
        telemetry = telemetry_runner.TelemetryRunner(
                        host, local, telemetry_on_dut=False)

        logging.info('Starting test')
        results_idle   = self._run_telemetry(host, telemetry, True)
        results_noidle = self._run_telemetry(host, telemetry, False)

        # Score is the regression in percentage of smooth frames caused by
        # enabling CPU idle.
        logging.info('Processing results')
        results = self._compare_results(results_idle, results_noidle)

        self.write_perf_keyval(results)

        if not results['passed']:
            raise error.TestFail('enabling CPU idle significantly '
                                 'regresses scrolling performance')

    def cleanup(self, host):
        """Cleanup of the test: restore idle states if we disabled them.

        @param host: host we are running telemetry on.

        """
        if self._cleanup_required:
            logging.info('Restoring idle to enabled')
            self._enable_idle(host, True)
228