1#!/usr/bin/python2 2# 3# Copyright (c) 2018 The Chromium Authors. All rights reserved. 4# Use of this source code is governed by a BSD-style license that can be 5# found in the LICENSE file. 6 7import logging 8import os 9import json 10import math 11import re 12import numpy 13 14from autotest_lib.server import test 15from autotest_lib.server.cros import telemetry_runner 16from autotest_lib.client.common_lib import error 17 18# This test detects issues with low-throughput latency-sensitive workloads 19# caused by entering idle state. 20# 21# Such loads sleep regularly but also need to wake up and hit deadlines. We've 22# observed on some systems that if idle-state is enabled, we miss a lot of 23# deadlines (even though the compute capacity is sufficient). 24# 25# This test runs top_25_smooth with idle-state both enabled and disabled, and 26# looks for a discrepancy in the results. This workload is quite noisy, so 27# we run multiple times and take N * stdev as the threshold for flagging an 28# issue. 29# 30# In testing, this approach seemed quite robust, if the parameters (repetitions 31# and threshold) are set appropriately. Increasing page-set repetitions helped a 32# lot (reduces noise), as did selecting a good value for N (which trades off 33# false positives vs. false negatives). 34# 35# Based on testing, we found good results by using 5 indicative pages, setting 36# pageset-repetitions to 7, and taking the mean - 2 * stddev as the estimate 37# for "we can be confident that the true regression is not worse than this". 38# 39# This results in under-estimating the regression (typically by around 2 with 40# a healthy system), so false alarms should be rare or non-existent. In testing 41# 50 iterations with a good and bad system, this identified 100% of regressions 42# and non-regressions correctly (in fact mean - 1 * stddev would also have done 43# so, but this seems a bit marginal). 
# Repeat each page this many times; more repetitions reduce noise in the
# smoothness scores.
PAGESET_REPEAT = 7

# PAGES can be set to a subset of pages to run for a shorter test, or None to
# run all pages in rendering.desktop.
# Simpler pages emphasise the issue more, as the system is more likely to enter
# idle state.
#
# These were selected by running all pages many times (on a system which
# exhibits the issue), and choosing pages which have a high value
# for mean_regression - 2 * stddev - i.e. give the clearest indication of a
# regression.
# The exact page set selected is a mix of real pages (e.g. blogspot_2018) and
# synthetic (e.g. transform_transitions_js_block)
# For a longer test, 'twitter_2018', 'wikipedia_2018' can be added to PAGES.
PAGES = ['blogspot_2018', 'transform_transitions_js_block',
         'throughput_scrolling_passive_handler']

# Benchmark to run
BENCHMARK = 'rendering.desktop'

# Path to sysfs control file for disabling idle state; formatted with
# (cpu_number, state_number).
DISABLE_PATH = '/sys/devices/system/cpu/cpu{}/cpuidle/state{}/disable'


class kernel_IdlePerf(test.test):
    """
    Server side regression test for performance impact of idle-state.

    This test runs some smoothness tests with and without sleep enabled, to
    check that the impact of enabling sleep is not significant.

    """
    version = 1
    _cleanup_required = False

    def _check_sysfs(self, host):
        """Verify the DUT lets us control idle states, and record topology.

        Populates self.states (idle-state indices other than the running
        state 0) and self.cpu_count, and confirms that idle can be both
        disabled and re-enabled via sysfs.

        @param host: host object representing the DUT.

        @raises error.TestNAError: if the CPU is not Arm aarch64.
        @raises error.TestError: if idle states cannot be controlled.

        """
        # First check that we are on a suitable DUT which offers the ability
        # to disable the idle state.
        arch = host.run_output('uname -m')
        if arch != 'aarch64':
            # Idle states differ between CPU architectures, so this test would
            # need further development to support other platforms.
            raise error.TestNAError('Test only supports Arm aarch64 CPUs')
        if not host.path_exists(DISABLE_PATH.format(0, 1)):
            logging.error('sysfs path absent: cannot disable idle state')
            raise error.TestError('Cannot disable idle state')

        # Identify available idle states. state0 is running state; other
        # states should be disabled when disabling idle.
        self.states = []
        state_dirs = host.run_output(
            'ls -1 /sys/devices/system/cpu/cpu0/cpuidle/')
        for state in state_dirs.split('\n'):
            # Look for dirnames like 'state1' (but exclude 'state0')
            if re.match(r'state[1-9][0-9]*$', state):
                self.states.append(int(state[5:]))
        logging.info('Found idle states: {}'.format(self.states))

        self.cpu_count = int(host.run_output('nproc --all'))
        logging.info('Found {} cpus'.format(self.cpu_count))
        logging.info('Idle enabled = {}'.format(self._is_idle_enabled(host)))

        # From this point on we expect the test to be able to run, so we will
        # need to ensure that the idle state is restored when the test exits.
        self._cleanup_required = True
        self._enable_idle(host, False)
        if self._is_idle_enabled(host):
            logging.error('Failed to disable idle state')
            raise error.TestError('Cannot disable idle state')
        self._enable_idle(host, True)
        if not self._is_idle_enabled(host):
            logging.error('Failed to re-enable idle state')
            raise error.TestError('Cannot disable idle state')

    def _is_idle_enabled(self, host):
        """Return True if idle state 1 on cpu0 is currently enabled.

        @param host: host object representing the DUT.

        """
        return host.run_output('cat ' + DISABLE_PATH.format(0, 1)) == '0'

    def _enable_idle(self, host, enable):
        """Enable or disable all discovered idle states on all CPUs.

        @param host: host object representing the DUT.
        @param enable: True to enable idle states, False to disable them.

        """
        logging.info('Setting idle enabled to {}'.format(enable))
        # The sysfs file semantics are inverted: writing '1' to 'disable'
        # disables the state.
        x = '0' if enable else '1'
        for cpu in range(0, self.cpu_count):
            for state in self.states:
                path = DISABLE_PATH.format(cpu, state)
                host.run_output('echo {} > {}'.format(x, path))

    def _parse_results_file(self, path):
        """Parse a telemetry histograms JSON file into per-story statistics.

        @param path: path to a histograms.json results file.

        @return dict mapping story name to a dict with keys
                'raw_exp_percentage_smooth_scores' (list of sample values),
                'exp_percentage_smooth' (mean) and 'std' (stddev).

        """
        with open(path) as fp:
            histogram_json = json.load(fp)

        # Map diagnostic guids to their first value; used below to resolve
        # the story name referenced by each histogram entry.
        guids = {x["guid"]: x["values"][0] for x in histogram_json
                 if "guid" in x and "values" in x and len(x["values"]) > 0}

        # Collect all exp_percentage_smooth samples, grouped by story.
        scores = {}
        for e in histogram_json:
            if "name" in e and e["name"] == "exp_percentage_smooth":
                story_guid = e["diagnostics"]["stories"]
                story = guids[story_guid]
                scores.setdefault(story, []).append(e["sampleValues"][0])

        for story in scores:
            scores[story] = {
                'raw_exp_percentage_smooth_scores': scores[story],
                'exp_percentage_smooth': numpy.mean(scores[story]),
                'std': numpy.std(scores[story])
            }

        return scores

    def _compare_results(self, idle_enabled, idle_disabled):
        """Compare smoothness scores measured with idle enabled vs. disabled.

        A page passes if its idle-enabled mean is within two combined
        standard deviations of its idle-disabled mean, i.e. any regression
        is within the noise margin (see the comment at the top of this file).

        @param idle_enabled: per-story scores with idle states enabled.
        @param idle_disabled: per-story scores with idle states disabled.

        @return dict of per-page comparison results plus an overall
                'passed' boolean.

        """
        results = {
            'passed': True
        }
        for page in idle_enabled:
            diff = (idle_disabled[page]['exp_percentage_smooth']
                    - idle_enabled[page]['exp_percentage_smooth'])
            # Standard deviation of the difference of two independent
            # measurements.
            diff_std = (math.sqrt(idle_enabled[page]['std'] ** 2
                                  + idle_disabled[page]['std'] ** 2))
            passed = (idle_enabled[page]['exp_percentage_smooth'] >=
                      (idle_disabled[page]['exp_percentage_smooth']
                       - diff_std * 2))
            # Sanitise the page name into a perf-keyval-safe key.
            # (raw string: '\W' is an invalid escape sequence otherwise)
            key = re.sub(r'\W', '_', page)
            results[key] = {
                'idle_enabled': idle_enabled[page],
                'idle_disabled': idle_disabled[page],
                'difference': diff,
                'difference_std': diff_std,
                'passed': passed
            }
            results['passed'] = results['passed'] and passed
        return results

    def _run_telemetry(self, host, telemetry, enable):
        """Run the benchmark once with idle states set as requested.

        @param host: host object representing the DUT.
        @param telemetry: TelemetryRunner used to drive the benchmark.
        @param enable: True to run with idle states enabled.

        @return parsed per-story scores (see _parse_results_file).

        @raises error.TestFail: if the benchmark fails to run.

        """
        logging.info('Running telemetry with idle enabled = {}'.format(enable))
        self._enable_idle(host, enable)

        args = ['--pageset-repeat={}'.format(PAGESET_REPEAT)]
        if PAGES:
            stories = r'\|'.join(r'\(^' + p + r'$\)' for p in PAGES)
            story_filter = '--story-filter={}'.format(stories)
            args.append(story_filter)

        logging.info('Running telemetry with args: {}'.format(args))
        result = telemetry.run_telemetry_benchmark(
                BENCHMARK, self, *args)
        if result.status != telemetry_runner.SUCCESS_STATUS:
            raise error.TestFail('Failed to run benchmark')

        # Rename the output file so the first run's results don't get
        # overwritten by the second run.
        default_path = os.path.join(self.resultsdir, 'histograms.json')
        if enable:
            unique_path = os.path.join(self.resultsdir,
                                       'results-histograms-idle-enabled.json')
        else:
            unique_path = os.path.join(self.resultsdir,
                                       'results-histograms-idle-disabled.json')
        os.rename(default_path, unique_path)

        return self._parse_results_file(unique_path)

    def run_once(self, host=None, args=None):
        """Run the telemetry scrolling benchmark.

        @param host: host we are running telemetry on.
        @param args: optional dict of test args; args['local'] == 'True'
                selects a locally-built telemetry.

        """
        # Avoid a mutable default argument; treat a missing dict as empty.
        args = args or {}

        logging.info('Checking sysfs')
        self._check_sysfs(host)

        local = args.get('local') == 'True'
        telemetry = telemetry_runner.TelemetryRunner(
            host, local, telemetry_on_dut=False)

        logging.info('Starting test')
        results_idle = self._run_telemetry(host, telemetry, True)
        results_noidle = self._run_telemetry(host, telemetry, False)

        # Score is the regression in percentage of smooth frames caused by
        # enabling CPU idle.
        logging.info('Processing results')
        results = self._compare_results(results_idle, results_noidle)

        self.write_perf_keyval(results)

        if not results['passed']:
            raise error.TestFail('enabling CPU idle significantly '
                                 'regresses scrolling performance')

    def cleanup(self, host):
        """Cleanup of the test: re-enable idle states if we disabled them.

        @param host: host we are running telemetry on.

        """
        if self._cleanup_required:
            logging.info('Restoring idle to enabled')
            self._enable_idle(host, True)