1#!/usr/bin/python 2# 3# Copyright (c) 2018 The Chromium Authors. All rights reserved. 4# Use of this source code is governed by a BSD-style license that can be 5# found in the LICENSE file. 6 7import logging 8import os 9import json 10import math 11import re 12 13from autotest_lib.server import test 14from autotest_lib.server.cros import telemetry_runner 15from autotest_lib.client.common_lib import error 16 17# This test detects issues with low-throughput latency-sensitive workloads 18# caused by entering idle state. 19# 20# Such loads sleep regularly but also need to wake up and hit deadlines. We've 21# observed on some systems that if idle-state is enabled, we miss a lot of 22# deadlines (even though the compute capacity is sufficient). 23# 24# This test runs top_25_smooth with idle-state both enabled and disabled, and 25# looks for a discrepancy in the results. This workload is quite noisy, so 26# we run multiple times and take N * stdev as the threshold for flagging an 27# issue. 28# 29# In testing, this approach seemed quite robust, if the parameters (repetitions 30# and threshold) are set appropriately. Increasing page-set repetitions helped a 31# lot (reduces noise), as did selecting a good value for N (which trades off 32# false positives vs. false negatives). 33# 34# Based on testing, we found good results by using 5 indicative pages, setting 35# pageset-repetitions to 7, and taking the mean - 2 * stddev as the estimate 36# for "we can be confident that the true regression is not worse than this". 37# 38# This results in under-estimating the regression (typically by around 2 with 39# a healthy system), so false alarms should be rare or non-existent. In testing 40# 50 iterations with a good and bad system, this identified 100% of regressions 41# and non-regressions correctly (in fact mean - 1 * stddev would also have done 42# so, but this seems a bit marginal). 
# Repeat each page given number of times
PAGESET_REPEAT = 7

# PAGES can be set to a subset of pages to run for a shorter test, or None to
# run all pages in top_25_smooth.
# Simpler pages emphasise the issue more, as the system is more likely to enter
# idle state.
#
# These were selected by running all pages many times (on a system which
# exhibits the issue), and choosing the 5 pages which have the highest values
# for mean_regression - 2 * stddev - i.e. give the clearest indication of a
# regression.
PAGES = ['games.yahoo', 'Blogger', 'LinkedIn', 'cats', 'booking']

# Path to sysfs control file for disabling idle state
DISABLE_PATH = '/sys/devices/system/cpu/cpu{}/cpuidle/state{}/disable'


class kernel_IdlePerf(test.test):
    """
    Server side regression test for performance impact of idle-state.

    This test runs some smoothness tests with and without sleep enabled, to
    check that the impact of enabling sleep is not significant.

    """
    version = 1
    # Set once the test has started toggling idle states, so that cleanup()
    # knows it must restore them.
    _cleanup_required = False

    def _check_sysfs(self, host):
        """Verify that the DUT lets us toggle idle states and record them.

        Populates self.states (idle state indices other than state0) and
        self.cpu_count, then performs a disable/enable round trip to confirm
        that the sysfs controls actually take effect.

        @param host: host we are running telemetry on.

        @raises error.TestNAError: if the DUT is not an aarch64 system.
        @raises error.TestError: if the sysfs control file is missing, or
                idle state could not be disabled or re-enabled.

        """
        # First check that we are on a suitable DUT which offers the ability to
        # disable the idle state
        arch = host.run_output('uname -m')
        if arch != 'aarch64':
            # Idle states differ between CPU architectures, so this test would
            # need further development to support other platforms.
            raise error.TestNAError('Test only supports Arm aarch64 CPUs')
        if not host.path_exists(DISABLE_PATH.format(0, 1)):
            logging.error('sysfs path absent: cannot disable idle state')
            raise error.TestError('Cannot disable idle state')

        # Identify available idle states. state0 is running state; other states
        # should be disabled when disabling idle.
        self.states = []
        state_dirs = host.run_output(
                'ls -1 /sys/devices/system/cpu/cpu0/cpuidle/')
        for state in state_dirs.split('\n'):
            # Look for dirnames like 'state1' (but exclude 'state0')
            if re.match(r'state[1-9][0-9]*$', state):
                self.states.append(int(state[len('state'):]))
        logging.info('Found idle states: %s', self.states)

        self.cpu_count = int(host.run_output('nproc --all'))
        logging.info('Found %s cpus', self.cpu_count)
        logging.info('Idle enabled = %s', self._is_idle_enabled(host))

        # From this point on we expect the test to be able to run, so we will
        # need to ensure that the idle state is restored when the test exits
        self._cleanup_required = True
        self._enable_idle(host, False)
        if self._is_idle_enabled(host):
            logging.error('Failed to disable idle state')
            raise error.TestError('Cannot disable idle state')
        self._enable_idle(host, True)
        if not self._is_idle_enabled(host):
            logging.error('Failed to re-enable idle state')
            # Report the failure that actually happened (the original message
            # claimed a failure to *disable*).
            raise error.TestError('Cannot re-enable idle state')

    def _is_idle_enabled(self, host):
        """Return True if the 'disable' file of cpu0/state1 reads '0'.

        Only cpu0/state1 is sampled; it is used as a proxy for all CPUs and
        states.

        @param host: host we are running telemetry on.

        """
        return host.run_output('cat ' + DISABLE_PATH.format(0, 1)) == '0'

    def _enable_idle(self, host, enable):
        """Enable or disable every recorded idle state on every CPU.

        Relies on self.cpu_count and self.states, which _check_sysfs() sets.

        @param host: host we are running telemetry on.
        @param enable: True to enable idle states, False to disable them.

        """
        logging.info('Setting idle enabled to %s', enable)
        # The sysfs file is a *disable* flag, so the value is inverted.
        value = '0' if enable else '1'
        for cpu in range(self.cpu_count):
            for state in self.states:
                path = DISABLE_PATH.format(cpu, state)
                host.run_output('echo {} > {}'.format(value, path))

    def _parse_results_file(self, path):
        """Extract per-page smoothness scores from a results-chart json file.

        @param path: path of the json file to read.

        @returns dict mapping page name to a dict with keys
                'percentage_smooth' (mean of the page's 'values') and 'std'
                (taken from the file). The 'summary' entry is skipped.

        """
        def _mean(values):
            return sum(values) / float(len(values))

        with open(path) as fp:
            histogram_json = json.load(fp)

        # list of % smooth scores for each page and for each pageset-repetition
        smooth_chart = histogram_json['charts']['percentage_smooth']
        scores = {}
        for page, page_result in smooth_chart.items():
            if page == 'summary':
                continue
            scores[page] = {
                'percentage_smooth': _mean(page_result['values']),
                'std': page_result['std'],
            }
        return scores

    def _compare_results(self, idle_enabled, idle_disabled):
        """Compare per-page scores with idle enabled against idle disabled.

        A page passes when its idle-enabled score is no lower than the
        idle-disabled score minus twice the combined standard deviation.

        @param idle_enabled: scores from _parse_results_file() for the run
                with idle enabled.
        @param idle_disabled: scores from _parse_results_file() for the run
                with idle disabled.

        @returns dict with an overall 'passed' flag plus one entry per page
                (keyed by the page name with non-word characters replaced by
                '_') holding both inputs, their difference, the combined
                std and the per-page 'passed' flag.

        """
        results = {
            'passed': True
        }
        for page, enabled in idle_enabled.items():
            disabled = idle_disabled[page]
            diff = (disabled['percentage_smooth']
                    - enabled['percentage_smooth'])
            diff_std = math.sqrt(enabled['std'] ** 2 + disabled['std'] ** 2)
            # Use >= rather than >: identical scores with zero spread (e.g.
            # both runs fully smooth) are not a regression and must pass.
            passed = (enabled['percentage_smooth'] >=
                      (disabled['percentage_smooth'] - diff_std * 2))
            key = re.sub(r'\W', '_', page)
            results[key] = {
                'idle_enabled': enabled,
                'idle_disabled': disabled,
                'difference': diff,
                'difference_std': diff_std,
                'passed': passed
            }
            results['passed'] = results['passed'] and passed
        return results

    def _run_telemetry(self, host, telemetry, enable):
        """Run the smoothness benchmark with idle set as requested.

        The default results-chart.json is renamed after the run so that the
        two runs do not overwrite each other.

        @param host: host we are running telemetry on.
        @param telemetry: runner object providing run_telemetry_benchmark().
        @param enable: True to run with idle enabled, False with it disabled.

        @returns per-page scores as produced by _parse_results_file().

        @raises error.TestFail: if the benchmark does not report success.

        """
        logging.info('Running telemetry with idle enabled = %s', enable)
        self._enable_idle(host, enable)

        args = ['--pageset-repeat={}'.format(PAGESET_REPEAT)]
        if PAGES:
            # Build an escaped alternation of the selected page names.
            stories = r'\|'.join(r'\(' + p + r'\)' for p in PAGES)
            story_filter = '--story-filter={}'.format(stories)
            args.append(story_filter)

        logging.info('Running telemetry with args: %s', args)
        result = telemetry.run_telemetry_benchmark(
                'smoothness.top_25_smooth', self, *args)
        if result.status != telemetry_runner.SUCCESS_STATUS:
            raise error.TestFail('Failed to run benchmark')

        # ensure first run doesn't get overwritten by second run
        default_path = os.path.join(self.resultsdir, 'results-chart.json')
        if enable:
            unique_name = 'results-chart-idle-enabled.json'
        else:
            unique_name = 'results-chart-idle-disabled.json'
        unique_path = os.path.join(self.resultsdir, unique_name)
        os.rename(default_path, unique_path)

        return self._parse_results_file(unique_path)

    def run_once(self, host=None, args=None):
        """Run the telemetry scrolling benchmark.

        @param host: host we are running telemetry on.
        @param args: optional dict of test arguments; the 'local' entry is
                compared against the string 'True' and the result is passed
                on to TelemetryRunner.

        @raises error.TestFail: if enabling CPU idle significantly regresses
                the smoothness score of any page.

        """
        # Avoid a shared mutable default argument.
        if args is None:
            args = {}

        logging.info('Checking sysfs')
        self._check_sysfs(host)

        local = args.get('local') == 'True'
        telemetry = telemetry_runner.TelemetryRunner(
                host, local, telemetry_on_dut=False)

        logging.info('Starting test')
        results_idle = self._run_telemetry(host, telemetry, True)
        results_noidle = self._run_telemetry(host, telemetry, False)

        # Score is the regression in percentage of smooth frames caused by
        # enabling CPU idle.
        logging.info('Processing results')
        results = self._compare_results(results_idle, results_noidle)

        self.write_perf_keyval(results)

        if not results['passed']:
            raise error.TestFail('enabling CPU idle significantly '
                                 'regresses scrolling performance')

    def cleanup(self, host):
        """Cleanup of the test.

        Restores idle to enabled if the test got far enough to change it.

        @param host: host we are running telemetry on.

        """
        if self._cleanup_required:
            logging.info('Restoring idle to enabled')
            self._enable_idle(host, True)