# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

from __future__ import print_function

import json
import logging
import os
import re
import shlex
import shutil

from contextlib import contextmanager

from autotest_lib.client.common_lib import error
from autotest_lib.server import test
from autotest_lib.server import utils
from autotest_lib.server.cros import telemetry_runner
from autotest_lib.server.cros.crosperf import device_setup_utils
from autotest_lib.site_utils import test_runner_utils

WAIT_FOR_CMD_TIMEOUT_SECS = 60
DUT_COMMON_SSH_OPTIONS = [
        '-o StrictHostKeyChecking=no', '-o UserKnownHostsFile=/dev/null',
        '-o BatchMode=yes', '-o ConnectTimeout=30',
        '-o ServerAliveInterval=900', '-o ServerAliveCountMax=3',
        '-o ConnectionAttempts=4', '-o Protocol=2'
]
DUT_SCP_OPTIONS = ' '.join(DUT_COMMON_SSH_OPTIONS)

RSA_KEY = '-i %s' % test_runner_utils.TEST_KEY_PATH
DUT_CHROME_RESULTS_DIR = '/usr/local/telemetry/src'

TURBOSTAT_LOG = 'turbostat.log'
CPUSTATS_LOG = 'cpustats.log'
CPUINFO_LOG = 'cpuinfo.log'
TOP_LOG = 'top.log'
WAIT_TIME_LOG = 'wait_time.log'

# Result Statuses
SUCCESS_STATUS = 'SUCCESS'
WARNING_STATUS = 'WARNING'
FAILED_STATUS = 'FAILED'

# Regex for the RESULT output lines understood by chrome buildbot.
# Keep in sync with
# chromium/tools/build/scripts/slave/performance_log_processor.py.
RESULTS_REGEX = re.compile(r'(?P<IMPORTANT>\*)?RESULT '
                           r'(?P<GRAPH>[^:]*): (?P<TRACE>[^=]*)= '
                           r'(?P<VALUE>[\{\[]?[-\d\., ]+[\}\]]?)('
                           r' ?(?P<UNITS>.+))?')
HISTOGRAM_REGEX = re.compile(r'(?P<IMPORTANT>\*)?HISTOGRAM '
                             r'(?P<GRAPH>[^:]*): (?P<TRACE>[^=]*)= '
                             r'(?P<VALUE_JSON>{.*})(?P<UNITS>.+)?')
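# Illustrative examples of output lines these regexes are meant to match (the
# graph/trace names and values below are hypothetical):
#   *RESULT page_load_time: summary= [1200,1150,1300] ms
#   HISTOGRAM timeToFirstPaint: story_1= {"buckets": [1, 2]} ms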


class telemetry_Crosperf(test.test):
    """
    Run one or more telemetry benchmarks under the crosperf script.

    """
    version = 1

    def scp_telemetry_results(self, client_ip, dut, file_path, host_dir,
                              ignore_status=False):
        """
        Copy telemetry results from the DUT.

        @param client_ip: The IP address of the DUT.
        @param dut: The autotest host object representing the DUT.
        @param file_path: The file to copy from the DUT.
        @param host_dir: The directory on the host to put the file in.
        @param ignore_status: Whether to ignore failures of the scp command.

        @returns The exit status of the scp command.

        """
        cmd = ['scp', DUT_SCP_OPTIONS, RSA_KEY, '-v']
        port = ''

        if dut:
            port = dut.port
            ip = dut.hostname
        else:
            ip_and_port = client_ip.split(':')
            ip = ip_and_port[0]
            if len(ip_and_port) > 1:
                port = ip_and_port[1]

        if port:
            cmd.extend(['-P', str(port)])

        src = 'root@%s:%s' % (ip, file_path)
        cmd.extend([src, host_dir])
        command = ' '.join(cmd)

        logging.debug('Retrieving Results: %s', command)
        try:
            result = utils.run(
                    command,
                    timeout=WAIT_FOR_CMD_TIMEOUT_SECS,
                    ignore_status=ignore_status)
            exit_code = result.exit_status
        except Exception as e:
            logging.error('Failed to retrieve results: %s', e)
            raise

        if exit_code:
            logging.error('Command "%s" returned non-zero status: %d',
                          command, exit_code)
        return exit_code

    @contextmanager
    def no_background(self, *_args):
        """
        Background stub.

        """
        yield

    @contextmanager
    def run_in_background_with_log(self, cmd, dut, log_path):
        """
        Run a command on the DUT in the background and collect its log.

        NOTE: There is no error handling, exception, or error report if the
        command fails to run in the background. Command failures are silenced.

        """
        logging.info('Running in background:\n%s', cmd)
        pid = dut.run_background(cmd)
        try:
            # TODO(denik http://crbug.com/966514): replace with a more
            # reliable way to check run success/failure in background.
            # Wait some time before checking the process.
            check = dut.run('sleep 5; kill -0 %s' % pid, ignore_status=True)
            if check.exit_status != 0:
                # The command wasn't started correctly.
                logging.error(
                        "Background command wasn't started correctly.\n"
                        'Command:\n%s', cmd)
                pid = ''
                yield
                return

            logging.info('Background process started successfully, pid %s',
                         pid)
            yield

        finally:
            if pid:
                # Stop background processes.
                logging.info('Killing background process, pid %s', pid)
                # Kill the process blindly. OK if it's already gone.
                # There is an issue when underlying child processes stay alive
                # while the parent master process is killed.
                # The solution is to kill the chain of processes via the
                # process group id.
                dut.run('pgid=$(cat /proc/%s/stat | cut -d")" -f2 | '
                        'cut -d" " -f4) && kill -- -$pgid 2>/dev/null' % pid,
                        ignore_status=True)

            # Copy the log to the results directory, silencing failures.
            scp_res = self.scp_telemetry_results(
                    '', dut, log_path, self.resultsdir, ignore_status=True)
            if scp_res:
                logging.error('scp of %s failed with error %d.', log_path,
                              scp_res)

    def run_cpustats_in_background(self, dut, log_name):
        """
        Run a command to collect CPU stats in the background.

        """
        log_path = '/tmp/%s' % log_name
        cpu_stats_cmd = (
                'cpulog=%s; '
                'rm -f ${cpulog}; '
                # Stop after 720*0.5min=6hours if not killed at clean-up phase.
                'for i in {1..720} ; do '
                # Collect current CPU frequency on all cores and thermal data.
                ' paste -d" " '
                ' <(ls /sys/devices/system/cpu/cpu*/cpufreq/cpuinfo_cur_freq)'
                ' <(cat /sys/devices/system/cpu/cpu*/cpufreq/cpuinfo_cur_freq)'
                ' >> ${cpulog} || break; '  # Exit the loop on failure.
                ' paste -d" "'
                ' <(cat /sys/class/thermal/thermal_zone*/type)'
                ' <(cat /sys/class/thermal/thermal_zone*/temp)'
                # Filter in thermal data from only CPU-related sources.
                ' | grep -iE "soc|cpu|pkg|big|little" >> ${cpulog} || break; '
                # Repeat every 30 sec.
                ' sleep 30; '
                'done;') % log_path

        return self.run_in_background_with_log(cpu_stats_cmd, dut, log_path)

    def run_top_in_background(self, dut, log_name, interval_in_sec):
        """
        Run top in the background.

        """
        log_path = '/tmp/%s' % log_name
        top_cmd = (
                # Run top in batch mode with the specified interval and filter
                # out the top system summary and processes not consuming %CPU.
                # Output of each iteration is separated by a blank line.
                'HOME=/usr/local COLUMNS=128 top -bi -d%.1f'
                ' | grep -E "^[ 0-9]|^$" > %s;') % (interval_in_sec, log_path)

        return self.run_in_background_with_log(top_cmd, dut, log_path)

    def run_turbostat_in_background(self, dut, log_name):
        """
        Run turbostat in the background.

        """
        log_path = '/tmp/%s' % log_name
        turbostat_cmd = (
                'nohup turbostat --quiet --interval 10 '
                '--show=CPU,Bzy_MHz,Avg_MHz,TSC_MHz,Busy%%,IRQ,CoreTmp '
                '1> %s') % log_path

        return self.run_in_background_with_log(turbostat_cmd, dut, log_path)

    def run_cpuinfo(self, dut, log_name):
        """
        Collect CPU info of "dut" into the "log_name" file.

        """
        cpuinfo_cmd = (
                'for cpunum in '
                "   $(awk '/^processor/ { print $NF ; }' /proc/cpuinfo ) ; do "
                ' for i in'
                ' `ls -d /sys/devices/system/cpu/cpu"${cpunum}"/cpufreq/'
                '{cpuinfo_cur_freq,scaling_*_freq,scaling_governor} '
                '     2>/dev/null` ; do '
                '  paste -d" " <(echo "${i}") <(cat "${i}"); '
                ' done;'
                'done;'
                'for cpudata in'
                '  /sys/devices/system/cpu/intel_pstate/no_turbo'
                '  /sys/devices/system/cpu/online; do '
                ' if [[ -e "${cpudata}" ]] ; then '
                '   paste <(echo "${cpudata}") <(cat "${cpudata}"); '
                ' fi; '
                'done; '
                # Add thermal data to the log.
                'paste -d" "'
                ' <(cat /sys/class/thermal/thermal_zone*/type)'
                ' <(cat /sys/class/thermal/thermal_zone*/temp)')

        # Get CPU info at the end of the test.
        logging.info('Get cpuinfo: %s', cpuinfo_cmd)
        with open(os.path.join(self.resultsdir, log_name),
                  'w') as cpu_log_file:
            # Handle command failure gracefully.
            res = dut.run(
                    cpuinfo_cmd, stdout_tee=cpu_log_file, ignore_status=True)
            if res.exit_status:
                logging.error('Get cpuinfo command failed with %d.',
                              res.exit_status)

    def run_once(self, args, client_ip='', dut=None):
        """
        Run a single telemetry test.

        @param args: A dictionary of the arguments that were passed
                to this test.
        @param client_ip: The IP address of the DUT.
        @param dut: The autotest host object representing the DUT.

        @returns A TelemetryResult instance with the results of this execution.
        """
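        # Illustrative shape of the "args" dictionary handled below; the
        # values are hypothetical and only the keys read in this file are
        # shown:
        #   {'test': 'octane',
        #    'test_args': '',
        #    'profiler_args': '',
        #    'dut_config': '{"turbostat": true, "top_interval": 1}',
        #    'local': 'true',
        #    'run_local': 'false'}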
        test_name = args.get('test', '')
        test_args = args.get('test_args', '')
        profiler_args = args.get('profiler_args', '')

        dut_config_str = args.get('dut_config', '{}')
        dut_config = json.loads(dut_config_str)
        # Set up the device with dut_config arguments before running the test.
        if dut_config:
            wait_time = device_setup_utils.setup_device(dut, dut_config)
            # Wait time can be used to accumulate cooldown time in Crosperf.
            with open(os.path.join(self.resultsdir, WAIT_TIME_LOG), 'w') as f:
                f.write(str(wait_time))

        output_format = 'histograms'

        # For local runs, local=True and the local Chrome source is used to
        # run tests; for lab runs, the devserver is used instead.
        # Defaults to True.
        local = args.get('local', 'true').lower() == 'true'

        # If run_local=true, the telemetry benchmark runs on the DUT;
        # otherwise it runs remotely from the host. Defaults to False.
        # TODO(zhizhouy): It is better to change the field name from
        # "run_local" to "telemetry_on_dut" in crosperf experiment files for
        # consistency.
        telemetry_on_dut = args.get('run_local', '').lower() == 'true'

        # Init TelemetryRunner.
        tr = telemetry_runner.TelemetryRunner(
                dut, local=local, telemetry_on_dut=telemetry_on_dut)

        # Run the test and collect profiles if needed.
        try:
            # If profiler_args is specified, add several more options so that
            # run_benchmark collects system-wide profiles.
            if profiler_args:
                profiler_opts = [
                        '--interval-profiling-period=story_run',
                        '--interval-profiling-target=system_wide',
                        '--interval-profiler-options="%s"' % profiler_args
                ]

            top_interval = dut_config.get('top_interval', 0)
            turbostat = dut_config.get('turbostat')

            run_cpustats = self.run_cpustats_in_background if dut \
                    else self.no_background
            run_turbostat = self.run_turbostat_in_background if (
                    dut and turbostat) else self.no_background
            run_top = self.run_top_in_background if (
                    dut and top_interval > 0) else self.no_background

            # FIXME(denik): replace with ExitStack.
            with run_cpustats(dut, CPUSTATS_LOG) as _cpu_cm, \
                 run_turbostat(dut, TURBOSTAT_LOG) as _turbo_cm, \
                 run_top(dut, TOP_LOG, top_interval) as _top_cm:

                arguments = []
                if test_args:
                    arguments.extend(shlex.split(test_args))
                if profiler_args:
                    arguments.extend(profiler_opts)
                logging.debug('Telemetry Arguments: %s', arguments)
                perf_value_writer = self
                artifacts = bool(profiler_args)
                result = tr.run_telemetry_benchmark(
                        test_name,
                        perf_value_writer,
                        *arguments,
                        ex_output_format=output_format,
                        results_dir=self.resultsdir,
                        no_verbose=True,
                        artifacts=artifacts)
                logging.info('Telemetry completed with exit status: %s.',
                             result.status)
                logging.info('output: %s\n', result.output)

        except (error.TestFail, error.TestWarn):
            logging.debug(
                    'Test did not succeed while executing telemetry test.')
            raise
        except:
            logging.debug('Unexpected failure on telemetry_Crosperf.')
            raise
        finally:
            if dut:
                self.run_cpuinfo(dut, CPUINFO_LOG)

        # Check whether the result file exists.
        filepath = os.path.join(self.resultsdir, 'histograms.json')
        if not os.path.exists(filepath):
            raise RuntimeError('Missing results file: %s' % filepath)

        # Copy the perf data files into the test_that profiling directory,
        # if necessary. They always come from the DUT.
        if profiler_args:
            filepath = os.path.join(self.resultsdir, 'artifacts')
            if not os.path.isabs(filepath):
                raise RuntimeError('Expected absolute path of '
                                   'artifacts: %s' % filepath)
            perf_exist = False
            for root, _dirs, files in os.walk(filepath):
                for f in files:
                    if f.endswith('.perf.data'):
                        perf_exist = True
                        src_file = os.path.join(root, f)
                        # results-cache.py in crosperf supports multiple
                        # perf.data files, but only if each is named exactly
                        # 'perf.data'. Therefore, create a subdirectory for
                        # each perf.data file.
                        dst_dir = os.path.join(self.profdir, ''.join(
                                f.split('.')[:-2]))
                        os.makedirs(dst_dir)
                        dst_file = os.path.join(dst_dir, 'perf.data')
                        shutil.copyfile(src_file, dst_file)
            if not perf_exist:
                raise error.TestFail('Error: No profiles collected, the test '
                                     'may not have run correctly.')

        # In a skylab run, the environment variable SYNCHRONOUS_OFFLOAD_DIR is
        # set to a directory; in a test_that run, it is not set.
        synchronous_dir = os.getenv('SYNCHRONOUS_OFFLOAD_DIR', '')
        # For skylab runs, push logs and JSON results to the synchronous
        # offload directory for instant access by reports.
        if synchronous_dir and os.path.isdir(synchronous_dir):
            try:
                dst = os.path.join(synchronous_dir, 'results')
                shutil.copytree(self.resultsdir, dst)
            except:
                raise RuntimeError(
                        'Failed to copy results from %s to synchronous'
                        ' offload directory %s' % (self.resultsdir,
                                                   synchronous_dir))

        return result