1# Copyright 2013 The Chromium Authors. All rights reserved. 2# Use of this source code is governed by a BSD-style license that can be 3# found in the LICENSE file. 4 5"""Runs perf tests. 6 7Our buildbot infrastructure requires each slave to run steps serially. 8This is sub-optimal for android, where these steps can run independently on 9multiple connected devices. 10 11The buildbots will run this script multiple times per cycle: 12- First: all steps listed in --steps in will be executed in parallel using all 13connected devices. Step results will be pickled to disk. Each step has a unique 14name. The result code will be ignored if the step name is listed in 15--flaky-steps. 16The buildbot will treat this step as a regular step, and will not process any 17graph data. 18 19- Then, with -print-step STEP_NAME: at this stage, we'll simply print the file 20with the step results previously saved. The buildbot will then process the graph 21data accordingly. 22 23The JSON steps file contains a dictionary in the format: 24{ "version": int, 25 "steps": { 26 "foo": { 27 "device_affinity": int, 28 "cmd": "script_to_execute foo" 29 }, 30 "bar": { 31 "device_affinity": int, 32 "cmd": "script_to_execute bar" 33 } 34 } 35} 36 37The JSON flaky steps file contains a list with step names which results should 38be ignored: 39[ 40 "step_name_foo", 41 "step_name_bar" 42] 43 44Note that script_to_execute necessarily have to take at least the following 45option: 46 --device: the serial number to be passed to all adb commands. 47""" 48 49import collections 50import io 51import json 52import logging 53import os 54import pickle 55import re 56import shutil 57import sys 58import tempfile 59import threading 60import time 61import zipfile 62 63from devil.android import battery_utils 64from devil.android import device_errors 65from devil.android import forwarder 66from devil.constants import exit_codes 67from devil.utils import cmd_helper 68from pylib import constants 69from pylib.base import base_test_result 70from pylib.base import base_test_runner 71from pylib.constants import host_paths 72 73 74# Regex for the master branch commit position. 75_GIT_CR_POS_RE = re.compile(r'^Cr-Commit-Position: refs/heads/master@{#(\d+)}$') 76 77 78def _GetChromiumRevision(): 79 # pylint: disable=line-too-long 80 """Get the git hash and commit position of the chromium master branch. 81 82 See: https://chromium.googlesource.com/chromium/tools/build/+/master/scripts/slave/runtest.py#212 83 84 Returns: 85 A dictionary with 'revision' and 'commit_pos' keys. 86 """ 87 # pylint: enable=line-too-long 88 status, output = cmd_helper.GetCmdStatusAndOutput( 89 ['git', 'log', '-n', '1', '--pretty=format:%H%n%B', 'HEAD'], 90 host_paths.DIR_SOURCE_ROOT) 91 revision = None 92 commit_pos = None 93 if not status: 94 lines = output.splitlines() 95 revision = lines[0] 96 for line in reversed(lines): 97 m = _GIT_CR_POS_RE.match(line.strip()) 98 if m: 99 commit_pos = int(m.group(1)) 100 break 101 return {'revision': revision, 'commit_pos': commit_pos} 102 103 104def GetPersistedResult(test_name): 105 file_name = os.path.join(constants.PERF_OUTPUT_DIR, test_name) 106 if not os.path.exists(file_name): 107 logging.error('File not found %s', file_name) 108 return None 109 110 with file(file_name, 'r') as f: 111 return pickle.loads(f.read()) 112 113 114def OutputJsonList(json_input, json_output): 115 with file(json_input, 'r') as i: 116 all_steps = json.load(i) 117 118 step_values = [] 119 for k, v in all_steps['steps'].iteritems(): 120 data = {'test': k, 'device_affinity': v['device_affinity']} 121 122 persisted_result = GetPersistedResult(k) 123 if persisted_result: 124 data['start_time'] = persisted_result['start_time'] 125 data['end_time'] = persisted_result['end_time'] 126 data['total_time'] = persisted_result['total_time'] 127 data['has_archive'] = persisted_result['archive_bytes'] is not None 128 step_values.append(data) 129 130 with file(json_output, 'w') as o: 131 o.write(json.dumps(step_values)) 132 return 0 133 134 135def PrintTestOutput(test_name, json_file_name=None, archive_file_name=None): 136 """Helper method to print the output of previously executed test_name. 137 138 Args: 139 test_name: name of the test that has been previously executed. 140 json_file_name: name of the file to output chartjson data to. 141 archive_file_name: name of the file to write the compressed ZIP archive. 142 143 Returns: 144 exit code generated by the test step. 145 """ 146 persisted_result = GetPersistedResult(test_name) 147 if not persisted_result: 148 return exit_codes.INFRA 149 logging.info('*' * 80) 150 logging.info('Output from:') 151 logging.info(persisted_result['cmd']) 152 logging.info('*' * 80) 153 154 output_formatted = '' 155 persisted_outputs = persisted_result['output'] 156 for i in xrange(len(persisted_outputs)): 157 output_formatted += '\n\nOutput from run #%d:\n\n%s' % ( 158 i, persisted_outputs[i]) 159 print output_formatted 160 161 if json_file_name: 162 with file(json_file_name, 'w') as f: 163 f.write(persisted_result['chartjson']) 164 165 if archive_file_name: 166 if persisted_result['archive_bytes'] is not None: 167 with file(archive_file_name, 'wb') as f: 168 f.write(persisted_result['archive_bytes']) 169 else: 170 logging.error('The output dir was not archived.') 171 172 return persisted_result['exit_code'] 173 174 175def PrintSummary(test_names): 176 logging.info('*' * 80) 177 logging.info('Sharding summary') 178 device_total_time = collections.defaultdict(int) 179 for test_name in test_names: 180 file_name = os.path.join(constants.PERF_OUTPUT_DIR, test_name) 181 if not os.path.exists(file_name): 182 logging.info('%s : No status file found', test_name) 183 continue 184 with file(file_name, 'r') as f: 185 result = pickle.loads(f.read()) 186 logging.info('%s : exit_code=%d in %d secs at %s', 187 result['name'], result['exit_code'], result['total_time'], 188 result['device']) 189 device_total_time[result['device']] += result['total_time'] 190 for device, device_time in device_total_time.iteritems(): 191 logging.info('Total for device %s : %d secs', device, device_time) 192 logging.info('Total steps time: %d secs', sum(device_total_time.values())) 193 194 195class _HeartBeatLogger(object): 196 # How often to print the heartbeat on flush(). 197 _PRINT_INTERVAL = 30.0 198 199 def __init__(self): 200 """A file-like class for keeping the buildbot alive.""" 201 self._len = 0 202 self._tick = time.time() 203 self._stopped = threading.Event() 204 self._timer = threading.Thread(target=self._runner) 205 self._timer.start() 206 207 def _runner(self): 208 while not self._stopped.is_set(): 209 self.flush() 210 self._stopped.wait(_HeartBeatLogger._PRINT_INTERVAL) 211 212 def write(self, data): 213 self._len += len(data) 214 215 def flush(self): 216 now = time.time() 217 if now - self._tick >= _HeartBeatLogger._PRINT_INTERVAL: 218 self._tick = now 219 print '--single-step output length %d' % self._len 220 sys.stdout.flush() 221 222 def stop(self): 223 self._stopped.set() 224 225 226class TestRunner(base_test_runner.BaseTestRunner): 227 def __init__(self, test_options, device, shard_index, max_shard, tests, 228 flaky_tests): 229 """A TestRunner instance runs a perf test on a single device. 230 231 Args: 232 test_options: A PerfOptions object. 233 device: Device to run the tests. 234 shard_index: the index of this device. 235 max_shards: the maximum shard index. 236 tests: a dict mapping test_name to command. 237 flaky_tests: a list of flaky test_name. 238 """ 239 super(TestRunner, self).__init__(device, None) 240 self._options = test_options 241 self._shard_index = shard_index 242 self._max_shard = max_shard 243 self._tests = tests 244 self._flaky_tests = flaky_tests 245 self._output_dir = None 246 self._device_battery = battery_utils.BatteryUtils(self.device) 247 248 @staticmethod 249 def _SaveResult(result): 250 pickled = os.path.join(constants.PERF_OUTPUT_DIR, result['name']) 251 if os.path.exists(pickled): 252 with file(pickled, 'r') as f: 253 previous = pickle.loads(f.read()) 254 result['output'] = previous['output'] + result['output'] 255 256 with file(pickled, 'w') as f: 257 f.write(pickle.dumps(result)) 258 259 def _CheckDeviceAffinity(self, test_name): 260 """Returns True if test_name has affinity for this shard.""" 261 affinity = (self._tests['steps'][test_name]['device_affinity'] % 262 self._max_shard) 263 if self._shard_index == affinity: 264 return True 265 logging.info('Skipping %s on %s (affinity is %s, device is %s)', 266 test_name, self.device_serial, affinity, self._shard_index) 267 return False 268 269 def _CleanupOutputDirectory(self): 270 if self._output_dir: 271 shutil.rmtree(self._output_dir, ignore_errors=True) 272 self._output_dir = None 273 274 def _ReadChartjsonOutput(self): 275 if not self._output_dir: 276 return '' 277 278 json_output_path = os.path.join(self._output_dir, 'results-chart.json') 279 try: 280 with open(json_output_path) as f: 281 return f.read() 282 except IOError: 283 logging.exception('Exception when reading chartjson.') 284 logging.error('This usually means that telemetry did not run, so it could' 285 ' not generate the file. Please check the device running' 286 ' the test.') 287 return '' 288 289 def _WriteBuildBotJson(self): 290 """Write metadata about the buildbot environment to the output dir.""" 291 data = { 292 'chromium': _GetChromiumRevision(), 293 'environment': dict(os.environ)} 294 logging.info('BuildBot environment: %s', data) 295 with open(os.path.join(self._output_dir, 'buildbot.json'), 'w') as f: 296 json.dump(data, f, sort_keys=True, indent=2, separators=(',', ': ')) 297 298 def _ArchiveOutputDir(self): 299 """Archive all files in the output dir, and return as compressed bytes.""" 300 with io.BytesIO() as archive: 301 with zipfile.ZipFile(archive, 'w', zipfile.ZIP_DEFLATED) as contents: 302 num_files = 0 303 for absdir, _, files in os.walk(self._output_dir): 304 reldir = os.path.relpath(absdir, self._output_dir) 305 for filename in files: 306 src_path = os.path.join(absdir, filename) 307 # We use normpath to turn './file.txt' into just 'file.txt'. 308 dst_path = os.path.normpath(os.path.join(reldir, filename)) 309 contents.write(src_path, dst_path) 310 num_files += 1 311 if num_files: 312 logging.info('%d files in the output dir were archived.', num_files) 313 else: 314 logging.warning('No files in the output dir. Archive is empty.') 315 return archive.getvalue() 316 317 def _LaunchPerfTest(self, test_name): 318 """Runs a perf test. 319 320 Args: 321 test_name: the name of the test to be executed. 322 323 Returns: 324 A tuple containing (Output, base_test_result.ResultType) 325 """ 326 if not self._CheckDeviceAffinity(test_name): 327 return '', base_test_result.ResultType.PASS 328 329 try: 330 logging.warning('Unmapping device ports') 331 forwarder.Forwarder.UnmapAllDevicePorts(self.device) 332 self.device.RestartAdbd() 333 except Exception as e: # pylint: disable=broad-except 334 logging.error('Exception when tearing down device %s', e) 335 336 test_config = self._tests['steps'][test_name] 337 cmd = ('%s --device %s' % (test_config['cmd'], self.device_serial)) 338 339 if (self._options.collect_chartjson_data 340 or test_config.get('archive_output_dir')): 341 self._output_dir = tempfile.mkdtemp() 342 self._WriteBuildBotJson() 343 cmd = cmd + ' --output-dir=%s' % self._output_dir 344 345 logging.info( 346 'temperature: %s (0.1 C)', 347 str(self._device_battery.GetBatteryInfo().get('temperature'))) 348 if self._options.max_battery_temp: 349 self._device_battery.LetBatteryCoolToTemperature( 350 self._options.max_battery_temp) 351 352 logging.info('Charge level: %s%%', 353 str(self._device_battery.GetBatteryInfo().get('level'))) 354 if self._options.min_battery_level: 355 self._device_battery.ChargeDeviceToLevel( 356 self._options.min_battery_level) 357 self.device.SetScreen(True) 358 359 logging.info('%s : %s', test_name, cmd) 360 start_time = time.time() 361 362 timeout = test_config.get('timeout', 3600) 363 if self._options.no_timeout: 364 timeout = None 365 logging.info('Timeout for %s test: %s', test_name, timeout) 366 full_cmd = cmd 367 if self._options.dry_run: 368 full_cmd = 'echo %s' % cmd 369 370 logfile = sys.stdout 371 archive_bytes = None 372 if self._options.single_step: 373 # Just print a heart-beat so that the outer buildbot scripts won't timeout 374 # without response. 375 logfile = _HeartBeatLogger() 376 cwd = os.path.abspath(host_paths.DIR_SOURCE_ROOT) 377 if full_cmd.startswith('src/'): 378 cwd = os.path.abspath(os.path.join(host_paths.DIR_SOURCE_ROOT, os.pardir)) 379 try: 380 exit_code, output = cmd_helper.GetCmdStatusAndOutputWithTimeout( 381 full_cmd, timeout, cwd=cwd, shell=True, logfile=logfile) 382 json_output = self._ReadChartjsonOutput() 383 if test_config.get('archive_output_dir'): 384 archive_bytes = self._ArchiveOutputDir() 385 except cmd_helper.TimeoutError as e: 386 exit_code = -1 387 output = e.output 388 json_output = '' 389 finally: 390 self._CleanupOutputDirectory() 391 if self._options.single_step: 392 logfile.stop() 393 end_time = time.time() 394 if exit_code is None: 395 exit_code = -1 396 logging.info('%s : exit_code=%d in %d secs at %s', 397 test_name, exit_code, end_time - start_time, 398 self.device_serial) 399 400 if exit_code == 0: 401 result_type = base_test_result.ResultType.PASS 402 else: 403 result_type = base_test_result.ResultType.FAIL 404 # Since perf tests use device affinity, give the device a chance to 405 # recover if it is offline after a failure. Otherwise, the master sharder 406 # will remove it from the pool and future tests on this device will fail. 407 try: 408 self.device.WaitUntilFullyBooted(timeout=120) 409 except device_errors.CommandTimeoutError as e: 410 logging.error('Device failed to return after %s: %s', test_name, e) 411 412 actual_exit_code = exit_code 413 if test_name in self._flaky_tests: 414 # The exit_code is used at the second stage when printing the 415 # test output. If the test is flaky, force to "0" to get that step green 416 # whilst still gathering data to the perf dashboards. 417 # The result_type is used by the test_dispatcher to retry the test. 418 exit_code = 0 419 420 persisted_result = { 421 'name': test_name, 422 'output': [output], 423 'chartjson': json_output, 424 'archive_bytes': archive_bytes, 425 'exit_code': exit_code, 426 'actual_exit_code': actual_exit_code, 427 'result_type': result_type, 428 'start_time': start_time, 429 'end_time': end_time, 430 'total_time': end_time - start_time, 431 'device': self.device_serial, 432 'cmd': cmd, 433 } 434 self._SaveResult(persisted_result) 435 436 return (output, result_type) 437 438 def RunTest(self, test_name): 439 """Run a perf test on the device. 440 441 Args: 442 test_name: String to use for logging the test result. 443 444 Returns: 445 A tuple of (TestRunResults, retry). 446 """ 447 _, result_type = self._LaunchPerfTest(test_name) 448 results = base_test_result.TestRunResults() 449 results.AddResult(base_test_result.BaseTestResult(test_name, result_type)) 450 retry = None 451 if not results.DidRunPass(): 452 retry = test_name 453 return results, retry 454