1# Copyright 2013 The Chromium Authors. All rights reserved. 2# Use of this source code is governed by a BSD-style license that can be 3# found in the LICENSE file. 4 5"""Runs perf tests. 6 7Our buildbot infrastructure requires each slave to run steps serially. 8This is sub-optimal for android, where these steps can run independently on 9multiple connected devices. 10 11The buildbots will run this script multiple times per cycle: 12- First: all steps listed in --steps in will be executed in parallel using all 13connected devices. Step results will be pickled to disk. Each step has a unique 14name. The result code will be ignored if the step name is listed in 15--flaky-steps. 16The buildbot will treat this step as a regular step, and will not process any 17graph data. 18 19- Then, with -print-step STEP_NAME: at this stage, we'll simply print the file 20with the step results previously saved. The buildbot will then process the graph 21data accordingly. 22 23The JSON steps file contains a dictionary in the format: 24{ "version": int, 25 "steps": { 26 "foo": { 27 "device_affinity": int, 28 "cmd": "script_to_execute foo" 29 }, 30 "bar": { 31 "device_affinity": int, 32 "cmd": "script_to_execute bar" 33 } 34 } 35} 36 37The JSON flaky steps file contains a list with step names which results should 38be ignored: 39[ 40 "step_name_foo", 41 "step_name_bar" 42] 43 44Note that script_to_execute necessarily have to take at least the following 45option: 46 --device: the serial number to be passed to all adb commands. 47""" 48 49import collections 50import datetime 51import json 52import logging 53import os 54import pickle 55import sys 56import threading 57import time 58 59from pylib import cmd_helper 60from pylib import constants 61from pylib import forwarder 62from pylib.base import base_test_result 63from pylib.base import base_test_runner 64 65 66def OutputJsonList(json_input, json_output): 67 with file(json_input, 'r') as i: 68 all_steps = json.load(i) 69 step_names = all_steps['steps'].keys() 70 with file(json_output, 'w') as o: 71 o.write(json.dumps(step_names)) 72 return 0 73 74 75def PrintTestOutput(test_name): 76 """Helper method to print the output of previously executed test_name. 77 78 Args: 79 test_name: name of the test that has been previously executed. 80 81 Returns: 82 exit code generated by the test step. 83 """ 84 file_name = os.path.join(constants.PERF_OUTPUT_DIR, test_name) 85 if not os.path.exists(file_name): 86 logging.error('File not found %s', file_name) 87 return 1 88 89 with file(file_name, 'r') as f: 90 persisted_result = pickle.loads(f.read()) 91 logging.info('*' * 80) 92 logging.info('Output from:') 93 logging.info(persisted_result['cmd']) 94 logging.info('*' * 80) 95 print persisted_result['output'] 96 97 return persisted_result['exit_code'] 98 99 100def PrintSummary(test_names): 101 logging.info('*' * 80) 102 logging.info('Sharding summary') 103 device_total_time = collections.defaultdict(int) 104 for test_name in test_names: 105 file_name = os.path.join(constants.PERF_OUTPUT_DIR, test_name) 106 if not os.path.exists(file_name): 107 logging.info('%s : No status file found', test_name) 108 continue 109 with file(file_name, 'r') as f: 110 result = pickle.loads(f.read()) 111 logging.info('%s : exit_code=%d in %d secs at %s', 112 result['name'], result['exit_code'], result['total_time'], 113 result['device']) 114 device_total_time[result['device']] += result['total_time'] 115 for device, device_time in device_total_time.iteritems(): 116 logging.info('Total for device %s : %d secs', device, device_time) 117 logging.info('Total steps time: %d secs', sum(device_total_time.values())) 118 119 120class _HeartBeatLogger(object): 121 # How often to print the heartbeat on flush(). 122 _PRINT_INTERVAL = 30.0 123 124 def __init__(self): 125 """A file-like class for keeping the buildbot alive.""" 126 self._len = 0 127 self._tick = time.time() 128 self._stopped = threading.Event() 129 self._timer = threading.Thread(target=self._runner) 130 self._timer.start() 131 132 def _runner(self): 133 while not self._stopped.is_set(): 134 self.flush() 135 self._stopped.wait(_HeartBeatLogger._PRINT_INTERVAL) 136 137 def write(self, data): 138 self._len += len(data) 139 140 def flush(self): 141 now = time.time() 142 if now - self._tick >= _HeartBeatLogger._PRINT_INTERVAL: 143 self._tick = now 144 print '--single-step output length %d' % self._len 145 sys.stdout.flush() 146 147 def stop(self): 148 self._stopped.set() 149 150 151class TestRunner(base_test_runner.BaseTestRunner): 152 def __init__(self, test_options, device, shard_index, max_shard, tests, 153 flaky_tests): 154 """A TestRunner instance runs a perf test on a single device. 155 156 Args: 157 test_options: A PerfOptions object. 158 device: Device to run the tests. 159 shard_index: the index of this device. 160 max_shards: the maximum shard index. 161 tests: a dict mapping test_name to command. 162 flaky_tests: a list of flaky test_name. 163 """ 164 super(TestRunner, self).__init__(device, None, 'Release') 165 self._options = test_options 166 self._shard_index = shard_index 167 self._max_shard = max_shard 168 self._tests = tests 169 self._flaky_tests = flaky_tests 170 171 @staticmethod 172 def _IsBetter(result): 173 if result['actual_exit_code'] == 0: 174 return True 175 pickled = os.path.join(constants.PERF_OUTPUT_DIR, 176 result['name']) 177 if not os.path.exists(pickled): 178 return True 179 with file(pickled, 'r') as f: 180 previous = pickle.loads(f.read()) 181 return result['actual_exit_code'] < previous['actual_exit_code'] 182 183 @staticmethod 184 def _SaveResult(result): 185 if TestRunner._IsBetter(result): 186 with file(os.path.join(constants.PERF_OUTPUT_DIR, 187 result['name']), 'w') as f: 188 f.write(pickle.dumps(result)) 189 190 def _CheckDeviceAffinity(self, test_name): 191 """Returns True if test_name has affinity for this shard.""" 192 affinity = (self._tests['steps'][test_name]['device_affinity'] % 193 self._max_shard) 194 if self._shard_index == affinity: 195 return True 196 logging.info('Skipping %s on %s (affinity is %s, device is %s)', 197 test_name, self.device_serial, affinity, self._shard_index) 198 return False 199 200 def _LaunchPerfTest(self, test_name): 201 """Runs a perf test. 202 203 Args: 204 test_name: the name of the test to be executed. 205 206 Returns: 207 A tuple containing (Output, base_test_result.ResultType) 208 """ 209 if not self._CheckDeviceAffinity(test_name): 210 return '', base_test_result.ResultType.PASS 211 212 try: 213 logging.warning('Unmapping device ports') 214 forwarder.Forwarder.UnmapAllDevicePorts(self.device) 215 self.device.old_interface.RestartAdbdOnDevice() 216 except Exception as e: 217 logging.error('Exception when tearing down device %s', e) 218 219 cmd = ('%s --device %s' % 220 (self._tests['steps'][test_name]['cmd'], 221 self.device_serial)) 222 logging.info('%s : %s', test_name, cmd) 223 start_time = datetime.datetime.now() 224 225 timeout = 5400 226 if self._options.no_timeout: 227 timeout = None 228 full_cmd = cmd 229 if self._options.dry_run: 230 full_cmd = 'echo %s' % cmd 231 232 logfile = sys.stdout 233 if self._options.single_step: 234 # Just print a heart-beat so that the outer buildbot scripts won't timeout 235 # without response. 236 logfile = _HeartBeatLogger() 237 cwd = os.path.abspath(constants.DIR_SOURCE_ROOT) 238 if full_cmd.startswith('src/'): 239 cwd = os.path.abspath(os.path.join(constants.DIR_SOURCE_ROOT, os.pardir)) 240 try: 241 exit_code, output = cmd_helper.GetCmdStatusAndOutputWithTimeout( 242 full_cmd, timeout, cwd=cwd, shell=True, logfile=logfile) 243 except cmd_helper.TimeoutError as e: 244 exit_code = -1 245 output = str(e) 246 finally: 247 if self._options.single_step: 248 logfile.stop() 249 end_time = datetime.datetime.now() 250 if exit_code is None: 251 exit_code = -1 252 logging.info('%s : exit_code=%d in %d secs at %s', 253 test_name, exit_code, (end_time - start_time).seconds, 254 self.device_serial) 255 result_type = base_test_result.ResultType.FAIL 256 if exit_code == 0: 257 result_type = base_test_result.ResultType.PASS 258 actual_exit_code = exit_code 259 if test_name in self._flaky_tests: 260 # The exit_code is used at the second stage when printing the 261 # test output. If the test is flaky, force to "0" to get that step green 262 # whilst still gathering data to the perf dashboards. 263 # The result_type is used by the test_dispatcher to retry the test. 264 exit_code = 0 265 266 persisted_result = { 267 'name': test_name, 268 'output': output, 269 'exit_code': exit_code, 270 'actual_exit_code': actual_exit_code, 271 'result_type': result_type, 272 'total_time': (end_time - start_time).seconds, 273 'device': self.device_serial, 274 'cmd': cmd, 275 } 276 self._SaveResult(persisted_result) 277 278 return (output, result_type) 279 280 def RunTest(self, test_name): 281 """Run a perf test on the device. 282 283 Args: 284 test_name: String to use for logging the test result. 285 286 Returns: 287 A tuple of (TestRunResults, retry). 288 """ 289 _, result_type = self._LaunchPerfTest(test_name) 290 results = base_test_result.TestRunResults() 291 results.AddResult(base_test_result.BaseTestResult(test_name, result_type)) 292 retry = None 293 if not results.DidRunPass(): 294 retry = test_name 295 return results, retry 296