# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Runs perf tests.

Our buildbot infrastructure requires each slave to run steps serially.
This is sub-optimal for android, where these steps can run independently on
multiple connected devices.

The buildbots will run this script multiple times per cycle:
- First: all steps listed in --steps will be executed in parallel using all
connected devices. Step results will be pickled to disk. Each step has a unique
name. The result code will be ignored if the step name is listed in
--flaky-steps.
The buildbot will treat this step as a regular step, and will not process any
graph data.

- Then, with --print-step STEP_NAME: at this stage, we'll simply print the
file with the step results previously saved. The buildbot will then process
the graph data accordingly.

The JSON steps file contains a dictionary in the format:
{ "version": int,
  "steps": {
    "foo": {
      "device_affinity": int,
      "cmd": "script_to_execute foo"
    },
    "bar": {
      "device_affinity": int,
      "cmd": "script_to_execute bar"
    }
  }
}

The JSON flaky steps file contains a list of step names whose results should
be ignored:
[
  "step_name_foo",
  "step_name_bar"
]

Note that script_to_execute must accept at least the following option:
  --device: the serial number to be passed to all adb commands.
"""

import collections
import io
import json
import logging
import os
import pickle
import re
import shutil
import sys
import tempfile
import threading
import time
import zipfile

from devil.android import battery_utils
from devil.android import device_errors
from devil.android import forwarder
from devil.constants import exit_codes
from devil.utils import cmd_helper
from pylib import constants
from pylib.base import base_test_result
from pylib.base import base_test_runner
from pylib.constants import host_paths


# Regex for the master branch commit position.
_GIT_CR_POS_RE = re.compile(r'^Cr-Commit-Position: refs/heads/master@{#(\d+)}$')
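
# For example, a commit message trailer like the following yields a commit
# position of 334802 (the number is illustrative):
#   Cr-Commit-Position: refs/heads/master@{#334802}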


def _GetChromiumRevision():
  # pylint: disable=line-too-long
  """Get the git hash and commit position of the chromium master branch.

  See: https://chromium.googlesource.com/chromium/tools/build/+/master/scripts/slave/runtest.py#212

  Returns:
    A dictionary with 'revision' and 'commit_pos' keys.
  """
  # pylint: enable=line-too-long
  status, output = cmd_helper.GetCmdStatusAndOutput(
      ['git', 'log', '-n', '1', '--pretty=format:%H%n%B', 'HEAD'],
      host_paths.DIR_SOURCE_ROOT)
  revision = None
  commit_pos = None
  if not status:
    lines = output.splitlines()
    revision = lines[0]
    for line in reversed(lines):
      m = _GIT_CR_POS_RE.match(line.strip())
      if m:
        commit_pos = int(m.group(1))
        break
  return {'revision': revision, 'commit_pos': commit_pos}
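
# Sample return value of _GetChromiumRevision (both values illustrative):
#   {'revision': '0123456789abcdef0123456789abcdef01234567',
#    'commit_pos': 334802}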


def GetPersistedResult(test_name):
  file_name = os.path.join(constants.PERF_OUTPUT_DIR, test_name)
  if not os.path.exists(file_name):
    logging.error('File not found %s', file_name)
    return None

  with open(file_name, 'r') as f:
    return pickle.loads(f.read())
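
# The pickled dict is produced by TestRunner._SaveResult; see the
# persisted_result dict in TestRunner._LaunchPerfTest for the full set of
# keys it contains.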


def OutputJsonList(json_input, json_output):
  with open(json_input, 'r') as i:
    all_steps = json.load(i)

  step_values = []
  for k, v in all_steps['steps'].iteritems():
    data = {'test': k, 'device_affinity': v['device_affinity']}

    persisted_result = GetPersistedResult(k)
    if persisted_result:
      data['start_time'] = persisted_result['start_time']
      data['end_time'] = persisted_result['end_time']
      data['total_time'] = persisted_result['total_time']
      data['has_archive'] = persisted_result['archive_bytes'] is not None
    step_values.append(data)

  with open(json_output, 'w') as o:
    o.write(json.dumps(step_values))
  return 0
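
# Illustrative contents of the emitted JSON list (timing values made up):
#   [{"test": "foo", "device_affinity": 0, "start_time": 1445000000.0,
#     "end_time": 1445000321.0, "total_time": 321.0, "has_archive": false}]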


def PrintTestOutput(test_name, json_file_name=None, archive_file_name=None):
  """Helper method to print the output of a previously executed test.

  Args:
    test_name: name of the test that has been previously executed.
    json_file_name: name of the file to output chartjson data to.
    archive_file_name: name of the file to write the compressed ZIP archive.

  Returns:
    exit code generated by the test step.
  """
  persisted_result = GetPersistedResult(test_name)
  if not persisted_result:
    return exit_codes.INFRA
  logging.info('*' * 80)
  logging.info('Output from:')
  logging.info(persisted_result['cmd'])
  logging.info('*' * 80)

  output_formatted = ''
  for i, persisted_output in enumerate(persisted_result['output']):
    output_formatted += '\n\nOutput from run #%d:\n\n%s' % (
        i, persisted_output)
  print output_formatted

  if json_file_name:
    with open(json_file_name, 'w') as f:
      f.write(persisted_result['chartjson'])

  if archive_file_name:
    if persisted_result['archive_bytes'] is not None:
      with open(archive_file_name, 'wb') as f:
        f.write(persisted_result['archive_bytes'])
    else:
      logging.error('The output dir was not archived.')

  return persisted_result['exit_code']


def PrintSummary(test_names):
  logging.info('*' * 80)
  logging.info('Sharding summary')
  device_total_time = collections.defaultdict(int)
  for test_name in test_names:
    file_name = os.path.join(constants.PERF_OUTPUT_DIR, test_name)
    if not os.path.exists(file_name):
      logging.info('%s : No status file found', test_name)
      continue
    with open(file_name, 'r') as f:
      result = pickle.loads(f.read())
    logging.info('%s : exit_code=%d in %d secs at %s',
                 result['name'], result['exit_code'], result['total_time'],
                 result['device'])
    device_total_time[result['device']] += result['total_time']
  for device, device_time in device_total_time.iteritems():
    logging.info('Total for device %s : %d secs', device, device_time)
  logging.info('Total steps time: %d secs', sum(device_total_time.values()))
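
# Illustrative summary output (serial and timings made up):
#   foo : exit_code=0 in 321 secs at 0123456789abcdef
#   Total for device 0123456789abcdef : 321 secs
#   Total steps time: 321 secs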


class _HeartBeatLogger(object):
  # How often to print the heartbeat on flush().
  _PRINT_INTERVAL = 30.0

  def __init__(self):
    """A file-like class for keeping the buildbot alive."""
    self._len = 0
    self._tick = time.time()
    self._stopped = threading.Event()
    self._timer = threading.Thread(target=self._runner)
    self._timer.start()

  def _runner(self):
    while not self._stopped.is_set():
      self.flush()
      self._stopped.wait(_HeartBeatLogger._PRINT_INTERVAL)

  def write(self, data):
    self._len += len(data)

  def flush(self):
    now = time.time()
    if now - self._tick >= _HeartBeatLogger._PRINT_INTERVAL:
      self._tick = now
      print '--single-step output length %d' % self._len
      sys.stdout.flush()

  def stop(self):
    self._stopped.set()
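
# Typical use (see _LaunchPerfTest below): pass an instance as the logfile
# argument of cmd_helper.GetCmdStatusAndOutputWithTimeout, then call stop()
# once the command finishes so the background thread exits.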


class TestRunner(base_test_runner.BaseTestRunner):
  def __init__(self, test_options, device, shard_index, max_shard, tests,
      flaky_tests):
    """A TestRunner instance runs a perf test on a single device.

    Args:
      test_options: A PerfOptions object.
      device: Device to run the tests on.
      shard_index: the index of this device.
      max_shard: the total number of shards.
      tests: a dict mapping test_name to command.
      flaky_tests: a list of flaky test names.
    """
    super(TestRunner, self).__init__(device, None)
    self._options = test_options
    self._shard_index = shard_index
    self._max_shard = max_shard
    self._tests = tests
    self._flaky_tests = flaky_tests
    self._output_dir = None
    self._device_battery = battery_utils.BatteryUtils(self.device)

  @staticmethod
  def _SaveResult(result):
    pickled = os.path.join(constants.PERF_OUTPUT_DIR, result['name'])
    if os.path.exists(pickled):
      with open(pickled, 'r') as f:
        previous = pickle.loads(f.read())
        result['output'] = previous['output'] + result['output']

    with open(pickled, 'w') as f:
      f.write(pickle.dumps(result))
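
  # When a flaky test is retried, the outputs from all runs are concatenated
  # above so that PrintTestOutput can report each attempt separately
  # ('Output from run #N').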

  def _CheckDeviceAffinity(self, test_name):
    """Returns True if test_name has affinity for this shard."""
    affinity = (self._tests['steps'][test_name]['device_affinity'] %
                self._max_shard)
    if self._shard_index == affinity:
      return True
    logging.info('Skipping %s on %s (affinity is %s, shard is %s)',
                 test_name, self.device_serial, affinity, self._shard_index)
    return False
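
  # For example, with max_shard=4 a step whose device_affinity is 6 runs
  # only on the shard with index 2, since 6 % 4 == 2.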

  def _CleanupOutputDirectory(self):
    if self._output_dir:
      shutil.rmtree(self._output_dir, ignore_errors=True)
      self._output_dir = None

  def _ReadChartjsonOutput(self):
    if not self._output_dir:
      return ''

    json_output_path = os.path.join(self._output_dir, 'results-chart.json')
    try:
      with open(json_output_path) as f:
        return f.read()
    except IOError:
      logging.exception('Exception when reading chartjson.')
      logging.error('This usually means that Telemetry did not run, so it'
                    ' could not generate the file. Please check the device'
                    ' running the test.')
      return ''

  def _WriteBuildBotJson(self):
    """Write metadata about the buildbot environment to the output dir."""
    data = {
      'chromium': _GetChromiumRevision(),
      'environment': dict(os.environ)}
    logging.info('BuildBot environment: %s', data)
    with open(os.path.join(self._output_dir, 'buildbot.json'), 'w') as f:
      json.dump(data, f, sort_keys=True, indent=2, separators=(',', ': '))
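
  # Illustrative buildbot.json contents (trimmed):
  #   {"chromium": {"commit_pos": 334802, "revision": "..."},
  #    "environment": {"PATH": "...", ...}}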

  def _ArchiveOutputDir(self):
    """Archive all files in the output dir, and return as compressed bytes."""
    with io.BytesIO() as archive:
      with zipfile.ZipFile(archive, 'w', zipfile.ZIP_DEFLATED) as contents:
        num_files = 0
        for absdir, _, files in os.walk(self._output_dir):
          reldir = os.path.relpath(absdir, self._output_dir)
          for filename in files:
            src_path = os.path.join(absdir, filename)
            # We use normpath to turn './file.txt' into just 'file.txt'.
            dst_path = os.path.normpath(os.path.join(reldir, filename))
            contents.write(src_path, dst_path)
            num_files += 1
      if num_files:
        logging.info('%d files in the output dir were archived.', num_files)
      else:
        logging.warning('No files in the output dir. Archive is empty.')
      return archive.getvalue()
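
  # The archive is assembled in memory (io.BytesIO) rather than on disk so
  # the raw bytes can be pickled with the rest of the step result in
  # _SaveResult and written out later by PrintTestOutput.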

  def _LaunchPerfTest(self, test_name):
    """Runs a perf test.

    Args:
      test_name: the name of the test to be executed.

    Returns:
      A tuple of (output, base_test_result.ResultType).
    """
    if not self._CheckDeviceAffinity(test_name):
      return '', base_test_result.ResultType.PASS

    try:
      logging.warning('Unmapping device ports')
      forwarder.Forwarder.UnmapAllDevicePorts(self.device)
      self.device.RestartAdbd()
    except Exception as e:  # pylint: disable=broad-except
      logging.error('Exception when tearing down device %s', e)

    test_config = self._tests['steps'][test_name]
    cmd = '%s --device %s' % (test_config['cmd'], self.device_serial)

    if (self._options.collect_chartjson_data
        or test_config.get('archive_output_dir')):
      self._output_dir = tempfile.mkdtemp()
      self._WriteBuildBotJson()
      cmd = cmd + ' --output-dir=%s' % self._output_dir

    logging.info(
        'temperature: %s (0.1 C)',
        str(self._device_battery.GetBatteryInfo().get('temperature')))
    if self._options.max_battery_temp:
      self._device_battery.LetBatteryCoolToTemperature(
          self._options.max_battery_temp)

    logging.info('Charge level: %s%%',
        str(self._device_battery.GetBatteryInfo().get('level')))
    if self._options.min_battery_level:
      self._device_battery.ChargeDeviceToLevel(
          self._options.min_battery_level)
    self.device.SetScreen(True)

    logging.info('%s : %s', test_name, cmd)
    start_time = time.time()

    timeout = test_config.get('timeout', 3600)
    if self._options.no_timeout:
      timeout = None
    logging.info('Timeout for %s test: %s', test_name, timeout)
    full_cmd = cmd
    if self._options.dry_run:
      full_cmd = 'echo %s' % cmd

    logfile = sys.stdout
    archive_bytes = None
    if self._options.single_step:
      # Just print a heartbeat so that the outer buildbot scripts won't
      # time out waiting for output.
      logfile = _HeartBeatLogger()
    cwd = os.path.abspath(host_paths.DIR_SOURCE_ROOT)
    if full_cmd.startswith('src/'):
      cwd = os.path.abspath(os.path.join(host_paths.DIR_SOURCE_ROOT, os.pardir))
    try:
      exit_code, output = cmd_helper.GetCmdStatusAndOutputWithTimeout(
          full_cmd, timeout, cwd=cwd, shell=True, logfile=logfile)
      json_output = self._ReadChartjsonOutput()
      if test_config.get('archive_output_dir'):
        archive_bytes = self._ArchiveOutputDir()
    except cmd_helper.TimeoutError as e:
      exit_code = -1
      output = e.output
      json_output = ''
    finally:
      self._CleanupOutputDirectory()
      if self._options.single_step:
        logfile.stop()
    end_time = time.time()
    if exit_code is None:
      exit_code = -1
    logging.info('%s : exit_code=%d in %d secs at %s',
                 test_name, exit_code, end_time - start_time,
                 self.device_serial)

    if exit_code == 0:
      result_type = base_test_result.ResultType.PASS
    else:
      result_type = base_test_result.ResultType.FAIL
      # Since perf tests use device affinity, give the device a chance to
      # recover if it is offline after a failure. Otherwise, the master sharder
      # will remove it from the pool and future tests on this device will fail.
      try:
        self.device.WaitUntilFullyBooted(timeout=120)
      except device_errors.CommandTimeoutError as e:
        logging.error('Device failed to return after %s: %s', test_name, e)

    actual_exit_code = exit_code
    if test_name in self._flaky_tests:
      # The exit_code is used at the second stage when printing the test
      # output. If the test is flaky, force it to 0 to keep that step green
      # while still feeding data to the perf dashboards.
      # The result_type is used by the test_dispatcher to retry the test.
      exit_code = 0

    persisted_result = {
        'name': test_name,
        'output': [output],
        'chartjson': json_output,
        'archive_bytes': archive_bytes,
        'exit_code': exit_code,
        'actual_exit_code': actual_exit_code,
        'result_type': result_type,
        'start_time': start_time,
        'end_time': end_time,
        'total_time': end_time - start_time,
        'device': self.device_serial,
        'cmd': cmd,
    }
    self._SaveResult(persisted_result)

    return (output, result_type)

  def RunTest(self, test_name):
    """Run a perf test on the device.

    Args:
      test_name: the name of the test to run (also used for logging the
        result).

    Returns:
      A tuple of (TestRunResults, retry).
    """
    _, result_type = self._LaunchPerfTest(test_name)
    results = base_test_result.TestRunResults()
    results.AddResult(base_test_result.BaseTestResult(test_name, result_type))
    retry = None
    if not results.DidRunPass():
      retry = test_name
    return results, retry