• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# -*- coding: utf-8 -*-
2# Copyright (c) 2011 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""The experiment runner module."""
7from __future__ import print_function
8
9import getpass
10import os
11import shutil
12import time
13
14import lock_machine
15import test_flag
16
17from cros_utils import command_executer
18from cros_utils import logger
19from cros_utils.email_sender import EmailSender
20from cros_utils.file_utils import FileUtils
21
22import config
23from experiment_status import ExperimentStatus
24from results_cache import CacheConditions
25from results_cache import ResultsCache
26from results_report import HTMLResultsReport
27from results_report import TextResultsReport
28from results_report import JSONResultsReport
29from schedv2 import Schedv2
30
31
def _WriteJSONReportToFile(experiment, results_dir, json_report):
  """Saves the text of json_report as a JSON file inside results_dir.

  The file is named report_<board>_<date>_<time>.<compiler>.json where:
    - <board> is the board of the first label of the experiment,
    - <date> and <time> come from json_report (':' in the time is replaced
      by '.'),
    - <compiler> is 'llvm' if any label's compiler string contains 'llvm',
      and 'gcc' otherwise.

  Args:
    experiment: Experiment whose labels provide the board and compiler.
    results_dir: Directory in which the report file is created.
    json_report: Report object providing date, time and GetReport().
  """
  # Default to gcc; a single llvm label is enough to tag the report as llvm.
  compiler_string = 'gcc'
  for label in experiment.labels:
    if 'llvm' in label.compiler:
      compiler_string = 'llvm'
      break

  board = experiment.labels[0].board
  report_date = json_report.date
  report_time = json_report.time.replace(':', '.')
  filename = 'report_%s_%s_%s.%s.json' % (board, report_date, report_time,
                                          compiler_string)

  # Render the report before opening the file so that a failure while
  # generating it does not leave an empty file behind.
  report_text = json_report.GetReport()
  with open(os.path.join(results_dir, filename), 'w') as out_file:
    out_file.write(report_text)
44
45
class ExperimentRunner(object):
  """Runs an experiment and then prints, stores and emails its results.

  The public entry point is Run(); the remaining methods are the individual
  stages (locking machines, running, storing results, emailing) and may be
  overridden by subclasses.
  """

  # Minimum number of seconds between two status reports while the
  # experiment is running. Lowered to 10 in __init__ for non-verbose logs.
  STATUS_TIME_DELAY = 30
  # Number of seconds to sleep between two checks for experiment completion.
  THREAD_MONITOR_DELAY = 2

  # Return codes of _StoreResults() / Run().
  SUCCEEDED = 0
  HAS_FAILURE = 1
  ALL_FAILED = 2

  def __init__(self,
               experiment,
               json_report,
               using_schedv2=False,
               log=None,
               cmd_exec=None):
    """Initializes the runner.

    Args:
      experiment: The experiment to run.
      json_report: If truthy, a JSON report is written next to the HTML
        report when results are stored.
      using_schedv2: If True, a Schedv2 scheduler is attached to the
        experiment before it runs.
      log: Logger to use. Defaults to logger.GetLogger(experiment.log_dir).
      cmd_exec: Command executer to use. Defaults to
        command_executer.GetCommandExecuter() built on the logger.
    """
    self._experiment = experiment
    self.l = log or logger.GetLogger(experiment.log_dir)
    self._ce = cmd_exec or command_executer.GetCommandExecuter(self.l)
    self._terminated = False
    self.json_report = json_report
    self.locked_machines = []
    if experiment.log_level != 'verbose':
      # Instance-level override; the class attribute is left untouched.
      self.STATUS_TIME_DELAY = 10

    # Setting this to True will use crosperf sched v2 (feature in progress).
    self._using_schedv2 = using_schedv2

  def _GetMachineList(self):
    """Return a list of all requested machines.

    Checks that every label-specific machine is also listed in the global
    request list and returns the global list.

    Returns:
      The experiment's own `remote` list (the same object, not a copy).
    """
    machines = self._experiment.remote
    # All Label.remote is a sublist of experiment.remote.
    for l in self._experiment.labels:
      for r in l.remote:
        assert r in machines
    return machines

  def _UpdateMachineList(self, locked_machines):
    """Update machines lists to contain only locked machines.

    Go through all the lists of requested machines, both global and
    label-specific requests, and remove any machine that we were not
    able to lock.

    The lists are filtered in place (slice assignment) so that any other
    reference to the same list objects sees the update. Removing items from
    a list while iterating over it would skip the element following each
    removal and leave unlocked machines behind.

    Args:
      locked_machines: A list of the machines we successfully locked.
    """
    self._experiment.remote[:] = [
        m for m in self._experiment.remote if m in locked_machines
    ]

    for label in self._experiment.labels:
      label.remote[:] = [m for m in label.remote if m in locked_machines]

  def _GetMachineType(self, lock_mgr, machine):
    """Get where is the machine from.

    Args:
      lock_mgr: Lock manager used to check whether a lab machine is known
        to crosfleet.
      machine: Name or address of the machine.

    Returns:
      The location of the machine: local or crosfleet

    Raises:
      RuntimeError: The machine name contains 'chromeos' but the lock
        manager reports that it is not in crosfleet.
    """
    # We assume that lab machine always starts with chromeos*, and local
    # machines are ip address. Note that the check below is a substring
    # match, not a prefix match.
    if 'chromeos' in machine:
      if lock_mgr.CheckMachineInCrosfleet(machine):
        return 'crosfleet'
      raise RuntimeError('Lab machine not in Crosfleet.')
    return 'local'

  def _LockAllMachines(self, experiment):
    """Attempt to globally lock all of the machines requested for run.

    This method tries to lock all machines requested for this crosperf run
    in two different modes automatically, to prevent any other crosperf runs
    from being able to update/use the machines while this experiment is
    running:
      - Crosfleet machines: Use crosfleet lease-dut mechanism to lease
      - Local machines: Use file lock mechanism to lock

    In test mode no lock manager is created and every requested machine is
    treated as locked.

    Args:
      experiment: The experiment whose machines should be locked.

    Raises:
      RuntimeError: No machine could be locked, or a lab machine is not in
        crosfleet.
    """
    if test_flag.GetTestMode():
      self.locked_machines = self._GetMachineList()
      experiment.locked_machines = self.locked_machines
    else:
      experiment.lock_mgr = lock_machine.LockManager(
          self._GetMachineList(),
          '',
          experiment.labels[0].chromeos_root,
          experiment.locks_dir,
          log=self.l,
      )
      for m in experiment.lock_mgr.machines:
        machine_type = self._GetMachineType(experiment.lock_mgr, m)
        if machine_type == 'local':
          experiment.lock_mgr.AddMachineToLocal(m)
        elif machine_type == 'crosfleet':
          experiment.lock_mgr.AddMachineToCrosfleet(m)
      machine_states = experiment.lock_mgr.GetMachineStates('lock')
      experiment.lock_mgr.CheckMachineLocks(machine_states, 'lock')
      self.locked_machines = experiment.lock_mgr.UpdateMachines(True)
      experiment.locked_machines = self.locked_machines
      # Drop every machine we failed to lock from the experiment, its labels
      # and the machine manager before deciding whether we can continue.
      self._UpdateMachineList(self.locked_machines)
      experiment.machine_manager.RemoveNonLockedMachines(self.locked_machines)
      if not self.locked_machines:
        raise RuntimeError('Unable to lock any machines.')

  def _ClearCacheEntries(self, experiment):
    """Removes the cache write directory of every benchmark run, if present.

    Args:
      experiment: The experiment whose benchmark runs' caches are cleared.
    """
    for br in experiment.benchmark_runs:
      cache = ResultsCache()
      cache.Init(br.label.chromeos_image, br.label.chromeos_root,
                 br.benchmark.test_name, br.iteration, br.test_args,
                 br.profiler_args, br.machine_manager, br.machine,
                 br.label.board, br.cache_conditions, br.logger(),
                 br.log_level, br.label, br.share_cache, br.benchmark.suite,
                 br.benchmark.show_all_results, br.benchmark.run_local,
                 br.benchmark.cwp_dso)
      cache_dir = cache.GetCacheDirForWrite()
      if os.path.exists(cache_dir):
        self.l.LogOutput('Removing cache dir: %s' % cache_dir)
        shutil.rmtree(cache_dir)

  def _Run(self, experiment):
    """Locks machines, starts the experiment and monitors it until done.

    While the experiment runs, its status is logged at most once every
    STATUS_TIME_DELAY seconds. In non-verbose mode the status is only
    printed when it changed; otherwise a progress dot is appended.

    experiment.Cleanup() is always called on the way out. On
    KeyboardInterrupt or SystemExit the experiment is terminated, the runner
    is marked as terminated and the exception is re-raised.

    Args:
      experiment: The experiment to run.
    """
    try:
      # We should not lease machines if tests are launched via `crosfleet
      # create-test`. This is because leasing DUT in crosfleet will create a
      # no-op task on the DUT and new test created will be hanging there.
      # TODO(zhizhouy): Need to check whether machine is ready or not before
      # assigning a test to it.
      if not experiment.no_lock and not experiment.crosfleet:
        self._LockAllMachines(experiment)
      # Calculate all checksums of available/locked machines, to ensure same
      # label has same machines for testing
      experiment.SetCheckSums(forceSameImage=True)
      if self._using_schedv2:
        schedv2 = Schedv2(experiment)
        experiment.set_schedv2(schedv2)
      if CacheConditions.FALSE in experiment.cache_conditions:
        self._ClearCacheEntries(experiment)
      status = ExperimentStatus(experiment)
      experiment.Run()
      # Starting at 0 makes the first loop iteration report immediately.
      last_status_time = 0
      last_status_string = ''
      try:
        if experiment.log_level != 'verbose':
          self.l.LogStartDots()
        while not experiment.IsComplete():
          if last_status_time + self.STATUS_TIME_DELAY < time.time():
            last_status_time = time.time()
            border = '=============================='
            if experiment.log_level == 'verbose':
              self.l.LogOutput(border)
              self.l.LogOutput(status.GetProgressString())
              self.l.LogOutput(status.GetStatusString())
              self.l.LogOutput(border)
            else:
              current_status_string = status.GetStatusString()
              if current_status_string != last_status_string:
                self.l.LogEndDots()
                self.l.LogOutput(border)
                self.l.LogOutput(current_status_string)
                self.l.LogOutput(border)
                last_status_string = current_status_string
              else:
                self.l.LogAppendDot()
          time.sleep(self.THREAD_MONITOR_DELAY)
      except KeyboardInterrupt:
        self._terminated = True
        self.l.LogError('Ctrl-c pressed. Cleaning up...')
        experiment.Terminate()
        raise
      except SystemExit:
        self._terminated = True
        self.l.LogError('Unexpected exit. Cleaning up...')
        experiment.Terminate()
        raise
    finally:
      experiment.Cleanup()

  def _PrintTable(self, experiment):
    """Logs the text results report of the experiment."""
    self.l.LogOutput(TextResultsReport.FromExperiment(experiment).GetReport())

  def _Email(self, experiment):
    """Emails the text report with the HTML report attached.

    Nothing is sent when the 'no_email' config option is set, or when every
    benchmark run was a cache hit and the experiment lists no recipients.
    The current user (getpass.getuser()) is always added to the recipients.

    Args:
      experiment: The experiment whose results are emailed.
    """
    # Only email by default if a new run was completed.
    send_mail = any(
        not benchmark_run.cache_hit
        for benchmark_run in experiment.benchmark_runs)
    if ((not send_mail and not experiment.email_to)
        or config.GetConfig('no_email')):
      return

    label_names = [label.name for label in experiment.labels]
    subject = '%s: %s' % (experiment.name, ' vs. '.join(label_names))

    text_report = TextResultsReport.FromExperiment(experiment,
                                                   True).GetReport()
    text_report += ('\nResults are stored in %s.\n' %
                    experiment.results_directory)
    text_report = "<pre style='font-size: 13px'>%s</pre>" % text_report
    html_report = HTMLResultsReport.FromExperiment(experiment).GetReport()
    attachment = EmailSender.Attachment('report.html', html_report)
    # Work on a copy so that the experiment's own recipient list is not
    # modified (and does not grow if this method is called again).
    email_to = list(experiment.email_to or [])
    email_to.append(getpass.getuser())
    EmailSender().SendEmail(email_to,
                            subject,
                            text_report,
                            attachments=[attachment],
                            msg_type='html')

  def _StoreResults(self, experiment):
    """Writes the experiment file, per-run results and reports to disk.

    The results directory is removed and recreated first. Nothing is
    written if the run was terminated, and only the experiment file and
    topstats.log are written if no benchmark run succeeded.

    Args:
      experiment: The experiment whose results are stored.

    Returns:
      ALL_FAILED if the run was terminated or no benchmark run has a result
      with a zero retval, HAS_FAILURE if at least one benchmark run has a
      non-zero retval, SUCCEEDED otherwise.
    """
    if self._terminated:
      return self.ALL_FAILED

    results_directory = experiment.results_directory
    FileUtils().RmDir(results_directory)
    FileUtils().MkDirP(results_directory)
    self.l.LogOutput('Storing experiment file in %s.' % results_directory)
    experiment_file_path = os.path.join(results_directory, 'experiment.exp')
    FileUtils().WriteFile(experiment_file_path, experiment.experiment_file)

    has_failure = False
    all_failed = True

    topstats_file = os.path.join(results_directory, 'topstats.log')
    self.l.LogOutput('Storing top statistics of each benchmark run into %s.' %
                     topstats_file)
    with open(topstats_file, 'w') as top_fd:
      for benchmark_run in experiment.benchmark_runs:
        if benchmark_run.result:
          # FIXME: Pylint has a bug suggesting the following change, which
          # should be fixed in pylint 2.0. Resolve this after pylint >= 2.0.
          # Bug: https://github.com/PyCQA/pylint/issues/1984
          # pylint: disable=simplifiable-if-statement
          if benchmark_run.result.retval:
            has_failure = True
          else:
            all_failed = False
          # Header with benchmark run name.
          top_fd.write('%s\n' % str(benchmark_run))
          # Formatted string with top statistics.
          top_fd.write(benchmark_run.result.FormatStringTopCommands())
          top_fd.write('\n\n')

    if all_failed:
      return self.ALL_FAILED

    self.l.LogOutput('Storing results of each benchmark run.')
    for benchmark_run in experiment.benchmark_runs:
      if benchmark_run.result:
        # Directory name is the run name reduced to its alphanumeric chars.
        benchmark_run_name = ''.join(ch for ch in benchmark_run.name
                                     if ch.isalnum())
        benchmark_run_path = os.path.join(results_directory,
                                          benchmark_run_name)
        if experiment.compress_results:
          benchmark_run.result.CompressResultsTo(benchmark_run_path)
        else:
          benchmark_run.result.CopyResultsTo(benchmark_run_path)
        benchmark_run.result.CleanUp(benchmark_run.benchmark.rm_chroot_tmp)

    self.l.LogOutput('Storing results report in %s.' % results_directory)
    results_table_path = os.path.join(results_directory, 'results.html')
    report = HTMLResultsReport.FromExperiment(experiment).GetReport()
    if self.json_report:
      json_report = JSONResultsReport.FromExperiment(experiment,
                                                     json_args={'indent': 2})
      _WriteJSONReportToFile(experiment, results_directory, json_report)

    FileUtils().WriteFile(results_table_path, report)

    self.l.LogOutput('Storing email message body in %s.' % results_directory)
    msg_file_path = os.path.join(results_directory, 'msg_body.html')
    text_report = TextResultsReport.FromExperiment(experiment,
                                                   True).GetReport()
    text_report += ('\nResults are stored in %s.\n' %
                    experiment.results_directory)
    msg_body = "<pre style='font-size: 13px'>%s</pre>" % text_report
    FileUtils().WriteFile(msg_file_path, msg_body)

    return self.SUCCEEDED if not has_failure else self.HAS_FAILURE

  def Run(self):
    """Runs the experiment, then prints, stores and emails the results.

    The reporting steps run even if the experiment itself raised; in that
    case the exception propagates after they finish. No email is sent when
    storing the results reports ALL_FAILED.

    Returns:
      The value returned by _StoreResults(): SUCCEEDED, HAS_FAILURE or
      ALL_FAILED.
    """
    try:
      self._Run(self._experiment)
    finally:
      # Always print the report at the end of the run.
      self._PrintTable(self._experiment)
      ret = self._StoreResults(self._experiment)
      if ret != self.ALL_FAILED:
        self._Email(self._experiment)
    return ret
344
345
class MockExperimentRunner(ExperimentRunner):
  """Dry-run ExperimentRunner: each stage only logs what it would do.

  Unlike the base class, _StoreResults() here returns None, so Run() on a
  mock runner also returns None.
  """

  def __init__(self, experiment, json_report):
    """Initializes the base runner with its default optional arguments."""
    super(MockExperimentRunner, self).__init__(experiment, json_report)

  def _Run(self, experiment):
    """Logs the name of the experiment instead of running it."""
    message = "Would run the following experiment: '%s'." % experiment.name
    self.l.LogOutput(message)

  def _PrintTable(self, experiment):
    """Logs a placeholder instead of printing the results table."""
    message = 'Would print the experiment table.'
    self.l.LogOutput(message)

  def _Email(self, experiment):
    """Logs a placeholder instead of sending the result email."""
    message = 'Would send result email.'
    self.l.LogOutput(message)

  def _StoreResults(self, experiment):
    """Logs a placeholder instead of storing the results."""
    message = 'Would store the results.'
    self.l.LogOutput(message)
364