1# -*- coding: utf-8 -*- 2# Copyright (c) 2011 The Chromium OS Authors. All rights reserved. 3# Use of this source code is governed by a BSD-style license that can be 4# found in the LICENSE file. 5 6"""The experiment runner module.""" 7from __future__ import print_function 8 9import getpass 10import os 11import shutil 12import time 13 14import lock_machine 15import test_flag 16 17from cros_utils import command_executer 18from cros_utils import logger 19from cros_utils.email_sender import EmailSender 20from cros_utils.file_utils import FileUtils 21 22import config 23from experiment_status import ExperimentStatus 24from results_cache import CacheConditions 25from results_cache import ResultsCache 26from results_report import HTMLResultsReport 27from results_report import TextResultsReport 28from results_report import JSONResultsReport 29from schedv2 import Schedv2 30 31 32def _WriteJSONReportToFile(experiment, results_dir, json_report): 33 """Writes a JSON report to a file in results_dir.""" 34 has_llvm = any('llvm' in l.compiler for l in experiment.labels) 35 compiler_string = 'llvm' if has_llvm else 'gcc' 36 board = experiment.labels[0].board 37 filename = 'report_%s_%s_%s.%s.json' % (board, json_report.date, 38 json_report.time.replace( 39 ':', '.'), compiler_string) 40 fullname = os.path.join(results_dir, filename) 41 report_text = json_report.GetReport() 42 with open(fullname, 'w') as out_file: 43 out_file.write(report_text) 44 45 46class ExperimentRunner(object): 47 """ExperimentRunner Class.""" 48 49 STATUS_TIME_DELAY = 30 50 THREAD_MONITOR_DELAY = 2 51 52 SUCCEEDED = 0 53 HAS_FAILURE = 1 54 ALL_FAILED = 2 55 56 def __init__(self, 57 experiment, 58 json_report, 59 using_schedv2=False, 60 log=None, 61 cmd_exec=None): 62 self._experiment = experiment 63 self.l = log or logger.GetLogger(experiment.log_dir) 64 self._ce = cmd_exec or command_executer.GetCommandExecuter(self.l) 65 self._terminated = False 66 self.json_report = json_report 67 self.locked_machines = [] 68 if experiment.log_level != 'verbose': 69 self.STATUS_TIME_DELAY = 10 70 71 # Setting this to True will use crosperf sched v2 (feature in progress). 72 self._using_schedv2 = using_schedv2 73 74 def _GetMachineList(self): 75 """Return a list of all requested machines. 76 77 Create a list of all the requested machines, both global requests and 78 label-specific requests, and return the list. 79 """ 80 machines = self._experiment.remote 81 # All Label.remote is a sublist of experiment.remote. 82 for l in self._experiment.labels: 83 for r in l.remote: 84 assert r in machines 85 return machines 86 87 def _UpdateMachineList(self, locked_machines): 88 """Update machines lists to contain only locked machines. 89 90 Go through all the lists of requested machines, both global and 91 label-specific requests, and remove any machine that we were not 92 able to lock. 93 94 Args: 95 locked_machines: A list of the machines we successfully locked. 96 """ 97 for m in self._experiment.remote: 98 if m not in locked_machines: 99 self._experiment.remote.remove(m) 100 101 for l in self._experiment.labels: 102 for m in l.remote: 103 if m not in locked_machines: 104 l.remote.remove(m) 105 106 def _GetMachineType(self, lock_mgr, machine): 107 """Get where is the machine from. 108 109 Returns: 110 The location of the machine: local or crosfleet 111 """ 112 # We assume that lab machine always starts with chromeos*, and local 113 # machines are ip address. 114 if 'chromeos' in machine: 115 if lock_mgr.CheckMachineInCrosfleet(machine): 116 return 'crosfleet' 117 else: 118 raise RuntimeError('Lab machine not in Crosfleet.') 119 return 'local' 120 121 def _LockAllMachines(self, experiment): 122 """Attempt to globally lock all of the machines requested for run. 123 124 This method tries to lock all machines requested for this crosperf run 125 in three different modes automatically, to prevent any other crosperf runs 126 from being able to update/use the machines while this experiment is 127 running: 128 - Crosfleet machines: Use crosfleet lease-dut mechanism to lease 129 - Local machines: Use file lock mechanism to lock 130 """ 131 if test_flag.GetTestMode(): 132 self.locked_machines = self._GetMachineList() 133 experiment.locked_machines = self.locked_machines 134 else: 135 experiment.lock_mgr = lock_machine.LockManager( 136 self._GetMachineList(), 137 '', 138 experiment.labels[0].chromeos_root, 139 experiment.locks_dir, 140 log=self.l, 141 ) 142 for m in experiment.lock_mgr.machines: 143 machine_type = self._GetMachineType(experiment.lock_mgr, m) 144 if machine_type == 'local': 145 experiment.lock_mgr.AddMachineToLocal(m) 146 elif machine_type == 'crosfleet': 147 experiment.lock_mgr.AddMachineToCrosfleet(m) 148 machine_states = experiment.lock_mgr.GetMachineStates('lock') 149 experiment.lock_mgr.CheckMachineLocks(machine_states, 'lock') 150 self.locked_machines = experiment.lock_mgr.UpdateMachines(True) 151 experiment.locked_machines = self.locked_machines 152 self._UpdateMachineList(self.locked_machines) 153 experiment.machine_manager.RemoveNonLockedMachines(self.locked_machines) 154 if not self.locked_machines: 155 raise RuntimeError('Unable to lock any machines.') 156 157 def _ClearCacheEntries(self, experiment): 158 for br in experiment.benchmark_runs: 159 cache = ResultsCache() 160 cache.Init(br.label.chromeos_image, br.label.chromeos_root, 161 br.benchmark.test_name, br.iteration, br.test_args, 162 br.profiler_args, br.machine_manager, br.machine, 163 br.label.board, br.cache_conditions, br.logger(), 164 br.log_level, br.label, br.share_cache, br.benchmark.suite, 165 br.benchmark.show_all_results, br.benchmark.run_local, 166 br.benchmark.cwp_dso) 167 cache_dir = cache.GetCacheDirForWrite() 168 if os.path.exists(cache_dir): 169 self.l.LogOutput('Removing cache dir: %s' % cache_dir) 170 shutil.rmtree(cache_dir) 171 172 def _Run(self, experiment): 173 try: 174 # We should not lease machines if tests are launched via `crosfleet 175 # create-test`. This is because leasing DUT in crosfleet will create a 176 # no-op task on the DUT and new test created will be hanging there. 177 # TODO(zhizhouy): Need to check whether machine is ready or not before 178 # assigning a test to it. 179 if not experiment.no_lock and not experiment.crosfleet: 180 self._LockAllMachines(experiment) 181 # Calculate all checksums of avaiable/locked machines, to ensure same 182 # label has same machines for testing 183 experiment.SetCheckSums(forceSameImage=True) 184 if self._using_schedv2: 185 schedv2 = Schedv2(experiment) 186 experiment.set_schedv2(schedv2) 187 if CacheConditions.FALSE in experiment.cache_conditions: 188 self._ClearCacheEntries(experiment) 189 status = ExperimentStatus(experiment) 190 experiment.Run() 191 last_status_time = 0 192 last_status_string = '' 193 try: 194 if experiment.log_level != 'verbose': 195 self.l.LogStartDots() 196 while not experiment.IsComplete(): 197 if last_status_time + self.STATUS_TIME_DELAY < time.time(): 198 last_status_time = time.time() 199 border = '==============================' 200 if experiment.log_level == 'verbose': 201 self.l.LogOutput(border) 202 self.l.LogOutput(status.GetProgressString()) 203 self.l.LogOutput(status.GetStatusString()) 204 self.l.LogOutput(border) 205 else: 206 current_status_string = status.GetStatusString() 207 if current_status_string != last_status_string: 208 self.l.LogEndDots() 209 self.l.LogOutput(border) 210 self.l.LogOutput(current_status_string) 211 self.l.LogOutput(border) 212 last_status_string = current_status_string 213 else: 214 self.l.LogAppendDot() 215 time.sleep(self.THREAD_MONITOR_DELAY) 216 except KeyboardInterrupt: 217 self._terminated = True 218 self.l.LogError('Ctrl-c pressed. Cleaning up...') 219 experiment.Terminate() 220 raise 221 except SystemExit: 222 self._terminated = True 223 self.l.LogError('Unexpected exit. Cleaning up...') 224 experiment.Terminate() 225 raise 226 finally: 227 experiment.Cleanup() 228 229 def _PrintTable(self, experiment): 230 self.l.LogOutput(TextResultsReport.FromExperiment(experiment).GetReport()) 231 232 def _Email(self, experiment): 233 # Only email by default if a new run was completed. 234 send_mail = False 235 for benchmark_run in experiment.benchmark_runs: 236 if not benchmark_run.cache_hit: 237 send_mail = True 238 break 239 if (not send_mail and not experiment.email_to 240 or config.GetConfig('no_email')): 241 return 242 243 label_names = [] 244 for label in experiment.labels: 245 label_names.append(label.name) 246 subject = '%s: %s' % (experiment.name, ' vs. '.join(label_names)) 247 248 text_report = TextResultsReport.FromExperiment(experiment, 249 True).GetReport() 250 text_report += ('\nResults are stored in %s.\n' % 251 experiment.results_directory) 252 text_report = "<pre style='font-size: 13px'>%s</pre>" % text_report 253 html_report = HTMLResultsReport.FromExperiment(experiment).GetReport() 254 attachment = EmailSender.Attachment('report.html', html_report) 255 email_to = experiment.email_to or [] 256 email_to.append(getpass.getuser()) 257 EmailSender().SendEmail(email_to, 258 subject, 259 text_report, 260 attachments=[attachment], 261 msg_type='html') 262 263 def _StoreResults(self, experiment): 264 if self._terminated: 265 return self.ALL_FAILED 266 267 results_directory = experiment.results_directory 268 FileUtils().RmDir(results_directory) 269 FileUtils().MkDirP(results_directory) 270 self.l.LogOutput('Storing experiment file in %s.' % results_directory) 271 experiment_file_path = os.path.join(results_directory, 'experiment.exp') 272 FileUtils().WriteFile(experiment_file_path, experiment.experiment_file) 273 274 has_failure = False 275 all_failed = True 276 277 topstats_file = os.path.join(results_directory, 'topstats.log') 278 self.l.LogOutput('Storing top statistics of each benchmark run into %s.' % 279 topstats_file) 280 with open(topstats_file, 'w') as top_fd: 281 for benchmark_run in experiment.benchmark_runs: 282 if benchmark_run.result: 283 # FIXME: Pylint has a bug suggesting the following change, which 284 # should be fixed in pylint 2.0. Resolve this after pylint >= 2.0. 285 # Bug: https://github.com/PyCQA/pylint/issues/1984 286 # pylint: disable=simplifiable-if-statement 287 if benchmark_run.result.retval: 288 has_failure = True 289 else: 290 all_failed = False 291 # Header with benchmark run name. 292 top_fd.write('%s\n' % str(benchmark_run)) 293 # Formatted string with top statistics. 294 top_fd.write(benchmark_run.result.FormatStringTopCommands()) 295 top_fd.write('\n\n') 296 297 if all_failed: 298 return self.ALL_FAILED 299 300 self.l.LogOutput('Storing results of each benchmark run.') 301 for benchmark_run in experiment.benchmark_runs: 302 if benchmark_run.result: 303 benchmark_run_name = ''.join(ch for ch in benchmark_run.name 304 if ch.isalnum()) 305 benchmark_run_path = os.path.join(results_directory, 306 benchmark_run_name) 307 if experiment.compress_results: 308 benchmark_run.result.CompressResultsTo(benchmark_run_path) 309 else: 310 benchmark_run.result.CopyResultsTo(benchmark_run_path) 311 benchmark_run.result.CleanUp(benchmark_run.benchmark.rm_chroot_tmp) 312 313 self.l.LogOutput('Storing results report in %s.' % results_directory) 314 results_table_path = os.path.join(results_directory, 'results.html') 315 report = HTMLResultsReport.FromExperiment(experiment).GetReport() 316 if self.json_report: 317 json_report = JSONResultsReport.FromExperiment(experiment, 318 json_args={'indent': 2}) 319 _WriteJSONReportToFile(experiment, results_directory, json_report) 320 321 FileUtils().WriteFile(results_table_path, report) 322 323 self.l.LogOutput('Storing email message body in %s.' % results_directory) 324 msg_file_path = os.path.join(results_directory, 'msg_body.html') 325 text_report = TextResultsReport.FromExperiment(experiment, 326 True).GetReport() 327 text_report += ('\nResults are stored in %s.\n' % 328 experiment.results_directory) 329 msg_body = "<pre style='font-size: 13px'>%s</pre>" % text_report 330 FileUtils().WriteFile(msg_file_path, msg_body) 331 332 return self.SUCCEEDED if not has_failure else self.HAS_FAILURE 333 334 def Run(self): 335 try: 336 self._Run(self._experiment) 337 finally: 338 # Always print the report at the end of the run. 339 self._PrintTable(self._experiment) 340 ret = self._StoreResults(self._experiment) 341 if ret != self.ALL_FAILED: 342 self._Email(self._experiment) 343 return ret 344 345 346class MockExperimentRunner(ExperimentRunner): 347 """Mocked ExperimentRunner for testing.""" 348 349 def __init__(self, experiment, json_report): 350 super(MockExperimentRunner, self).__init__(experiment, json_report) 351 352 def _Run(self, experiment): 353 self.l.LogOutput("Would run the following experiment: '%s'." % 354 experiment.name) 355 356 def _PrintTable(self, experiment): 357 self.l.LogOutput('Would print the experiment table.') 358 359 def _Email(self, experiment): 360 self.l.LogOutput('Would send result email.') 361 362 def _StoreResults(self, experiment): 363 self.l.LogOutput('Would store the results.') 364