• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
# -*- coding: utf-8 -*-
# Copyright 2011 The ChromiumOS Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""The experiment runner module."""
7
8import getpass
9import os
10import shutil
11import time
12
13from cros_utils import command_executer
14from cros_utils import logger
15from cros_utils.email_sender import EmailSender
16from cros_utils.file_utils import FileUtils
17from experiment_status import ExperimentStatus
18import lock_machine
19from results_cache import CacheConditions
20from results_cache import ResultsCache
21from results_report import HTMLResultsReport
22from results_report import JSONResultsReport
23from results_report import TextResultsReport
24from schedv2 import Schedv2
25import test_flag
26
27import config
28
29
def _WriteJSONReportToFile(experiment, results_dir, json_report):
    """Save the text of json_report into a new file under results_dir.

    The file is named "report_<board>_<date>_<time>.<compiler>.json". The
    board comes from the first label of the experiment, the date and time
    come from json_report (colons in the time are replaced by dots), and the
    compiler part is "llvm" when any label's compiler contains "llvm",
    otherwise "gcc".

    Args:
      experiment: The experiment whose labels determine board and compiler.
      results_dir: Directory in which the report file is created.
      json_report: Report object providing date, time and GetReport().
    """
    compiler_string = "gcc"
    for label in experiment.labels:
        if "llvm" in label.compiler:
            compiler_string = "llvm"
            break

    name_parts = (
        experiment.labels[0].board,
        json_report.date,
        json_report.time.replace(":", "."),
        compiler_string,
    )
    fullname = os.path.join(results_dir, "report_%s_%s_%s.%s.json" % name_parts)

    # Render the report before opening the file so a failure while rendering
    # does not leave an empty file behind.
    report_text = json_report.GetReport()
    with open(fullname, "w") as out_file:
        out_file.write(report_text)
45
46
class ExperimentRunner(object):
    """Runs an experiment: locks machines, runs, reports and stores results.

    Class attributes:
      STATUS_TIME_DELAY: Minimum number of seconds between two status
        outputs while the experiment is running. Lowered to 10 on the
        instance when the experiment's log level is not "verbose".
      THREAD_MONITOR_DELAY: Seconds to sleep between two completion checks.
      SUCCEEDED, HAS_FAILURE, ALL_FAILED: Status codes returned by
        _StoreResults() and Run().
    """

    STATUS_TIME_DELAY = 30
    THREAD_MONITOR_DELAY = 2

    SUCCEEDED = 0
    HAS_FAILURE = 1
    ALL_FAILED = 2

    def __init__(
        self,
        experiment,
        json_report,
        using_schedv2=False,
        log=None,
        cmd_exec=None,
    ):
        """Initializes the runner.

        Args:
          experiment: The experiment to run.
          json_report: If true, a JSON report is written when results are
            stored.
          using_schedv2: Whether to use crosperf sched v2.
          log: Logger to use; defaults to a logger for experiment.log_dir.
          cmd_exec: Command executer to use; defaults to one built on the
            logger.
        """
        self._experiment = experiment
        self.l = log or logger.GetLogger(experiment.log_dir)
        self._ce = cmd_exec or command_executer.GetCommandExecuter(self.l)
        self._terminated = False
        self.json_report = json_report
        self.locked_machines = []
        if experiment.log_level != "verbose":
            self.STATUS_TIME_DELAY = 10

        # Setting this to True will use crosperf sched v2 (feature in progress).
        self._using_schedv2 = using_schedv2

    def _GetMachineList(self):
        """Return a list of all requested machines.

        Every label-specific machine is expected to also be listed in the
        experiment-wide list, so that list is returned as is (the same list
        object, not a copy).

        Returns:
          The experiment's list of remote machines.
        """
        machines = self._experiment.remote
        # All Label.remote is a sublist of experiment.remote.
        for l in self._experiment.labels:
            for r in l.remote:
                assert r in machines
        return machines

    def _UpdateMachineList(self, locked_machines):
        """Update machines lists to contain only locked machines.

        Go through all the lists of requested machines, both global and
        label-specific requests, and remove any machine that we were not
        able to lock. The lists are filtered in place, so other references
        to the same list objects see the update.

        Args:
          locked_machines: A list of the machines we successfully locked.
        """
        # Filter via slice assignment instead of calling remove() while
        # iterating: removing during iteration skips the element that
        # follows each removed one and can leave unlocked machines behind.
        remote = self._experiment.remote
        remote[:] = [m for m in remote if m in locked_machines]

        for l in self._experiment.labels:
            l.remote[:] = [m for m in l.remote if m in locked_machines]

    def _GetMachineType(self, lock_mgr, machine):
        """Get where is the machine from.

        Args:
          lock_mgr: Lock manager used to check whether a lab machine is in
            crosfleet.
          machine: Name or address of the machine.

        Returns:
          The location of the machine: local or crosfleet

        Raises:
          RuntimeError: The machine looks like a lab machine but is not in
            crosfleet.
        """
        # We assume that lab machine always starts with chromeos*, and local
        # machines are ip address.
        if "chromeos" not in machine:
            return "local"
        if lock_mgr.CheckMachineInCrosfleet(machine):
            return "crosfleet"
        raise RuntimeError("Lab machine not in Crosfleet.")

    def _LockAllMachines(self, experiment):
        """Attempt to globally lock all of the machines requested for run.

        This method tries to lock all machines requested for this crosperf run
        in two different modes automatically, to prevent any other crosperf
        runs from being able to update/use the machines while this experiment
        is running:
          - Crosfleet machines: Use crosfleet lease-dut mechanism to lease
          - Local machines: Use file lock mechanism to lock

        In test mode no locking is done and every requested machine is
        treated as locked.

        Args:
          experiment: The experiment whose machines are to be locked.

        Raises:
          RuntimeError: No machine could be locked.
        """
        if test_flag.GetTestMode():
            self.locked_machines = self._GetMachineList()
            experiment.locked_machines = self.locked_machines
            return

        experiment.lock_mgr = lock_machine.LockManager(
            self._GetMachineList(),
            "",
            experiment.labels[0].chromeos_root,
            experiment.locks_dir,
            log=self.l,
        )
        for m in experiment.lock_mgr.machines:
            machine_type = self._GetMachineType(experiment.lock_mgr, m)
            if machine_type == "local":
                experiment.lock_mgr.AddMachineToLocal(m)
            elif machine_type == "crosfleet":
                experiment.lock_mgr.AddMachineToCrosfleet(m)
        machine_states = experiment.lock_mgr.GetMachineStates("lock")
        experiment.lock_mgr.CheckMachineLocks(machine_states, "lock")
        self.locked_machines = experiment.lock_mgr.UpdateMachines(True)
        experiment.locked_machines = self.locked_machines
        self._UpdateMachineList(self.locked_machines)
        experiment.machine_manager.RemoveNonLockedMachines(
            self.locked_machines
        )
        if not self.locked_machines:
            raise RuntimeError("Unable to lock any machines.")

    def _ClearCacheEntries(self, experiment):
        """Remove the on-disk cache directory of every benchmark run.

        Args:
          experiment: The experiment whose benchmark runs' caches are removed.
        """
        for br in experiment.benchmark_runs:
            cache = ResultsCache()
            cache.Init(
                br.label.chromeos_image,
                br.label.chromeos_root,
                br.benchmark.test_name,
                br.iteration,
                br.test_args,
                br.profiler_args,
                br.machine_manager,
                br.machine,
                br.label.board,
                br.cache_conditions,
                br.logger(),
                br.log_level,
                br.label,
                br.share_cache,
                br.benchmark.suite,
                br.benchmark.show_all_results,
                br.benchmark.run_local,
                br.benchmark.cwp_dso,
            )
            cache_dir = cache.GetCacheDirForWrite()
            if os.path.exists(cache_dir):
                self.l.LogOutput("Removing cache dir: %s" % cache_dir)
                shutil.rmtree(cache_dir)

    def _Run(self, experiment):
        """Lock machines, start the experiment and wait until it completes.

        While waiting, status is logged at most every STATUS_TIME_DELAY
        seconds. On Ctrl-c or SystemExit the experiment is terminated, the
        runner is marked as terminated and the exception is re-raised.
        experiment.Cleanup() is always called at the end.

        Args:
          experiment: The experiment to run.
        """
        try:
            # We should not lease machines if tests are launched via `crosfleet
            # create-test`. This is because leasing DUT in crosfleet will create a
            # no-op task on the DUT and new test created will be hanging there.
            # TODO(zhizhouy): Need to check whether machine is ready or not before
            # assigning a test to it.
            if not experiment.no_lock and not experiment.crosfleet:
                self._LockAllMachines(experiment)
            # Calculate all checksums of available/locked machines, to ensure
            # same label has same machines for testing
            experiment.SetCheckSums(forceSameImage=True)
            if self._using_schedv2:
                schedv2 = Schedv2(experiment)
                experiment.set_schedv2(schedv2)
            if CacheConditions.FALSE in experiment.cache_conditions:
                self._ClearCacheEntries(experiment)
            status = ExperimentStatus(experiment)
            experiment.Run()
            last_status_time = 0
            last_status_string = ""
            try:
                if experiment.log_level != "verbose":
                    self.l.LogStartDots()
                while not experiment.IsComplete():
                    if last_status_time + self.STATUS_TIME_DELAY < time.time():
                        last_status_time = time.time()
                        border = "=============================="
                        if experiment.log_level == "verbose":
                            self.l.LogOutput(border)
                            self.l.LogOutput(status.GetProgressString())
                            self.l.LogOutput(status.GetStatusString())
                            self.l.LogOutput(border)
                        else:
                            # In non-verbose mode only print the status when it
                            # changed; otherwise just append a progress dot.
                            current_status_string = status.GetStatusString()
                            if current_status_string != last_status_string:
                                self.l.LogEndDots()
                                self.l.LogOutput(border)
                                self.l.LogOutput(current_status_string)
                                self.l.LogOutput(border)
                                last_status_string = current_status_string
                            else:
                                self.l.LogAppendDot()
                    time.sleep(self.THREAD_MONITOR_DELAY)
            except KeyboardInterrupt:
                self._terminated = True
                self.l.LogError("Ctrl-c pressed. Cleaning up...")
                experiment.Terminate()
                raise
            except SystemExit:
                self._terminated = True
                self.l.LogError("Unexpected exit. Cleaning up...")
                experiment.Terminate()
                raise
        finally:
            experiment.Cleanup()

    def _PrintTable(self, experiment):
        """Log the text results report of the experiment."""
        self.l.LogOutput(
            TextResultsReport.FromExperiment(experiment).GetReport()
        )

    def _Email(self, experiment):
        """Send the results of the experiment by email.

        Nothing is sent when the "no_email" config is set, or when every
        benchmark run was a cache hit and the experiment has no explicit
        recipients. The current user is always added to the recipients.

        Args:
          experiment: The experiment whose results are mailed.
        """
        # Only email by default if a new run was completed.
        send_mail = any(
            not benchmark_run.cache_hit
            for benchmark_run in experiment.benchmark_runs
        )
        if (not send_mail and not experiment.email_to) or config.GetConfig(
            "no_email"
        ):
            return

        label_names = [label.name for label in experiment.labels]
        subject = "%s: %s" % (experiment.name, " vs. ".join(label_names))

        text_report = TextResultsReport.FromExperiment(
            experiment, True
        ).GetReport()
        text_report += (
            "\nResults are stored in %s.\n" % experiment.results_directory
        )
        text_report = "<pre style='font-size: 13px'>%s</pre>" % text_report
        html_report = HTMLResultsReport.FromExperiment(experiment).GetReport()
        attachment = EmailSender.Attachment("report.html", html_report)
        # Work on a copy so the experiment's own recipient list is not
        # modified (appending to it would grow it on every call).
        email_to = list(experiment.email_to or [])
        email_to.append(getpass.getuser())
        EmailSender().SendEmail(
            email_to,
            subject,
            text_report,
            attachments=[attachment],
            msg_type="html",
        )

    def _StoreResults(self, experiment):
        """Write the experiment file, statistics, results and reports to disk.

        The results directory is removed and recreated first. Nothing is
        stored when the run was terminated, and only the experiment file and
        top statistics are stored when no benchmark run succeeded.

        Args:
          experiment: The experiment whose results are stored.

        Returns:
          ALL_FAILED if the run was terminated or no benchmark run has a
          result with a zero retval, HAS_FAILURE if at least one benchmark
          run has a non-zero retval, SUCCEEDED otherwise.
        """
        if self._terminated:
            return self.ALL_FAILED

        results_directory = experiment.results_directory
        FileUtils().RmDir(results_directory)
        FileUtils().MkDirP(results_directory)
        self.l.LogOutput("Storing experiment file in %s." % results_directory)
        experiment_file_path = os.path.join(results_directory, "experiment.exp")
        FileUtils().WriteFile(experiment_file_path, experiment.experiment_file)

        has_failure = False
        all_failed = True

        topstats_file = os.path.join(results_directory, "topstats.log")
        self.l.LogOutput(
            "Storing top statistics of each benchmark run into %s."
            % topstats_file
        )
        with open(topstats_file, "w") as top_fd:
            for benchmark_run in experiment.benchmark_runs:
                if benchmark_run.result:
                    # FIXME: Pylint has a bug suggesting the following change, which
                    # should be fixed in pylint 2.0. Resolve this after pylint >= 2.0.
                    # Bug: https://github.com/PyCQA/pylint/issues/1984
                    # pylint: disable=simplifiable-if-statement
                    if benchmark_run.result.retval:
                        has_failure = True
                    else:
                        all_failed = False
                    # Header with benchmark run name.
                    top_fd.write("%s\n" % str(benchmark_run))
                    # Formatted string with top statistics.
                    top_fd.write(benchmark_run.result.FormatStringTopCommands())
                    top_fd.write("\n\n")

        if all_failed:
            return self.ALL_FAILED

        self.l.LogOutput("Storing results of each benchmark run.")
        for benchmark_run in experiment.benchmark_runs:
            if benchmark_run.result:
                # Keep only alphanumeric characters of the run name for the
                # directory name.
                benchmark_run_name = "".join(
                    ch for ch in benchmark_run.name if ch.isalnum()
                )
                benchmark_run_path = os.path.join(
                    results_directory, benchmark_run_name
                )
                if experiment.compress_results:
                    benchmark_run.result.CompressResultsTo(benchmark_run_path)
                else:
                    benchmark_run.result.CopyResultsTo(benchmark_run_path)
                benchmark_run.result.CleanUp(
                    benchmark_run.benchmark.rm_chroot_tmp
                )

        self.l.LogOutput("Storing results report in %s." % results_directory)
        results_table_path = os.path.join(results_directory, "results.html")
        report = HTMLResultsReport.FromExperiment(experiment).GetReport()
        if self.json_report:
            json_report = JSONResultsReport.FromExperiment(
                experiment, json_args={"indent": 2}
            )
            _WriteJSONReportToFile(experiment, results_directory, json_report)

        FileUtils().WriteFile(results_table_path, report)

        self.l.LogOutput(
            "Storing email message body in %s." % results_directory
        )
        msg_file_path = os.path.join(results_directory, "msg_body.html")
        text_report = TextResultsReport.FromExperiment(
            experiment, True
        ).GetReport()
        text_report += (
            "\nResults are stored in %s.\n" % experiment.results_directory
        )
        msg_body = "<pre style='font-size: 13px'>%s</pre>" % text_report
        FileUtils().WriteFile(msg_file_path, msg_body)

        return self.SUCCEEDED if not has_failure else self.HAS_FAILURE

    def Run(self):
        """Run the experiment, then print, store and email its results.

        The report is printed and the results are stored even when running
        the experiment raised; in that case the exception still propagates
        once this is done. The email is skipped when storing the results
        reported ALL_FAILED.

        Returns:
          The status code returned by _StoreResults().
        """
        try:
            self._Run(self._experiment)
        finally:
            # Always print the report at the end of the run.
            self._PrintTable(self._experiment)
            ret = self._StoreResults(self._experiment)
            if ret != self.ALL_FAILED:
                self._Email(self._experiment)
        return ret
382
383
class MockExperimentRunner(ExperimentRunner):
    """ExperimentRunner stand-in that only logs what each step would do."""

    def __init__(self, experiment, json_report):
        """Initializes the mock with default scheduler, logger and executer."""
        super(MockExperimentRunner, self).__init__(experiment, json_report)

    def _Run(self, experiment):
        """Log the name of the experiment instead of running it."""
        message = "Would run the following experiment: '%s'." % experiment.name
        self.l.LogOutput(message)

    def _PrintTable(self, experiment):
        """Log a notice instead of printing the results table."""
        message = "Would print the experiment table."
        self.l.LogOutput(message)

    def _Email(self, experiment):
        """Log a notice instead of sending the result email."""
        message = "Would send result email."
        self.l.LogOutput(message)

    def _StoreResults(self, experiment):
        """Log a notice instead of storing results; returns None."""
        message = "Would store the results."
        self.l.LogOutput(message)
403