• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2024 The Chromium Authors
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5from __future__ import annotations
6
7import datetime as dt
8import json
9import logging
10import re
11from enum import IntEnum
12from typing import TYPE_CHECKING, Iterable, Optional
13
14from crossbench import helper
15from crossbench.probes.internal import (InternalJsonResultProbe,
16                                        InternalJsonResultProbeContext)
17from crossbench.probes.probe import ProbeIncompatibleBrowser
18from crossbench.probes.result_location import ResultLocation
19from crossbench.probes.results import EmptyProbeResult, LocalProbeResult
20
21if TYPE_CHECKING:
22  from crossbench.browsers.browser import Browser
23  from crossbench.env import HostEnvironment
24  from crossbench.probes.results import ProbeResult, ProbeResultDict
25  from crossbench.runner.actions import Actions
26  from crossbench.runner.groups.browsers import BrowsersRunGroup
27  from crossbench.runner.groups.repetitions import RepetitionsRunGroup
28  from crossbench.runner.groups.stories import StoriesRunGroup
29  from crossbench.runner.run import Run
30  from crossbench.types import Json
31
# Matches a "Thermal Status: <digits>" line and captures the digits in the
# named group `status`. It is applied to the stdout of
# `dumpsys thermalservice` in AndroidThermalMonitorProbeContext.
THERMAL_STATUS_RE = re.compile(r"Thermal Status: (?P<status>\d+)")
# Shared wait configuration for all cool-down loops in this file: `min` is
# 1 second and `timeout` is 5 minutes.
# NOTE(review): presumably `min` is the initial back-off delay and a
# TimeoutError is raised once `timeout` elapses -- confirm against
# helper.WaitRange.
COOLDOWN_WAIT_RANGE = helper.WaitRange(
    min=dt.timedelta(seconds=1), timeout=dt.timedelta(minutes=5))
35
36
class ThermalStatus(IntEnum):
  """Thermal throttling severity levels.

  Members are integers and are compared numerically: a larger value means
  heavier throttling. UNAVAILABLE (-1) sorts below every real status.

  NOTE(review): values 0..6 presumably mirror Android's
  PowerManager.THERMAL_STATUS_* constants -- confirm.
  """
  UNAVAILABLE = -1
  NONE = 0
  LIGHT = 1
  MODERATE = 2
  SEVERE = 3
  CRITICAL = 4
  EMERGENCY = 5
  SHUTDOWN = 6

  @classmethod
  def parse(cls, value: str) -> "ThermalStatus":
    """Converts a numeric or symbolic status string into a member.

    Two spellings are accepted:
      * the integer value of a member, e.g. "3" or "-1"
      * any string whose upper-cased form ends with a member name, e.g.
        "severe" or "THERMAL_STATUS_SEVERE"
    Leading and trailing whitespace is ignored for both spellings.

    Args:
      value: The text to interpret.

    Returns:
      The matching ThermalStatus member.

    Raises:
      ValueError: If value matches neither a member value nor a member name.
    """
    try:
      return cls(int(value))
    except ValueError:
      # Either not an integer at all, or an integer that is no member value;
      # in both cases fall through to matching by name.
      pass

    # int() above already tolerates surrounding whitespace, so strip here as
    # well to keep both spellings equally lenient (e.g. "LIGHT\n").
    normalized = value.strip().upper()
    for member in cls:
      if normalized.endswith(member.name):
        return member

    raise ValueError(f"Invalid ThermalStatus: {value!r}")
59
60
class ThermalMonitorProbe(InternalJsonResultProbe):
  """
  Internal probe to monitor device thermal status.

  Per-run JSON results may contain a "max_observed_status" entry. The merge
  hooks reduce those entries to a single maximum per group, and
  log_browsers_result() reports throttling based on the merged value.
  """
  NAME = "cb.thermal_monitor"
  RESULT_LOCATION = ResultLocation.LOCAL

  def __init__(self,
               cool_down_time: dt.timedelta = dt.timedelta(),
               threshold: Optional[ThermalStatus] = None):
    """
    Args:
      cool_down_time: Duration passed to the runner's wait() by the default
        probe context during setup.
      threshold: Optional status level; when set, the Android probe context
        waits before a run until the device reports a status below it.

    Raises:
      ValueError: If threshold is given and is not greater than zero.
    """
    super().__init__()
    self._threshold: Optional[ThermalStatus] = threshold
    self._cool_down_time: dt.timedelta = cool_down_time
    if threshold is not None:
      if threshold <= 0:
        raise ValueError("Threshold must be positive")

  @property
  def result_path_name(self) -> str:
    """File name used for this probe's JSON result."""
    return "cb.thermal_monitor.json"

  @property
  def threshold(self) -> Optional[ThermalStatus]:
    """The configured threshold, or None if no threshold was requested."""
    return self._threshold

  @property
  def cool_down_time(self) -> dt.timedelta:
    """The configured cool-down duration."""
    return self._cool_down_time

  def to_json(self, actions: Actions) -> Json:
    """Unsupported on the probe itself; always raises NotImplementedError."""
    raise NotImplementedError("Should not be called, data comes from context")

  def validate_browser(self, env: HostEnvironment, browser: Browser) -> None:
    """Rejects a configured threshold for browsers not running on android.

    Raises:
      ProbeIncompatibleBrowser: If a threshold is set and the browser's
        platform is not android.
    """
    super().validate_browser(env, browser)
    if self.threshold is None or browser.platform.is_android:
      return
    raise ProbeIncompatibleBrowser(
        self, browser, "Thermal thresholds only supported on android")

  def merge_repetitions(self, group: RepetitionsRunGroup) -> ProbeResult:
    """Merges the results of all runs in a repetitions group."""
    per_run_results = (run.results for run in group.runs)
    return self._merge_group(group, per_run_results)

  def merge_stories(self, group: StoriesRunGroup) -> ProbeResult:
    """Merges the results of all repetitions groups in a stories group."""
    per_repetitions_results = (
        rep_group.results for rep_group in group.repetitions_groups)
    return self._merge_group(group, per_repetitions_results)

  def merge_browsers(self, group: BrowsersRunGroup) -> ProbeResult:
    """Merges the results of all story groups in a browsers group."""
    per_story_results = (
        story_group.results for story_group in group.story_groups)
    return self._merge_group(group, per_story_results)

  def _merge_group(self, group,
                   results_iter: Iterable[ProbeResultDict]) -> ProbeResult:
    """Writes the maximum "max_observed_status" of all sub-results to a file.

    Sub-results that are falsy, or whose JSON lacks a "max_observed_status"
    entry, are skipped.

    Returns:
      An EmptyProbeResult if no sub-result contributed a status, otherwise a
      LocalProbeResult pointing at the merged JSON file of the group.
    """
    statuses = []
    for sub_results in results_iter:
      sub_result = sub_results[self]
      if not sub_result:
        continue
      with sub_result.json.open(encoding="utf-8") as json_file:
        payload = json.load(json_file)
      if "max_observed_status" in payload:
        statuses.append(ThermalStatus(payload["max_observed_status"]))

    if not statuses:
      return EmptyProbeResult()

    # UNAVAILABLE is the floor, so it only wins if every input was
    # UNAVAILABLE as well.
    group_max_status = max(ThermalStatus.UNAVAILABLE, *statuses)

    merged_path = group.get_local_probe_result_path(self)
    with merged_path.open("w", encoding="utf-8") as out_file:
      json.dump({"max_observed_status": group_max_status}, out_file, indent=2)
      # TODO(375390958): figure out why files aren't fully written to
      # pyfakefs here.
      out_file.write("\n")

    return LocalProbeResult(json=(merged_path,))

  def log_browsers_result(self, group: BrowsersRunGroup) -> None:
    """Logs an error if the merged result shows thermal throttling.

    Nothing is logged when the group has no (or a falsy) result for this
    probe, or when the merged status is below LIGHT.
    """
    merged = group.results[self] if self in group.results else None
    if not merged:
      return

    with merged.json.open(encoding="utf-8") as json_file:
      peak_status = ThermalStatus(json.load(json_file)["max_observed_status"])

    if peak_status < ThermalStatus.LIGHT:
      return

    logging.info("-" * 80)
    if peak_status == ThermalStatus.LIGHT:
      logging.error("Light thermal throttling detected during execution, "
                    "scores may be affected.")
    else:
      logging.error("Significant thermal throttling detected during execution, "
                    "scores are not representative of the device performance.")

  def get_context(self, run: Run) -> ThermalMonitorProbeContext:
    """Creates the android-specific context on android, else the default."""
    context_cls = ThermalMonitorProbeContext
    if run.browser.platform.is_android:
      context_cls = AndroidThermalMonitorProbeContext
    return context_cls(self, run)
161
162
class ThermalMonitorProbeContext(InternalJsonResultProbeContext):
  """Default per-run context: cools the device down and records no data."""

  def __init__(self, probe: ThermalMonitorProbe, run: Run) -> None:
    super().__init__(probe, run)

  @property
  def probe(self) -> ThermalMonitorProbe:
    """The owning probe, typed as ThermalMonitorProbe."""
    return self._probe

  def _wait_until_unthrottled(self) -> None:
    """Polls with back-off until the platform stops reporting throttling."""
    logging.info("COOLDOWN")
    for _ in COOLDOWN_WAIT_RANGE.wait_with_backoff():
      if not self.browser_platform.is_thermal_throttled():
        return
      logging.info("COOLDOWN: still hot, waiting some more")

  def setup(self) -> None:
    """Waits the probe's cool-down time, then longer while still throttled."""
    # The configured cool-down wait always happens, throttled or not.
    self.run.runner.wait(self.probe.cool_down_time, absolute_time=True)

    if self.browser_platform.is_thermal_throttled():
      self._wait_until_unthrottled()

  def to_json(self, actions: Actions) -> Json:
    """Returns an empty JSON object; this context collects no thermal data."""
    # The argument is required by the interface but not used here.
    del actions
    return {}
186
187
class AndroidThermalMonitorProbeContext(ThermalMonitorProbeContext):
  """Android per-run context that tracks the highest observed status.

  The status is read from the output of `dumpsys thermalservice`, sampled
  once in setup() and once in teardown().
  """

  def __init__(self, probe: ThermalMonitorProbe, run: Run) -> None:
    super().__init__(probe, run)
    self._max_observed_status: ThermalStatus = ThermalStatus.UNAVAILABLE

  def _get_thermal_status(self) -> ThermalStatus:
    """Reads the current status, or UNAVAILABLE if the output has none."""
    dumpsys_output = self.browser_platform.sh_stdout("dumpsys",
                                                     "thermalservice")
    found = THERMAL_STATUS_RE.search(dumpsys_output)
    if not found:
      return ThermalStatus.UNAVAILABLE
    return ThermalStatus(int(found["status"]))

  def _sample_and_track(self) -> ThermalStatus:
    """Reads the current status and folds it into the observed maximum."""
    status = self._get_thermal_status()
    if status > self._max_observed_status:
      self._max_observed_status = status
    return status

  def _wait_if_necessary(self, probe_threshold: ThermalStatus) -> None:
    """Blocks until the status is below probe_threshold, or the wait expires.

    A TimeoutError from the wait loop is logged as an error and swallowed.
    """
    status = self._get_thermal_status()
    if status < probe_threshold:
      return

    logging.info("Thermal throttling status too high: %s", status.name)
    logging.info("COOLDOWN")
    try:
      for _ in COOLDOWN_WAIT_RANGE.wait_with_backoff():
        status = self._get_thermal_status()
        logging.debug("Thermal status: %s", status.name)
        if status >= probe_threshold:
          continue
        logging.info("COOLDOWN: complete")
        return
    except TimeoutError:
      logging.error("COOLDOWN: device is still too hot after waiting for %s",
                    COOLDOWN_WAIT_RANGE.timeout)

  def setup(self) -> None:
    """Cools down (by threshold if set, else the default way) and samples."""
    threshold = self.probe.threshold
    if threshold is None:
      super().setup()
    else:
      self._wait_if_necessary(threshold)

    status_before = self._sample_and_track()
    logging.debug("Thermal throttling before run: %s", status_before.name)

  def teardown(self) -> ProbeResult:
    """Samples the status once more before delegating to the base teardown."""
    status_after = self._sample_and_track()
    logging.debug("Thermal throttling after run: %s", status_after.name)
    # TODO(crbug.com/374737038): After crbug.com/374737038 is done, raise an
    # exception here if max status was at threshold or higher. This will
    # register the run as a failure to process it correctly later.
    return super().teardown()

  def to_json(self, actions: Actions) -> Json:
    """Returns the highest status seen so far as a plain integer value."""
    # The argument is required by the interface but not used here.
    del actions
    return {"max_observed_status": self._max_observed_status.value}
240