# Copyright 2024 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

from __future__ import annotations

import datetime as dt
import json
import logging
import re
from enum import IntEnum
from typing import TYPE_CHECKING, Iterable, Optional

from crossbench import helper
from crossbench.probes.internal import (InternalJsonResultProbe,
                                        InternalJsonResultProbeContext)
from crossbench.probes.probe import ProbeIncompatibleBrowser
from crossbench.probes.result_location import ResultLocation
from crossbench.probes.results import EmptyProbeResult, LocalProbeResult

if TYPE_CHECKING:
  from crossbench.browsers.browser import Browser
  from crossbench.env import HostEnvironment
  from crossbench.probes.results import ProbeResult, ProbeResultDict
  from crossbench.runner.actions import Actions
  from crossbench.runner.groups.browsers import BrowsersRunGroup
  from crossbench.runner.groups.repetitions import RepetitionsRunGroup
  from crossbench.runner.groups.stories import StoriesRunGroup
  from crossbench.runner.run import Run
  from crossbench.types import Json

# Extracts the numeric status from `dumpsys thermalservice` output.
THERMAL_STATUS_RE = re.compile(r"Thermal Status: (?P<status>\d+)")
# Polling window used while waiting for a throttled device to cool down.
COOLDOWN_WAIT_RANGE = helper.WaitRange(
    min=dt.timedelta(seconds=1), timeout=dt.timedelta(minutes=5))


class ThermalStatus(IntEnum):
  """Thermal throttling levels used by the thermal monitor probe.

  Being an IntEnum, members compare by value; this file relies on that to
  take the maximum observed level and to compare a level with a threshold.

  NOTE(review): names and values look like Android's PowerManager
  THERMAL_STATUS_* constants -- confirm against the platform documentation.
  """
  # Returned in this file when no status could be read from the device.
  UNAVAILABLE = -1
  NONE = 0
  LIGHT = 1
  MODERATE = 2
  SEVERE = 3
  CRITICAL = 4
  EMERGENCY = 5
  SHUTDOWN = 6

  @classmethod
  def parse(cls, value: str) -> ThermalStatus:
    """Converts a string into a ThermalStatus.

    Accepts either the integer value of a member (e.g. "3") or any string
    whose upper-cased form, ignoring surrounding whitespace, ends with a
    member name (e.g. "severe" or "THERMAL_STATUS_SEVERE").

    Raises:
      ValueError: if the value matches neither a member value nor a name.
    """
    try:
      return cls(int(value))
    except ValueError:
      # Either not an integer or not a known member value: try names next.
      pass

    normalized = value.strip().upper()
    for member in cls:
      if normalized.endswith(member.name):
        return member

    raise ValueError(f"Invalid ThermalStatus: {repr(value)}")


class ThermalMonitorProbe(InternalJsonResultProbe):
  """
  Internal probe to monitor device thermal status.

  Before each run the probe context waits for the device to cool down. On
  android the thermal status is additionally sampled before and after the
  run, and the maximum observed status is stored as JSON and merged across
  repetitions, stories and browsers.
  """
  NAME = "cb.thermal_monitor"
  RESULT_LOCATION = ResultLocation.LOCAL

  def __init__(self,
               cool_down_time: dt.timedelta = dt.timedelta(),
               threshold: Optional[ThermalStatus] = None):
    """Creates the probe.

    Args:
      cool_down_time: Passed to the runner's wait() before each run when no
        threshold is configured.
      threshold: Optional status; on android, setup waits until the device
        reports a status strictly below it. Must be greater than
        ThermalStatus.NONE.

    Raises:
      ValueError: if threshold is given and is not positive.
    """
    super().__init__()
    # Validate before storing so no invalid state is kept on the instance.
    if threshold is not None and threshold <= 0:
      raise ValueError("Threshold must be positive")
    self._threshold: Optional[ThermalStatus] = threshold
    self._cool_down_time: dt.timedelta = cool_down_time

  @property
  def result_path_name(self) -> str:
    """File name of the JSON result written by this probe."""
    return "cb.thermal_monitor.json"

  @property
  def threshold(self) -> Optional[ThermalStatus]:
    """Status the device must drop below before a run, or None."""
    return self._threshold

  @property
  def cool_down_time(self) -> dt.timedelta:
    """Wait time applied before a run when no threshold is configured."""
    return self._cool_down_time

  def to_json(self, actions: Actions) -> Json:
    raise NotImplementedError("Should not be called, data comes from context")

  def validate_browser(self, env: HostEnvironment, browser: Browser) -> None:
    """Rejects browsers on non-android platforms if a threshold is set.

    Raises:
      ProbeIncompatibleBrowser: threshold is set and the browser's platform
        is not android.
    """
    super().validate_browser(env, browser)
    if self.threshold is not None and not browser.platform.is_android:
      raise ProbeIncompatibleBrowser(
          self, browser, "Thermal thresholds only supported on android")

  def merge_repetitions(self, group: RepetitionsRunGroup) -> ProbeResult:
    return self._merge_group(group, (run.results for run in group.runs))

  def merge_stories(self, group: StoriesRunGroup) -> ProbeResult:
    return self._merge_group(
        group, (rep_group.results for rep_group in group.repetitions_groups))

  def merge_browsers(self, group: BrowsersRunGroup) -> ProbeResult:
    return self._merge_group(
        group, (story_group.results for story_group in group.story_groups))

  def _merge_group(self, group,
                   results_iter: Iterable[ProbeResultDict]) -> ProbeResult:
    """Merges child results into one file holding the maximum status.

    Child results that are empty or lack "max_observed_status" are skipped.

    Returns:
      An EmptyProbeResult if no child contributed a status, otherwise a
      LocalProbeResult pointing at the merged JSON file.
    """
    group_max_status: ThermalStatus = ThermalStatus.UNAVAILABLE
    has_results: bool = False
    for results in results_iter:
      result = results[self]
      if not result:
        continue
      with result.json.open(encoding="utf-8") as f:
        thermals = json.load(f)
      if "max_observed_status" not in thermals:
        continue
      child_max_status = ThermalStatus(thermals["max_observed_status"])
      group_max_status = max(group_max_status, child_max_status)
      has_results = True

    if not has_results:
      return EmptyProbeResult()

    merged_path = group.get_local_probe_result_path(self)
    with merged_path.open("w", encoding="utf-8") as f:
      # Store the plain integer, matching what the probe context emits.
      json.dump({"max_observed_status": group_max_status.value}, f, indent=2)
      # TODO(375390958): figure out why files aren't fully written to
      # pyfakefs here.
      f.write("\n")

    return LocalProbeResult(json=(merged_path,))

  def log_browsers_result(self, group: BrowsersRunGroup) -> None:
    """Logs an error if any throttling was recorded in the merged result."""
    if self not in group.results:
      return
    result = group.results[self]
    if not result:
      return

    with result.json.open(encoding="utf-8") as f:
      thermals = json.load(f)
    max_observed_status = ThermalStatus(thermals["max_observed_status"])

    if max_observed_status == ThermalStatus.LIGHT:
      logging.info("-" * 80)
      logging.error("Light thermal throttling detected during execution, "
                    "scores may be affected.")
    elif max_observed_status > ThermalStatus.LIGHT:
      logging.info("-" * 80)
      logging.error("Significant thermal throttling detected during execution, "
                    "scores are not representative of the device performance.")

  def get_context(self, run: Run) -> ThermalMonitorProbeContext:
    """Returns the android-specific context for android, else the default."""
    if run.browser.platform.is_android:
      return AndroidThermalMonitorProbeContext(self, run)
    return ThermalMonitorProbeContext(self, run)


class ThermalMonitorProbeContext(InternalJsonResultProbeContext):
  """Default per-run context: cools down before the run, records nothing."""

  def __init__(self, probe: ThermalMonitorProbe, run: Run) -> None:
    super().__init__(probe, run)

  @property
  def probe(self) -> ThermalMonitorProbe:
    return self._probe

  def setup(self) -> None:
    """Waits the configured cool-down time, then until not throttled."""
    self.run.runner.wait(self.probe.cool_down_time, absolute_time=True)

    if not self.browser_platform.is_thermal_throttled():
      return
    logging.info("COOLDOWN")
    # NOTE(review): the android context catches TimeoutError around the same
    # wait loop; here it would propagate to the caller -- confirm intended.
    for _ in COOLDOWN_WAIT_RANGE.wait_with_backoff():
      if not self.browser_platform.is_thermal_throttled():
        break
      logging.info("COOLDOWN: still hot, waiting some more")

  def to_json(self, actions: Actions) -> Json:
    del actions
    return {}


class AndroidThermalMonitorProbeContext(ThermalMonitorProbeContext):
  """Android context that samples `dumpsys thermalservice` around a run."""

  def __init__(self, probe: ThermalMonitorProbe, run: Run) -> None:
    super().__init__(probe, run)
    # Highest status sampled in setup() and teardown() of this run.
    self._max_observed_status: ThermalStatus = ThermalStatus.UNAVAILABLE

  def _get_thermal_status(self) -> ThermalStatus:
    """Reads the current status from `dumpsys thermalservice`.

    Returns:
      The parsed status, or ThermalStatus.UNAVAILABLE if the output has no
      "Thermal Status: <n>" line or <n> is not a known ThermalStatus value.
    """
    stdout = self.browser_platform.sh_stdout("dumpsys", "thermalservice")
    match = THERMAL_STATUS_RE.search(stdout)
    if not match:
      return ThermalStatus.UNAVAILABLE
    raw_status = match["status"]
    try:
      return ThermalStatus(int(raw_status))
    except ValueError:
      # An unrecognised level should not abort the run; treat it like a
      # missing status but leave a trace in the log.
      logging.warning("Unknown thermal status value: %s", raw_status)
      return ThermalStatus.UNAVAILABLE

  def _wait_if_necessary(self, probe_threshold: ThermalStatus) -> None:
    """Blocks until the status is below probe_threshold or the wait expires.

    A TimeoutError from the wait loop is logged as an error and swallowed.
    """
    current_status = self._get_thermal_status()
    if current_status < probe_threshold:
      return

    logging.info("Thermal throttling status too high: %s", current_status.name)
    logging.info("COOLDOWN")
    try:
      for _ in COOLDOWN_WAIT_RANGE.wait_with_backoff():
        current_status = self._get_thermal_status()
        logging.debug("Thermal status: %s", current_status.name)
        if current_status < probe_threshold:
          logging.info("COOLDOWN: complete")
          break
    except TimeoutError:
      logging.error("COOLDOWN: device is still too hot after waiting for %s",
                    COOLDOWN_WAIT_RANGE.timeout)

  def setup(self) -> None:
    """Cools the device down, then records the status before the run."""
    if self.probe.threshold is not None:
      # NOTE(review): the probe's cool_down_time wait only happens in the
      # base-class setup, so it is skipped on this branch -- confirm intended.
      self._wait_if_necessary(self.probe.threshold)
    else:
      super().setup()

    current_status = self._get_thermal_status()
    self._max_observed_status = max(self._max_observed_status, current_status)
    logging.debug("Thermal throttling before run: %s", current_status.name)

  def teardown(self) -> ProbeResult:
    """Records the status after the run and delegates to the base class."""
    current_status = self._get_thermal_status()
    self._max_observed_status = max(self._max_observed_status, current_status)
    logging.debug("Thermal throttling after run: %s", current_status.name)
    # TODO(crbug.com/374737038): After crbug.com/374737038 is done, raise an
    # exception here if max status was at threshold or higher. This will
    # register the run as a failure to process it correctly later.
    return super().teardown()

  def to_json(self, actions: Actions) -> Json:
    del actions
    return {"max_observed_status": self._max_observed_status.value}