# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import logging
import re
import time
import xmlrpclib

from autotest_lib.client.common_lib import error
from autotest_lib.server.cros.faft.firmware_test import FirmwareTest

class firmware_ECThermal(FirmwareTest):
    """
    Servo based EC thermal engine test.

    The test compares the CPU temperature reported through servo with the
    one reported by ectool, verifies that the CPU heats up under load while
    the fan is off, and verifies that the automatically selected fan speed
    is consistent with the thermal thresholds read from the EC.
    """
    version = 1

    # Delay for waiting fan to start or stop
    FAN_DELAY = 5

    # Delay for waiting device stressing to stabilize
    STRESS_DELAY = 30

    # Delay for stressing device with fan off to check temperature increase
    STRESS_DELAY_NO_FAN = 12

    # Margin for comparing servo based and ectool based CPU temperature
    TEMP_MISMATCH_MARGIN = 3

    # Minimum increase of CPU temperature when stressing DUT
    TEMP_STRESS_INCREASE = 3

    # Pseudo INT_MAX. Used as infinity when comparing temperature readings
    INT_MAX = 10000

    # Sensor type ID of ignored sensors
    SENSOR_TYPE_IGNORED = 255

    # Offset subtracted from ectool readings (in K) to get degree C
    _KELVIN_OFFSET = 273

    # Per-sensor verdicts of _check_fan_speed_per_sensor. They are bit flags
    # so that the verdicts of all sensors can be OR-ed together.
    _FAN_SPEED_HIGHER = 0x00
    _FAN_SPEED_EXPECTED = 0x01
    _FAN_SPEED_LOWER = 0x10

    # PID of DUT stressing processes. Always rebound per instance (never
    # mutated in place), so this class-level default is not shared state.
    _stress_pid = []

    def enable_auto_fan_control(self):
        """Enable EC automatic fan speed control"""
        # We use set_nocheck because servo reports current target
        # RPM instead 'auto', and therefore servo.set always fails.
        self.servo.set_nocheck('fan_target_rpm', 'auto')


    def max_fan(self):
        """Maximize fan speed"""
        # We use set_nocheck because servo reports current target
        # RPM instead 'max', and therefore servo.set always fails.
        self.servo.set_nocheck('fan_target_rpm', 'max')


    def turn_off_fan(self):
        """Turn off fan"""
        self.servo.set('fan_target_rpm', 'off')


    def _get_setting_for_type(self, type_id):
        """
        Retrieve thermal setting for a given type of sensor

        Runs 'ectool thermalget <type_id> <threshold_id>' for increasing
        threshold IDs until the command fails, collecting every threshold
        found in the output.

        Args:
          type_id: The ID of sensor type.

        Returns:
          A list containing thresholds (in degree C) in the following order:
            Warning
            CPU off
            All power off
            Fan speed thresholds
          None is returned if no threshold could be read for this type.
        """
        # Compiled once; the pattern does not depend on the threshold ID.
        pattern = re.compile(r'Threshold \d* [a-z ]* \d* is (\d*) K.')
        setting = []
        current_id = 0
        while True:
            try:
                lines = self.faft_client.system.run_shell_command_get_output(
                        'ectool thermalget %d %d' % (type_id, current_id))
            except xmlrpclib.Fault:
                # A failing command marks the end of the threshold list.
                break
            for line in lines:
                matched = pattern.match(line)
                if matched is not None:
                    # Convert degree K to degree C
                    setting.append(int(matched.group(1)) -
                                   self._KELVIN_OFFSET)
            current_id += 1

        if not setting:
            return None
        return setting


    def get_fan_steps(self):
        """
        Retrieve fan step config from EC

        Fills self._fan_steps with the RPM of each fan step. The number of
        steps is derived from the thermal setting of sensor type 0, so
        get_thermal_setting() must have been called first.
        """
        # The first three thresholds are warning / CPU off / all power off;
        # the remaining ones are fan speed thresholds.
        num_steps = len(self._thermal_setting[0]) - 3
        expected_pat = (["Lowest speed: ([0-9-]+) RPM"] +
                        [r"\d+ K:\s+([0-9-]+) RPM"] * num_steps)
        match = self.ec.send_command_get_output("thermalfan 0", expected_pat)
        self._fan_steps = [int(m[1]) for m in match]

        # Get the actual value of each fan step: set the configured RPM and
        # read back what servo reports as target, so that later comparisons
        # use the value servo actually reports.
        for i in range(num_steps + 1):
            if self._fan_steps[i] == 0:
                continue
            self.servo.set_nocheck('fan_target_rpm', "%d" % self._fan_steps[i])
            self._fan_steps[i] = int(self.servo.get('fan_target_rpm'))

        logging.info("Actual fan steps: %s", self._fan_steps)


    def get_thermal_setting(self):
        """
        Retrieve thermal engine setting from EC

        Fills self._thermal_setting with one threshold list per sensor type
        and self._num_temp_sensor with the number of temperature sensors.
        """
        self._thermal_setting = []
        type_id = 0
        while True:
            setting = self._get_setting_for_type(type_id)
            if setting is None:
                break
            self._thermal_setting.append(setting)
            type_id += 1
        logging.info("Number of tempearture sensor types: %d", type_id)

        # Get the number of temperature sensors by probing increasing
        # sensor IDs until 'ectool temps' fails.
        self._num_temp_sensor = 0
        while True:
            try:
                self.faft_client.system.run_shell_command('ectool temps %d' %
                                                   self._num_temp_sensor)
            except xmlrpclib.Fault:
                break
            else:
                self._num_temp_sensor += 1
        logging.info("Number of temperature sensor: %d", self._num_temp_sensor)


    def initialize(self, host, cmdline_args):
        """
        Prepare the DUT for the thermal test.

        Stops the temp_metrics job if it can be stopped (remembering whether
        it has to be restarted in cleanup) and, when the EC has the
        'thermal' capability, reads the thermal configuration and enables
        automatic fan control.

        Raises:
          error.TestNAError: Raised when check_ec_capability() reports
              false for this device.
        """
        super(firmware_ECThermal, self).initialize(host, cmdline_args)
        # Don't bother if there is no Chrome EC.
        if not self.check_ec_capability():
            raise error.TestNAError("Nothing needs to be tested on this device")
        self.ec.send_command("chan 0")
        try:
            self.faft_client.system.run_shell_command('stop temp_metrics')
        except xmlrpclib.Fault:
            self._has_temp_metrics = False
        else:
            logging.info('Stopped temp_metrics')
            self._has_temp_metrics = True
        if self.check_ec_capability(['thermal']):
            self.get_thermal_setting()
            self.get_fan_steps()
            self.enable_auto_fan_control()


    def cleanup(self):
        """
        Restore fan control, temp_metrics and EC console channels.

        Restoration is best effort: any exception is logged and does not
        prevent the parent class cleanup from running.
        """
        try:
            if self.check_ec_capability(['thermal']):
                self.enable_auto_fan_control()
            if self._has_temp_metrics:
                logging.info('Starting temp_metrics')
                self.faft_client.system.run_shell_command('start temp_metrics')
            self.ec.send_command("chan 0xffffffff")
        except Exception as e:
            logging.error("Caught exception: %s", str(e))
        super(firmware_ECThermal, self).cleanup()


    def _find_cpu_sensor_id(self):
        """
        This function finds CPU temperature sensor using ectool.

        Returns:
          Integer ID of CPU temperature sensor.

        Raises:
          error.TestFail: Raised if we fail to find PECI temperature through
            ectool.
        """
        for temp_id in range(self._num_temp_sensor):
            lines = self.faft_client.system.run_shell_command_get_output(
                    'ectool tempsinfo %d' % temp_id)
            for line in lines:
                matched = re.match('Sensor name: (.*)', line)
                if matched is not None and matched.group(1) == 'PECI':
                    return temp_id
        raise error.TestFail('Cannot find CPU temperature sensor ID.')


    def _get_temp_reading(self, sensor_id):
        """
        Get temperature reading on a sensor through ectool

        Args:
          sensor_id: Temperature sensor ID.

        Returns:
          Temperature reading in degree C.

        Raises:
          xmlrpclib.Fault: Raised when we fail to read temperature.
          error.TestError: Raised if ectool doesn't behave as we expected.
        """
        assert sensor_id < self._num_temp_sensor
        pattern = re.compile(r'Reading temperature...(\d*)')
        lines = self.faft_client.system.run_shell_command_get_output(
                'ectool temps %d' % sensor_id)
        for line in lines:
            matched = pattern.match(line)
            if matched is not None:
                # ectool reports degree K; convert to degree C.
                return int(matched.group(1)) - self._KELVIN_OFFSET
        # Should never reach here
        raise error.TestError("Unexpected error occurred")


    def check_temp_report(self):
        """
        Checker of temperature reporting.

        This function reads CPU temperature from servo and ectool. If
        the two readings mismatch by more than TEMP_MISMATCH_MARGIN,
        test fails.

        Raises:
          error.TestFail: Raised when temperature reading mismatches by
            more than TEMP_MISMATCH_MARGIN.
        """
        cpu_temp_id = self._find_cpu_sensor_id()
        logging.info("CPU temperature sensor ID is %d", cpu_temp_id)
        ectool_cpu_temp = self._get_temp_reading(cpu_temp_id)
        servo_cpu_temp = int(self.servo.get('cpu_temp'))
        logging.info("CPU temperature from servo: %d C", servo_cpu_temp)
        logging.info("CPU temperature from ectool: %d C", ectool_cpu_temp)
        if abs(ectool_cpu_temp - servo_cpu_temp) > self.TEMP_MISMATCH_MARGIN:
            raise error.TestFail(
                    'CPU temperature readings from servo and ectool differ')


    def _stress_dut(self, threads=4):
        """
        Stress DUT system.

        By reading from /dev/urandom and writing to /dev/null, we can stress
        DUT and cause CPU temperature to go up. We stress the system forever,
        until _stop_stressing is called to kill the stress threads. This
        function is non-blocking.

        Args:
          threads: Number of threads (processes) when stressing forever.

        Returns:
          A list of stress process IDs is returned.
        """
        logging.info("Stressing DUT with %d threads...", threads)
        # Get rid of any dd left over from a previous run first.
        self.faft_client.system.run_shell_command('pkill dd')
        stress_cmd = 'dd if=/dev/urandom of=/dev/null bs=1M &'
        # Grep for [d]d instead of dd to prevent getting the PID of grep
        # itself.
        pid_cmd = "ps -ef | grep '[d]d if=/dev/urandom' | awk '{print $2}'"
        self._stress_pid = []
        for _ in range(threads):
            self.faft_client.system.run_shell_command(stress_cmd)
        lines = self.faft_client.system.run_shell_command_get_output(
                    pid_cmd)
        for line in lines:
            logging.info("PID is %s", line)
            self._stress_pid.append(int(line.strip()))
        return self._stress_pid


    def _stop_stressing(self):
        """Stop stressing DUT system by killing the recorded processes"""
        stop_cmd = 'kill -9 %d'
        for pid in self._stress_pid:
            self.faft_client.system.run_shell_command(stop_cmd % pid)
        # Forget the PIDs so that they are never killed a second time.
        self._stress_pid = []


    def check_fan_off(self):
        """
        Checker of fan turned off.

        The function first delays FAN_DELAY seconds to ensure fan stops.
        Then it reads fan speed and fails the test if fan speed is non-zero.
        Then it stresses the system a bit and checks if the temperature
        goes up by at least TEMP_STRESS_INCREASE.

        Raises:
          error.TestFail: Raised when the fan is still spinning, or when
            temperature increases by less than TEMP_STRESS_INCREASE.
        """
        time.sleep(self.FAN_DELAY)
        fan_speed = self.servo.get('fan_actual_rpm')
        if int(fan_speed) != 0:
            raise error.TestFail("Fan is not turned off.")
        logging.info("EC reports fan turned off.")
        cpu_temp_before = int(self.servo.get('cpu_temp'))
        logging.info("CPU temperature before stressing is %d C",
                     cpu_temp_before)
        self._stress_dut()
        try:
            time.sleep(self.STRESS_DELAY_NO_FAN)
            cpu_temp_after = int(self.servo.get('cpu_temp'))
        finally:
            # Never leave the stress processes running with the fan off,
            # even if reading the temperature fails.
            self._stop_stressing()
        logging.info("CPU temperature after stressing is %d C",
                     cpu_temp_after)
        if cpu_temp_after - cpu_temp_before < self.TEMP_STRESS_INCREASE:
            raise error.TestFail(
                    "CPU temperature did not go up by more than %d degrees" %
                    self.TEMP_STRESS_INCREASE)


    def _get_temp_sensor_type(self, sensor_id):
        """
        Get type of a given temperature sensor

        Args:
          sensor_id: Temperature sensor ID.

        Returns:
          Type ID of the temperature sensor.

        Raises:
          error.TestError: Raised when ectool doesn't behave as we expected.
        """
        assert sensor_id < self._num_temp_sensor
        pattern = re.compile(r'Sensor type: (\d*)')
        lines = self.faft_client.system.run_shell_command_get_output(
                'ectool tempsinfo %d' % sensor_id)
        for line in lines:
            matched = pattern.match(line)
            if matched is not None:
                return int(matched.group(1))
        # Should never reach here
        raise error.TestError("Unexpected error occurred")


    def _check_fan_speed_per_sensor(self, fan_speed, sensor_id):
        """
        Check if the given fan_speed is reasonable from the view of certain
        temperature sensor. There could be three types of outcome:
          1. Fan speed is higher than expected. This may be due to other
             sensor sensing higher temperature and setting fan to higher
             speed.
          2. Fan speed is as expected.
          3. Fan speed is lower than expected. In this case, EC is not
             working as expected and an error should be raised.

        Ignored sensors and sensor types whose fan stepping is disabled
        are reported as outcome 1.

        Args:
          fan_speed: The current fan speed in RPM.
          sensor_id: The ID of temperature sensor.

        Returns:
          0x00: Fan speed is higher than expected.
          0x01: Fan speed is as expected.
          0x10: Fan speed is lower than expected.

        Raises:
          error.TestError: Raised when getting unexpected fan speed.
        """
        sensor_type = self._get_temp_sensor_type(sensor_id)
        if sensor_type == self.SENSOR_TYPE_IGNORED:
            # This sensor should be ignored
            return self._FAN_SPEED_HIGHER

        thresholds = self._thermal_setting[sensor_type]
        if thresholds[-1] == -self._KELVIN_OFFSET:
            # The fan stepping for this type of sensor is disabled (the
            # last threshold is 0 K, i.e. -273 after conversion to C).
            return self._FAN_SPEED_HIGHER

        try:
            idx = self._fan_steps.index(fan_speed)
        except ValueError:
            raise error.TestError("Unexpected fan speed: %d" % fan_speed)

        # Fan thresholds start at index 3 of the threshold list, so fan
        # step idx is active between thresholds[idx + 2] and
        # thresholds[idx + 3]. The lower bound is relaxed by 3 degrees
        # (presumably to allow for EC hysteresis -- TODO confirm).
        if idx == 0:
            lower_bound = -self.INT_MAX
            upper_bound = thresholds[3]
        elif idx == len(self._fan_steps) - 1:
            lower_bound = thresholds[idx + 2] - 3
            upper_bound = self.INT_MAX
        else:
            lower_bound = thresholds[idx + 2] - 3
            upper_bound = thresholds[idx + 3]

        temp_reading = self._get_temp_reading(sensor_id)
        logging.info("Sensor %d = %d C", sensor_id, temp_reading)
        logging.info("  Expecting %d - %d C", lower_bound, upper_bound)
        if temp_reading > upper_bound:
            return self._FAN_SPEED_HIGHER
        elif temp_reading < lower_bound:
            return self._FAN_SPEED_LOWER
        else:
            return self._FAN_SPEED_EXPECTED


    def check_auto_fan(self):
        """
        Checker of thermal engine automatic fan speed control.

        Stress DUT system for a longer period to make temperature more stable
        and check if fan speed is controlled as expected.

        Raises:
          error.TestFail: Raised when fan speed is not as expected.
        """
        self._stress_dut()
        try:
            # Everything that runs while the DUT is stressed is inside the
            # try block so that the stress processes are always killed.
            time.sleep(self.STRESS_DELAY)
            fan_rpm = int(self.servo.get('fan_target_rpm'))
            logging.info('Fan speed is %d RPM', fan_rpm)
            # OR the verdicts of all sensors together.
            result = 0
            for sensor_id in range(self._num_temp_sensor):
                result |= self._check_fan_speed_per_sensor(fan_rpm, sensor_id)
        finally:
            self._stop_stressing()
        # NOTE(review): a mixed verdict (0x11, i.e. some sensor matches and
        # another one expects a faster fan) is accepted here, as in the
        # original implementation -- confirm this is intended.
        if result == self._FAN_SPEED_HIGHER:
            raise error.TestFail("Fan speed higher than expected")
        if result == self._FAN_SPEED_LOWER:
            raise error.TestFail("Fan speed lower than expected")


    def run_once(self):
        """Execute the main body of the test.
        """
        if not self.check_ec_capability(['thermal']):
            raise error.TestNAError("Nothing needs to be tested on this device")
        logging.info("Checking host temperature report.")
        self.check_temp_report()

        self.turn_off_fan()
        logging.info("Verifying fan is turned off.")
        self.check_fan_off()

        self.enable_auto_fan_control()
        logging.info("Verifying automatic fan control functionality.")
        self.check_auto_fan()