# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import logging
import re
import time
import xmlrpclib

from autotest_lib.client.common_lib import error
from autotest_lib.server.cros.faft.firmware_test import FirmwareTest

class firmware_ECThermal(FirmwareTest):
    """
    Servo based EC thermal engine test.

    The test compares the CPU temperature reported through servo with the
    one reported by ectool, verifies that the CPU heats up under load while
    the fan is forced off, and verifies that the automatically selected fan
    speed is consistent with the thermal thresholds read from the EC.
    """
    version = 1

    # Delay for waiting fan to start or stop
    FAN_DELAY = 5

    # Delay for waiting device stressing to stabilize
    STRESS_DELAY = 30

    # Delay for stressing device with fan off to check temperature increase
    STRESS_DELAY_NO_FAN = 12

    # Margin for comparing servo based and ectool based CPU temperature
    TEMP_MISMATCH_MARGIN = 3

    # Minimum increase of CPU temperature when stressing DUT
    TEMP_STRESS_INCREASE = 3

    # Pseudo INT_MAX. Used as infinity when comparing temperature readings
    INT_MAX = 10000

    # Sensor type ID of ignored sensors
    SENSOR_TYPE_IGNORED = 255

    # PID of DUT stressing processes
    _stress_pid = list()

    # Whether the temp_metrics job was stopped by initialize() and therefore
    # has to be restarted by cleanup(). Defaulting it here keeps cleanup()
    # working even when initialize() aborted before assigning it.
    _has_temp_metrics = False

    def enable_auto_fan_control(self):
        """Enable EC automatic fan speed control"""
        # We use set_nocheck because servo reports current target
        # RPM instead 'auto', and therefore servo.set always fails.
        self.servo.set_nocheck('fan_target_rpm', 'auto')


    def max_fan(self):
        """Maximize fan speed"""
        # We use set_nocheck because servo reports current target
        # RPM instead 'max', and therefore servo.set always fails.
        self.servo.set_nocheck('fan_target_rpm', 'max')


    def turn_off_fan(self):
        """Turn off fan"""
        self.servo.set('fan_target_rpm', 'off')


    def _get_setting_for_type(self, type_id):
        """
        Retrieve thermal setting for a given type of sensor

        Thresholds are queried one ID at a time with 'ectool thermalget'
        until the command fails, which is taken as the end of the list.

        Args:
          type_id: The ID of sensor type.

        Returns:
          None if no threshold could be read for this sensor type.
          Otherwise a list containing thresholds (converted to degree C)
          in the following order:
            Warning
            CPU off
            All power off
            Fan speed thresholds
        """
        # Compiled once; the pattern does not depend on the threshold ID.
        pattern = re.compile(r'Threshold \d* [a-z ]* \d* is (\d*) K.')
        setting = list()
        current_id = 0
        while True:
            try:
                lines = self.faft_client.system.run_shell_command_get_output(
                        'ectool thermalget %d %d' % (type_id, current_id))
            except xmlrpclib.Fault:
                # No more thresholds for this sensor type.
                break
            for line in lines:
                matched = pattern.match(line)
                if matched is not None:
                    # Convert degree K to degree C
                    setting.append(int(matched.group(1)) - 273)
            current_id = current_id + 1

        if not setting:
            return None
        return setting


    def get_fan_steps(self):
        """
        Retrieve fan step config from EC

        Populates self._fan_steps with one RPM value per fan step. Each
        non-zero step is written to servo and read back so that the list
        holds the values servo actually reports for 'fan_target_rpm'.
        Requires get_thermal_setting() to have been called first.
        """
        # The first three thresholds of a sensor type are not fan steps
        # (see _get_setting_for_type).
        num_steps = len(self._thermal_setting[0]) - 3
        self._fan_steps = list()
        expected_pat = ([r"Lowest speed: ([0-9-]+) RPM"] +
                        [r"\d+ K:\s+([0-9-]+) RPM"] * num_steps)
        match = self.ec.send_command_get_output("thermalfan 0", expected_pat)
        for m in match:
            self._fan_steps.append(int(m[1]))

        # Get the actual value of each fan step
        for i in range(num_steps + 1):
            if self._fan_steps[i] == 0:
                continue
            self.servo.set_nocheck('fan_target_rpm', "%d" % self._fan_steps[i])
            self._fan_steps[i] = int(self.servo.get('fan_target_rpm'))

        logging.info("Actual fan steps: %s", self._fan_steps)


    def get_thermal_setting(self):
        """
        Retrieve thermal engine setting from EC

        Populates self._thermal_setting (one threshold list per sensor
        type, indexed by type ID) and self._num_temp_sensor.
        """
        self._thermal_setting = list()
        type_id = 0
        while True:
            setting = self._get_setting_for_type(type_id)
            if setting is None:
                break
            self._thermal_setting.append(setting)
            type_id = type_id + 1
        logging.info("Number of tempearture sensor types: %d", type_id)

        # Get the number of temperature sensors
        self._num_temp_sensor = 0
        while True:
            try:
                self.faft_client.system.run_shell_command('ectool temps %d' %
                                                   self._num_temp_sensor)
                self._num_temp_sensor = self._num_temp_sensor + 1
            except xmlrpclib.Fault:
                # The first sensor ID that cannot be read ends the count.
                break
        logging.info("Number of temperature sensor: %d", self._num_temp_sensor)


    def initialize(self, host, cmdline_args):
        """
        Prepare the DUT for the thermal test.

        Silences the EC console channels, stops the temp_metrics job if it
        exists, and reads thermal thresholds and fan steps from the EC when
        the EC has the 'thermal' capability.

        Raises:
          error.TestNAError: Raised when there is no Chrome EC.
        """
        super(firmware_ECThermal, self).initialize(host, cmdline_args)
        # Don't bother if there is no Chrome EC.
        if not self.check_ec_capability():
            raise error.TestNAError("Nothing needs to be tested on this device")
        self.ec.send_command("chan 0")
        try:
            self.faft_client.system.run_shell_command('stop temp_metrics')
        except xmlrpclib.Fault:
            self._has_temp_metrics = False
        else:
            logging.info('Stopped temp_metrics')
            self._has_temp_metrics = True
        if self.check_ec_capability(['thermal']):
            self.get_thermal_setting()
            self.get_fan_steps()
            self.enable_auto_fan_control()


    def cleanup(self):
        """
        Restore automatic fan control, temp_metrics and EC console channels.

        Restoration is best effort: any exception is logged and the parent
        class cleanup is always run.
        """
        try:
            if self.check_ec_capability(['thermal']):
                self.enable_auto_fan_control()
            if self._has_temp_metrics:
                logging.info('Starting temp_metrics')
                self.faft_client.system.run_shell_command('start temp_metrics')
            self.ec.send_command("chan 0xffffffff")
        except Exception as e:
            logging.error("Caught exception: %s", str(e))
        super(firmware_ECThermal, self).cleanup()


    def _find_cpu_sensor_id(self):
        """
        This function finds CPU temperature sensor using ectool.

        Returns:
          Integer ID of CPU temperature sensor.

        Raises:
          error.TestFail: Raised if we fail to find PECI temperature through
            ectool.
        """
        for temp_id in range(self._num_temp_sensor):
            lines = self.faft_client.system.run_shell_command_get_output(
                    'ectool tempsinfo %d' % temp_id)
            for line in lines:
                matched = re.match('Sensor name: (.*)', line)
                if matched is not None and matched.group(1) == 'PECI':
                    return temp_id
        raise error.TestFail('Cannot find CPU temperature sensor ID.')


    def _get_temp_reading(self, sensor_id):
        """
        Get temperature reading on a sensor through ectool

        Args:
          sensor_id: Temperature sensor ID.

        Returns:
          Temperature reading in degree C.

        Raises:
          xmlrpclib.Fault: Raised when we fail to read temperature.
          error.TestError: Raised if ectool doesn't behave as we expected.
        """
        assert sensor_id < self._num_temp_sensor
        pattern = re.compile(r'Reading temperature...(\d*)')
        lines = self.faft_client.system.run_shell_command_get_output(
                'ectool temps %d' % sensor_id)
        for line in lines:
            matched = pattern.match(line)
            if matched is not None:
                # Convert degree K to degree C
                return int(matched.group(1)) - 273
        # Should never reach here
        raise error.TestError("Unexpected error occurred")


    def check_temp_report(self):
        """
        Checker of temperature reporting.

        This function reads CPU temperature from servo and ectool. If
        the two readings mismatch by more than TEMP_MISMATCH_MARGIN,
        test fails.

        Raises:
          error.TestFail: Raised when temperature reading mismatches by
            more than TEMP_MISMATCH_MARGIN.
        """
        cpu_temp_id = self._find_cpu_sensor_id()
        logging.info("CPU temperature sensor ID is %d", cpu_temp_id)
        ectool_cpu_temp = self._get_temp_reading(cpu_temp_id)
        servo_cpu_temp = int(self.servo.get('cpu_temp'))
        logging.info("CPU temperature from servo: %d C", servo_cpu_temp)
        logging.info("CPU temperature from ectool: %d C", ectool_cpu_temp)
        if abs(ectool_cpu_temp - servo_cpu_temp) > self.TEMP_MISMATCH_MARGIN:
            raise error.TestFail(
                    'CPU temperature readings from servo and ectool differ')


    def _stress_dut(self, threads=4):
        """
        Stress DUT system.

        By reading from /dev/urandom and writing to /dev/null, we can stress
        DUT and cause CPU temperature to go up. We stress the system forever,
        until _stop_stressing is called to kill the stress threads. This
        function is non-blocking.

        Args:
          threads: Number of threads (processes) when stressing forever.

        Returns:
          A list of stress process IDs is returned.
        """
        logging.info("Stressing DUT with %d threads...", threads)
        self.faft_client.system.run_shell_command('pkill dd')
        stress_cmd = 'dd if=/dev/urandom of=/dev/null bs=1M'
        # Grep for [d]d instead of dd to prevent getting the PID of grep
        # itself.
        pid_cmd = "ps -ef | grep '[d]d if=/dev/urandom' | awk '{print $2}'"
        block = False
        self._stress_pid = list()
        for _ in range(threads):
            self.faft_client.system.run_shell_command(stress_cmd, block)
        lines = self.faft_client.system.run_shell_command_get_output(
                    pid_cmd)
        for line in lines:
            logging.info("PID is %s", line)
            self._stress_pid.append(int(line.strip()))
        return self._stress_pid


    def _stop_stressing(self):
        """Stop stressing DUT system"""
        stop_cmd = 'kill -9 %d'
        for pid in self._stress_pid:
            self.faft_client.system.run_shell_command(stop_cmd % pid)
        # Forget the killed processes so that a repeated call does not try
        # to kill PIDs that are gone (or have been reused) on the DUT.
        self._stress_pid = list()


    def check_fan_off(self):
        """
        Checker of fan turned off.

        The function first delays FAN_DELAY seconds to ensure fan stops.
        Then it reads fan speed and fails the test if fan speed is non-zero.
        Then it stresses the system a bit and checks if the temperature
        goes up by at least TEMP_STRESS_INCREASE.

        Raises:
          error.TestFail: Raised when the fan is still spinning, or when
            temperature increases by less than TEMP_STRESS_INCREASE.
        """
        time.sleep(self.FAN_DELAY)
        fan_speed = self.servo.get('fan_actual_rpm')
        if int(fan_speed) != 0:
            raise error.TestFail("Fan is not turned off.")
        logging.info("EC reports fan turned off.")
        cpu_temp_before = int(self.servo.get('cpu_temp'))
        logging.info("CPU temperature before stressing is %d C",
                     cpu_temp_before)
        self._stress_dut()
        try:
            time.sleep(self.STRESS_DELAY_NO_FAN)
            cpu_temp_after = int(self.servo.get('cpu_temp'))
        finally:
            # Never leave the stress processes running with the fan off,
            # even when the temperature cannot be read.
            self._stop_stressing()
        logging.info("CPU temperature after stressing is %d C",
                     cpu_temp_after)
        if cpu_temp_after - cpu_temp_before < self.TEMP_STRESS_INCREASE:
            raise error.TestFail(
                    "CPU temperature did not go up by more than %d degrees" %
                    self.TEMP_STRESS_INCREASE)


    def _get_temp_sensor_type(self, sensor_id):
        """
        Get type of a given temperature sensor

        Args:
          sensor_id: Temperature sensor ID.

        Returns:
          Type ID of the temperature sensor.

        Raises:
          error.TestError: Raised when ectool doesn't behave as we expected.
        """
        assert sensor_id < self._num_temp_sensor
        pattern = re.compile(r'Sensor type: (\d*)')
        lines = self.faft_client.system.run_shell_command_get_output(
                'ectool tempsinfo %d' % sensor_id)
        for line in lines:
            matched = pattern.match(line)
            if matched is not None:
                return int(matched.group(1))
        # Should never reach here
        raise error.TestError("Unexpected error occurred")


    def _check_fan_speed_per_sensor(self, fan_speed, sensor_id):
        """
        Check if the given fan_speed is reasonable from the view of certain
        temperature sensor. There could be three types of outcome:
          1. Fan speed is higher than expected. This may be due to other
             sensor sensing higher temperature and setting fan to higher
             speed.
          2. Fan speed is as expected.
          3. Fan speed is lower than expected. In this case, EC is not
             working as expected and an error should be raised.

        Args:
          fan_speed: The current fan speed in RPM.
          sensor_id: The ID of temperature sensor.

        Returns:
          0x00: Fan speed is higher than expected. Also returned for
                ignored sensors and for sensor types whose fan stepping
                is disabled.
          0x01: Fan speed is as expected.
          0x10: Fan speed is lower than expected.

        Raises:
          error.TestError: Raised when getting unexpected fan speed.
        """
        sensor_type = self._get_temp_sensor_type(sensor_id)
        if sensor_type == self.SENSOR_TYPE_IGNORED:
            # This sensor should be ignored
            return 0x00

        if self._thermal_setting[sensor_type][-1] == -273:
            # The fan stepping for this type of sensor is disabled
            return 0x00

        try:
            idx = self._fan_steps.index(fan_speed)
        except ValueError:
            # list.index raises ValueError when fan_speed is not one of the
            # known fan steps.
            raise error.TestError("Unexpected fan speed: %d" % fan_speed)

        # Thresholds at index 3 and above are the fan speed thresholds, so
        # fan step idx is entered at threshold [idx + 2] and left for the
        # next step at threshold [idx + 3].
        # NOTE(review): the 3 degree slack on the lower bound is presumably
        # the EC's hysteresis when stepping down -- confirm against EC code.
        if idx == 0:
            lower_bound = -self.INT_MAX
            upper_bound = self._thermal_setting[sensor_type][3]
        elif idx == len(self._fan_steps) - 1:
            lower_bound = self._thermal_setting[sensor_type][idx + 2] - 3
            upper_bound = self.INT_MAX
        else:
            lower_bound = self._thermal_setting[sensor_type][idx + 2] - 3
            upper_bound = self._thermal_setting[sensor_type][idx + 3]

        temp_reading = self._get_temp_reading(sensor_id)
        logging.info("Sensor %d = %d C", sensor_id, temp_reading)
        logging.info("  Expecting %d - %d C", lower_bound, upper_bound)
        if temp_reading > upper_bound:
            return 0x00
        elif temp_reading < lower_bound:
            return 0x10
        else:
            return 0x01


    def check_auto_fan(self):
        """
        Checker of thermal engine automatic fan speed control.

        Stress DUT system for a longer period to make temperature more stable
        and check if fan speed is controlled as expected.

        Raises:
          error.TestFail: Raised when fan speed is not as expected.
        """
        self._stress_dut()
        try:
            time.sleep(self.STRESS_DELAY)
            fan_rpm = int(self.servo.get('fan_target_rpm'))
            logging.info('Fan speed is %d RPM', fan_rpm)
            # OR the per-sensor verdicts together (see
            # _check_fan_speed_per_sensor for the meaning of each bit).
            result = reduce(lambda x, y: x | y,
                            [self._check_fan_speed_per_sensor(fan_rpm, x)
                             for x in range(self._num_temp_sensor)])
        finally:
            # Stop the stress processes even if reading the fan speed or a
            # sensor fails.
            self._stop_stressing()
        # NOTE(review): a combined result of 0x11 (one sensor as expected,
        # another reporting the fan as too slow) passes here -- confirm
        # that this is intended.
        if result == 0x00:
            raise error.TestFail("Fan speed higher than expected")
        if result == 0x10:
            raise error.TestFail("Fan speed lower than expected")


    def run_once(self):
        """Execute the main body of the test.
        """
        if not self.check_ec_capability(['thermal']):
            raise error.TestNAError("Nothing needs to be tested on this device")
        logging.info("Checking host temperature report.")
        self.check_temp_report()

        self.turn_off_fan()
        logging.info("Verifying fan is turned off.")
        self.check_fan_off()

        self.enable_auto_fan_control()
        logging.info("Verifying automatic fan control functionality.")
        self.check_auto_fan()