• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import logging
6import re
7import time
8import xmlrpclib
9
10from autotest_lib.client.common_lib import error
11from autotest_lib.server.cros.faft.firmware_test import FirmwareTest
12
class firmware_ECThermal(FirmwareTest):
    """
    Servo based EC thermal engine test.

    run_once() performs three checks in order:
      1. The CPU temperature read through servo and through ectool agree.
      2. With the fan turned off, stressing the DUT raises the CPU
         temperature.
      3. With automatic fan control enabled, the fan speed is consistent
         with the thermal thresholds reported by the EC.
    """
    version = 1

    # Delay for waiting fan to start or stop
    FAN_DELAY = 5

    # Delay for waiting device stressing to stabilize
    STRESS_DELAY = 30

    # Delay for stressing device with fan off to check temperature increase
    STRESS_DELAY_NO_FAN = 12

    # Margin for comparing servo based and ectool based CPU temperature
    TEMP_MISMATCH_MARGIN = 3

    # Minimum increase of CPU temperature when stressing DUT
    TEMP_STRESS_INCREASE = 3

    # Margin subtracted from a fan threshold when computing the lowest
    # temperature expected at a given fan step.
    # NOTE(review): presumably mirrors the EC's hysteresis - confirm.
    FAN_THRESHOLD_MARGIN = 3

    # Pseudo INT_MAX. Used as infinity when comparing temperature readings
    INT_MAX = 10000

    # Sensor type ID of ignored sensors
    SENSOR_TYPE_IGNORED = 255

    # Result codes of _check_fan_speed_per_sensor. check_auto_fan ORs the
    # codes of all sensors together, so they must stay distinct bits.
    _FAN_HIGHER = 0x00
    _FAN_AS_EXPECTED = 0x01
    _FAN_LOWER = 0x10

    # PID of DUT stressing processes. Always rebound on the instance, never
    # mutated in place, so the class-level list is not shared state.
    _stress_pid = []

    # Whether initialize() stopped temp_metrics, i.e. whether cleanup() has
    # to start it again. Defaults to False so that cleanup() is safe even
    # when initialize() bailed out early.
    _has_temp_metrics = False

    def enable_auto_fan_control(self):
        """Enable EC automatic fan speed control"""
        # We use set_nocheck because servo reports current target
        # RPM instead 'auto', and therefore servo.set always fails.
        self.servo.set_nocheck('fan_target_rpm', 'auto')


    def max_fan(self):
        """Maximize fan speed"""
        # We use set_nocheck because servo reports current target
        # RPM instead 'max', and therefore servo.set always fails.
        self.servo.set_nocheck('fan_target_rpm', 'max')


    def turn_off_fan(self):
        """Turn off fan"""
        self.servo.set('fan_target_rpm', 'off')


    def _get_setting_for_type(self, type_id):
        """
        Retrieve thermal setting for a given type of sensor

        Runs 'ectool thermalget <type_id> <n>' for n = 0, 1, 2, ... until
        the command fails, collecting every threshold found in the output.

        Args:
          type_id: The ID of sensor type.

        Returns:
          A list containing thresholds (converted to degree C) in the
          following order:
            Warning
            CPU off
            All power off
            Fan speed thresholds
          None is returned when no threshold was found at all.
        """
        pattern = re.compile(r'Threshold \d* [a-z ]* \d* is (\d*) K.')
        setting = []
        current_id = 0
        while True:
            try:
                lines = self.faft_client.system.run_shell_command_get_output(
                        'ectool thermalget %d %d' % (type_id, current_id))
            except xmlrpclib.Fault:
                # The command failing marks the end of the threshold list.
                break
            for line in lines:
                matched = pattern.match(line)
                if matched is not None:
                    # Convert degree K to degree C
                    setting.append(int(matched.group(1)) - 273)
            current_id += 1

        return setting or None


    def get_fan_steps(self):
        """
        Retrieve fan step config from EC

        Parses the output of the EC console command 'thermalfan 0', then
        programs every non-zero step through servo and reads the target RPM
        back, so that self._fan_steps holds the values servo actually
        reports for each step.

        Raises:
          error.TestError: Raised if no thermal setting has been retrieved,
            in which case the number of fan steps is unknown.
        """
        if not self._thermal_setting:
            raise error.TestError("No thermal setting retrieved from EC")
        # The first three thresholds of a setting are not fan thresholds
        # (see _get_setting_for_type), the rest are one per fan step.
        num_steps = len(self._thermal_setting[0]) - 3
        expected_pat = ([r"Lowest speed: ([0-9-]+) RPM"] +
                        [r"\d+ K:\s+([0-9-]+) RPM"] * num_steps)
        match = self.ec.send_command_get_output("thermalfan 0", expected_pat)
        configured_steps = [int(m[1]) for m in match]

        # Get the actual value of each fan step
        self._fan_steps = []
        for rpm in configured_steps:
            if rpm != 0:
                self.servo.set_nocheck('fan_target_rpm', "%d" % rpm)
                rpm = int(self.servo.get('fan_target_rpm'))
            self._fan_steps.append(rpm)

        logging.info("Actual fan steps: %s", self._fan_steps)


    def get_thermal_setting(self):
        """
        Retrieve thermal engine setting from EC

        Fills self._thermal_setting with one threshold list per sensor type
        and self._num_temp_sensor with the number of sensors for which
        'ectool temps <id>' succeeds.
        """
        self._thermal_setting = []
        type_id = 0
        while True:
            setting = self._get_setting_for_type(type_id)
            if setting is None:
                break
            self._thermal_setting.append(setting)
            type_id += 1
        logging.info("Number of tempearture sensor types: %d", type_id)

        # Get the number of temperature sensors
        self._num_temp_sensor = 0
        while True:
            try:
                self.faft_client.system.run_shell_command('ectool temps %d' %
                                                   self._num_temp_sensor)
            except xmlrpclib.Fault:
                # First sensor ID that cannot be read ends the enumeration.
                break
            self._num_temp_sensor += 1
        logging.info("Number of temperature sensor: %d", self._num_temp_sensor)


    def initialize(self, host, cmdline_args):
        """
        Prepare the DUT and collect the EC thermal configuration.

        Args:
          host: Forwarded unchanged to FirmwareTest.initialize.
          cmdline_args: Forwarded unchanged to FirmwareTest.initialize.

        Raises:
          error.TestNAError: Raised if there is no Chrome EC.
        """
        super(firmware_ECThermal, self).initialize(host, cmdline_args)
        # Don't bother if there is no Chrome EC.
        if not self.check_ec_capability():
            raise error.TestNAError("Nothing needs to be tested on this device")
        self.ec.send_command("chan 0")
        try:
            self.faft_client.system.run_shell_command('stop temp_metrics')
        except xmlrpclib.Fault:
            self._has_temp_metrics = False
        else:
            logging.info('Stopped temp_metrics')
            self._has_temp_metrics = True
        if self.check_ec_capability(['thermal']):
            self.get_thermal_setting()
            self.get_fan_steps()
            self.enable_auto_fan_control()


    def _restore_fan_control(self):
        """Hand fan control back to the EC if it has a thermal engine."""
        if self.check_ec_capability(['thermal']):
            self.enable_auto_fan_control()


    def _restart_temp_metrics(self):
        """Start temp_metrics again if initialize() stopped it."""
        if self._has_temp_metrics:
            logging.info('Starting temp_metrics')
            self.faft_client.system.run_shell_command('start temp_metrics')


    def _restore_ec_channels(self):
        """Undo the 'chan 0' sent to the EC console by initialize()."""
        self.ec.send_command("chan 0xffffffff")


    def cleanup(self):
        """
        Restore fan control, temp_metrics and the EC console channel mask.

        Every step is best effort and independent of the others: a failure
        is logged and the remaining steps still run, so that one broken
        step cannot leave the rest of the DUT state unrestored.
        """
        restore_steps = (self._restore_fan_control,
                         self._restart_temp_metrics,
                         self._restore_ec_channels)
        for step in restore_steps:
            try:
                step()
            except Exception as e:
                logging.error("Caught exception: %s", str(e))
        super(firmware_ECThermal, self).cleanup()


    def _find_cpu_sensor_id(self):
        """
        This function finds the CPU temperature sensor using ectool.

        The CPU sensor is the one whose 'ectool tempsinfo' output reports
        the sensor name 'PECI'.

        Returns:
          Integer ID of CPU temperature sensor.

        Raises:
          error.TestFail: Raised if we fail to find PECI temperature through
            ectool.
        """
        pattern = re.compile(r'Sensor name: (.*)')
        for temp_id in range(self._num_temp_sensor):
            lines = self.faft_client.system.run_shell_command_get_output(
                    'ectool tempsinfo %d' % temp_id)
            for line in lines:
                matched = pattern.match(line)
                if matched is not None and matched.group(1) == 'PECI':
                    return temp_id
        raise error.TestFail('Cannot find CPU temperature sensor ID.')


    def _get_temp_reading(self, sensor_id):
        """
        Get temperature reading on a sensor through ectool

        Args:
          sensor_id: Temperature sensor ID.

        Returns:
          Temperature reading in degree C.

        Raises:
          xmlrpclib.Fault: Raised when we fail to read temperature.
          error.TestError: Raised if ectool doesn't behave as we expected.
        """
        assert sensor_id < self._num_temp_sensor
        # Require at least one digit so that a non-numeric reading ends up
        # as the documented TestError instead of a ValueError from int('').
        pattern = re.compile(r'Reading temperature...(\d+)')
        lines = self.faft_client.system.run_shell_command_get_output(
                'ectool temps %d' % sensor_id)
        for line in lines:
            matched = pattern.match(line)
            if matched is not None:
                # Convert degree K to degree C
                return int(matched.group(1)) - 273
        # Should never reach here
        raise error.TestError("Unexpected error occurred")


    def check_temp_report(self):
        """
        Checker of temperature reporting.

        This function reads CPU temperature from servo and ectool. If
        the two readings mismatch by more than TEMP_MISMATCH_MARGIN,
        test fails.

        Raises:
          error.TestFail: Raised when temperature reading mismatches by
            more than TEMP_MISMATCH_MARGIN.
        """
        cpu_temp_id = self._find_cpu_sensor_id()
        logging.info("CPU temperature sensor ID is %d", cpu_temp_id)
        ectool_cpu_temp = self._get_temp_reading(cpu_temp_id)
        servo_cpu_temp = int(self.servo.get('cpu_temp'))
        logging.info("CPU temperature from servo: %d C", servo_cpu_temp)
        logging.info("CPU temperature from ectool: %d C", ectool_cpu_temp)
        if abs(ectool_cpu_temp - servo_cpu_temp) > self.TEMP_MISMATCH_MARGIN:
            raise error.TestFail(
                    'CPU temperature readings from servo and ectool differ')


    def _stress_dut(self, threads=4):
        """
        Stress DUT system.

        By reading from /dev/urandom and writing to /dev/null, we can stress
        DUT and cause CPU temperature to go up. We stress the system forever,
        until _stop_stressing is called to kill the stress threads. This
        function is non-blocking.

        Args:
          threads: Number of threads (processes) when stressing forever.

        Returns:
          A list of stress process IDs is returned.
        """
        logging.info("Stressing DUT with %d threads...", threads)
        run_command = self.faft_client.system.run_shell_command
        self._stress_pid = []
        try:
            # Get rid of leftovers of a previous run.
            run_command('pkill dd')
        except xmlrpclib.Fault:
            # pkill exits non-zero when no process matched, and initialize()
            # shows that a failing command surfaces as xmlrpclib.Fault.
            # Having nothing to kill is the normal case, not an error.
            logging.info("No leftover dd process killed")
        stress_cmd = 'dd if=/dev/urandom of=/dev/null bs=1M &'
        # Grep for [d]d instead of dd to prevent getting the PID of grep
        # itself.
        pid_cmd = "ps -ef | grep '[d]d if=/dev/urandom' | awk '{print $2}'"
        for _ in range(threads):
            run_command(stress_cmd)
        lines = self.faft_client.system.run_shell_command_get_output(
                    pid_cmd)
        pids = []
        for line in lines:
            if not line.strip():
                # Tolerate blank lines in the command output.
                continue
            logging.info("PID is %s", line)
            pids.append(int(line.strip()))
        self._stress_pid = pids
        return self._stress_pid


    def _stop_stressing(self):
        """
        Stop stressing DUT system

        Kills every process recorded by _stress_dut. A process that cannot
        be killed (e.g. because it is already gone) is logged and skipped
        so that the remaining ones are still killed. The PID list is
        cleared afterwards, which makes repeated calls harmless.
        """
        pids, self._stress_pid = self._stress_pid, []
        for pid in pids:
            try:
                self.faft_client.system.run_shell_command('kill -9 %d' % pid)
            except xmlrpclib.Fault as e:
                logging.warning("Failed to kill stress process %d: %s",
                                pid, str(e))


    def check_fan_off(self):
        """
        Checker of fan turned off.

        The function first delays FAN_DELAY seconds to ensure fan stops.
        Then it reads fan speed and fails the test if fan speed is non-zero.
        Then it stresses the system a bit and checks if the temperature
        goes up by at least TEMP_STRESS_INCREASE.

        Raises:
          error.TestFail: Raised when the fan is still spinning, or when
            temperature increases by less than TEMP_STRESS_INCREASE.
        """
        time.sleep(self.FAN_DELAY)
        fan_speed = self.servo.get('fan_actual_rpm')
        if int(fan_speed) != 0:
            raise error.TestFail("Fan is not turned off.")
        logging.info("EC reports fan turned off.")
        cpu_temp_before = int(self.servo.get('cpu_temp'))
        logging.info("CPU temperature before stressing is %d C",
                     cpu_temp_before)
        try:
            self._stress_dut()
            time.sleep(self.STRESS_DELAY_NO_FAN)
            cpu_temp_after = int(self.servo.get('cpu_temp'))
        finally:
            # Never leave the stress processes running, even if servo fails.
            self._stop_stressing()
        logging.info("CPU temperature after stressing is %d C",
                     cpu_temp_after)
        if cpu_temp_after - cpu_temp_before < self.TEMP_STRESS_INCREASE:
            raise error.TestFail(
                    "CPU temperature did not go up by more than %d degrees" %
                    self.TEMP_STRESS_INCREASE)


    def _get_temp_sensor_type(self, sensor_id):
        """
        Get type of a given temperature sensor

        Args:
          sensor_id: Temperature sensor ID.

        Returns:
          Type ID of the temperature sensor.

        Raises:
          error.TestError: Raised when ectool doesn't behave as we expected.
        """
        assert sensor_id < self._num_temp_sensor
        # Require at least one digit so that a malformed line ends up as
        # the documented TestError instead of a ValueError from int('').
        pattern = re.compile(r'Sensor type: (\d+)')
        lines = self.faft_client.system.run_shell_command_get_output(
                'ectool tempsinfo %d' % sensor_id)
        for line in lines:
            matched = pattern.match(line)
            if matched is not None:
                return int(matched.group(1))
        # Should never reach here
        raise error.TestError("Unexpected error occurred")


    def _check_fan_speed_per_sensor(self, fan_speed, sensor_id):
        """
        Check if the given fan_speed is reasonable from the view of certain
        temperature sensor. There could be three types of outcome:
          1. Fan speed is higher than expected. This may be due to other
             sensor sensing higher temperature and setting fan to higher
             speed.
          2. Fan speed is as expected.
          3. Fan speed is lower than expected. In this case, EC is not
             working as expected and an error should be raised.

        The fan speed is mapped to its fan step, and the step to the range
        of temperatures in which a sensor would ask for exactly that step.
        A reading below the range means the sensor would be happy with a
        slower fan (outcome 1), a reading above the range means the sensor
        needs a faster fan than it gets (outcome 3).

        Args:
          fan_speed: The current fan speed in RPM.
          sensor_id: The ID of temperature sensor.

        Returns:
          0x00: Fan speed is higher than expected. Also returned for
                sensors that take no part in fan control (ignored sensor
                type, or fan stepping disabled for the sensor type).
          0x01: Fan speed is as expected.
          0x10: Fan speed is lower than expected.

        Raises:
          error.TestError: Raised when getting unexpected fan speed.
        """
        sensor_type = self._get_temp_sensor_type(sensor_id)
        if sensor_type == self.SENSOR_TYPE_IGNORED:
            # This sensor should be ignored
            return self._FAN_HIGHER

        thresholds = self._thermal_setting[sensor_type]
        if thresholds[-1] == -273:
            # The fan stepping for this type of sensor is disabled
            # (a threshold of 0 K shows up as -273 after the conversion
            # to degree C done in _get_setting_for_type).
            return self._FAN_HIGHER

        try:
            idx = self._fan_steps.index(fan_speed)
        except ValueError:
            raise error.TestError("Unexpected fan speed: %d" % fan_speed)

        # thresholds[0:3] are not fan thresholds; thresholds[idx + 2] is
        # the threshold that switches the fan to step idx (for idx >= 1)
        # and thresholds[idx + 3] the one that switches to step idx + 1.
        if idx == 0:
            lower_bound = -self.INT_MAX
        else:
            lower_bound = thresholds[idx + 2] - self.FAN_THRESHOLD_MARGIN
        if idx == len(self._fan_steps) - 1:
            upper_bound = self.INT_MAX
        else:
            upper_bound = thresholds[idx + 3]

        temp_reading = self._get_temp_reading(sensor_id)
        logging.info("Sensor %d = %d C", sensor_id, temp_reading)
        logging.info("  Expecting %d - %d C", lower_bound, upper_bound)
        if temp_reading > upper_bound:
            # Too hot for the current step: the fan should be faster.
            return self._FAN_LOWER
        if temp_reading < lower_bound:
            # Cooler than the current step requires: the fan is faster
            # than this sensor alone would ask for.
            return self._FAN_HIGHER
        return self._FAN_AS_EXPECTED


    def check_auto_fan(self):
        """
        Checker of thermal engine automatic fan speed control.

        Stress DUT system for a longer period to make temperature more stable
        and check if fan speed is controlled as expected. The fan speed is
        accepted when no sensor needs a faster fan and at least one sensor
        justifies the current speed.

        Raises:
          error.TestFail: Raised when fan speed is not as expected.
        """
        try:
            self._stress_dut()
            time.sleep(self.STRESS_DELAY)
            fan_rpm = int(self.servo.get('fan_target_rpm'))
            logging.info('Fan speed is %d RPM', fan_rpm)
            result = self._FAN_HIGHER
            for sensor_id in range(self._num_temp_sensor):
                result |= self._check_fan_speed_per_sensor(fan_rpm, sensor_id)
        finally:
            self._stop_stressing()
        if result & self._FAN_LOWER:
            # A single sensor that needs a faster fan is already an error,
            # no matter what the other sensors report.
            raise error.TestFail("Fan speed lower than expected")
        if result == self._FAN_HIGHER:
            # No sensor justifies the current fan speed.
            raise error.TestFail("Fan speed higher than expected")


    def run_once(self):
        """Execute the main body of the test.

        Raises:
          error.TestNAError: Raised if the EC has no thermal capability.
        """
        if not self.check_ec_capability(['thermal']):
            raise error.TestNAError("Nothing needs to be tested on this device")
        logging.info("Checking host temperature report.")
        self.check_temp_report()

        self.turn_off_fan()
        logging.info("Verifying fan is turned off.")
        self.check_fan_off()

        self.enable_auto_fan_control()
        logging.info("Verifying automatic fan control functionality.")
        self.check_auto_fan()
439