• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import logging
6import re
7import time
8import xmlrpclib
9
10from autotest_lib.client.common_lib import error
11from autotest_lib.server.cros.faft.firmware_test import FirmwareTest
12
class firmware_ECThermal(FirmwareTest):
    """
    Servo based EC thermal engine test.

    When the EC reports the 'thermal' capability, run_once() performs
    three checks in order:
      1. The CPU temperature read through servo agrees with the one read
         through ectool (check_temp_report).
      2. The fan can be turned off and the CPU temperature then rises while
         the DUT is stressed (check_fan_off).
      3. With automatic fan control enabled, the fan speed is consistent
         with the temperature sensor readings (check_auto_fan).
    """
    version = 1

    # Delay (seconds) for waiting fan to start or stop
    FAN_DELAY = 5

    # Delay (seconds) for waiting device stressing to stabilize
    STRESS_DELAY = 30

    # Delay (seconds) for stressing device with fan off to check temperature
    # increase
    STRESS_DELAY_NO_FAN = 12

    # Margin for comparing servo based and ectool based CPU temperature
    TEMP_MISMATCH_MARGIN = 3

    # Minimum increase of CPU temperature when stressing DUT
    TEMP_STRESS_INCREASE = 3

    # Pseudo INT_MAX. Used as infinity when comparing temperature readings
    INT_MAX = 10000

    # Sensor type ID of ignored sensors
    SENSOR_TYPE_IGNORED = 255

    # Per-sensor verdicts returned by _check_fan_speed_per_sensor(). They are
    # distinct bits so that check_auto_fan() can OR them together.
    _FAN_SPEED_HIGHER = 0x00
    _FAN_SPEED_EXPECTED = 0x01
    _FAN_SPEED_LOWER = 0x10

    # PID of DUT stressing processes
    _stress_pid = list()

    # Whether initialize() managed to stop temp_metrics. The class-level
    # default lets cleanup() run even if initialize() bailed out before the
    # attribute was assigned.
    _has_temp_metrics = False

    def enable_auto_fan_control(self):
        """Enable EC automatic fan speed control"""
        # We use set_nocheck because servo reports current target
        # RPM instead of 'auto', and therefore servo.set always fails.
        self.servo.set_nocheck('fan_target_rpm', 'auto')


    def max_fan(self):
        """Maximize fan speed"""
        # We use set_nocheck because servo reports current target
        # RPM instead of 'max', and therefore servo.set always fails.
        self.servo.set_nocheck('fan_target_rpm', 'max')


    def turn_off_fan(self):
        """Turn off fan"""
        self.servo.set('fan_target_rpm', 'off')


    def _get_setting_for_type(self, type_id):
        """
        Retrieve thermal setting for a given type of sensor

        Runs 'ectool thermalget <type_id> <threshold_id>' for increasing
        threshold IDs until the command fails with xmlrpclib.Fault, and
        collects every threshold value found in the output.

        Args:
          type_id: The ID of sensor type.

        Returns:
          A list containing thresholds (in degree C) in the following order:
            Warning
            CPU off
            All power off
            Fan speed thresholds
          None is returned if no threshold could be read at all.
        """
        # Compiled once; the same pattern is applied to every output line.
        pattern = re.compile(r'Threshold \d* [a-z ]* \d* is (\d*) K.')
        setting = []
        current_id = 0
        while True:
            try:
                lines = self.faft_client.system.run_shell_command_get_output(
                        'ectool thermalget %d %d' % (type_id, current_id))
            except xmlrpclib.Fault:
                # The command failing marks the end of the threshold list.
                break
            for line in lines:
                matched = pattern.match(line)
                if matched is not None:
                    # Convert degree K to degree C
                    setting.append(int(matched.group(1)) - 273)
            current_id += 1

        return setting or None


    def get_fan_steps(self):
        """
        Retrieve fan step config from EC

        Parses the output of the EC console command 'thermalfan 0' and stores
        the result in self._fan_steps: index 0 is the lowest speed, followed
        by one entry per fan speed threshold. Every non-zero step is then
        written to servo and read back, so that the list holds the values
        servo actually reports for each step.

        Raises:
          error.TestError: Raised if no thermal setting has been retrieved.
        """
        if not self._thermal_setting:
            raise error.TestError("No thermal setting retrieved from EC")

        # The first three thresholds of a setting are warning / CPU off /
        # all power off; the remaining ones are fan speed thresholds.
        num_steps = len(self._thermal_setting[0]) - 3
        self._fan_steps = []
        expected_pat = ([r"Lowest speed: ([0-9-]+) RPM"] +
                        [r"\d+ K:\s+([0-9-]+) RPM"] * num_steps)
        match = self.ec.send_command_get_output("thermalfan 0", expected_pat)
        self._fan_steps = [int(m[1]) for m in match]

        # Get the actual value of each fan step
        for i, rpm in enumerate(self._fan_steps):
            if rpm == 0:
                continue
            self.servo.set_nocheck('fan_target_rpm', "%d" % rpm)
            self._fan_steps[i] = int(self.servo.get('fan_target_rpm'))

        logging.info("Actual fan steps: %s", self._fan_steps)


    def get_thermal_setting(self):
        """
        Retrieve thermal engine setting from EC

        Fills self._thermal_setting with one threshold list per sensor type
        (indexed by type ID) and sets self._num_temp_sensor to the number of
        sensor IDs for which 'ectool temps <id>' succeeds.
        """
        self._thermal_setting = []
        type_id = 0
        while True:
            setting = self._get_setting_for_type(type_id)
            if setting is None:
                break
            self._thermal_setting.append(setting)
            type_id += 1
        logging.info("Number of tempearture sensor types: %d", type_id)

        # Get the number of temperature sensors
        self._num_temp_sensor = 0
        while True:
            try:
                self.faft_client.system.run_shell_command('ectool temps %d' %
                                                   self._num_temp_sensor)
            except xmlrpclib.Fault:
                # First sensor ID that cannot be read ends the enumeration.
                break
            self._num_temp_sensor += 1
        logging.info("Number of temperature sensor: %d", self._num_temp_sensor)


    def initialize(self, host, cmdline_args):
        """
        Prepare the DUT for the thermal test.

        Stops temp_metrics if possible (remembering whether it was stopped so
        that cleanup() can restart it) and, when the EC has the 'thermal'
        capability, reads the thermal setting and fan steps before handing
        fan control back to the EC.

        Args:
          host: Passed through to FirmwareTest.initialize.
          cmdline_args: Passed through to FirmwareTest.initialize.

        Raises:
          error.TestNAError: Raised if check_ec_capability() fails.
        """
        super(firmware_ECThermal, self).initialize(host, cmdline_args)
        # Don't bother if there is no Chrome EC.
        if not self.check_ec_capability():
            raise error.TestNAError("Nothing needs to be tested on this device")
        # NOTE(review): presumably mutes EC console channels so command output
        # parsing is not disturbed; cleanup() sends "chan 0xffffffff" to undo.
        self.ec.send_command("chan 0")
        try:
            self.faft_client.system.run_shell_command('stop temp_metrics')
        except xmlrpclib.Fault:
            self._has_temp_metrics = False
        else:
            logging.info('Stopped temp_metrics')
            self._has_temp_metrics = True
        if self.check_ec_capability(['thermal']):
            self.get_thermal_setting()
            self.get_fan_steps()
            self.enable_auto_fan_control()


    def cleanup(self):
        """
        Restore the DUT state changed by the test.

        Each restore step is best-effort and independent: a failure is logged
        and does not prevent the remaining steps, nor the parent cleanup,
        from running.
        """
        def restore_fan():
            if self.check_ec_capability(['thermal']):
                self.enable_auto_fan_control()

        def restore_temp_metrics():
            if self._has_temp_metrics:
                logging.info('Starting temp_metrics')
                self.faft_client.system.run_shell_command('start temp_metrics')

        def restore_ec_channels():
            self.ec.send_command("chan 0xffffffff")

        for step in (restore_fan, restore_temp_metrics, restore_ec_channels):
            try:
                step()
            except Exception as e:
                logging.error("Caught exception: %s", str(e))
        super(firmware_ECThermal, self).cleanup()


    def _find_cpu_sensor_id(self):
        """
        This function finds the CPU temperature sensor using ectool.

        The CPU sensor is the one whose 'ectool tempsinfo' output reports the
        sensor name 'PECI'.

        Returns:
          Integer ID of CPU temperature sensor.

        Raises:
          error.TestFail: Raised if we fail to find PECI temperature through
            ectool.
        """
        for temp_id in range(self._num_temp_sensor):
            lines = self.faft_client.system.run_shell_command_get_output(
                    'ectool tempsinfo %d' % temp_id)
            for line in lines:
                matched = re.match('Sensor name: (.*)', line)
                if matched is not None and matched.group(1) == 'PECI':
                    return temp_id
        raise error.TestFail('Cannot find CPU temperature sensor ID.')


    def _get_temp_reading(self, sensor_id):
        """
        Get temperature reading on a sensor through ectool

        Args:
          sensor_id: Temperature sensor ID.

        Returns:
          Temperature reading in degree C.

        Raises:
          xmlrpclib.Fault: Raised when we fail to read temperature.
          error.TestError: Raised if ectool doesn't behave as we expected.
        """
        assert sensor_id < self._num_temp_sensor
        pattern = re.compile(r'Reading temperature...(\d*)')
        lines = self.faft_client.system.run_shell_command_get_output(
                'ectool temps %d' % sensor_id)
        for line in lines:
            matched = pattern.match(line)
            if matched is not None:
                # ectool reports degree K; convert to degree C
                return int(matched.group(1)) - 273
        # Should never reach here
        raise error.TestError("Unexpected error occurred")


    def check_temp_report(self):
        """
        Checker of temperature reporting.

        This function reads CPU temperature from servo and ectool. If
        the two readings mismatch by more than TEMP_MISMATCH_MARGIN, the
        test fails.

        Raises:
          error.TestFail: Raised when temperature reading mismatches by
            more than TEMP_MISMATCH_MARGIN.
        """
        cpu_temp_id = self._find_cpu_sensor_id()
        logging.info("CPU temperature sensor ID is %d", cpu_temp_id)
        ectool_cpu_temp = self._get_temp_reading(cpu_temp_id)
        servo_cpu_temp = int(self.servo.get('cpu_temp'))
        logging.info("CPU temperature from servo: %d C", servo_cpu_temp)
        logging.info("CPU temperature from ectool: %d C", ectool_cpu_temp)
        if abs(ectool_cpu_temp - servo_cpu_temp) > self.TEMP_MISMATCH_MARGIN:
            raise error.TestFail(
                    'CPU temperature readings from servo and ectool differ')


    def _stress_dut(self, threads=4):
        """
        Stress DUT system.

        By reading from /dev/urandom and writing to /dev/null, we can stress
        DUT and cause CPU temperature to go up. We stress the system forever,
        until _stop_stressing is called to kill the stress threads. This
        function is non-blocking.

        Args:
          threads: Number of threads (processes) when stressing forever.

        Returns:
          A list of stress process IDs is returned.
        """
        logging.info("Stressing DUT with %d threads...", threads)
        self.faft_client.system.run_shell_command('pkill dd')
        stress_cmd = 'dd if=/dev/urandom of=/dev/null bs=1M'
        # Grep for [d]d instead of dd to prevent getting the PID of grep
        # itself.
        pid_cmd = "ps -ef | grep '[d]d if=/dev/urandom' | awk '{print $2}'"
        block = False
        self._stress_pid = []
        for _ in range(threads):
            self.faft_client.system.run_shell_command(stress_cmd, block)
        lines = self.faft_client.system.run_shell_command_get_output(
                    pid_cmd)
        for line in lines:
            pid = line.strip()
            if not pid:
                # Tolerate blank lines in the command output.
                continue
            logging.info("PID is %s", line)
            self._stress_pid.append(int(pid))
        return self._stress_pid


    def _stop_stressing(self):
        """
        Stop stressing DUT system

        Kills every recorded stress process. PIDs are dropped from the record
        as they are handled, so calling this again does not send kill to PIDs
        that were already dealt with.
        """
        stop_cmd = 'kill -9 %d'
        while self._stress_pid:
            pid = self._stress_pid.pop(0)
            self.faft_client.system.run_shell_command(stop_cmd % pid)


    def check_fan_off(self):
        """
        Checker of fan turned off.

        The function first delays FAN_DELAY seconds to ensure fan stops.
        Then it reads fan speed and fails if fan speed is non-zero.
        Then it stresses the system a bit and checks if the temperature
        goes up by at least TEMP_STRESS_INCREASE.

        Raises:
          error.TestFail: Raised when the fan is still spinning, or when the
            temperature increases by less than TEMP_STRESS_INCREASE.
        """
        time.sleep(self.FAN_DELAY)
        fan_speed = self.servo.get('fan_actual_rpm')
        if int(fan_speed) != 0:
            raise error.TestFail("Fan is not turned off.")
        logging.info("EC reports fan turned off.")
        cpu_temp_before = int(self.servo.get('cpu_temp'))
        logging.info("CPU temperature before stressing is %d C",
                     cpu_temp_before)
        self._stress_dut()
        try:
            time.sleep(self.STRESS_DELAY_NO_FAN)
            cpu_temp_after = int(self.servo.get('cpu_temp'))
        finally:
            # Never leave the stress processes running on the DUT, even if
            # reading the temperature fails.
            self._stop_stressing()
        logging.info("CPU temperature after stressing is %d C",
                     cpu_temp_after)
        if cpu_temp_after - cpu_temp_before < self.TEMP_STRESS_INCREASE:
            raise error.TestFail(
                    "CPU temperature did not go up by more than %d degrees" %
                    self.TEMP_STRESS_INCREASE)


    def _get_temp_sensor_type(self, sensor_id):
        """
        Get type of a given temperature sensor

        Args:
          sensor_id: Temperature sensor ID.

        Returns:
          Type ID of the temperature sensor.

        Raises:
          error.TestError: Raised when ectool doesn't behave as we expected.
        """
        assert sensor_id < self._num_temp_sensor
        pattern = re.compile(r'Sensor type: (\d*)')
        lines = self.faft_client.system.run_shell_command_get_output(
                'ectool tempsinfo %d' % sensor_id)
        for line in lines:
            matched = pattern.match(line)
            if matched is not None:
                return int(matched.group(1))
        # Should never reach here
        raise error.TestError("Unexpected error occurred")


    def _check_fan_speed_per_sensor(self, fan_speed, sensor_id):
        """
        Check if the given fan_speed is reasonable from the view of certain
        temperature sensor. There could be three types of outcome:
          1. Fan speed is higher than expected. This may be due to other
             sensor sensing higher temperature and setting fan to higher
             speed.
          2. Fan speed is as expected.
          3. Fan speed is lower than expected. In this case, EC is not
             working as expected and an error should be raised.

        Sensors of type SENSOR_TYPE_IGNORED, and sensor types whose fan
        stepping is disabled, put no requirement on the fan and are reported
        as outcome 1.

        Args:
          fan_speed: The current fan speed in RPM.
          sensor_id: The ID of temperature sensor.

        Returns:
          0x00: Fan speed is higher than expected.
          0x01: Fan speed is as expected.
          0x10: Fan speed is lower than expected.

        Raises:
          error.TestError: Raised when getting unexpected fan speed.
        """
        sensor_type = self._get_temp_sensor_type(sensor_id)
        if sensor_type == self.SENSOR_TYPE_IGNORED:
            # This sensor should be ignored
            return self._FAN_SPEED_HIGHER

        thresholds = self._thermal_setting[sensor_type]
        if thresholds[-1] == -273:
            # The fan stepping for this type of sensor is disabled
            # (a threshold of 0 K shows up as -273 after conversion to C).
            return self._FAN_SPEED_HIGHER

        try:
            idx = self._fan_steps.index(fan_speed)
        except ValueError:
            raise error.TestError("Unexpected fan speed: %d" % fan_speed)

        # Temperature range in which fan step idx is the expected one.
        # thresholds[3:] are the fan speed thresholds; step 0 is the lowest
        # speed and has no lower limit, the last step has no upper limit.
        # NOTE(review): the "- 3" looks like a hysteresis margin for stepping
        # down - confirm against the EC implementation.
        if idx == 0:
            lower_bound = -self.INT_MAX
            upper_bound = thresholds[3]
        elif idx == len(self._fan_steps) - 1:
            lower_bound = thresholds[idx + 2] - 3
            upper_bound = self.INT_MAX
        else:
            lower_bound = thresholds[idx + 2] - 3
            upper_bound = thresholds[idx + 3]

        temp_reading = self._get_temp_reading(sensor_id)
        logging.info("Sensor %d = %d C", sensor_id, temp_reading)
        logging.info("  Expecting %d - %d C", lower_bound, upper_bound)
        if temp_reading > upper_bound:
            # Sensor is hotter than this fan step allows, i.e. the fan is
            # running slower than this sensor requires.
            return self._FAN_SPEED_LOWER
        elif temp_reading < lower_bound:
            # Sensor is cooler than this fan step requires, i.e. the fan is
            # running faster than this sensor alone would need.
            return self._FAN_SPEED_HIGHER
        else:
            return self._FAN_SPEED_EXPECTED


    def check_auto_fan(self):
        """
        Checker of thermal engine automatic fan speed control.

        Stress DUT system for a longer period to make temperature more stable
        and check if fan speed is controlled as expected. The per-sensor
        verdicts of _check_fan_speed_per_sensor are ORed together; the check
        passes as soon as at least one sensor finds the fan speed as expected.

        Raises:
          error.TestFail: Raised when fan speed is not as expected.
        """
        self._stress_dut()
        try:
            time.sleep(self.STRESS_DELAY)
            fan_rpm = int(self.servo.get('fan_target_rpm'))
            logging.info('Fan speed is %d RPM', fan_rpm)
            result = self._FAN_SPEED_HIGHER
            for sensor_id in range(self._num_temp_sensor):
                result |= self._check_fan_speed_per_sensor(fan_rpm, sensor_id)
        finally:
            # Always stop the stress processes, whatever happened above.
            self._stop_stressing()
        # NOTE(review): a mix of "as expected" and "lower than expected"
        # (0x11) is not treated as a failure here - confirm this is intended.
        if result == self._FAN_SPEED_HIGHER:
            raise error.TestFail("Fan speed higher than expected")
        if result == self._FAN_SPEED_LOWER:
            raise error.TestFail("Fan speed lower than expected")


    def run_once(self):
        """Execute the main body of the test.

        Raises:
          error.TestNAError: Raised if the EC lacks the 'thermal' capability.
          error.TestFail: Raised by any of the individual checkers.
        """
        if not self.check_ec_capability(['thermal']):
            raise error.TestNAError("Nothing needs to be tested on this device")
        logging.info("Checking host temperature report.")
        self.check_temp_report()

        self.turn_off_fan()
        logging.info("Verifying fan is turned off.")
        self.check_fan_off()

        self.enable_auto_fan_control()
        logging.info("Verifying automatic fan control functionality.")
        self.check_auto_fan()