• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2016 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import functools
6import logging
7
8import common
9from autotest_lib.client.common_lib import hosts
10from autotest_lib.server.cros.servo import servo
11from autotest_lib.server.hosts import repair_utils
12
13
def ignore_exception_for_non_cros_host(func):
    """
    Decorator to ignore ControlUnavailableError if servo host is not cros host.
    When using test_that command on a workstation, this enables usage of
    additional servo devices such as servo micro and Sweetberry. This shall not
    change any lab behavior.

    @param func  Method with the signature `(self, host)` to be wrapped.

    @return A wrapper with the same signature that forwards the return
            value of `func`, or returns `None` when the error was
            ignored.
    """
    @functools.wraps(func)
    def wrapper(self, host):
        """
        Wrapper around func.
        """
        try:
            # Forward the result so that decorating a method never
            # silently discards what it returns.
            return func(self, host)
        except servo.ControlUnavailableError as e:
            # On a cros host a missing control is a real failure.
            if host.is_cros_host():
                raise
            logging.warning("Servo host is not cros host, ignore %s: %s",
                            type(e).__name__, e)
    return wrapper
34
35
class _UpdateVerifier(hosts.Verifier):
    """
    Verifier that kicks off a servo host update when appropriate.

    The update is started without waiting for it to finish, and any
    exception raised along the way is logged rather than propagated, so
    this verifier does not fail regardless of whether the servo host is
    up-to-date.
    """

    def verify(self, host):
        """
        Request a non-blocking image update for a lab servo host.

        @param host  Servo host object to be updated.
        """
        try:
            # Only update hosts in the physical lab, and skip runs
            # started by test_that, because subnet restrictions can
            # cause the update to fail.
            lab_run = host.is_in_lab() and host.job and host.job.in_lab
            if lab_run:
                host.update_image(wait_for_update=False)
        except Exception as err:
            # A failed update must not block the DUT repair action.
            # See crbug.com/1029950.
            logging.error('Failed to update servohost image: %s', err)

    @property
    def description(self):
        """Short description of what this verifier checks."""
        return 'servo host software is up-to-date'
60
61
class _ConfigVerifier(hosts.Verifier):
    """
    Base verifier for the servo config file verifiers.

    Subclasses set `ATTR` to the name of the config variable they
    check, and use the helpers below to read that variable from the
    per-port config file and validate it.
    """

    CONFIG_FILE = '/var/lib/servod/config'
    ATTR = ''

    @staticmethod
    def _get_config_val(host, config_file, attr):
        """
        Get the `attr` for `host` from `config_file`.

        @param host         Host to be checked for `config_file`.
        @param config_file  Path to the config file to be tested.
        @param attr         Attribute to get from config file.

        @return The attr val as printed after sourcing the config file
                (trailing newlines removed), or `None` if the command
                produced no output, e.g. because the file was absent.
        """
        # The file is sourced by the shell and the variable echoed; the
        # echo is skipped entirely when the file does not exist.
        getboard = ('CONFIG=%s ; [ -f $CONFIG ] && '
                    '. $CONFIG && echo $%s' % (config_file, attr))
        attr_val = host.run(getboard, ignore_status=True).stdout
        return attr_val.strip('\n') if attr_val else None

    @staticmethod
    def _validate_attr(host, val, expected_val, attr, config_file):
        """
        Check that the attr setting is valid for the host.

        This presupposes that a valid config file was found.  Raise an
        exception if:
          * There was no attr setting from the file (i.e. the setting
            is an empty string), or
          * The attr setting is valid, the attr is known,
            and the setting doesn't match the DUT.

        @param host         Host to be checked for `config_file`.
        @param val          Value to be tested.
        @param expected_val Expected value.
        @param attr         Attribute we're validating.
        @param config_file  Path to the config file to be tested.

        @raises hosts.AutoservVerifyError if `val` is empty, or if
                `expected_val` is not `None` and differs from `val`.
        """
        if not val:
            # The file path comes first and the attribute second, to
            # match the wording of the message.
            raise hosts.AutoservVerifyError(
                    'config file %s exists, but %s '
                    'is not set' % (config_file, attr))
        if expected_val is not None and val != expected_val:
            raise hosts.AutoservVerifyError(
                    '%s is %s; it should be %s' % (attr, val, expected_val))


    def _get_config(self, host):
        """
        Return the config file to check.

        The path is `CONFIG_FILE` suffixed with `_<servo_port>`.

        @param host     Host object.

        @return The config file to check.
        """
        return '%s_%d' % (self.CONFIG_FILE, host.servo_port)

    @property
    def description(self):
        """Short description of what this verifier checks."""
        return 'servo %s setting is correct' % self.ATTR
127
128
class _SerialConfigVerifier(_ConfigVerifier):
    """
    Verifier for the `SERIAL` setting in the servo config file.
    """

    ATTR = 'SERIAL'

    def verify(self, host):
        """
        Check that the `SERIAL` setting for `host` is configured.

        The config file used by the `servod` upstart job is inspected
        for the `SERIAL` variable.  An error is raised when:
          * No value could be read from the config file.
          * The value is empty or differs from `host.servo_serial`.

        Nothing is checked when the host is not a cros host or when
        `host.servo_serial` is `None`.

        @param host  Servo host object to be checked.
        """
        # Not every servo host has a servo serial, so a missing one is
        # not an error; non-cros hosts are skipped as well.
        if not host.is_cros_host() or host.servo_serial is None:
            return

        config_path = self._get_config(host)
        configured = self._get_config_val(host, config_path, self.ATTR)
        if configured is not None:
            self._validate_attr(host, configured, host.servo_serial,
                                self.ATTR, config_path)
            return

        raise hosts.AutoservVerifyError(
                'Servo serial is unconfigured; should be %s'
                % host.servo_serial
        )
163
164
165
class _BoardConfigVerifier(_ConfigVerifier):
    """
    Verifier for the `BOARD` setting in the servo config file.
    """

    ATTR = 'BOARD'

    def verify(self, host):
        """
        Check that the `BOARD` setting for `host` is configured.

        The config file used by the `servod` upstart job is inspected
        for the `BOARD` variable.  An error is raised when:
          * No value could be read from the config file.
          * The config file exists but holds no `BOARD` value.
          * `host.servo_board` is known and the value differs from it.

        Nothing is checked when the host is not a cros host.

        @param host  Servo host object to be checked.
        """
        if not host.is_cros_host():
            return

        config_path = self._get_config(host)
        configured = self._get_config_val(host, config_path, self.ATTR)
        if configured is None:
            # Mention the expected board only when it is known.
            if host.servo_board is None:
                detail = 'Servo board is unconfigured'
            else:
                detail = ('Servo board is unconfigured; should be %s'
                          % host.servo_board)
            raise hosts.AutoservVerifyError(detail)

        self._validate_attr(host, configured, host.servo_board, self.ATTR,
                            config_path)
198
199
class _ServodJobVerifier(hosts.Verifier):
    """
    Verifier that the `servod` upstart job is up and running.
    """

    def verify(self, host):
        """
        Query the `servod` job status for the host's servo port.

        Nothing is checked when the host is not a cros host.

        @param host  Servo host object to be checked.

        @raises hosts.AutoservVerifyError if the status output does not
                contain 'start/running'.
        """
        if not host.is_cros_host():
            return

        result = host.run('status servod PORT=%d' % host.servo_port,
                          ignore_status=True)
        if 'start/running' in result.stdout:
            return

        raise hosts.AutoservVerifyError(
                'servod not running on %s port %d' %
                (host.hostname, host.servo_port))

    @property
    def description(self):
        """Short description of what this verifier checks."""
        return 'servod upstart job is running'
218
219
class _DiskSpaceVerifier(hosts.Verifier):
    """
    Verifier that the servohost still has enough free disk space.
    """

    def verify(self, host):
        """
        Check free space on the stateful partition against a threshold.

        @param host  Servo host object to be checked.
        """
        stateful_path = '/mnt/stateful_partition'
        # Minimum available space required on stateful, in GiB.
        min_free = 0.5
        host.check_diskspace(stateful_path, min_free)

    @property
    def description(self):
        """Short description of what this verifier checks."""
        return 'servohost has enough disk space.'
232
233
class _ServodConnectionVerifier(hosts.Verifier):
    """
    Verifier that a connection to `servod` can be established.

    The connection to the target servod service is exercised with a
    simple method call, which also initializes all servo signals to
    their default values.

    N.B. That initialization is required: the power button and lid
    switch verifiers both compare against the expected initial values.
    """

    def verify(self, host):
        """
        Connect to servod through the host.

        @param host  Servo host object to connect with.
        """
        host.connect_servo()

    @property
    def description(self):
        """Short description of what this verifier checks."""
        return 'servod service is taking calls'
253
254
class _PowerButtonVerifier(hosts.Verifier):
    """
    Verifier for the sanity of the `pwr_button` signal.

    The `pwr_button` signal is expected to report that the power button
    is released.  A signal stuck at `press` commonly means the ribbon
    cable is disconnected.
    """
    # TODO (crbug.com/646593) - Remove list below once servo has been updated
    # with a dummy pwr_button signal.
    _BOARDS_WO_PWR_BUTTON = ['arkham', 'gale', 'mistral', 'storm', 'whirlwind']

    @ignore_exception_for_non_cros_host
    def verify(self, host):
        """
        Check that `pwr_button` reads 'release'.

        Boards listed in `_BOARDS_WO_PWR_BUTTON` are skipped.

        @param host  Servo host object to be checked.

        @raises hosts.AutoservVerifyError if the signal has any other
                value.
        """
        if host.servo_board in self._BOARDS_WO_PWR_BUTTON:
            return
        if host.get_servo().get('pwr_button') != 'release':
            raise hosts.AutoservVerifyError(
                    "Check ribbon cable: 'pwr_button' is stuck")


    @property
    def description(self):
        """Short description of what this verifier checks."""
        return 'pwr_button control is normal'
280
281
class _LidVerifier(hosts.Verifier):
    """
    Verifier for the sanity of the `lid_open` signal.
    """

    @ignore_exception_for_non_cros_host
    def verify(self, host):
        """
        Check that `lid_open` reads 'yes' or 'not_applicable'.

        @param host  Servo host object to be checked.

        @raises hosts.AutoservVerifyError if the signal has any other
                value.
        """
        lid_state = host.get_servo().get('lid_open')
        if lid_state not in ('yes', 'not_applicable'):
            raise hosts.AutoservVerifyError(
                    'Check lid switch: lid_open is %s' % lid_state)

    @property
    def description(self):
        """Short description of what this verifier checks."""
        return 'lid_open control is normal'
297
298
class _RestartServod(hosts.RepairAction):
    """Repair action that restarts `servod` on the servo host."""

    def repair(self, host):
        """
        Restart servod on a cros servo host.

        @param host  Servo host object to be repaired.

        @raises hosts.AutoservRepairError if the host is not a cros
                host.
        """
        if host.is_cros_host():
            host.restart_servod()
            return
        raise hosts.AutoservRepairError(
                "Can't restart servod: not running embedded Chrome OS.",
                'servo_not_applicable_to_non_cros_host')

    @property
    def description(self):
        """Short description of what this repair action does."""
        return 'Start servod with the proper config settings.'
313
314
class _ServoRebootRepair(repair_utils.RebootRepair):
    """
    Reboot repair action that waits for a pending update first.

    Behaves like the standard `RebootRepair`, except that for a
    non-multi-DUTs servo host any pending update is allowed to complete
    before the reboot, so that the servo_v3 is up-to-date afterwards.
    For a labstation only a reboot request is filed; reboot and update
    are handled by the labstation host class.
    """

    def repair(self, host):
        """
        Reboot the servo host, or request a reboot for a labstation.

        @param host  Servo host object to be repaired.

        @raises hosts.AutoservRepairError if the host is localhost or
                is not a cros host.
        """
        if host.is_localhost() or not host.is_cros_host():
            raise hosts.AutoservRepairError(
                'Target servo is not a test lab servo',
                'servo_not_applicable_to_host_outside_lab')

        if host.is_labstation():
            host.request_reboot()
            logging.warning('Reboot labstation requested, it will be '
                            'handled by labstation administrative task.')
            return

        try:
            host.update_image(wait_for_update=True)
        except Exception as err:
            # A failed update must not block the DUT repair action.
            # See crbug.com/1029950.
            logging.error('Failed to update servohost image: %s', err)
        super(_ServoRebootRepair, self).repair(host)

    @property
    def description(self):
        """Short description of what this repair action does."""
        return 'Wait for update, then reboot servo host.'
347
348
class _DutRebootRepair(hosts.RepairAction):
    """
    Reboot the DUT to recover servo controls that rely on the EC console.

    Some servo controls, such as lid_open, require talking to the DUT
    through the EC UART console.  Failures of such controls can be
    recovered by rebooting the DUT.
    """

    def repair(self, host):
        """
        Reset the DUT through servo and re-check `lid_open`.

        @param host  Servo host object whose DUT is to be reset.

        @raises hosts.AutoservVerifyError if `lid_open` is neither
                'yes' nor 'not_applicable' after the reset.
        """
        host.get_servo().get_power_state_controller().reset()
        # Reading lid_open requires the EC console, so it doubles as
        # the check that the reset helped.
        lid_state = host.get_servo().get('lid_open')
        if lid_state not in ('yes', 'not_applicable'):
            raise hosts.AutoservVerifyError(
                    'Still fail to contact EC console after rebooting DUT')

    @property
    def description(self):
        """Short description of what this repair action does."""
        return 'Reset the DUT via servo'
369
370
class _DiskCleanupRepair(hosts.RepairAction):
    """
    Free up servohost disk space by removing old logs, metrics and
    crash dumps.
    """
    KEEP_LOGS_MAX_DAYS = 5

    FILE_TO_REMOVE = ['/var/lib/metrics/uma-events',
                      '/var/spool/crash/*']

    def repair(self, host):
        """
        Delete old servod logs and the paths in `FILE_TO_REMOVE`.

        Nothing is removed when the host is localhost.  Every command
        is run with `ignore_status=True`.

        @param host  Servo host object to be cleaned up.
        """
        if host.is_localhost():
            # Never remove anything during local testing.
            return

        # Drop servod logs older than KEEP_LOGS_MAX_DAYS days.
        prune_logs_cmd = ('/usr/bin/find /var/log/servod_* '
                          '-mtime +%d -print -delete'
                          % self.KEEP_LOGS_MAX_DAYS)
        host.run(prune_logs_cmd, ignore_status=True)

        # Drop the pre-defined metrics and crash dumps.
        for target in self.FILE_TO_REMOVE:
            host.run('rm %s' % target, ignore_status=True)

    @property
    def description(self):
        """Short description of what this repair action does."""
        return 'Clean up old logs/metrics on servohost to free up disk space.'
396
397
def create_servo_repair_strategy():
    """
    Build and return the `RepairStrategy` used for a `ServoHost`.

    @return A `hosts.RepairStrategy` constructed from the verifier and
            repair action tables below, tagged 'servo'.
    """
    config_checks = ['brd_config', 'ser_config']
    servod_checks = ['job', 'servod', 'pwr_button']

    # NOTE(review): entries look like (verifier class, label, labels of
    # the verifiers it depends on) -- confirm against RepairStrategy.
    verifiers = [
        (repair_utils.SshVerifier,   'servo_ssh',   []),
        (_DiskSpaceVerifier,         'disk_space',  ['servo_ssh']),
        (_UpdateVerifier,            'update',      ['servo_ssh']),
        (_BoardConfigVerifier,       'brd_config',  ['servo_ssh']),
        (_SerialConfigVerifier,      'ser_config',  ['servo_ssh']),
        (_ServodJobVerifier,         'job',
         config_checks + ['disk_space']),
        (_ServodConnectionVerifier,  'servod',      ['job']),
        (_PowerButtonVerifier,       'pwr_button',  ['servod']),
        (_LidVerifier,               'lid_open',    ['servod']),
        # TODO(jrbarnette):  We want a verifier for whether there's
        # a working USB stick plugged into the servo.  However,
        # although we always want to log USB stick problems, we don't
        # want to fail the servo because we don't want a missing USB
        # stick to prevent, say, power cycling the DUT.
        #
        # So, it may be that the right fix is to put diagnosis into
        # ServoInstallRepair rather than add a verifier.
    ]

    # NOTE(review): entries look like (repair class, label, verifier
    # labels it depends on, verifier labels that trigger it) -- confirm
    # against RepairStrategy.
    repairs = [
        (_DiskCleanupRepair, 'disk_cleanup', ['servo_ssh'], ['disk_space']),
        (_RestartServod, 'restart', ['servo_ssh'],
         config_checks + servod_checks),
        (_ServoRebootRepair, 'servo_reboot', ['servo_ssh'], servod_checks),
        (_DutRebootRepair, 'dut_reboot', ['servod'], ['lid_open']),
    ]
    return hosts.RepairStrategy(verifiers, repairs, 'servo')
431