• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Lint as: python2, python3
2# Copyright 2016 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6from __future__ import absolute_import
7from __future__ import division
8from __future__ import print_function
9
10import json
11import logging
12import time
13import math
14
15import common
16from autotest_lib.client.common_lib import error
17from autotest_lib.client.common_lib import global_config
18from autotest_lib.client.common_lib import hosts
19from autotest_lib.client.common_lib import utils
20from autotest_lib.client.common_lib.cros import dev_server
21from autotest_lib.client.common_lib.cros import retry
22from autotest_lib.client.common_lib.cros import tpm_utils
23from autotest_lib.server import afe_utils
24from autotest_lib.server import crashcollect
25from autotest_lib.server.cros import provisioner
26from autotest_lib.server.cros.dynamic_suite import tools
27from autotest_lib.server.hosts import cros_constants
28from autotest_lib.server.hosts import cros_firmware
29from autotest_lib.server.hosts import repair_utils
30from autotest_lib.site_utils.admin_audit import verifiers as audit_verify
31from autotest_lib.site_utils.admin_audit import constants as audit_const
32from six.moves import range
33
34try:
35    from chromite.lib import metrics
36except ImportError:
37    metrics = utils.metrics_mock
38
39from chromite.lib import timeout_util
40
# Verifier tags used as the default triggers for a servo reset (judging
# by the name; the consumer lives outside this section of the file).
DEFAULT_SERVO_RESET_TRIGGER = (
        'ping',
        'ssh',
        'stop_start_ui',
        'power',
)


# _DEV_MODE_ALLOWED_POOLS - The set of pools that are allowed to be
# in dev mode (usually, those should be unmanaged devices).
#
# The config value is a comma-separated list.  Each entry is stripped of
# surrounding whitespace and blank entries are dropped, so an unset or
# blank option produces an empty set instead of a set holding ''.
_DEV_MODE_ALLOWED_POOLS = set(
        pool.strip()
        for pool in global_config.global_config.get_config_value(
                'CROS',
                'pools_dev_mode_allowed',
                type=str,
                default='',
                allow_blank=True).split(',')
        if pool.strip())

# Setting to suppress dev mode check; primarily used for moblab where all
# DUT's are in dev mode.
_DEV_MODE_ALWAYS_ALLOWED = global_config.global_config.get_config_value(
            'CROS',
            'dev_mode_allowed',
            type=bool,
            default=False)

# Triggers for the 'provision', 'powerwash', and 'usb' repair actions.
# These are also used as dependencies in the `CrosHost` repair
# sequence, as follows:
#
# provision:
#   - triggers: _CROS_PROVISION_TRIGGERS
#   - depends on: _CROS_USB_TRIGGERS + _CROS_POWERWASH_TRIGGERS
#
# powerwash:
#   - triggers: _CROS_POWERWASH_TRIGGERS + _CROS_PROVISION_TRIGGERS
#   - depends on: _CROS_USB_TRIGGERS
#
# usb:
#   - triggers: _CROS_USB_TRIGGERS + _CROS_POWERWASH_TRIGGERS +
#               _CROS_PROVISION_TRIGGERS
#   - depends on: _CROS_USB_DEPENDENCIES
#
# N.B. AC power detection depends on software on the DUT, and there
# have been bugs where detection failed even though the DUT really
# did have power.  So, we make the 'power' verifier a trigger for
# reinstall repair actions, too.
#
# TODO(jrbarnette):  provision repair can't fix all problems reported by
# the 'cros' verifier; it's listed as a provision trigger as a
# simplification.  The ultimate fix is to split the 'cros' verifier
# into smaller individual verifiers.
_CROS_PROVISION_TRIGGERS = (
        'power',
        'rwfw',
        'fwstatus',
        'python',
        'hwid',
        'cros',
        'dev_default_boot',
)
_CROS_POWERWASH_TRIGGERS = ('tpm', 'good_provision', 'ext4',)
_CROS_USB_TRIGGERS = (
        'ping',
        'ssh',
        'writable',
        'stop_start_ui',
)
_JETSTREAM_USB_TRIGGERS = (
        'ping',
        'ssh',
        'writable',
)
_CROS_FIRMWARE_TRIGGERS = (
        'ping',
        'ssh',
)
_CROS_USB_DEPENDENCIES = ('usb_drive', )
120
121
class ACPowerVerifier(hosts.Verifier):
    """Check for AC power and battery charging state.

    The verifier fails when the DUT does not report line power as
    online, or when the battery is discharging while below
    `cros_constants.MIN_BATTERY_LEVEL`.  Missing or unparsable battery
    data only produces a warning.
    """

    # Battery discharging state in power_supply_info file.
    BATTERY_DISCHARGING = 'Discharging'
    # Power controller can discharge battery any time till 90% for any model.
    # Setting level to 90% in case we have wearout of it.
    BATTERY_DISCHARGE_MIN = 90

    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        """Run the AC-power check followed by the battery check.

        @param host     The host (DUT) to verify.

        @raises hosts.AutoservVerifyError if power info cannot be read,
                AC is not online, or the battery is low and discharging.
        """
        info = self._load_info(host)
        self._validate_ac_plugged(info)
        self._validate_battery(host, info)

    def _load_info(self, host):
        """Read the power supply info from the host.

        @param host     The host (DUT) to query.

        @returns The value of `host.get_power_supply_info()`.
        @raises hosts.AutoservVerifyError if the underlying command fails.
        """
        try:
            info = host.get_power_supply_info()
        except error.AutoservRunError:
            raise hosts.AutoservVerifyError(
                    'Failed to get power supply info')
        return info

    def _validate_ac_plugged(self, info):
        """Validate that the DUT is plugged in to AC.

        @param info     Power supply info as returned by `_load_info()`.

        @raises hosts.AutoservVerifyError if line power is not reported
                as online, or the relevant keys are missing.
        """
        try:
            if info['Line Power']['online'] != 'yes':
                raise hosts.AutoservVerifyError(
                        'AC power is not plugged in')
        except KeyError:
            raise hosts.AutoservVerifyError(
                    'Cannot determine AC power status')

    def _validate_battery(self, host, info):
        """Validate the battery level and charging state.

        If the battery is discharging below `BATTERY_DISCHARGE_MIN`, one
        attempt is made to restore normal charge control before the final
        decision is taken.

        @param host     The host (DUT) being verified.
        @param info     Power supply info as returned by `_load_info()`.

        @raises hosts.AutoservVerifyError if the battery is below
                `cros_constants.MIN_BATTERY_LEVEL` and discharging.
        """
        try:
            charging_state = info['Battery']['state']
            battery_level = float(info['Battery']['percentage'])

            # Collect info to determine which battery level is better to call
            # as MIN_BATTERY_LEVEL for DUTs in the lab.
            if battery_level < cros_constants.MIN_BATTERY_LEVEL:
                level_by_10 = int(math.floor(battery_level / 10.0)) * 10
                metrics_data = {
                        'host': host.hostname,
                        'level': level_by_10,
                        'mode': charging_state
                }
                metrics.Counter('chromeos/autotest/battery/state2').increment(
                        fields=metrics_data)

            if (charging_state == self.BATTERY_DISCHARGING
                        and battery_level < self.BATTERY_DISCHARGE_MIN):
                logging.debug('Try to fix discharging state of the battery. '
                              'Possible that a test left wrong state.')
                # Here is the chance that battery is discharging because
                # of some test did not clean up the state.
                # We are going to try to fix it by set charging to normal.
                host.run('ectool chargecontrol normal', ignore_status=True)
                # wait to change state.
                time.sleep(10)
                # Only the charging state is refreshed; the level read
                # before the fix attempt is still the one used below.
                info = self._load_info(host)
                charging_state = info['Battery']['state']
                fixed = charging_state != self.BATTERY_DISCHARGING
                # Report the real outcome rather than always claiming
                # success.
                if fixed:
                    logging.debug('Fixed battery discharge mode.')
                else:
                    logging.debug('Battery is still discharging after '
                                  'setting charge control to normal.')
                # TODO (@otabek) remove metrics after research
                metrics_data = {
                        'model': host.host_info_store.get().model,
                        'fixed': fixed
                }
                metrics.Counter(
                    'chromeos/autotest/repair/chargecontrol_fixed'
                ).increment(fields=metrics_data)

            if (battery_level < cros_constants.MIN_BATTERY_LEVEL
                        and charging_state == self.BATTERY_DISCHARGING):
                # TODO(@xianuowang) remove metrics here once we have device
                # health profile to collect history of DUT's metrics.
                metrics_data = {'host': host.hostname,
                                'board': host.host_info_store.get().board}
                metrics.Counter(
                    'chromeos/autotest/repair/verifier/power').increment(
                        fields=metrics_data)
                raise hosts.AutoservVerifyError(
                        'Battery is in discharging state and current level'
                        ' is less than %s%%' %
                        cros_constants.MIN_BATTERY_LEVEL)
        except (KeyError, ValueError):
            # Missing keys or a non-numeric percentage: treat the battery
            # check as inconclusive instead of failing the verifier.
            logging.warning('Cannot determine battery state -'
                            ' skipping check.')

    @property
    def description(self):
        """Human-readable summary of what this verifier checks."""
        return 'The DUT is plugged in to AC power and battery is charging'
217
218
class CrosVerisionVerifier(hosts.Verifier):
    """Confirm that the ChromeOS image on the host matches the
    provision-cros_version label.

    Some tests may change the DUT image without updating the
    provision-cros_version label.  The next test to run on the same
    host would then get an unexpected OS version and could yield a
    false positive result.
    """

    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        """Fail only when the host reports an actual version mismatch.

        @param host     The host (DUT) to verify.

        @raises hosts.AutoservVerifyError if
                `host.verify_cros_version_label()` returns a false value.
        """
        try:
            versions_agree = host.verify_cros_version_label()
        except Exception as err:
            # Any error other than a real mismatch is deliberately not
            # treated as a failure, since failing here would make
            # debugging harder.
            logging.warning('Unexpected error during verify cros verision'
                            ' on %s; %s', host.hostname, err)
            return

        if versions_agree:
            return
        raise hosts.AutoservVerifyError('ChromeOS image on the host'
                                        ' does not match to cros-version'
                                        ' label.')

    @property
    def description(self):
        """Human-readable summary of what this verifier checks."""
        return 'ChromeOS image on host matches cros_version label'
250
251
class WritableVerifier(hosts.Verifier):
    """
    Confirm the stateful file systems are writable.

    Linux commonly reacts to certain unexpected file system errors
    (including hardware errors in block devices) by switching the file
    system to read-only.  This verifier checks that this has not
    happened.

    Two file systems must be writable for critical operations like AU:
      * The (unencrypted) stateful system which includes
        /mnt/stateful_partition.
      * The encrypted stateful partition, which includes /var.

    Bind mounts are not checked; they are expected to fail the same way
    as their underlying main mounts.  Whether the Linux kernel can
    guarantee that is untested...
    """

    # N.B. Order matters here:  Encrypted stateful is loop-mounted from
    # a file in unencrypted stateful, so we don't test for errors in
    # encrypted stateful if unencrypted fails.
    _TEST_DIRECTORIES = ['/mnt/stateful_partition', '/var/tmp']

    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        """Probe each test directory in order, stopping at the first failure.

        @param host     The host (DUT) to verify.

        @raises hosts.AutoservVerifyError naming the first directory
                that is not writable.
        """
        # The generator is lazy, so probing stops at the first
        # read-only directory (see the ordering note above).
        read_only_dir = next(
                (path for path in self._TEST_DIRECTORIES
                 if not host.is_file_system_writable([path])),
                None)
        if read_only_dir is not None:
            raise hosts.AutoservVerifyError(
                    "Can't create a file in %s" % read_only_dir)

    @property
    def description(self):
        """Human-readable summary of what this verifier checks."""
        return 'The stateful filesystems are writable'
290
291
class EXT4fsErrorVerifier(hosts.Verifier):
    """
    Confirm we have not seen critical file system kernel errors.
    """

    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        """Scan `dmesg` on the host for critical file system errors.

        @param host     The host (DUT) to verify.

        @raises hosts.AutoservVerifyError if an EXT4-fs error for the
                stateful partition's device, or a "Data will be lost"
                message, is found in the kernel log.
        """
        # grep for stateful FS errors of the type "EXT4-fs error (device sda1):"
        # The device name is resolved on the DUT from /proc/$$/mountinfo.
        # Backslashes are doubled so that grep receives a literal "\("
        # and "\)" without relying on Python passing through an invalid
        # escape sequence.
        command = ('dmesg | grep -E "EXT4-fs error \\(device '
                   "$(cut -d ' ' -f 5,9 /proc/$$/mountinfo | "
                   "grep -e '^/mnt/stateful_partition ' | "
                   "cut -d ' ' -f 2 | cut -d '/' -f 3)\\):\"")
        output = host.run(command=command, ignore_status=True).stdout
        if output:
            sample = output.splitlines()[0]
            message = 'Saw file system error: %s' % sample
            raise hosts.AutoservVerifyError(message)
        # Check for other critical FS errors.
        command = 'dmesg | grep "This should not happen!!  Data will be lost"'
        output = host.run(command=command, ignore_status=True).stdout
        if output:
            message = 'Saw file system error: Data will be lost'
            raise hosts.AutoservVerifyError(message)
        # Reaching this point means neither pattern was found.  No error
        # is logged here: an empty grep result is the healthy case.

    @property
    def description(self):
        """Human-readable summary of what this verifier checks."""
        return 'Did not find critical file system errors'
323
324
class UpdateSuccessVerifier(hosts.Verifier):
    """
    Checks that the DUT successfully finished its last provision job.

    Whenever an update starts (e.g. for a Provision job), the code
    creates a marker file named `PROVISION_FAILED` in a part of the
    stateful partition that is removed when an update finishes
    successfully.  A marker that is still present therefore means a
    prior update failed.

    The verifier fails if the marker file exists.
    """

    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        """Fail if the provision-failure marker file is present.

        @param host     The host (DUT) to verify.

        @raises hosts.AutoservVerifyError if the marker file exists.
        """
        marker_check = 'test -f %s' % provisioner.PROVISION_FAILED
        outcome = host.run(marker_check, ignore_status=True)
        # `test -f` exits 0 only when the file exists.
        marker_present = outcome.exit_status == 0
        if marker_present:
            raise hosts.AutoservVerifyError(
                    'Last provision on this DUT failed')

    @property
    def description(self):
        """Human-readable summary of what this verifier checks."""
        return 'The most recent provision attempt on this DUT succeeded'
352
353
class TPMStatusVerifier(hosts.Verifier):
    """Verify that the host's TPM is in a good state."""

    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        """Inspect the cryptohome status of the host for TPM problems.

        The check is skipped on virtual machines, when the cryptohome
        status cannot be obtained, and when expected keys are missing.

        @param host     The host (DUT) to verify.

        @raises hosts.AutoservVerifyError if the TPM is disabled, cannot
                be connected to, or cannot load the SRK / SRK public key.
        """
        if _is_virtual_machine(host):
            # We do not forward host TPM / emulated TPM to qemu VMs, so skip
            # this verification step.
            logging.debug('Skipped verification %s on VM', self)
            return

        try:
            cryptohome = CryptohomeStatus(host)
        except hosts.AutoservVerifyError:
            logging.info('Cannot determine the Cryptohome valid status - '
                         'skipping check.')
            return

        try:
            self._check_tpm_fields(cryptohome['tpm'])
        except KeyError:
            # A missing key means the status is incomplete; do not fail.
            logging.info('Cannot determine the Cryptohome valid status - '
                         'skipping check.')

    @staticmethod
    def _check_tpm_fields(tpm_info):
        """Raise on the first TPM field that indicates a problem.

        @param tpm_info     The 'tpm' entry of the cryptohome status.

        @raises hosts.AutoservVerifyError describing the failed field.
        @raises KeyError if an expected field is absent.
        """
        if not tpm_info['enabled']:
            raise hosts.AutoservVerifyError(
                    'TPM is not enabled -- Hardware is not working.')
        if not tpm_info['can_connect']:
            connect_error = ('TPM connect failed -- '
                             'last_error=%d.' % tpm_info['last_error'])
            raise hosts.AutoservVerifyError(connect_error)
        if tpm_info['owned'] and not tpm_info['can_load_srk']:
            raise hosts.AutoservVerifyError(
                    'Cannot load the TPM SRK')
        if tpm_info['can_load_srk'] and not tpm_info['can_load_srk_pubkey']:
            raise hosts.AutoservVerifyError(
                    'Cannot load the TPM SRK public key')

    @property
    def description(self):
        """Human-readable summary of what this verifier checks."""
        return "The host's TPM is available and working"
395
396
class PythonVerifier(hosts.Verifier):
    """Confirm the presence of a working Python interpreter."""

    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        """Run a trivial Python import on the host and check the result.

        @param host     The host (DUT) to verify.

        @raises hosts.AutoservVerifyError if the probe command exits
                non-zero; the message says whether Python is missing.
        """
        probe = host.run('python -c "import json"',
                         ignore_status=True)
        if probe.exit_status == 0:
            return

        reason = 'The python interpreter is broken'
        # On exit status 127, check whether `python` can be found at all.
        if probe.exit_status == 127:
            lookup = host.run('which python', ignore_status=True)
            python_found = lookup.exit_status == 0 and bool(lookup.stdout)
            if not python_found:
                reason = 'Python is missing; may be caused by powerwash'
        raise hosts.AutoservVerifyError(reason)

    @property
    def description(self):
        """Human-readable summary of what this verifier checks."""
        return 'Python on the host is installed and working'
418
419
class DevModeVerifier(hosts.Verifier):
    """Verify that the host is not in dev mode."""

    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        """Fail unless `crossystem devsw_boot` reports '0'.

        The check is skipped when dev mode is globally allowed or when
        the host belongs to one of the pools allowed to be in dev mode.

        @param host     The host (DUT) to verify.

        @raises hosts.AutoservVerifyError if the reported value is not
                '0' (this includes empty output from a failed command).
        """
        # Some pools are allowed to be in dev mode
        info = host.host_info_store.get()
        if (_DEV_MODE_ALWAYS_ALLOWED or
                bool(info.pools & _DEV_MODE_ALLOWED_POOLS)):
            return

        result = host.run('crossystem devsw_boot', ignore_status=True)
        # Strip surrounding whitespace so a trailing newline cannot turn
        # a valid '0' into a false dev-mode report.
        devsw_boot = result.stdout.strip()
        if devsw_boot != '0':
            raise hosts.AutoservVerifyError('The host is in dev mode')

    @property
    def description(self):
        """Human-readable summary of what this verifier checks."""
        return 'The host should not be in dev mode'
440
441
class DevDefaultBootVerifier(hosts.Verifier):
    """Verify that the host is set to boot the internal disk by default."""

    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        """Fail unless `crossystem dev_default_boot` reports 'disk'.

        @param host     The host (DUT) to verify.

        @raises hosts.AutoservVerifyError quoting the unexpected value.
        """
        reported = host.run('crossystem dev_default_boot',
                            ignore_status=True).stdout.strip()
        if reported == 'disk':
            return
        raise hosts.AutoservVerifyError(
                'The host has incorrect dev_default_boot value: %r'
                % reported)

    @property
    def description(self):
        """Human-readable summary of what this verifier checks."""
        return 'The host should have dev_default_boot=disk'
459
460
class HWIDVerifier(hosts.Verifier):
    """Verify that the host has HWID & serial number."""

    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        """Compare HWID and serial number on the DUT with host_info.

        A HWID mismatch is only reported (metric + log).  A missing
        host_info value or a serial number mismatch marks the DUT as
        needing deployment.

        @param host     The host (DUT) to verify.

        @raises hosts.AutoservVerifyError if the HWID or the serial
                number cannot be read from the DUT.
        """
        info = host.host_info_store.get()
        if not info.board or not info.model:
            # if board or model missed in host_info file then it is empty
            # skip verifier
            return
        info_hwid = info.attributes.get('HWID')
        info_serial_number = info.attributes.get('serial_number')

        if not info_hwid or not info_serial_number:
            logging.info('Missing HWID or/and SerialNumber.'
                         ' Probably device was not deployed properly.'
                         ' Marking DUT for need re-deployment.')
            host.set_device_repair_state(
                    cros_constants.DEVICE_STATE_NEEDS_DEPLOY)
            return

        # Strip command output so stray whitespace neither hides an
        # empty result nor causes a spurious mismatch below.
        host_hwid = host.run('crossystem hwid',
                             ignore_status=True).stdout.strip()
        host_serial_number = self._get_serial_number(
                host, info_serial_number).strip()
        if not host_hwid or not host_serial_number:
            raise hosts.AutoservVerifyError(
                    'Failed to get HWID & Serial Number for host %s' %
                    host.hostname)

        if host_hwid != info_hwid:
            # We not fail verifier as it not critical for majority tests.
            metrics.Counter('chromeos/autotest/repair/hwid_change').increment(
                    fields={
                            'host': host.hostname,
                            'board': info.board or ''
                    })
            logging.info(
                    'HWID changed to: %s required manual work'
                    ' to fix it.', host_hwid)

        if host_serial_number != info_serial_number:
            logging.info(
                    'The SerialNumber mismatch detected %s != %s.'
                    ' Probably attempt to replace DUT without deployment.'
                    ' Marking DUT for need re-deployment.', info_serial_number,
                    host_serial_number)
            host.set_device_repair_state(
                    cros_constants.DEVICE_STATE_NEEDS_DEPLOY)

    def _get_serial_number(self, host, serial_number):
        """Read serial_number from VPD.

        If VPD does not have any value for serial_number then it will
        try to restore from host_info.

        @param host             CrosHost
        @param serial_number    Serial-number from host-info

        @returns The stdout of the last `vpd -g serial_number` call.
        """
        req = host.run('vpd -g serial_number', ignore_status=True)
        # serial_number not found in the VPD info
        if not req.stdout and req.exit_status == 3 and serial_number:
            logging.debug('Cannot find serial_number from VPD.')
            # check if vpd working fine without error
            l1 = host.run('vpd -l', ignore_status=True)
            # Raw string: the backslashes must reach the shell so that
            # grep searches for the literal text "serial_number"= with
            # its double quotes.
            l2 = host.run(r'vpd -l |grep "\"serial_number\"="',
                          ignore_status=True)
            # grep exit status 1 means "no matching line".
            if l1.exit_status == 0 and l2.exit_status == 1:
                logging.info('Start restoring serial_number:%s for VPD.',
                             serial_number)
                # update serial_number for VPD
                cmd = 'vpd -s serial_number=%s'
                host.run(cmd % serial_number, ignore_status=True)
                host.run('dump_vpd_log --force', ignore_status=True)
                # reading from VPD to see what we updated
                req = host.run('vpd -g serial_number', ignore_status=True)
        return req.stdout

    @property
    def description(self):
        """Human-readable summary of what this verifier checks."""
        return 'The host should have valid HWID and Serial Number'
542
543
class EnrollmentStateVerifier(hosts.Verifier):
    """Verify that the device's enrollment state is clean.

    Two "flags" combine into 3 possible enrollment states.
    Flag 1 - The presence of install attributes file in
             /home/.shadow/install_attributes.pb

    Flag 2 - The value of "check_enrollment" from VPD. Can be obtained by
             reading the cache file in
             /mnt/stateful_partition/unencrypted/cache/vpd/full-v2.txt

    The states:
    State 1 - Device is enrolled: flag 1 is true and flag 2 has
              check_enrollment=1
    State 2 - Device is consumer owned: flag 1 is true and flag 2 has
              check_enrollment=0
    State 3 - Device is enrolled and has been powerwashed: flag 1 is
              false. With check_enrollment=1 the device performs a
              forced re-enrollment check and, depending on the server
              response, may be forced to enroll again. With
              check_enrollment=0 the device can be used like a new
              device.

    State 1, and the check_enrollment=1 scenario of state 3, are treated
    as unacceptable because they may interfere with normal tests.
    """

    VPD_CACHE = '/mnt/stateful_partition/unencrypted/cache/vpd/full-v2.txt'

    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        """Raise a non-critical error when the VPD cache says enrolled.

        @param host     The host (DUT) to verify.

        @raises hosts.AutoservNonCriticalVerifyError if the VPD cache
                contains check_enrollment=1.
        """
        enrolled = self._get_enrollment_state(host)
        if enrolled:
            raise hosts.AutoservNonCriticalVerifyError('The device is enrolled,'
                                                       ' it may interfere with'
                                                       ' some tests.')

    def _get_enrollment_state(self, host):
        """Read the check_enrollment flag from the VPD cache file.

        @param host     The host (DUT) to query.

        @returns True if the grep output is exactly
                 "check_enrollment"="1"; False otherwise, including when
                 grep exits non-zero.
        """
        logging.debug('checking enrollment state from VPD cache...')
        lookup = host.run('grep "check_enrollment" %s' % self.VPD_CACHE,
                          ignore_status=True)
        if lookup.exit_status != 0:
            logging.error('Unexpected error occured during verify enrollment'
                          ' state in VPD cache, skipping verify process.')
            return False

        flag_line = lookup.stdout.strip()
        logging.info('Enrollment state in VPD cache: %s', flag_line)
        return flag_line == '"check_enrollment"="1"'

    def _is_applicable(self, host):
        """Tell whether this verifier applies to the given host.

        @param host     The host (DUT) to inspect.

        @returns True if the `os` value of host_info is missing, empty
                 or 'cros'.
        """
        # if os type is missing from host_info, then we assume it's cros.
        os_type = getattr(host.host_info_store.get(), 'os', 'cros')
        return os_type in ('', 'cros')

    @property
    def description(self):
        """Human-readable summary of what this verifier checks."""
        return 'The enrollment state is clean on the host'
603
604
class FirmwareTpmVerifier(hosts.Verifier):
    """Verifier that firmware tpm info is correct.

    For dev-signed firmware, tpm_fwver and tpm_kernver reported from
    crossystem should always be 0x00010001. Firmware update on DUTs with
    incorrect tpm_fwver or tpm_kernver may fail due to firmware
    rollback protection.
    """
    # A list of field we want check from crossystem and expected value.
    CHECK_LIST = [
            ('tpm_fwver', '0x00010001'),
            ('tpm_kernver', '0x00010001'),
    ]

    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        """Check every field in `CHECK_LIST` against its expected value.

        @param host     The host (DUT) to verify.

        @raises hosts.AutoservNonCriticalVerifyError if a field cannot
                be read or does not hold the expected value.
        """
        for field, expected_value in self.CHECK_LIST:
            result = host.run('crossystem %s' % field, ignore_status=True)
            if result.exit_status != 0:
                raise hosts.AutoservNonCriticalVerifyError(
                        'Unable to get %s from crossystem.' % field)
            # Strip surrounding whitespace so a trailing newline is not
            # reported as an unexpected value.
            actual_value = result.stdout.strip()
            if actual_value != expected_value:
                raise hosts.AutoservNonCriticalVerifyError(
                        'Unexpected %s value: %s, expected: %s. This error'
                        ' may cause firmware provision fail due to the'
                        ' rollback protection.' %
                        (field, actual_value, expected_value))

    def _is_applicable(self, host):
        """Tell whether this verifier applies to the given host.

        @param host     The host (DUT) to inspect.

        @returns The result of
                 `cros_firmware._is_firmware_testing_device(host)`.
        """
        return cros_firmware._is_firmware_testing_device(host)

    @property
    def description(self):
        """Human-readable summary of what this verifier checks."""
        return 'Firmware tpm info is correct in crossystem.'
641
642
class JetstreamTpmVerifier(hosts.Verifier):
    """Verify that Jetstream TPM is in a good state."""

    @retry.retry(error.AutoservError, timeout_min=2, delay_sec=10)
    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        """Check the cryptohome TPM flags and the TPM ready state.

        @param host     The host (DUT) to verify.

        @raises hosts.AutoservVerifyError naming the first failed check,
                or stating that the TPM status could not be determined
                when a command fails.
        """
        # Status attributes checked in order, each paired with the
        # message raised when the attribute is false.
        required_flags = (
                ('tpm_enabled', 'TPM is not enabled'),
                ('tpm_owned', 'TPM is not owned'),
                ('tpm_can_load_srk', 'TPM cannot load SRK'),
                ('tpm_can_load_srk_pubkey', 'TPM cannot load SRK pubkey'),
        )
        try:
            cryptohome = CryptohomeStatus(host)
            for flag_name, failure in required_flags:
                if not getattr(cryptohome, flag_name):
                    raise hosts.AutoservVerifyError(failure)

            # Check that the TPM is fully initialized. The output of this
            # command is line-oriented property/value pairs.
            tpm_status = host.run('cryptohome --action=tpm_status').stdout
            if 'TPM Ready: true' not in tpm_status:
                raise hosts.AutoservVerifyError('TPM is not ready')
        except error.AutoservRunError:
            raise hosts.AutoservVerifyError(
                    'Could not determine TPM status')

    @property
    def description(self):
        """Human-readable summary of what this verifier checks."""
        return 'Jetstream TPM state check'
674
675
class JetstreamAttestationVerifier(hosts.Verifier):
    """Verify that Jetstream attestation client has a certificate."""

    @retry.retry(error.AutoservError, timeout_min=2, delay_sec=10)
    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        """Check attestation readiness and the endorsement certificate.

        @param host     The host (DUT) to verify.

        @raises hosts.AutoservVerifyError if an expected marker is
                absent from the command output, or a command fails.
        """
        # (command, text that must appear in stdout, failure message),
        # run in order.  The first command prints text protobuf format.
        probes = (
                ('cryptohome --action=tpm_more_status',
                 'attestation_prepared: true',
                 'Attestation has not been prepared'),
                ('cryptohome --action=tpm_attestation_get_ek',
                 'EK Certificate',
                 'Endorsement certificate not found'),
        )
        try:
            for command, marker, failure in probes:
                if marker not in host.run(command).stdout:
                    raise hosts.AutoservVerifyError(failure)
        except error.AutoservRunError:
            raise hosts.AutoservVerifyError(
                    'Unable to fetch endorsement certificate')

    @property
    def description(self):
        """Human-readable summary of what this verifier checks."""
        return 'Jetstream attestation endorsement check'
702
703
class JetstreamServicesVerifier(hosts.Verifier):
    """Verify that Jetstream services are running."""

    # Retry for b/62576902
    @retry.retry(error.AutoservError, timeout_min=1, delay_sec=10)
    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        """Check the ap-controller upstart job and its process.

        @param host     The host (DUT) to verify.

        @raises hosts.AutoservVerifyError if the service is not running
                or not found, or if `pgrep` finds no process.
        """
        # Step 1: the upstart job must report as running.
        try:
            job_running = host.upstart_status('ap-controller')
            if not job_running:
                raise hosts.AutoservVerifyError(
                    'ap-controller service is not running')
        except error.AutoservRunError:
            raise hosts.AutoservVerifyError(
                'ap-controller service not found')

        # Step 2: a matching process must exist.
        try:
            host.run('pgrep ap-controller')
        except error.AutoservRunError:
            raise hosts.AutoservVerifyError(
                'ap-controller process is not running')

    @property
    def description(self):
        """Human-readable summary of what this verifier checks."""
        return 'Jetstream services must be running'
730
731
class StopStartUIVerifier(hosts.Verifier):
    """Verify that command 'stop ui' won't crash the DUT.

    'stop ui' is run in AU and provision. Some bad images broke this
    command, which then broke provisioning for every following test.
    This verifier ensures the command works, and a failure triggers
    reimaging to a good version.
    """

    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        """Restart the UI job and fail if the command times out.

        @param host     The host (DUT) to verify.

        @raises hosts.AutoservVerifyError if the command hits the SSH
                timeout.
        """
        restart_ui = 'stop ui && start ui'
        try:
            # The exit status is ignored; only a timeout fails the check.
            host.run(restart_ui, ignore_status=True, timeout=10)
        except error.AutoservSSHTimeout:
            raise hosts.AutoservVerifyError(
                'Got timeout when stop ui/start ui. DUT might crash.')

    @property
    def description(self):
        """Human-readable summary of what this verifier checks."""
        return 'The DUT image works fine when stop ui/start ui.'
752
753
class ServoUSBDriveVerifier(hosts.Verifier):
    """Verify that the USB drive attached to servo is usable.

    The drive must be detected and validated through the servo host,
    and must not be marked for replacement in the host info labels.
    """

    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        """Probe the USB drive and record its state in the host info.

        @param host: the host to be verified.
        @raises hosts.AutoservNonCriticalVerifyError: if the drive is
            not detected, or is already marked for replacement.
        """
        try:
            usb_dev = host._servo_host._probe_and_validate_usb_dev()
        except hosts.AutoservRepairError as e:
            # The USB drive was not detected by servod.
            logging.debug('(Not critical) %s', e)
            usb_dev = ''

        info = host.host_info_store.get()
        if not usb_dev:
            info.set_version_label(audit_const.SERVO_USB_STATE_PREFIX,
                                   audit_const.HW_STATE_NOT_DETECTED)
            host.host_info_store.commit(info)
            raise hosts.AutoservNonCriticalVerifyError(
                    'USB-drive is not detected or bad')

        # A drive already marked for replacement stays marked.
        recorded_state = info.get_label_value(
                audit_const.SERVO_USB_STATE_PREFIX)
        marked_bad = (recorded_state and
                      recorded_state == audit_const.HW_STATE_NEED_REPLACEMENT)
        if marked_bad:
            raise hosts.AutoservNonCriticalVerifyError(
                    'USB-drive marked for replacement')

        # Detected and not marked for replacement: record the normal
        # state for future audits.
        info.set_version_label(audit_const.SERVO_USB_STATE_PREFIX,
                               audit_const.HW_STATE_NORMAL)
        host.host_info_store.commit(info)

    def _is_applicable(self, host):
        """Return True only when `host.servo` is truthy."""
        return bool(host.servo)

    @property
    def description(self):
        """Return the human-readable summary of this verifier."""
        return 'Ensure USB drive on Servo is in good state.'
799
800
class DUTStorageVerifier(hosts.Verifier):
    """Verify that main storage on DUT is good to use.

    Check that the DUT drive provides good SMART stats that do not show
    any issues.  The verifier can mark the DUT for replacement if the
    SMART stats show worn-out storage.
    """

    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        """Run the storage audit and act on the state it reports.

        @param host: the host to be verified.
        @raises hosts.AutoservNonCriticalVerifyError: if the audit
            verifier reports no storage state.
        """
        verifier = audit_verify.VerifyDutStorage(host)
        verifier.verify(set_label=True, run_badblocks='NOT')
        # Do not substitute a default (e.g. HW_STATE_UNKNOWN) for a
        # missing state: a truthy fallback makes the check below
        # unreachable and silently hides undetected storage.
        state = verifier.get_state()
        if not state:
            raise hosts.AutoservNonCriticalVerifyError(
                    'DUT storage did not detected or state cannot extracted.')
        if state == audit_const.HW_STATE_NEED_REPLACEMENT:
            logging.info('Detected issue with storage on the DUT.')
            host.set_device_needs_replacement()

    @property
    def description(self):
        """Return the human-readable summary of this verifier."""
        return 'Ensure DUT storage SMART information is in good state.'
825
826
class _ResetRepairAction(hosts.RepairAction):
    """Shared post-reset handling for repair actions that reset a DUT."""

    def _collect_logs(self, host):
        """Gather /var/log and crash info from a repaired DUT.

        @param host: the host that was just repaired.
        """
        log_dir = crashcollect.get_crashinfo_dir(host,
                                                 'after_%s' % self.tag)
        host.collect_logs('/var/log', log_dir, ignore_errors=True)
        # Collect crash info.
        crashcollect.get_crashinfo(host, None)

    def _check_reset_success(self, host):
        """Confirm the DUT came back after a reset; gather logs if useful.

        @param host: the host that was reset.
        @raises hosts.AutoservRepairError: if the host does not come up
            within `host.BOOT_TIMEOUT`.
        """
        # Wait for the device to boot after the repair action.
        if not host.wait_up(host.BOOT_TIMEOUT):
            raise hosts.AutoservRepairError(
                    'Host %s is offline after %s.' % (host.hostname, self.tag),
                    'failed_to_boot_after_' + self.tag)

        if host.get_verifier_state('ssh') == hosts.VERIFY_SUCCESS:
            logging.debug(
                    'Skip collection logs due DUT was sshable before')
            return

        # We just regained ssh access: collect the logs before
        # something clobbers them.
        try:
            self._collect_logs(host)
        except Exception:
            # The DUT is up, so the repair counts as a success even
            # when log gathering fails; log the failure and move on.
            logging.exception('Non-critical failure in log '
                              'collection during %s.',
                              self.tag)
861
862
class ServoSysRqRepair(_ResetRepairAction):
    """
    Repair a Chrome device by sending a system request to the kernel.

    Pressing the Alt+VolUp+x key combination (aka sysrq-x) 3 times
    asks the kernel to panic and reboot, while preserving the kernel
    logs in console ramoops.
    """

    @timeout_util.TimeoutDecorator(cros_constants.REPAIR_TIMEOUT_SEC)
    def repair(self, host):
        """Press sysrq-x three times through servo, then check the boot.

        @param host: the host to be repaired.
        @raises hosts.AutoservRepairError: if a key press fails.
        """
        repair_utils.require_servo(host, ignore_state=True)
        # DUT health is not checked between presses: killing Chrome
        # is not really likely to fix the DUT SSH.
        presses_left = 3
        while presses_left > 0:
            presses_left -= 1
            try:
                host.servo.sysrq_x()
            except error.TestFail as ex:
                failure = 'cannot press sysrq-x: %s.' % str(ex)
                raise hosts.AutoservRepairError(failure,
                                                'cannot_press_sysrq_x')
            # Keep less than 5 seconds between presses.
            time.sleep(2.0)
        self._check_reset_success(host)

    @property
    def description(self):
        """Return the human-readable summary of this repair action."""
        return 'Reset the DUT via keyboard sysrq-x'
894
895
class ServoResetRepair(_ResetRepairAction):
    """Repair a Chrome device by resetting it through servo."""

    @timeout_util.TimeoutDecorator(cros_constants.REPAIR_TIMEOUT_SEC)
    def repair(self, host):
        """Reset the DUT with the servo power state controller.

        @param host: the host to be repaired.
        """
        repair_utils.require_servo(host, ignore_state=True)
        power_state = host.servo.get_power_state_controller()
        power_state.reset()
        self._check_reset_success(host)

    @property
    def description(self):
        """Return the human-readable summary of this repair action."""
        return 'Reset the DUT via servo'
910
911
class ServoCr50RebootRepair(_ResetRepairAction):
    """
    Repair a Chrome device by resetting cr50 through servo.

    A cr50 reset is an ec+ccd reset.
    """

    @timeout_util.TimeoutDecorator(cros_constants.REPAIR_TIMEOUT_SEC)
    def repair(self, host):
        """Reset cr50, check the boot, then re-initialize servo for ccd.

        @param host: the host to be repaired.
        """
        try:
            power_state = host.servo.get_power_state_controller()
            power_state.cr50_reset()
            self._check_reset_success(host)
        finally:
            # A cr50 reset clears some init state such as
            # `ccd testlab open`, so servo is re-initialized afterwards
            # when the main device is ccd.
            if host.servo.main_device_is_ccd():
                host.servo.initialize_dut()

    def _is_applicable(self, host):
        """Return True if servo exists and has the 'cr50_reboot' control."""
        return bool(host.servo and host.servo.has_control('cr50_reboot'))

    @property
    def description(self):
        """Return the human-readable summary of this repair action."""
        return 'Reset(cr50) the DUT via servo'
942
943
class DevDefaultBootRepair(hosts.RepairAction):
    """Repair a CrOS target by setting dev_default_boot to 'disk'."""

    @timeout_util.TimeoutDecorator(cros_constants.SHORT_REPAIR_TIMEOUT_SEC)
    def repair(self, host):
        """Set `dev_default_boot=disk` via crossystem, ignoring failures.

        @param host: the host to be repaired.
        """
        set_boot_cmd = 'crossystem dev_default_boot=disk'
        host.run(set_boot_cmd, ignore_status=True)

    @property
    def description(self):
        """Return the human-readable summary of this repair action."""
        return "Set dev_default_boot to 'disk'"
956
957
class CrosRebootRepair(repair_utils.RebootRepair):
    """Repair a CrOS target by clearing dev mode and rebooting it."""

    @timeout_util.TimeoutDecorator(cros_constants.REPAIR_TIMEOUT_SEC)
    def repair(self, host):
        """Reset GBB flags, request leaving dev mode, then reboot.

        @param host: the host to be repaired.
        """
        # N.B. The reboot must happen whether or not clearing dev mode
        # succeeds, hence the ignored exit statuses.
        clear_dev_mode_cmds = (
                '/usr/share/vboot/bin/set_gbb_flags.sh 0',
                'crossystem disable_dev_request=1',
        )
        for cmd in clear_dev_mode_cmds:
            host.run(cmd, ignore_status=True)
        super(CrosRebootRepair, self).repair(host)

    @property
    def description(self):
        """Return the human-readable summary of this repair action."""
        return 'Reset GBB flags and Reboot the host'
976
977
class LabelCleanupRepair(hosts.RepairAction):
    """Clean up unexpected labels for the host, e.g. a mismatched
    cros-version label.
    """
    # Only the cros-version label is cleaned up for now.  The action
    # can be extended to other labels when needed; it should then pick
    # the label to clean from the cached results of its trigger list.
    # (Example: the trigger verifiers are available through
    # self._trigger_list, and Verifier._is_good() tells which verifier
    # failed.)

    @timeout_util.TimeoutDecorator(cros_constants.SHORT_REPAIR_TIMEOUT_SEC)
    def repair(self, host):
        """Clear the version labels in the host info store.

        @param host: the host to be repaired.
        """
        logging.info('Removing %s label from the host', host.VERSION_PREFIX)
        store = host.host_info_store
        host_info = store.get()
        host_info.clear_version_labels()
        store.commit(host_info)

    @property
    def description(self):
        """Return the human-readable summary of this repair action."""
        return 'Cleanup unexpected labels for the host'
1000
1001
class EnrollmentCleanupRepair(hosts.RepairAction):
    """Clean up the enrollment state on a ChromeOS device."""

    @timeout_util.TimeoutDecorator(cros_constants.REPAIR_TIMEOUT_SEC)
    def repair(self, host):
        """Reset the VPD enrollment flag and request a TPM owner clear.

        @param host: the host to be repaired.
        """
        # Reset VPD enrollment state.
        reset_vpd_cmd = '/usr/sbin/update_rw_vpd check_enrollment 0'
        host.run(reset_vpd_cmd)

        # Clear TPM Owner state.
        tpm_utils.ClearTPMOwnerRequest(host,
                                       wait_for_ready=True,
                                       timeout=host.BOOT_TIMEOUT)

    def _is_applicable(self, host):
        """Return True when the host info os type is 'cros' or blank."""
        host_info = host.host_info_store.get()
        # A host_info without an os type is assumed to be cros.
        os_type = getattr(host_info, 'os', 'cros')
        return os_type in ('', 'cros')

    @property
    def description(self):
        """Return the human-readable summary of this repair action."""
        return 'Cleanup enrollment state and reboot the host'
1023
1024
class ProvisionRepair(hosts.RepairAction):
    """
    Repair by re-installing a test image using quick provision.

    Installs the DUT's designated "stable test image" through the
    standard quick-provision procedure for installing a new test image.
    """

    @timeout_util.TimeoutDecorator(cros_constants.LONG_REPAIR_TIMEOUT_SEC)
    def repair(self, host):
        """Stage the repair image on a devserver and install it.

        @param host: the host to be repaired.
        """
        build = host.get_cros_repair_image_name()
        logging.info('Staging build for provision: %s', build)
        image_server = dev_server.ImageServer.resolve(build, host.hostname)
        image_server.trigger_download(build, synchronous=False)
        url_pattern = tools.image_url_pattern()
        update_url = url_pattern % (image_server.url(), build)
        afe_utils.machine_install_and_update_labels(host, update_url)

    @property
    def description(self):
        """Return the human-readable summary of this repair action."""
        return 'Re-install the stable build on the host'
1048
1049
class PowerWashRepair(ProvisionRepair):
    """
    Powerwash the DUT, then re-install using quick provision.

    After the powerwash, the stable test image is re-installed in the
    same way as for `ProvisionRepair`.
    """

    @timeout_util.TimeoutDecorator(cros_constants.LONG_REPAIR_TIMEOUT_SEC)
    def repair(self, host):
        """Request a powerwash, reboot, then run the provision repair.

        @param host: the host to be repaired.
        """
        request_powerwash_cmd = (
                'echo "fast safe" > '
                '/mnt/stateful_partition/factory_install_reset')
        host.run(request_powerwash_cmd)
        host.reboot(timeout=host.POWERWASH_BOOT_TIMEOUT, wait=True)
        super(PowerWashRepair, self).repair(host)

    @property
    def description(self):
        """Return the human-readable summary of this repair action."""
        return 'Powerwash and then re-install the stable build on the host'
1070
1071
class ServoInstallRepair(hosts.RepairAction):
    """
    Reinstall a test image from USB using servo.

    Servo is used to re-install the DUT's designated "stable test image"
    from the USB storage attached to servo.
    """

    # This action gets its own timeout value, because it has to stage
    # the image to the USB drive and then install the ChromeOS image.
    @timeout_util.TimeoutDecorator(60 * 60)
    def repair(self, host):
        """Refresh the USB key image if needed, then install from it.

        @param host: the host to be repaired.
        """
        repair_utils.require_servo(host, ignore_state=True)
        image_name = host.get_cros_repair_image_name()
        image_name_on_usb = host._servo_host.validate_image_usbkey()
        usb_has_image = image_name_on_usb == image_name
        if usb_has_image:
            logging.info(
                    'Required image %s is already on usbkey,'
                    ' skipping download.', image_name)
        else:
            logging.info('Required image is not on usbkey.')
        need_update_image = not usb_has_image

        # Decide whether to force a re-image of the USB key: do it
        # after the first failed repair and then on every 10th failure.
        if not need_update_image and host.health_profile:
            fail_count = host.health_profile.get_repair_fail_count()
            retry_due = fail_count == 1 or fail_count % 10 == 0
            if fail_count > 0 and retry_due:
                logging.info(
                        'Required re-download image to usbkey as'
                        ' a previous repair failed. Fail count: %s',
                        fail_count)
                need_update_image = True

        update_url = None
        if need_update_image:
            logging.info('Staging image: %s on caching server.', image_name)
            _, update_url = host.stage_image_for_servo()
        afe_utils.clean_provision_labels(host)
        host.servo_install(update_url, is_repair=True)
        afe_utils.add_provision_labels(host, host.VERSION_PREFIX, image_name)

    @property
    def description(self):
        """Return the human-readable summary of this repair action."""
        return 'Reinstall from USB using servo'
1121
1122
class JetstreamTpmRepair(hosts.RepairAction):
    """Repair by resetting TPM and rebooting."""

    @timeout_util.TimeoutDecorator(cros_constants.REPAIR_TIMEOUT_SEC)
    def repair(self, host):
        """Remove setup state files, request a TPM owner clear, reboot.

        @param host: the host to be repaired.
        """
        # Every command is best effort; the reboot happens regardless.
        reset_cmds = (
                'rm -f /var/cache/ap/setup-network',
                'rm -f /home/chronos/.oobe_completed',
                'rm -f /home/.shadow/.can_attempt_ownership',
                'crossystem clear_tpm_owner_request=1',
        )
        for cmd in reset_cmds:
            host.run(cmd, ignore_status=True)
        host.reboot()

    @property
    def description(self):
        """Return the human-readable summary of this repair action."""
        return 'Reset TPM and reboot'
1140
1141
class JetstreamServiceRepair(hosts.RepairAction):
    """Repair by restarting Jetstream services."""

    @timeout_util.TimeoutDecorator(cros_constants.REPAIR_TIMEOUT_SEC)
    def repair(self, host):
        """Delegate the repair to `host.cleanup_services()`.

        @param host: the host to be repaired.
        """
        host.cleanup_services()

    @property
    def description(self):
        """Return the human-readable summary of this repair action."""
        return 'Restart Jetstream services'
1154
1155
def _cros_verify_dag():
    """Return the verification DAG for a `CrosHost`.

    The result is the base DAG followed by the extended DAG.
    """
    base_dag = _cros_verify_base_dag()
    extended_dag = _cros_verify_extended_dag()
    return base_dag + extended_dag
1159
1160
def _cros_verify_base_dag():
    """Return the base verification DAG for a `CrosHost`.

    Each entry is a (verifier class, tag, dependency tags) tuple.
    """
    # Most verifiers only require a working ssh connection.
    needs_ssh = ('ssh', )
    return (
            (repair_utils.PingVerifier, 'ping', ()),
            (repair_utils.SshVerifier, 'ssh', ('ping', )),
            (ServoUSBDriveVerifier, 'usb_drive', ()),
            (DevDefaultBootVerifier, 'dev_default_boot', needs_ssh),
            (DevModeVerifier, 'devmode', needs_ssh),
            (EnrollmentStateVerifier, 'enrollment_state', needs_ssh),
            (HWIDVerifier, 'hwid', needs_ssh),
            (ACPowerVerifier, 'power', needs_ssh),
            (EXT4fsErrorVerifier, 'ext4', needs_ssh),
            (WritableVerifier, 'writable', needs_ssh),
            (TPMStatusVerifier, 'tpm', needs_ssh),
            (UpdateSuccessVerifier, 'good_provision', needs_ssh),
            (FirmwareTpmVerifier, 'faft_tpm', needs_ssh),
            (cros_firmware.FirmwareStatusVerifier, 'fwstatus', needs_ssh),
            (cros_firmware.FirmwareVersionVerifier, 'rwfw', needs_ssh),
            (PythonVerifier, 'python', needs_ssh),
            (repair_utils.LegacyHostVerifier, 'cros', needs_ssh),
            (CrosVerisionVerifier, 'cros_version_label', needs_ssh),
    )
1186
1187
def _cros_verify_extended_dag():
    """Return the extended verification DAG for a `CrosHost`.

    Each entry is a (verifier class, tag, dependency tags) tuple.
    """
    needs_ssh = ('ssh', )
    extended_dag = (
            (StopStartUIVerifier, 'stop_start_ui', needs_ssh),
            (DUTStorageVerifier, 'storage', needs_ssh),
    )
    return extended_dag
1194
1195
def _cros_basic_repair_actions(
    servo_reset_trigger=DEFAULT_SERVO_RESET_TRIGGER
):
    """Return the basic repair actions for a `CrosHost`

    Each entry is an (action class, tag, dependencies, triggers) tuple.

    @param servo_reset_trigger: sequence of verifiers that trigger servo reset
    and servo cr50 reboot repair.
    """
    no_deps = ()
    needs_ssh = ('ssh', )
    offline_triggers = ('ping', 'ssh')
    return (
            # RPM cycling must come before the servo reset: a DUT with a
            # dead battery needs AC power reattached before it is reset
            # via servo.
            (repair_utils.RPMCycleRepair, 'rpm', no_deps,
             ('ping', 'ssh', 'power')),
            (ServoResetRepair, 'servoreset', no_deps, servo_reset_trigger),
            (ServoCr50RebootRepair, 'cr50_reset', no_deps,
             servo_reset_trigger),
            (ServoSysRqRepair, 'sysrq', no_deps, offline_triggers),
            (LabelCleanupRepair, 'label_cleanup', needs_ssh,
             ('cros_version_label', )),

            # N.B. FaftFirmwareRepair cannot fix a 'good_provision'
            # failure directly, because it does not remove the flag file
            # that triggers the failure.  It is still listed as a
            # trigger, because the last update may have failed because
            # of the firmware, and the repair steps below need to be
            # able to trust the firmware.
            (cros_firmware.FaftFirmwareRepair, 'faft_firmware_repair',
             no_deps, ('ping', 'ssh', 'fwstatus', 'good_provision')),
            (DevDefaultBootRepair, 'set_default_boot', needs_ssh,
             ('dev_default_boot', )),
            (CrosRebootRepair, 'reboot', needs_ssh,
             ('devmode', 'writable')),
            (EnrollmentCleanupRepair, 'cleanup_enrollment', needs_ssh,
             ('enrollment_state', )),
    )
1243
1244
def _cros_extended_repair_actions(provision_triggers=_CROS_PROVISION_TRIGGERS,
                                  powerwash_triggers=_CROS_POWERWASH_TRIGGERS,
                                  usb_triggers=_CROS_USB_TRIGGERS,
                                  usb_dependencies=_CROS_USB_DEPENDENCIES):
    """Return the extended repair actions for a `CrosHost`

    Each entry is an (action class, tag, dependencies, triggers) tuple.
    """
    # The dependencies and triggers of 'provision', 'powerwash' and
    # 'usb' stack up: each action can repair progressively more
    # verifiers than the one before it.
    provision_deps = usb_triggers + powerwash_triggers
    powerwash_fixes = powerwash_triggers + provision_triggers
    # faft_tpm is a trigger of the usb repair action, but must not be a
    # dependency of the provision and powerwash repair actions.  Due to
    # a restriction of the current structure it is hardcoded here
    # instead of being put into _CROS_USB_TRIGGERS.
    # TODO(xianuowang@) refactor the logic to create action/verifier DAG
    # for different host type after we decouple infra from test autotest
    # repo.
    usb_fixes = (usb_triggers + powerwash_triggers + provision_triggers +
                 ('faft_tpm', ))

    return (
            (ProvisionRepair, 'provision', provision_deps,
             provision_triggers),
            (PowerWashRepair, 'powerwash', usb_triggers, powerwash_fixes),
            (ServoInstallRepair, 'usb', usb_dependencies, usb_fixes),
    )
1275
1276
def _cros_dedicated_repair_actions(firmware_triggers=_CROS_FIRMWARE_TRIGGERS,
                                   usb_dependencies=_CROS_USB_DEPENDENCIES):
    """Return the repair actions that only work for `CrosHost`

    Each entry is an (action class, tag, dependencies, triggers) tuple.
    """
    general_firmware = (
            cros_firmware.GeneralFirmwareRepair,
            'general_firmware',
            usb_dependencies,
            firmware_triggers,
    )
    return (general_firmware, )
1284
1285
def _cros_repair_actions():
    """Return the repair actions for a `CrosHost`.

    The basic, extended and dedicated actions are joined in that order.
    """
    basic = _cros_basic_repair_actions()
    extended = _cros_extended_repair_actions()
    dedicated = _cros_dedicated_repair_actions()
    return basic + extended + dedicated
1292
1293
def create_cros_repair_strategy():
    """Return a `RepairStrategy` for a `CrosHost`."""
    return hosts.RepairStrategy(_cros_verify_dag(),
                                _cros_repair_actions(),
                                'cros')
1299
1300
def _moblab_verify_dag():
    """Return the verification DAG for a `MoblabHost`.

    Each entry is a (verifier class, tag, dependency tags) tuple.
    """
    needs_ssh = ('ssh',)
    return (
        (repair_utils.SshVerifier, 'ssh', ()),
        (ACPowerVerifier, 'power', needs_ssh),
        (PythonVerifier, 'python', needs_ssh),
        (repair_utils.LegacyHostVerifier, 'cros', needs_ssh),
    )
1310
1311
def _moblab_repair_actions():
    """Return the repair actions for a `MoblabHost`.

    Each entry is an (action class, tag, dependencies, triggers) tuple.
    """
    rpm_cycle = (repair_utils.RPMCycleRepair, 'rpm', (), ('ssh', 'power',))
    provision = (ProvisionRepair, 'provision', ('ssh',),
                 ('power', 'python', 'cros'))
    return (rpm_cycle, provision)
1319
1320
def create_moblab_repair_strategy():
    """
    Return a `RepairStrategy` for a `MoblabHost`.

    Moblab uses a subset of the CrOS verify and repair steps.  Several
    pieces are left out because they are not expected to be meaningful.
    Some others are left out for more specific reasons:

    'tpm':  Moblab DUTs don't run the tests that matter to this
        verifier.  TODO(jrbarnette)  This assertion is unproven.

    'good_provision':  This verifier can't pass, because the Moblab
        provision procedure doesn't properly delete the PROVISION_FAILED
        file.  TODO(jrbarnette) We should refactor ChromiumOSProvisioner
        so that it can be different for Moblab.

    'firmware':  Moblab DUTs shouldn't be in FAFT pools, so this is not
        attempted.

    'powerwash':  Powerwash on Moblab causes trouble with deleting the
        DHCP leases file, so it is skipped.
    """
    return hosts.RepairStrategy(_moblab_verify_dag(),
                                _moblab_repair_actions(),
                                'moblab')
1346
1347
def _jetstream_repair_actions():
    """Return the repair actions for a `JetstreamHost`.

    The result is the basic CrOS actions (with servo reset triggered by
    'ping' and 'ssh' only), followed by the Jetstream-specific actions
    and then the extended CrOS actions.
    """
    tpm_triggers = ('jetstream_tpm', 'jetstream_attestation')
    service_triggers = tpm_triggers + ('jetstream_services',)
    # Both Jetstream actions share these dependencies; the service
    # repair additionally depends on the TPM verifiers.
    shared_deps = _JETSTREAM_USB_TRIGGERS + _CROS_POWERWASH_TRIGGERS

    basic = _cros_basic_repair_actions(
            servo_reset_trigger=('ping', 'ssh'))
    jetstream_only = (
            (JetstreamTpmRepair, 'jetstream_tpm_repair',
             shared_deps,
             _CROS_PROVISION_TRIGGERS + tpm_triggers),
            (JetstreamServiceRepair, 'jetstream_service_repair',
             shared_deps + tpm_triggers,
             _CROS_PROVISION_TRIGGERS + service_triggers),
    )
    extended = _cros_extended_repair_actions(
            provision_triggers=_CROS_PROVISION_TRIGGERS + service_triggers,
            usb_triggers=_JETSTREAM_USB_TRIGGERS)
    return basic + jetstream_only + extended
1371
1372
def _jetstream_verify_dag():
    """Return the verification DAG for a `JetstreamHost`.

    The base CrOS DAG is extended with the Jetstream verifiers.
    """
    needs_ssh = ('ssh',)
    jetstream_dag = (
        (JetstreamTpmVerifier, 'jetstream_tpm', needs_ssh),
        (JetstreamAttestationVerifier, 'jetstream_attestation', needs_ssh),
        (JetstreamServicesVerifier, 'jetstream_services', needs_ssh),
    )
    return _cros_verify_base_dag() + jetstream_dag
1381
1382
def create_jetstream_repair_strategy():
    """
    Return a `RepairStrategy` for a `JetstreamHost`.

    The Jetstream repair strategy builds on the CrOS verify and repair
    steps and adds Jetstream-specific verifiers and repair actions.
    """
    return hosts.RepairStrategy(_jetstream_verify_dag(),
                                _jetstream_repair_actions(),
                                'jetstream')
1393
1394
1395# TODO(pprabhu) Move this to a better place. I have no idea what that place
1396# would be.
1397def _is_virtual_machine(host):
1398    """Determine whether the given |host| is a virtual machine.
1399
1400    @param host: a hosts.Host object.
1401    @returns True if the host is a virtual machine, False otherwise.
1402    """
1403    output = host.run('cat /proc/cpuinfo | grep "model name"',
1404                      ignore_status=True)
1405    return (output.exit_status == 0 and output.stdout and
1406            'qemu' in output.stdout.lower())
1407
1408
class CryptohomeStatus(dict):
    """Dictionary of a host's cryptohome status with TPM accessors."""

    def __init__(self, host):
        """Fetch the cryptohome status of |host| and store it.

        @param host: a hosts.Host object.
        """
        super(CryptohomeStatus, self).__init__()
        status = _get_cryptohome_status(host)
        self.update(status)
        self.tpm = self['tpm']

    def _tpm_flag(self, name):
        """Return whether the TPM status entry |name| equals True.

        @param name: key to look up in the TPM status dictionary.
        """
        return self.tpm.get(name) == True

    @property
    def tpm_enabled(self):
        """Whether the TPM status reports 'enabled' as true."""
        return self._tpm_flag('enabled')

    @property
    def tpm_owned(self):
        """Whether the TPM status reports 'owned' as true."""
        return self._tpm_flag('owned')

    @property
    def tpm_can_load_srk(self):
        """Whether the TPM status reports 'can_load_srk' as true."""
        return self._tpm_flag('can_load_srk')

    @property
    def tpm_can_load_srk_pubkey(self):
        """Whether the TPM status reports 'can_load_srk_pubkey' as true."""
        return self._tpm_flag('can_load_srk_pubkey')
1436
1437
1438def _get_cryptohome_status(host):
1439    """Returns a dictionary containing the cryptohome status.
1440
1441    @param host: a hosts.Host object.
1442    @returns A dictionary containing the cryptohome status.
1443    @raises AutoservVerifyError: if the output could not be parsed or the TPM
1444       status is missing.
1445    @raises hosts.AutoservRunError: if the cryptohome command failed.
1446    """
1447    # This cryptohome command emits status information in JSON format. It
1448    # looks something like this:
1449    # {
1450    #    "installattrs": {
1451    #       ...
1452    #    },
1453    #    "mounts": [ {
1454    #       ...
1455    #    } ],
1456    #    "tpm": {
1457    #       "being_owned": false,
1458    #       "can_connect": true,
1459    #       "can_decrypt": false,
1460    #       "can_encrypt": false,
1461    #       "can_load_srk": true,
1462    #       "can_load_srk_pubkey": true,
1463    #       "enabled": true,
1464    #       "has_context": true,
1465    #       "has_cryptohome_key": false,
1466    #       "has_key_handle": false,
1467    #       "last_error": 0,
1468    #       "owned": true
1469    #    }
1470    # }
1471    try:
1472        output = host.run('cryptohome --action=status').stdout.strip()
1473        status = json.loads(output)
1474        if 'tpm' not in status:
1475            raise hosts.AutoservVerifyError('TPM status is missing')
1476        return status
1477    except ValueError:
1478        raise hosts.AutoservVerifyError('Unable to parse cryptohome status')
1479