• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2016 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import json
6import logging
7import os
8import time
9
10import common
11from autotest_lib.client.common_lib import error
12from autotest_lib.client.common_lib import global_config
13from autotest_lib.client.common_lib import hosts
14from autotest_lib.client.common_lib.cros import dev_server
15from autotest_lib.client.common_lib.cros import retry
16from autotest_lib.server import afe_utils
17from autotest_lib.server import crashcollect
18from autotest_lib.server.cros import autoupdater
19from autotest_lib.server.cros.dynamic_suite import tools
20from autotest_lib.server.hosts import cros_firmware
21from autotest_lib.server.hosts import repair_utils
22
# _DEV_MODE_ALLOWED_POOLS - The set of pools that are allowed to be
# in dev mode (usually, those should be unmanaged devices)
#
# The setting is a comma-separated list of pool names.  Whitespace
# around each name is stripped and empty entries are dropped, so a
# blank setting produces an empty set rather than a set holding ''.
_DEV_MODE_ALLOWED_POOLS = set(
    pool.strip()
    for pool in global_config.global_config.get_config_value(
            'CROS',
            'pools_dev_mode_allowed',
            type=str,
            default='',
            allow_blank=True).split(',')
    if pool.strip())
33
# Setting to suppress the dev mode check; primarily used for moblab,
# where all DUTs are in dev mode.
#
# Read as a boolean from the 'dev_mode_allowed' key of the 'CROS'
# config section, defaulting to False.  When true, `DevModeVerifier`
# returns without querying the DUT.
_DEV_MODE_ALWAYS_ALLOWED = global_config.global_config.get_config_value(
            'CROS',
            'dev_mode_allowed',
            type=bool,
            default=False)
41
42# Triggers for the 'au', 'powerwash', and 'usb' repair actions.
43# These are also used as dependencies in the `CrosHost` repair
44# sequence, as follows:
45#
46# au:
47#   - triggers: _CROS_AU_TRIGGERS
48#   - depends on: _CROS_USB_TRIGGERS + _CROS_POWERWASH_TRIGGERS
49#
50# powerwash:
51#   - triggers: _CROS_POWERWASH_TRIGGERS + _CROS_AU_TRIGGERS
52#   - depends on: _CROS_USB_TRIGGERS
53#
54# usb:
55#   - triggers: _CROS_USB_TRIGGERS + _CROS_POWERWASH_TRIGGERS +
56#               _CROS_AU_TRIGGERS
57#   - no dependencies
58#
59# N.B. AC power detection depends on software on the DUT, and there
60# have been bugs where detection failed even though the DUT really
61# did have power.  So, we make the 'power' verifier a trigger for
62# reinstall repair actions, too.
63#
64# TODO(jrbarnette):  AU repair can't fix all problems reported by
65# the 'cros' verifier; it's listed as an AU trigger as a
66# simplification.  The ultimate fix is to split the 'cros' verifier
67# into smaller individual verifiers.
# Verifier tags whose failure triggers the 'au' repair action.
_CROS_AU_TRIGGERS = ('power', 'rwfw', 'python', 'cros',)
# The AU triggers plus 'ec_reset'; this is the default `au_triggers`
# of `_cros_extended_repair_actions()`.
_CROS_EXTENDED_AU_TRIGGERS = _CROS_AU_TRIGGERS + ('ec_reset',)
# Verifier tags that additionally trigger the 'powerwash' action.
_CROS_POWERWASH_TRIGGERS = ('tpm', 'good_au', 'ext4',)
# Verifier tags that additionally trigger the 'usb' action.
_CROS_USB_TRIGGERS = ('ssh', 'writable', 'stop_start_ui',)
# Same as _CROS_USB_TRIGGERS, but without 'stop_start_ui'.
_JETSTREAM_USB_TRIGGERS = ('ssh', 'writable',)
73
74
class ACPowerVerifier(hosts.Verifier):
    """Verify the DUT reports AC power and at least a 50% battery charge."""

    def verify(self, host):
        # pylint: disable=missing-docstring
        try:
            supply = host.get_power_supply_info()
        except error.AutoservRunError:
            raise hosts.AutoservVerifyError(
                    'Failed to get power supply info')

        # A report without the line power status is a failure.
        try:
            online = supply['Line Power']['online']
        except KeyError:
            raise hosts.AutoservVerifyError(
                    'Cannot determine AC power status')
        if online != 'yes':
            raise hosts.AutoservVerifyError(
                    'AC power is not plugged in')

        # A report without a battery percentage is tolerated: the
        # battery check is skipped rather than failed.
        try:
            charge = supply['Battery']['percentage']
        except KeyError:
            logging.info('Cannot determine battery status - '
                         'skipping check.')
            return
        if float(charge) < 50.0:
            raise hosts.AutoservVerifyError(
                    'Battery is less than 50%')

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'The DUT is plugged in to AC power'
106
107
class WritableVerifier(hosts.Verifier):
    """
    Check that the stateful file systems can still be written to.

    Linux commonly reacts to unexpected file system errors (hardware
    errors in block devices included) by remounting the file system
    read-only.  This verifier detects that condition by creating and
    removing a scratch file.

    Two file systems are probed, both of which must be writable for
    critical operations such as AU:
      * The (unencrypted) stateful file system, which holds
        /mnt/stateful_partition.
      * The encrypted stateful partition, which holds /var.

    Bind mounts are not probed individually; they are expected to fail
    the same way as the mounts underneath them.  Whether the Linux
    kernel actually guarantees that is untested...
    """

    # N.B. The order is significant:  Encrypted stateful is loop-mounted
    # from a file in unencrypted stateful, so encrypted stateful isn't
    # probed once unencrypted stateful has failed.
    _TEST_DIRECTORIES = ['/mnt/stateful_partition', '/var/tmp']

    def verify(self, host):
        # pylint: disable=missing-docstring
        # The first failure ends the scan on purpose; see the note on
        # _TEST_DIRECTORIES.
        for directory in self._TEST_DIRECTORIES:
            probe = os.path.join(directory, 'writable_test')
            result = host.run(command='touch %s && rm %s' % (probe, probe),
                              ignore_status=True)
            if result.exit_status == 0:
                continue
            raise hosts.AutoservVerifyError(
                    'Can\'t create a file in %s' % directory)

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'The stateful filesystems are writable'
148
149
class EXT4fsErrorVerifier(hosts.Verifier):
    """
    Confirm we have not seen critical file system kernel errors.

    Two patterns are searched for in the DUT's `dmesg` output:
      * "EXT4-fs error (device <dev>):" for the device that backs
        /mnt/stateful_partition.
      * "This should not happen!!  Data will be lost".

    The verifier fails with `hosts.AutoservVerifyError` if either
    pattern is found, and passes silently otherwise.
    """
    def verify(self, host):
        # pylint: disable=missing-docstring
        # grep for stateful FS errors of the type "EXT4-fs error (device sda1):"
        # The embedded $(...) pipeline looks up the stateful mount in
        # /proc/$$/mountinfo and keeps the third '/'-separated component
        # of its source path as the device name.  Backslashes are doubled
        # so that grep receives a literal "\(" and "\)".
        command = ("dmesg | grep -E \"EXT4-fs error \\(device "
                   "$(cut -d ' ' -f 5,9 /proc/$$/mountinfo | "
                   "grep -e '^/mnt/stateful_partition ' | "
                   "cut -d ' ' -f 2 | cut -d '/' -f 3)\\):\"")
        output = host.run(command=command, ignore_status=True).stdout
        if output:
            # Report only the first matching line to keep the message short.
            sample = output.splitlines()[0]
            message = 'Saw file system error: %s' % sample
            raise hosts.AutoservVerifyError(message)
        # Check for other critical FS errors.
        command = 'dmesg | grep "This should not happen!!  Data will be lost"'
        output = host.run(command=command, ignore_status=True).stdout
        if output:
            message = 'Saw file system error: Data will be lost'
            raise hosts.AutoservVerifyError(message)
        # No match for either pattern means no error was seen.  Nothing
        # is logged here: an error-level message on the success path
        # would be misleading.

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Did not find critical file system errors'
179
180
class UpdateSuccessVerifier(hosts.Verifier):
    """
    Check that the most recent provision job on the DUT completed.

    Every update (e.g. for a Provision job) begins by creating a marker
    file named `PROVISION_FAILED` in a part of the stateful partition
    that is removed when an update finishes successfully.  A marker
    that is still present therefore means an earlier update failed.

    This verifier fails when the marker file exists.
    """
    def verify(self, host):
        # pylint: disable=missing-docstring
        marker_check = host.run(
                'test -f %s' % autoupdater.PROVISION_FAILED,
                ignore_status=True)
        # `test -f` exits non-zero when the marker is absent.
        if marker_check.exit_status != 0:
            return
        raise hosts.AutoservVerifyError(
                'Last AU on this DUT failed')

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'The most recent AU attempt on this DUT succeeded'
206
207
class TPMStatusVerifier(hosts.Verifier):
    """Check that the TPM on the host reports a healthy state."""

    def verify(self, host):
        # pylint: disable=missing-docstring
        if _is_virtual_machine(host):
            # We do not forward host TPM / emulated TPM to qemu VMs, so
            # there is nothing to verify.
            logging.debug('Skipped verification %s on VM', self)
            return

        skip_note = ('Cannot determine the Cryptohome valid status - '
                     'skipping check.')
        try:
            cryptohome = CryptohomeStatus(host)
        except hosts.AutoservVerifyError:
            logging.info(skip_note)
            return

        # Any field missing from the status report ends the check
        # without failing the verifier.
        try:
            tpm_state = cryptohome['tpm']
            if not tpm_state['enabled']:
                raise hosts.AutoservVerifyError(
                        'TPM is not enabled -- Hardware is not working.')
            if not tpm_state['can_connect']:
                raise hosts.AutoservVerifyError(
                        'TPM connect failed -- last_error=%d.'
                        % tpm_state['last_error'])
            if tpm_state['owned']:
                if not tpm_state['can_load_srk']:
                    raise hosts.AutoservVerifyError(
                            'Cannot load the TPM SRK')
            if tpm_state['can_load_srk']:
                if not tpm_state['can_load_srk_pubkey']:
                    raise hosts.AutoservVerifyError(
                            'Cannot load the TPM SRK public key')
        except KeyError:
            logging.info(skip_note)

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'The host\'s TPM is available and working'
248
249
class PythonVerifier(hosts.Verifier):
    """Check that the host has a Python interpreter that can run."""

    def verify(self, host):
        # pylint: disable=missing-docstring
        check = host.run('python -c "import json"',
                         ignore_status=True)
        if check.exit_status == 0:
            return

        message = 'The python interpreter is broken'
        # On exit status 127, look for the binary to tell "missing"
        # apart from "broken".
        if check.exit_status == 127:
            located = host.run('which python', ignore_status=True)
            if not (located.exit_status == 0 and located.stdout):
                message = ('Python is missing; may be caused by '
                           'powerwash')
        raise hosts.AutoservVerifyError(message)

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Python on the host is installed and working'
270
271
class DevModeVerifier(hosts.Verifier):
    """Check that the host did not boot in dev mode."""

    def verify(self, host):
        # pylint: disable=missing-docstring
        host_info = host.host_info_store.get()
        # Dev mode is tolerated when it is allowed globally, or when
        # the host belongs to one of the exempted pools.
        if _DEV_MODE_ALWAYS_ALLOWED:
            return
        if host_info.pools & _DEV_MODE_ALLOWED_POOLS:
            return

        devsw_boot = host.run('crossystem devsw_boot',
                              ignore_status=True).stdout
        if devsw_boot == '0':
            return
        raise hosts.AutoservVerifyError('The host is in dev mode')

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'The host should not be in dev mode'
291
292
class HWIDVerifier(hosts.Verifier):
    """Record the host's HWID and serial number in its host info.

    This verifier never fails:  any exception is logged and swallowed.
    """

    def verify(self, host):
        # pylint: disable=missing-docstring
        # (host info attribute, command whose stdout supplies its value)
        probes = (
            ('HWID', 'crossystem hwid'),
            ('serial_number', 'vpd -g serial_number'),
        )
        try:
            updated_info = host.host_info_store.get()
            for attribute, command in probes:
                value = host.run(command, ignore_status=True).stdout
                # Empty output leaves the stored attribute untouched.
                if value:
                    updated_info.attributes[attribute] = value

            # Commit only when something actually changed.
            if updated_info != host.host_info_store.get():
                host.host_info_store.commit(updated_info)
        except Exception as e:
            logging.exception('Failed to get HWID & Serial Number for host '
                              '%s: %s', host.hostname, str(e))

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'The host should have valid HWID and Serial Number'
320
321
class JetstreamTpmVerifier(hosts.Verifier):
    """Check that the TPM on a Jetstream device is fully usable."""

    @retry.retry(error.AutoservError, timeout_min=2, delay_sec=10)
    def verify(self, host):
        # pylint: disable=missing-docstring
        # (CryptohomeStatus attribute, failure message), checked in order.
        required_states = (
            ('tpm_enabled', 'TPM is not enabled'),
            ('tpm_owned', 'TPM is not owned'),
            ('tpm_can_load_srk', 'TPM cannot load SRK'),
            ('tpm_can_load_srk_pubkey', 'TPM cannot load SRK pubkey'),
        )
        try:
            cryptohome = CryptohomeStatus(host)
            for attribute, failure in required_states:
                if not getattr(cryptohome, attribute):
                    raise hosts.AutoservVerifyError(failure)

            # Check that the TPM is fully initialized. The output of this
            # command is line-oriented property/value pairs.
            tpm_status = host.run('cryptohome --action=tpm_status')
            if 'TPM Ready: true' not in tpm_status.stdout:
                raise hosts.AutoservVerifyError('TPM is not ready')
        except error.AutoservRunError:
            raise hosts.AutoservVerifyError(
                    'Could not determine TPM status')

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Jetstream TPM state check'
352
353
class JetstreamAttestationVerifier(hosts.Verifier):
    """Check that the Jetstream attestation client holds a certificate."""

    @retry.retry(error.AutoservError, timeout_min=2, delay_sec=10)
    def verify(self, host):
        # pylint: disable=missing-docstring
        # (command, text required in its stdout, failure message).
        # The tpm_more_status output is in text protobuf format.
        checks = (
            ('cryptohome --action=tpm_more_status',
             'attestation_prepared: true',
             'Attestation has not been prepared'),
            ('cryptohome --action=tpm_attestation_get_ek',
             'EK Certificate',
             'Endorsement certificate not found'),
        )
        try:
            for command, marker, failure in checks:
                if marker not in host.run(command).stdout:
                    raise hosts.AutoservVerifyError(failure)
        except error.AutoservRunError:
            raise hosts.AutoservVerifyError(
                    'Unable to fetch endorsement certificate')

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Jetstream attestation endorsement check'
379
380
class JetstreamServicesVerifier(hosts.Verifier):
    """Check that the Jetstream ap-controller service and process are up."""

    # Retry for b/62576902
    @retry.retry(error.AutoservError, timeout_min=1, delay_sec=10)
    def verify(self, host):
        # pylint: disable=missing-docstring
        # First the upstart job must report as running...
        try:
            job_running = host.upstart_status('ap-controller')
            if not job_running:
                raise hosts.AutoservVerifyError(
                    'ap-controller service is not running')
        except error.AutoservRunError:
            raise hosts.AutoservVerifyError(
                'ap-controller service not found')

        # ...and then a matching process must actually exist.
        try:
            host.run('pgrep ap-controller')
        except error.AutoservRunError:
            raise hosts.AutoservVerifyError(
                'ap-controller process is not running')

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Jetstream services must be running'
406
407
class KvmExistsVerifier(hosts.Verifier):
    """Check that /dev/kvm is present on hosts expected to have it."""

    def verify(self, host):
        # pylint: disable=missing-docstring
        # Exit status 0 means /dev/kvm is absent although
        # /usr/bin/vm_concierge is installed.
        kvm_missing = host.run(
                '[ ! -e /dev/kvm -a -f /usr/bin/vm_concierge ]',
                ignore_status=True)
        if kvm_missing.exit_status != 0:
            return

        # Silently check if the kvm_transition flag is being used by Chrome
        # indicating /dev/kvm may not be present yet on this system.
        in_transition = host.run('grep -qsxF "kvm_transition" '
                                 '/etc/ui_use_flags.txt',
                                 ignore_status=True)
        if in_transition.exit_status != 0:
            raise hosts.AutoservVerifyError('/dev/kvm is missing')

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return '/dev/kvm should exist if device supports Linux VMs'
427
428
class StopStartUIVerifier(hosts.Verifier):
    """Check that running 'stop ui' does not crash the DUT.

    AU and provision both run 'stop ui'.  Some bad images broke that
    command, which in turn broke provisioning for every following test.
    This verifier makes sure the command works, so that a failure
    triggers reimaging to a good version.
    """
    def verify(self, host):
        # The command's exit status is ignored; only a timeout counts
        # as a failure.
        restart_ui = 'stop ui && start ui'
        try:
            host.run(restart_ui, ignore_status=True, timeout=10)
        except error.AutoservSSHTimeout:
            raise hosts.AutoservVerifyError(
                "Got timeout when stop ui/start ui. DUT might crash.")

    @property
    def description(self):
        return 'The DUT image works fine when stop ui/start ui.'
447
448
class ServoTypeVerifier(hosts.Verifier):
    """Keep the host's servo_type attribute in sync with its servo.

    This verifier never fails:  a host without a servo is skipped, and
    any exception raised while updating is logged and swallowed.
    """

    def verify(self, host):
        if not host.servo:
            logging.info("Host has no working servo.")
            return

        stored_info = host.host_info_store.get()
        try:
            detected = host.servo.get_servo_version()
            recorded = stored_info.attributes.get('servo_type', '')
            if detected == recorded:
                return
            logging.info('servo_type mismatch detected, updating...')
            stored_info.attributes['servo_type'] = detected
            host.host_info_store.commit(stored_info)
        except Exception as e:
            # A servo problem must not fail this verifier and thereby
            # break the DUT.
            logging.error("Failed to update servo_type, %s", str(e))

    @property
    def description(self):
        return 'The host has servo_type attribute'
472
473
class _ResetRepairAction(hosts.RepairAction):
    """Shared post-reset handling for repair actions that reset a DUT."""

    def _collect_logs(self, host):
        """Gather /var/log and crash info from a DUT that came back up."""
        log_dir = crashcollect.get_crashinfo_dir(
                host, 'after_%s' % self.tag)
        host.collect_logs('/var/log', log_dir, ignore_errors=True)
        # Collect crash info.
        crashcollect.get_crashinfo(host, None)

    def _check_reset_success(self, host):
        """Fail unless the DUT boots; gather its logs when it does.

        Raises `hosts.AutoservRepairError` if the host is not up within
        `host.BOOT_TIMEOUT`.
        """
        if not host.wait_up(host.BOOT_TIMEOUT):
            raise hosts.AutoservRepairError(
                    'Host %s is still offline after %s.' %
                    (host.hostname, self.tag),
                    'failed_to_boot_after_' + self.tag)

        # Collect logs once we regain ssh access before clobbering them.
        try:
            self._collect_logs(host)
        except Exception:
            # The DUT is up, so the reset counts as a success even when
            # log gathering fails; just record the failure and move on.
            logging.exception('Non-critical failure in log '
                              'collection during %s.',
                              self.tag)
503
504
class ServoSysRqRepair(_ResetRepairAction):
    """
    Repair a Chrome device by sending a system request to the kernel.

    Sending 3 times the Alt+VolUp+x key combination (aka sysrq-x)
    will ask the kernel to panic itself and reboot while conserving
    the kernel logs in console ramoops.
    """

    def repair(self, host):
        # pylint: disable=missing-docstring
        # Raises hosts.AutoservRepairError if a key press fails, or if
        # the DUT does not come back up afterwards.
        repair_utils.require_servo(host)
        # Press 3 times Alt+VolUp+X
        # no checking DUT health between each press as
        # killing Chrome is not really likely to fix the DUT SSH.
        for _ in range(3):
            try:
                host.servo.sysrq_x()
            # `except ... as` matches the rest of this file and is also
            # valid on Python 3, unlike the comma form.
            except error.TestFail as ex:
                raise hosts.AutoservRepairError(
                      'cannot press sysrq-x: %s.' % str(ex),
                      'cannot_press_sysrq_x')
            # less than 5 seconds between presses.
            time.sleep(2.0)
        self._check_reset_success(host)

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Reset the DUT via keyboard sysrq-x'
535
536
class ServoResetRepair(_ResetRepairAction):
    """Reset a Chrome device through its servo to repair it."""

    def repair(self, host):
        # pylint: disable=missing-docstring
        repair_utils.require_servo(host)
        power_controller = host.servo.get_power_state_controller()
        power_controller.reset()
        self._check_reset_success(host)

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Reset the DUT via servo'
550
551
class CrosRebootRepair(repair_utils.RebootRepair):
    """Clear dev mode on a CrOS target, then reboot it."""

    def repair(self, host):
        # pylint: disable=missing-docstring
        # N.B. The reboot has to happen whether or not clearing
        # dev_mode works, so command failures are ignored.
        clear_dev_mode = (
            '/usr/share/vboot/bin/set_gbb_flags.sh 0',
            'crossystem disable_dev_request=1',
        )
        for command in clear_dev_mode:
            host.run(command, ignore_status=True)
        super(CrosRebootRepair, self).repair(host)

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Reset GBB flags and Reboot the host'
569
570
class AutoUpdateRepair(hosts.RepairAction):
    """
    Repair a DUT by re-installing a test image via autoupdate.

    The DUT's designated "stable test image" is installed using the
    standard procedure for installing a new test image via autoupdate.
    """

    def repair(self, host):
        # pylint: disable=missing-docstring
        build = host.get_cros_repair_image_name()
        logging.info('Staging build for AU: %s', build)
        image_server = dev_server.ImageServer.resolve(build, host.hostname)
        image_server.trigger_download(build, synchronous=False)
        url_pattern = tools.image_url_pattern()
        update_url = url_pattern % (image_server.url(), build)
        afe_utils.machine_install_and_update_labels(host, update_url)

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Re-install the stable build via AU'
593
594
class PowerWashRepair(AutoUpdateRepair):
    """
    Powerwash the DUT and then re-install it with autoupdate.

    After the powerwash, a stable test image is installed in the same
    way as for `AutoUpdateRepair`.
    """

    def repair(self, host):
        # pylint: disable=missing-docstring
        # Write the reset request file, then reboot.
        request_powerwash = ('echo "fast safe" > '
                             '/mnt/stateful_partition/factory_install_reset')
        host.run(request_powerwash)
        host.reboot(timeout=host.POWERWASH_BOOT_TIMEOUT, wait=True)
        super(PowerWashRepair, self).repair(host)

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Powerwash and then re-install the stable build via AU'
614
615
class ServoInstallRepair(hosts.RepairAction):
    """
    Re-install a test image from USB by way of servo.

    The DUT's designated "stable test image" is installed from
    servo-attached USB storage.
    """

    def repair(self, host):
        # pylint: disable=missing-docstring
        repair_utils.require_servo(host)
        staged = host.stage_image_for_servo()
        image_name, update_url = staged
        # Provision labels are cleared before the install and set again
        # only once it has returned.
        afe_utils.clean_provision_labels(host)
        host.servo_install(update_url)
        afe_utils.add_provision_labels(host, host.VERSION_PREFIX, image_name)

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Reinstall from USB using servo'
636
637
class ColdRebootRepair(_ResetRepairAction):
    """
    Repair a Chrome device with a cold reboot that resets the EC.

    The cold reboot is issued through ectool.
    """

    def repair(self, host):
        # pylint: disable=missing-docstring
        cold_reboot = 'ectool reboot_ec cold'
        host.reboot(reboot_cmd=cold_reboot)
        self._check_reset_success(host)

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Reset the DUT via cold reboot with ectool'
654
655
class JetstreamTpmRepair(hosts.RepairAction):
    """Reset the TPM and reboot to repair the device."""

    def repair(self, host):
        # pylint: disable=missing-docstring
        # Each command is best effort; failures are ignored.
        reset_commands = (
            'rm -f /var/cache/ap/setup-network',
            'rm -f /home/chronos/.oobe_completed',
            'rm -f /home/.shadow/.can_attempt_ownership',
            'crossystem clear_tpm_owner_request=1',
        )
        for command in reset_commands:
            host.run(command, ignore_status=True)
        host.reboot()

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Reset TPM and reboot'
672
673
class JetstreamServiceRepair(hosts.RepairAction):
    """Restart the Jetstream services to repair the device."""

    def repair(self, host):
        # pylint: disable=missing-docstring
        # All of the work is delegated to the host object.
        host.cleanup_services()

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Restart Jetstream services'
685
686
def _cros_verify_dag():
    """Return the full verification DAG for a `CrosHost`.

    The result is the base DAG followed by the extended DAG.
    """
    base_dag = _cros_verify_base_dag()
    extended_dag = _cros_verify_extended_dag()
    return base_dag + extended_dag
690
691
def _cros_verify_base_dag():
    """Return the base verification DAG for a `CrosHost`.

    Each entry is a `(verifier class, tag, tags)` triple; the final
    element is presumably the tags of the verifiers it depends on.
    """
    # Every verifier except 'ssh' and 'servo_type' lists 'ssh' in its
    # final element.
    needs_ssh = ('ssh',)
    return (
        (repair_utils.SshVerifier,              'ssh',        ()),
        (ServoTypeVerifier,                     'servo_type', ()),
        (DevModeVerifier,                       'devmode',    needs_ssh),
        (HWIDVerifier,                          'hwid',       needs_ssh),
        (ACPowerVerifier,                       'power',      needs_ssh),
        (EXT4fsErrorVerifier,                   'ext4',       needs_ssh),
        (WritableVerifier,                      'writable',   needs_ssh),
        (TPMStatusVerifier,                     'tpm',        needs_ssh),
        (UpdateSuccessVerifier,                 'good_au',    needs_ssh),
        (cros_firmware.FirmwareStatusVerifier,  'fwstatus',   needs_ssh),
        (cros_firmware.FirmwareVersionVerifier, 'rwfw',       needs_ssh),
        (PythonVerifier,                        'python',     needs_ssh),
        (repair_utils.LegacyHostVerifier,       'cros',       needs_ssh),
        (KvmExistsVerifier,                     'ec_reset',   needs_ssh),
    )
713
714
def _cros_verify_extended_dag():
    """Return the extended verification DAG for a `CrosHost`.

    The result is a one-entry tuple holding the 'stop_start_ui'
    verifier.
    """
    stop_start_ui = (StopStartUIVerifier, 'stop_start_ui', ('ssh',))
    return (stop_start_ui,)
720
721
def _cros_basic_repair_actions():
    """Return the basic repair actions for a `CrosHost`.

    Each entry is a `(repair class, tag, dependencies, triggers)`
    tuple.
    """
    return (
        # RPM cycling must come before Servo reset:  a DUT with a dead
        # battery needs AC power reattached before it is reset via servo.
        (repair_utils.RPMCycleRepair, 'rpm', (), ('ssh', 'power',)),
        (ServoSysRqRepair, 'sysrq', (), ('ssh',)),
        (ServoResetRepair, 'servoreset', (), ('ssh',)),

        # N.B. FirmwareRepair can't fix a 'good_au' failure directly,
        # because it doesn't remove the flag file that triggers the
        # failure.  It is listed as a repair trigger anyway because the
        # last update may have failed because of the firmware, and the
        # repair steps below need to be able to trust the firmware.
        (cros_firmware.FirmwareRepair, 'firmware', (),
                ('ssh', 'fwstatus', 'good_au',)),

        (CrosRebootRepair, 'reboot', ('ssh',), ('devmode', 'writable',)),

        (ColdRebootRepair, 'coldboot', ('ssh',), ('ec_reset',)),
    )
745
746
def _cros_extended_repair_actions(au_triggers=_CROS_EXTENDED_AU_TRIGGERS,
                                  powerwash_triggers=_CROS_POWERWASH_TRIGGERS,
                                  usb_triggers=_CROS_USB_TRIGGERS):
    """Return the extended (re-install) repair actions for a `CrosHost`.

    Each entry is a `(repair class, tag, dependencies, triggers)`
    tuple.

    @param au_triggers  Verifier tags that trigger the 'au' action.
    @param powerwash_triggers  Additional tags that trigger 'powerwash'.
    @param usb_triggers  Additional tags that trigger 'usb'.
    """
    # The 'au', 'powerwash', and 'usb' actions stack up:  each one can
    # repair progressively more verifiers than the one before it, so its
    # triggers grow while its dependencies shrink.
    au_dependencies = usb_triggers + powerwash_triggers
    powerwash_all_triggers = powerwash_triggers + au_triggers
    usb_all_triggers = au_dependencies + au_triggers

    return (
        (AutoUpdateRepair, 'au', au_dependencies, au_triggers),
        (PowerWashRepair, 'powerwash', usb_triggers, powerwash_all_triggers),
        (ServoInstallRepair, 'usb', (), usb_all_triggers),
    )
766
767
def _cros_repair_actions():
    """Return all repair actions for a `CrosHost`.

    The result is the basic actions followed by the extended actions.
    """
    basic_actions = _cros_basic_repair_actions()
    extended_actions = _cros_extended_repair_actions()
    return basic_actions + extended_actions
773
774
def create_cros_repair_strategy():
    """Build and return the `RepairStrategy` for a `CrosHost`."""
    # The verify DAG is built before the repair actions.
    dag = _cros_verify_dag()
    actions = _cros_repair_actions()
    return hosts.RepairStrategy(dag, actions, 'cros')
780
781
def _moblab_verify_dag():
    """Return the verification DAG for a `MoblabHost`.

    Only the 'ssh', 'power', 'python' and 'cros' verifiers are included.
    """
    needs_ssh = ('ssh',)
    return (
        (repair_utils.SshVerifier,        'ssh',    ()),
        (ACPowerVerifier,                 'power',  needs_ssh),
        (PythonVerifier,                  'python', needs_ssh),
        (repair_utils.LegacyHostVerifier, 'cros',   needs_ssh),
    )
791
792
def _moblab_repair_actions():
    """Return the repair actions for a `MoblabHost`.

    @returns A tuple of (repair class, label, dependencies, triggers)
        entries:  an RPM power cycle with no dependencies, followed by
        an auto-update that depends on 'ssh'.
    """
    rpm_action = (repair_utils.RPMCycleRepair, 'rpm',
                  (), ('ssh', 'power',))
    au_action = (AutoUpdateRepair, 'au',
                 ('ssh',), ('power', 'python', 'cros'))
    return (rpm_action, au_action)
800
801
def create_moblab_repair_strategy():
    """
    Build the `RepairStrategy` used for a `MoblabHost`.

    The Moblab strategy is a subset of the CrOS verify and repair
    steps.  Most of the omitted pieces are simply not expected to be
    meaningful on Moblab; the following are left out for more specific
    reasons:

    'tpm':  The tests that matter to this verifier don't run on Moblab
        DUTs.  TODO(jrbarnette)  This assertion is unproven.

    'good_au':  The Moblab AU procedure doesn't properly delete the
        PROVISION_FAILED file, so this verifier can never pass.
        TODO(jrbarnette) We should refactor ChromiumOSUpdater so
        that it can be different for Moblab.

    'firmware':  Not attempted, because Moblab DUTs shouldn't be in
        FAFT pools.

    'powerwash':  Skipped, because powerwash on Moblab causes trouble
        with deleting the DHCP leases file.

    @returns A `hosts.RepairStrategy` labeled 'moblab'.
    """
    return hosts.RepairStrategy(_moblab_verify_dag(),
                                _moblab_repair_actions(),
                                'moblab')
827
828
def _jetstream_repair_actions():
    """Return the repair actions for a `JetstreamHost`.

    The result is the basic CrOS repair actions, then the two
    Jetstream-specific repairs, then the extended CrOS repair actions
    with Jetstream-specific 'au' and 'usb' triggers.

    @returns A tuple of (repair class, label, dependencies, triggers)
        entries.
    """
    au_triggers = _CROS_AU_TRIGGERS
    tpm_triggers = ('jetstream_tpm', 'jetstream_attestation')
    service_triggers = tpm_triggers + ('jetstream_services',)

    basic_actions = _cros_basic_repair_actions()

    # Both Jetstream repairs share these dependencies; the service
    # repair additionally depends on the TPM-related verifiers.
    shared_dependencies = _JETSTREAM_USB_TRIGGERS + _CROS_POWERWASH_TRIGGERS
    jetstream_actions = (
        (JetstreamTpmRepair, 'jetstream_tpm_repair',
         shared_dependencies,
         au_triggers + tpm_triggers),

        (JetstreamServiceRepair, 'jetstream_service_repair',
         shared_dependencies + tpm_triggers,
         au_triggers + service_triggers),
    )

    extended_actions = _cros_extended_repair_actions(
        au_triggers=au_triggers + service_triggers,
        usb_triggers=_JETSTREAM_USB_TRIGGERS)

    return basic_actions + jetstream_actions + extended_actions
851
852
def _jetstream_verify_dag():
    """Return the verification DAG for a `JetstreamHost`.

    @returns The base CrOS verification DAG extended with the three
        Jetstream verifiers, each of which depends only on 'ssh'.
    """
    base_dag = _cros_verify_base_dag()
    needs_ssh = ('ssh',)
    jetstream_verifiers = (
        (JetstreamTpmVerifier, 'jetstream_tpm', needs_ssh),
        (JetstreamAttestationVerifier, 'jetstream_attestation', needs_ssh),
        (JetstreamServicesVerifier, 'jetstream_services', needs_ssh),
    )
    return base_dag + jetstream_verifiers
861
862
def create_jetstream_repair_strategy():
    """
    Build the `RepairStrategy` used for a `JetstreamHost`.

    The strategy is assembled from `_jetstream_verify_dag()` and
    `_jetstream_repair_actions()`, which extend the CrOS verify and
    repair steps with Jetstream-specific verifiers and repair actions.

    @returns A `hosts.RepairStrategy` labeled 'jetstream'.
    """
    return hosts.RepairStrategy(_jetstream_verify_dag(),
                                _jetstream_repair_actions(),
                                'jetstream')
873
874
875# TODO(pprabhu) Move this to a better place. I have no idea what that place
876# would be.
def _is_virtual_machine(host):
    """Determine whether the given |host| is a virtual machine.

    The host is considered a virtual machine when the "model name"
    lines of its /proc/cpuinfo mention 'qemu' (case-insensitive).

    @param host: a hosts.Host object.
    @returns True if the host is a virtual machine, False otherwise.
        The result is always a real boolean, including when the
        command fails or produces no output.
    """
    # ignore_status=True:  a failing grep (no match) must not raise;
    # it is reported through exit_status instead.
    result = host.run('cat /proc/cpuinfo | grep "model name"',
                      ignore_status=True)
    # Guard explicitly rather than chaining with `and`, which would
    # leak an empty string or None to the caller instead of False.
    if result.exit_status != 0 or not result.stdout:
        return False
    return 'qemu' in result.stdout.lower()
887
888
class CryptohomeStatus(dict):
    """Cryptohome status of a host, exposed as a dictionary.

    The dictionary holds the status returned by
    `_get_cryptohome_status()`; the `tpm` attribute is a shortcut to
    its 'tpm' entry, and the `tpm_*` properties report individual TPM
    flags as booleans.
    """

    def __init__(self, host):
        """Fetch the cryptohome status from |host|.

        @param host: a hosts.Host object.
        """
        super(CryptohomeStatus, self).__init__(_get_cryptohome_status(host))
        self.tpm = self['tpm']

    def _tpm_flag(self, name):
        """Return whether the TPM status entry |name| equals True.

        @param name: key to look up in the TPM status.
        @returns True only if the entry is present and equal to True;
            a missing entry yields False.
        """
        # Compare against True rather than testing truthiness, so that
        # truthy non-boolean values (e.g. strings) do not count as set.
        return self.tpm.get(name) == True

    @property
    def tpm_enabled(self):
        """Whether the TPM status reports 'enabled'."""
        return self._tpm_flag('enabled')

    @property
    def tpm_owned(self):
        """Whether the TPM status reports 'owned'."""
        return self._tpm_flag('owned')

    @property
    def tpm_can_load_srk(self):
        """Whether the TPM status reports 'can_load_srk'."""
        return self._tpm_flag('can_load_srk')

    @property
    def tpm_can_load_srk_pubkey(self):
        """Whether the TPM status reports 'can_load_srk_pubkey'."""
        return self._tpm_flag('can_load_srk_pubkey')
916
917
def _get_cryptohome_status(host):
    """Fetch and decode the cryptohome status of a host.

    Runs `cryptohome --action=status` on the host and parses the JSON
    it prints.

    @param host: a hosts.Host object.
    @returns The decoded status dictionary; it is only returned when
        it contains a 'tpm' entry.
    @raises AutoservVerifyError: if the output could not be parsed or the TPM
       status is missing.
    @raises AutoservRunError: presumably raised by host.run() if the
       cryptohome command failed; it is not caught here.  TODO confirm
       the exact exception class against host.run().
    """
    # The status command prints JSON shaped roughly like this:
    # {
    #    "installattrs": {
    #       ...
    #    },
    #    "mounts": [ {
    #       ...
    #    } ],
    #    "tpm": {
    #       "being_owned": false,
    #       "can_connect": true,
    #       "can_decrypt": false,
    #       "can_encrypt": false,
    #       "can_load_srk": true,
    #       "can_load_srk_pubkey": true,
    #       "enabled": true,
    #       "has_context": true,
    #       "has_cryptohome_key": false,
    #       "has_key_handle": false,
    #       "last_error": 0,
    #       "owned": true
    #    }
    # }
    try:
        raw_status = host.run('cryptohome --action=status').stdout.strip()
        parsed_status = json.loads(raw_status)
    except ValueError:
        # json.loads() reports malformed input as a ValueError.
        raise hosts.AutoservVerifyError('Unable to parse cryptohome status')

    if 'tpm' in parsed_status:
        return parsed_status
    raise hosts.AutoservVerifyError('TPM status is missing')
959