# Copyright 2016 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import json
import logging
import os
import time

import common
from autotest_lib.client.common_lib import error
from autotest_lib.client.common_lib import global_config
from autotest_lib.client.common_lib import hosts
from autotest_lib.client.common_lib.cros import dev_server
from autotest_lib.client.common_lib.cros import retry
from autotest_lib.server import afe_utils
from autotest_lib.server import crashcollect
from autotest_lib.server.cros import autoupdater
from autotest_lib.server.cros.dynamic_suite import tools
from autotest_lib.server.hosts import cros_firmware
from autotest_lib.server.hosts import repair_utils

# _DEV_MODE_ALLOWED_POOLS - The set of pools that are allowed to be
# in dev mode (usually, those should be unmanaged devices)
#
_DEV_MODE_ALLOWED_POOLS = set(
        global_config.global_config.get_config_value(
                'CROS',
                'pools_dev_mode_allowed',
                type=str,
                default='',
                allow_blank=True).split(','))

# Setting to suppress dev mode check; primarily used for moblab where all
# DUT's are in dev mode.
_DEV_MODE_ALWAYS_ALLOWED = global_config.global_config.get_config_value(
        'CROS',
        'dev_mode_allowed',
        type=bool,
        default=False)

# Triggers for the 'au', 'powerwash', and 'usb' repair actions.
# These are also used as dependencies in the `CrosHost` repair
# sequence, as follows:
#
# au:
#   - triggers: _CROS_AU_TRIGGERS
#   - depends on: _CROS_USB_TRIGGERS + _CROS_POWERWASH_TRIGGERS
#
# powerwash:
#   - triggers: _CROS_POWERWASH_TRIGGERS + _CROS_AU_TRIGGERS
#   - depends on: _CROS_USB_TRIGGERS
#
# usb:
#   - triggers: _CROS_USB_TRIGGERS + _CROS_POWERWASH_TRIGGERS +
#               _CROS_AU_TRIGGERS
#   - no dependencies
#
# N.B. AC power detection depends on software on the DUT, and there
# have been bugs where detection failed even though the DUT really
# did have power.  So, we make the 'power' verifier a trigger for
# reinstall repair actions, too.
#
# TODO(jrbarnette):  AU repair can't fix all problems reported by
# the 'cros' verifier; it's listed as an AU trigger as a
# simplification.  The ultimate fix is to split the 'cros' verifier
# into smaller individual verifiers.
_CROS_AU_TRIGGERS = ('power', 'rwfw', 'python', 'cros',)
_CROS_POWERWASH_TRIGGERS = ('tpm', 'good_au', 'ext4',)
_CROS_USB_TRIGGERS = ('ssh', 'writable',)


class ACPowerVerifier(hosts.Verifier):
    """Check for AC power and a reasonable battery charge."""

    def verify(self, host):
        # pylint: disable=missing-docstring
        try:
            info = host.get_power_supply_info()
        except error.AutoservRunError:
            raise hosts.AutoservVerifyError(
                    'Failed to get power supply info')

        try:
            if info['Line Power']['online'] != 'yes':
                raise hosts.AutoservVerifyError(
                        'AC power is not plugged in')
        except KeyError:
            raise hosts.AutoservVerifyError(
                    'Cannot determine AC power status')

        # Battery status is best-effort: some devices (e.g. chromeboxes)
        # report no battery at all, so a missing key is not a failure.
        try:
            if float(info['Battery']['percentage']) < 50.0:
                raise hosts.AutoservVerifyError(
                        'Battery is less than 50%')
        except KeyError:
            logging.info('Cannot determine battery status - '
                         'skipping check.')

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'The DUT is plugged in to AC power'


class WritableVerifier(hosts.Verifier):
    """
    Confirm the stateful file systems are writable.

    The standard linux response to certain unexpected file system errors
    (including hardware errors in block devices) is to change the file
    system status to read-only.  This checks that that hasn't happened.

    The test covers the two file systems that need to be writable for
    critical operations like AU:
      * The (unencrypted) stateful system which includes
        /mnt/stateful_partition.
      * The encrypted stateful partition, which includes /var.

    The test doesn't check various bind mounts; those are expected to
    fail the same way as their underlying main mounts.  Whether the
    Linux kernel can guarantee that is untested...
    """

    # N.B. Order matters here:  Encrypted stateful is loop-mounted from
    # a file in unencrypted stateful, so we don't test for errors in
    # encrypted stateful if unencrypted fails.
    _TEST_DIRECTORIES = ['/mnt/stateful_partition', '/var/tmp']

    def verify(self, host):
        # pylint: disable=missing-docstring
        # This deliberately stops looking after the first error.
        # See above for the details.
        for testdir in self._TEST_DIRECTORIES:
            filename = os.path.join(testdir, 'writable_test')
            command = 'touch %s && rm %s' % (filename, filename)
            rv = host.run(command=command, ignore_status=True)
            if rv.exit_status != 0:
                msg = 'Can\'t create a file in %s' % testdir
                raise hosts.AutoservVerifyError(msg)

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'The stateful filesystems are writable'


class EXT4fsErrorVerifier(hosts.Verifier):
    """
    Confirm we have not seen critical file system kernel errors.
    """

    def verify(self, host):
        # pylint: disable=missing-docstring
        # grep for stateful FS errors of the type "EXT4-fs error (device sda1):"
        command = ("dmesg | grep -E \"EXT4-fs error \(device "
                   "$(cut -d ' ' -f 5,9 /proc/$$/mountinfo | "
                   "grep -e '^/mnt/stateful_partition ' | "
                   "cut -d ' ' -f 2 | cut -d '/' -f 3)\):\"")
        output = host.run(command=command, ignore_status=True).stdout
        if output:
            sample = output.splitlines()[0]
            message = 'Saw file system error: %s' % sample
            raise hosts.AutoservVerifyError(message)
        # Check for other critical FS errors.
        command = 'dmesg | grep "This should not happen!! Data will be lost"'
        output = host.run(command=command, ignore_status=True).stdout
        if output:
            message = 'Saw file system error: Data will be lost'
            raise hosts.AutoservVerifyError(message)
        # NOTE(review): a previous revision logged 'Could not determine
        # stateful mount.' whenever the grep above found nothing, which
        # fired on every healthy host; an empty grep result here is the
        # expected good case, so nothing is logged.

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Did not find critical file system errors'


class UpdateSuccessVerifier(hosts.Verifier):
    """
    Checks that the DUT successfully finished its last provision job.

    At the start of any update (e.g. for a Provision job), the code
    creates a marker file named `PROVISION_FAILED`.  The file is located
    in a part of the stateful partition that will be removed if an
    update finishes successfully.  Thus, the presence of the file
    indicates that a prior update failed.

    The verifier tests for the existence of the marker file and fails if
    it still exists.
    """

    def verify(self, host):
        # pylint: disable=missing-docstring
        result = host.run('test -f %s' % autoupdater.PROVISION_FAILED,
                          ignore_status=True)
        if result.exit_status == 0:
            raise hosts.AutoservVerifyError(
                    'Last AU on this DUT failed')

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'The most recent AU attempt on this DUT succeeded'


class TPMStatusVerifier(hosts.Verifier):
    """Verify that the host's TPM is in a good state."""

    def verify(self, host):
        # pylint: disable=missing-docstring
        if _is_virtual_machine(host):
            # We do not forward host TPM / emulated TPM to qemu VMs, so skip
            # this verification step.
            logging.debug('Skipped verification %s on VM', self)
            return

        try:
            status = CryptohomeStatus(host)
        except hosts.AutoservVerifyError:
            logging.info('Cannot determine the Cryptohome valid status - '
                         'skipping check.')
            return
        try:
            tpm = status['tpm']
            if not tpm['enabled']:
                raise hosts.AutoservVerifyError(
                        'TPM is not enabled -- Hardware is not working.')
            if not tpm['can_connect']:
                raise hosts.AutoservVerifyError(
                        ('TPM connect failed -- '
                         'last_error=%d.' % tpm['last_error']))
            if tpm['owned'] and not tpm['can_load_srk']:
                raise hosts.AutoservVerifyError(
                        'Cannot load the TPM SRK')
            if tpm['can_load_srk'] and not tpm['can_load_srk_pubkey']:
                raise hosts.AutoservVerifyError(
                        'Cannot load the TPM SRK public key')
        except KeyError:
            logging.info('Cannot determine the Cryptohome valid status - '
                         'skipping check.')

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'The host\'s TPM is available and working'


class PythonVerifier(hosts.Verifier):
    """Confirm the presence of a working Python interpreter."""

    def verify(self, host):
        # pylint: disable=missing-docstring
        result = host.run('python -c "import cPickle"',
                          ignore_status=True)
        if result.exit_status != 0:
            message = 'The python interpreter is broken'
            if result.exit_status == 127:
                # Exit status 127 means the shell could not find python
                # at all, as opposed to a broken installation.
                search = host.run('which python', ignore_status=True)
                if search.exit_status != 0 or not search.stdout:
                    message = ('Python is missing; may be caused by '
                               'powerwash')
            raise hosts.AutoservVerifyError(message)

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Python on the host is installed and working'


class DevModeVerifier(hosts.Verifier):
    """Verify that the host is not in dev mode."""

    def verify(self, host):
        # pylint: disable=missing-docstring
        # Some pools are allowed to be in dev mode
        info = host.host_info_store.get()
        if (_DEV_MODE_ALWAYS_ALLOWED or
                bool(info.pools & _DEV_MODE_ALLOWED_POOLS)):
            return

        result = host.run('crossystem devsw_boot', ignore_status=True).stdout
        if result != '0':
            raise hosts.AutoservVerifyError('The host is in dev mode')

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'The host should not be in dev mode'


class HWIDVerifier(hosts.Verifier):
    """Verify that the host has HWID & serial number."""

    def verify(self, host):
        # pylint: disable=missing-docstring
        # This verifier is best-effort: it refreshes the stored HWID and
        # serial number but never fails the host, so all errors are
        # logged and swallowed.
        try:
            info = host.host_info_store.get()

            hwid = host.run('crossystem hwid', ignore_status=True).stdout
            if hwid:
                info.attributes['HWID'] = hwid

            serial_number = host.run('vpd -g serial_number',
                                     ignore_status=True).stdout
            if serial_number:
                info.attributes['serial_number'] = serial_number

            # Only commit when something actually changed, to avoid
            # needless writes to the host info store.
            if info != host.host_info_store.get():
                host.host_info_store.commit(info)
        except Exception as e:
            logging.exception('Failed to get HWID & Serial Number for host '
                              '%s: %s', host.hostname, str(e))

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'The host should have valid HWID and Serial Number'


class JetstreamTpmVerifier(hosts.Verifier):
    """Verify that Jetstream TPM is in a good state."""

    @retry.retry(error.AutoservError, timeout_min=2, delay_sec=10)
    def verify(self, host):
        # pylint: disable=missing-docstring
        try:
            status = CryptohomeStatus(host)
            if not status.tpm_enabled:
                raise hosts.AutoservVerifyError('TPM is not enabled')
            if not status.tpm_owned:
                raise hosts.AutoservVerifyError('TPM is not owned')
            if not status.tpm_can_load_srk:
                raise hosts.AutoservVerifyError('TPM cannot load SRK')
            if not status.tpm_can_load_srk_pubkey:
                raise hosts.AutoservVerifyError('TPM cannot load SRK pubkey')

            # Check that the TPM is fully initialized. The output of this
            # command is line-oriented property/value pairs.
            result = host.run('cryptohome --action=tpm_status')
            if 'TPM Ready: true' not in result.stdout:
                raise hosts.AutoservVerifyError('TPM is not ready')
        except error.AutoservRunError:
            raise hosts.AutoservVerifyError(
                    'Could not determine TPM status')

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Jetstream TPM state check'


class JetstreamAttestationVerifier(hosts.Verifier):
    """Verify that Jetstream attestation client has a certificate."""

    @retry.retry(error.AutoservError, timeout_min=2, delay_sec=10)
    def verify(self, host):
        # pylint: disable=missing-docstring
        try:
            # This output is in text protobuf format.
            result = host.run('cryptohome --action=tpm_more_status')
            if 'attestation_prepared: true' not in result.stdout:
                raise hosts.AutoservVerifyError(
                        'Attestation has not been prepared')

            result = host.run('cryptohome --action=tpm_attestation_get_ek')
            if 'EK Certificate' not in result.stdout:
                raise hosts.AutoservVerifyError(
                        'Endorsement certificate not found')
        except error.AutoservRunError:
            raise hosts.AutoservVerifyError(
                    'Unable to fetch endorsement certificate')

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Jetstream attestation endorsement check'


class JetstreamServicesVerifier(hosts.Verifier):
    """Verify that Jetstream services are running."""

    # Retry for b/62576902
    @retry.retry(error.AutoservError, timeout_min=1, delay_sec=10)
    def verify(self, host):
        # pylint: disable=missing-docstring
        try:
            if not host.upstart_status('ap-controller'):
                raise hosts.AutoservVerifyError(
                        'ap-controller service is not running')
        except error.AutoservRunError:
            raise hosts.AutoservVerifyError(
                    'ap-controller service not found')

        try:
            host.run('pgrep ap-controller')
        except error.AutoservRunError:
            raise hosts.AutoservVerifyError(
                    'ap-controller process is not running')

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Jetstream services must be running'


class KvmExistsVerifier(hosts.Verifier):
    """Verify that /dev/kvm exists if it should be there"""

    def verify(self, host):
        # pylint: disable=missing-docstring
        # Fail only when the device supports VMs (vm_concierge is
        # installed) but /dev/kvm is absent.
        result = host.run('[ ! -e /dev/kvm -a -f /usr/bin/vm_concierge ]',
                          ignore_status=True)
        if result.exit_status == 0:
            raise hosts.AutoservVerifyError('/dev/kvm is missing')

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return '/dev/kvm should exist if device supports Linux VMs'


class _ResetRepairAction(hosts.RepairAction):
    """Common handling for repair actions that reset a DUT."""

    def _collect_logs(self, host):
        """Collect logs from a successfully repaired DUT."""
        dirname = 'after_%s' % self.tag
        local_log_dir = crashcollect.get_crashinfo_dir(host, dirname)
        host.collect_logs('/var/log', local_log_dir, ignore_errors=True)
        # Collect crash info.
        crashcollect.get_crashinfo(host, None)

    def _check_reset_success(self, host):
        """Check whether reset succeeded, and gather logs if possible."""
        if host.wait_up(host.BOOT_TIMEOUT):
            try:
                # Collect logs once we regain ssh access before
                # clobbering them.
                self._collect_logs(host)
            except Exception:
                # If the DUT is up, we want to declare success, even if
                # log gathering fails for some reason.  So, if there's
                # a failure, just log it and move on.
                logging.exception('Non-critical failure in log '
                                  'collection during %s.',
                                  self.tag)
            return
        raise hosts.AutoservRepairError(
                'Host %s is still offline after %s.' %
                (host.hostname, self.tag), 'failed_to_boot_after_' + self.tag)


class ServoSysRqRepair(_ResetRepairAction):
    """
    Repair a Chrome device by sending a system request to the kernel.

    Sending 3 times the Alt+VolUp+x key combination (aka sysrq-x)
    will ask the kernel to panic itself and reboot while conserving
    the kernel logs in console ramoops.
    """

    def repair(self, host):
        # pylint: disable=missing-docstring
        repair_utils.require_servo(host)
        # Press 3 times Alt+VolUp+X
        # no checking DUT health between each press as
        # killing Chrome is not really likely to fix the DUT SSH.
        for _ in range(3):
            try:
                host.servo.sysrq_x()
            except error.TestFail as ex:
                raise hosts.AutoservRepairError(
                        'cannot press sysrq-x: %s.' % str(ex),
                        'cannot_press_sysrq_x')
            # less than 5 seconds between presses.
            time.sleep(2.0)
        self._check_reset_success(host)

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Reset the DUT via keyboard sysrq-x'


class ServoResetRepair(_ResetRepairAction):
    """Repair a Chrome device by resetting it with servo."""

    def repair(self, host):
        # pylint: disable=missing-docstring
        repair_utils.require_servo(host)
        host.servo.get_power_state_controller().reset()
        self._check_reset_success(host)

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Reset the DUT via servo'


class CrosRebootRepair(repair_utils.RebootRepair):
    """Repair a CrOS target by clearing dev mode and rebooting it."""

    def repair(self, host):
        # pylint: disable=missing-docstring
        # N.B. We need to reboot regardless of whether clearing
        # dev_mode succeeds or fails.
        host.run('/usr/share/vboot/bin/set_gbb_flags.sh 0',
                 ignore_status=True)
        host.run('crossystem disable_dev_request=1',
                 ignore_status=True)
        super(CrosRebootRepair, self).repair(host)

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Reset GBB flags and Reboot the host'


class AutoUpdateRepair(hosts.RepairAction):
    """
    Repair by re-installing a test image using autoupdate.

    Try to install the DUT's designated "stable test image" using the
    standard procedure for installing a new test image via autoupdate.
    """

    def repair(self, host):
        # pylint: disable=missing-docstring
        image_name = host.get_cros_repair_image_name()
        logging.info('Staging build for AU: %s', image_name)
        devserver = dev_server.ImageServer.resolve(image_name, host.hostname)
        devserver.trigger_download(image_name, synchronous=False)
        update_url = tools.image_url_pattern() % (
                devserver.url(), image_name)
        afe_utils.machine_install_and_update_labels(host, update_url)

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Re-install the stable build via AU'


class PowerWashRepair(AutoUpdateRepair):
    """
    Powerwash the DUT, then re-install using autoupdate.

    Powerwash the DUT, then attempt to re-install a stable test image as
    for `AutoUpdateRepair`.
    """

    def repair(self, host):
        # pylint: disable=missing-docstring
        host.run('echo "fast safe" > '
                 '/mnt/stateful_partition/factory_install_reset')
        host.reboot(timeout=host.POWERWASH_BOOT_TIMEOUT, wait=True)
        super(PowerWashRepair, self).repair(host)

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Powerwash and then re-install the stable build via AU'


class ServoInstallRepair(hosts.RepairAction):
    """
    Reinstall a test image from USB using servo.

    Use servo to re-install the DUT's designated "stable test image"
    from servo-attached USB storage.
    """

    def repair(self, host):
        # pylint: disable=missing-docstring
        repair_utils.require_servo(host)
        host.servo_install(host.stage_image_for_servo())

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Reinstall from USB using servo'


class ColdRebootRepair(_ResetRepairAction):
    """
    Repair a Chrome device by performing a cold reboot that resets the EC.

    Use ectool to perform a cold reboot which will reset the EC.
    """

    def repair(self, host):
        # pylint: disable=missing-docstring
        host.reboot(reboot_cmd='ectool reboot_ec cold')
        self._check_reset_success(host)

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Reset the DUT via cold reboot with ectool'


class JetstreamTpmRepair(hosts.RepairAction):
    """Repair by resetting TPM and rebooting."""

    def repair(self, host):
        # pylint: disable=missing-docstring
        host.run('rm -f /var/cache/ap/setup-network', ignore_status=True)
        host.run('rm -f /home/chronos/.oobe_completed', ignore_status=True)
        host.run('rm -f /home/.shadow/.can_attempt_ownership',
                 ignore_status=True)
        host.run('crossystem clear_tpm_owner_request=1', ignore_status=True)
        host.reboot()

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Reset TPM and reboot'


class JetstreamServiceRepair(hosts.RepairAction):
    """Repair by restarting Jetstream services."""

    def repair(self, host):
        # pylint: disable=missing-docstring
        host.cleanup_services()

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Restart Jetstream services'


def _cros_verify_dag():
    """Return the verification DAG for a `CrosHost`."""
    FirmwareStatusVerifier = cros_firmware.FirmwareStatusVerifier
    FirmwareVersionVerifier = cros_firmware.FirmwareVersionVerifier
    verify_dag = (
        (repair_utils.SshVerifier,         'ssh',      ()),
        (DevModeVerifier,                  'devmode',  ('ssh',)),
        (HWIDVerifier,                     'hwid',     ('ssh',)),
        (ACPowerVerifier,                  'power',    ('ssh',)),
        (EXT4fsErrorVerifier,              'ext4',     ('ssh',)),
        (WritableVerifier,                 'writable', ('ssh',)),
        (TPMStatusVerifier,                'tpm',      ('ssh',)),
        (UpdateSuccessVerifier,            'good_au',  ('ssh',)),
        (FirmwareStatusVerifier,           'fwstatus', ('ssh',)),
        (FirmwareVersionVerifier,          'rwfw',     ('ssh',)),
        (PythonVerifier,                   'python',   ('ssh',)),
        (repair_utils.LegacyHostVerifier,  'cros',     ('ssh',)),
        (KvmExistsVerifier,                'ec_reset', ('ssh',)),
    )
    return verify_dag


def _cros_basic_repair_actions():
    """Return the basic repair actions for a `CrosHost`"""
    FirmwareRepair = cros_firmware.FirmwareRepair
    repair_actions = (
        # RPM cycling must precede Servo reset:  if the DUT has a dead
        # battery, we need to reattach AC power before we reset via servo.
        (repair_utils.RPMCycleRepair, 'rpm', (), ('ssh', 'power',)),
        (ServoSysRqRepair, 'sysrq', (), ('ssh',)),
        (ServoResetRepair, 'servoreset', (), ('ssh',)),

        # N.B. FirmwareRepair can't fix a 'good_au' failure directly,
        # because it doesn't remove the flag file that triggers the
        # failure.  We include it as a repair trigger because it's
        # possible that the last update failed because of the firmware,
        # and we want the repair steps below to be able to trust the
        # firmware.
        (FirmwareRepair, 'firmware', (), ('ssh', 'fwstatus', 'good_au',)),

        (CrosRebootRepair, 'reboot', ('ssh',), ('devmode', 'writable',)),

        (ColdRebootRepair, 'coldboot', ('ssh',), ('ec_reset',)),
    )
    return repair_actions


def _cros_extended_repair_actions(au_triggers=_CROS_AU_TRIGGERS,
                                  powerwash_triggers=_CROS_POWERWASH_TRIGGERS,
                                  usb_triggers=_CROS_USB_TRIGGERS):
    """Return the extended repair actions for a `CrosHost`"""

    # The dependencies and triggers for the 'au', 'powerwash', and 'usb'
    # repair actions stack up:  Each one is able to repair progressively
    # more verifiers than the one before.  The 'triggers' lists specify
    # the progression.

    repair_actions = (
        (AutoUpdateRepair, 'au',
         usb_triggers + powerwash_triggers, au_triggers),
        (PowerWashRepair, 'powerwash',
         usb_triggers, powerwash_triggers + au_triggers),
        (ServoInstallRepair, 'usb',
         (), usb_triggers + powerwash_triggers + au_triggers),
    )
    return repair_actions


def _cros_repair_actions():
    """Return the repair actions for a `CrosHost`."""
    repair_actions = (_cros_basic_repair_actions() +
                      _cros_extended_repair_actions())
    return repair_actions


def create_cros_repair_strategy():
    """Return a `RepairStrategy` for a `CrosHost`."""
    verify_dag = _cros_verify_dag()
    repair_actions = _cros_repair_actions()
    return hosts.RepairStrategy(verify_dag, repair_actions, 'cros')


def _moblab_verify_dag():
    """Return the verification DAG for a `MoblabHost`."""
    FirmwareVersionVerifier = cros_firmware.FirmwareVersionVerifier
    verify_dag = (
        (repair_utils.SshVerifier,        'ssh',    ()),
        (ACPowerVerifier,                 'power',  ('ssh',)),
        (FirmwareVersionVerifier,         'rwfw',   ('ssh',)),
        (PythonVerifier,                  'python', ('ssh',)),
        (repair_utils.LegacyHostVerifier, 'cros',   ('ssh',)),
    )
    return verify_dag


def _moblab_repair_actions():
    """Return the repair actions for a `MoblabHost`."""
    repair_actions = (
        (repair_utils.RPMCycleRepair, 'rpm', (), ('ssh', 'power',)),
        (AutoUpdateRepair, 'au', ('ssh',), _CROS_AU_TRIGGERS),
    )
    return repair_actions


def create_moblab_repair_strategy():
    """
    Return a `RepairStrategy` for a `MoblabHost`.

    Moblab is a subset of the CrOS verify and repair.  Several pieces
    are removed because they're not expected to be meaningful.  Some
    others are removed for more specific reasons:

    'tpm':  Moblab DUTs don't run the tests that matter to this
        verifier.  TODO(jrbarnette)  This assertion is unproven.

    'good_au':  This verifier can't pass, because the Moblab AU
        procedure doesn't properly delete the PROVISION_FAILED file.
        TODO(jrbarnette) We should refactor ChromiumOSUpdater so
        that it can be different for Moblab.

    'firmware':  Moblab DUTs shouldn't be in FAFT pools, so we don't try
        this.

    'powerwash':  Powerwash on Moblab causes trouble with deleting the
        DHCP leases file, so we skip it.
    """
    verify_dag = _moblab_verify_dag()
    repair_actions = _moblab_repair_actions()
    return hosts.RepairStrategy(verify_dag, repair_actions, 'moblab')


def _jetstream_repair_actions():
    """Return the repair actions for a `JetstreamHost`."""
    au_triggers = _CROS_AU_TRIGGERS
    jetstream_tpm_triggers = ('jetstream_tpm', 'jetstream_attestation')
    jetstream_service_triggers = (jetstream_tpm_triggers +
                                  ('jetstream_services',))
    repair_actions = (
        _cros_basic_repair_actions() +
        (
            (JetstreamTpmRepair, 'jetstream_tpm_repair',
             _CROS_USB_TRIGGERS + _CROS_POWERWASH_TRIGGERS,
             au_triggers + jetstream_tpm_triggers),

            (JetstreamServiceRepair, 'jetstream_service_repair',
             _CROS_USB_TRIGGERS + _CROS_POWERWASH_TRIGGERS + (
                 'jetstream_tpm', 'jetstream_attestation'),
             au_triggers + jetstream_service_triggers),
        ) +
        _cros_extended_repair_actions(
            au_triggers=au_triggers + jetstream_service_triggers))
    return repair_actions


def _jetstream_verify_dag():
    """Return the verification DAG for a `JetstreamHost`."""
    verify_dag = _cros_verify_dag() + (
        (JetstreamTpmVerifier, 'jetstream_tpm', ('ssh',)),
        (JetstreamAttestationVerifier, 'jetstream_attestation', ('ssh',)),
        (JetstreamServicesVerifier, 'jetstream_services', ('ssh',)),
    )
    return verify_dag


def create_jetstream_repair_strategy():
    """
    Return a `RepairStrategy` for a `JetstreamHost`.

    The Jetstream repair strategy is based on the CrOS verify and repair,
    but adds the JetstreamServicesVerifier.
    """
    verify_dag = _jetstream_verify_dag()
    repair_actions = _jetstream_repair_actions()
    return hosts.RepairStrategy(verify_dag, repair_actions, 'jetstream')


# TODO(pprabhu) Move this to a better place. I have no idea what that place
# would be.
def _is_virtual_machine(host):
    """Determine whether the given |host| is a virtual machine.

    @param host: a hosts.Host object.
    @returns True if the host is a virtual machine, False otherwise.
    """
    output = host.run('cat /proc/cpuinfo | grep "model name"',
                      ignore_status=True)
    return (output.exit_status == 0 and output.stdout and
            'qemu' in output.stdout.lower())


class CryptohomeStatus(dict):
    """Wrapper for getting cryptohome status from a host."""

    def __init__(self, host):
        super(CryptohomeStatus, self).__init__()
        self.update(_get_cryptohome_status(host))
        # Cached 'tpm' sub-dictionary; _get_cryptohome_status guarantees
        # the key is present.
        self.tpm = self['tpm']

    @property
    def tpm_enabled(self):
        # pylint: disable=missing-docstring
        return self.tpm.get('enabled') == True

    @property
    def tpm_owned(self):
        # pylint: disable=missing-docstring
        return self.tpm.get('owned') == True

    @property
    def tpm_can_load_srk(self):
        # pylint: disable=missing-docstring
        return self.tpm.get('can_load_srk') == True

    @property
    def tpm_can_load_srk_pubkey(self):
        # pylint: disable=missing-docstring
        return self.tpm.get('can_load_srk_pubkey') == True


def _get_cryptohome_status(host):
    """Returns a dictionary containing the cryptohome status.

    @param host: a hosts.Host object.
    @returns A dictionary containing the cryptohome status.
    @raises AutoservVerifyError: if the output could not be parsed or the TPM
            status is missing.
    @raises hosts.AutoservRunError: if the cryptohome command failed.
    """
    # This cryptohome command emits status information in JSON format. It
    # looks something like this:
    # {
    #    "installattrs": {
    #       ...
    #    },
    #    "mounts": [ {
    #       ...
    #    } ],
    #    "tpm": {
    #       "being_owned": false,
    #       "can_connect": true,
    #       "can_decrypt": false,
    #       "can_encrypt": false,
    #       "can_load_srk": true,
    #       "can_load_srk_pubkey": true,
    #       "enabled": true,
    #       "has_context": true,
    #       "has_cryptohome_key": false,
    #       "has_key_handle": false,
    #       "last_error": 0,
    #       "owned": true
    #    }
    # }
    try:
        output = host.run('cryptohome --action=status').stdout.strip()
        status = json.loads(output)
        if 'tpm' not in status:
            raise hosts.AutoservVerifyError('TPM status is missing')
        return status
    except ValueError:
        raise hosts.AutoservVerifyError('Unable to parse cryptohome status')