# Copyright 2016 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import json
import logging
import os
import time

import common
from autotest_lib.client.common_lib import error
from autotest_lib.client.common_lib import global_config
from autotest_lib.client.common_lib import hosts
from autotest_lib.client.common_lib.cros import dev_server
from autotest_lib.client.common_lib.cros import retry
from autotest_lib.server import afe_utils
from autotest_lib.server import crashcollect
from autotest_lib.server.cros import autoupdater
from autotest_lib.server.cros.dynamic_suite import tools
from autotest_lib.server.hosts import cros_firmware
from autotest_lib.server.hosts import repair_utils

# _DEV_MODE_ALLOWED_POOLS - The set of pools that are allowed to be
# in dev mode (usually, those should be unmanaged devices)
#
_DEV_MODE_ALLOWED_POOLS = set(
    global_config.global_config.get_config_value(
            'CROS',
            'pools_dev_mode_allowed',
            type=str,
            default='',
            allow_blank=True).split(','))

# Setting to suppress dev mode check; primarily used for moblab where all
# DUT's are in dev mode.
_DEV_MODE_ALWAYS_ALLOWED = global_config.global_config.get_config_value(
    'CROS',
    'dev_mode_allowed',
    type=bool,
    default=False)

# Triggers for the 'au', 'powerwash', and 'usb' repair actions.
# These are also used as dependencies in the `CrosHost` repair
# sequence, as follows:
#
# au:
#   - triggers: _CROS_AU_TRIGGERS
#   - depends on: _CROS_USB_TRIGGERS + _CROS_POWERWASH_TRIGGERS
#
# powerwash:
#   - triggers: _CROS_POWERWASH_TRIGGERS + _CROS_AU_TRIGGERS
#   - depends on: _CROS_USB_TRIGGERS
#
# usb:
#   - triggers: _CROS_USB_TRIGGERS + _CROS_POWERWASH_TRIGGERS +
#               _CROS_AU_TRIGGERS
#   - no dependencies
#
# N.B. AC power detection depends on software on the DUT, and there
# have been bugs where detection failed even though the DUT really
# did have power.  So, we make the 'power' verifier a trigger for
# reinstall repair actions, too.
#
# TODO(jrbarnette):  AU repair can't fix all problems reported by
# the 'cros' verifier; it's listed as an AU trigger as a
# simplification.  The ultimate fix is to split the 'cros' verifier
# into smaller individual verifiers.
_CROS_AU_TRIGGERS = ('power', 'rwfw', 'python', 'cros',)
_CROS_EXTENDED_AU_TRIGGERS = _CROS_AU_TRIGGERS + ('ec_reset',)
_CROS_POWERWASH_TRIGGERS = ('tpm', 'good_au', 'ext4',)
_CROS_USB_TRIGGERS = ('ssh', 'writable', 'stop_start_ui',)
_JETSTREAM_USB_TRIGGERS = ('ssh', 'writable',)


class ACPowerVerifier(hosts.Verifier):
    """Check for AC power and a reasonable battery charge."""

    def verify(self, host):
        # pylint: disable=missing-docstring
        try:
            info = host.get_power_supply_info()
        except error.AutoservRunError:
            raise hosts.AutoservVerifyError(
                'Failed to get power supply info')

        try:
            if info['Line Power']['online'] != 'yes':
                raise hosts.AutoservVerifyError(
                    'AC power is not plugged in')
        except KeyError:
            raise hosts.AutoservVerifyError(
                'Cannot determine AC power status')

        # A missing 'Battery' entry is tolerated: log and skip the
        # charge check rather than failing verification.
        # NOTE(review): only KeyError is caught here; a non-numeric
        # 'percentage' value would raise an uncaught ValueError from
        # float() -- confirm whether that is intended.
        try:
            if float(info['Battery']['percentage']) < 50.0:
                raise hosts.AutoservVerifyError(
                    'Battery is less than 50%')
        except KeyError:
            logging.info('Cannot determine battery status - '
                         'skipping check.')

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'The DUT is plugged in to AC power'


class WritableVerifier(hosts.Verifier):
    """
    Confirm the stateful file systems are writable.

    The standard linux response to certain unexpected file system errors
    (including hardware errors in block devices) is to change the file
    system status to read-only.  This checks that that hasn't happened.

    The test covers the two file systems that need to be writable for
    critical operations like AU:
      * The (unencrypted) stateful system which includes
        /mnt/stateful_partition.
      * The encrypted stateful partition, which includes /var.

    The test doesn't check various bind mounts; those are expected to
    fail the same way as their underlying main mounts.  Whether the
    Linux kernel can guarantee that is untested...
    """

    # N.B. Order matters here:  Encrypted stateful is loop-mounted from
    # a file in unencrypted stateful, so we don't test for errors in
    # encrypted stateful if unencrypted fails.
    _TEST_DIRECTORIES = ['/mnt/stateful_partition', '/var/tmp']

    def verify(self, host):
        # pylint: disable=missing-docstring
        # This deliberately stops looking after the first error.
        # See above for the details.
        for testdir in self._TEST_DIRECTORIES:
            # Create-then-delete probes that the filesystem accepts
            # writes; a read-only remount makes `touch` fail.
            filename = os.path.join(testdir, 'writable_test')
            command = 'touch %s && rm %s' % (filename, filename)
            rv = host.run(command=command, ignore_status=True)
            if rv.exit_status != 0:
                msg = 'Can\'t create a file in %s' % testdir
                raise hosts.AutoservVerifyError(msg)

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'The stateful filesystems are writable'


class EXT4fsErrorVerifier(hosts.Verifier):
    """
    Confirm we have not seen critical file system kernel errors.
    """
    def verify(self, host):
        # pylint: disable=missing-docstring
        # grep for stateful FS errors of the type "EXT4-fs error (device sda1):"
        # The embedded $(...) resolves the block device currently backing
        # /mnt/stateful_partition via /proc/$$/mountinfo.
        command = ("dmesg | grep -E \"EXT4-fs error \(device "
                   "$(cut -d ' ' -f 5,9 /proc/$$/mountinfo | "
                   "grep -e '^/mnt/stateful_partition ' | "
                   "cut -d ' ' -f 2 | cut -d '/' -f 3)\):\"")
        output = host.run(command=command, ignore_status=True).stdout
        if output:
            sample = output.splitlines()[0]
            message = 'Saw file system error: %s' % sample
            raise hosts.AutoservVerifyError(message)
        # Check for other critical FS errors.
        command = 'dmesg | grep "This should not happen!! Data will be lost"'
        output = host.run(command=command, ignore_status=True).stdout
        if output:
            message = 'Saw file system error: Data will be lost'
            raise hosts.AutoservVerifyError(message)
        else:
            # NOTE(review): this `else` is attached to the *second* check,
            # so "Could not determine stateful mount" is logged on every
            # clean pass of that grep.  It looks like it was meant to
            # report an empty $(...) substitution in the first command --
            # confirm intent before relying on this log line.
            logging.error('Could not determine stateful mount.')

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Did not find critical file system errors'


class UpdateSuccessVerifier(hosts.Verifier):
    """
    Checks that the DUT successfully finished its last provision job.

    At the start of any update (e.g. for a Provision job), the code
    creates a marker file named `PROVISION_FAILED`.  The file is located
    in a part of the stateful partition that will be removed if an
    update finishes successfully.  Thus, the presence of the file
    indicates that a prior update failed.

    The verifier tests for the existence of the marker file and fails if
    it still exists.
    """
    def verify(self, host):
        # pylint: disable=missing-docstring
        result = host.run('test -f %s' % autoupdater.PROVISION_FAILED,
                          ignore_status=True)
        # `test -f` exits 0 iff the marker file exists.
        if result.exit_status == 0:
            raise hosts.AutoservVerifyError(
                'Last AU on this DUT failed')

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'The most recent AU attempt on this DUT succeeded'


class TPMStatusVerifier(hosts.Verifier):
    """Verify that the host's TPM is in a good state."""

    def verify(self, host):
        # pylint: disable=missing-docstring
        if _is_virtual_machine(host):
            # We do not forward host TPM / emulated TPM to qemu VMs, so skip
            # this verification step.
            logging.debug('Skipped verification %s on VM', self)
            return

        try:
            status = CryptohomeStatus(host)
        except hosts.AutoservVerifyError:
            logging.info('Cannot determine the Cryptohome valid status - '
                         'skipping check.')
            return
        # Missing keys in the status dict are treated as "can't tell",
        # not as failures (see the KeyError handler below).
        try:
            tpm = status['tpm']
            if not tpm['enabled']:
                raise hosts.AutoservVerifyError(
                        'TPM is not enabled -- Hardware is not working.')
            if not tpm['can_connect']:
                raise hosts.AutoservVerifyError(
                        ('TPM connect failed -- '
                         'last_error=%d.' % tpm['last_error']))
            if tpm['owned'] and not tpm['can_load_srk']:
                raise hosts.AutoservVerifyError(
                        'Cannot load the TPM SRK')
            if tpm['can_load_srk'] and not tpm['can_load_srk_pubkey']:
                raise hosts.AutoservVerifyError(
                        'Cannot load the TPM SRK public key')
        except KeyError:
            logging.info('Cannot determine the Cryptohome valid status - '
                         'skipping check.')

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'The host\'s TPM is available and working'


class PythonVerifier(hosts.Verifier):
    """Confirm the presence of a working Python interpreter."""

    def verify(self, host):
        # pylint: disable=missing-docstring
        result = host.run('python -c "import json"',
                          ignore_status=True)
        if result.exit_status != 0:
            message = 'The python interpreter is broken'
            # Exit status 127 is the shell's "command not found";
            # distinguish a missing interpreter from a broken one.
            if result.exit_status == 127:
                search = host.run('which python', ignore_status=True)
                if search.exit_status != 0 or not search.stdout:
                    message = ('Python is missing; may be caused by '
                               'powerwash')
            raise hosts.AutoservVerifyError(message)

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Python on the host is installed and working'


class DevModeVerifier(hosts.Verifier):
    """Verify that the host is not in dev mode."""

    def verify(self, host):
        # pylint: disable=missing-docstring
        # Some pools are allowed to be in dev mode
        info = host.host_info_store.get()
        if (_DEV_MODE_ALWAYS_ALLOWED or
                bool(info.pools & _DEV_MODE_ALLOWED_POOLS)):
            return

        # NOTE(review): this compares the raw stdout to '0'; it assumes
        # `crossystem devsw_boot` emits exactly '0' with no trailing
        # newline -- confirm against the tool's output format.
        result = host.run('crossystem devsw_boot', ignore_status=True).stdout
        if result != '0':
            raise hosts.AutoservVerifyError('The host is in dev mode')

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'The host should not be in dev mode'


class HWIDVerifier(hosts.Verifier):
    """Verify that the host has HWID & serial number."""

    def verify(self, host):
        # pylint: disable=missing-docstring
        # Best-effort: any failure is logged, never raised, so this
        # verifier cannot fail the DUT.
        try:
            info = host.host_info_store.get()

            hwid = host.run('crossystem hwid', ignore_status=True).stdout
            if hwid:
                info.attributes['HWID'] = hwid

            serial_number = host.run('vpd -g serial_number',
                                     ignore_status=True).stdout
            if serial_number:
                info.attributes['serial_number'] = serial_number

            # Only write back if something actually changed.
            if info != host.host_info_store.get():
                host.host_info_store.commit(info)
        except Exception as e:
            logging.exception('Failed to get HWID & Serial Number for host '
                              '%s: %s', host.hostname, str(e))

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'The host should have valid HWID and Serial Number'


class JetstreamTpmVerifier(hosts.Verifier):
    """Verify that Jetstream TPM is in a good state."""

    @retry.retry(error.AutoservError, timeout_min=2, delay_sec=10)
    def verify(self, host):
        # pylint: disable=missing-docstring
        try:
            status = CryptohomeStatus(host)
            if not status.tpm_enabled:
                raise hosts.AutoservVerifyError('TPM is not enabled')
            if not status.tpm_owned:
                raise hosts.AutoservVerifyError('TPM is not owned')
            if not status.tpm_can_load_srk:
                raise hosts.AutoservVerifyError('TPM cannot load SRK')
            if not status.tpm_can_load_srk_pubkey:
                raise hosts.AutoservVerifyError('TPM cannot load SRK pubkey')

            # Check that the TPM is fully initialized. The output of this
            # command is line-oriented property/value pairs.
            result = host.run('cryptohome --action=tpm_status')
            if 'TPM Ready: true' not in result.stdout:
                raise hosts.AutoservVerifyError('TPM is not ready')
        except error.AutoservRunError:
            raise hosts.AutoservVerifyError(
                'Could not determine TPM status')

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Jetstream TPM state check'


class JetstreamAttestationVerifier(hosts.Verifier):
    """Verify that Jetstream attestation client has a certificate."""

    @retry.retry(error.AutoservError, timeout_min=2, delay_sec=10)
    def verify(self, host):
        # pylint: disable=missing-docstring
        try:
            # This output is in text protobuf format.
            result = host.run('cryptohome --action=tpm_more_status')
            if 'attestation_prepared: true' not in result.stdout:
                raise hosts.AutoservVerifyError(
                    'Attestation has not been prepared')

            result = host.run('cryptohome --action=tpm_attestation_get_ek')
            if 'EK Certificate' not in result.stdout:
                raise hosts.AutoservVerifyError(
                    'Endorsement certificate not found')
        except error.AutoservRunError:
            raise hosts.AutoservVerifyError(
                'Unable to fetch endorsement certificate')

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Jetstream attestation endorsement check'


class JetstreamServicesVerifier(hosts.Verifier):
    """Verify that Jetstream services are running."""

    # Retry for b/62576902
    @retry.retry(error.AutoservError, timeout_min=1, delay_sec=10)
    def verify(self, host):
        # pylint: disable=missing-docstring
        try:
            if not host.upstart_status('ap-controller'):
                raise hosts.AutoservVerifyError(
                    'ap-controller service is not running')
        except error.AutoservRunError:
            raise hosts.AutoservVerifyError(
                'ap-controller service not found')

        # Double-check with pgrep that the process itself is alive, not
        # just that upstart thinks the job is started.
        try:
            host.run('pgrep ap-controller')
        except error.AutoservRunError:
            raise hosts.AutoservVerifyError(
                'ap-controller process is not running')

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Jetstream services must be running'


class KvmExistsVerifier(hosts.Verifier):
    """Verify that /dev/kvm exists if it should be there"""

    def verify(self, host):
        # pylint: disable=missing-docstring
        # Exit status 0 means: /dev/kvm is absent AND vm_concierge is
        # installed -- i.e. a VM-capable device missing its kvm node.
        result = host.run('[ ! -e /dev/kvm -a -f /usr/bin/vm_concierge ]',
                          ignore_status=True)
        if result.exit_status == 0:
            # Silently check if the kvm_transition flag is being used by Chrome
            # indicating /dev/kvm may not be present yet on this system.
            result = host.run('grep -qsxF "kvm_transition" '
                              '/etc/ui_use_flags.txt', ignore_status=True)
            if result.exit_status != 0:
                raise hosts.AutoservVerifyError('/dev/kvm is missing')

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return '/dev/kvm should exist if device supports Linux VMs'


class StopStartUIVerifier(hosts.Verifier):
    """Verify that command 'stop ui' won't crash the DUT.

    We run 'stop ui' in AU and provision.  We found that some bad images
    broke this command and then broke provisioning for all following
    tests.  We add this verifier to ensure it works, and to trigger
    reimaging to a good version if it fails.
    """
    def verify(self, host):
        try:
            # Only an SSH timeout is treated as failure; a nonzero exit
            # status from the commands themselves is ignored.
            host.run('stop ui && start ui', ignore_status=True, timeout=10)
        except error.AutoservSSHTimeout:
            raise hosts.AutoservVerifyError(
                "Got timeout when stop ui/start ui. DUT might crash.")

    @property
    def description(self):
        return 'The DUT image works fine when stop ui/start ui.'
class ServoTypeVerifier(hosts.Verifier):
    """Verify that servo_type attribute exists"""

    def verify(self, host):
        if not host.servo:
            logging.info("Host has no working servo.")
            return

        info = host.host_info_store.get()
        try:
            servo_type = host.servo.get_servo_version()
            # Keep the stored attribute in sync with the live servo.
            if servo_type != info.attributes.get('servo_type', ''):
                logging.info('servo_type mismatch detected, updating...')
                info.attributes['servo_type'] = servo_type
                host.host_info_store.commit(info)
        except Exception as e:
            # We don't want to fail the verifier and break DUTs here just
            # because of a servo issue.
            logging.error("Failed to update servo_type, %s", str(e))

    @property
    def description(self):
        return 'The host has servo_type attribute'


class _ResetRepairAction(hosts.RepairAction):
    """Common handling for repair actions that reset a DUT."""

    def _collect_logs(self, host):
        """Collect logs from a successfully repaired DUT."""
        dirname = 'after_%s' % self.tag
        local_log_dir = crashcollect.get_crashinfo_dir(host, dirname)
        host.collect_logs('/var/log', local_log_dir, ignore_errors=True)
        # Collect crash info.
        crashcollect.get_crashinfo(host, None)

    def _check_reset_success(self, host):
        """Check whether reset succeeded, and gather logs if possible.

        @raises hosts.AutoservRepairError: if the DUT doesn't come back
                up within host.BOOT_TIMEOUT.
        """
        if host.wait_up(host.BOOT_TIMEOUT):
            try:
                # Collect logs once we regain ssh access before
                # clobbering them.
                self._collect_logs(host)
            except Exception:
                # If the DUT is up, we want to declare success, even if
                # log gathering fails for some reason.  So, if there's
                # a failure, just log it and move on.
                logging.exception('Non-critical failure in log '
                                  'collection during %s.',
                                  self.tag)
            return
        raise hosts.AutoservRepairError(
            'Host %s is still offline after %s.' %
            (host.hostname, self.tag), 'failed_to_boot_after_' + self.tag)


class ServoSysRqRepair(_ResetRepairAction):
    """
    Repair a Chrome device by sending a system request to the kernel.

    Sending 3 times the Alt+VolUp+x key combination (aka sysrq-x)
    will ask the kernel to panic itself and reboot while conserving
    the kernel logs in console ramoops.
    """

    def repair(self, host):
        # pylint: disable=missing-docstring
        repair_utils.require_servo(host)
        # Press 3 times Alt+VolUp+X
        # no checking DUT health between each press as
        # killing Chrome is not really likely to fix the DUT SSH.
        for _ in range(3):
            try:
                host.servo.sysrq_x()
            # NOTE: Python 2 `except <type>, <name>` syntax; this module
            # predates Python 3 support.
            except error.TestFail, ex:
                raise hosts.AutoservRepairError(
                    'cannot press sysrq-x: %s.' % str(ex),
                    'cannot_press_sysrq_x')
            # less than 5 seconds between presses.
            time.sleep(2.0)
        self._check_reset_success(host)

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Reset the DUT via keyboard sysrq-x'


class ServoResetRepair(_ResetRepairAction):
    """Repair a Chrome device by resetting it with servo."""

    def repair(self, host):
        # pylint: disable=missing-docstring
        repair_utils.require_servo(host)
        host.servo.get_power_state_controller().reset()
        self._check_reset_success(host)

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Reset the DUT via servo'


class CrosRebootRepair(repair_utils.RebootRepair):
    """Repair a CrOS target by clearing dev mode and rebooting it."""

    def repair(self, host):
        # pylint: disable=missing-docstring
        # N.B. We need to reboot regardless of whether clearing
        # dev_mode succeeds or fails.
        host.run('/usr/share/vboot/bin/set_gbb_flags.sh 0',
                 ignore_status=True)
        host.run('crossystem disable_dev_request=1',
                 ignore_status=True)
        # The reboot itself is performed by the parent class.
        super(CrosRebootRepair, self).repair(host)

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Reset GBB flags and Reboot the host'


class AutoUpdateRepair(hosts.RepairAction):
    """
    Repair by re-installing a test image using autoupdate.

    Try to install the DUT's designated "stable test image" using the
    standard procedure for installing a new test image via autoupdate.
    """

    def repair(self, host):
        # pylint: disable=missing-docstring
        image_name = host.get_cros_repair_image_name()
        logging.info('Staging build for AU: %s', image_name)
        devserver = dev_server.ImageServer.resolve(image_name, host.hostname)
        # Kick off staging asynchronously; the install below will wait
        # on the devserver as needed.
        devserver.trigger_download(image_name, synchronous=False)
        update_url = tools.image_url_pattern() % (
            devserver.url(), image_name)
        afe_utils.machine_install_and_update_labels(host, update_url)

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Re-install the stable build via AU'


class PowerWashRepair(AutoUpdateRepair):
    """
    Powerwash the DUT, then re-install using autoupdate.

    Powerwash the DUT, then attempt to re-install a stable test image as
    for `AutoUpdateRepair`.
    """

    def repair(self, host):
        # pylint: disable=missing-docstring
        # Writing this marker file requests a powerwash on next boot.
        host.run('echo "fast safe" > '
                 '/mnt/stateful_partition/factory_install_reset')
        host.reboot(timeout=host.POWERWASH_BOOT_TIMEOUT, wait=True)
        super(PowerWashRepair, self).repair(host)

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Powerwash and then re-install the stable build via AU'


class ServoInstallRepair(hosts.RepairAction):
    """
    Reinstall a test image from USB using servo.

    Use servo to re-install the DUT's designated "stable test image"
    from servo-attached USB storage.
    """

    def repair(self, host):
        # pylint: disable=missing-docstring
        repair_utils.require_servo(host)
        image_name, update_url = host.stage_image_for_servo()
        # Clear provision labels before the install, re-add after
        # success so the labels never claim a version that isn't there.
        afe_utils.clean_provision_labels(host)
        host.servo_install(update_url)
        afe_utils.add_provision_labels(host, host.VERSION_PREFIX, image_name)

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Reinstall from USB using servo'


class ColdRebootRepair(_ResetRepairAction):
    """
    Repair a Chrome device by performing a cold reboot that resets the EC.

    Use ectool to perform a cold reboot which will reset the EC.
    """

    def repair(self, host):
        # pylint: disable=missing-docstring
        host.reboot(reboot_cmd='ectool reboot_ec cold')
        self._check_reset_success(host)

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Reset the DUT via cold reboot with ectool'


class JetstreamTpmRepair(hosts.RepairAction):
    """Repair by resetting TPM and rebooting."""

    def repair(self, host):
        # pylint: disable=missing-docstring
        # Remove state that would make the device think setup/ownership
        # has already happened, then request a TPM owner clear.
        host.run('rm -f /var/cache/ap/setup-network', ignore_status=True)
        host.run('rm -f /home/chronos/.oobe_completed', ignore_status=True)
        host.run('rm -f /home/.shadow/.can_attempt_ownership',
                 ignore_status=True)
        host.run('crossystem clear_tpm_owner_request=1', ignore_status=True)
        host.reboot()

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Reset TPM and reboot'


class JetstreamServiceRepair(hosts.RepairAction):
    """Repair by restarting Jetstream services."""

    def repair(self, host):
        # pylint: disable=missing-docstring
        host.cleanup_services()

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Restart Jetstream services'


def _cros_verify_dag():
    """Return the verification DAG for a `CrosHost`."""
    return _cros_verify_base_dag() + _cros_verify_extended_dag()


def _cros_verify_base_dag():
    """Return the base verification DAG for a `CrosHost`.

    Each entry is a (verifier class, tag, dependency tags) triple; the
    tags are the trigger names used in the repair-action tables below.
    """
    FirmwareStatusVerifier = cros_firmware.FirmwareStatusVerifier
    FirmwareVersionVerifier = cros_firmware.FirmwareVersionVerifier
    verify_dag = (
        (repair_utils.SshVerifier, 'ssh', ()),
        (ServoTypeVerifier, 'servo_type', ()),
        (DevModeVerifier, 'devmode', ('ssh',)),
        (HWIDVerifier, 'hwid', ('ssh',)),
        (ACPowerVerifier, 'power', ('ssh',)),
        (EXT4fsErrorVerifier, 'ext4', ('ssh',)),
        (WritableVerifier, 'writable', ('ssh',)),
        (TPMStatusVerifier, 'tpm', ('ssh',)),
        (UpdateSuccessVerifier, 'good_au', ('ssh',)),
        (FirmwareStatusVerifier, 'fwstatus', ('ssh',)),
        (FirmwareVersionVerifier, 'rwfw', ('ssh',)),
        (PythonVerifier, 'python', ('ssh',)),
        (repair_utils.LegacyHostVerifier, 'cros', ('ssh',)),
        (KvmExistsVerifier, 'ec_reset', ('ssh',)),
    )
    return verify_dag


def _cros_verify_extended_dag():
    """Return the extended verification DAG for a `CrosHost`."""
    return (
        (StopStartUIVerifier, 'stop_start_ui', ('ssh',)),
    )


def _cros_basic_repair_actions():
    """Return the basic repair actions for a `CrosHost`

    Each entry is a (repair class, tag, dependencies, triggers) tuple.
    """
    FirmwareRepair = cros_firmware.FirmwareRepair
    repair_actions = (
        # RPM cycling must precede Servo reset:  if the DUT has a dead
        # battery, we need to reattach AC power before we reset via servo.
        (repair_utils.RPMCycleRepair, 'rpm', (), ('ssh', 'power',)),
        (ServoSysRqRepair, 'sysrq', (), ('ssh',)),
        (ServoResetRepair, 'servoreset', (), ('ssh',)),

        # N.B. FirmwareRepair can't fix a 'good_au' failure directly,
        # because it doesn't remove the flag file that triggers the
        # failure.  We include it as a repair trigger because it's
        # possible that the last update failed because of the firmware,
        # and we want the repair steps below to be able to trust the
        # firmware.
        (FirmwareRepair, 'firmware', (), ('ssh', 'fwstatus', 'good_au',)),

        (CrosRebootRepair, 'reboot', ('ssh',), ('devmode', 'writable',)),

        (ColdRebootRepair, 'coldboot', ('ssh',), ('ec_reset',)),
    )
    return repair_actions


def _cros_extended_repair_actions(au_triggers=_CROS_EXTENDED_AU_TRIGGERS,
                                  powerwash_triggers=_CROS_POWERWASH_TRIGGERS,
                                  usb_triggers=_CROS_USB_TRIGGERS):
    """Return the extended repair actions for a `CrosHost`"""

    # The dependencies and triggers for the 'au', 'powerwash', and 'usb'
    # repair actions stack up:  Each one is able to repair progressively
    # more verifiers than the one before.  The 'triggers' lists specify
    # the progression.

    repair_actions = (
        (AutoUpdateRepair, 'au',
         usb_triggers + powerwash_triggers, au_triggers),
        (PowerWashRepair, 'powerwash',
         usb_triggers, powerwash_triggers + au_triggers),
        (ServoInstallRepair, 'usb',
         (), usb_triggers + powerwash_triggers + au_triggers),
    )
    return repair_actions


def _cros_repair_actions():
    """Return the repair actions for a `CrosHost`."""
    repair_actions = (_cros_basic_repair_actions() +
                      _cros_extended_repair_actions())
    return repair_actions


def create_cros_repair_strategy():
    """Return a `RepairStrategy` for a `CrosHost`."""
    verify_dag = _cros_verify_dag()
    repair_actions = _cros_repair_actions()
    return hosts.RepairStrategy(verify_dag, repair_actions, 'cros')


def _moblab_verify_dag():
    """Return the verification DAG for a `MoblabHost`."""
    verify_dag = (
        (repair_utils.SshVerifier, 'ssh', ()),
        (ACPowerVerifier, 'power', ('ssh',)),
        (PythonVerifier, 'python', ('ssh',)),
        (repair_utils.LegacyHostVerifier, 'cros', ('ssh',)),
    )
    return verify_dag


def _moblab_repair_actions():
    """Return the repair actions for a `MoblabHost`."""
    repair_actions = (
        (repair_utils.RPMCycleRepair, 'rpm', (), ('ssh', 'power',)),
        (AutoUpdateRepair, 'au', ('ssh',), ('power', 'python', 'cros')),
    )
    return repair_actions


def create_moblab_repair_strategy():
    """
    Return a `RepairStrategy` for a `MoblabHost`.

    Moblab is a subset of the CrOS verify and repair.  Several pieces
    are removed because they're not expected to be meaningful.  Some
    others are removed for more specific reasons:

    'tpm':  Moblab DUTs don't run the tests that matter to this
        verifier.  TODO(jrbarnette)  This assertion is unproven.

    'good_au':  This verifier can't pass, because the Moblab AU
        procedure doesn't properly delete the PROVISION_FAILED file.
        TODO(jrbarnette)  We should refactor ChromiumOSUpdater so
        that it can be different for Moblab.

    'firmware':  Moblab DUTs shouldn't be in FAFT pools, so we don't try
        this.

    'powerwash':  Powerwash on Moblab causes trouble with deleting the
        DHCP leases file, so we skip it.
    """
    verify_dag = _moblab_verify_dag()
    repair_actions = _moblab_repair_actions()
    return hosts.RepairStrategy(verify_dag, repair_actions, 'moblab')


def _jetstream_repair_actions():
    """Return the repair actions for a `JetstreamHost`."""
    au_triggers = _CROS_AU_TRIGGERS
    jetstream_tpm_triggers = ('jetstream_tpm', 'jetstream_attestation')
    jetstream_service_triggers = (jetstream_tpm_triggers +
                                  ('jetstream_services',))
    repair_actions = (
        _cros_basic_repair_actions() +
        (
            (JetstreamTpmRepair, 'jetstream_tpm_repair',
             _JETSTREAM_USB_TRIGGERS + _CROS_POWERWASH_TRIGGERS,
             au_triggers + jetstream_tpm_triggers),

            (JetstreamServiceRepair, 'jetstream_service_repair',
             _JETSTREAM_USB_TRIGGERS + _CROS_POWERWASH_TRIGGERS + (
                 'jetstream_tpm', 'jetstream_attestation'),
             au_triggers + jetstream_service_triggers),
        ) +
        _cros_extended_repair_actions(
            au_triggers=au_triggers + jetstream_service_triggers,
            usb_triggers=_JETSTREAM_USB_TRIGGERS))
    return repair_actions


def _jetstream_verify_dag():
    """Return the verification DAG for a `JetstreamHost`."""
    verify_dag = _cros_verify_base_dag() + (
        (JetstreamTpmVerifier, 'jetstream_tpm', ('ssh',)),
        (JetstreamAttestationVerifier, 'jetstream_attestation', ('ssh',)),
        (JetstreamServicesVerifier, 'jetstream_services', ('ssh',)),
    )
    return verify_dag


def create_jetstream_repair_strategy():
    """
    Return a `RepairStrategy` for a `JetstreamHost`.

    The Jetstream repair strategy is based on the CrOS verify and repair,
    but adds the JetstreamServicesVerifier.
    """
    verify_dag = _jetstream_verify_dag()
    repair_actions = _jetstream_repair_actions()
    return hosts.RepairStrategy(verify_dag, repair_actions, 'jetstream')


# TODO(pprabhu) Move this to a better place. I have no idea what that place
# would be.
877def _is_virtual_machine(host): 878 """Determine whether the given |host| is a virtual machine. 879 880 @param host: a hosts.Host object. 881 @returns True if the host is a virtual machine, False otherwise. 882 """ 883 output = host.run('cat /proc/cpuinfo | grep "model name"', 884 ignore_status=True) 885 return (output.exit_status == 0 and output.stdout and 886 'qemu' in output.stdout.lower()) 887 888 889class CryptohomeStatus(dict): 890 """Wrapper for getting cryptohome status from a host.""" 891 892 def __init__(self, host): 893 super(CryptohomeStatus, self).__init__() 894 self.update(_get_cryptohome_status(host)) 895 self.tpm = self['tpm'] 896 897 @property 898 def tpm_enabled(self): 899 # pylint: disable=missing-docstring 900 return self.tpm.get('enabled') == True 901 902 @property 903 def tpm_owned(self): 904 # pylint: disable=missing-docstring 905 return self.tpm.get('owned') == True 906 907 @property 908 def tpm_can_load_srk(self): 909 # pylint: disable=missing-docstring 910 return self.tpm.get('can_load_srk') == True 911 912 @property 913 def tpm_can_load_srk_pubkey(self): 914 # pylint: disable=missing-docstring 915 return self.tpm.get('can_load_srk_pubkey') == True 916 917 918def _get_cryptohome_status(host): 919 """Returns a dictionary containing the cryptohome status. 920 921 @param host: a hosts.Host object. 922 @returns A dictionary containing the cryptohome status. 923 @raises AutoservVerifyError: if the output could not be parsed or the TPM 924 status is missing. 925 @raises hosts.AutoservRunError: if the cryptohome command failed. 926 """ 927 # This cryptohome command emits status information in JSON format. It 928 # looks something like this: 929 # { 930 # "installattrs": { 931 # ... 932 # }, 933 # "mounts": [ { 934 # ... 
935 # } ], 936 # "tpm": { 937 # "being_owned": false, 938 # "can_connect": true, 939 # "can_decrypt": false, 940 # "can_encrypt": false, 941 # "can_load_srk": true, 942 # "can_load_srk_pubkey": true, 943 # "enabled": true, 944 # "has_context": true, 945 # "has_cryptohome_key": false, 946 # "has_key_handle": false, 947 # "last_error": 0, 948 # "owned": true 949 # } 950 # } 951 try: 952 output = host.run('cryptohome --action=status').stdout.strip() 953 status = json.loads(output) 954 if 'tpm' not in status: 955 raise hosts.AutoservVerifyError('TPM status is missing') 956 return status 957 except ValueError: 958 raise hosts.AutoservVerifyError('Unable to parse cryptohome status') 959