# Lint as: python2, python3
# Copyright 2016 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import json
import logging
import time
import math

import common
from autotest_lib.client.common_lib import error
from autotest_lib.client.common_lib import global_config
from autotest_lib.client.common_lib import hosts
from autotest_lib.client.common_lib import utils
from autotest_lib.client.common_lib.cros import dev_server
from autotest_lib.client.common_lib.cros import retry
from autotest_lib.client.common_lib.cros import tpm_utils
from autotest_lib.server import afe_utils
from autotest_lib.server import crashcollect
from autotest_lib.server.cros import provisioner
from autotest_lib.server.cros.dynamic_suite import tools
from autotest_lib.server.hosts import cros_constants
from autotest_lib.server.hosts import cros_firmware
from autotest_lib.server.hosts import repair_utils
from autotest_lib.site_utils.admin_audit import verifiers as audit_verify
from autotest_lib.site_utils.admin_audit import constants as audit_const
from six.moves import range

try:
    from chromite.lib import metrics
except ImportError:
    metrics = utils.metrics_mock

from chromite.lib import timeout_util

DEFAULT_SERVO_RESET_TRIGGER = (
        'ping',
        'ssh',
        'stop_start_ui',
        'power',
)


# _DEV_MODE_ALLOWED_POOLS - The set of pools that are allowed to be
# in dev mode (usually, those should be unmanaged devices).
#
# The config value is a comma-separated list.  Blank entries are dropped
# (an empty config value would otherwise produce a set holding the empty
# string) and surrounding whitespace is stripped so that 'a, b' names the
# pools 'a' and 'b'.
_DEV_MODE_ALLOWED_POOLS = set(
        pool.strip()
        for pool in global_config.global_config.get_config_value(
                'CROS',
                'pools_dev_mode_allowed',
                type=str,
                default='',
                allow_blank=True).split(',')
        if pool.strip())

# Setting to suppress dev mode check; primarily used for moblab where all
# DUT's are in dev mode.
_DEV_MODE_ALWAYS_ALLOWED = global_config.global_config.get_config_value(
        'CROS',
        'dev_mode_allowed',
        type=bool,
        default=False)

# Triggers for the 'provision', 'powerwash', and 'usb' repair actions.
# These are also used as dependencies in the `CrosHost` repair
# sequence, as follows:
#
# provision:
#   - triggers: _CROS_PROVISION_TRIGGERS
#   - depends on: _CROS_USB_TRIGGERS + _CROS_POWERWASH_TRIGGERS
#
# powerwash:
#   - triggers: _CROS_POWERWASH_TRIGGERS + _CROS_PROVISION_TRIGGERS
#   - depends on: _CROS_USB_TRIGGERS
#
# usb:
#   - triggers: _CROS_USB_TRIGGERS + _CROS_POWERWASH_TRIGGERS +
#               _CROS_PROVISION_TRIGGERS
#   - depends on: _CROS_USB_DEPENDENCIES
#
# N.B. AC power detection depends on software on the DUT, and there
# have been bugs where detection failed even though the DUT really
# did have power.  So, we make the 'power' verifier a trigger for
# reinstall repair actions, too.
#
# TODO(jrbarnette):  provision repair can't fix all problems reported by
# the 'cros' verifier; it's listed as a provision trigger as a
# simplification.  The ultimate fix is to split the 'cros' verifier
# into smaller individual verifiers.
_CROS_PROVISION_TRIGGERS = (
        'power',
        'rwfw',
        'fwstatus',
        'python',
        'hwid',
        'cros',
        'dev_default_boot',
)
_CROS_POWERWASH_TRIGGERS = ('tpm', 'good_provision', 'ext4',)
_CROS_USB_TRIGGERS = (
        'ping',
        'ssh',
        'writable',
        'stop_start_ui',
)
_JETSTREAM_USB_TRIGGERS = (
        'ping',
        'ssh',
        'writable',
)
_CROS_FIRMWARE_TRIGGERS = (
        'ping',
        'ssh',
)
_CROS_USB_DEPENDENCIES = ('usb_drive', )


class ACPowerVerifier(hosts.Verifier):
    """Check for AC power and battery charging state."""

    # Battery discharging state in power_supply_info file.
    BATTERY_DISCHARGING = 'Discharging'
    # Power controller can discharge battery any time till 90% for any model.
    # Setting level to 90% in case we have wearout of it.
    BATTERY_DISCHARGE_MIN = 90

    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        # pylint: disable=missing-docstring
        info = self._load_info(host)
        self._validate_ac_plugged(info)
        self._validate_battery(host, info)

    def _load_info(self, host):
        """Return the power supply info reported by the host.

        @param host  Host to query.
        @raises AutoservVerifyError if the query command fails.
        """
        try:
            info = host.get_power_supply_info()
        except error.AutoservRunError:
            raise hosts.AutoservVerifyError(
                    'Failed to get power supply info')
        return info

    def _validate_ac_plugged(self, info):
        """Fail verification unless the info reports line power online.

        @param info  Power supply info as returned by `_load_info()`.
        @raises AutoservVerifyError if AC is unplugged or not reported.
        """
        # Validate that DUT is plugged to the AC.
        try:
            if info['Line Power']['online'] != 'yes':
                raise hosts.AutoservVerifyError(
                        'AC power is not plugged in')
        except KeyError:
            raise hosts.AutoservVerifyError(
                    'Cannot determine AC power status')

    def _validate_battery(self, host, info):
        """Check the battery level and charging state.

        If the battery is discharging below BATTERY_DISCHARGE_MIN, try
        once to restore normal charge control before judging the state.
        Missing or unparsable battery data only logs a warning.

        @param host  Host being verified.
        @param info  Power supply info as returned by `_load_info()`.
        @raises AutoservVerifyError if the battery is discharging and
                below cros_constants.MIN_BATTERY_LEVEL.
        """
        try:
            charging_state = info['Battery']['state']
            battery_level = float(info['Battery']['percentage'])

            # Collect info to determine which battery level is better to call
            # as MIN_BATTERY_LEVEL for DUTs in the lab.
            if battery_level < cros_constants.MIN_BATTERY_LEVEL:
                level_by_10 = int(math.floor(battery_level / 10.0)) * 10
                metrics_data = {
                        'host': host.hostname,
                        'level': level_by_10,
                        'mode': charging_state
                }
                metrics.Counter('chromeos/autotest/battery/state2').increment(
                        fields=metrics_data)

            if (charging_state == self.BATTERY_DISCHARGING
                        and battery_level < self.BATTERY_DISCHARGE_MIN):
                logging.debug('Try to fix discharging state of the battery. '
                              'Possible that a test left wrong state.')
                # Here is the chance that battery is discharging because
                # of some test did not clean up the state.
                # We are going to try to fix it by set charging to normal.
                host.run('ectool chargecontrol normal', ignore_status=True)
                # wait to change state.
                time.sleep(10)
                info = self._load_info(host)
                charging_state = info['Battery']['state']
                fixed = charging_state != self.BATTERY_DISCHARGING
                # TODO (@otabek) remove metrics after research
                # Only claim success when the state really changed.
                if fixed:
                    logging.debug('Fixed battery discharge mode.')
                else:
                    logging.debug('Battery is still discharging after'
                                  ' resetting charge control.')
                metrics_data = {
                        'model': host.host_info_store.get().model,
                        'fixed': fixed
                }
                metrics.Counter(
                        'chromeos/autotest/repair/chargecontrol_fixed'
                ).increment(fields=metrics_data)

            if (battery_level < cros_constants.MIN_BATTERY_LEVEL
                        and charging_state == self.BATTERY_DISCHARGING):
                # TODO(@xianuowang) remove metrics here once we have device
                # health profile to collect history of DUT's metrics.
                metrics_data = {'host': host.hostname,
                                'board': host.host_info_store.get().board}
                metrics.Counter(
                        'chromeos/autotest/repair/verifier/power').increment(
                                fields=metrics_data)
                raise hosts.AutoservVerifyError(
                        'Battery is in discharging state and current level'
                        ' is less than %s%%' %
                        cros_constants.MIN_BATTERY_LEVEL)
        except (KeyError, ValueError):
            logging.warning('Cannot determine battery state -'
                            ' skipping check.')

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'The DUT is plugged in to AC power and battery is charing'


class CrosVerisionVerifier(hosts.Verifier):
    """Confirm that the current ChromeOS image on the host matches
    the provision-cros_version label.

    Some tests may change the DUT image without updating the
    provision-cros_version label, which could cause the next test run
    on the same host to get an unexpected OS version and yield a false
    positive test result.
    """

    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        # pylint: disable=missing-docstring
        label_match = True
        try:
            label_match = host.verify_cros_version_label()
        except Exception as e:
            # We don't want fail this verifier for any errors that other
            # than a actual version mismatch, as that can make debugging
            # more challenge.
            logging.warning('Unexpected error during verify cros verision'
                            ' on %s; %s', host.hostname, e)

        if not label_match:
            raise hosts.AutoservVerifyError('ChromeOS image on the host'
                                            ' does not match to cros-version'
                                            ' label.')

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'ChromeOS image on host matches cros_version label'


class WritableVerifier(hosts.Verifier):
    """
    Confirm the stateful file systems are writable.

    The standard linux response to certain unexpected file system errors
    (including hardware errors in block devices) is to change the file
    system status to read-only.  This checks that that hasn't happened.

    The test covers the two file systems that need to be writable for
    critical operations like AU:
      * The (unencrypted) stateful system which includes
        /mnt/stateful_partition.
      * The encrypted stateful partition, which includes /var.

    The test doesn't check various bind mounts; those are expected to
    fail the same way as their underlying main mounts.  Whether the
    Linux kernel can guarantee that is untested...
    """

    # N.B. Order matters here:  Encrypted stateful is loop-mounted from
    # a file in unencrypted stateful, so we don't test for errors in
    # encrypted stateful if unencrypted fails.
    _TEST_DIRECTORIES = ['/mnt/stateful_partition', '/var/tmp']

    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        # pylint: disable=missing-docstring
        # This deliberately stops looking after the first error.
        # See above for the details.
        for testdir in self._TEST_DIRECTORIES:
            if not host.is_file_system_writable([testdir]):
                msg = 'Can\'t create a file in %s' % testdir
                raise hosts.AutoservVerifyError(msg)

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'The stateful filesystems are writable'


class EXT4fsErrorVerifier(hosts.Verifier):
    """
    Confirm we have not seen critical file system kernel errors.
    """

    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        # pylint: disable=missing-docstring
        # grep for stateful FS errors of the type "EXT4-fs error (device sda1):"
        # The backslashes are doubled so that grep receives a literal
        # `\(` / `\)` without relying on an invalid Python string escape.
        command = ("dmesg | grep -E \"EXT4-fs error \\(device "
                   "$(cut -d ' ' -f 5,9 /proc/$$/mountinfo | "
                   "grep -e '^/mnt/stateful_partition ' | "
                   "cut -d ' ' -f 2 | cut -d '/' -f 3)\\):\"")
        output = host.run(command=command, ignore_status=True).stdout
        if output:
            sample = output.splitlines()[0]
            message = 'Saw file system error: %s' % sample
            raise hosts.AutoservVerifyError(message)
        # Check for other critical FS errors.
        command = 'dmesg | grep "This should not happen!! Data will be lost"'
        output = host.run(command=command, ignore_status=True).stdout
        if output:
            message = 'Saw file system error: Data will be lost'
            raise hosts.AutoservVerifyError(message)
        # No output from either grep means no critical error was found;
        # that is the success path, so nothing is logged here.

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Did not find critical file system errors'


class UpdateSuccessVerifier(hosts.Verifier):
    """
    Checks that the DUT successfully finished its last provision job.

    At the start of any update (e.g. for a Provision job), the code
    creates a marker file named `PROVISION_FAILED`.  The file is located
    in a part of the stateful partition that will be removed if an
    update finishes successfully.  Thus, the presence of the file
    indicates that a prior update failed.

    The verifier tests for the existence of the marker file and fails if
    it still exists.
    """

    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        # pylint: disable=missing-docstring
        result = host.run('test -f %s' % provisioner.PROVISION_FAILED,
                          ignore_status=True)
        if result.exit_status == 0:
            raise hosts.AutoservVerifyError(
                    'Last provision on this DUT failed')

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'The most recent provision attempt on this DUT succeeded'


class TPMStatusVerifier(hosts.Verifier):
    """Verify that the host's TPM is in a good state."""

    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        # pylint: disable=missing-docstring
        if _is_virtual_machine(host):
            # We do not forward host TPM / emulated TPM to qemu VMs, so skip
            # this verification step.
            logging.debug('Skipped verification %s on VM', self)
            return

        try:
            status = CryptohomeStatus(host)
        except hosts.AutoservVerifyError:
            logging.info('Cannot determine the Cryptohome valid status - '
                         'skipping check.')
            return
        try:
            tpm = status['tpm']
            if not tpm['enabled']:
                raise hosts.AutoservVerifyError(
                        'TPM is not enabled -- Hardware is not working.')
            if not tpm['can_connect']:
                raise hosts.AutoservVerifyError(
                        ('TPM connect failed -- '
                         'last_error=%d.' % tpm['last_error']))
            if tpm['owned'] and not tpm['can_load_srk']:
                raise hosts.AutoservVerifyError(
                        'Cannot load the TPM SRK')
            if tpm['can_load_srk'] and not tpm['can_load_srk_pubkey']:
                raise hosts.AutoservVerifyError(
                        'Cannot load the TPM SRK public key')
        except KeyError:
            # A missing field means the status is incomplete; treat it
            # as "unknown" rather than as a failure.
            logging.info('Cannot determine the Cryptohome valid status - '
                         'skipping check.')

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'The host\'s TPM is available and working'


class PythonVerifier(hosts.Verifier):
    """Confirm the presence of a working Python interpreter."""

    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        # pylint: disable=missing-docstring
        result = host.run('python -c "import json"',
                          ignore_status=True)
        if result.exit_status != 0:
            message = 'The python interpreter is broken'
            # Exit status 127 is the shell's "command not found".
            if result.exit_status == 127:
                search = host.run('which python', ignore_status=True)
                if search.exit_status != 0 or not search.stdout:
                    message = ('Python is missing; may be caused by '
                               'powerwash')
            raise hosts.AutoservVerifyError(message)

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Python on the host is installed and working'


class DevModeVerifier(hosts.Verifier):
    """Verify that the host is not in dev mode."""

    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        # pylint: disable=missing-docstring
        # Some pools are allowed to be in dev mode
        info = host.host_info_store.get()
        if (_DEV_MODE_ALWAYS_ALLOWED or
                    bool(info.pools & _DEV_MODE_ALLOWED_POOLS)):
            return

        result = host.run('crossystem devsw_boot', ignore_status=True).stdout
        if result != '0':
            raise hosts.AutoservVerifyError('The host is in dev mode')

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'The host should not be in dev mode'


class DevDefaultBootVerifier(hosts.Verifier):
    """Verify that the host is set to boot the internal disk by default."""

    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        # pylint: disable=missing-docstring
        result = host.run('crossystem dev_default_boot', ignore_status=True)
        default_boot = result.stdout.strip()
        if default_boot != 'disk':
            raise hosts.AutoservVerifyError(
                    'The host has incorrect dev_default_boot value: %r'
                    % default_boot)

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'The host should have dev_default_boot=disk'


class HWIDVerifier(hosts.Verifier):
    """Verify that the host has HWID & serial number."""

    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        # pylint: disable=missing-docstring
        info = host.host_info_store.get()
        if not info.board or not info.model:
            # if board or model missed in host_info file then it is empty
            # skip verifier
            return
        info_hwid = info.attributes.get('HWID')
        info_serial_number = info.attributes.get('serial_number')

        if not info_hwid or not info_serial_number:
            logging.info('Missing HWID or/and SerialNumber.'
                         ' Probably device was not deployed properly.'
                         ' Marking DUT for need re-deployment.')
            host.set_device_repair_state(
                    cros_constants.DEVICE_STATE_NEEDS_DEPLOY)
            return

        host_hwid = host.run('crossystem hwid', ignore_status=True).stdout
        host_serial_number = self._get_serial_number(host, info_serial_number)
        if not host_hwid or not host_serial_number:
            raise hosts.AutoservVerifyError(
                    'Failed to get HWID & Serial Number for host %s' %
                    host.hostname)

        if host_hwid != info_hwid:
            # We not fail verifier as it not critical for majority tests.
            metrics.Counter('chromeos/autotest/repair/hwid_change').increment(
                    fields={
                            'host': host.hostname,
                            'board': info.board or ''
                    })
            logging.info(
                    'HWID changed to: %s required manual work'
                    ' to fix it.', host_hwid)

        if host_serial_number and host_serial_number != info_serial_number:
            logging.info(
                    'The SerialNumber mismatch detected %s != %s.'
                    ' Probably attempt to replace DUT without deployment.'
                    ' Marking DUT for need re-deployment.', info_serial_number,
                    host_serial_number)
            host.set_device_repair_state(
                    cros_constants.DEVICE_STATE_NEEDS_DEPLOY)

    def _get_serial_number(self, host, serial_number):
        """Read serial_number from VPD.

        If VPD does not have any value for serial_number then it will
        try to restore from host_info.

        @param host           CrosHost
        @param serial_number  Serial-number from host-info

        @returns stdout of the last `vpd -g serial_number` run.
        """
        req = host.run('vpd -g serial_number', ignore_status=True)
        # serial_number not found in the VPD info
        if not req.stdout and req.exit_status == 3 and serial_number:
            logging.debug('Cannot find serial_number from VPD.')
            # check if vpd working fine without error
            l1 = host.run('vpd -l', ignore_status=True)
            l2 = host.run('vpd -l |grep "\"serial_number\"="',
                          ignore_status=True)
            # Restore only when vpd itself works (l1) and grep found no
            # serial_number entry (grep exit status 1).
            if l1.exit_status == 0 and l2.exit_status == 1:
                logging.info('Start restoring serial_number:%s for VPD.',
                             serial_number)
                # update serial_number for VPD
                cmd = 'vpd -s serial_number=%s'
                host.run(cmd % serial_number, ignore_status=True)
                host.run('dump_vpd_log --force', ignore_status=True)
                # reading from VPD to see what we updated
                req = host.run('vpd -g serial_number', ignore_status=True)
        return req.stdout

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'The host should have valid HWID and Serial Number'


class EnrollmentStateVerifier(hosts.Verifier):
    """Verify that the device's enrollment state is clean.

    There are two "flags" that generate 3 possible enrollment states here.
    Flag 1 - The presence of install attributes file in
             /home/.shadow/install_attributes.pb

    Flag 2 - The value of "check_enrollment" from VPD. Can be obtained by
             reading the cache file in
             /mnt/stateful_partition/unencrypted/cache/vpd/full-v2.txt

    The states:
    State 1 - Device is enrolled, means flag 1 is true and in
              flag 2 check_enrollment=1
    State 2 - Device is consumer owned, means flag 1 is true and in
              flag 2 check_enrollment=0
    State 3 - Device is enrolled and has been powerwashed, means flag 1 is
              false. If the value in flag 2 is check_enrollment=1 then the
              device will perform forced re-enrollment check and depending
              on the response from the server might force the device to enroll
              again. If the value is check_enrollment=0, then device can be
              used like a new device.

    We consider state 1, and first scenario(check_enrollment=1) of state 3
    as unacceptable state here as they may interfere with normal tests.
    """

    VPD_CACHE = '/mnt/stateful_partition/unencrypted/cache/vpd/full-v2.txt'

    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        # pylint: disable=missing-docstring
        if self._get_enrollment_state(host):
            raise hosts.AutoservNonCriticalVerifyError('The device is enrolled,'
                                                       ' it may interfere with'
                                                       ' some tests.')

    def _get_enrollment_state(self, host):
        """Return True if the VPD cache says check_enrollment is 1.

        @param host  Host to query.
        @returns False when the flag is not 1 or the cache cannot be read.
        """
        logging.debug('checking enrollment state from VPD cache...')
        response = host.run('grep "check_enrollment" %s' % self.VPD_CACHE,
                            ignore_status=True)
        if response.exit_status == 0:
            result = response.stdout.strip()
            logging.info('Enrollment state in VPD cache: %s', result)
            return result == '"check_enrollment"="1"'

        logging.error('Unexpected error occured during verify enrollment state'
                      ' in VPD cache, skipping verify process.')
        return False

    def _is_applicable(self, host):
        info = host.host_info_store.get()
        # if os type is missing from host_info, then we assume it's cros.
        return getattr(info, 'os', 'cros') in ('', 'cros')

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'The enrollment state is clean on the host'


class FirmwareTpmVerifier(hosts.Verifier):
    """Verifier that firmware tpm info is correct.

    For dev-signed firmware, tpm_fwver and tpm_kernver reported from
    crossystem should always be 0x10001. Firmware update on DUTs with
    incorrect tpm_fwver or tpm_kernver may fail due to firmware
    rollback protection.
    """
    # A list of field we want check from crossystem and expected value.
    CHECK_LIST = [
            ('tpm_fwver', '0x00010001'),
            ('tpm_kernver', '0x00010001'),
    ]

    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        # pylint: disable=missing-docstring
        for field, expected_value in self.CHECK_LIST:
            result = host.run('crossystem %s' % field, ignore_status=True)
            if result.exit_status != 0:
                raise hosts.AutoservNonCriticalVerifyError(
                        'Unable to get %s from crossystem.' % field)
            if result.stdout != expected_value:
                raise hosts.AutoservNonCriticalVerifyError(
                        'Unexpected %s value: %s, expected: %s. This error'
                        ' may cause firmware provision fail due to the'
                        ' rollback protection.' %
                        (field, result.stdout, expected_value))

    def _is_applicable(self, host):
        return cros_firmware._is_firmware_testing_device(host)

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Firmware tpm info is correct in crossystem.'


class JetstreamTpmVerifier(hosts.Verifier):
    """Verify that Jetstream TPM is in a good state."""

    @retry.retry(error.AutoservError, timeout_min=2, delay_sec=10)
    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        # pylint: disable=missing-docstring
        try:
            status = CryptohomeStatus(host)
            if not status.tpm_enabled:
                raise hosts.AutoservVerifyError('TPM is not enabled')
            if not status.tpm_owned:
                raise hosts.AutoservVerifyError('TPM is not owned')
            if not status.tpm_can_load_srk:
                raise hosts.AutoservVerifyError('TPM cannot load SRK')
            if not status.tpm_can_load_srk_pubkey:
                raise hosts.AutoservVerifyError('TPM cannot load SRK pubkey')

            # Check that the TPM is fully initialized. The output of this
            # command is line-oriented property/value pairs.
            result = host.run('cryptohome --action=tpm_status')
            if 'TPM Ready: true' not in result.stdout:
                raise hosts.AutoservVerifyError('TPM is not ready')
        except error.AutoservRunError:
            raise hosts.AutoservVerifyError(
                    'Could not determine TPM status')

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Jetstream TPM state check'


class JetstreamAttestationVerifier(hosts.Verifier):
    """Verify that Jetstream attestation client has a certificate."""

    @retry.retry(error.AutoservError, timeout_min=2, delay_sec=10)
    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        # pylint: disable=missing-docstring
        try:
            # This output is in text protobuf format.
            result = host.run('cryptohome --action=tpm_more_status')
            if 'attestation_prepared: true' not in result.stdout:
                raise hosts.AutoservVerifyError(
                        'Attestation has not been prepared')

            result = host.run('cryptohome --action=tpm_attestation_get_ek')
            if 'EK Certificate' not in result.stdout:
                raise hosts.AutoservVerifyError(
                        'Endorsement certificate not found')
        except error.AutoservRunError:
            raise hosts.AutoservVerifyError(
                    'Unable to fetch endorsement certificate')

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Jetstream attestation endorsement check'


class JetstreamServicesVerifier(hosts.Verifier):
    """Verify that Jetstream services are running."""

    # Retry for b/62576902
    @retry.retry(error.AutoservError, timeout_min=1, delay_sec=10)
    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        # pylint: disable=missing-docstring
        try:
            if not host.upstart_status('ap-controller'):
                raise hosts.AutoservVerifyError(
                        'ap-controller service is not running')
        except error.AutoservRunError:
            raise hosts.AutoservVerifyError(
                    'ap-controller service not found')

        try:
            host.run('pgrep ap-controller')
        except error.AutoservRunError:
            raise hosts.AutoservVerifyError(
                    'ap-controller process is not running')

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Jetstream services must be running'


class StopStartUIVerifier(hosts.Verifier):
    """Verify that command 'stop ui' won't crash the DUT.

    We run 'stop ui' in AU and provision. We found some bad images broke
    this command and then broke all the provision of all following test. We add
    this verifier to ensure it works and will trigger reimaging to a good
    version if it fails.
    """

    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        # pylint: disable=missing-docstring
        try:
            host.run('stop ui && start ui', ignore_status=True, timeout=10)
        except error.AutoservSSHTimeout:
            raise hosts.AutoservVerifyError(
                    "Got timeout when stop ui/start ui. DUT might crash.")

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'The DUT image works fine when stop ui/start ui.'


class ServoUSBDriveVerifier(hosts.Verifier):
    """Verify that USB drive on Servo is good to use.

    Check if USB drive is detected on servo and verified on servohost and
    USB is not marked for replacement.
    """

    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        # pylint: disable=missing-docstring
        usb_dev = ''
        try:
            usb_dev = host._servo_host._probe_and_validate_usb_dev()
        except hosts.AutoservRepairError as e:
            # The USB drive was not detected by servod.
            logging.debug('(Not critical) %s', e)
        host_info = host.host_info_store.get()
        if not usb_dev:
            host_info.set_version_label(audit_const.SERVO_USB_STATE_PREFIX,
                                        audit_const.HW_STATE_NOT_DETECTED)
            host.host_info_store.commit(host_info)
            raise hosts.AutoservNonCriticalVerifyError(
                    'USB-drive is not detected or bad')

        # Check if USB-drive marked for replacement.
        usb_state = host_info.get_label_value(
                audit_const.SERVO_USB_STATE_PREFIX)
        if usb_state and usb_state == audit_const.HW_STATE_NEED_REPLACEMENT:
            raise hosts.AutoservNonCriticalVerifyError(
                    'USB-drive marked for replacement')

        # The USB-drive detected and was not mark for replacement.
        # Set as normal for future audit.
        host_info.set_version_label(audit_const.SERVO_USB_STATE_PREFIX,
                                    audit_const.HW_STATE_NORMAL)
        host.host_info_store.commit(host_info)

    def _is_applicable(self, host):
        # Only meaningful when the host has a servo attached.
        return bool(host.servo)

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Ensure USB drive on Servo is in good state.'


class DUTStorageVerifier(hosts.Verifier):
    """Verify that main storage on DUT is good to use.

    Check if DUT drive is providing good SMART stats which not showing any
    issues on it. The verifier can mark DUT for replacement if SMART stats
    show outworn data.
    """

    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        # pylint: disable=missing-docstring
        verifier = audit_verify.VerifyDutStorage(host)
        verifier.verify(set_label=True, run_badblocks='NOT')
        state = verifier.get_state() or audit_const.HW_STATE_UNKNOWN
        # NOTE(review): because of the `or HW_STATE_UNKNOWN` fallback
        # above, this branch looks unreachable unless HW_STATE_UNKNOWN is
        # itself falsy -- confirm the intended behaviour before changing.
        if not state:
            raise hosts.AutoservNonCriticalVerifyError(
                    'DUT storage did not detected or state cannot extracted.')
        if state == audit_const.HW_STATE_NEED_REPLACEMENT:
            logging.info('Detected issue with storage on the DUT.')
            host.set_device_needs_replacement()

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Ensure DUT storage SMART information is in good state.'


class _ResetRepairAction(hosts.RepairAction):
    """Common handling for repair actions that reset a DUT."""

    def _collect_logs(self, host):
        """Collect logs from a successfully repaired DUT."""
        dirname = 'after_%s' % self.tag
        local_log_dir = crashcollect.get_crashinfo_dir(host, dirname)
        host.collect_logs('/var/log', local_log_dir, ignore_errors=True)
        # Collect crash info.
        crashcollect.get_crashinfo(host, None)

    def _check_reset_success(self, host):
        """Check whether reset succeeded, and gather logs if possible.

        @raises AutoservRepairError if the host does not come back up
                within host.BOOT_TIMEOUT.
        """
        # Waiting to boot device after repair action.
        if host.wait_up(host.BOOT_TIMEOUT):
            if host.get_verifier_state('ssh') == hosts.VERIFY_SUCCESS:
                logging.debug(
                        'Skip collection logs due DUT was sshable before')
                return
            try:
                # Collect logs once we regain ssh access before
                # clobbering them.
                self._collect_logs(host)
            except Exception:
                # If the DUT is up, we want to declare success, even if
                # log gathering fails for some reason.  So, if there's
                # a failure, just log it and move on.
                logging.exception('Non-critical failure in log '
                                  'collection during %s.',
                                  self.tag)
            return
        raise hosts.AutoservRepairError(
                'Host %s is offline after %s.' % (host.hostname, self.tag),
                'failed_to_boot_after_' + self.tag)


class ServoSysRqRepair(_ResetRepairAction):
    """
    Repair a Chrome device by sending a system request to the kernel.

    Sending 3 times the Alt+VolUp+x key combination (aka sysrq-x)
    will ask the kernel to panic itself and reboot while conserving
    the kernel logs in console ramoops.
    """

    @timeout_util.TimeoutDecorator(cros_constants.REPAIR_TIMEOUT_SEC)
    def repair(self, host):
        # pylint: disable=missing-docstring
        repair_utils.require_servo(host, ignore_state=True)
        # Press 3 times Alt+VolUp+X
        # no checking DUT health between each press as
        # killing Chrome is not really likely to fix the DUT SSH.
        for _ in range(3):
            try:
                host.servo.sysrq_x()
            except error.TestFail as ex:
                raise hosts.AutoservRepairError(
                        'cannot press sysrq-x: %s.' % str(ex),
                        'cannot_press_sysrq_x')
            # less than 5 seconds between presses.
            time.sleep(2.0)
        self._check_reset_success(host)

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Reset the DUT via keyboard sysrq-x'


class ServoResetRepair(_ResetRepairAction):
    """Repair a Chrome device by resetting it with servo."""

    @timeout_util.TimeoutDecorator(cros_constants.REPAIR_TIMEOUT_SEC)
    def repair(self, host):
        # pylint: disable=missing-docstring
        repair_utils.require_servo(host, ignore_state=True)
        host.servo.get_power_state_controller().reset()
        self._check_reset_success(host)

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Reset the DUT via servo'


class ServoCr50RebootRepair(_ResetRepairAction):
    """
    Repair a Chrome device by resetting cr50 by servo.

    Reset cr50 which is ec+ccd reset.
    """

    @timeout_util.TimeoutDecorator(cros_constants.REPAIR_TIMEOUT_SEC)
    def repair(self, host):
        # pylint: disable=missing-docstring
        try:
            host.servo.get_power_state_controller().cr50_reset()
            self._check_reset_success(host)
        finally:
            # cr50 reset will clear some init like `ccd testlab open`
            # so we want to re-initialize servo after cr50 reset if the main
            # device is ccd.
            if host.servo.main_device_is_ccd():
                host.servo.initialize_dut()

    def _is_applicable(self, host):
        # Requires a servo that exposes the 'cr50_reboot' control.
        if host.servo:
            if host.servo.has_control('cr50_reboot'):
                return True
        return False

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Reset(cr50) the DUT via servo'


class DevDefaultBootRepair(hosts.RepairAction):
    """Repair a CrOS target by setting dev_default_boot to 'disk'"""

    @timeout_util.TimeoutDecorator(cros_constants.SHORT_REPAIR_TIMEOUT_SEC)
    def repair(self, host):
        # pylint: disable=missing-docstring
        host.run('crossystem dev_default_boot=disk', ignore_status=True)

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return "Set dev_default_boot to 'disk'"


class CrosRebootRepair(repair_utils.RebootRepair):
    """Repair a CrOS target by clearing dev mode and rebooting it."""

    @timeout_util.TimeoutDecorator(cros_constants.REPAIR_TIMEOUT_SEC)
    def repair(self, host):
        # pylint: disable=missing-docstring
        # N.B. We need to reboot regardless of whether clearing
        # dev_mode succeeds or fails.
        host.run('/usr/share/vboot/bin/set_gbb_flags.sh 0',
                 ignore_status=True)
        host.run('crossystem disable_dev_request=1',
                 ignore_status=True)
        super(CrosRebootRepair, self).repair(host)

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Reset GBB flags and Reboot the host'


class LabelCleanupRepair(hosts.RepairAction):
    """Cleanup unexpected labels for the host, e.g. mismatched
    cros-version label.
    """
    # The repair action currently only cleanup cros-version label, however
    # we can extent it to cleanup other labels when there is need, and it
    # should be able to determine which label to clean based on check the
    # cached result from it's trigger list. (example: trigger verifiers can
    # be access via self._trigger_list, and we can tell which verifier failed
    # by check Verifier._is_good() method.)

    @timeout_util.TimeoutDecorator(cros_constants.SHORT_REPAIR_TIMEOUT_SEC)
    def repair(self, host):
        # pylint: disable=missing-docstring
        logging.info('Removing %s label from the host', host.VERSION_PREFIX)
        info = host.host_info_store.get()
        info.clear_version_labels()
        host.host_info_store.commit(info)

    @property
    def description(self):
        # pylint: disable=missing-docstring
        return 'Cleanup unexpected labels for the host'


class EnrollmentCleanupRepair(hosts.RepairAction):
    """Cleanup enrollment state on ChromeOS device"""

    @timeout_util.TimeoutDecorator(cros_constants.REPAIR_TIMEOUT_SEC)
    def repair(self, host):
        # Reset VPD enrollment state.
        host.run('/usr/sbin/update_rw_vpd check_enrollment 0')

        # Clear TPM Owner state.
        tpm_utils.ClearTPMOwnerRequest(host, wait_for_ready=True,
                                       timeout=host.BOOT_TIMEOUT)

    def _is_applicable(self, host):
        info = host.host_info_store.get()
        # if os type is missing from host_info, then we assume it's cros.
1017 return getattr(info, 'os', 'cros') in ('', 'cros') 1018 1019 @property 1020 def description(self): 1021 # pylint: disable=missing-docstring 1022 return 'Cleanup enrollment state and reboot the host' 1023 1024 1025class ProvisionRepair(hosts.RepairAction): 1026 """ 1027 Repair by re-installing a test image using quick provision. 1028 1029 Try to install the DUT's designated "stable test image" using the 1030 standard procedure for installing a new test image via quick provision. 1031 """ 1032 1033 @timeout_util.TimeoutDecorator(cros_constants.LONG_REPAIR_TIMEOUT_SEC) 1034 def repair(self, host): 1035 # pylint: disable=missing-docstring 1036 image_name = host.get_cros_repair_image_name() 1037 logging.info('Staging build for provision: %s', image_name) 1038 devserver = dev_server.ImageServer.resolve(image_name, host.hostname) 1039 devserver.trigger_download(image_name, synchronous=False) 1040 update_url = tools.image_url_pattern() % ( 1041 devserver.url(), image_name) 1042 afe_utils.machine_install_and_update_labels(host, update_url) 1043 1044 @property 1045 def description(self): 1046 # pylint: disable=missing-docstring 1047 return 'Re-install the stable build on the host' 1048 1049 1050class PowerWashRepair(ProvisionRepair): 1051 """ 1052 Powerwash the DUT, then re-install using quick provision. 1053 1054 Powerwash the DUT, then attempt to re-install a stable test image as 1055 for `ProvisionRepair`. 
1056 """ 1057 1058 @timeout_util.TimeoutDecorator(cros_constants.LONG_REPAIR_TIMEOUT_SEC) 1059 def repair(self, host): 1060 # pylint: disable=missing-docstring 1061 host.run('echo "fast safe" > ' 1062 '/mnt/stateful_partition/factory_install_reset') 1063 host.reboot(timeout=host.POWERWASH_BOOT_TIMEOUT, wait=True) 1064 super(PowerWashRepair, self).repair(host) 1065 1066 @property 1067 def description(self): 1068 # pylint: disable=missing-docstring 1069 return 'Powerwash and then re-install the stable build on the host' 1070 1071 1072class ServoInstallRepair(hosts.RepairAction): 1073 """ 1074 Reinstall a test image from USB using servo. 1075 1076 Use servo to re-install the DUT's designated "stable test image" 1077 from servo-attached USB storage. 1078 """ 1079 1080 # Timeout value for this repair action is specially configured as we need 1081 # stage image to usb drive, install chromeos image. 1082 @timeout_util.TimeoutDecorator(60 * 60) 1083 def repair(self, host): 1084 # pylint: disable=missing-docstring 1085 repair_utils.require_servo(host, ignore_state=True) 1086 image_name = host.get_cros_repair_image_name() 1087 image_name_on_usb = host._servo_host.validate_image_usbkey() 1088 if image_name_on_usb == image_name: 1089 logging.info( 1090 'Required image %s is already on usbkey,' 1091 ' skipping download.', image_name) 1092 need_update_image = False 1093 else: 1094 logging.info('Required image is not on usbkey.') 1095 need_update_image = True 1096 1097 # Verify if we want to force re-image the USB. 1098 if not need_update_image and host.health_profile: 1099 repair_failed_count = host.health_profile.get_repair_fail_count() 1100 # try to re-image USB when previous attempt failed 1101 if (repair_failed_count > 0 and 1102 (repair_failed_count == 1 or repair_failed_count % 10 == 0)): 1103 logging.info( 1104 'Required re-download image to usbkey as' 1105 ' a previous repair failed. 
Fail count: %s', 1106 repair_failed_count) 1107 need_update_image = True 1108 1109 update_url = None 1110 if need_update_image: 1111 logging.info('Staging image: %s on caching server.', image_name) 1112 _, update_url = host.stage_image_for_servo() 1113 afe_utils.clean_provision_labels(host) 1114 host.servo_install(update_url, is_repair=True) 1115 afe_utils.add_provision_labels(host, host.VERSION_PREFIX, image_name) 1116 1117 @property 1118 def description(self): 1119 # pylint: disable=missing-docstring 1120 return 'Reinstall from USB using servo' 1121 1122 1123class JetstreamTpmRepair(hosts.RepairAction): 1124 """Repair by resetting TPM and rebooting.""" 1125 1126 @timeout_util.TimeoutDecorator(cros_constants.REPAIR_TIMEOUT_SEC) 1127 def repair(self, host): 1128 # pylint: disable=missing-docstring 1129 host.run('rm -f /var/cache/ap/setup-network', ignore_status=True) 1130 host.run('rm -f /home/chronos/.oobe_completed', ignore_status=True) 1131 host.run('rm -f /home/.shadow/.can_attempt_ownership', 1132 ignore_status=True) 1133 host.run('crossystem clear_tpm_owner_request=1', ignore_status=True) 1134 host.reboot() 1135 1136 @property 1137 def description(self): 1138 # pylint: disable=missing-docstring 1139 return 'Reset TPM and reboot' 1140 1141 1142class JetstreamServiceRepair(hosts.RepairAction): 1143 """Repair by restarting Jetstream services.""" 1144 1145 @timeout_util.TimeoutDecorator(cros_constants.REPAIR_TIMEOUT_SEC) 1146 def repair(self, host): 1147 # pylint: disable=missing-docstring 1148 host.cleanup_services() 1149 1150 @property 1151 def description(self): 1152 # pylint: disable=missing-docstring 1153 return 'Restart Jetstream services' 1154 1155 1156def _cros_verify_dag(): 1157 """Return the verification DAG for a `CrosHost`.""" 1158 return _cros_verify_base_dag() + _cros_verify_extended_dag() 1159 1160 1161def _cros_verify_base_dag(): 1162 """Return the base verification DAG for a `CrosHost`.""" 1163 FirmwareStatusVerifier = 
cros_firmware.FirmwareStatusVerifier 1164 FirmwareVersionVerifier = cros_firmware.FirmwareVersionVerifier 1165 verify_dag = ( 1166 (repair_utils.PingVerifier, 'ping', ()), 1167 (repair_utils.SshVerifier, 'ssh', ('ping', )), 1168 (ServoUSBDriveVerifier, 'usb_drive', ()), 1169 (DevDefaultBootVerifier, 'dev_default_boot', ('ssh', )), 1170 (DevModeVerifier, 'devmode', ('ssh', )), 1171 (EnrollmentStateVerifier, 'enrollment_state', ('ssh', )), 1172 (HWIDVerifier, 'hwid', ('ssh', )), 1173 (ACPowerVerifier, 'power', ('ssh', )), 1174 (EXT4fsErrorVerifier, 'ext4', ('ssh', )), 1175 (WritableVerifier, 'writable', ('ssh', )), 1176 (TPMStatusVerifier, 'tpm', ('ssh', )), 1177 (UpdateSuccessVerifier, 'good_provision', ('ssh', )), 1178 (FirmwareTpmVerifier, 'faft_tpm', ('ssh', )), 1179 (FirmwareStatusVerifier, 'fwstatus', ('ssh', )), 1180 (FirmwareVersionVerifier, 'rwfw', ('ssh', )), 1181 (PythonVerifier, 'python', ('ssh', )), 1182 (repair_utils.LegacyHostVerifier, 'cros', ('ssh', )), 1183 (CrosVerisionVerifier, 'cros_version_label', ('ssh', )), 1184 ) 1185 return verify_dag 1186 1187 1188def _cros_verify_extended_dag(): 1189 """Return the extended verification DAG for a `CrosHost`.""" 1190 return ( 1191 (StopStartUIVerifier, 'stop_start_ui', ('ssh', )), 1192 (DUTStorageVerifier, 'storage', ('ssh', )), 1193 ) 1194 1195 1196def _cros_basic_repair_actions( 1197 servo_reset_trigger=DEFAULT_SERVO_RESET_TRIGGER 1198): 1199 """Return the basic repair actions for a `CrosHost` 1200 1201 @param servo_reset_trigger: sequence of verifiers that trigger servo reset 1202 and servo cr50 reboot repair. 1203 """ 1204 repair_actions = ( 1205 # RPM cycling must precede Servo reset: if the DUT has a dead 1206 # battery, we need to reattach AC power before we reset via servo. 
1207 (repair_utils.RPMCycleRepair, 'rpm', (), ( 1208 'ping', 1209 'ssh', 1210 'power', 1211 )), 1212 (ServoResetRepair, 'servoreset', (), servo_reset_trigger), 1213 (ServoCr50RebootRepair, 'cr50_reset', (), servo_reset_trigger), 1214 (ServoSysRqRepair, 'sysrq', (), ( 1215 'ping', 1216 'ssh', 1217 )), 1218 (LabelCleanupRepair, 'label_cleanup', ('ssh', ), 1219 ('cros_version_label', )), 1220 1221 # N.B. FaftFirmwareRepair can't fix a 'good_provision' failure 1222 # directly, because it doesn't remove the flag file that triggers 1223 # the failure. We include it as a repair trigger because it's 1224 # possible the the last update failed because of the firmware, 1225 # and we want the repair steps below to be able to trust the 1226 # firmware. 1227 (cros_firmware.FaftFirmwareRepair, 'faft_firmware_repair', (), ( 1228 'ping', 1229 'ssh', 1230 'fwstatus', 1231 'good_provision', 1232 )), 1233 (DevDefaultBootRepair, 'set_default_boot', ('ssh', ), 1234 ('dev_default_boot', )), 1235 (CrosRebootRepair, 'reboot', ('ssh', ), ( 1236 'devmode', 1237 'writable', 1238 )), 1239 (EnrollmentCleanupRepair, 'cleanup_enrollment', ('ssh', ), 1240 ('enrollment_state', )), 1241 ) 1242 return repair_actions 1243 1244 1245def _cros_extended_repair_actions(provision_triggers=_CROS_PROVISION_TRIGGERS, 1246 powerwash_triggers=_CROS_POWERWASH_TRIGGERS, 1247 usb_triggers=_CROS_USB_TRIGGERS, 1248 usb_dependencies=_CROS_USB_DEPENDENCIES): 1249 """Return the extended repair actions for a `CrosHost`""" 1250 1251 # The dependencies and triggers for the 'provision', 'powerwash', and 'usb' 1252 # repair actions stack up: Each one is able to repair progressively 1253 # more verifiers than the one before. The 'triggers' lists specify 1254 # the progression. 
1255 1256 repair_actions = ( 1257 (ProvisionRepair, 'provision', usb_triggers + powerwash_triggers, 1258 provision_triggers), 1259 (PowerWashRepair, 'powerwash', usb_triggers, 1260 powerwash_triggers + provision_triggers), 1261 ( 1262 ServoInstallRepair, 1263 'usb', 1264 usb_dependencies, 1265 # faft_tpm is a trigger of usb repair action but should not be 1266 # dependence of provision and powerwash repair action, due to 1267 # restriction of current structure, we hardcode it here instead 1268 # of put it into _CROS_USB_TRIGGERS. TODO(xianuowang@) refactor 1269 # the logic to create action/verifier DAG for different host 1270 # type after we decouple infra from test autotest repo. 1271 usb_triggers + powerwash_triggers + provision_triggers + 1272 ('faft_tpm', )), 1273 ) 1274 return repair_actions 1275 1276 1277def _cros_dedicated_repair_actions(firmware_triggers=_CROS_FIRMWARE_TRIGGERS, 1278 usb_dependencies=_CROS_USB_DEPENDENCIES): 1279 """Return the repair actions that only works for `CrosHost`""" 1280 1281 repair_actions = ((cros_firmware.GeneralFirmwareRepair, 'general_firmware', 1282 usb_dependencies, firmware_triggers), ) 1283 return repair_actions 1284 1285 1286def _cros_repair_actions(): 1287 """Return the repair actions for a `CrosHost`.""" 1288 repair_actions = (_cros_basic_repair_actions() + 1289 _cros_extended_repair_actions() + 1290 _cros_dedicated_repair_actions()) 1291 return repair_actions 1292 1293 1294def create_cros_repair_strategy(): 1295 """Return a `RepairStrategy` for a `CrosHost`.""" 1296 verify_dag = _cros_verify_dag() 1297 repair_actions = _cros_repair_actions() 1298 return hosts.RepairStrategy(verify_dag, repair_actions, 'cros') 1299 1300 1301def _moblab_verify_dag(): 1302 """Return the verification DAG for a `MoblabHost`.""" 1303 verify_dag = ( 1304 (repair_utils.SshVerifier, 'ssh', ()), 1305 (ACPowerVerifier, 'power', ('ssh',)), 1306 (PythonVerifier, 'python', ('ssh',)), 1307 (repair_utils.LegacyHostVerifier, 'cros', ('ssh',)), 1308 ) 
1309 return verify_dag 1310 1311 1312def _moblab_repair_actions(): 1313 """Return the repair actions for a `MoblabHost`.""" 1314 repair_actions = ( 1315 (repair_utils.RPMCycleRepair, 'rpm', (), ('ssh', 'power',)), 1316 (ProvisionRepair, 'provision', ('ssh',), ('power', 'python', 'cros')), 1317 ) 1318 return repair_actions 1319 1320 1321def create_moblab_repair_strategy(): 1322 """ 1323 Return a `RepairStrategy` for a `MoblabHost`. 1324 1325 Moblab is a subset of the CrOS verify and repair. Several pieces 1326 are removed because they're not expected to be meaningful. Some 1327 others are removed for more specific reasons: 1328 1329 'tpm': Moblab DUTs don't run the tests that matter to this 1330 verifier. TODO(jrbarnette) This assertion is unproven. 1331 1332 'good_provision': This verifier can't pass, because the Moblab provision 1333 procedure doesn't properly delete the PROVISION_FAILED file. 1334 TODO(jrbarnette) We should refactor ChromiumOSProvisioner so 1335 that it can be different for Moblab. 1336 1337 'firmware': Moblab DUTs shouldn't be in FAFT pools, so we don't try 1338 this. 1339 1340 'powerwash': Powerwash on Moblab causes trouble with deleting the 1341 DHCP leases file, so we skip it. 
1342 """ 1343 verify_dag = _moblab_verify_dag() 1344 repair_actions = _moblab_repair_actions() 1345 return hosts.RepairStrategy(verify_dag, repair_actions, 'moblab') 1346 1347 1348def _jetstream_repair_actions(): 1349 """Return the repair actions for a `JetstreamHost`.""" 1350 provision_triggers = _CROS_PROVISION_TRIGGERS 1351 jetstream_tpm_triggers = ('jetstream_tpm', 'jetstream_attestation') 1352 jetstream_service_triggers = (jetstream_tpm_triggers + 1353 ('jetstream_services',)) 1354 base_actions = _cros_basic_repair_actions(servo_reset_trigger=( 1355 'ping', 1356 'ssh', 1357 )) 1358 custom_actions = ( 1359 (JetstreamTpmRepair, 'jetstream_tpm_repair', 1360 _JETSTREAM_USB_TRIGGERS + _CROS_POWERWASH_TRIGGERS, 1361 provision_triggers + jetstream_tpm_triggers), 1362 (JetstreamServiceRepair, 'jetstream_service_repair', 1363 _JETSTREAM_USB_TRIGGERS + _CROS_POWERWASH_TRIGGERS + 1364 ('jetstream_tpm', 'jetstream_attestation'), 1365 provision_triggers + jetstream_service_triggers), 1366 ) 1367 extend_actions = _cros_extended_repair_actions( 1368 provision_triggers=provision_triggers + jetstream_service_triggers, 1369 usb_triggers=_JETSTREAM_USB_TRIGGERS) 1370 return base_actions + custom_actions + extend_actions 1371 1372 1373def _jetstream_verify_dag(): 1374 """Return the verification DAG for a `JetstreamHost`.""" 1375 verify_dag = _cros_verify_base_dag() + ( 1376 (JetstreamTpmVerifier, 'jetstream_tpm', ('ssh',)), 1377 (JetstreamAttestationVerifier, 'jetstream_attestation', ('ssh',)), 1378 (JetstreamServicesVerifier, 'jetstream_services', ('ssh',)), 1379 ) 1380 return verify_dag 1381 1382 1383def create_jetstream_repair_strategy(): 1384 """ 1385 Return a `RepairStrategy` for a `JetstreamHost`. 1386 1387 The Jetstream repair strategy is based on the CrOS verify and repair, 1388 but adds the JetstreamServicesVerifier. 
1389 """ 1390 verify_dag = _jetstream_verify_dag() 1391 repair_actions = _jetstream_repair_actions() 1392 return hosts.RepairStrategy(verify_dag, repair_actions, 'jetstream') 1393 1394 1395# TODO(pprabhu) Move this to a better place. I have no idea what that place 1396# would be. 1397def _is_virtual_machine(host): 1398 """Determine whether the given |host| is a virtual machine. 1399 1400 @param host: a hosts.Host object. 1401 @returns True if the host is a virtual machine, False otherwise. 1402 """ 1403 output = host.run('cat /proc/cpuinfo | grep "model name"', 1404 ignore_status=True) 1405 return (output.exit_status == 0 and output.stdout and 1406 'qemu' in output.stdout.lower()) 1407 1408 1409class CryptohomeStatus(dict): 1410 """Wrapper for getting cryptohome status from a host.""" 1411 1412 def __init__(self, host): 1413 super(CryptohomeStatus, self).__init__() 1414 self.update(_get_cryptohome_status(host)) 1415 self.tpm = self['tpm'] 1416 1417 @property 1418 def tpm_enabled(self): 1419 # pylint: disable=missing-docstring 1420 return self.tpm.get('enabled') == True 1421 1422 @property 1423 def tpm_owned(self): 1424 # pylint: disable=missing-docstring 1425 return self.tpm.get('owned') == True 1426 1427 @property 1428 def tpm_can_load_srk(self): 1429 # pylint: disable=missing-docstring 1430 return self.tpm.get('can_load_srk') == True 1431 1432 @property 1433 def tpm_can_load_srk_pubkey(self): 1434 # pylint: disable=missing-docstring 1435 return self.tpm.get('can_load_srk_pubkey') == True 1436 1437 1438def _get_cryptohome_status(host): 1439 """Returns a dictionary containing the cryptohome status. 1440 1441 @param host: a hosts.Host object. 1442 @returns A dictionary containing the cryptohome status. 1443 @raises AutoservVerifyError: if the output could not be parsed or the TPM 1444 status is missing. 1445 @raises hosts.AutoservRunError: if the cryptohome command failed. 1446 """ 1447 # This cryptohome command emits status information in JSON format. 
It 1448 # looks something like this: 1449 # { 1450 # "installattrs": { 1451 # ... 1452 # }, 1453 # "mounts": [ { 1454 # ... 1455 # } ], 1456 # "tpm": { 1457 # "being_owned": false, 1458 # "can_connect": true, 1459 # "can_decrypt": false, 1460 # "can_encrypt": false, 1461 # "can_load_srk": true, 1462 # "can_load_srk_pubkey": true, 1463 # "enabled": true, 1464 # "has_context": true, 1465 # "has_cryptohome_key": false, 1466 # "has_key_handle": false, 1467 # "last_error": 0, 1468 # "owned": true 1469 # } 1470 # } 1471 try: 1472 output = host.run('cryptohome --action=status').stdout.strip() 1473 status = json.loads(output) 1474 if 'tpm' not in status: 1475 raise hosts.AutoservVerifyError('TPM status is missing') 1476 return status 1477 except ValueError: 1478 raise hosts.AutoservVerifyError('Unable to parse cryptohome status') 1479