# Lint as: python2, python3
# Copyright 2016 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""
Repair actions and verifiers relating to CrOS firmware.

This contains the repair actions and verifiers needed to find problems
with the firmware installed on ChromeOS DUTs, and when necessary, to
fix problems by updating or re-installing the firmware.

The operations in the module support two distinct use cases:
  * DUTs used for FAFT tests can in some cases have problems with
    corrupted firmware.  The module supplies `FirmwareStatusVerifier`
    to check for corruption, and supplies `FaftFirmwareRepair` to
    re-install firmware of current faft stable_version via servo
    when needed.
  * DUTs used for general testing normally should be running a
    designated "stable" firmware version.  This module supplies
    `FirmwareVersionVerifier` to detect and automatically update
    firmware that is out-of-date from the designated version. This module
    also supplies `GeneralFirmwareRepair` to re-install firmware that
    tied with current stable_version image via servo when needed.

For purposes of the operations in the module, we distinguish three kinds
of DUT, based on pool assignments:
  * DUTs used for general testing.  These DUTs automatically check for
    and install the stable firmware using `FirmwareVersionVerifier`.
  * DUTs in pools used for FAFT testing.  These check for bad firmware
    builds with `FirmwareStatusVerifier`, and will fix problems using
    `FaftFirmwareRepair`.  These DUTs don't check for or install the
    stable firmware.
  * DUTs not in general pools, and not used for FAFT.  These DUTs
    are expected to be managed by separate processes and are excluded
    from all of the verification and repair code in this module.
"""

# pylint: disable=missing-docstring

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import json
import logging

import common
from autotest_lib.client.common_lib import global_config
from autotest_lib.client.common_lib import hosts
from autotest_lib.server import afe_utils
from autotest_lib.server.hosts import repair_utils
from autotest_lib.server.hosts import cros_constants

from autotest_lib.utils.frozen_chromite.lib import timeout_util
import six


# _FIRMWARE_REPAIR_POOLS - The set of pools that should be
# managed by `FirmwareStatusVerifier` and `FaftFirmwareRepair`.
# The value is read once, at import time, from the comma-separated
# `pools_support_firmware_repair` entry of the `CROS` config section.
#
_FIRMWARE_REPAIR_POOLS = set(
    global_config.global_config.get_config_value(
        'CROS',
        'pools_support_firmware_repair',
        type=str).split(','))


def _is_firmware_testing_device(host):
    """
    check if a host is dedicated for firmware testing.

    When this function returns true, the DUT should be managed by
    `FirmwareStatusVerifier` and `FaftFirmwareRepair`, but not
    `FirmwareVersionVerifier` and `GeneralFirmwareRepair`.

    @param host  The host to be checked; its pools are read from
                 `host.host_info_store`.
    @return A true value if the host should use `FirmwareStatusVerifier`
            and `FaftFirmwareRepair`; a false value otherwise.
    """
    info = host.host_info_store.get()
    # The host qualifies if it belongs to at least one firmware pool.
    return bool(info.pools & _FIRMWARE_REPAIR_POOLS)


def _is_firmware_update_supported(host):
    """
    Return whether a DUT should be running the standard firmware.

    In the test lab, DUTs used for general testing, (e.g. the `bvt`
    pool) need their firmware kept up-to-date with
    `FirmwareVersionVerifier`.  However, some pools have alternative
    policies for firmware management.  This returns whether a given DUT
    should be updated via the standard stable version update, or
    managed by some other procedure.

    @param host   The host to be checked for update policy.
    @return A true value if the host should use
            `FirmwareVersionVerifier`; a false value otherwise.
    """
    return not _is_firmware_testing_device(host)


def _get_available_firmware(host, model):
    """Get the available RW firmware version given the model.

    Runs `chromeos-firmwareupdate --manifest` on the host and looks up
    `<model>.host.versions.rw` in the JSON it prints.  When the manifest
    holds at most one entry, that entry is used regardless of `model`.

    Any unexpected output (non-zero exit status, malformed JSON, or a
    manifest that does not have the expected nested structure) yields
    `None` rather than an exception, so that callers can treat the
    version as "undetermined".

    @param host   The host to get available firmware for.
    @param model  The model name to get corresponding firmware version.
    @return The available RW firmware version if found, else, None.
    """
    result = host.run('chromeos-firmwareupdate --manifest', ignore_status=True)

    if result.exit_status != 0:
        return None

    # The manifest is a JSON in .model.host.versions.rw
    try:
        data = json.loads(result.stdout) or {}
    except ValueError:
        # json raises ValueError (JSONDecodeError on python3) on bad input.
        logging.warning('Firmware manifest is not valid JSON.')
        return None
    if not isinstance(data, dict):
        return None
    key = model if len(data) > 1 else next(six.iterkeys(data), '')
    key += '.host.versions.rw'
    for k in key.split('.'):
        if not isinstance(data, dict):
            # A path component resolved to a non-mapping before the end
            # of the path; the manifest does not have the expected shape.
            return None
        data = data.get(k, {})
    return data or None


class FirmwareStatusVerifier(hosts.Verifier):
    """
    Verify that a host's firmware is in a good state.

    For DUTs that run firmware tests, it's possible that the firmware
    on the DUT can get corrupted.  This verifier checks whether it
    appears that firmware should be re-flashed using servo.
    """

    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        """
        Check firmware slots A and B with `vbutil_firmware`.

        Hosts that are not firmware testing devices are skipped.

        @param host  The host whose firmware is to be checked.
        @raises hosts.AutoservVerifyError if verification of either
                firmware slot exits with a non-zero status.
        """
        if not _is_firmware_testing_device(host):
            return
        try:
            # Read the AP firmware and dump the sections that we're
            # interested in.
            cmd = ('mkdir /tmp/verify_firmware; '
                   'cd /tmp/verify_firmware; '
                   'for section in VBLOCK_A VBLOCK_B FW_MAIN_A FW_MAIN_B; '
                   'do flashrom -p host -r -i $section:$section; '
                   'done')
            host.run(cmd)

            # Verify the firmware blocks A and B.
            cmd = ('vbutil_firmware --verify /tmp/verify_firmware/VBLOCK_%c'
                   ' --signpubkey /usr/share/vboot/devkeys/root_key.vbpubk'
                   ' --fv /tmp/verify_firmware/FW_MAIN_%c')
            for c in ('A', 'B'):
                rv = host.run(cmd % (c, c), ignore_status=True)
                if rv.exit_status:
                    raise hosts.AutoservVerifyError(
                            'Firmware %c is in a bad state.' % c)
        finally:
            # Remove the temporary files.
            host.run('rm -rf /tmp/verify_firmware')

    @property
    def description(self):
        return 'Firmware on this DUT is clean'


class FirmwareRepair(hosts.RepairAction):
    """
    Reinstall the firmware image using servo.

    This is the shared base of `FaftFirmwareRepair` and
    `GeneralFirmwareRepair`.  It supplies helpers to look up the build
    to install and the two ways of installing it; the subclasses decide
    which DUTs they apply to and which helper to use.
    """

    def _get_faft_stable_build(self, host):
        """Return the faft stable version recorded for `host`."""
        info = host.host_info_store.get()
        return afe_utils.get_stable_faft_version_v2(info)

    def _get_os_stable_build(self, host):
        """Return the name of the host's stable OS repair image."""
        # Use firmware in current stable os build.
        return host.get_cros_repair_image_name()

    def _run_faft_repair(self, host, build):
        """Install the firmware from `build` via `host.firmware_install`."""
        host.firmware_install(build)

    def _run_general_repair(self, host, build):
        """
        Flash EC and AP firmware from the image preloaded on the usbkey.

        After flashing, the DUT is cold reset through servo, GBB flags
        and the dev mode request are cleared, and the DUT is rebooted.

        @param host   The host to be repaired.
        @param build  The OS build expected to be present on the usbkey.
        @raises hosts.AutoservRepairError if `build` is not the image on
                the usbkey, or if a signed AP image variant exists while
                the DUT phase is evt, dvt or empty.
        """
        # As GeneralFirmwareRepair is the last repair action, we expect
        # stable_version os image is loaded on usbkey during other repair
        # action runs. And there is also no point to repeat and waste time if
        # download image to usbkey failed in other repair actions.
        if host._servo_host.validate_image_usbkey() != build:
            raise hosts.AutoservRepairError('%s is expected to be preloaded,'
                      ' however it\'s not found on the usbkey' % build,
                      'image not loaded on usbkey')
        ec_image, bios_image = host._servo_host.prepare_repair_firmware_image()

        # For EVT device with signed variant exists we skip this repair
        # as it's hard to decide which image to use if DUT do not boot.
        info = host.host_info_store.get()
        phase = info.get_label_value('phase')
        # bios_image may be empty (see the `if bios_image:` check below),
        # so only look for the 'signed' marker when an image was found.
        if (bios_image and 'signed' in bios_image
                and phase.lower() in ('evt', 'dvt', '')):
            raise hosts.AutoservRepairError(
                    'Could not determine which firmware image to use'
                    ' due to signed firmware image variant exists but'
                    ' DUT phase is earlier than PVT or missing; Phase'
                    ' from inventory: %s' % phase,
                    'Can not determine variant for EVT device')

        # Before flash firmware we want update the build into health profile.
        if host.health_profile:
            host.health_profile.set_firmware_stable_version(build)

        if ec_image:
            logging.info('Attempting to flash ec firmware...')
            host.servo.program_ec(ec_image, copy_image=False)
        if bios_image:
            logging.info('Attempting to flash bios firmware...')
            host._servo_host.flash_ap_firmware_via_servo(bios_image)

        logging.info('Cold resetting DUT through servo...')
        host.servo.get_power_state_controller().reset()
        host.wait_up(timeout=host.BOOT_TIMEOUT)
        # flash firmware via servo will turn DUT into dev mode, so disable
        # dev mode and reset gbb flag here.
        host.run('/usr/share/vboot/bin/set_gbb_flags.sh 0', ignore_status=True)
        host.run('crossystem disable_dev_request=1', ignore_status=True)
        host.reboot()


class FaftFirmwareRepair(FirmwareRepair):
    """
    Reinstall the firmware for DUTs in faft related pool.
    """

    def repair(self, host):
        """
        Install the faft stable firmware, or fall back to the OS build.

        @param host  The host to be repaired.
        @raises hosts.AutoservRepairError if neither a faft stable
                version nor an OS stable version can be found.
        """
        repair_utils.require_servo(host, ignore_state=True)
        build = self._get_faft_stable_build(host)
        if build:
            self._run_faft_repair(host, build)
        else:
            logging.info('Cannot find faft stable_version, falling back to'
                         ' use firmware on OS stable_version.')
            build = self._get_os_stable_build(host)
            if not build:
                raise hosts.AutoservRepairError(
                        'Failed to find stable_version from host_info.',
                        'cannot find stable_version')
            self._run_general_repair(host, build)

    def _is_applicable(self, host):
        return _is_firmware_testing_device(host)

    @property
    def description(self):
        return 'Re-install the stable firmware(faft) via servo'


class GeneralFirmwareRepair(FirmwareRepair):
    """Reinstall the firmware for non-faft DUTs.

    We need different RepairAction for non firmware testing DUT because
    we want only try re-install firmware if all other RepairAction could
    not restore ssh capability to the DUT.
    """

    def repair(self, host):
        """
        Install the firmware that comes with the OS stable version.

        @param host  The host to be repaired.
        @raises hosts.AutoservRepairError if the OS stable version
                cannot be found.
        """
        repair_utils.require_servo(host, ignore_state=True)
        build = self._get_os_stable_build(host)
        if not build:
            raise hosts.AutoservRepairError(
                    'Failed to find stable_version from host_info.',
                    'cannot find stable_version')
        self._run_general_repair(host, build)

    def _is_applicable(self, host):
        """
        Decide whether flashing firmware via servo should be attempted.

        The action applies only to non firmware-testing DUTs that have a
        servo and a device health profile, that failed repair at least
        twice, and for which the candidate build has neither been flashed
        successfully already nor failed more than twice.

        @param host  The host to be checked.
        @return True if the repair action should run; False otherwise.
        """
        if _is_firmware_testing_device(host):
            return False
        if not host.servo:
            logging.info(
                    'The current servo state of %s is not met the'
                    ' minimum requirement to flash firmware.', host.hostname)
            # Without a servo the firmware cannot be flashed at all.
            return False
        # Flash firmware via servo is considered an expensive operation, so
        # we want to check repair data from previous repairs to determine if
        # firmware repair is needed.
        dhp = host.health_profile
        if not dhp:
            logging.info('Device health profile is not available, cannot'
                         ' determine if firmware repair is needed.')
            return False
        repair_fail_count = dhp.get_repair_fail_count()
        if repair_fail_count < 2:
            # We want to start with a more conservative strategy, so only try
            # this action on DUTs that failed repair at least twice.
            # @TODO(xianuowang@) adjust or remove this threshold.
            logging.info(
                    'Firmware repair will only applies to DUT that'
                    ' failed at least two AdminRepair, current fail'
                    ' count: %s', repair_fail_count)
            return False
        flashed_build = dhp.get_firmware_stable_version()
        candidate_build = self._get_os_stable_build(host)
        # If we had an success firmware flash in this repair loop,
        # there is no need to retry flash the same firmware build.
        if (dhp.get_succeed_repair_action(self.tag) > 0
                    and flashed_build == candidate_build):
            logging.info(
                    'Firmware from %s has been already installed on %s,'
                    ' no need to retry.', flashed_build, host.hostname)
            return False
        if (dhp.get_failed_repair_action(self.tag) > 2
                    and flashed_build == candidate_build):
            logging.info(
                    'Firmware from %s has been attempted and failed 3 '
                    'times, no need to retry.', flashed_build)
            return False
        return True

    @property
    def description(self):
        return 'Re-install the stable firmware(non-faft) via servo'


class FirmwareVersionVerifier(hosts.Verifier):
    """
    Check for a firmware update, and apply it if appropriate.

    This verifier checks to ensure that either the firmware on the DUT
    is up-to-date, or that the target firmware can be installed from the
    currently running build.

    Failure occurs when all of the following apply:
     1. The DUT is not excluded from updates.  For example, DUTs used
        for FAFT testing use `FaftFirmwareRepair` instead.
     2. The DUT's board has an assigned stable firmware version.
     3. The DUT is not running the assigned stable firmware.
     4. The firmware supplied in the running OS build is not the
        assigned stable firmware.

    If the DUT needs an upgrade and the currently running OS build
    supplies the necessary firmware, the verifier installs the new
    firmware using `chromeos-firmwareupdate`.  Failure to install will
    cause the verifier to fail.

    This verifier nominally breaks the rule that "verifiers must succeed
    quickly", since it can invoke `reboot()` during the success code
    path.  We're doing it anyway for two reasons:
      * The time between updates will typically be measured in months,
        so the amortized cost is low.
      * The reason we distinguish repair from verify is to allow
        rescheduling work immediately while the expensive repair happens
        out-of-band.  But a firmware update will likely hit all DUTs at
        once, so it's pointless to pass the buck to repair.

    N.B. This verifier is a trigger for all repair actions that install
    the stable repair image.  If the firmware is out-of-date, but the
    stable repair image does *not* contain the proper firmware version,
    _the target DUT will fail repair, and will be unable to fix itself_.
    """

    @staticmethod
    def _get_rw_firmware(host):
        """
        Return the firmware id reported by `crossystem fwid`.

        Surrounding whitespace is removed so that the value can be
        compared directly against a stable version string.

        @param host  The host to be queried.
        @return The firmware id, or None if the command fails or prints
                nothing.
        """
        result = host.run('crossystem fwid', ignore_status=True)
        if result.exit_status == 0:
            return result.stdout.strip() or None
        else:
            return None

    @staticmethod
    def _check_hardware_match(version_a, version_b):
        """
        Check that two firmware versions identify the same hardware.

        Firmware version strings look like this:
            Google_Gnawty.5216.239.34
        The part before the numbers identifies the hardware for which
        the firmware was built.  This function checks that the hardware
        identified by `version_a` and `version_b` is the same.

        This is a confidence check to protect us from installing the wrong
        firmware on a DUT when a board label has somehow gone astray.

        @param version_a  First firmware version for the comparison.
        @param version_b  Second firmware version for the comparison.
        @raises hosts.AutoservVerifyError if the hardware parts differ.
        """
        hardware_a = version_a.split('.')[0]
        hardware_b = version_b.split('.')[0]
        if hardware_a != hardware_b:
            message = 'Hardware/Firmware mismatch updating %s to %s'
            raise hosts.AutoservVerifyError(
                    message % (version_a, version_b))

    def _is_stable_image_installed(self, host):
        """Verify that ChromeOS image on host is a stable version.

        This check verify that device booted from stable image to protect us
        from installing the firmware from bad/broken/no-tested image. Bad
        image can have broken updater or corrupted firmware.

        The representation version looks like:
                nocturne-release/R89-13728.0.0
        Check compare version from host to version provide as stable image
        from host-info file.

        @param host  CrosHost instance.
        @raises hosts.AutoservNonCriticalVerifyError if the image on the
                host differs from the stable repair image.
        """
        os_from_host = host.get_release_builder_path()
        os_from_host_info = host.get_cros_repair_image_name()
        if os_from_host != os_from_host_info:
            raise hosts.AutoservNonCriticalVerifyError(
                    'Firmware update can be run only from stable image.'
                    ' Expected version:"%s", actually: "%s"' %
                    (os_from_host_info, os_from_host))

    @timeout_util.TimeoutDecorator(cros_constants.VERIFY_TIMEOUT_SEC)
    def verify(self, host):
        """
        Run the four tests from the class docstring; update if possible.

        @param host  The host to be verified.
        @raises hosts.AutoservVerifyError if the model is unknown, if the
                running OS build does not supply the stable firmware, or
                if the firmware update fails.
        """
        # Test 1 - The DUT is not excluded from updates.
        if not _is_firmware_update_supported(host):
            return
        # Test 2 - The DUT has an assigned stable firmware version.
        info = host.host_info_store.get()
        if info.model is None:
            raise hosts.AutoservVerifyError(
                    'Can not verify firmware version. '
                    'No model label value found')

        stable_firmware = None
        try:
            stable_firmware = afe_utils.get_stable_firmware_version_v2(info)
        except Exception as e:
            logging.exception('Failed lookup to AFE for stable fw version '
                              ' with exception: %s', e)

        if stable_firmware is None:
            logging.debug('Expected FW version not found')
            # This DUT doesn't have a firmware update target
            return
        logging.debug('Expected FW version: %s', stable_firmware)
        # For tests 3 and 4:  If the output from `crossystem` or
        # `chromeos-firmwareupdate` isn't what we expect, we log an
        # error, but don't fail:  We don't want DUTs unable to test a
        # build merely because of a bug or change in either of those
        # commands.

        # Test 3 - The DUT is not running the target stable firmware.
        current_firmware = self._get_rw_firmware(host)
        if current_firmware is None:
            logging.error('DUT firmware version can\'t be determined.')
            return
        logging.debug('Current FW version: %s', current_firmware)
        if current_firmware == stable_firmware:
            return
        # Test 4 - The firmware supplied in the running OS build is not
        # the assigned stable firmware.
        available_firmware = _get_available_firmware(host, info.model)
        if available_firmware is None:
            logging.error('Supplied firmware version in OS can\'t be '
                          'determined.')
            return
        self._is_stable_image_installed(host)
        if available_firmware != stable_firmware:
            raise hosts.AutoservVerifyError(
                    'DUT firmware requires update from %s to %s' %
                    (current_firmware, stable_firmware))
        # Time to update the firmware.
        logging.info('Updating firmware from %s to %s',
                     current_firmware, stable_firmware)
        self._check_hardware_match(current_firmware, stable_firmware)
        try:
            host.run('chromeos-firmwareupdate --mode=autoupdate')
            host.reboot()
        except Exception:
            message = ('chromeos-firmwareupdate failed: from '
                       '%s to %s')
            logging.exception(message, current_firmware, stable_firmware)
            raise hosts.AutoservVerifyError(
                    message % (current_firmware, stable_firmware))
        # Confirm that the DUT actually came back on the new firmware.
        final_firmware = self._get_rw_firmware(host)
        if final_firmware != stable_firmware:
            message = ('chromeos-firmwareupdate failed: tried upgrade '
                       'to %s, now running %s instead')
            raise hosts.AutoservVerifyError(
                    message % (stable_firmware, final_firmware))

    @property
    def description(self):
        return 'The firmware on this DUT is up-to-date'