#!/usr/bin/env python2
# Copyright 2015 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Install an initial test image on a set of DUTs.

The methods in this module are meant for two nominally distinct use
cases that share a great deal of code internally.  The first use
case is for deployment of DUTs that have just been placed in the lab
for the first time.  The second use case is for use after repairing
a servo.

Newly deployed DUTs may be in a somewhat anomalous state:
  * The DUTs are running a production base image, not a test image.
    By extension, the DUTs aren't reachable over SSH.
  * The DUTs are not necessarily in the AFE database.  DUTs that
    _are_ in the database should be locked.  Either way, the DUTs
    cannot be scheduled to run tests.
  * The servos for the DUTs need not be configured with the proper
    overlay.

More broadly, it's not expected that the DUT will be working at the
start of this operation.  If the DUT isn't working at the end of the
operation, an error will be reported.

The script performs the following functions:
  * Configure the servo for the target overlay, and test that the
    servo is generally in good order.
  * For the full deployment case, install dev-signed RO firmware
    from the designated stable test image for the DUTs.
  * For both cases, use servo to install the stable test image from
    USB.
  * If the DUT isn't in the AFE database, add it.

The script imposes these preconditions:
  * Every DUT has a properly connected servo.
  * Every DUT and servo have proper DHCP and DNS configurations.
  * Every servo host is up and running, and accessible via SSH.
  * There is a known, working test image that can be staged and
    installed on the target DUTs via servo.
  * Every DUT has the same board and model.
  * For the full deployment case, every DUT must be in dev mode,
    and configured to allow boot from USB with ctrl+U.

The implementation uses the `multiprocessing` module to run all
installations in parallel, separate processes.

"""

import atexit
from collections import namedtuple
import functools
import json
import logging
import multiprocessing
import os
import shutil
import sys
import tempfile
import time
import traceback

import common
from autotest_lib.client.common_lib import error
from autotest_lib.client.common_lib import host_states
from autotest_lib.client.common_lib import time_utils
from autotest_lib.client.common_lib import utils
from autotest_lib.client.common_lib.cros import retry
from autotest_lib.server import afe_utils
from autotest_lib.server import constants
from autotest_lib.server import frontend
from autotest_lib.server import hosts
from autotest_lib.server.cros.dynamic_suite.constants import VERSION_PREFIX
from autotest_lib.server.hosts import afe_store
from autotest_lib.server.hosts import servo_host
from autotest_lib.site_utils.deployment import cmdvalidate
from autotest_lib.site_utils.deployment.prepare import dut as preparedut
from autotest_lib.utils import labellib


_LOG_FORMAT = '%(asctime)s | %(levelname)-10s | %(message)s'

_DEFAULT_POOL = constants.Labels.POOL_PREFIX + 'suites'

_LABSTATION_DEFAULT_POOL = constants.Labels.POOL_PREFIX + 'labstation_main'

_DIVIDER = '\n============\n'

_LOG_BUCKET_NAME = 'chromeos-install-logs'

_OMAHA_STATUS = 'gs://chromeos-build-release-console/omaha_status.json'

# Lock reasons we'll pass when locking DUTs, depending on the
# host's prior state.
_LOCK_REASON_EXISTING = 'Repairing or deploying an existing host'
_LOCK_REASON_NEW_HOST = 'Repairing or deploying a new host'

_ReportResult = namedtuple('_ReportResult', ['hostname', 'message'])


class InstallFailedError(Exception):
    """Generic error raised explicitly in this module."""


class _NoAFEServoPortError(InstallFailedError):
    """Exception when there is no servo port stored in the AFE."""


class _MultiFileWriter(object):

    """Group file objects for writing at once."""

    def __init__(self, files):
        """Initialize _MultiFileWriter.

        @param files  Iterable of file objects for writing.
        """
        self._files = files

    def write(self, s):
        """Write a string to the files.

        @param s  Write this string.
        """
        for fileobj in self._files:
            fileobj.write(s)


def _get_upload_log_path(arguments):
    """Build the Google Storage path that logs are uploaded to.

    @param arguments  Command line arguments with options; only
                      `upload_basename` is read here.

    @return A `gs://` URL inside the install-logs bucket.
    """
    return 'gs://{bucket}/{name}'.format(
            bucket=_LOG_BUCKET_NAME,
            name=arguments.upload_basename)


def _upload_logs(dirpath, gspath):
    """Upload report logs to Google Storage.

    @param dirpath  Path to directory containing the logs.
    @param gspath   Path to GS bucket.
    """
    utils.run(['gsutil', 'cp', '-r', '--', dirpath, gspath])


def _get_omaha_build(board):
    """Get the currently preferred Beta channel build for `board`.

    Open and read through the JSON file provided by GoldenEye that
    describes what version Omaha is currently serving for all boards
    on all channels.  Find the entry for `board` on the Beta channel,
    and return that version string.

    @param board  The board to look up from GoldenEye.

    @return Returns a Chrome OS version string in standard form
            R##-####.#.#.  Will return `None` if no Beta channel
            entry is found.
    """
    ret = utils.run(['gsutil', 'cat', '--', _OMAHA_STATUS])
    omaha_status = json.loads(ret.stdout)
    # The status file is matched against the board name with '_'
    # replaced by '-'.
    omaha_board = board.replace('_', '-')
    for e in omaha_status['omaha_data']:
        if (e['channel'] == 'beta' and
                e['board']['public_codename'] == omaha_board):
            milestone = e['chrome_version'].split('.')[0]
            build = e['chrome_os_version']
            return 'R%s-%s' % (milestone, build)
    return None


def _update_build(afe, report_log, arguments):
    """Placeholder kept for interface compatibility; always raises.

    @param afe         Unused.
    @param report_log  Unused.
    @param arguments   Unused.

    @raises RuntimeError unconditionally.
    """
    raise RuntimeError(
            "site_utils.deployment::_update_build is intentionally deleted")


def _create_host(hostname, afe, afe_host):
    """Create a CrosHost object for the DUT.

    This host object is used to update AFE label information for the DUT, but
    can not be used for installation image on the DUT. In particular, this host
    object does not have the servo attribute populated.

    @param hostname  Hostname of the target DUT.
    @param afe       A frontend.AFE object.
    @param afe_host  AFE Host object for the DUT.
    """
    machine_dict = {
            'hostname': hostname,
            'afe_host': afe_host,
            'host_info_store': afe_store.AfeStore(hostname, afe),
    }
    return hosts.create_host(machine_dict)


def _try_lock_host(afe_host):
    """Lock a host in the AFE, and report whether it succeeded.

    The lock action is logged regardless of success; failures are
    logged if they occur.

    @param afe_host AFE Host instance to be locked.

    @return `True` on success, or `False` on failure.
    """
    try:
        logging.warning('Locking host now.')
        afe_host.modify(locked=True,
                        lock_reason=_LOCK_REASON_EXISTING)
    except Exception as e:
        logging.exception('Failed to lock: %s', e)
        return False
    return True


def _try_unlock_host(afe_host):
    """Unlock a host in the AFE, and report whether it succeeded.

    The unlock action is logged regardless of success; failures are
    logged if they occur.

    @param afe_host AFE Host instance to be unlocked.

    @return `True` on success, or `False` on failure.
    """
    try:
        logging.warning('Unlocking host.')
        afe_host.modify(locked=False, lock_reason='')
    except Exception as e:
        logging.exception('Failed to unlock: %s', e)
        return False
    return True


def _update_host_attributes(afe, hostname, host_attrs):
    """Update the attributes for a given host.

    @param afe          AFE object for RPC calls.
    @param hostname     Host name of the DUT.
    @param host_attrs   Dictionary with attributes to be applied to the
                        host.
    """
    s_hostname, s_port, s_serial = _extract_servo_attributes(hostname,
                                                             host_attrs)
    afe.set_host_attribute(servo_host.SERVO_HOST_ATTR,
                           s_hostname,
                           hostname=hostname)
    afe.set_host_attribute(servo_host.SERVO_PORT_ATTR,
                           s_port,
                           hostname=hostname)
    # The serial has no default, so it is only stored when supplied.
    if s_serial:
        afe.set_host_attribute(servo_host.SERVO_SERIAL_ATTR,
                               s_serial,
                               hostname=hostname)


def _extract_servo_attributes(hostname, host_attrs):
    """Extract servo attributes from the host attribute dict, setting defaults.

    @param hostname     Host name of the DUT.
    @param host_attrs   Dictionary of host attributes to read from.

    @return (servo_hostname, servo_port, servo_serial)
    """
    # Grab the servo hostname/port/serial from `host_attrs` if supplied.
    # For new servo V4 deployments, we require the user to supply the
    # attributes (because there are no appropriate defaults).  So, if
    # none are supplied, we assume it can't be V4, and apply the
    # defaults for servo V3.
    s_hostname = (host_attrs.get(servo_host.SERVO_HOST_ATTR) or
                  servo_host.make_servo_hostname(hostname))
    s_port = (host_attrs.get(servo_host.SERVO_PORT_ATTR) or
              str(servo_host.ServoHost.DEFAULT_PORT))
    s_serial = host_attrs.get(servo_host.SERVO_SERIAL_ATTR)
    return s_hostname, s_port, s_serial


def _wait_for_idle(afe, host_id):
    """Helper function for `_ensure_host_idle`.

    Poll the host with the given `host_id` via `afe`, waiting for it
    to become idle.  Run forever; the caller takes care of timing out.

    @param afe       AFE object for RPC calls.
    @param host_id   Id of the host that's expected to become idle.
    """
    while True:
        afe_host = afe.get_hosts(id=host_id)[0]
        if afe_host.status in host_states.IDLE_STATES:
            return
        # Let's not spam our server.
        time.sleep(0.2)


def _ensure_host_idle(afe, afe_host):
    """Abort any special task running on `afe_host`.

    The given `afe_host` is currently locked.  If there's a special task
    running on the given `afe_host`, abort it, then wait for the host to
    show up as idle, return whether the operation succeeded.

    @param afe       AFE object for RPC calls.
    @param afe_host  Host to be aborted.

    @return A true value if the host is idle at return, or a false value
        if the host wasn't idle after some reasonable time.
    """
    # We need to talk to the shard, not the master, for at least two
    # reasons:
    #   * The `abort_special_tasks` RPC doesn't forward from the master
    #     to the shard, and only the shard has access to the special
    #     tasks.
    #   * Host status on the master can lag actual status on the shard
    #     by several minutes.  Only the shard can provide status
    #     guaranteed to post-date the call to lock the DUT.
    if afe_host.shard:
        afe = frontend.AFE(server=afe_host.shard)
        afe_host = afe.get_hosts(id=afe_host.id)[0]
    if afe_host.status in host_states.IDLE_STATES:
        return True
    afe.run('abort_special_tasks', host_id=afe_host.id, is_active=1)
    # A true first element from `retry.timeout` is treated as "not idle".
    return not retry.timeout(_wait_for_idle, (afe, afe_host.id),
                             timeout_sec=5.0)[0]


def _get_afe_host(afe, hostname, host_attrs, arguments):
    """Get an AFE Host object for the given host.

    If the host is found in the database, return the object
    from the RPC call with the updated attributes in host_attr_dict.

    If no host is found, create one with appropriate servo
    attributes and the given board label.

    If the board/model label check fails after this function locked a
    pre-existing host, the host is unlocked again before the error
    propagates.

    @param afe          AFE object for RPC calls.
    @param hostname     Host name of the DUT.
    @param host_attrs   Dictionary with attributes to be applied to the
                        host.
    @param arguments    Command line arguments with options.

    @return A tuple of the afe_host, plus a flag. The flag indicates
            whether the Host should be unlocked if subsequent operations
            fail.  (Hosts are always unlocked after success).
    """
    hostlist = afe.get_hosts([hostname])
    unlock_on_failure = False
    if hostlist:
        afe_host = hostlist[0]
        if not afe_host.locked:
            if _try_lock_host(afe_host):
                unlock_on_failure = True
            else:
                raise Exception('Failed to lock host')
        if not _ensure_host_idle(afe, afe_host):
            if unlock_on_failure and not _try_unlock_host(afe_host):
                raise Exception(
                        'Failed to abort host, and failed to unlock it')
            raise Exception('Failed to abort task on host')
        # This host was pre-existing; if the user didn't supply
        # attributes, don't update them, because the defaults may
        # not be correct.
        if host_attrs and not arguments.labstation:
            _update_host_attributes(afe, hostname, host_attrs)
    else:
        afe_host = afe.create_host(hostname,
                                   locked=True,
                                   lock_reason=_LOCK_REASON_NEW_HOST)

        if not arguments.labstation:
            _update_host_attributes(afe, hostname, host_attrs)

    # Correct board/model label is critical to installation. Always ensure user
    # supplied board/model matches the AFE information.
    try:
        _ensure_label_in_afe(afe_host, 'board', arguments.board)
        _ensure_label_in_afe(afe_host, 'model', arguments.model)
    except Exception:
        # The caller never sees `unlock_on_failure` when we raise, so
        # release any lock taken above rather than leaving it behind.
        if unlock_on_failure and not _try_unlock_host(afe_host):
            logging.error('Failed to unlock host!')
        raise

    # Re-fetch so the returned object reflects the updates made above.
    afe_host = afe.get_hosts([hostname])[0]
    return afe_host, unlock_on_failure


def _ensure_label_in_afe(afe_host, label_name, label_value):
    """Add the given label, only if one doesn't already exist.

    Does nothing when `label_value` is empty.

    @param afe_host     AFE Host object to check and update.
    @param label_name   name of the label, e.g. 'board', 'model', etc.
    @param label_value  value of the label.

    @raises InstallFailedError if the supplied value is different from the
            existing value of that label in the AFE.
    """
    if not label_value:
        return

    labels = labellib.LabelsMapping(afe_host.labels)
    if label_name not in labels:
        afe_host.add_labels(['%s:%s' % (label_name, label_value)])
        return

    existing_value = labels[label_name]
    if label_value != existing_value:
        raise InstallFailedError(
                'provided %s %s does not match the %s %s for host %s' %
                (label_name, label_value, label_name, existing_value,
                 afe_host.hostname))


def _create_host_for_installation(host, arguments):
    """Creates a context manager of hosts.CrosHost object for installation.

    The host object yielded by the returned context manager is agnostic of the
    infrastructure environment. In particular, it does not have any references
    to the AFE.

    @param host: A server.hosts.CrosHost object.
    @param arguments: Parsed commandline arguments for this script.

    @return a context manager which yields hosts.CrosHost object.
    """
    info = host.host_info_store.get()
    s_host, s_port, s_serial = _extract_servo_attributes(host.hostname,
                                                         info.attributes)
    return preparedut.create_cros_host(host.hostname, arguments.board,
                                       arguments.model, s_host, s_port,
                                       s_serial, arguments.logdir)


def _install_test_image(host, arguments):
    """Install a test image to the DUT.

    Install a stable test image on the DUT using the full servo
    repair flow.

    @param host       Host instance for the DUT being installed.
    @param arguments  Command line arguments with options.
    """
    repair_image = _get_cros_repair_image_name(host)
    logging.info('Using repair image %s', repair_image)
    if arguments.dry_run:
        return
    if arguments.stageusb:
        try:
            preparedut.download_image_to_servo_usb(host, repair_image)
        except Exception as e:
            logging.exception('Failed to stage image on USB: %s', e)
            raise Exception('USB staging failed')
    if arguments.install_test_image:
        try:
            preparedut.install_test_image(host)
        except error.AutoservRunError as e:
            logging.exception('Failed to install: %s', e)
            raise Exception('chromeos-install failed')
    if arguments.install_firmware:
        try:
            if arguments.using_servo:
                logging.debug('Install FW using servo.')
                preparedut.flash_firmware_using_servo(host, repair_image)
            else:
                logging.debug('Install FW by chromeos-firmwareupdate.')
                preparedut.install_firmware(host)
        except error.AutoservRunError as e:
            logging.exception('Firmware update failed: %s', e)
            msg = '%s failed' % (
                    'Flashing firmware using servo' if arguments.using_servo
                    else 'chromeos-firmwareupdate')
            raise Exception(msg)
    if arguments.reinstall_test_image:
        try:
            preparedut.reinstall_test_image(host)
        except error.AutoservRunError as e:
            logging.exception('Failed to install: %s', e)
            raise Exception('chromeos-install failed')
    if arguments.install_test_image and arguments.install_firmware:
        # we need to verify that DUT can successfully boot in to recovery mode
        # if it's initial deploy.
        try:
            preparedut.verify_boot_into_rec_mode(host)
        except error.AutoservRunError as e:
            logging.exception('Failed to validate DUT can boot from '
                              'recovery mode: %s', e)
            raise Exception('recovery mode validation failed')


def _install_and_update_afe(afe, hostname, host_attrs, arguments):
    """Perform all installation and AFE updates.

    First, lock the host if it exists and is unlocked.  Then,
    install the test image on the DUT.  At the end, unlock the
    DUT, unless the installation failed and the DUT was locked
    before we started.

    If installation succeeds, make sure the DUT is in the AFE,
    and make sure that it has basic labels.

    @param afe          AFE object for RPC calls.
    @param hostname     Host name of the DUT.
    @param host_attrs   Dictionary with attributes to be applied to the
                        host.
    @param arguments    Command line arguments with options.
    """
    afe_host, unlock_on_failure = _get_afe_host(afe, hostname, host_attrs,
                                                arguments)
    host = None
    try:
        host = _create_host(hostname, afe, afe_host)
        if arguments.labstation:
            _setup_labstation(host)
        else:
            with _create_host_for_installation(host, arguments) as target_host:
                _install_test_image(target_host, arguments)
                _update_servo_type_attribute(target_host, host)

        if ((arguments.install_test_image or arguments.reinstall_test_image)
                and not arguments.dry_run):
            host.labels.update_labels(host)
            platform_labels = afe.get_labels(
                    host__hostname=hostname, platform=True)
            if not platform_labels:
                platform = host.get_platform()
                new_labels = afe.get_labels(name=platform)
                if not new_labels:
                    afe.create_label(platform, platform=True)
                afe_host.add_labels([platform])
        version = [label for label in afe_host.labels
                   if label.startswith(VERSION_PREFIX)]
        if version and not arguments.dry_run:
            afe_host.remove_labels(version)
    except Exception:
        if unlock_on_failure and not _try_unlock_host(afe_host):
            logging.error('Failed to unlock host!')
        raise
    finally:
        if host is not None:
            host.close()

    if not _try_unlock_host(afe_host):
        raise Exception('Install succeeded, but failed to unlock the DUT.')


def _install_dut(arguments, host_attr_dict, hostname):
    """Deploy or repair a single DUT.

    @param arguments       Command line arguments with options.
    @param host_attr_dict  Dict mapping hostnames to attributes to be
                           stored in the AFE.
    @param hostname        Host name of the DUT to install on.

    @return On success, return `None`.  On failure, return a string
            with an error message.
    """
    # In some cases, autotest code that we call during install may
    # put stuff onto stdout with 'print' statements.  Most notably,
    # the AFE frontend may print 'FAILED RPC CALL' (boo, hiss).  We
    # want nothing from this subprocess going to the output we
    # inherited from our parent, so redirect stdout and stderr, before
    # we make any AFE calls.  Note that this is reasonable because we're
    # in a subprocess.

    logpath = os.path.join(arguments.logdir, hostname + '.log')
    logfile = open(logpath, 'w')
    sys.stderr = sys.stdout = logfile
    _configure_logging_to_file(logfile)

    afe = frontend.AFE(server=arguments.web)
    try:
        _install_and_update_afe(afe, hostname,
                                host_attr_dict.get(hostname, {}),
                                arguments)
    except Exception as e:
        logging.exception('Original exception: %s', e)
        return str(e)
    return None


def _report_hosts(report_log, heading, host_results_list):
    """Report results for a list of hosts.

    To improve visibility, results are preceded by a header line,
    followed by a divider line.  Then results are printed, one host
    per line.

    @param report_log         File-like object for logging report
                              output.
    @param heading            The header string to be printed before
                              results.
    @param host_results_list  A list of _ReportResult tuples
                              to be printed one per line.
    """
    if not host_results_list:
        return
    report_log.write(heading)
    report_log.write(_DIVIDER)
    for result in host_results_list:
        report_log.write('{result.hostname:30} {result.message}\n'
                         .format(result=result))
    report_log.write('\n')


def _report_results(afe, report_log, hostnames, results, arguments):
    """Gather and report a summary of results from installation.

    Segregate results into successes and failures, reporting
    each separately.  At the end, report the total of successes
    and failures.

    @param afe          AFE object for RPC calls.
    @param report_log   File-like object for logging report output.
    @param hostnames    List of the hostnames that were tested.
    @param results      List of error messages, in the same order
                        as the hostnames.  `None` means the
                        corresponding host succeeded.
    @param arguments    Command line arguments with options.
    """
    successful_hosts = []
    success_reports = []
    failure_reports = []
    for result, hostname in zip(results, hostnames):
        if result is None:
            successful_hosts.append(hostname)
        else:
            failure_reports.append(_ReportResult(hostname, result))
    if successful_hosts:
        afe.repair_hosts(hostnames=successful_hosts)
        for h in afe.get_hosts(hostnames=successful_hosts):
            for label in h.labels:
                if label.startswith(constants.Labels.POOL_PREFIX):
                    result = _ReportResult(h.hostname,
                                           'Host already in %s' % label)
                    success_reports.append(result)
                    break
            else:
                # No pool label was found; put the host in a default pool.
                if arguments.labstation:
                    target_pool = _LABSTATION_DEFAULT_POOL
                else:
                    target_pool = _DEFAULT_POOL
                h.add_labels([target_pool])
                result = _ReportResult(h.hostname,
                                       'Host added to %s' % target_pool)
                success_reports.append(result)
    report_log.write(_DIVIDER)
    _report_hosts(report_log, 'Successes', success_reports)
    _report_hosts(report_log, 'Failures', failure_reports)
    report_log.write(
            'Installation complete:  %d successes, %d failures.\n' %
            (len(success_reports), len(failure_reports)))


def _clear_root_logger_handlers():
    """Remove all handlers from root logger."""
    root_logger = logging.getLogger()
    # Iterate over a copy: `removeHandler` mutates `handlers`, and
    # removing while iterating the live list skips every other handler.
    for h in list(root_logger.handlers):
        root_logger.removeHandler(h)


def _configure_logging_to_file(logfile):
    """Configure the logging module for `install_duts()`.

    Replaces whatever handlers the root logger has with a single
    stream handler writing to `logfile`.

    @param logfile  Log file object.
    """
    _clear_root_logger_handlers()
    handler = logging.StreamHandler(logfile)
    formatter = logging.Formatter(_LOG_FORMAT, time_utils.TIME_FMT)
    handler.setFormatter(formatter)
    root_logger = logging.getLogger()
    root_logger.addHandler(handler)


def _get_used_servo_ports(servo_hostname, afe):
    """
    Return a list of used servo ports for the given servo host.

    @param servo_hostname:  Hostname of the servo host to check for.
    @param afe:             AFE instance.

    @returns a list of used ports for the given servo host.
    """
    used_ports = []
    host_list = afe.get_hosts_by_attribute(
            attribute=servo_host.SERVO_HOST_ATTR, value=servo_hostname)
    for host in host_list:
        afe_host = afe.get_hosts(hostname=host)
        if afe_host:
            servo_port = afe_host[0].attributes.get(servo_host.SERVO_PORT_ATTR)
            if servo_port:
                used_ports.append(int(servo_port))
    return used_ports


def _get_free_servo_port(servo_hostname, used_servo_ports, afe):
    """
    Get a free servo port for the servo_host.

    The chosen port is recorded in `used_servo_ports` so that later
    calls for the same servo host don't pick it again.

    @param servo_hostname:    Hostname of the servo host.
    @param used_servo_ports:  Dict mapping a servo hostname to the list
                              of ports already used on that servo host.
                              Updated in place.
    @param afe:               AFE instance.

    @returns a free servo port if servo_hostname is non-empty, otherwise an
        empty string.
    """
    servo_port = servo_host.ServoHost.DEFAULT_PORT
    # If no servo hostname was specified we can assume we're dealing with a
    # servo v3 or older deployment since the servo hostname can be
    # inferred from the dut hostname (by appending '-servo' to it).  We only
    # need to find a free port if we're using a servo v4 since we can use the
    # default port for v3 and older.
    if not servo_hostname:
        return ''
    # If we haven't checked this servo host yet, check the AFE if other duts
    # used this servo host and grab the ports specified for them.
    elif servo_hostname not in used_servo_ports:
        used_ports = _get_used_servo_ports(servo_hostname, afe)
    else:
        used_ports = used_servo_ports[servo_hostname]
    used_ports.sort()
    if used_ports:
        # Range is taken from servod.py in hdctools.
        start_port = servo_host.ServoHost.DEFAULT_PORT
        end_port = start_port - 99
        # We'll choose first port available in descending order.
        # NOTE: if every port in the range is taken, `servo_port` keeps
        # the default port even though it is already in use.
        for port in range(start_port, end_port - 1, -1):
            if port not in used_ports:
                servo_port = port
                break
    used_ports.append(servo_port)
    used_servo_ports[servo_hostname] = used_ports
    return servo_port


def _get_afe_servo_port(host_info, afe):
    """
    Get the servo port from the afe if it matches the same servo host hostname.

    @param host_info   HostInfo tuple (hostname, host_attr_dict).
    @param afe         AFE instance.

    @returns Servo port (int) if servo host hostname matches the one specified
    host_info.host_attr_dict, otherwise None.

    @raises _NoAFEServoPortError: When there is no stored host info or servo
        port host attribute in the AFE for the given host.
    """
    afe_hosts = afe.get_hosts(hostname=host_info.hostname)
    if not afe_hosts:
        raise _NoAFEServoPortError

    servo_port = afe_hosts[0].attributes.get(servo_host.SERVO_PORT_ATTR)
    afe_servo_host = afe_hosts[0].attributes.get(servo_host.SERVO_HOST_ATTR)
    host_info_servo_host = host_info.host_attr_dict.get(
            servo_host.SERVO_HOST_ATTR)

    if afe_servo_host == host_info_servo_host and servo_port:
        return int(servo_port)
    else:
        raise _NoAFEServoPortError


def _get_host_attributes(host_info_list, afe):
    """
    Get host attributes if a hostname_file was supplied.

    @param host_info_list   List of HostInfo tuples (hostname, host_attr_dict).
    @param afe              AFE instance.

    @returns Dict of attributes from host_info_list.
    """
    host_attributes = {}
    # We need to choose servo ports for these hosts but we need to make sure
    # we don't choose ports already used. We'll store all used ports in a
    # dict of lists where the key is the servo_host and the val is a list of
    # ports used.
    used_servo_ports = {}
    for host_info in host_info_list:
        host_attr_dict = host_info.host_attr_dict
        # If the host already has an entry in the AFE that matches the same
        # servo host hostname and the servo port is set, use that port.
        try:
            host_attr_dict[servo_host.SERVO_PORT_ATTR] = _get_afe_servo_port(
                    host_info, afe)
        except _NoAFEServoPortError:
            host_attr_dict[servo_host.SERVO_PORT_ATTR] = _get_free_servo_port(
                    host_attr_dict[servo_host.SERVO_HOST_ATTR],
                    used_servo_ports, afe)
        host_attributes[host_info.hostname] = host_attr_dict
    return host_attributes


def _get_cros_repair_image_name(host):
    """Get the CrOS repair image name for given host.

    @param host: hosts.CrosHost object. This object need not have an AFE
                 reference.

    @raises InstallFailedError if the host's info has no board.
    """
    info = host.host_info_store.get()
    if not info.board:
        raise InstallFailedError('Unknown board for given host')
    return afe_utils.get_stable_cros_image_name_v2(info)


def install_duts(arguments):
    """Install a test image on DUTs, and deploy them.

    This handles command line parsing for both the repair and
    deployment commands.  The two operations are largely identical;
    the main difference is that full deployment includes flashing
    dev-signed firmware on the DUT prior to installing the test
    image.

    @param arguments    Command line arguments with options, as
                        returned by `argparse.ArgumentParser`.
    """
    arguments = cmdvalidate.validate_arguments(arguments)
    if arguments is None:
        sys.exit(1)
    sys.stderr.write('Installation output logs in %s\n' % arguments.logdir)

    # Override tempfile.tempdir.  Some of the autotest code we call
    # will create temporary files that don't get cleaned up.  So, we
    # put the temp files in a private directory that is removed at
    # exit, so that we can clean up everything at one fell swoop.
    tempfile.tempdir = tempfile.mkdtemp()
    atexit.register(shutil.rmtree, tempfile.tempdir)

    # We don't want to distract the user with logging output, so we catch
    # logging output in a file.  The file stays open for the rest of the
    # process because the root logger keeps writing to it.
    logging_file_path = os.path.join(arguments.logdir, 'debug.log')
    logfile = open(logging_file_path, 'w')
    _configure_logging_to_file(logfile)

    report_log_path = os.path.join(arguments.logdir, 'report.log')
    with open(report_log_path, 'w') as report_log_file:
        report_log = _MultiFileWriter([report_log_file, sys.stdout])
        afe = frontend.AFE(server=arguments.web)
        if arguments.dry_run:
            report_log.write('Dry run - installation and most testing '
                             'will be skipped.\n')
        host_attr_dict = _get_host_attributes(arguments.host_info_list, afe)
        # One worker process per DUT, so all installs run in parallel.
        install_pool = multiprocessing.Pool(len(arguments.hostnames))
        install_function = functools.partial(_install_dut, arguments,
                                             host_attr_dict)
        results_list = install_pool.map(install_function, arguments.hostnames)
        # All work is done once `map` returns; reap the workers now
        # instead of leaving them around until interpreter exit.
        install_pool.close()
        install_pool.join()
        _report_results(afe, report_log, arguments.hostnames, results_list,
                        arguments)

    if arguments.upload:
        try:
            gspath = _get_upload_log_path(arguments)
            sys.stderr.write('Logs will be uploaded to %s\n' % (gspath,))
            _upload_logs(arguments.logdir, gspath)
        except Exception:
            # Upload is best effort: record the failure, don't abort.
            upload_failure_log_path = os.path.join(arguments.logdir,
                                                   'gs_upload_failure.log')
            with open(upload_failure_log_path, 'w') as file_:
                traceback.print_exc(limit=None, file=file_)
            sys.stderr.write('Failed to upload logs;'
                             ' failure details are stored in {}.\n'
                             .format(upload_failure_log_path))


def _update_servo_type_attribute(host, host_to_update):
    """Update servo_type attribute for the DUT.

    The attribute is only written when it is not already present.

    @param host              A CrOSHost with a initialized servo property.
    @param host_to_update    A CrOSHost with AfeStore as its host_info_store.

    """
    info = host_to_update.host_info_store.get()
    if 'servo_type' not in info.attributes:
        logging.info("Collecting and adding servo_type attribute.")
        info.attributes['servo_type'] = host.servo.get_servo_version()
        host_to_update.host_info_store.commit(info)


def _setup_labstation(host):
    """Do initial setup for labstation host.

    @param host    A LabstationHost object.

    @raises InstallFailedError if the host is not running a labstation
            image, if that can't be verified, or if reading the HWID
            and serial number fails.

    """
    try:
        if not host.is_labstation():
            raise InstallFailedError('Current OS on host %s is not a labstation'
                                     ' image.' % host.hostname)
    except AttributeError:
        raise InstallFailedError('Unable to verify host has a labstation image,'
                                 ' this can be caused by host is unsshable.')

    try:
        # TODO: we should setup hwid and serial number for DUT in deploy script
        #  as well, which is currently obtained from repair job.
        info = host.host_info_store.get()
        hwid = host.run('crossystem hwid', ignore_status=True).stdout
        if hwid:
            info.attributes['HWID'] = hwid

        serial_number = host.run('vpd -g serial_number',
                                 ignore_status=True).stdout
        if serial_number:
            info.attributes['serial_number'] = serial_number
        # Only commit when something actually changed.
        if info != host.host_info_store.get():
            host.host_info_store.commit(info)
    except Exception as e:
        raise InstallFailedError('Failed to get HWID & Serial Number for host'
                                 ' %s: %s' % (host.hostname, str(e)))

    host.labels.update_labels(host)