• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python
2# Copyright 2015 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Install an initial test image on a set of DUTs.
7
8The methods in this module are meant for two nominally distinct use
9cases that share a great deal of code internally.  The first use
10case is for deployment of DUTs that have just been placed in the lab
11for the first time.  The second use case is for use after repairing
12a servo.
13
14Newly deployed DUTs may be in a somewhat anomalous state:
15  * The DUTs are running a production base image, not a test image.
16    By extension, the DUTs aren't reachable over SSH.
17  * The DUTs are not necessarily in the AFE database.  DUTs that
18    _are_ in the database should be locked.  Either way, the DUTs
19    cannot be scheduled to run tests.
20  * The servos for the DUTs need not be configured with the proper
21    overlay.
22
23More broadly, it's not expected that the DUT will be working at the
24start of this operation.  If the DUT isn't working at the end of the
25operation, an error will be reported.
26
27The script performs the following functions:
28  * Configure the servo for the target overlay, and test that the
29    servo is generally in good order.
30  * For the full deployment case, install dev-signed RO firmware
31    from the designated stable test image for the DUTs.
32  * For both cases, use servo to install the stable test image from
33    USB.
34  * If the DUT isn't in the AFE database, add it.
35
36The script imposes these preconditions:
37  * Every DUT has a properly connected servo.
38  * Every DUT and servo have proper DHCP and DNS configurations.
39  * Every servo host is up and running, and accessible via SSH.
40  * There is a known, working test image that can be staged and
41    installed on the target DUTs via servo.
42  * Every DUT has the same board and model.
43  * For the full deployment case, every DUT must be in dev mode,
44    and configured to allow boot from USB with ctrl+U.
45
46The implementation uses the `multiprocessing` module to run all
47installations in parallel, separate processes.
48
49"""
50
51import atexit
52from collections import namedtuple
53import functools
54import json
55import logging
56import multiprocessing
57import os
58import shutil
59import sys
60import tempfile
61import time
62import traceback
63
64from chromite.lib import gs
65
66import common
67from autotest_lib.client.common_lib import error
68from autotest_lib.client.common_lib import host_states
69from autotest_lib.client.common_lib import time_utils
70from autotest_lib.client.common_lib import utils
71from autotest_lib.client.common_lib.cros import retry
72from autotest_lib.server import afe_utils
73from autotest_lib.server import constants
74from autotest_lib.server import frontend
75from autotest_lib.server import hosts
76from autotest_lib.server.cros.dynamic_suite.constants import VERSION_PREFIX
77from autotest_lib.server.hosts import afe_store
78from autotest_lib.server.hosts import servo_host
79from autotest_lib.site_utils.deployment import cmdvalidate
80from autotest_lib.site_utils.deployment.prepare import dut as preparedut
81from autotest_lib.site_utils.stable_images import build_data
82from autotest_lib.utils import labellib
83
84
# Record format for the log handler installed by
# `_configure_logging_to_file()`.
_LOG_FORMAT = '%(asctime)s | %(levelname)-10s | %(message)s'

# Pool label added by `_report_results()` to successfully installed
# hosts that do not already carry a pool label.
_DEFAULT_POOL = constants.Labels.POOL_PREFIX + 'suites'

# Separator written to the report log between sections of output.
_DIVIDER = '\n============\n'

# Google Storage bucket used by `_get_upload_log_path()` to build the
# upload destination for logs.
_LOG_BUCKET_NAME = 'chromeos-install-logs'

# Google Storage path of the JSON status file read by
# `_get_omaha_build()`.
_OMAHA_STATUS = 'gs://chromeos-build-release-console/omaha_status.json'

# Lock reasons we'll pass when locking DUTs, depending on the
# host's prior state.
_LOCK_REASON_EXISTING = 'Repairing or deploying an existing host'
_LOCK_REASON_NEW_HOST = 'Repairing or deploying a new host'

# One line of the final report: a hostname plus the message to print
# next to it.
_ReportResult = namedtuple('_ReportResult', ['hostname', 'message'])
101
102
class InstallFailedError(Exception):
    """Generic error raised explicitly in this module.

    Serves as the base class for the module's more specific
    installation errors.
    """
105
106
class _NoAFEServoPortError(InstallFailedError):
    """Exception when there is no servo port stored in the AFE.

    Raised by `_get_afe_servo_port()` when the host is unknown to the
    AFE, has no servo port attribute, or is recorded against a
    different servo host.
    """
109
110
111class _MultiFileWriter(object):
112
113    """Group file objects for writing at once."""
114
115    def __init__(self, files):
116        """Initialize _MultiFileWriter.
117
118        @param files  Iterable of file objects for writing.
119        """
120        self._files = files
121
122    def write(self, s):
123        """Write a string to the files.
124
125        @param s  Write this string.
126        """
127        for file in self._files:
128            file.write(s)
129
130
def _get_upload_log_path(arguments):
    """Build the Google Storage path that logs are uploaded to.

    @param arguments  Command line arguments with options; only
                      `upload_basename` is read.

    @return A `gs://` path inside the `_LOG_BUCKET_NAME` bucket.
    """
    path_template = 'gs://{bucket}/{name}'
    return path_template.format(bucket=_LOG_BUCKET_NAME,
                                name=arguments.upload_basename)
135
136
def _upload_logs(dirpath, gspath):
    """Recursively copy a directory of report logs to Google Storage.

    @param dirpath  Path to directory containing the logs.
    @param gspath   Path to GS bucket.
    """
    gs.GSContext().Copy(dirpath, gspath, recursive=True)
145
146
def _get_omaha_build(board):
    """Look up the Beta channel build Omaha currently serves for `board`.

    Reads the JSON status file at `_OMAHA_STATUS` and scans its
    'omaha_data' entries for the first one on the 'beta' channel whose
    public codename matches `board`.  Underscores in `board` are
    replaced by dashes before comparing.

    @param board  The board to look up.

    @return A Chrome OS version string in standard form R##-####.#.#,
            or `None` if no Beta channel entry is found.
    """
    status = json.loads(gs.GSContext().Cat(_OMAHA_STATUS))
    wanted_codename = board.replace('_', '-')
    for entry in status['omaha_data']:
        if entry['channel'] != 'beta':
            continue
        if entry['board']['public_codename'] != wanted_codename:
            continue
        # The milestone is the leading component of the Chrome version.
        milestone = entry['chrome_version'].split('.')[0]
        return 'R%s-%s' % (milestone, entry['chrome_os_version'])
    return None
171
172
def _update_build(afe, report_log, arguments):
    """Update the stable_test_versions table.

    This calls the `set_stable_version` RPC call to set the stable
    repair version selected by this run of the command.  Additionally,
    this updates the stable firmware for the board.  The repair version
    is selected from three possible versions:
      * The stable test version currently in the AFE database.
      * The version Omaha is currently serving as the Beta channel
        build.
      * The version supplied by the user.
    The actual version selected will be whichever of these three is
    the most up-to-date version.

    The stable firmware version will be set to whatever firmware is
    bundled in the selected repair image. If the selected repair image bundles
    firmware for more than one model, then the firmware for every model in the
    build will be updated.

    This function will log information about the available versions
    prior to selection.  After selection the repair and firmware
    versions selected will be logged.

    When `arguments.dry_run` is set, nothing is written to the AFE, but
    the selection is still made, reported and returned.

    @param afe          AFE object for RPC calls.
    @param report_log   File-like object for logging report output.
    @param arguments    Command line arguments with options.  This
                        function reads `board`, `build` and `dry_run`.

    @return Returns the version selected.
    """
    # Gather the current AFE and Omaha version settings, and report them
    # to the user.
    cros_version_map = afe.get_stable_version_map(afe.CROS_IMAGE_TYPE)
    fw_version_map = afe.get_stable_version_map(afe.FIRMWARE_IMAGE_TYPE)
    afe_cros = cros_version_map.get_version(arguments.board)
    afe_fw = fw_version_map.get_version(arguments.board)
    omaha_cros = _get_omaha_build(arguments.board)
    report_log.write('AFE    version is %s.\n' % afe_cros)
    report_log.write('Omaha  version is %s.\n' % omaha_cros)
    report_log.write('AFE   firmware is %s.\n' % afe_fw)
    # Start from the AFE's current version; the checks below may only
    # replace it with a newer one.
    cros_version = afe_cros

    # Check whether we should upgrade the repair build to either
    # the Omaha or the user's requested build.  If we do, we must
    # also update the firmware version.
    if (omaha_cros is not None
            and (cros_version is None or
                 utils.compare_versions(cros_version, omaha_cros) < 0)):
        cros_version = omaha_cros
    if arguments.build and arguments.build != cros_version:
        if (cros_version is None
                or utils.compare_versions(cros_version, arguments.build) < 0):
            cros_version = arguments.build
        else:
            # The user's build is older than what we already have, so
            # it is ignored in favor of the newer version.
            report_log.write('Selected version %s is too old; '
                             'using version %s'
                             % (arguments.build, cros_version))

    # The AFE firmware is keyed by board here so that it can be compared
    # against the mapping returned for the selected build.
    afe_fw_versions = {arguments.board: afe_fw}
    fw_versions = build_data.get_firmware_versions(
        arguments.board, cros_version)
    # At this point `cros_version` is our new repair build, and
    # `fw_versions` is our new target firmware.  Call the AFE back with
    # updates as necessary.
    if not arguments.dry_run:
        if cros_version != afe_cros:
            cros_version_map.set_version(arguments.board, cros_version)

            # Firmware is only touched when the repair version itself
            # changed.  A `None` firmware version removes the entry for
            # that model.
            if fw_versions != afe_fw_versions:
                for model, fw_version in fw_versions.iteritems():
                    if fw_version is not None:
                        fw_version_map.set_version(model, fw_version)
                    else:
                        fw_version_map.delete_version(model)

    # Report the new state of the world.
    report_log.write(_DIVIDER)
    report_log.write('Repair CrOS version for board %s is now %s.\n' %
                     (arguments.board, cros_version))
    for model, fw_version in fw_versions.iteritems():
        report_log.write('Firmware version for model %s is now %s.\n' %
                         (model, fw_version))
    return cros_version
255
256
def _create_host(hostname, afe, afe_host):
    """Build a CrosHost object for the DUT, backed by the AFE.

    The returned host is meant for updating AFE label information for
    the DUT.  It can not be used to install an image on the DUT; in
    particular, its servo attribute is not populated.

    @param hostname  Hostname of the target DUT.
    @param afe       A frontend.AFE object.
    @param afe_host  AFE Host object for the DUT.

    @return The object returned by `hosts.create_host()`.
    """
    info_store = afe_store.AfeStore(hostname, afe)
    return hosts.create_host({
            'hostname': hostname,
            'afe_host': afe_host,
            'host_info_store': info_store,
    })
274
275
def _try_lock_host(afe_host):
    """Attempt to lock a host in the AFE without raising on failure.

    The attempt is always logged; if the lock fails, the failure is
    logged as well, with its traceback.

    @param afe_host AFE Host instance to be locked.

    @return `True` on success, or `False` on failure.
    """
    try:
        logging.warning('Locking host now.')
        afe_host.modify(locked=True,
                        lock_reason=_LOCK_REASON_EXISTING)
    except Exception as lock_error:
        logging.exception('Failed to lock: %s', lock_error)
        succeeded = False
    else:
        succeeded = True
    return succeeded
294
295
296def _try_unlock_host(afe_host):
297    """Unlock a host in the AFE, and report whether it succeeded.
298
299    The unlock action is logged regardless of success; failures are
300    logged if they occur.
301
302    @param afe_host AFE Host instance to be unlocked.
303
304    @return `True` on success, or `False` on failure.
305    """
306    try:
307        logging.warning('Unlocking host.')
308        afe_host.modify(locked=False, lock_reason='')
309    except Exception as e:
310        logging.exception('Failed to unlock: %s', e)
311        return False
312    return True
313
314
def _update_host_attributes(afe, hostname, host_attrs):
    """Store the servo attributes for a given host in the AFE.

    The servo hostname and port are always written, falling back to
    the defaults chosen by `_extract_servo_attributes()`.  The servo
    serial is written only when one was supplied.

    @param afe          AFE object for RPC calls.
    @param hostname     Host name of the DUT.
    @param host_attrs   Dictionary with attributes to be applied to the
                        host.
    """
    servo_hostname, servo_port, servo_serial = _extract_servo_attributes(
            hostname, host_attrs)
    updates = [
            (servo_host.SERVO_HOST_ATTR, servo_hostname),
            (servo_host.SERVO_PORT_ATTR, servo_port),
    ]
    if servo_serial:
        updates.append((servo_host.SERVO_SERIAL_ATTR, servo_serial))
    for attribute, value in updates:
        afe.set_host_attribute(attribute, value, hostname=hostname)
335
336
def _extract_servo_attributes(hostname, host_attrs):
    """Extract servo attributes from the host attribute dict, setting defaults.

    @param hostname    Host name of the DUT; used to derive the default
                       servo hostname.
    @param host_attrs  Dictionary of host attributes, possibly empty.

    @return (servo_hostname, servo_port, servo_serial)
    """
    # For new servo V4 deployments the user must supply these
    # attributes, because no appropriate defaults exist.  So when a
    # value is missing we assume the servo can't be V4, and fall back
    # to the servo V3 defaults.
    servo_hostname = host_attrs.get(servo_host.SERVO_HOST_ATTR)
    if not servo_hostname:
        servo_hostname = servo_host.make_servo_hostname(hostname)

    servo_port = host_attrs.get(servo_host.SERVO_PORT_ATTR)
    if not servo_port:
        servo_port = str(servo_host.ServoHost.DEFAULT_PORT)

    # There is no default for the serial; it stays `None` if absent.
    servo_serial = host_attrs.get(servo_host.SERVO_SERIAL_ATTR)
    return servo_hostname, servo_port, servo_serial
353
354
def _wait_for_idle(afe, host_id):
    """Block until the host with `host_id` reports an idle status.

    Helper for `_ensure_host_idle`.  Polls the host via `afe` with no
    upper bound on the wait; the caller takes care of timing out.

    @param afe        AFE object for RPC calls.
    @param host_id    Id of the host that's expected to become idle.
    """
    while (afe.get_hosts(id=host_id)[0].status
           not in host_states.IDLE_STATES):
        # Pause between polls so we don't spam our server.
        time.sleep(0.2)
370
371
def _ensure_host_idle(afe, afe_host):
    """Abort any special task running on `afe_host`.

    The given `afe_host` is currently locked.  If the host is not
    already idle, abort its active special tasks, then wait a short
    time for the host to show up as idle.

    @param afe        AFE object for RPC calls.
    @param afe_host   Host to be aborted.

    @return A true value if the host is idle at return, or a false value
        if the host wasn't idle after some reasonable time.
    """
    # We need to talk to the shard, not the master, for at least two
    # reasons:
    #   * The `abort_special_tasks` RPC doesn't forward from the master
    #     to the shard, and only the shard has access to the special
    #     tasks.
    #   * Host status on the master can lag actual status on the shard
    #     by several minutes.  Only the shard can provide status
    #     guaranteed to post-date the call to lock the DUT.
    rpc = frontend.AFE(server=afe_host.shard) if afe_host.shard else afe
    current = rpc.get_hosts(id=afe_host.id)[0]
    if current.status not in host_states.IDLE_STATES:
        rpc.run('abort_special_tasks', host_id=current.id, is_active=1)
        # Presumably the first element returned by `retry.timeout()`
        # is the "timed out" flag; verify against that helper.
        timed_out = retry.timeout(_wait_for_idle, (rpc, current.id),
                                  timeout_sec=5.0)[0]
        return not timed_out
    return True
401
402
def _get_afe_host(afe, hostname, host_attrs, arguments):
    """Get an AFE Host object for the given host.

    If the host is found in the database, lock it if necessary, make
    sure it is idle, and apply any attributes supplied in `host_attrs`.

    If no host is found, create one (locked) with appropriate servo
    attributes.

    In both cases, the board and model labels given in `arguments` are
    then checked against the host, and added if missing.

    @param afe          AFE object for RPC calls.
    @param hostname     Host name of the DUT.
    @param host_attrs   Dictionary with attributes to be applied to the
                        host.
    @param arguments    Command line arguments with options.

    @return A tuple of the afe_host, plus a flag. The flag indicates
            whether the Host should be unlocked if subsequent operations
            fail.  (Hosts are always unlocked after success).

    @raises Exception if an existing host can't be locked, or can't be
            made idle.
    @raises InstallFailedError if the supplied board or model differs
            from the label already present in the AFE.
    """
    hostlist = afe.get_hosts([hostname])
    unlock_on_failure = False
    if hostlist:
        afe_host = hostlist[0]
        # Only a lock taken here is ours to release on failure; a host
        # that was already locked stays locked.
        if not afe_host.locked:
            if _try_lock_host(afe_host):
                unlock_on_failure = True
            else:
                raise Exception('Failed to lock host')
        if not _ensure_host_idle(afe, afe_host):
            # Release the lock we took before giving up.
            if unlock_on_failure and not _try_unlock_host(afe_host):
                raise Exception('Failed to abort host, and failed to unlock it')
            raise Exception('Failed to abort task on host')
        # This host was pre-existing; if the user didn't supply
        # attributes, don't update them, because the defaults may
        # not be correct.
        if host_attrs:
            _update_host_attributes(afe, hostname, host_attrs)
    else:
        # New hosts are created locked, and always get servo
        # attributes, using defaults where none were supplied.
        afe_host = afe.create_host(hostname,
                                   locked=True,
                                   lock_reason=_LOCK_REASON_NEW_HOST)
        _update_host_attributes(afe, hostname, host_attrs)

    # Correct board/model label is critical to installation. Always ensure user
    # supplied board/model matches the AFE information.
    _ensure_label_in_afe(afe_host, 'board', arguments.board)
    _ensure_label_in_afe(afe_host, 'model', arguments.model)

    # Fetch the host again so the returned object reflects the
    # attribute and label changes made above.
    afe_host = afe.get_hosts([hostname])[0]
    return afe_host, unlock_on_failure
453
454
def _ensure_label_in_afe(afe_host, label_name, label_value):
    """Make sure the host's label agrees with the supplied value.

    Does nothing when `label_value` is empty.  If the host has no label
    named `label_name`, one is added with the supplied value.  If it
    already has one, the existing value must match.

    @param afe_host    AFE Host object whose labels are checked.
    @param label_name  name of the label, e.g. 'board', 'model', etc.
    @param label_value value of the label.

    @raises InstallFailedError if the supplied value is different from
            the existing value in the AFE.
    """
    if not label_value:
        return

    current_labels = labellib.LabelsMapping(afe_host.labels)
    if label_name in current_labels:
        existing_value = current_labels[label_name]
        if label_value != existing_value:
            raise InstallFailedError(
                    'provided %s %s does not match the %s %s for host %s' %
                    (label_name, label_value, label_name, existing_value,
                     afe_host.hostname))
    else:
        afe_host.add_labels(['%s:%s' % (label_name, label_value)])
478
479
def _create_host_for_installation(host, arguments):
    """Creates a context manager of hosts.CrosHost object for installation.

    The host object yielded by the returned context manager is agnostic of the
    infrastructure environment. In particular, it does not have any references
    to the AFE.  The servo settings are taken from the attributes in
    `host`'s info store, with defaults applied where they are missing.

    @param host: A server.hosts.CrosHost object.
    @param arguments: Parsed commandline arguments for this script.

    @return a context manager which yields hosts.CrosHost object.
    """
    host_info = host.host_info_store.get()
    servo_hostname, servo_port, servo_serial = _extract_servo_attributes(
            host.hostname, host_info.attributes)
    return preparedut.create_host(
            host.hostname,
            arguments.board,
            arguments.model,
            servo_hostname,
            servo_port,
            servo_serial,
            arguments.logdir)
498
499
def _install_test_image(host, arguments):
    """Stage, flash and install a stable test image on the DUT.

    The repair image for `host` is looked up and logged.  Unless
    `arguments.dry_run` is set, each of the following steps then runs,
    in this order, if its option is enabled:
      * `arguments.stageusb`:  download the image to the servo USB.
      * `arguments.install_firmware`:  install firmware, using servo if
        `arguments.using_servo` is set, or chromeos-firmwareupdate
        otherwise.
      * `arguments.install_test_image`:  install the test image.

    @param host       Host instance for the DUT being installed.
    @param arguments  Command line arguments with options.

    @raises Exception with a short summary if USB staging fails for any
            reason, or if the firmware or image step fails with
            `error.AutoservRunError`.  Other errors from the firmware
            and image steps propagate unchanged.
    """
    repair_image = _get_cros_repair_image_name(host)
    logging.info('Using repair image %s', repair_image)
    if arguments.dry_run:
        return

    if arguments.stageusb:
        try:
            preparedut.download_image_to_servo_usb(host, repair_image)
        except Exception as stage_error:
            logging.exception('Failed to stage image on USB: %s',
                              stage_error)
            raise Exception('USB staging failed')

    if arguments.install_firmware:
        try:
            if arguments.using_servo:
                logging.debug('Install FW using servo.')
                preparedut.flash_firmware_using_servo(host, repair_image)
            else:
                logging.debug('Install FW by chromeos-firmwareupdate.')
                preparedut.install_firmware(host, arguments.force_firmware)
        except error.AutoservRunError as firmware_error:
            logging.exception('Firmware update failed: %s', firmware_error)
            if arguments.using_servo:
                failed_method = 'Flashing firmware using servo'
            else:
                failed_method = 'chromeos-firmwareupdate'
            raise Exception('%s failed' % failed_method)

    if arguments.install_test_image:
        try:
            preparedut.install_test_image(host)
        except error.AutoservRunError as install_error:
            logging.exception('Failed to install: %s', install_error)
            raise Exception('chromeos-install failed')
539
540
def _install_and_update_afe(afe, hostname, host_attrs, arguments):
    """Perform all installation and AFE updates.

    First, lock the host if it exists and is unlocked.  Then,
    install the test image on the DUT.  At the end, unlock the
    DUT, unless the installation failed and the DUT was locked
    before we started.

    If installation succeeds, make sure the DUT is in the AFE,
    and make sure that it has basic labels.

    @param afe          AFE object for RPC calls.
    @param hostname     Host name of the DUT.
    @param host_attrs   Dictionary with attributes to be applied to the
                        host.
    @param arguments    Command line arguments with options.

    @raises Exception if installation or the label updates fail (the
            original exception is re-raised), or if everything
            succeeded but the final unlock failed.
    """
    afe_host, unlock_on_failure = _get_afe_host(afe, hostname, host_attrs,
                                                arguments)
    # Initialized before the `try` so the `finally` clause can tell
    # whether there is a host object to close.
    host = None
    try:
        host = _create_host(hostname, afe, afe_host)
        with _create_host_for_installation(host, arguments) as host_to_install:
            _install_test_image(host_to_install, arguments)

        # Labels are only refreshed after a real (non dry-run)
        # installation of the test image.
        if arguments.install_test_image and not arguments.dry_run:
            host.labels.update_labels(host)
            platform_labels = afe.get_labels(
                    host__hostname=hostname, platform=True)
            if not platform_labels:
                # The host has no platform label yet; create the label
                # in the AFE if needed, then attach it.
                platform = host.get_platform()
                new_labels = afe.get_labels(name=platform)
                if not new_labels:
                    afe.create_label(platform, platform=True)
                afe_host.add_labels([platform])
        # Remove any version labels left on the host.
        version = [label for label in afe_host.labels
                       if label.startswith(VERSION_PREFIX)]
        if version and not arguments.dry_run:
            afe_host.remove_labels(version)
    except Exception as e:
        # On failure, only release a lock that this run took itself.
        if unlock_on_failure and not _try_unlock_host(afe_host):
            logging.error('Failed to unlock host!')
        raise
    finally:
        if host is not None:
            host.close()

    # Success path: the host is always unlocked, whoever locked it.
    if not _try_unlock_host(afe_host):
        raise Exception('Install succeeded, but failed to unlock the DUT.')
590
591
def _install_dut(arguments, host_attr_dict, hostname):
    """Deploy or repair a single DUT.

    All output from this call, including logging, is sent to
    `<hostname>.log` under `arguments.logdir`.

    @param arguments       Command line arguments with options.
    @param host_attr_dict  Dict mapping hostnames to attributes to be
                           stored in the AFE.
    @param hostname        Host name of the DUT to install on.

    @return On success, return `None`.  On failure, return a string
            with an error message.
    """
    # In some cases, autotest code that we call during install may
    # put stuff onto stdout with 'print' statements.  Most notably,
    # the AFE frontend may print 'FAILED RPC CALL' (boo, hiss).  We
    # want nothing from this subprocess going to the output we
    # inherited from our parent, so redirect stdout and stderr, before
    # we make any AFE calls.  Note that this is reasonable because we're
    # in a subprocess.
    #
    # The log file is left open here, since it now serves as this
    # process's stdout and stderr.
    logfile = open(os.path.join(arguments.logdir, hostname + '.log'), 'w')
    sys.stderr = logfile
    sys.stdout = logfile
    _configure_logging_to_file(logfile)

    afe = frontend.AFE(server=arguments.web)
    try:
        _install_and_update_afe(afe, hostname,
                                host_attr_dict.get(hostname, {}),
                                arguments)
    except Exception as install_error:
        logging.exception('Original exception: %s', install_error)
        return str(install_error)
    else:
        return None
625
626
627def _report_hosts(report_log, heading, host_results_list):
628    """Report results for a list of hosts.
629
630    To improve visibility, results are preceded by a header line,
631    followed by a divider line.  Then results are printed, one host
632    per line.
633
634    @param report_log         File-like object for logging report
635                              output.
636    @param heading            The header string to be printed before
637                              results.
638    @param host_results_list  A list of _ReportResult tuples
639                              to be printed one per line.
640    """
641    if not host_results_list:
642        return
643    report_log.write(heading)
644    report_log.write(_DIVIDER)
645    for result in host_results_list:
646        report_log.write('{result.hostname:30} {result.message}\n'
647                         .format(result=result))
648    report_log.write('\n')
649
650
def _report_results(afe, report_log, hostnames, results):
    """Gather and report a summary of results from installation.

    Results are split into successes and failures, and each group is
    reported separately, followed by the total count of each.

    For successful hosts, `afe.repair_hosts()` is called, and any host
    without a pool label is added to `_DEFAULT_POOL`.

    @param afe          AFE object for RPC calls.
    @param report_log   File-like object for logging report output.
    @param hostnames    List of the hostnames that were tested.
    @param results      List of error messages, in the same order
                        as the hostnames.  `None` means the
                        corresponding host succeeded.
    """
    successful_hosts = []
    failure_reports = []
    for hostname, outcome in zip(hostnames, results):
        if outcome is None:
            successful_hosts.append(hostname)
        else:
            failure_reports.append(_ReportResult(hostname, outcome))

    success_reports = []
    if successful_hosts:
        afe.repair_hosts(hostnames=successful_hosts)
        pool_prefix = constants.Labels.POOL_PREFIX
        for afe_host in afe.get_hosts(hostnames=successful_hosts):
            # Only the first pool label found is reported.
            current_pool = next(
                    (label for label in afe_host.labels
                     if label.startswith(pool_prefix)),
                    None)
            if current_pool is None:
                afe_host.add_labels([_DEFAULT_POOL])
                message = 'Host added to %s' % _DEFAULT_POOL
            else:
                message = 'Host already in %s' % current_pool
            success_reports.append(
                    _ReportResult(afe_host.hostname, message))

    report_log.write(_DIVIDER)
    _report_hosts(report_log, 'Successes', success_reports)
    _report_hosts(report_log, 'Failures', failure_reports)
    totals = (len(success_reports), len(failure_reports))
    report_log.write(
        'Installation complete:  %d successes, %d failures.\n' % totals)
693
694
695def _clear_root_logger_handlers():
696    """Remove all handlers from root logger."""
697    root_logger = logging.getLogger()
698    for h in root_logger.handlers:
699        root_logger.removeHandler(h)
700
701
def _configure_logging_to_file(logfile):
    """Configure the logging module for `install_duts()`.

    Every handler already on the root logger is dropped, and replaced
    by a single stream handler writing to `logfile` using
    `_LOG_FORMAT`.

    @param logfile  Log file object.
    """
    _clear_root_logger_handlers()
    file_handler = logging.StreamHandler(logfile)
    file_handler.setFormatter(
            logging.Formatter(_LOG_FORMAT, time_utils.TIME_FMT))
    logging.getLogger().addHandler(file_handler)
713
714
def _get_used_servo_ports(servo_hostname, afe):
    """
    Return a list of used servo ports for the given servo host.

    Looks up the hosts whose servo host attribute equals
    `servo_hostname`, and collects the servo port attribute of each
    one that has it set.

    @param servo_hostname:  Hostname of the servo host to check for.
    @param afe:             AFE instance.

    @returns a list of used ports (as ints) for the given servo host.
    """
    used_ports = []
    dut_hostnames = afe.get_hosts_by_attribute(
            attribute=servo_host.SERVO_HOST_ATTR, value=servo_hostname)
    for dut_hostname in dut_hostnames:
        matches = afe.get_hosts(hostname=dut_hostname)
        if not matches:
            continue
        port = matches[0].attributes.get(servo_host.SERVO_PORT_ATTR)
        if port:
            used_ports.append(int(port))
    return used_ports
734
735
def _get_free_servo_port(servo_hostname, used_servo_ports, afe):
    """
    Get a free servo port for the servo_host.

    The chosen port is recorded in `used_servo_ports`, so later calls
    for the same servo host won't hand it out again.

    @param servo_hostname:    Hostname of the servo host.
    @param used_servo_ports:  Dict mapping each servo hostname to the list
                              of ports already used on it.  Updated in
                              place.
    @param afe:               AFE instance.

    @returns a free servo port if servo_hostname is non-empty, otherwise an
        empty string.
    """
    # If no servo hostname was specified we can assume we're dealing with a
    # servo v3 or older deployment since the servo hostname can be
    # inferred from the dut hostname (by appending '-servo' to it).  We only
    # need to find a free port if we're using a servo v4 since we can use the
    # default port for v3 and older.
    if not servo_hostname:
        return ''

    if servo_hostname in used_servo_ports:
        used_ports = used_servo_ports[servo_hostname]
    else:
        # We haven't checked this servo host yet, so ask the AFE which
        # ports other duts on this servo host are using.
        used_ports = _get_used_servo_ports(servo_hostname, afe)
    used_ports.sort()

    # Range is taken from servod.py in hdctools:  the default port, and
    # the 99 ports below it.  We choose the first port available in
    # descending order, falling back to the default port if none of
    # them is free.
    default_port = servo_host.ServoHost.DEFAULT_PORT
    candidates = range(default_port, default_port - 100, -1)
    servo_port = next(
            (port for port in candidates if port not in used_ports),
            default_port)

    used_ports.append(servo_port)
    used_servo_ports[servo_hostname] = used_ports
    return servo_port
776
777
def _get_afe_servo_port(host_info, afe):
    """
    Look up the servo port recorded in the AFE for a host.

    The recorded port is only used when the servo host recorded in the AFE
    is the same as the servo host given in `host_info`.

    @param host_info   HostInfo tuple (hostname, host_attr_dict).
    @param afe         AFE instance used to query the host.

    @returns Servo port (int) stored in the AFE for the host.

    @raises _NoAFEServoPortError: When the AFE has no entry for the host,
        when the entry has no servo port attribute, or when its servo host
        attribute differs from the one in host_info.host_attr_dict.
    """
    matches = afe.get_hosts(hostname=host_info.hostname)
    if not matches:
        raise _NoAFEServoPortError

    afe_attributes = matches[0].attributes
    recorded_port = afe_attributes.get(servo_host.SERVO_PORT_ATTR)
    recorded_servo = afe_attributes.get(servo_host.SERVO_HOST_ATTR)
    requested_servo = host_info.host_attr_dict.get(
        servo_host.SERVO_HOST_ATTR)

    # A port is only trustworthy if it belongs to the same servo host.
    if recorded_servo != requested_servo or not recorded_port:
        raise _NoAFEServoPortError
    return int(recorded_port)
803
804
def _get_host_attributes(host_info_list, afe):
    """
    Get host attributes if a hostname_file was supplied.

    A servo port is chosen for every host and stored in that host's
    attribute dict; the dicts held by `host_info_list` are updated in place.

    @param host_info_list   List of HostInfo tuples (hostname, host_attr_dict).
    @param afe              AFE instance.

    @returns Dict mapping each hostname in host_info_list to its attribute
        dict, with the servo port attribute filled in.
    """
    host_attributes = {}
    # We need to choose servo ports for these hosts but we need to make sure
    # we don't choose ports already used. We'll store all used ports in a
    # dict of lists where the key is the servo_host and the val is a list of
    # ports used.
    used_servo_ports = {}
    for host_info in host_info_list:
        host_attr_dict = host_info.host_attr_dict
        # If the host already has an entry in the AFE that matches the same
        # servo host hostname and the servo port is set, use that port.
        try:
            servo_port = _get_afe_servo_port(host_info, afe)
        except _NoAFEServoPortError:
            # Use .get() so that a host without a servo host attribute is
            # handled like one with an empty servo hostname, instead of
            # failing with a KeyError.
            servo_port = _get_free_servo_port(
                host_attr_dict.get(servo_host.SERVO_HOST_ATTR),
                used_servo_ports, afe)
        host_attr_dict[servo_host.SERVO_PORT_ATTR] = servo_port
        host_attributes[host_info.hostname] = host_attr_dict
    return host_attributes
832
833
def _get_cros_repair_image_name(host):
    """Look up the stable CrOS image name for the board of the given host.

    @param host: hosts.CrosHost object. This object need not have an AFE
                 reference.

    @returns the value of afe_utils.get_stable_cros_image_name() for the
        board found in the host's info store.

    @raises InstallFailedError: When the host's info has no board.
    """
    board = host.host_info_store.get().board
    if board:
        return afe_utils.get_stable_cros_image_name(board)
    raise InstallFailedError('Unknown board for given host')
844
845
def install_duts(arguments):
    """Install a test image on DUTs, and deploy them.

    This handles command line parsing for both the repair and
    deployment commands.  The two operations are largely identical;
    the main difference is that full deployment includes flashing
    dev-signed firmware on the DUT prior to installing the test
    image.

    Exits the process with status 1 if argument validation fails.
    Failures while uploading logs are not raised; the traceback is
    written to `gs_upload_failure.log` in the log directory instead.

    @param arguments    Command line arguments with options, as
                        returned by `argparse.Argparser`.
    """
    arguments = cmdvalidate.validate_arguments(arguments)
    if arguments is None:
        sys.exit(1)
    sys.stderr.write('Installation output logs in %s\n' % arguments.logdir)

    # Override tempfile.tempdir.  Some of the autotest code we call
    # will create temporary files that don't get cleaned up.  So, we
    # redirect all temp files into one freshly created directory, and
    # remove that directory at exit to clean up everything at one fell
    # swoop.
    tempfile.tempdir = tempfile.mkdtemp()
    atexit.register(shutil.rmtree, tempfile.tempdir)

    # We don't want to distract the user with logging output, so we catch
    # logging output in a file.
    # NOTE(review): logfile is never closed here; presumably the logging
    # setup keeps writing to it for the rest of the process - confirm.
    logging_file_path = os.path.join(arguments.logdir, 'debug.log')
    logfile = open(logging_file_path, 'w')
    _configure_logging_to_file(logfile)

    report_log_path = os.path.join(arguments.logdir, 'report.log')
    with open(report_log_path, 'w') as report_log_file:
        report_log = _MultiFileWriter([report_log_file, sys.stdout])
        afe = frontend.AFE(server=arguments.web)
        if arguments.dry_run:
            report_log.write('Dry run - installation and most testing '
                             'will be skipped.\n')
        # The return value of _update_build() is not needed here.
        _update_build(afe, report_log, arguments)
        host_attr_dict = _get_host_attributes(arguments.host_info_list, afe)
        # One worker process per DUT, so all installs run in parallel.
        install_pool = multiprocessing.Pool(len(arguments.hostnames))
        install_function = functools.partial(_install_dut, arguments,
                                             host_attr_dict)
        results_list = install_pool.map(install_function, arguments.hostnames)
        _report_results(afe, report_log, arguments.hostnames, results_list)

    if arguments.upload:
        try:
            gspath = _get_upload_log_path(arguments)
            sys.stderr.write('Logs will be uploaded to %s\n' % (gspath,))
            _upload_logs(arguments.logdir, gspath)
        except Exception:
            # Uploading is best effort: record the failure for later
            # inspection rather than aborting after the install finished.
            upload_failure_log_path = os.path.join(arguments.logdir,
                                                   'gs_upload_failure.log')
            with open(upload_failure_log_path, 'w') as failure_log:
                traceback.print_exc(limit=None, file=failure_log)
            sys.stderr.write('Failed to upload logs;'
                             ' failure details are stored in {}.\n'
                             .format(upload_failure_log_path))
904