# Lint as: python2, python3
# Copyright (c) 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.


from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections
import contextlib
import json
import logging
import os
import random
import re
import time
import traceback
from six.moves import filter
from six.moves import range
from six.moves import urllib

import common
from autotest_lib.client.bin.result_tools import utils as result_utils
from autotest_lib.client.bin.result_tools import utils_lib as result_utils_lib
from autotest_lib.client.bin.result_tools import view as result_view
from autotest_lib.client.common_lib import lsbrelease_utils
from autotest_lib.client.common_lib import utils
from autotest_lib.client.common_lib import error
from autotest_lib.client.common_lib import file_utils
from autotest_lib.client.common_lib import global_config
from autotest_lib.client.common_lib import host_queue_entry_states
from autotest_lib.client.common_lib import host_states
from autotest_lib.server.cros import provision
from autotest_lib.server.cros.dynamic_suite import constants
from autotest_lib.server.cros.dynamic_suite import job_status

try:
    from autotest_lib.utils.frozen_chromite.lib import metrics
except ImportError:
    metrics = utils.metrics_mock


CONFIG = global_config.global_config

LAB_GOOD_STATES = ('open', 'throttled')

ENABLE_DRONE_IN_RESTRICTED_SUBNET = CONFIG.get_config_value(
        'CROS', 'enable_drone_in_restricted_subnet', type=bool,
        default=False)

# Wait at most 10 mins for duts to go idle.
IDLE_DUT_WAIT_TIMEOUT = 600

# Mapping between board name and build target. This is for special case
# handling of certain Android boards whose board name and build target name
# do not match.
ANDROID_TARGET_TO_BOARD_MAP = {
        'seed_l8150': 'gm4g_sprout',
        'bat_land': 'bat'
        }
ANDROID_BOARD_TO_TARGET_MAP = {
        'gm4g_sprout': 'seed_l8150',
        'bat': 'bat_land'
        }
# Prefix for the metrics name for result size information.
RESULT_METRICS_PREFIX = 'chromeos/autotest/result_collection/'

class TestLabException(Exception):
    """Exception raised when the Test Lab blocks a test or suite."""
    pass


class ParseBuildNameException(Exception):
    """Raised when ParseBuildName() cannot parse a build name."""
    pass


class Singleton(type):
    """Enforce that only one client class is instantiated per process."""
    _instances = {}

    def __call__(cls, *args, **kwargs):
        """Fetch the instance of a class to use for subsequent calls."""
        if cls not in cls._instances:
            cls._instances[cls] = super(Singleton, cls).__call__(
                    *args, **kwargs)
        return cls._instances[cls]

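# Illustrative sketch of how the Singleton metaclass above is meant to be
# used (the class name below is hypothetical, and six would need to be
# imported):
#
#     class _HypotheticalAFEClient(six.with_metaclass(Singleton, object)):
#         pass
#
# Every call to _HypotheticalAFEClient() within a process then returns the
# same instance.
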
class EmptyAFEHost(object):
    """Object to represent an AFE host object when there is no AFE."""

    def __init__(self):
        """
        We'll be setting the instance attributes as we use them.  Right now
        we only use attributes and labels, but as other attributes of an
        actual AFE Host object come into use (check rpc_interfaces.get_hosts()),
        we'll add them here so users won't be perplexed why their host's
        afe_host object complains that an attribute doesn't exist.
        """
        self.attributes = {}
        self.labels = []


def ParseBuildName(name):
    """Parse a build name into board, type, milestone, and manifest number.

    @param name: a build name, e.g. 'x86-alex-release/R20-2015.0.0' or a
                 relative build name, e.g. 'x86-alex-release/LATEST'

    @return board: board the manifest is for, e.g. x86-alex.
    @return type: one of 'release', 'factory', or 'firmware'
    @return milestone: (numeric) milestone the manifest was associated with.
                        Will be None for relative build names.
    @return manifest: manifest number, e.g. '2015.0.0'.
                      Will be None for relative build names.

    """
    match = re.match(r'(trybot-)?(?P<board>[\w-]+?)(?:-chrome)?(?:-chromium)?'
                     r'-(?P<type>\w+)/(R(?P<milestone>\d+)-'
                     r'(?P<manifest>[\d.ab-]+)|LATEST)',
                     name)
    if match and len(match.groups()) >= 5:
        return (match.group('board'), match.group('type'),
                match.group('milestone'), match.group('manifest'))
    raise ParseBuildNameException('%s is a malformed build name.' % name)

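# Illustrative examples for ParseBuildName() above, using the build names
# from the docstring:
#
#     ParseBuildName('x86-alex-release/R20-2015.0.0')
#     # -> ('x86-alex', 'release', '20', '2015.0.0')
#     ParseBuildName('x86-alex-release/LATEST')
#     # -> ('x86-alex', 'release', None, None)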

def get_labels_from_afe(hostname, label_prefix, afe):
    """Retrieve a host's specific labels from the AFE.

    Looks for host labels that have the form <label_prefix>:<value>
    and returns the "<value>" part of the label. None is returned
    if there is no label matching the pattern.

    @param hostname: hostname of given DUT.
    @param label_prefix: prefix of label to be matched, e.g., |board:|
    @param afe: afe instance.

    @returns A list of labels that match the prefix, or None.

    """
    labels = afe.get_labels(name__startswith=label_prefix,
                            host__hostname__in=[hostname])
    if labels:
        return [l.name.split(label_prefix, 1)[1] for l in labels]

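# Illustrative example for get_labels_from_afe() above (the hostname and
# labels are hypothetical):
#
#     # If host 'dut1' carries the AFE labels 'pool:bvt' and 'pool:suites':
#     get_labels_from_afe('dut1', 'pool:', afe)  # -> ['bvt', 'suites']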

def get_label_from_afe(hostname, label_prefix, afe):
    """Retrieve a host's specific label from the AFE.

    Looks for a host label that has the form <label_prefix>:<value>
    and returns the "<value>" part of the label. None is returned
    if there is no label matching the pattern.

    @param hostname: hostname of given DUT.
    @param label_prefix: prefix of label to be matched, e.g., |board:|
    @param afe: afe instance.
    @returns the label that matches the prefix, or None.

    """
    labels = get_labels_from_afe(hostname, label_prefix, afe)
    if labels and len(labels) == 1:
        return labels[0]


def get_board_from_afe(hostname, afe):
    """Retrieve given host's board from its labels in the AFE.

    Looks for a host label of the form "board:<board>", and
    returns the "<board>" part of the label.  `None` is returned
    if there is not a single, unique label matching the pattern.

    @param hostname: hostname of given DUT.
    @param afe: afe instance.
    @returns board from label, or `None`.

    """
    return get_label_from_afe(hostname, constants.BOARD_PREFIX, afe)


def get_build_from_afe(hostname, afe):
    """Retrieve the current build for given host from the AFE.

    Looks through the host's labels in the AFE to determine its build.

    @param hostname: hostname of given DUT.
    @param afe: afe instance.
    @returns The current build or None if it could not find it or if there
             were multiple build labels assigned to this host.

    """
    prefix = provision.CROS_VERSION_PREFIX
    build = get_label_from_afe(hostname, prefix + ':', afe)
    if build:
        return build
    return None


# TODO(ayatane): Can be deleted
def get_sheriffs(lab_only=False):
    """
    Polls the javascript file that holds the identity of the sheriff and
    parses its output to return a list of chromium sheriff email addresses.
    The javascript file can contain the ldap of more than one sheriff, e.g.:
    document.write('sheriff_one, sheriff_two').

    @param lab_only: if True, only pulls lab sheriff.
    @return: A list of chromium.org sheriff email addresses to cc on the bug.
             An empty list if the javascript could not be parsed.
    """
    return []


def remote_wget(source_url, dest_path, ssh_cmd):
    """wget source_url from localhost to dest_path on remote host using ssh.

    @param source_url: The complete url of the source of the package to send.
    @param dest_path: The path on the remote host's file system where we would
        like to store the package.
    @param ssh_cmd: The ssh command to use in performing the remote wget.
    """
    wget_cmd = ("wget -O - %s | %s 'cat >%s'" %
                (source_url, ssh_cmd, dest_path))
    utils.run(wget_cmd)


_MAX_LAB_STATUS_ATTEMPTS = 5
def _get_lab_status(status_url):
    """Grabs the current lab status and message.

    @param status_url: URL of the lab status page.

    @returns The JSON object obtained from the given URL, or None if it
             could not be fetched.

    """
    retry_waittime = 1
    for _ in range(_MAX_LAB_STATUS_ATTEMPTS):
        try:
            response = urllib.request.urlopen(status_url)
        except IOError as e:
            logging.debug('Error occurred when grabbing the lab status: %s.',
                          e)
            time.sleep(retry_waittime)
            continue
        # Check for successful response code.
        if response.getcode() == 200:
            return json.load(response)
        time.sleep(retry_waittime)
    return None


def _decode_lab_status(lab_status, build):
    """Decode lab status, and report exceptions as needed.

    Take a deserialized JSON object from the lab status page, and
    interpret it to determine the actual lab status.  Raise
    exceptions as required to report when the lab is down.

    @param lab_status: A deserialized JSON object from the lab status page.
    @param build: build name that we want to check the status of.

    @raises TestLabException Raised if a request to test for the given
                             status and build should be blocked.
    """
    # First check if the lab is up.
    if not lab_status['general_state'] in LAB_GOOD_STATES:
        raise TestLabException('Chromium OS Test Lab is closed: '
                               '%s.' % lab_status['message'])

    # Check if the build we wish to use is disabled.
    # Lab messages should be in the format of:
    #    Lab is 'status' [regex ...] (comment)
    # If the build name matches any regex, it will be blocked.
    build_exceptions = re.search(r'\[(.*)\]', lab_status['message'])
    if not build_exceptions or not build:
        return
    for build_pattern in build_exceptions.group(1).split():
        if re.match(build_pattern, build):
            raise TestLabException('Chromium OS Test Lab is closed: '
                                   '%s matches %s.' % (
                                           build, build_pattern))
    return

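# Illustrative example for _decode_lab_status() above (the status message and
# build name are hypothetical):
#
#     lab_status = {'general_state': 'open',
#                   'message': "Lab is 'open' [lumpy-release/.*] (comment)"}
#     _decode_lab_status(lab_status, 'lumpy-release/R28-3993.0.0')
#     # -> raises TestLabException because the build matches the regex in [].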

def is_in_lab():
    """Check if the current Autotest instance is in the lab.

    @return: True if the Autotest instance is in the lab.
    """
    test_server_name = CONFIG.get_config_value('SERVER', 'hostname')
    return test_server_name.startswith('cautotest')


def check_lab_status(build):
    """Check if the lab status allows us to schedule for a build.

    Checks if the lab is down, or if testing for the requested build
    should be blocked.

    @param build: Name of the build to be scheduled for testing.

    @raises TestLabException Raised if a request to test for the given
                             status and build should be blocked.

    """
    # Ensure we are trying to schedule on the actual lab.
    if not is_in_lab():
        return

    # Download the lab status from its home on the web.
    status_url = CONFIG.get_config_value('CROS', 'lab_status_url')
    json_status = _get_lab_status(status_url)
    if json_status is None:
        # We go ahead and say the lab is open if we can't get the status.
        logging.warning('Could not get a status from %s', status_url)
        return
    _decode_lab_status(json_status, build)


def host_in_lab(hostname):
    """Check if the execution is against a host in the lab."""
    return (not utils.in_moblab_ssp()
            and not lsbrelease_utils.is_moblab()
            and utils.host_is_in_lab_zone(hostname))


def lock_host_with_labels(afe, lock_manager, labels):
    """Lookup and lock one host that matches the list of input labels.

    @param afe: An instance of the afe class, as defined in server.frontend.
    @param lock_manager: A lock manager capable of locking hosts, e.g. the
        one defined in server.cros.host_lock_manager.
    @param labels: A list of labels to look for on hosts.

    @return: The hostname of a host matching all labels, and locked through the
        lock_manager. The hostname will be as specified in the database the afe
        object is associated with, i.e. if it exists in afe_hosts with a .cros
        suffix, the hostname returned will contain a .cros suffix.

    @raises: error.NoEligibleHostException: If no hosts matching the list of
        input labels are available.
    @raises: error.TestError: If unable to lock a host matching the labels.
    """
    potential_hosts = afe.get_hosts(multiple_labels=labels)
    if not potential_hosts:
        raise error.NoEligibleHostException(
                'No devices found with labels %s.' % labels)

    # This prevents errors where a fault might seem repeatable
    # because we lock, say, the same packet capturer for each test run.
    random.shuffle(potential_hosts)
    for host in potential_hosts:
        if lock_manager.lock([host.hostname]):
            logging.info('Locked device %s with labels %s.',
                         host.hostname, labels)
            return host.hostname
        else:
            logging.info('Unable to lock device %s with labels %s.',
                         host.hostname, labels)

    raise error.TestError('Could not lock a device with labels %s' % labels)


def get_test_views_from_tko(suite_job_id, tko):
    """Get test name and result for given suite job ID.

    @param suite_job_id: ID of suite job.
    @param tko: an instance of TKO as defined in server/frontend.py.
    @return: A defaultdict where keys are test names and values are
             lists of test statuses, e.g.,
             {'stub_Fail.Error': ['ERROR', 'ERROR'],
              'stub_Fail.NAError': ['TEST_NA'],
              'stub_Fail.RetrySuccess': ['ERROR', 'GOOD'],
              }
    @raise: Exception when there is no test view found.

    """
    views = tko.run('get_detailed_test_views', afe_job_id=suite_job_id)
    relevant_views = list(filter(job_status.view_is_relevant, views))
    if not relevant_views:
        raise Exception('Failed to retrieve job results.')

    test_views = collections.defaultdict(list)
    for view in relevant_views:
        test_views[view['test_name']].append(view['status'])
    return test_views


def get_data_key(prefix, suite, build, board):
    """
    Constructs a key string from parameters.

    @param prefix: Prefix for the generated key.
    @param suite: a suite name, e.g., bvt-cq, bvt-inline, infra_qual
    @param build: The build string. This string should have a consistent
        format, e.g.: x86-mario-release/R26-3570.0.0. If the format of this
        string changes such that we can't determine build_type or branch,
        we give up and use the parameters we're sure of instead (suite,
        board), e.g.:
            1. build = x86-alex-pgo-release/R26-3570.0.0
               branch = 26
               build_type = pgo-release
            2. build = lumpy-paladin/R28-3993.0.0-rc5
               branch = 28
               build_type = paladin
    @param board: The board that this suite ran on.
    @return: The key string used for a dictionary.
    """
    try:
        _board, build_type, branch = ParseBuildName(build)[:3]
    except ParseBuildNameException as e:
        logging.error(str(e))
        branch = 'Unknown'
        build_type = 'Unknown'
    else:
        embedded_str = re.search(r'x86-\w+-(.*)', _board)
        if embedded_str:
            build_type = embedded_str.group(1) + '-' + build_type

    data_key_dict = {
        'prefix': prefix,
        'board': board,
        'branch': branch,
        'build_type': build_type,
        'suite': suite,
    }
    return ('%(prefix)s.%(board)s.%(build_type)s.%(branch)s.%(suite)s'
            % data_key_dict)

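# Illustrative example for get_data_key() above (the prefix is hypothetical;
# the build string is example 2 from the docstring):
#
#     get_data_key('suite_runtime', 'bvt-cq',
#                  'lumpy-paladin/R28-3993.0.0-rc5', 'lumpy')
#     # -> 'suite_runtime.lumpy.paladin.28.bvt-cq'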

def is_shard():
    """Determines if this instance is running as a shard.

    Reads the global_config value shard_hostname in the section SHARD.

    @return True, if shard_hostname is set, False otherwise.
    """
    hostname = CONFIG.get_config_value('SHARD', 'shard_hostname', default=None)
    return bool(hostname)


def get_global_afe_hostname():
    """Read the hostname of the global AFE from the global configuration."""
    return CONFIG.get_config_value('SERVER', 'global_afe_hostname')


def get_special_task_status(is_complete, success, is_active):
    """Get the status of a special task.

    Emulate a host queue entry status for a special task.
    Although SpecialTasks are not HostQueueEntries, it is helpful to
    the user to present similar statuses.

    @param is_complete    Boolean if the task is completed.
    @param success        Boolean if the task succeeded.
    @param is_active      Boolean if the task is active.

    @return The status of a special task.
    """
    if is_complete:
        if success:
            return host_queue_entry_states.Status.COMPLETED
        return host_queue_entry_states.Status.FAILED
    if is_active:
        return host_queue_entry_states.Status.RUNNING
    return host_queue_entry_states.Status.QUEUED

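# Illustrative mapping for get_special_task_status() above:
#
#     get_special_task_status(True, True, False)    # -> Status.COMPLETED
#     get_special_task_status(True, False, False)   # -> Status.FAILED
#     get_special_task_status(False, False, True)   # -> Status.RUNNING
#     get_special_task_status(False, False, False)  # -> Status.QUEUED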

def get_special_task_exec_path(hostname, task_id, task_name, time_requested):
    """Get the execution path of the SpecialTask.

    This method returns different paths depending on where the
    task ran:
        * main: hosts/hostname/task_id-task_type
        * shard: main_path/time_created
    This is to work around the fact that a shard can fail independently
    of the main, and be replaced by another shard that has the same
    hosts. Without the time_created stamp the logs of the tasks running
    on the second shard will clobber the logs from the first in Google
    Storage, because task ids are not globally unique.

    @param hostname        Hostname
    @param task_id         Special task id
    @param task_name       Special task name (e.g., Verify, Repair, etc)
    @param time_requested  Special task requested time.

    @return An execution path for the task.
    """
    results_path = 'hosts/%s/%s-%s' % (hostname, task_id, task_name.lower())

    # If we do this on the main it will break backward compatibility,
    # as there are tasks that currently don't have timestamps. If a host
    # or job has been sent to a shard, the rpc for that host/job will
    # be redirected to the shard, so this global_config check will happen
    # on the shard the logs are on.
    if not is_shard():
        return results_path

    # Generate a uid to disambiguate special task result directories
    # in case this shard fails. The simplest uid is the job_id, however
    # in rare cases tasks do not have jobs associated with them (eg:
    # frontend verify), so just use the creation timestamp. The clocks
    # between a shard and main should always be in sync. Any discrepancies
    # will be brought to our attention in the form of job timeouts.
    uid = time_requested.strftime('%Y%d%m%H%M%S')

    # TODO: This is a hack, however it is the easiest way to achieve
    # correctness. There is currently some debate over the future of
    # tasks in our infrastructure and refactoring everything right
    # now isn't worth the time.
    return '%s/%s' % (results_path, uid)

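# Illustrative example for get_special_task_exec_path() above (the hostname
# and task id are hypothetical):
#
#     get_special_task_exec_path('chromeos1-row1-host1', 123, 'Verify', t)
#     # -> 'hosts/chromeos1-row1-host1/123-verify' on the main, or
#     # -> 'hosts/chromeos1-row1-host1/123-verify/<t as %Y%d%m%H%M%S>' on a
#     #    shard.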

def get_job_tag(id, owner):
    """Returns a string tag for a job.

    @param id    Job id
    @param owner Job owner

    """
    return '%s-%s' % (id, owner)


def get_hqe_exec_path(tag, execution_subdir):
    """Returns an execution path to a HQE's results.

    @param tag               Tag string for a job associated with a HQE.
    @param execution_subdir  Execution sub-directory string of a HQE.

    """
    return os.path.join(tag, execution_subdir)

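# Illustrative example combining get_job_tag() and get_hqe_exec_path() above
# (job id, owner and subdir are hypothetical):
#
#     tag = get_job_tag(123, 'chromeos-test')   # -> '123-chromeos-test'
#     get_hqe_exec_path(tag, 'host1')           # -> '123-chromeos-test/host1'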

def is_inside_chroot():
    """Check if the process is running inside the chroot.

    @return: True if the process is running inside the chroot.

    """
    return os.path.exists('/etc/cros_chroot_version')


def parse_job_name(name):
    """Parse a job name to get information including build, board and suite.

    A suite job created by run_suite follows the naming convention of:
    [build]-test_suites/control.[suite]
    For example: lumpy-release/R46-7272.0.0-test_suites/control.bvt
    The naming convention is defined in rpc_interface.create_suite_job.

    A test job created by a suite job follows the naming convention of:
    [build]/[suite]/[test name]
    For example: lumpy-release/R46-7272.0.0/bvt/login_LoginSuccess
    The naming convention is defined in
    server/cros/dynamic_suite/tools.create_job_name

    Note that pgo and chrome-perf builds will fail this method. Since the lab
    does not run tests for these builds, they can be ignored.
    Also, tests for Launch Control builds have a different naming convention.
    The build ID will be used as build_version.

    @param name: Name of the job.

    @return: A dictionary containing the test information. The keyvals include:
             build: Name of the build, e.g., lumpy-release/R46-7272.0.0
             build_version: The version of the build, e.g., R46-7272.0.0
             board: Name of the board, e.g., lumpy
             suite: Name of the test suite, e.g., bvt

    """
    info = {}
    suite_job_regex = r'([^/]*/[^/]*(?:/\d+)?)-test_suites/control\.(.*)'
    test_job_regex = r'([^/]*/[^/]*(?:/\d+)?)/([^/]+)/.*'
    match = re.match(suite_job_regex, name)
    if not match:
        match = re.match(test_job_regex, name)
    if match:
        info['build'] = match.groups()[0]
        info['suite'] = match.groups()[1]
        info['build_version'] = info['build'].split('/')[1]
        try:
            info['board'], _, _, _ = ParseBuildName(info['build'])
        except ParseBuildNameException:
            # Try to parse it as a Launch Control build.
            # Launch Control builds have the name format:
            # branch/build_target-build_type/build_id.
            try:
                _, target, build_id = utils.parse_launch_control_build(
                        info['build'])
                build_target, _ = utils.parse_launch_control_target(target)
                if build_target:
                    info['board'] = build_target
                    info['build_version'] = build_id
            except ValueError:
                pass
    return info

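# Illustrative example for parse_job_name() above, using the suite job name
# from the docstring:
#
#     parse_job_name('lumpy-release/R46-7272.0.0-test_suites/control.bvt')
#     # -> {'build': 'lumpy-release/R46-7272.0.0',
#     #     'suite': 'bvt',
#     #     'build_version': 'R46-7272.0.0',
#     #     'board': 'lumpy'}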

def verify_not_root_user():
    """Simple function to error out if running with uid == 0"""
    if os.getuid() == 0:
        raise error.IllegalUser('This script cannot be run as root.')


def get_hostname_from_machine(machine):
    """Lookup hostname from a machine string or dict.

    @returns: Machine hostname in string format.
    """
    hostname, _ = get_host_info_from_machine(machine)
    return hostname


def get_host_info_from_machine(machine):
    """Lookup host information from a machine string or dict.

    @returns: Tuple of (hostname, afe_host)
    """
    if isinstance(machine, dict):
        return (machine['hostname'], machine['afe_host'])
    else:
        return (machine, EmptyAFEHost())

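# Illustrative examples for get_host_info_from_machine() above (the hostname
# and machine dict are hypothetical):
#
#     get_host_info_from_machine('dut1.cros')
#     # -> ('dut1.cros', <EmptyAFEHost instance>)
#     get_host_info_from_machine({'hostname': 'dut1.cros', 'afe_host': host})
#     # -> ('dut1.cros', host)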

def get_afe_host_from_machine(machine):
    """Return the afe_host from the machine dict if possible.

    @returns: AFE host object.
    """
    _, afe_host = get_host_info_from_machine(machine)
    return afe_host


def get_connection_pool_from_machine(machine):
    """Returns the ssh_multiplex.ConnectionPool from machine if possible."""
    if not isinstance(machine, dict):
        return None
    return machine.get('connection_pool')


def SetupTsMonGlobalState(*args, **kwargs):
    """Import-safe wrapper around chromite.lib.ts_mon_config's setup function.

    @param *args: Args to pass through.
    @param **kwargs: Kwargs to pass through.
    """
    try:
        # TODO(crbug.com/739466) This module import is delayed because it adds
        # 1-2 seconds to the module import time and most users of site_utils
        # don't need it. The correct fix is to break apart site_utils into more
        # meaningful chunks.
        from autotest_lib.utils.frozen_chromite.lib import ts_mon_config
    except ImportError as e:
        logging.warning('Unable to import chromite. Monarch is disabled: %s', e)
        return TrivialContextManager()

    try:
        context = ts_mon_config.SetupTsMonGlobalState(*args, **kwargs)
        if hasattr(context, '__exit__'):
            return context
    except Exception as e:
        logging.warning('Caught an exception trying to setup ts_mon, '
                        'monitoring is disabled: %s', e, exc_info=True)
    return TrivialContextManager()

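# Illustrative usage sketch for SetupTsMonGlobalState() above (the service
# name and call site are hypothetical; arguments are forwarded verbatim to
# chromite's ts_mon_config.SetupTsMonGlobalState when it is importable):
#
#     with SetupTsMonGlobalState('some_autotest_service'):
#         ...  # metrics emitted here are reported if chromite is available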

@contextlib.contextmanager
def TrivialContextManager(*args, **kwargs):
    """Context manager that does nothing.

    @param *args: Ignored args
    @param **kwargs: Ignored kwargs.
    """
    yield


def wait_for_idle_duts(duts, afe, max_wait=IDLE_DUT_WAIT_TIMEOUT):
    """Wait for the hosts to all go idle.

    @param duts: List of duts to check for idle state.
    @param afe: afe instance.
    @param max_wait: Max wait time in seconds to wait for duts to be idle.

    @returns Boolean True if all hosts are idle or False if any hosts did not
            go idle within max_wait.
    """
    start_time = time.time()
    # We make a shallow copy since we're going to be modifying active_dut_list.
    active_dut_list = duts[:]
    while active_dut_list:
        # Let's rate-limit how often we hit the AFE.
        time.sleep(1)

        # Check if we've waited too long.
        if (time.time() - start_time) > max_wait:
            return False

        # Get the status for the duts and see if they're in the idle state.
        afe_hosts = afe.get_hosts(active_dut_list)
        idle_duts = [afe_host.hostname for afe_host in afe_hosts
                     if afe_host.status in host_states.IDLE_STATES]

        # Take out idle duts so we don't needlessly check them
        # next time around.
        for idle_dut in idle_duts:
            active_dut_list.remove(idle_dut)

        logging.info('Still waiting for the following duts to go idle: %s',
                     active_dut_list)
    return True


@contextlib.contextmanager
def lock_duts_and_wait(duts, afe, lock_msg='default lock message',
                       max_wait=IDLE_DUT_WAIT_TIMEOUT):
    """Context manager to lock the duts and wait for them to go idle.

    @param duts: List of duts to lock.
    @param afe: afe instance.
    @param lock_msg: message for afe on locking this host.
    @param max_wait: Max wait time in seconds to wait for duts to be idle.

    @returns Boolean lock_success where True if all duts locked successfully or
             False if we timed out waiting too long for hosts to go idle.
    """
    locked_duts = []
    try:
        duts.sort()
        for dut in duts:
            if afe.lock_host(dut, lock_msg, fail_if_locked=True):
                locked_duts.append(dut)
            else:
                logging.info('%s already locked', dut)
        yield wait_for_idle_duts(locked_duts, afe, max_wait)
    finally:
        afe.unlock_hosts(locked_duts)

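# Illustrative usage sketch for lock_duts_and_wait() above (the afe object
# and dut names are hypothetical):
#
#     with lock_duts_and_wait(['dut1', 'dut2'], afe, lock_msg='my test') as ok:
#         if ok:
#             ...  # every successfully locked dut is now idle
#     # Duts locked by the context manager are unlocked on exit.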

def _get_default_size_info(path):
    """Get the default result size information.

    In case the directory summary fails to build, assume the test result is
    not throttled and all result sizes are the size of existing test results.

    @return: A namedtuple of result size information, including:
            client_result_collected_KB: The total size (in KB) of test results
                    collected from test device. Set to be the total size of the
                    given path.
            original_result_total_KB: The original size (in KB) of test results
                    before being trimmed. Set to be the total size of the given
                    path.
            result_uploaded_KB: The total size (in KB) of test results to be
                    uploaded. Set to be the total size of the given path.
            result_throttled: True if test results collection is throttled.
                    It's set to False in this default behavior.
    """
    total_size = file_utils.get_directory_size_kibibytes(path)
    return result_utils_lib.ResultSizeInfo(
            client_result_collected_KB=total_size,
            original_result_total_KB=total_size,
            result_uploaded_KB=total_size,
            result_throttled=False)


def _report_result_size_metrics(result_size_info):
    """Report result size information to metrics.

    @param result_size_info: A ResultSizeInfo namedtuple containing information
            of test result sizes.
    """
    fields = {'result_throttled': result_size_info.result_throttled}
    metrics.Counter(RESULT_METRICS_PREFIX + 'client_result_collected_KB',
                    description='The total size (in KB) of test results '
                    'collected from test device. Set to be the total size of '
                    'the given path.').increment_by(int(
                            result_size_info.client_result_collected_KB),
                                                    fields=fields)
    metrics.Counter(RESULT_METRICS_PREFIX + 'original_result_total_KB',
                    description='The original size (in KB) of test results '
                    'before being trimmed.').increment_by(int(
                            result_size_info.original_result_total_KB),
                                                          fields=fields)
    metrics.Counter(RESULT_METRICS_PREFIX + 'result_uploaded_KB',
                    description='The total size (in KB) of test results to be '
                    'uploaded.').increment_by(int(
                            result_size_info.result_uploaded_KB),
                                              fields=fields)


@metrics.SecondsTimerDecorator(
        'chromeos/autotest/result_collection/collect_result_sizes_duration')
def collect_result_sizes(path, log=logging.debug):
    """Collect the result sizes information and build result summary.

    It first tries to merge directory summaries and calculate the result sizes
    including:
    client_result_collected_KB: The volume in KB that's transferred from the
            test device.
    original_result_total_KB: The volume in KB that's the original size of the
            result files before being trimmed.
    result_uploaded_KB: The volume in KB that will be uploaded.
    result_throttled: Indicating if the result files were throttled.

    If directory summary merging failed for any reason, fall back to using the
    total size of the given result directory.

    @param path: Path of the result directory to get size information.
    @param log: The logging method, default to logging.debug
    @return: A ResultSizeInfo namedtuple containing information of test result
             sizes.
    """
    try:
        client_collected_bytes, summary, files = result_utils.merge_summaries(
                path)
        result_size_info = result_utils_lib.get_result_size_info(
                client_collected_bytes, summary)
        html_file = os.path.join(path, result_view.DEFAULT_RESULT_SUMMARY_NAME)
        result_view.build(client_collected_bytes, summary, html_file)

        # Delete all summary files after final view is built.
        for summary_file in files:
            os.remove(summary_file)
    except:
        log('Failed to calculate result sizes based on directory summaries for '
            'directory %s. Fall back to record the total size.\nException: %s' %
            (path, traceback.format_exc()))
        result_size_info = _get_default_size_info(path)

    _report_result_size_metrics(result_size_info)

    return result_size_info