# Copyright (c) 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.


import collections
import contextlib
import grp
import httplib
import json
import logging
import os
import random
import re
import time
import traceback
import urllib2

import common
from autotest_lib.client.bin.result_tools import utils as result_utils
from autotest_lib.client.bin.result_tools import utils_lib as result_utils_lib
from autotest_lib.client.bin.result_tools import view as result_view
from autotest_lib.client.common_lib import lsbrelease_utils
from autotest_lib.client.common_lib import utils
from autotest_lib.client.common_lib import error
from autotest_lib.client.common_lib import file_utils
from autotest_lib.client.common_lib import global_config
from autotest_lib.client.common_lib import host_queue_entry_states
from autotest_lib.client.common_lib import host_states
from autotest_lib.server.cros import provision
from autotest_lib.server.cros.dynamic_suite import constants
from autotest_lib.server.cros.dynamic_suite import job_status

try:
    from chromite.lib import metrics
except ImportError:
    metrics = utils.metrics_mock


CONFIG = global_config.global_config

_SHERIFF_JS = CONFIG.get_config_value('NOTIFICATIONS', 'sheriffs', default='')
_LAB_SHERIFF_JS = CONFIG.get_config_value(
        'NOTIFICATIONS', 'lab_sheriffs', default='')
_CHROMIUM_BUILD_URL = CONFIG.get_config_value(
        'NOTIFICATIONS', 'chromium_build_url', default='')

LAB_GOOD_STATES = ('open', 'throttled')

ENABLE_DRONE_IN_RESTRICTED_SUBNET = CONFIG.get_config_value(
        'CROS', 'enable_drone_in_restricted_subnet', type=bool,
        default=False)

# Wait at most 10 mins for duts to go idle.
IDLE_DUT_WAIT_TIMEOUT = 600

# Mapping between board name and build target. This is for special-case
# handling of certain Android boards whose board name and build target name
# do not match.
ANDROID_TARGET_TO_BOARD_MAP = {
        'seed_l8150': 'gm4g_sprout',
        'bat_land': 'bat'
        }
ANDROID_BOARD_TO_TARGET_MAP = {
        'gm4g_sprout': 'seed_l8150',
        'bat': 'bat_land'
        }
# Prefix for the metrics name for result size information.
RESULT_METRICS_PREFIX = 'chromeos/autotest/result_collection/'

class TestLabException(Exception):
    """Exception raised when the Test Lab blocks a test or suite."""
    pass


class ParseBuildNameException(Exception):
    """Raised when ParseBuildName() cannot parse a build name."""
    pass


class Singleton(type):
    """Metaclass enforcing that only one instance of a class exists per process."""
    _instances = {}

    def __call__(cls, *args, **kwargs):
        """Fetch the instance of a class to use for subsequent calls."""
        if cls not in cls._instances:
            cls._instances[cls] = super(Singleton, cls).__call__(
                    *args, **kwargs)
        return cls._instances[cls]
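# Example (illustrative sketch, not part of the original module): a class
# that uses Singleton as its metaclass is constructed at most once per
# process; later calls return the cached instance. `MyClient` below is
# hypothetical.
#
#     class MyClient(object):
#         __metaclass__ = Singleton  # Python 2 metaclass declaration.
#
#     assert MyClient() is MyClient()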

class EmptyAFEHost(object):
    """Object to represent an AFE host object when there is no AFE."""

    def __init__(self):
        """
        We'll be setting the instance attributes as we use them.  Right now
        we only use attributes and labels, but as other attributes of an
        actual AFE Host object come into use (check rpc_interfaces.get_hosts()),
        we'll add them here so users won't be perplexed when their host's
        afe_host object complains that an attribute doesn't exist.
        """
        self.attributes = {}
        self.labels = []


def ParseBuildName(name):
    """Parse a build name into board, type, milestone, and manifest number.

    @param name: a build name, e.g. 'x86-alex-release/R20-2015.0.0' or a
                 relative build name, e.g. 'x86-alex-release/LATEST'

    @return board: board the manifest is for, e.g. x86-alex.
    @return type: one of 'release', 'factory', or 'firmware'
    @return milestone: (numeric) milestone the manifest was associated with.
                       Will be None for relative build names.
    @return manifest: manifest number, e.g. '2015.0.0'.
                      Will be None for relative build names.

    """
    match = re.match(r'(trybot-)?(?P<board>[\w-]+?)(?:-chrome)?(?:-chromium)?'
                     r'-(?P<type>\w+)/(R(?P<milestone>\d+)-'
                     r'(?P<manifest>[\d.ab-]+)|LATEST)',
                     name)
    if match and len(match.groups()) >= 5:
        return (match.group('board'), match.group('type'),
                match.group('milestone'), match.group('manifest'))
    raise ParseBuildNameException('%s is a malformed build name.' % name)
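# Worked examples for the docstring cases above (comments only, not doctests):
#
#     ParseBuildName('x86-alex-release/R20-2015.0.0')
#         -> ('x86-alex', 'release', '20', '2015.0.0')
#     ParseBuildName('x86-alex-release/LATEST')
#         -> ('x86-alex', 'release', None, None)
#
# Any name that does not match the pattern raises ParseBuildNameException.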


def get_labels_from_afe(hostname, label_prefix, afe):
    """Retrieve a host's specific labels from the AFE.

    Looks for the host labels that have the form <label_prefix>:<value>
    and returns the "<value>" part of the label. None is returned
    if there is no label matching the pattern.

    @param hostname: hostname of given DUT.
    @param label_prefix: prefix of label to be matched, e.g., |board:|
    @param afe: afe instance.

    @returns A list of labels that match the prefix, or None.

    """
    labels = afe.get_labels(name__startswith=label_prefix,
                            host__hostname__in=[hostname])
    if labels:
        return [l.name.split(label_prefix, 1)[1] for l in labels]


def get_label_from_afe(hostname, label_prefix, afe):
    """Retrieve a host's specific label from the AFE.

    Looks for a host label that has the form <label_prefix>:<value>
    and returns the "<value>" part of the label. None is returned
    if there is no label matching the pattern.

    @param hostname: hostname of given DUT.
    @param label_prefix: prefix of label to be matched, e.g., |board:|
    @param afe: afe instance.
    @returns the label that matches the prefix, or None.

    """
    labels = get_labels_from_afe(hostname, label_prefix, afe)
    if labels and len(labels) == 1:
        return labels[0]


def get_board_from_afe(hostname, afe):
    """Retrieve given host's board from its labels in the AFE.

    Looks for a host label of the form "board:<board>", and
    returns the "<board>" part of the label.  `None` is returned
    if there is not a single, unique label matching the pattern.

    @param hostname: hostname of given DUT.
    @param afe: afe instance.
    @returns board from label, or `None`.

    """
    return get_label_from_afe(hostname, constants.BOARD_PREFIX, afe)


def get_build_from_afe(hostname, afe):
    """Retrieve the current build for given host from the AFE.

    Looks through the host's labels in the AFE to determine its build.

    @param hostname: hostname of given DUT.
    @param afe: afe instance.
    @returns The current build, or None if it could not be found or if there
             were multiple build labels assigned to this host.

    """
    prefix = provision.CROS_VERSION_PREFIX
    build = get_label_from_afe(hostname, prefix + ':', afe)
    if build:
        return build
    return None


# TODO(fdeng): fix get_sheriffs crbug.com/483254
def get_sheriffs(lab_only=False):
    """
    Polls the javascript file that holds the identity of the sheriff and
    parses its output to return a list of chromium sheriff email addresses.
    The javascript file can contain the ldap of more than one sheriff, e.g.:
    document.write('sheriff_one, sheriff_two').

    @param lab_only: if True, only pulls lab sheriff.
    @return: A list of chromium.org sheriff email addresses to cc on the bug.
             An empty list if the javascript could not be parsed.
    """
    sheriff_ids = []
    sheriff_js_list = _LAB_SHERIFF_JS.split(',')
    if not lab_only:
        sheriff_js_list.extend(_SHERIFF_JS.split(','))

    for sheriff_js in sheriff_js_list:
        try:
            url_content = utils.urlopen('%s%s' % (
                _CHROMIUM_BUILD_URL, sheriff_js)).read()
        except (ValueError, IOError) as e:
            logging.warning('could not parse sheriff from url %s%s: %s',
                            _CHROMIUM_BUILD_URL, sheriff_js, str(e))
        except (urllib2.URLError, httplib.HTTPException) as e:
            logging.warning('unexpected error reading from url "%s%s": %s',
                            _CHROMIUM_BUILD_URL, sheriff_js, str(e))
        else:
            ldaps = re.search(r"document.write\('(.*)'\)", url_content)
            if not ldaps:
                logging.warning('Could not retrieve sheriff ldaps for: %s',
                                url_content)
                continue
            sheriff_ids += ['%s@chromium.org' % alias.replace(' ', '')
                            for alias in ldaps.group(1).split(',')]
    return sheriff_ids
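# For instance (illustrative only), if a fetched javascript body is
# "document.write('sheriff_one, sheriff_two')", the loop above yields
# ['sheriff_one@chromium.org', 'sheriff_two@chromium.org'].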


def remote_wget(source_url, dest_path, ssh_cmd):
    """wget source_url from localhost to dest_path on remote host using ssh.

    @param source_url: The complete url of the source of the package to send.
    @param dest_path: The path on the remote host's file system where we would
        like to store the package.
    @param ssh_cmd: The ssh command to use in performing the remote wget.
    """
    wget_cmd = ("wget -O - %s | %s 'cat >%s'" %
                (source_url, ssh_cmd, dest_path))
    utils.run(wget_cmd)


_MAX_LAB_STATUS_ATTEMPTS = 5
def _get_lab_status(status_url):
    """Grabs the current lab status and message.

    @param status_url: URL from which to grab the lab status JSON.

    @returns The JSON object obtained from the given URL, or None if the
             status could not be retrieved.

    """
    retry_waittime = 1
    for _ in range(_MAX_LAB_STATUS_ATTEMPTS):
        try:
            response = urllib2.urlopen(status_url)
        except IOError as e:
            logging.debug('Error occurred when grabbing the lab status: %s.',
                          e)
            time.sleep(retry_waittime)
            continue
        # Check for successful response code.
        if response.getcode() == 200:
            return json.load(response)
        time.sleep(retry_waittime)
    return None


def _decode_lab_status(lab_status, build):
    """Decode lab status, and report exceptions as needed.

    Take a deserialized JSON object from the lab status page, and
    interpret it to determine the actual lab status.  Raise
    exceptions as required to report when the lab is down.

    @param lab_status: a deserialized JSON object from the lab status page.
    @param build: build name that we want to check the status of.

    @raises TestLabException Raised if a request to test for the given
                             status and build should be blocked.
    """
    # First check if the lab is up.
    if lab_status['general_state'] not in LAB_GOOD_STATES:
        raise TestLabException('Chromium OS Test Lab is closed: '
                               '%s.' % lab_status['message'])

    # Check if the build we wish to use is disabled.
    # Lab messages should be in the format of:
    #    Lab is 'status' [regex ...] (comment)
    # If the build name matches any regex, it will be blocked.
    build_exceptions = re.search(r'\[(.*)\]', lab_status['message'])
    if not build_exceptions or not build:
        return
    for build_pattern in build_exceptions.group(1).split():
        if re.match(build_pattern, build):
            raise TestLabException('Chromium OS Test Lab is closed: '
                                   '%s matches %s.' % (
                                           build, build_pattern))
    return
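# Illustrative values: with
#     lab_status = {'general_state': 'open',
#                   'message': "Lab is 'open' [lumpy-release/.*] (blocked)"}
# _decode_lab_status(lab_status, 'lumpy-release/R28-3993.0.0') raises
# TestLabException because the build matches the bracketed regex, while
# _decode_lab_status(lab_status, 'daisy-release/R28-3993.0.0') returns
# normally.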


def is_in_lab():
    """Check if the current Autotest instance is in the lab.

    @return: True if the Autotest instance is in the lab.
    """
    test_server_name = CONFIG.get_config_value('SERVER', 'hostname')
    return test_server_name.startswith('cautotest')


def check_lab_status(build):
    """Check if the lab status allows us to schedule for a build.

    Checks if the lab is down, or if testing for the requested build
    should be blocked.

    @param build: Name of the build to be scheduled for testing.

    @raises TestLabException Raised if a request to test for the given
                             status and build should be blocked.

    """
    # Ensure we are trying to schedule on the actual lab.
    if not is_in_lab():
        return

    # Download the lab status from its home on the web.
    status_url = CONFIG.get_config_value('CROS', 'lab_status_url')
    json_status = _get_lab_status(status_url)
    if json_status is None:
        # We go ahead and say the lab is open if we can't get the status.
        logging.warning('Could not get a status from %s', status_url)
        return
    _decode_lab_status(json_status, build)


def host_in_lab(hostname):
    """Check if the execution is against a host in the lab."""
    return (not utils.in_moblab_ssp()
            and not lsbrelease_utils.is_moblab()
            and utils.host_is_in_lab_zone(hostname))


def lock_host_with_labels(afe, lock_manager, labels):
    """Look up and lock one host that matches the list of input labels.

    @param afe: An instance of the afe class, as defined in server.frontend.
    @param lock_manager: A lock manager capable of locking hosts, e.g. the
        one defined in server.cros.host_lock_manager.
    @param labels: A list of labels to look for on hosts.

    @return: The hostname of a host matching all labels, and locked through the
        lock_manager. The hostname will be as specified in the database the afe
        object is associated with, i.e., if it exists in afe_hosts with a .cros
        suffix, the hostname returned will contain a .cros suffix.

    @raises: error.NoEligibleHostException: If no hosts matching the list of
        input labels are available.
    @raises: error.TestError: If unable to lock a host matching the labels.
    """
    potential_hosts = afe.get_hosts(multiple_labels=labels)
    if not potential_hosts:
        raise error.NoEligibleHostException(
                'No devices found with labels %s.' % labels)

    # This prevents errors where a fault might seem repeatable
    # because we lock, say, the same packet capturer for each test run.
    random.shuffle(potential_hosts)
    for host in potential_hosts:
        if lock_manager.lock([host.hostname]):
            logging.info('Locked device %s with labels %s.',
                         host.hostname, labels)
            return host.hostname
        else:
            logging.info('Unable to lock device %s with labels %s.',
                         host.hostname, labels)

    raise error.TestError('Could not lock a device with labels %s' % labels)


def get_test_views_from_tko(suite_job_id, tko):
    """Get test name and result for given suite job ID.

    @param suite_job_id: ID of suite job.
    @param tko: an instance of TKO as defined in server/frontend.py.
    @return: A defaultdict where keys are test names and values are
             lists of test statuses, e.g.,
             {'dummy_Fail.Error': ['ERROR', 'ERROR'],
              'dummy_Fail.NAError': ['TEST_NA'],
              'dummy_Fail.RetrySuccess': ['ERROR', 'GOOD'],
              }
    @raise: Exception when there is no test view found.

    """
    views = tko.run('get_detailed_test_views', afe_job_id=suite_job_id)
    relevant_views = filter(job_status.view_is_relevant, views)
    if not relevant_views:
        raise Exception('Failed to retrieve job results.')

    test_views = collections.defaultdict(list)
    for view in relevant_views:
        test_views[view['test_name']].append(view['status'])
    return test_views


def get_data_key(prefix, suite, build, board):
    """
    Constructs a key string from parameters.

    @param prefix: Prefix for the generated key.
    @param suite: a suite name, e.g., bvt-cq, bvt-inline, dummy
    @param build: The build string. This string should have a consistent
        format, e.g.: x86-mario-release/R26-3570.0.0. If the format of this
        string changes such that we can't determine build_type or branch,
        we give up and use the parameters we're sure of instead (suite,
        board). E.g.:
            1. build = x86-alex-pgo-release/R26-3570.0.0
               branch = 26
               build_type = pgo-release
            2. build = lumpy-paladin/R28-3993.0.0-rc5
               branch = 28
               build_type = paladin
    @param board: The board that this suite ran on.
    @return: The key string used for a dictionary.
    """
    try:
        _board, build_type, branch = ParseBuildName(build)[:3]
    except ParseBuildNameException as e:
        logging.error(str(e))
        branch = 'Unknown'
        build_type = 'Unknown'
    else:
        embedded_str = re.search(r'x86-\w+-(.*)', _board)
        if embedded_str:
            build_type = embedded_str.group(1) + '-' + build_type

    data_key_dict = {
        'prefix': prefix,
        'board': board,
        'branch': branch,
        'build_type': build_type,
        'suite': suite,
    }
    return ('%(prefix)s.%(board)s.%(build_type)s.%(branch)s.%(suite)s'
            % data_key_dict)
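# Worked example (illustrative prefix), matching case 1 in the docstring:
#
#     get_data_key('stats', 'bvt', 'x86-alex-pgo-release/R26-3570.0.0',
#                  'x86-alex')
#         -> 'stats.x86-alex.pgo-release.26.bvt'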


def setup_logging(logfile=None, prefix=False):
    """Set up basic logging with all logging info stripped.

    Calls to logging will only show the message. No severity is logged.

    @param logfile: If specified, dump output to a file as well.
    @param prefix: Flag for log prefix. Set to True to add a prefix to log
        entries, including timestamp and log level. Default is False.
    """
    # TODO (xixuan): Delete this code when finishing replacing run_suite.py &
    # abort_suite.py in skylab.
    # Remove all existing handlers. client/common_lib/logging_config adds
    # a StreamHandler to logger when modules are imported, e.g.,
    # autotest_lib.client.bin.utils. A new StreamHandler will be added here to
    # log only messages, not severity.
    logging.getLogger().handlers = []

    if prefix:
        log_format = '%(asctime)s %(levelname)-5s| %(message)s'
    else:
        log_format = '%(message)s'

    screen_handler = logging.StreamHandler()
    screen_handler.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(screen_handler)
    logging.getLogger().setLevel(logging.INFO)
    if logfile:
        file_handler = logging.FileHandler(logfile)
        file_handler.setFormatter(logging.Formatter(log_format))
        file_handler.setLevel(logging.DEBUG)
        logging.getLogger().addHandler(file_handler)
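# Typical call (the log path below is hypothetical):
#
#     setup_logging(logfile='/tmp/run_suite.log', prefix=True)
#     logging.info('suite started')  # emits "<timestamp> INFO | suite started"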


def is_shard():
    """Determines if this instance is running as a shard.

    Reads the global_config value shard_hostname in the section SHARD.

    @return True if shard_hostname is set, False otherwise.
    """
    hostname = CONFIG.get_config_value('SHARD', 'shard_hostname', default=None)
    return bool(hostname)


def get_global_afe_hostname():
    """Read the hostname of the global AFE from the global configuration."""
    return CONFIG.get_config_value('SERVER', 'global_afe_hostname')


def is_restricted_user(username):
    """Determines if a user is in a restricted group.

    Users in a restricted group only have access to the master.

    @param username: A string, representing a username.

    @returns: True if the user is in a restricted group.
    """
    if not username:
        return False

    restricted_groups = CONFIG.get_config_value(
            'AUTOTEST_WEB', 'restricted_groups', default='').split(',')
    for group in restricted_groups:
        try:
            if group and username in grp.getgrnam(group).gr_mem:
                return True
        except KeyError:
            logging.debug("%s is not a valid group.", group)
    return False


def get_special_task_status(is_complete, success, is_active):
    """Get the status of a special task.

    Emulate a host queue entry status for a special task.
    Although SpecialTasks are not HostQueueEntries, it is helpful to
    the user to present similar statuses.

    @param is_complete    Boolean indicating if the task is completed.
    @param success        Boolean indicating if the task succeeded.
    @param is_active      Boolean indicating if the task is active.

    @return The status of a special task.
    """
    if is_complete:
        if success:
            return host_queue_entry_states.Status.COMPLETED
        return host_queue_entry_states.Status.FAILED
    if is_active:
        return host_queue_entry_states.Status.RUNNING
    return host_queue_entry_states.Status.QUEUED
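# The mapping, spelled out (follows directly from the branches above):
#     is_complete and success      -> Status.COMPLETED
#     is_complete and not success  -> Status.FAILED
#     not complete but is_active   -> Status.RUNNING
#     otherwise                    -> Status.QUEUED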


def get_special_task_exec_path(hostname, task_id, task_name, time_requested):
    """Get the execution path of the SpecialTask.

    This method returns different paths depending on where the task ran:
        * Master: hosts/hostname/task_id-task_type
        * Shard: Master_path/time_created
    This is to work around the fact that a shard can fail independently
    of the master, and be replaced by another shard that has the same
    hosts. Without the time_created stamp the logs of the tasks running
    on the second shard will clobber the logs from the first in Google
    Storage, because task ids are not globally unique.

    @param hostname        Hostname
    @param task_id         Special task id
    @param task_name       Special task name (e.g., Verify, Repair, etc.)
    @param time_requested  Special task requested time.

    @return An execution path for the task.
    """
    results_path = 'hosts/%s/%s-%s' % (hostname, task_id, task_name.lower())

    # If we do this on the master it will break backward compatibility,
    # as there are tasks that currently don't have timestamps. If a host
    # or job has been sent to a shard, the rpc for that host/job will
    # be redirected to the shard, so this global_config check will happen
    # on the shard the logs are on.
    if not is_shard():
        return results_path

    # Generate a uid to disambiguate special task result directories
    # in case this shard fails. The simplest uid is the job_id, however
    # in rare cases tasks do not have jobs associated with them (e.g.:
    # frontend verify), so just use the creation timestamp. The clocks
    # between a shard and master should always be in sync. Any discrepancies
    # will be brought to our attention in the form of job timeouts.
    uid = time_requested.strftime('%Y%d%m%H%M%S')

    # TODO: This is a hack, however it is the easiest way to achieve
    # correctness. There is currently some debate over the future of
    # tasks in our infrastructure and refactoring everything right
    # now isn't worth the time.
    return '%s/%s' % (results_path, uid)
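# Shape of the result (the hostname and ids below are hypothetical):
#     on the master: get_special_task_exec_path('host1', 123, 'Verify', t)
#                        -> 'hosts/host1/123-verify'
#     on a shard:    the same call returns
#                        'hosts/host1/123-verify/<t as %Y%d%m%H%M%S>'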


def get_job_tag(id, owner):
    """Returns a string tag for a job.

    @param id    Job id
    @param owner Job owner

    """
    return '%s-%s' % (id, owner)


def get_hqe_exec_path(tag, execution_subdir):
    """Returns an execution path to a HQE's results.

    @param tag               Tag string for a job associated with a HQE.
    @param execution_subdir  Execution sub-directory string of a HQE.

    """
    return os.path.join(tag, execution_subdir)


def is_inside_chroot():
    """Check if the process is running inside the chroot.

    @return: True if the process is running inside the chroot.

    """
    return os.path.exists('/etc/cros_chroot_version')


def parse_job_name(name):
    """Parse a job name to get information including build, board, and suite.

    A suite job created by run_suite follows the naming convention:
    [build]-test_suites/control.[suite]
    For example: lumpy-release/R46-7272.0.0-test_suites/control.bvt
    The naming convention is defined in rpc_interface.create_suite_job.

    A test job created by a suite job follows the naming convention:
    [build]/[suite]/[test name]
    For example: lumpy-release/R46-7272.0.0/bvt/login_LoginSuccess
    The naming convention is defined in
    server/cros/dynamic_suite/tools.create_job_name

    Note that pgo and chrome-perf builds will fail this method. Since the lab
    does not run tests for these builds, they can be ignored.
    Also, tests for Launch Control builds have a different naming convention.
    The build ID will be used as build_version.

    @param name: Name of the job.

    @return: A dictionary containing the test information. The keyvals include:
             build: Name of the build, e.g., lumpy-release/R46-7272.0.0
             build_version: The version of the build, e.g., R46-7272.0.0
             board: Name of the board, e.g., lumpy
             suite: Name of the test suite, e.g., bvt

    """
    info = {}
    suite_job_regex = r'([^/]*/[^/]*(?:/\d+)?)-test_suites/control\.(.*)'
    test_job_regex = r'([^/]*/[^/]*(?:/\d+)?)/([^/]+)/.*'
    match = re.match(suite_job_regex, name)
    if not match:
        match = re.match(test_job_regex, name)
    if match:
        info['build'] = match.groups()[0]
        info['suite'] = match.groups()[1]
        info['build_version'] = info['build'].split('/')[1]
        try:
            info['board'], _, _, _ = ParseBuildName(info['build'])
        except ParseBuildNameException:
            # Try to parse it as a Launch Control build.
            # Launch Control builds have the name format:
            # branch/build_target-build_type/build_id.
            try:
                _, target, build_id = utils.parse_launch_control_build(
                        info['build'])
                build_target, _ = utils.parse_launch_control_target(target)
                if build_target:
                    info['board'] = build_target
                    info['build_version'] = build_id
            except ValueError:
                pass
    return info
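# Worked example using the suite-job convention from the docstring:
#     parse_job_name('lumpy-release/R46-7272.0.0-test_suites/control.bvt')
# returns {'build': 'lumpy-release/R46-7272.0.0', 'suite': 'bvt',
#          'build_version': 'R46-7272.0.0', 'board': 'lumpy'}.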


def verify_not_root_user():
    """Simple function to error out if running with uid == 0."""
    if os.getuid() == 0:
        raise error.IllegalUser('This script cannot be run as root.')


def get_hostname_from_machine(machine):
    """Look up the hostname from a machine string or dict.

    @returns: Machine hostname in string format.
    """
    hostname, _ = get_host_info_from_machine(machine)
    return hostname


def get_host_info_from_machine(machine):
    """Look up host information from a machine string or dict.

    @returns: Tuple of (hostname, afe_host)
    """
    if isinstance(machine, dict):
        return (machine['hostname'], machine['afe_host'])
    else:
        return (machine, EmptyAFEHost())
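# The two accepted shapes, illustrated (the hostname and afe_host_obj below
# are placeholders):
#     get_host_info_from_machine('chromeos1-row1-rack1-host1')
#         -> ('chromeos1-row1-rack1-host1', EmptyAFEHost())
#     get_host_info_from_machine({'hostname': 'chromeos1-row1-rack1-host1',
#                                 'afe_host': afe_host_obj})
#         -> ('chromeos1-row1-rack1-host1', afe_host_obj)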


def get_afe_host_from_machine(machine):
    """Return the afe_host from the machine dict if possible.

    @returns: AFE host object.
    """
    _, afe_host = get_host_info_from_machine(machine)
    return afe_host


def get_connection_pool_from_machine(machine):
    """Returns the ssh_multiplex.ConnectionPool from machine if possible."""
    if not isinstance(machine, dict):
        return None
    return machine.get('connection_pool')


def get_creds_abspath(creds_file):
    """Returns the absolute path of the credentials file.

    If creds_file is already an absolute path, just return it.
    Otherwise, assume it is located in the creds directory
    specified in global_config and return the absolute path.

    @param creds_file: A path to the credentials file.
    @return: An absolute path to the credentials file.
    """
    if not creds_file:
        return None
    if os.path.isabs(creds_file):
        return creds_file
    creds_dir = CONFIG.get_config_value('SERVER', 'creds_dir', default='')
    if not creds_dir or not os.path.exists(creds_dir):
        creds_dir = common.autotest_dir
    return os.path.join(creds_dir, creds_file)


def SetupTsMonGlobalState(*args, **kwargs):
    """Import-safe wrapper around chromite.lib.ts_mon_config's setup function.

    @param *args: Args to pass through.
    @param **kwargs: Kwargs to pass through.
    """
    try:
        # TODO(crbug.com/739466) This module import is delayed because it adds
        # 1-2 seconds to the module import time and most users of site_utils
        # don't need it. The correct fix is to break apart site_utils into more
        # meaningful chunks.
        from chromite.lib import ts_mon_config
    except ImportError:
        logging.warning('Unable to import chromite. Monarch is disabled.')
        return TrivialContextManager()

    try:
        context = ts_mon_config.SetupTsMonGlobalState(*args, **kwargs)
        if hasattr(context, '__exit__'):
            return context
    except Exception as e:
        logging.warning('Caught an exception trying to set up ts_mon, '
                        'monitoring is disabled: %s', e, exc_info=True)
    return TrivialContextManager()


@contextlib.contextmanager
def TrivialContextManager(*args, **kwargs):
    """Context manager that does nothing.

    @param *args: Ignored args.
    @param **kwargs: Ignored kwargs.
    """
    yield


def wait_for_idle_duts(duts, afe, max_wait=IDLE_DUT_WAIT_TIMEOUT):
    """Wait for the hosts to all go idle.

    @param duts: List of duts to check for idle state.
    @param afe: afe instance.
    @param max_wait: Max wait time in seconds to wait for duts to be idle.

    @returns Boolean True if all hosts are idle or False if any hosts did not
            go idle within max_wait.
    """
    start_time = time.time()
    # We make a shallow copy since we're going to be modifying active_dut_list.
    active_dut_list = duts[:]
    while active_dut_list:
        # Let's rate-limit how often we hit the AFE.
        time.sleep(1)

        # Check if we've waited too long.
        if (time.time() - start_time) > max_wait:
            return False

        # Get the status for the duts and see if they're in the idle state.
        afe_hosts = afe.get_hosts(active_dut_list)
        idle_duts = [afe_host.hostname for afe_host in afe_hosts
                     if afe_host.status in host_states.IDLE_STATES]

        # Take out idle duts so we don't needlessly check them
        # next time around.
        for idle_dut in idle_duts:
            active_dut_list.remove(idle_dut)

        logging.info('still waiting for the following duts to go idle: %s',
                     active_dut_list)
    return True


@contextlib.contextmanager
def lock_duts_and_wait(duts, afe, lock_msg='default lock message',
                       max_wait=IDLE_DUT_WAIT_TIMEOUT):
    """Context manager to lock the duts and wait for them to go idle.

    @param duts: List of duts to lock.
    @param afe: afe instance.
    @param lock_msg: message for afe on locking this host.
    @param max_wait: Max wait time in seconds to wait for duts to be idle.

    @yields Boolean lock_success: True if all locked duts went idle within
            max_wait, or False if we timed out waiting for hosts to go idle.
    """
    try:
        locked_duts = []
        duts.sort()
        for dut in duts:
            if afe.lock_host(dut, lock_msg, fail_if_locked=True):
                locked_duts.append(dut)
            else:
                logging.info('%s already locked', dut)
        yield wait_for_idle_duts(locked_duts, afe, max_wait)
    finally:
        afe.unlock_hosts(locked_duts)
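# Typical usage (a sketch; the dut names and afe object are placeholders):
#
#     with lock_duts_and_wait(['dut1', 'dut2'], afe,
#                             lock_msg='provisioning') as lock_success:
#         if lock_success:
#             pass  # All locked duts went idle; safe to proceed.
#         else:
#             pass  # Timed out waiting for the duts to go idle.
#     # The duts are unlocked automatically when the block exits.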


def _get_default_size_info(path):
    """Get the default result size information.

    If the directory summary fails to build, assume the test result is not
    throttled and all result sizes are the size of existing test results.

    @return: A namedtuple of result size information, including:
            client_result_collected_KB: The total size (in KB) of test results
                    collected from test device. Set to be the total size of the
                    given path.
            original_result_total_KB: The original size (in KB) of test results
                    before being trimmed. Set to be the total size of the given
                    path.
            result_uploaded_KB: The total size (in KB) of test results to be
                    uploaded. Set to be the total size of the given path.
            result_throttled: True if test results collection is throttled.
                    It's set to False in this default behavior.
    """
    total_size = file_utils.get_directory_size_kibibytes(path)
    return result_utils_lib.ResultSizeInfo(
            client_result_collected_KB=total_size,
            original_result_total_KB=total_size,
            result_uploaded_KB=total_size,
            result_throttled=False)


def _report_result_size_metrics(result_size_info):
    """Report result size information to metrics.

    @param result_size_info: A ResultSizeInfo namedtuple containing information
            about test result sizes.
    """
    fields = {'result_throttled': result_size_info.result_throttled}
    metrics.Counter(RESULT_METRICS_PREFIX + 'client_result_collected_KB',
                    description='The total size (in KB) of test results '
                    'collected from test device. Set to be the total size of '
                    'the given path.'
                    ).increment_by(result_size_info.client_result_collected_KB,
                                   fields=fields)
    metrics.Counter(RESULT_METRICS_PREFIX + 'original_result_total_KB',
                    description='The original size (in KB) of test results '
                    'before being trimmed.'
                    ).increment_by(result_size_info.original_result_total_KB,
                                   fields=fields)
    metrics.Counter(RESULT_METRICS_PREFIX + 'result_uploaded_KB',
                    description='The total size (in KB) of test results to be '
                    'uploaded.'
                    ).increment_by(result_size_info.result_uploaded_KB,
                                   fields=fields)


@metrics.SecondsTimerDecorator(
        'chromeos/autotest/result_collection/collect_result_sizes_duration')
def collect_result_sizes(path, log=logging.debug):
    """Collect the result size information and build the result summary.

    It first tries to merge directory summaries and calculate the result sizes
    including:
    client_result_collected_KB: The volume in KB that's transferred from the
            test device.
    original_result_total_KB: The volume in KB that's the original size of the
            result files before being trimmed.
    result_uploaded_KB: The volume in KB that will be uploaded.
    result_throttled: Indicates whether the result files were throttled.

    If directory summary merging fails for any reason, fall back to using the
    total size of the given result directory.

    @param path: Path of the result directory to get size information.
    @param log: The logging method, defaults to logging.debug.
    @return: A ResultSizeInfo namedtuple containing information about test
             result sizes.
    """
    try:
        client_collected_bytes, summary, files = result_utils.merge_summaries(
                path)
        result_size_info = result_utils_lib.get_result_size_info(
                client_collected_bytes, summary)
        html_file = os.path.join(path, result_view.DEFAULT_RESULT_SUMMARY_NAME)
        result_view.build(client_collected_bytes, summary, html_file)

        # Delete all summary files after the final view is built.
        for summary_file in files:
            os.remove(summary_file)
    except:
        log('Failed to calculate result sizes based on directory summaries for '
            'directory %s. Falling back to recording the total size.'
            '\nException: %s' %
            (path, traceback.format_exc()))
        result_size_info = _get_default_size_info(path)

    _report_result_size_metrics(result_size_info)

    return result_size_info