# Copyright (c) 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.


import collections
import contextlib
import grp
import httplib
import json
import logging
import os
import random
import re
import time
import traceback
import urllib2

import common
from autotest_lib.client.bin.result_tools import utils as result_utils
from autotest_lib.client.bin.result_tools import utils_lib as result_utils_lib
from autotest_lib.client.bin.result_tools import view as result_view
from autotest_lib.client.common_lib import lsbrelease_utils
from autotest_lib.client.common_lib import utils
from autotest_lib.client.common_lib import error
from autotest_lib.client.common_lib import file_utils
from autotest_lib.client.common_lib import global_config
from autotest_lib.client.common_lib import host_queue_entry_states
from autotest_lib.client.common_lib import host_states
from autotest_lib.server.cros import provision
from autotest_lib.server.cros.dynamic_suite import constants
from autotest_lib.server.cros.dynamic_suite import job_status

try:
    from chromite.lib import metrics
except ImportError:
    metrics = utils.metrics_mock


CONFIG = global_config.global_config

_SHERIFF_JS = CONFIG.get_config_value('NOTIFICATIONS', 'sheriffs', default='')
_LAB_SHERIFF_JS = CONFIG.get_config_value(
        'NOTIFICATIONS', 'lab_sheriffs', default='')
_CHROMIUM_BUILD_URL = CONFIG.get_config_value(
        'NOTIFICATIONS', 'chromium_build_url', default='')

LAB_GOOD_STATES = ('open', 'throttled')

ENABLE_DRONE_IN_RESTRICTED_SUBNET = CONFIG.get_config_value(
        'CROS', 'enable_drone_in_restricted_subnet', type=bool,
        default=False)

# Wait at most 10 mins for duts to go idle.
IDLE_DUT_WAIT_TIMEOUT = 600

# Mapping between board name and build target. This is for special-case
# handling of certain Android boards whose board name and build target name
# do not match.
ANDROID_TARGET_TO_BOARD_MAP = {
        'seed_l8150': 'gm4g_sprout',
        'bat_land': 'bat'
        }
ANDROID_BOARD_TO_TARGET_MAP = {
        'gm4g_sprout': 'seed_l8150',
        'bat': 'bat_land'
        }
# Prefix for the metrics name for result size information.
RESULT_METRICS_PREFIX = 'chromeos/autotest/result_collection/'


class TestLabException(Exception):
    """Exception raised when the Test Lab blocks a test or suite."""
    pass


class ParseBuildNameException(Exception):
    """Raised when ParseBuildName() cannot parse a build name."""
    pass


class Singleton(type):
    """Enforce that only one client class is instantiated per process."""
    _instances = {}

    def __call__(cls, *args, **kwargs):
        """Fetch the instance of a class to use for subsequent calls."""
        if cls not in cls._instances:
            cls._instances[cls] = super(Singleton, cls).__call__(
                    *args, **kwargs)
        return cls._instances[cls]


class EmptyAFEHost(object):
    """Object to represent an AFE host object when there is no AFE."""

    def __init__(self):
        """
        We'll be setting the instance attributes as we use them. Right now
        we only use attributes and labels, but as other attributes of an
        actual AFE Host object come into use (check
        rpc_interfaces.get_hosts()), we'll add them here so users won't be
        perplexed when their host's afe_host object complains that an
        attribute doesn't exist.
        """
        self.attributes = {}
        self.labels = []


def ParseBuildName(name):
    """Parse a build name into board, type, milestone, and manifest number.

    @param name: a build name, e.g. 'x86-alex-release/R20-2015.0.0' or a
                 relative build name, e.g. 'x86-alex-release/LATEST'

    @return board: board the manifest is for, e.g. x86-alex.
    @return type: one of 'release', 'factory', or 'firmware'
    @return milestone: (numeric) milestone the manifest was associated with.
                       Will be None for relative build names.
    @return manifest: manifest number, e.g. '2015.0.0'.
                      Will be None for relative build names.

    """
    match = re.match(r'(trybot-)?(?P<board>[\w-]+?)(?:-chrome)?(?:-chromium)?'
                     r'-(?P<type>\w+)/(R(?P<milestone>\d+)-'
                     r'(?P<manifest>[\d.ab-]+)|LATEST)',
                     name)
    if match and len(match.groups()) >= 5:
        return (match.group('board'), match.group('type'),
                match.group('milestone'), match.group('manifest'))
    raise ParseBuildNameException('%s is a malformed build name.' % name)


def get_labels_from_afe(hostname, label_prefix, afe):
    """Retrieve a host's specific labels from the AFE.

    Looks for host labels that have the form <label_prefix>:<value>
    and returns the "<value>" part of each label. None is returned
    if there is no label matching the pattern.

    @param hostname: hostname of given DUT.
    @param label_prefix: prefix of label to be matched, e.g., |board:|
    @param afe: afe instance.

    @returns A list of labels that match the prefix, or None.

    """
    labels = afe.get_labels(name__startswith=label_prefix,
                            host__hostname__in=[hostname])
    if labels:
        return [l.name.split(label_prefix, 1)[1] for l in labels]


def get_label_from_afe(hostname, label_prefix, afe):
    """Retrieve a host's specific label from the AFE.

    Looks for a host label that has the form <label_prefix>:<value>
    and returns the "<value>" part of the label. None is returned
    if there is no label matching the pattern.

    @param hostname: hostname of given DUT.
    @param label_prefix: prefix of label to be matched, e.g., |board:|
    @param afe: afe instance.
    @returns the label that matches the prefix, or None.

    """
    labels = get_labels_from_afe(hostname, label_prefix, afe)
    if labels and len(labels) == 1:
        return labels[0]


def get_board_from_afe(hostname, afe):
    """Retrieve given host's board from its labels in the AFE.

    Looks for a host label of the form "board:<board>", and
    returns the "<board>" part of the label. `None` is returned
    if there is not a single, unique label matching the pattern.

    @param hostname: hostname of given DUT.
    @param afe: afe instance.
    @returns board from label, or `None`.

    """
    return get_label_from_afe(hostname, constants.BOARD_PREFIX, afe)


def get_build_from_afe(hostname, afe):
    """Retrieve the current build for given host from the AFE.

    Looks through the host's labels in the AFE to determine its build.

    @param hostname: hostname of given DUT.
    @param afe: afe instance.
    @returns The current build or None if it could not find it or if there
             were multiple build labels assigned to this host.

    """
    prefix = provision.CROS_VERSION_PREFIX
    build = get_label_from_afe(hostname, prefix + ':', afe)
    if build:
        return build
    return None
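
# Example (illustrative, hypothetical host): for a DUT carrying the labels
# 'board:lumpy' and 'cros-version:lumpy-release/R46-7272.0.0',
# get_board_from_afe() returns 'lumpy' and get_build_from_afe() returns
# 'lumpy-release/R46-7272.0.0'.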


# TODO(fdeng): fix get_sheriffs crbug.com/483254
def get_sheriffs(lab_only=False):
    """
    Polls the javascript file that holds the identity of the sheriff and
    parses its output to return a list of chromium sheriff email addresses.
    The javascript file can contain the ldap of more than one sheriff, e.g.:
    document.write('sheriff_one, sheriff_two').

    @param lab_only: if True, only pulls lab sheriff.
    @return: A list of chromium.org sheriff email addresses to cc on the bug.
             An empty list if we failed to parse the javascript.
    """
    sheriff_ids = []
    sheriff_js_list = _LAB_SHERIFF_JS.split(',')
    if not lab_only:
        sheriff_js_list.extend(_SHERIFF_JS.split(','))

    for sheriff_js in sheriff_js_list:
        try:
            url_content = utils.urlopen('%s%s' % (
                    _CHROMIUM_BUILD_URL, sheriff_js)).read()
        except (ValueError, IOError) as e:
            logging.warning('could not parse sheriff from url %s%s: %s',
                            _CHROMIUM_BUILD_URL, sheriff_js, str(e))
        except (urllib2.URLError, httplib.HTTPException) as e:
            logging.warning('unexpected error reading from url "%s%s": %s',
                            _CHROMIUM_BUILD_URL, sheriff_js, str(e))
        else:
            ldaps = re.search(r"document.write\('(.*)'\)", url_content)
            if not ldaps:
                logging.warning('Could not retrieve sheriff ldaps for: %s',
                                url_content)
                continue
            sheriff_ids += ['%s@chromium.org' % alias.replace(' ', '')
                            for alias in ldaps.group(1).split(',')]
    return sheriff_ids


def remote_wget(source_url, dest_path, ssh_cmd):
    """wget source_url from localhost to dest_path on remote host using ssh.

    @param source_url: The complete url of the source of the package to send.
    @param dest_path: The path on the remote host's file system where we would
                      like to store the package.
    @param ssh_cmd: The ssh command to use in performing the remote wget.
    """
    wget_cmd = ("wget -O - %s | %s 'cat >%s'" %
                (source_url, ssh_cmd, dest_path))
    utils.run(wget_cmd)


_MAX_LAB_STATUS_ATTEMPTS = 5
def _get_lab_status(status_url):
    """Grabs the current lab status and message.

    @param status_url: The URL of the lab status page.

    @returns The JSON object obtained from the given URL, or None if the
             status could not be fetched.

    """
    retry_waittime = 1
    for _ in range(_MAX_LAB_STATUS_ATTEMPTS):
        try:
            response = urllib2.urlopen(status_url)
        except IOError as e:
            logging.debug('Error occurred when grabbing the lab status: %s.',
                          e)
            time.sleep(retry_waittime)
            continue
        # Check for successful response code.
        if response.getcode() == 200:
            return json.load(response)
        time.sleep(retry_waittime)
    return None


def _decode_lab_status(lab_status, build):
    """Decode lab status, and report exceptions as needed.

    Take a deserialized JSON object from the lab status page, and
    interpret it to determine the actual lab status. Raise
    exceptions as required to report when the lab is down.

    @param lab_status: The deserialized JSON object from the lab status page.
    @param build: build name that we want to check the status of.

    @raises TestLabException Raised if a request to test for the given
                             status and build should be blocked.
    """
    # First check if the lab is up.
    if lab_status['general_state'] not in LAB_GOOD_STATES:
        raise TestLabException('Chromium OS Test Lab is closed: '
                               '%s.' % lab_status['message'])

    # Check if the build we wish to use is disabled.
    # Lab messages should be in the format of:
    #     Lab is 'status' [regex ...] (comment)
    # If the build name matches any regex, it will be blocked.
    build_exceptions = re.search(r'\[(.*)\]', lab_status['message'])
    if not build_exceptions or not build:
        return
    for build_pattern in build_exceptions.group(1).split():
        if re.match(build_pattern, build):
            raise TestLabException('Chromium OS Test Lab is closed: '
                                   '%s matches %s.' % (
                                           build, build_pattern))
    return
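
# Illustrative example (hypothetical lab message): given
#     lab_status = {'general_state': 'open',
#                   'message': "Lab is 'open' [.*-paladin] (canaries only)"}
# _decode_lab_status(lab_status, 'lumpy-paladin/R28-3993.0.0') raises
# TestLabException, because the build matches the '.*-paladin' pattern.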


def is_in_lab():
    """Check if the current Autotest instance is in the lab.

    @return: True if the Autotest instance is in the lab.
    """
    test_server_name = CONFIG.get_config_value('SERVER', 'hostname')
    return test_server_name.startswith('cautotest')


def check_lab_status(build):
    """Check if the lab status allows us to schedule for a build.

    Checks if the lab is down, or if testing for the requested build
    should be blocked.

    @param build: Name of the build to be scheduled for testing.

    @raises TestLabException Raised if a request to test for the given
                             status and build should be blocked.

    """
    # Ensure we are trying to schedule on the actual lab.
    if not is_in_lab():
        return

    # Download the lab status from its home on the web.
    status_url = CONFIG.get_config_value('CROS', 'lab_status_url')
    json_status = _get_lab_status(status_url)
    if json_status is None:
        # We go ahead and say the lab is open if we can't get the status.
        logging.warning('Could not get a status from %s', status_url)
        return
    _decode_lab_status(json_status, build)


def host_in_lab(hostname):
    """Check if the execution is against a host in the lab."""
    return (not utils.in_moblab_ssp()
            and not lsbrelease_utils.is_moblab()
            and utils.host_is_in_lab_zone(hostname))


def lock_host_with_labels(afe, lock_manager, labels):
    """Look up and lock one host that matches the list of input labels.

    @param afe: An instance of the afe class, as defined in server.frontend.
    @param lock_manager: A lock manager capable of locking hosts, e.g. the
                         one defined in server.cros.host_lock_manager.
    @param labels: A list of labels to look for on hosts.

    @return: The hostname of a host matching all labels, and locked through the
             lock_manager. The hostname will be as specified in the database
             the afe object is associated with, i.e. if it exists in afe_hosts
             with a .cros suffix, the hostname returned will contain a .cros
             suffix.

    @raises: error.NoEligibleHostException: If no hosts matching the list of
             input labels are available.
    @raises: error.TestError: If unable to lock a host matching the labels.
    """
    potential_hosts = afe.get_hosts(multiple_labels=labels)
    if not potential_hosts:
        raise error.NoEligibleHostException(
                'No devices found with labels %s.' % labels)

    # This prevents errors where a fault might seem repeatable
    # because we lock, say, the same packet capturer for each test run.
    random.shuffle(potential_hosts)
    for host in potential_hosts:
        if lock_manager.lock([host.hostname]):
            logging.info('Locked device %s with labels %s.',
                         host.hostname, labels)
            return host.hostname
        else:
            logging.info('Unable to lock device %s with labels %s.',
                         host.hostname, labels)

    raise error.TestError('Could not lock a device with labels %s' % labels)


def get_test_views_from_tko(suite_job_id, tko):
    """Get test name and result for given suite job ID.

    @param suite_job_id: ID of suite job.
    @param tko: an instance of TKO as defined in server/frontend.py.
    @return: A defaultdict where keys are test names and values are
             lists of test statuses, e.g.,
             {'dummy_Fail.Error': ['ERROR', 'ERROR'],
              'dummy_Fail.NAError': ['TEST_NA'],
              'dummy_Fail.RetrySuccess': ['ERROR', 'GOOD'],
             }
    @raise: Exception when there is no test view found.

    """
    views = tko.run('get_detailed_test_views', afe_job_id=suite_job_id)
    relevant_views = filter(job_status.view_is_relevant, views)
    if not relevant_views:
        raise Exception('Failed to retrieve job results.')

    test_views = collections.defaultdict(list)
    for view in relevant_views:
        test_views[view['test_name']].append(view['status'])
    return test_views


def get_data_key(prefix, suite, build, board):
    """
    Constructs a key string from parameters.

    @param prefix: Prefix for the generated key.
    @param suite: a suite name, e.g., bvt-cq, bvt-inline, dummy
    @param build: The build string. This string should have a consistent
                  format, e.g., x86-mario-release/R26-3570.0.0. If the format
                  of this string changes such that we can't determine
                  build_type or branch, we give up and use the parameters
                  we're sure of instead (suite, board), e.g.:
                  1. build = x86-alex-pgo-release/R26-3570.0.0
                     branch = 26
                     build_type = pgo-release
                  2. build = lumpy-paladin/R28-3993.0.0-rc5
                     branch = 28
                     build_type = paladin
    @param board: The board that this suite ran on.
    @return: The key string used for a dictionary.
    """
    try:
        _board, build_type, branch = ParseBuildName(build)[:3]
    except ParseBuildNameException as e:
        logging.error(str(e))
        branch = 'Unknown'
        build_type = 'Unknown'
    else:
        embedded_str = re.search(r'x86-\w+-(.*)', _board)
        if embedded_str:
            build_type = embedded_str.group(1) + '-' + build_type

    data_key_dict = {
        'prefix': prefix,
        'board': board,
        'branch': branch,
        'build_type': build_type,
        'suite': suite,
    }
    return ('%(prefix)s.%(board)s.%(build_type)s.%(branch)s.%(suite)s'
            % data_key_dict)
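
# Example (illustrative, with a hypothetical 'stats' prefix):
#     get_data_key('stats', 'bvt-cq', 'x86-alex-pgo-release/R26-3570.0.0',
#                  'x86-alex')
# returns 'stats.x86-alex.pgo-release.26.bvt-cq'.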


def setup_logging(logfile=None, prefix=False):
    """Set up basic logging with all logging info stripped.

    Calls to logging will only show the message. No severity is logged.

    @param logfile: If specified, also dump output to a file.
    @param prefix: Flag for log prefix. Set to True to add a prefix to log
                   entries to include timestamp and log level. Default is
                   False.
    """
    # TODO (xixuan): Delete this code when finishing replacing run_suite.py &
    # abort_suite.py in skylab.
    # Remove all existing handlers. client/common_lib/logging_config adds
    # a StreamHandler to logger when modules are imported, e.g.,
    # autotest_lib.client.bin.utils. A new StreamHandler will be added here to
    # log only messages, not severity.
    logging.getLogger().handlers = []

    if prefix:
        log_format = '%(asctime)s %(levelname)-5s| %(message)s'
    else:
        log_format = '%(message)s'

    screen_handler = logging.StreamHandler()
    screen_handler.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(screen_handler)
    logging.getLogger().setLevel(logging.INFO)
    if logfile:
        file_handler = logging.FileHandler(logfile)
        file_handler.setFormatter(logging.Formatter(log_format))
        file_handler.setLevel(logging.DEBUG)
        logging.getLogger().addHandler(file_handler)


def is_shard():
    """Determines if this instance is running as a shard.

    Reads the global_config value shard_hostname in the section SHARD.

    @return True, if shard_hostname is set, False otherwise.
    """
    hostname = CONFIG.get_config_value('SHARD', 'shard_hostname', default=None)
    return bool(hostname)


def get_global_afe_hostname():
    """Read the hostname of the global AFE from the global configuration."""
    return CONFIG.get_config_value('SERVER', 'global_afe_hostname')


def is_restricted_user(username):
    """Determines if a user is in a restricted group.

    Users in a restricted group only have access to the master.

    @param username: A string, representing a username.

    @returns: True if the user is in a restricted group.
    """
    if not username:
        return False

    restricted_groups = CONFIG.get_config_value(
            'AUTOTEST_WEB', 'restricted_groups', default='').split(',')
    for group in restricted_groups:
        try:
            if group and username in grp.getgrnam(group).gr_mem:
                return True
        except KeyError:
            logging.debug("%s is not a valid group.", group)
    return False


def get_special_task_status(is_complete, success, is_active):
    """Get the status of a special task.

    Emulate a host queue entry status for a special task. Although
    SpecialTasks are not HostQueueEntries, it is helpful to the user to
    present similar statuses.

    @param is_complete Boolean if the task is completed.
    @param success Boolean if the task succeeded.
    @param is_active Boolean if the task is active.

    @return The status of a special task.
    """
    if is_complete:
        if success:
            return host_queue_entry_states.Status.COMPLETED
        return host_queue_entry_states.Status.FAILED
    if is_active:
        return host_queue_entry_states.Status.RUNNING
    return host_queue_entry_states.Status.QUEUED


def get_special_task_exec_path(hostname, task_id, task_name, time_requested):
    """Get the execution path of the SpecialTask.

    This method returns different paths depending on where the task ran:
        * Master: hosts/hostname/task_id-task_type
        * Shard: Master_path/time_created
    This is to work around the fact that a shard can fail independently
    of the master, and be replaced by another shard that has the same
    hosts. Without the time_created stamp the logs of the tasks running
    on the second shard will clobber the logs from the first in Google
    Storage, because task ids are not globally unique.

    @param hostname Hostname
    @param task_id Special task id
    @param task_name Special task name (e.g., Verify, Repair, etc)
    @param time_requested Special task requested time.

    @return An execution path for the task.
    """
    results_path = 'hosts/%s/%s-%s' % (hostname, task_id, task_name.lower())

    # If we do this on the master it will break backward compatibility,
    # as there are tasks that currently don't have timestamps. If a host
    # or job has been sent to a shard, the rpc for that host/job will
    # be redirected to the shard, so this global_config check will happen
    # on the shard the logs are on.
    if not is_shard():
        return results_path

    # Generate a uid to disambiguate special task result directories
    # in case this shard fails. The simplest uid is the job_id, however
    # in rare cases tasks do not have jobs associated with them (e.g.,
    # frontend verify), so just use the creation timestamp. The clocks
    # between a shard and master should always be in sync. Any discrepancies
    # will be brought to our attention in the form of job timeouts.
    uid = time_requested.strftime('%Y%d%m%H%M%S')

    # TODO: This is a hack, however it is the easiest way to achieve
    # correctness. There is currently some debate over the future of
    # tasks in our infrastructure and refactoring everything right
    # now isn't worth the time.
    return '%s/%s' % (results_path, uid)
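
# Example (illustrative, hypothetical values): on the master,
#     get_special_task_exec_path('chromeos1-row1-host1', 123, 'Verify', t)
# returns 'hosts/chromeos1-row1-host1/123-verify'; on a shard, a timestamp
# derived from t is appended as an extra path component.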


def get_job_tag(id, owner):
    """Returns a string tag for a job.

    @param id Job id
    @param owner Job owner

    """
    return '%s-%s' % (id, owner)


def get_hqe_exec_path(tag, execution_subdir):
    """Returns an execution path to a HQE's results.

    @param tag Tag string for a job associated with a HQE.
    @param execution_subdir Execution sub-directory string of a HQE.

    """
    return os.path.join(tag, execution_subdir)


def is_inside_chroot():
    """Check if the process is running inside the chroot.

    @return: True if the process is running inside the chroot.

    """
    return os.path.exists('/etc/cros_chroot_version')


def parse_job_name(name):
    """Parse a job name to get information including build, board, suite, etc.

    A suite job created by run_suite follows the naming convention:
        [build]-test_suites/control.[suite]
    For example: lumpy-release/R46-7272.0.0-test_suites/control.bvt
    The naming convention is defined in rpc_interface.create_suite_job.

    A test job created by a suite job follows the naming convention:
        [build]/[suite]/[test name]
    For example: lumpy-release/R46-7272.0.0/bvt/login_LoginSuccess
    The naming convention is defined in
    server/cros/dynamic_suite/tools.create_job_name

    Note that pgo and chrome-perf builds will fail to be parsed by this
    method. Since the lab does not run tests for these builds, they can be
    ignored.
    Also, tests for Launch Control builds have a different naming convention.
    The build ID will be used as build_version.

    @param name: Name of the job.

    @return: A dictionary containing the test information. The keyvals include:
             build: Name of the build, e.g., lumpy-release/R46-7272.0.0
             build_version: The version of the build, e.g., R46-7272.0.0
             board: Name of the board, e.g., lumpy
             suite: Name of the test suite, e.g., bvt

    """
    info = {}
    suite_job_regex = r'([^/]*/[^/]*(?:/\d+)?)-test_suites/control\.(.*)'
    test_job_regex = r'([^/]*/[^/]*(?:/\d+)?)/([^/]+)/.*'
    match = re.match(suite_job_regex, name)
    if not match:
        match = re.match(test_job_regex, name)
    if match:
        info['build'] = match.groups()[0]
        info['suite'] = match.groups()[1]
        info['build_version'] = info['build'].split('/')[1]
        try:
            info['board'], _, _, _ = ParseBuildName(info['build'])
        except ParseBuildNameException:
            # Try to parse it as a Launch Control build.
            # Launch Control builds have the name format:
            #     branch/build_target-build_type/build_id.
            try:
                _, target, build_id = utils.parse_launch_control_build(
                        info['build'])
                build_target, _ = utils.parse_launch_control_target(target)
                if build_target:
                    info['board'] = build_target
                    info['build_version'] = build_id
            except ValueError:
                pass
    return info
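
# Example (using the suite-job naming convention described above):
#     parse_job_name('lumpy-release/R46-7272.0.0-test_suites/control.bvt')
# returns {'build': 'lumpy-release/R46-7272.0.0',
#          'build_version': 'R46-7272.0.0', 'board': 'lumpy', 'suite': 'bvt'}.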


def verify_not_root_user():
    """Simple function to error out if running with uid == 0."""
    if os.getuid() == 0:
        raise error.IllegalUser('This script cannot be run as root.')


def get_hostname_from_machine(machine):
    """Look up the hostname from a machine string or dict.

    @returns: Machine hostname in string format.
    """
    hostname, _ = get_host_info_from_machine(machine)
    return hostname


def get_host_info_from_machine(machine):
    """Look up host information from a machine string or dict.

    @returns: Tuple of (hostname, afe_host)
    """
    if isinstance(machine, dict):
        return (machine['hostname'], machine['afe_host'])
    else:
        return (machine, EmptyAFEHost())


def get_afe_host_from_machine(machine):
    """Return the afe_host from the machine dict if possible.

    @returns: AFE host object.
    """
    _, afe_host = get_host_info_from_machine(machine)
    return afe_host


def get_connection_pool_from_machine(machine):
    """Returns the ssh_multiplex.ConnectionPool from machine if possible."""
    if not isinstance(machine, dict):
        return None
    return machine.get('connection_pool')


def get_creds_abspath(creds_file):
    """Returns the abspath of the credentials file.

    If creds_file is already an absolute path, just return it.
    Otherwise, assume it is located in the creds directory
    specified in global_config and return the absolute path.

    @param creds_file: A path to the credentials file.
    @return: An absolute path to the credentials file.
    """
    if not creds_file:
        return None
    if os.path.isabs(creds_file):
        return creds_file
    creds_dir = CONFIG.get_config_value('SERVER', 'creds_dir', default='')
    if not creds_dir or not os.path.exists(creds_dir):
        creds_dir = common.autotest_dir
    return os.path.join(creds_dir, creds_file)


def SetupTsMonGlobalState(*args, **kwargs):
    """Import-safe wrapper around chromite.lib.ts_mon_config's setup function.

    @param *args: Args to pass through.
    @param **kwargs: Kwargs to pass through.
    """
    try:
        # TODO(crbug.com/739466) This module import is delayed because it adds
        # 1-2 seconds to the module import time and most users of site_utils
        # don't need it.
        # The correct fix is to break apart site_utils into more
        # meaningful chunks.
        from chromite.lib import ts_mon_config
    except ImportError:
        logging.warning('Unable to import chromite. Monarch is disabled.')
        return TrivialContextManager()

    try:
        context = ts_mon_config.SetupTsMonGlobalState(*args, **kwargs)
        if hasattr(context, '__exit__'):
            return context
    except Exception as e:
        logging.warning('Caught an exception trying to set up ts_mon, '
                        'monitoring is disabled: %s', e, exc_info=True)
    return TrivialContextManager()


@contextlib.contextmanager
def TrivialContextManager(*args, **kwargs):
    """Context manager that does nothing.

    @param *args: Ignored args.
    @param **kwargs: Ignored kwargs.
    """
    yield


def wait_for_idle_duts(duts, afe, max_wait=IDLE_DUT_WAIT_TIMEOUT):
    """Wait for the hosts to all go idle.

    @param duts: List of duts to check for idle state.
    @param afe: afe instance.
    @param max_wait: Max wait time in seconds to wait for duts to be idle.

    @returns Boolean True if all hosts are idle or False if any hosts did not
             go idle within max_wait.
    """
    start_time = time.time()
    # We make a shallow copy since we're going to be modifying active_dut_list.
    active_dut_list = duts[:]
    while active_dut_list:
        # Let's rate-limit how often we hit the AFE.
        time.sleep(1)

        # Check if we've waited too long.
        if (time.time() - start_time) > max_wait:
            return False

        # Get the status for the duts and see if they're in the idle state.
        afe_hosts = afe.get_hosts(active_dut_list)
        idle_duts = [afe_host.hostname for afe_host in afe_hosts
                     if afe_host.status in host_states.IDLE_STATES]

        # Take out idle duts so we don't needlessly check them
        # next time around.
        for idle_dut in idle_duts:
            active_dut_list.remove(idle_dut)

        logging.info('still waiting for following duts to go idle: %s',
                     active_dut_list)
    return True


@contextlib.contextmanager
def lock_duts_and_wait(duts, afe, lock_msg='default lock message',
                       max_wait=IDLE_DUT_WAIT_TIMEOUT):
    """Context manager to lock the duts and wait for them to go idle.

    @param duts: List of duts to lock.
    @param afe: afe instance.
    @param lock_msg: message for afe on locking these hosts.
    @param max_wait: Max wait time in seconds to wait for duts to be idle.

    @returns Boolean lock_success: True if all duts locked successfully, or
             False if we timed out waiting too long for hosts to go idle.
    """
    try:
        locked_duts = []
        duts.sort()
        for dut in duts:
            if afe.lock_host(dut, lock_msg, fail_if_locked=True):
                locked_duts.append(dut)
            else:
                logging.info('%s already locked', dut)
        yield wait_for_idle_duts(locked_duts, afe, max_wait)
    finally:
        afe.unlock_hosts(locked_duts)
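
# Illustrative usage (hypothetical afe instance and dut list):
#     with lock_duts_and_wait(duts, afe, lock_msg='balancing pool') as idle:
#         if idle:
#             # All locked DUTs reached an idle state within the timeout.
#             ...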


def _get_default_size_info(path):
    """Get the default result size information.

    In case the directory summary fails to build, assume the test result is
    not throttled and all result sizes are the size of existing test results.

    @return: A namedtuple of result size information, including:
            client_result_collected_KB: The total size (in KB) of test results
                    collected from the test device. Set to be the total size
                    of the given path.
            original_result_total_KB: The original size (in KB) of test results
                    before being trimmed. Set to be the total size of the given
                    path.
            result_uploaded_KB: The total size (in KB) of test results to be
                    uploaded. Set to be the total size of the given path.
            result_throttled: True if test results collection is throttled.
                    It's set to False in this default behavior.
    """
    total_size = file_utils.get_directory_size_kibibytes(path)
    return result_utils_lib.ResultSizeInfo(
            client_result_collected_KB=total_size,
            original_result_total_KB=total_size,
            result_uploaded_KB=total_size,
            result_throttled=False)


def _report_result_size_metrics(result_size_info):
    """Report result size information to metrics.

    @param result_size_info: A ResultSizeInfo namedtuple containing information
                             of test result sizes.
    """
    fields = {'result_throttled': result_size_info.result_throttled}
    metrics.Counter(RESULT_METRICS_PREFIX + 'client_result_collected_KB',
                    description='The total size (in KB) of test results '
                    'collected from test device. Set to be the total size of '
                    'the given path.'
                    ).increment_by(result_size_info.client_result_collected_KB,
                                   fields=fields)
    metrics.Counter(RESULT_METRICS_PREFIX + 'original_result_total_KB',
                    description='The original size (in KB) of test results '
                    'before being trimmed.'
                    ).increment_by(result_size_info.original_result_total_KB,
                                   fields=fields)
    metrics.Counter(RESULT_METRICS_PREFIX + 'result_uploaded_KB',
                    description='The total size (in KB) of test results to be '
                    'uploaded.'
                    ).increment_by(result_size_info.result_uploaded_KB,
                                   fields=fields)


@metrics.SecondsTimerDecorator(
        'chromeos/autotest/result_collection/collect_result_sizes_duration')
def collect_result_sizes(path, log=logging.debug):
    """Collect the result size information and build the result summary.

    It first tries to merge directory summaries and calculate the result sizes
    including:
    client_result_collected_KB: The volume in KB that's transferred from the
            test device.
    original_result_total_KB: The volume in KB that's the original size of the
            result files before being trimmed.
    result_uploaded_KB: The volume in KB that will be uploaded.
    result_throttled: Indicates whether the result files were throttled.

    If directory summary merging fails for any reason, fall back to using the
    total size of the given result directory.

    @param path: Path of the result directory to get size information.
    @param log: The logging method, defaults to logging.debug.
    @return: A ResultSizeInfo namedtuple containing information of test result
             sizes.
    """
    try:
        client_collected_bytes, summary, files = result_utils.merge_summaries(
                path)
        result_size_info = result_utils_lib.get_result_size_info(
                client_collected_bytes, summary)
        html_file = os.path.join(path, result_view.DEFAULT_RESULT_SUMMARY_NAME)
        result_view.build(client_collected_bytes, summary, html_file)

        # Delete all summary files after the final view is built.
        for summary_file in files:
            os.remove(summary_file)
    except:
        log('Failed to calculate result sizes based on directory summaries for '
            'directory %s. Fall back to record the total size.\nException: %s' %
            (path, traceback.format_exc()))
        result_size_info = _get_default_size_info(path)

    _report_result_size_metrics(result_size_info)

    return result_size_info