1# Lint as: python2, python3 2# Copyright (c) 2013 The Chromium Authors. All rights reserved. 3# Use of this source code is governed by a BSD-style license that can be 4# found in the LICENSE file. 5 6 7from __future__ import absolute_import 8from __future__ import division 9from __future__ import print_function 10 11import collections 12import contextlib 13import grp 14import six.moves.http_client 15import json 16import logging 17import os 18import random 19import re 20import time 21import traceback 22from six.moves import filter 23from six.moves import range 24from six.moves import urllib 25 26import common 27from autotest_lib.client.bin.result_tools import utils as result_utils 28from autotest_lib.client.bin.result_tools import utils_lib as result_utils_lib 29from autotest_lib.client.bin.result_tools import view as result_view 30from autotest_lib.client.common_lib import lsbrelease_utils 31from autotest_lib.client.common_lib import utils 32from autotest_lib.client.common_lib import error 33from autotest_lib.client.common_lib import file_utils 34from autotest_lib.client.common_lib import global_config 35from autotest_lib.client.common_lib import host_queue_entry_states 36from autotest_lib.client.common_lib import host_states 37from autotest_lib.server.cros import provision 38from autotest_lib.server.cros.dynamic_suite import constants 39from autotest_lib.server.cros.dynamic_suite import job_status 40 41try: 42 from chromite.lib import metrics 43except ImportError: 44 metrics = utils.metrics_mock 45 46 47CONFIG = global_config.global_config 48 49_SHERIFF_JS = CONFIG.get_config_value('NOTIFICATIONS', 'sheriffs', default='') 50_LAB_SHERIFF_JS = CONFIG.get_config_value( 51 'NOTIFICATIONS', 'lab_sheriffs', default='') 52_CHROMIUM_BUILD_URL = CONFIG.get_config_value( 53 'NOTIFICATIONS', 'chromium_build_url', default='') 54 55LAB_GOOD_STATES = ('open', 'throttled') 56 57ENABLE_DRONE_IN_RESTRICTED_SUBNET = CONFIG.get_config_value( 58 'CROS', 'enable_drone_in_restricted_subnet', type=bool, 59 default=False) 60 61# Wait at most 10 mins for duts to go idle. 62IDLE_DUT_WAIT_TIMEOUT = 600 63 64# Mapping between board name and build target. This is for special case handling 65# for certain Android board that the board name and build target name does not 66# match. 67ANDROID_TARGET_TO_BOARD_MAP = { 68 'seed_l8150': 'gm4g_sprout', 69 'bat_land': 'bat' 70 } 71ANDROID_BOARD_TO_TARGET_MAP = { 72 'gm4g_sprout': 'seed_l8150', 73 'bat': 'bat_land' 74 } 75# Prefix for the metrics name for result size information. 76RESULT_METRICS_PREFIX = 'chromeos/autotest/result_collection/' 77 78class TestLabException(Exception): 79 """Exception raised when the Test Lab blocks a test or suite.""" 80 pass 81 82 83class ParseBuildNameException(Exception): 84 """Raised when ParseBuildName() cannot parse a build name.""" 85 pass 86 87 88class Singleton(type): 89 """Enforce that only one client class is instantiated per process.""" 90 _instances = {} 91 92 def __call__(cls, *args, **kwargs): 93 """Fetch the instance of a class to use for subsequent calls.""" 94 if cls not in cls._instances: 95 cls._instances[cls] = super(Singleton, cls).__call__( 96 *args, **kwargs) 97 return cls._instances[cls] 98 99class EmptyAFEHost(object): 100 """Object to represent an AFE host object when there is no AFE.""" 101 102 def __init__(self): 103 """ 104 We'll be setting the instance attributes as we use them. Right now 105 we only use attributes and labels but as time goes by and other 106 attributes are used from an actual AFE Host object (check 107 rpc_interfaces.get_hosts()), we'll add them in here so users won't be 108 perplexed why their host's afe_host object complains that attribute 109 doesn't exist. 110 """ 111 self.attributes = {} 112 self.labels = [] 113 114 115def ParseBuildName(name): 116 """Format a build name, given board, type, milestone, and manifest num. 117 118 @param name: a build name, e.g. 'x86-alex-release/R20-2015.0.0' or a 119 relative build name, e.g. 'x86-alex-release/LATEST' 120 121 @return board: board the manifest is for, e.g. x86-alex. 122 @return type: one of 'release', 'factory', or 'firmware' 123 @return milestone: (numeric) milestone the manifest was associated with. 124 Will be None for relative build names. 125 @return manifest: manifest number, e.g. '2015.0.0'. 126 Will be None for relative build names. 127 128 """ 129 match = re.match(r'(trybot-)?(?P<board>[\w-]+?)(?:-chrome)?(?:-chromium)?' 130 r'-(?P<type>\w+)/(R(?P<milestone>\d+)-' 131 r'(?P<manifest>[\d.ab-]+)|LATEST)', 132 name) 133 if match and len(match.groups()) >= 5: 134 return (match.group('board'), match.group('type'), 135 match.group('milestone'), match.group('manifest')) 136 raise ParseBuildNameException('%s is a malformed build name.' % name) 137 138 139def get_labels_from_afe(hostname, label_prefix, afe): 140 """Retrieve a host's specific labels from the AFE. 141 142 Looks for the host labels that have the form <label_prefix>:<value> 143 and returns the "<value>" part of the label. None is returned 144 if there is not a label matching the pattern 145 146 @param hostname: hostname of given DUT. 147 @param label_prefix: prefix of label to be matched, e.g., |board:| 148 @param afe: afe instance. 149 150 @returns A list of labels that match the prefix or 'None' 151 152 """ 153 labels = afe.get_labels(name__startswith=label_prefix, 154 host__hostname__in=[hostname]) 155 if labels: 156 return [l.name.split(label_prefix, 1)[1] for l in labels] 157 158 159def get_label_from_afe(hostname, label_prefix, afe): 160 """Retrieve a host's specific label from the AFE. 161 162 Looks for a host label that has the form <label_prefix>:<value> 163 and returns the "<value>" part of the label. None is returned 164 if there is not a label matching the pattern 165 166 @param hostname: hostname of given DUT. 167 @param label_prefix: prefix of label to be matched, e.g., |board:| 168 @param afe: afe instance. 169 @returns the label that matches the prefix or 'None' 170 171 """ 172 labels = get_labels_from_afe(hostname, label_prefix, afe) 173 if labels and len(labels) == 1: 174 return labels[0] 175 176 177def get_board_from_afe(hostname, afe): 178 """Retrieve given host's board from its labels in the AFE. 179 180 Looks for a host label of the form "board:<board>", and 181 returns the "<board>" part of the label. `None` is returned 182 if there is not a single, unique label matching the pattern. 183 184 @param hostname: hostname of given DUT. 185 @param afe: afe instance. 186 @returns board from label, or `None`. 187 188 """ 189 return get_label_from_afe(hostname, constants.BOARD_PREFIX, afe) 190 191 192def get_build_from_afe(hostname, afe): 193 """Retrieve the current build for given host from the AFE. 194 195 Looks through the host's labels in the AFE to determine its build. 196 197 @param hostname: hostname of given DUT. 198 @param afe: afe instance. 199 @returns The current build or None if it could not find it or if there 200 were multiple build labels assigned to this host. 201 202 """ 203 prefix = provision.CROS_VERSION_PREFIX 204 build = get_label_from_afe(hostname, prefix + ':', afe) 205 if build: 206 return build 207 return None 208 209 210# TODO(fdeng): fix get_sheriffs crbug.com/483254 211def get_sheriffs(lab_only=False): 212 """ 213 Polls the javascript file that holds the identity of the sheriff and 214 parses it's output to return a list of chromium sheriff email addresses. 215 The javascript file can contain the ldap of more than one sheriff, eg: 216 document.write('sheriff_one, sheriff_two'). 217 218 @param lab_only: if True, only pulls lab sheriff. 219 @return: A list of chroium.org sheriff email addresses to cc on the bug. 220 An empty list if failed to parse the javascript. 221 """ 222 sheriff_ids = [] 223 sheriff_js_list = _LAB_SHERIFF_JS.split(',') 224 if not lab_only: 225 sheriff_js_list.extend(_SHERIFF_JS.split(',')) 226 227 for sheriff_js in sheriff_js_list: 228 try: 229 url_content = utils.urlopen('%s%s'% ( 230 _CHROMIUM_BUILD_URL, sheriff_js)).read() 231 except (ValueError, IOError) as e: 232 logging.warning('could not parse sheriff from url %s%s: %s', 233 _CHROMIUM_BUILD_URL, sheriff_js, str(e)) 234 except (urllib.error.URLError, six.moves.http_client.HTTPException) as e: 235 logging.warning('unexpected error reading from url "%s%s": %s', 236 _CHROMIUM_BUILD_URL, sheriff_js, str(e)) 237 else: 238 ldaps = re.search(r"document.write\('(.*)'\)", url_content) 239 if not ldaps: 240 logging.warning('Could not retrieve sheriff ldaps for: %s', 241 url_content) 242 continue 243 sheriff_ids += ['%s@chromium.org' % alias.replace(' ', '') 244 for alias in ldaps.group(1).split(',')] 245 return sheriff_ids 246 247 248def remote_wget(source_url, dest_path, ssh_cmd): 249 """wget source_url from localhost to dest_path on remote host using ssh. 250 251 @param source_url: The complete url of the source of the package to send. 252 @param dest_path: The path on the remote host's file system where we would 253 like to store the package. 254 @param ssh_cmd: The ssh command to use in performing the remote wget. 255 """ 256 wget_cmd = ("wget -O - %s | %s 'cat >%s'" % 257 (source_url, ssh_cmd, dest_path)) 258 utils.run(wget_cmd) 259 260 261_MAX_LAB_STATUS_ATTEMPTS = 5 262def _get_lab_status(status_url): 263 """Grabs the current lab status and message. 264 265 @returns The JSON object obtained from the given URL. 266 267 """ 268 retry_waittime = 1 269 for _ in range(_MAX_LAB_STATUS_ATTEMPTS): 270 try: 271 response = urllib.request.urlopen(status_url) 272 except IOError as e: 273 logging.debug('Error occurred when grabbing the lab status: %s.', 274 e) 275 time.sleep(retry_waittime) 276 continue 277 # Check for successful response code. 278 if response.getcode() == 200: 279 return json.load(response) 280 time.sleep(retry_waittime) 281 return None 282 283 284def _decode_lab_status(lab_status, build): 285 """Decode lab status, and report exceptions as needed. 286 287 Take a deserialized JSON object from the lab status page, and 288 interpret it to determine the actual lab status. Raise 289 exceptions as required to report when the lab is down. 290 291 @param build: build name that we want to check the status of. 292 293 @raises TestLabException Raised if a request to test for the given 294 status and build should be blocked. 295 """ 296 # First check if the lab is up. 297 if not lab_status['general_state'] in LAB_GOOD_STATES: 298 raise TestLabException('Chromium OS Test Lab is closed: ' 299 '%s.' % lab_status['message']) 300 301 # Check if the build we wish to use is disabled. 302 # Lab messages should be in the format of: 303 # Lab is 'status' [regex ...] (comment) 304 # If the build name matches any regex, it will be blocked. 305 build_exceptions = re.search('\[(.*)\]', lab_status['message']) 306 if not build_exceptions or not build: 307 return 308 for build_pattern in build_exceptions.group(1).split(): 309 if re.match(build_pattern, build): 310 raise TestLabException('Chromium OS Test Lab is closed: ' 311 '%s matches %s.' % ( 312 build, build_pattern)) 313 return 314 315 316def is_in_lab(): 317 """Check if current Autotest instance is in lab 318 319 @return: True if the Autotest instance is in lab. 320 """ 321 test_server_name = CONFIG.get_config_value('SERVER', 'hostname') 322 return test_server_name.startswith('cautotest') 323 324 325def check_lab_status(build): 326 """Check if the lab status allows us to schedule for a build. 327 328 Checks if the lab is down, or if testing for the requested build 329 should be blocked. 330 331 @param build: Name of the build to be scheduled for testing. 332 333 @raises TestLabException Raised if a request to test for the given 334 status and build should be blocked. 335 336 """ 337 # Ensure we are trying to schedule on the actual lab. 338 if not is_in_lab(): 339 return 340 341 # Download the lab status from its home on the web. 342 status_url = CONFIG.get_config_value('CROS', 'lab_status_url') 343 json_status = _get_lab_status(status_url) 344 if json_status is None: 345 # We go ahead and say the lab is open if we can't get the status. 346 logging.warning('Could not get a status from %s', status_url) 347 return 348 _decode_lab_status(json_status, build) 349 350 351def host_in_lab(hostname): 352 """Check if the execution is against a host in the lab""" 353 return (not utils.in_moblab_ssp() 354 and not lsbrelease_utils.is_moblab() 355 and utils.host_is_in_lab_zone(hostname)) 356 357 358def lock_host_with_labels(afe, lock_manager, labels): 359 """Lookup and lock one host that matches the list of input labels. 360 361 @param afe: An instance of the afe class, as defined in server.frontend. 362 @param lock_manager: A lock manager capable of locking hosts, eg the 363 one defined in server.cros.host_lock_manager. 364 @param labels: A list of labels to look for on hosts. 365 366 @return: The hostname of a host matching all labels, and locked through the 367 lock_manager. The hostname will be as specified in the database the afe 368 object is associated with, i.e if it exists in afe_hosts with a .cros 369 suffix, the hostname returned will contain a .cros suffix. 370 371 @raises: error.NoEligibleHostException: If no hosts matching the list of 372 input labels are available. 373 @raises: error.TestError: If unable to lock a host matching the labels. 374 """ 375 potential_hosts = afe.get_hosts(multiple_labels=labels) 376 if not potential_hosts: 377 raise error.NoEligibleHostException( 378 'No devices found with labels %s.' % labels) 379 380 # This prevents errors where a fault might seem repeatable 381 # because we lock, say, the same packet capturer for each test run. 382 random.shuffle(potential_hosts) 383 for host in potential_hosts: 384 if lock_manager.lock([host.hostname]): 385 logging.info('Locked device %s with labels %s.', 386 host.hostname, labels) 387 return host.hostname 388 else: 389 logging.info('Unable to lock device %s with labels %s.', 390 host.hostname, labels) 391 392 raise error.TestError('Could not lock a device with labels %s' % labels) 393 394 395def get_test_views_from_tko(suite_job_id, tko): 396 """Get test name and result for given suite job ID. 397 398 @param suite_job_id: ID of suite job. 399 @param tko: an instance of TKO as defined in server/frontend.py. 400 @return: A defaultdict where keys are test names and values are 401 lists of test statuses, e.g., 402 {'dummy_Fail.Error': ['ERROR'. 'ERROR'], 403 'dummy_Fail.NAError': ['TEST_NA'], 404 'dummy_Fail.RetrySuccess': ['ERROR', 'GOOD'], 405 } 406 @raise: Exception when there is no test view found. 407 408 """ 409 views = tko.run('get_detailed_test_views', afe_job_id=suite_job_id) 410 relevant_views = list(filter(job_status.view_is_relevant, views)) 411 if not relevant_views: 412 raise Exception('Failed to retrieve job results.') 413 414 test_views = collections.defaultdict(list) 415 for view in relevant_views: 416 test_views[view['test_name']].append(view['status']) 417 return test_views 418 419 420def get_data_key(prefix, suite, build, board): 421 """ 422 Constructs a key string from parameters. 423 424 @param prefix: Prefix for the generating key. 425 @param suite: a suite name. e.g., bvt-cq, bvt-inline, dummy 426 @param build: The build string. This string should have a consistent 427 format eg: x86-mario-release/R26-3570.0.0. If the format of this 428 string changes such that we can't determine build_type or branch 429 we give up and use the parametes we're sure of instead (suite, 430 board). eg: 431 1. build = x86-alex-pgo-release/R26-3570.0.0 432 branch = 26 433 build_type = pgo-release 434 2. build = lumpy-paladin/R28-3993.0.0-rc5 435 branch = 28 436 build_type = paladin 437 @param board: The board that this suite ran on. 438 @return: The key string used for a dictionary. 439 """ 440 try: 441 _board, build_type, branch = ParseBuildName(build)[:3] 442 except ParseBuildNameException as e: 443 logging.error(str(e)) 444 branch = 'Unknown' 445 build_type = 'Unknown' 446 else: 447 embedded_str = re.search(r'x86-\w+-(.*)', _board) 448 if embedded_str: 449 build_type = embedded_str.group(1) + '-' + build_type 450 451 data_key_dict = { 452 'prefix': prefix, 453 'board': board, 454 'branch': branch, 455 'build_type': build_type, 456 'suite': suite, 457 } 458 return ('%(prefix)s.%(board)s.%(build_type)s.%(branch)s.%(suite)s' 459 % data_key_dict) 460 461 462def is_shard(): 463 """Determines if this instance is running as a shard. 464 465 Reads the global_config value shard_hostname in the section SHARD. 466 467 @return True, if shard_hostname is set, False otherwise. 468 """ 469 hostname = CONFIG.get_config_value('SHARD', 'shard_hostname', default=None) 470 return bool(hostname) 471 472 473def get_global_afe_hostname(): 474 """Read the hostname of the global AFE from the global configuration.""" 475 return CONFIG.get_config_value('SERVER', 'global_afe_hostname') 476 477 478def is_restricted_user(username): 479 """Determines if a user is in a restricted group. 480 481 User in restricted group only have access to main. 482 483 @param username: A string, representing a username. 484 485 @returns: True if the user is in a restricted group. 486 """ 487 if not username: 488 return False 489 490 restricted_groups = CONFIG.get_config_value( 491 'AUTOTEST_WEB', 'restricted_groups', default='').split(',') 492 for group in restricted_groups: 493 try: 494 if group and username in grp.getgrnam(group).gr_mem: 495 return True 496 except KeyError as e: 497 logging.debug("%s is not a valid group.", group) 498 return False 499 500 501def get_special_task_status(is_complete, success, is_active): 502 """Get the status of a special task. 503 504 Emulate a host queue entry status for a special task 505 Although SpecialTasks are not HostQueueEntries, it is helpful to 506 the user to present similar statuses. 507 508 @param is_complete Boolean if the task is completed. 509 @param success Boolean if the task succeeded. 510 @param is_active Boolean if the task is active. 511 512 @return The status of a special task. 513 """ 514 if is_complete: 515 if success: 516 return host_queue_entry_states.Status.COMPLETED 517 return host_queue_entry_states.Status.FAILED 518 if is_active: 519 return host_queue_entry_states.Status.RUNNING 520 return host_queue_entry_states.Status.QUEUED 521 522 523def get_special_task_exec_path(hostname, task_id, task_name, time_requested): 524 """Get the execution path of the SpecialTask. 525 526 This method returns different paths depending on where a 527 the task ran: 528 * main: hosts/hostname/task_id-task_type 529 * Shard: main_path/time_created 530 This is to work around the fact that a shard can fail independent 531 of the main, and be replaced by another shard that has the same 532 hosts. Without the time_created stamp the logs of the tasks running 533 on the second shard will clobber the logs from the first in google 534 storage, because task ids are not globally unique. 535 536 @param hostname Hostname 537 @param task_id Special task id 538 @param task_name Special task name (e.g., Verify, Repair, etc) 539 @param time_requested Special task requested time. 540 541 @return An execution path for the task. 542 """ 543 results_path = 'hosts/%s/%s-%s' % (hostname, task_id, task_name.lower()) 544 545 # If we do this on the main it will break backward compatibility, 546 # as there are tasks that currently don't have timestamps. If a host 547 # or job has been sent to a shard, the rpc for that host/job will 548 # be redirected to the shard, so this global_config check will happen 549 # on the shard the logs are on. 550 if not is_shard(): 551 return results_path 552 553 # Generate a uid to disambiguate special task result directories 554 # in case this shard fails. The simplest uid is the job_id, however 555 # in rare cases tasks do not have jobs associated with them (eg: 556 # frontend verify), so just use the creation timestamp. The clocks 557 # between a shard and main should always be in sync. Any discrepancies 558 # will be brought to our attention in the form of job timeouts. 559 uid = time_requested.strftime('%Y%d%m%H%M%S') 560 561 # TODO: This is a hack, however it is the easiest way to achieve 562 # correctness. There is currently some debate over the future of 563 # tasks in our infrastructure and refactoring everything right 564 # now isn't worth the time. 565 return '%s/%s' % (results_path, uid) 566 567 568def get_job_tag(id, owner): 569 """Returns a string tag for a job. 570 571 @param id Job id 572 @param owner Job owner 573 574 """ 575 return '%s-%s' % (id, owner) 576 577 578def get_hqe_exec_path(tag, execution_subdir): 579 """Returns a execution path to a HQE's results. 580 581 @param tag Tag string for a job associated with a HQE. 582 @param execution_subdir Execution sub-directory string of a HQE. 583 584 """ 585 return os.path.join(tag, execution_subdir) 586 587 588def is_inside_chroot(): 589 """Check if the process is running inside chroot. 590 591 @return: True if the process is running inside chroot. 592 593 """ 594 return os.path.exists('/etc/cros_chroot_version') 595 596 597def parse_job_name(name): 598 """Parse job name to get information including build, board and suite etc. 599 600 Suite job created by run_suite follows the naming convention of: 601 [build]-test_suites/control.[suite] 602 For example: lumpy-release/R46-7272.0.0-test_suites/control.bvt 603 The naming convention is defined in rpc_interface.create_suite_job. 604 605 Test job created by suite job follows the naming convention of: 606 [build]/[suite]/[test name] 607 For example: lumpy-release/R46-7272.0.0/bvt/login_LoginSuccess 608 The naming convention is defined in 609 server/cros/dynamic_suite/tools.create_job_name 610 611 Note that pgo and chrome-perf builds will fail the method. Since lab does 612 not run test for these builds, they can be ignored. 613 Also, tests for Launch Control builds have different naming convention. 614 The build ID will be used as build_version. 615 616 @param name: Name of the job. 617 618 @return: A dictionary containing the test information. The keyvals include: 619 build: Name of the build, e.g., lumpy-release/R46-7272.0.0 620 build_version: The version of the build, e.g., R46-7272.0.0 621 board: Name of the board, e.g., lumpy 622 suite: Name of the test suite, e.g., bvt 623 624 """ 625 info = {} 626 suite_job_regex = '([^/]*/[^/]*(?:/\d+)?)-test_suites/control\.(.*)' 627 test_job_regex = '([^/]*/[^/]*(?:/\d+)?)/([^/]+)/.*' 628 match = re.match(suite_job_regex, name) 629 if not match: 630 match = re.match(test_job_regex, name) 631 if match: 632 info['build'] = match.groups()[0] 633 info['suite'] = match.groups()[1] 634 info['build_version'] = info['build'].split('/')[1] 635 try: 636 info['board'], _, _, _ = ParseBuildName(info['build']) 637 except ParseBuildNameException: 638 # Try to parse it as Launch Control build 639 # Launch Control builds have name format: 640 # branch/build_target-build_type/build_id. 641 try: 642 _, target, build_id = utils.parse_launch_control_build( 643 info['build']) 644 build_target, _ = utils.parse_launch_control_target(target) 645 if build_target: 646 info['board'] = build_target 647 info['build_version'] = build_id 648 except ValueError: 649 pass 650 return info 651 652 653def verify_not_root_user(): 654 """Simple function to error out if running with uid == 0""" 655 if os.getuid() == 0: 656 raise error.IllegalUser('This script can not be ran as root.') 657 658 659def get_hostname_from_machine(machine): 660 """Lookup hostname from a machine string or dict. 661 662 @returns: Machine hostname in string format. 663 """ 664 hostname, _ = get_host_info_from_machine(machine) 665 return hostname 666 667 668def get_host_info_from_machine(machine): 669 """Lookup host information from a machine string or dict. 670 671 @returns: Tuple of (hostname, afe_host) 672 """ 673 if isinstance(machine, dict): 674 return (machine['hostname'], machine['afe_host']) 675 else: 676 return (machine, EmptyAFEHost()) 677 678 679def get_afe_host_from_machine(machine): 680 """Return the afe_host from the machine dict if possible. 681 682 @returns: AFE host object. 683 """ 684 _, afe_host = get_host_info_from_machine(machine) 685 return afe_host 686 687 688def get_connection_pool_from_machine(machine): 689 """Returns the ssh_multiplex.ConnectionPool from machine if possible.""" 690 if not isinstance(machine, dict): 691 return None 692 return machine.get('connection_pool') 693 694 695def get_creds_abspath(creds_file): 696 """Returns the abspath of the credentials file. 697 698 If creds_file is already an absolute path, just return it. 699 Otherwise, assume it is located in the creds directory 700 specified in global_config and return the absolute path. 701 702 @param: creds_path, a path to the credentials. 703 @return: An absolute path to the credentials file. 704 """ 705 if not creds_file: 706 return None 707 if os.path.isabs(creds_file): 708 return creds_file 709 creds_dir = CONFIG.get_config_value('SERVER', 'creds_dir', default='') 710 if not creds_dir or not os.path.exists(creds_dir): 711 creds_dir = common.autotest_dir 712 return os.path.join(creds_dir, creds_file) 713 714 715def SetupTsMonGlobalState(*args, **kwargs): 716 """Import-safe wrap around chromite.lib.ts_mon_config's setup function. 717 718 @param *args: Args to pass through. 719 @param **kwargs: Kwargs to pass through. 720 """ 721 try: 722 # TODO(crbug.com/739466) This module import is delayed because it adds 723 # 1-2 seconds to the module import time and most users of site_utils 724 # don't need it. The correct fix is to break apart site_utils into more 725 # meaningful chunks. 726 from chromite.lib import ts_mon_config 727 except ImportError: 728 logging.warn('Unable to import chromite. Monarch is disabled.') 729 return TrivialContextManager() 730 731 try: 732 context = ts_mon_config.SetupTsMonGlobalState(*args, **kwargs) 733 if hasattr(context, '__exit__'): 734 return context 735 except Exception as e: 736 logging.warning('Caught an exception trying to setup ts_mon, ' 737 'monitoring is disabled: %s', e, exc_info=True) 738 return TrivialContextManager() 739 740 741@contextlib.contextmanager 742def TrivialContextManager(*args, **kwargs): 743 """Context manager that does nothing. 744 745 @param *args: Ignored args 746 @param **kwargs: Ignored kwargs. 747 """ 748 yield 749 750 751def wait_for_idle_duts(duts, afe, max_wait=IDLE_DUT_WAIT_TIMEOUT): 752 """Wait for the hosts to all go idle. 753 754 @param duts: List of duts to check for idle state. 755 @param afe: afe instance. 756 @param max_wait: Max wait time in seconds to wait for duts to be idle. 757 758 @returns Boolean True if all hosts are idle or False if any hosts did not 759 go idle within max_wait. 760 """ 761 start_time = time.time() 762 # We make a shallow copy since we're going to be modifying active_dut_list. 763 active_dut_list = duts[:] 764 while active_dut_list: 765 # Let's rate-limit how often we hit the AFE. 766 time.sleep(1) 767 768 # Check if we've waited too long. 769 if (time.time() - start_time) > max_wait: 770 return False 771 772 idle_duts = [] 773 # Get the status for the duts and see if they're in the idle state. 774 afe_hosts = afe.get_hosts(active_dut_list) 775 idle_duts = [afe_host.hostname for afe_host in afe_hosts 776 if afe_host.status in host_states.IDLE_STATES] 777 778 # Take out idle duts so we don't needlessly check them 779 # next time around. 780 for idle_dut in idle_duts: 781 active_dut_list.remove(idle_dut) 782 783 logging.info('still waiting for following duts to go idle: %s', 784 active_dut_list) 785 return True 786 787 788@contextlib.contextmanager 789def lock_duts_and_wait(duts, afe, lock_msg='default lock message', 790 max_wait=IDLE_DUT_WAIT_TIMEOUT): 791 """Context manager to lock the duts and wait for them to go idle. 792 793 @param duts: List of duts to lock. 794 @param afe: afe instance. 795 @param lock_msg: message for afe on locking this host. 796 @param max_wait: Max wait time in seconds to wait for duts to be idle. 797 798 @returns Boolean lock_success where True if all duts locked successfully or 799 False if we timed out waiting too long for hosts to go idle. 800 """ 801 try: 802 locked_duts = [] 803 duts.sort() 804 for dut in duts: 805 if afe.lock_host(dut, lock_msg, fail_if_locked=True): 806 locked_duts.append(dut) 807 else: 808 logging.info('%s already locked', dut) 809 yield wait_for_idle_duts(locked_duts, afe, max_wait) 810 finally: 811 afe.unlock_hosts(locked_duts) 812 813 814def _get_default_size_info(path): 815 """Get the default result size information. 816 817 In case directory summary is failed to build, assume the test result is not 818 throttled and all result sizes are the size of existing test results. 819 820 @return: A namedtuple of result size informations, including: 821 client_result_collected_KB: The total size (in KB) of test results 822 collected from test device. Set to be the total size of the 823 given path. 824 original_result_total_KB: The original size (in KB) of test results 825 before being trimmed. Set to be the total size of the given 826 path. 827 result_uploaded_KB: The total size (in KB) of test results to be 828 uploaded. Set to be the total size of the given path. 829 result_throttled: True if test results collection is throttled. 830 It's set to False in this default behavior. 831 """ 832 total_size = file_utils.get_directory_size_kibibytes(path); 833 return result_utils_lib.ResultSizeInfo( 834 client_result_collected_KB=total_size, 835 original_result_total_KB=total_size, 836 result_uploaded_KB=total_size, 837 result_throttled=False) 838 839 840def _report_result_size_metrics(result_size_info): 841 """Report result sizes information to metrics. 842 843 @param result_size_info: A ResultSizeInfo namedtuple containing information 844 of test result sizes. 845 """ 846 fields = {'result_throttled' : result_size_info.result_throttled} 847 metrics.Counter(RESULT_METRICS_PREFIX + 'client_result_collected_KB', 848 description='The total size (in KB) of test results ' 849 'collected from test device. Set to be the total size of ' 850 'the given path.' 851 ).increment_by(result_size_info.client_result_collected_KB, 852 fields=fields) 853 metrics.Counter(RESULT_METRICS_PREFIX + 'original_result_total_KB', 854 description='The original size (in KB) of test results ' 855 'before being trimmed.' 856 ).increment_by(result_size_info.original_result_total_KB, 857 fields=fields) 858 metrics.Counter(RESULT_METRICS_PREFIX + 'result_uploaded_KB', 859 description='The total size (in KB) of test results to be ' 860 'uploaded.' 861 ).increment_by(result_size_info.result_uploaded_KB, 862 fields=fields) 863 864 865@metrics.SecondsTimerDecorator( 866 'chromeos/autotest/result_collection/collect_result_sizes_duration') 867def collect_result_sizes(path, log=logging.debug): 868 """Collect the result sizes information and build result summary. 869 870 It first tries to merge directory summaries and calculate the result sizes 871 including: 872 client_result_collected_KB: The volume in KB that's transfered from the test 873 device. 874 original_result_total_KB: The volume in KB that's the original size of the 875 result files before being trimmed. 876 result_uploaded_KB: The volume in KB that will be uploaded. 877 result_throttled: Indicating if the result files were throttled. 878 879 If directory summary merging failed for any reason, fall back to use the 880 total size of the given result directory. 881 882 @param path: Path of the result directory to get size information. 883 @param log: The logging method, default to logging.debug 884 @return: A ResultSizeInfo namedtuple containing information of test result 885 sizes. 886 """ 887 try: 888 client_collected_bytes, summary, files = result_utils.merge_summaries( 889 path) 890 result_size_info = result_utils_lib.get_result_size_info( 891 client_collected_bytes, summary) 892 html_file = os.path.join(path, result_view.DEFAULT_RESULT_SUMMARY_NAME) 893 result_view.build(client_collected_bytes, summary, html_file) 894 895 # Delete all summary files after final view is built. 896 for summary_file in files: 897 os.remove(summary_file) 898 except: 899 log('Failed to calculate result sizes based on directory summaries for ' 900 'directory %s. Fall back to record the total size.\nException: %s' % 901 (path, traceback.format_exc())) 902 result_size_info = _get_default_size_info(path) 903 904 _report_result_size_metrics(result_size_info) 905 906 return result_size_info 907