# Lint as: python2, python3
# Copyright (c) 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.


from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections
import contextlib
import json
import logging
import os
import random
import re
import time
import traceback
from six.moves import filter
from six.moves import range
from six.moves import urllib

import common
from autotest_lib.client.bin.result_tools import utils as result_utils
from autotest_lib.client.bin.result_tools import utils_lib as result_utils_lib
from autotest_lib.client.bin.result_tools import view as result_view
from autotest_lib.client.common_lib import lsbrelease_utils
from autotest_lib.client.common_lib import utils
from autotest_lib.client.common_lib import error
from autotest_lib.client.common_lib import file_utils
from autotest_lib.client.common_lib import global_config
from autotest_lib.client.common_lib import host_queue_entry_states
from autotest_lib.client.common_lib import host_states
from autotest_lib.server.cros import provision
from autotest_lib.server.cros.dynamic_suite import constants
from autotest_lib.server.cros.dynamic_suite import job_status

try:
    from autotest_lib.utils.frozen_chromite.lib import metrics
except ImportError:
    metrics = utils.metrics_mock


CONFIG = global_config.global_config

LAB_GOOD_STATES = ('open', 'throttled')

ENABLE_DRONE_IN_RESTRICTED_SUBNET = CONFIG.get_config_value(
        'CROS', 'enable_drone_in_restricted_subnet', type=bool,
        default=False)

# Wait at most 10 mins for duts to go idle.
IDLE_DUT_WAIT_TIMEOUT = 600

# Mapping between board name and build target. This is for special-case
# handling of certain Android boards whose board name and build target name
# do not match.
ANDROID_TARGET_TO_BOARD_MAP = {
        'seed_l8150': 'gm4g_sprout',
        'bat_land': 'bat'
        }
ANDROID_BOARD_TO_TARGET_MAP = {
        'gm4g_sprout': 'seed_l8150',
        'bat': 'bat_land'
        }

# Prefix for the metrics name for result size information.
RESULT_METRICS_PREFIX = 'chromeos/autotest/result_collection/'


class TestLabException(Exception):
    """Exception raised when the Test Lab blocks a test or suite."""
    pass


class ParseBuildNameException(Exception):
    """Raised when ParseBuildName() cannot parse a build name."""
    pass


class Singleton(type):
    """Enforce that only one client class is instantiated per process."""
    _instances = {}

    def __call__(cls, *args, **kwargs):
        """Fetch the instance of a class to use for subsequent calls."""
        if cls not in cls._instances:
            cls._instances[cls] = super(Singleton, cls).__call__(
                    *args, **kwargs)
        return cls._instances[cls]
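

# Illustrative usage sketch only: how a client class could opt into the
# Singleton metaclass above. The class name is hypothetical and not part of
# this module; six.with_metaclass (assuming `import six`) keeps the
# declaration compatible with both Python 2 and 3.
#
#     class _ExampleRpcClient(six.with_metaclass(Singleton, object)):
#         def __init__(self, server=None):
#             self.server = server
#
#     # Both constructions return the same per-process instance.
#     assert _ExampleRpcClient('afe') is _ExampleRpcClient(server='tko')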


class EmptyAFEHost(object):
    """Object to represent an AFE host object when there is no AFE."""

    def __init__(self):
        """
        We'll be setting the instance attributes as we use them. Right now
        we only use attributes and labels, but as other attributes of an
        actual AFE Host object come into use (check
        rpc_interfaces.get_hosts()), we'll add them here so users won't be
        perplexed why their host's afe_host object complains that an
        attribute doesn't exist.
        """
        self.attributes = {}
        self.labels = []


def ParseBuildName(name):
    """Parse a build name into board, type, milestone, and manifest number.

    @param name: a build name, e.g. 'x86-alex-release/R20-2015.0.0' or a
                 relative build name, e.g. 'x86-alex-release/LATEST'

    @return board: board the manifest is for, e.g. x86-alex.
    @return type: one of 'release', 'factory', or 'firmware'.
    @return milestone: (numeric) milestone the manifest was associated with.
                       Will be None for relative build names.
    @return manifest: manifest number, e.g. '2015.0.0'.
                      Will be None for relative build names.

    """
    match = re.match(r'(trybot-)?(?P<board>[\w-]+?)(?:-chrome)?(?:-chromium)?'
                     r'-(?P<type>\w+)/(R(?P<milestone>\d+)-'
                     r'(?P<manifest>[\d.ab-]+)|LATEST)',
                     name)
    if match and len(match.groups()) >= 5:
        return (match.group('board'), match.group('type'),
                match.group('milestone'), match.group('manifest'))
    raise ParseBuildNameException('%s is a malformed build name.' % name)


def get_labels_from_afe(hostname, label_prefix, afe):
    """Retrieve a host's specific labels from the AFE.

    Looks for host labels that have the form <label_prefix>:<value>
    and returns the "<value>" part of each label. None is returned
    if there is no label matching the pattern.

    @param hostname: hostname of given DUT.
    @param label_prefix: prefix of label to be matched, e.g., |board:|.
    @param afe: afe instance.

    @returns A list of the "<value>" parts of labels matching the prefix, or
             None if no label matches.

    """
    labels = afe.get_labels(name__startswith=label_prefix,
                            host__hostname__in=[hostname])
    if labels:
        return [l.name.split(label_prefix, 1)[1] for l in labels]


def get_label_from_afe(hostname, label_prefix, afe):
    """Retrieve a host's specific label from the AFE.

    Looks for a host label that has the form <label_prefix>:<value>
    and returns the "<value>" part of the label. None is returned
    if there is no single label matching the pattern.

    @param hostname: hostname of given DUT.
    @param label_prefix: prefix of label to be matched, e.g., |board:|.
    @param afe: afe instance.
    @returns the "<value>" part of the single matching label, or None.

    """
    labels = get_labels_from_afe(hostname, label_prefix, afe)
    if labels and len(labels) == 1:
        return labels[0]


def get_board_from_afe(hostname, afe):
    """Retrieve given host's board from its labels in the AFE.

    Looks for a host label of the form "board:<board>", and
    returns the "<board>" part of the label. `None` is returned
    if there is not a single, unique label matching the pattern.

    @param hostname: hostname of given DUT.
    @param afe: afe instance.
    @returns board from label, or `None`.

    """
    return get_label_from_afe(hostname, constants.BOARD_PREFIX, afe)
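

# Illustrative only: for a DUT whose AFE labels include 'board:x86-alex' and
# 'pool:bvt', the helpers above behave roughly as follows (the hostname and
# afe object are placeholders assumed to exist):
#
#     get_labels_from_afe('host1', 'board:', afe)  # -> ['x86-alex']
#     get_label_from_afe('host1', 'board:', afe)   # -> 'x86-alex'
#     get_board_from_afe('host1', afe)             # -> 'x86-alex'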


def get_build_from_afe(hostname, afe):
    """Retrieve the current build for given host from the AFE.

    Looks through the host's labels in the AFE to determine its build.

    @param hostname: hostname of given DUT.
    @param afe: afe instance.
    @returns The current build or None if it could not find it or if there
             were multiple build labels assigned to this host.

    """
    prefix = provision.CROS_VERSION_PREFIX
    build = get_label_from_afe(hostname, prefix + ':', afe)
    if build:
        return build
    return None


# TODO(ayatane): Can be deleted
def get_sheriffs(lab_only=False):
    """
    Polls the javascript file that holds the identity of the sheriff and
    parses its output to return a list of chromium sheriff email addresses.
    The javascript file can contain the ldap of more than one sheriff, e.g.:
    document.write('sheriff_one, sheriff_two').

    @param lab_only: if True, only pulls lab sheriff.
    @return: A list of chromium.org sheriff email addresses to cc on the bug.
             An empty list if the javascript cannot be parsed.
    """
    return []


def remote_wget(source_url, dest_path, ssh_cmd):
    """wget source_url from localhost to dest_path on remote host using ssh.

    @param source_url: The complete url of the source of the package to send.
    @param dest_path: The path on the remote host's file system where we would
                      like to store the package.
    @param ssh_cmd: The ssh command to use in performing the remote wget.
    """
    wget_cmd = ("wget -O - %s | %s 'cat >%s'" %
                (source_url, ssh_cmd, dest_path))
    utils.run(wget_cmd)


_MAX_LAB_STATUS_ATTEMPTS = 5


def _get_lab_status(status_url):
    """Grabs the current lab status and message.

    @param status_url: URL to query for the lab status.

    @returns The JSON object obtained from the given URL, or None if the
             status could not be fetched.

    """
    retry_waittime = 1
    for _ in range(_MAX_LAB_STATUS_ATTEMPTS):
        try:
            response = urllib.request.urlopen(status_url)
        except IOError as e:
            logging.debug('Error occurred when grabbing the lab status: %s.',
                          e)
            time.sleep(retry_waittime)
            continue
        # Check for successful response code.
        if response.getcode() == 200:
            return json.load(response)
        time.sleep(retry_waittime)
    return None


def _decode_lab_status(lab_status, build):
    """Decode lab status, and report exceptions as needed.

    Take a deserialized JSON object from the lab status page, and
    interpret it to determine the actual lab status. Raise
    exceptions as required to report when the lab is down.

    @param lab_status: Deserialized JSON object from the lab status page.
    @param build: build name that we want to check the status of.

    @raises TestLabException Raised if a request to test for the given
                             status and build should be blocked.
    """
    # First check if the lab is up.
    if lab_status['general_state'] not in LAB_GOOD_STATES:
        raise TestLabException('Chromium OS Test Lab is closed: '
                               '%s.' % lab_status['message'])

    # Check if the build we wish to use is disabled.
    # Lab messages should be in the format of:
    #     Lab is 'status' [regex ...] (comment)
    # If the build name matches any regex, it will be blocked.
    build_exceptions = re.search(r'\[(.*)\]', lab_status['message'])
    if not build_exceptions or not build:
        return
    for build_pattern in build_exceptions.group(1).split():
        if re.match(build_pattern, build):
            raise TestLabException('Chromium OS Test Lab is closed: '
                                   '%s matches %s.' % (
                                           build, build_pattern))
    return
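

# Illustrative only: a sketch of the lab-status message format that
# _decode_lab_status() understands. The regexes and bug link below are made-up
# examples, not real lab messages.
#
#     {'general_state': 'open',
#      'message': "Lab is 'open' [x86-alex-release/R26.* lumpy-paladin/.*] "
#                 "(crbug.com/12345)"}
#
# With this status, a build matching either bracketed regex raises
# TestLabException, while every other build is allowed to proceed.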


def is_in_lab():
    """Check if the current Autotest instance is in the lab.

    @return: True if the Autotest instance is in the lab.
    """
    test_server_name = CONFIG.get_config_value('SERVER', 'hostname')
    return test_server_name.startswith('cautotest')


def check_lab_status(build):
    """Check if the lab status allows us to schedule for a build.

    Checks if the lab is down, or if testing for the requested build
    should be blocked.

    @param build: Name of the build to be scheduled for testing.

    @raises TestLabException Raised if a request to test for the given
                             status and build should be blocked.

    """
    # Ensure we are trying to schedule on the actual lab.
    if not is_in_lab():
        return

    # Download the lab status from its home on the web.
    status_url = CONFIG.get_config_value('CROS', 'lab_status_url')
    json_status = _get_lab_status(status_url)
    if json_status is None:
        # We go ahead and say the lab is open if we can't get the status.
        logging.warning('Could not get a status from %s', status_url)
        return
    _decode_lab_status(json_status, build)


def host_in_lab(hostname):
    """Check if the execution is against a host in the lab."""
    return (not utils.in_moblab_ssp()
            and not lsbrelease_utils.is_moblab()
            and utils.host_is_in_lab_zone(hostname))


def lock_host_with_labels(afe, lock_manager, labels):
    """Look up and lock one host that matches the list of input labels.

    @param afe: An instance of the afe class, as defined in server.frontend.
    @param lock_manager: A lock manager capable of locking hosts, e.g. the
                         one defined in server.cros.host_lock_manager.
    @param labels: A list of labels to look for on hosts.

    @return: The hostname of a host matching all labels, and locked through
             the lock_manager. The hostname will be as specified in the
             database the afe object is associated with, i.e. if it exists in
             afe_hosts with a .cros suffix, the hostname returned will contain
             a .cros suffix.

    @raises: error.NoEligibleHostException: If no hosts matching the list of
             input labels are available.
    @raises: error.TestError: If unable to lock a host matching the labels.
    """
    potential_hosts = afe.get_hosts(multiple_labels=labels)
    if not potential_hosts:
        raise error.NoEligibleHostException(
                'No devices found with labels %s.' % labels)

    # This prevents errors where a fault might seem repeatable
    # because we lock, say, the same packet capturer for each test run.
    random.shuffle(potential_hosts)
    for host in potential_hosts:
        if lock_manager.lock([host.hostname]):
            logging.info('Locked device %s with labels %s.',
                         host.hostname, labels)
            return host.hostname
        else:
            logging.info('Unable to lock device %s with labels %s.',
                         host.hostname, labels)

    raise error.TestError('Could not lock a device with labels %s' % labels)
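

# Illustrative only: a hedged sketch of grabbing one matching device with the
# helper above. The label values are placeholders; afe and lock_manager are
# assumed to be built elsewhere (server.frontend.AFE and
# server.cros.host_lock_manager, respectively).
#
#     capturer = lock_host_with_labels(
#             afe, lock_manager, ['packet_capture', 'board:lumpy'])
#     # ... use `capturer`, then release it via lock_manager when done.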


def get_test_views_from_tko(suite_job_id, tko):
    """Get test name and result for given suite job ID.

    @param suite_job_id: ID of suite job.
    @param tko: an instance of TKO as defined in server/frontend.py.
    @return: A defaultdict where keys are test names and values are
             lists of test statuses, e.g.,
             {'stub_Fail.Error': ['ERROR', 'ERROR'],
              'stub_Fail.NAError': ['TEST_NA'],
              'stub_Fail.RetrySuccess': ['ERROR', 'GOOD'],
             }
    @raise: Exception when there is no test view found.

    """
    views = tko.run('get_detailed_test_views', afe_job_id=suite_job_id)
    relevant_views = list(filter(job_status.view_is_relevant, views))
    if not relevant_views:
        raise Exception('Failed to retrieve job results.')

    test_views = collections.defaultdict(list)
    for view in relevant_views:
        test_views[view['test_name']].append(view['status'])
    return test_views


def get_data_key(prefix, suite, build, board):
    """
    Constructs a key string from parameters.

    @param prefix: Prefix for the generated key.
    @param suite: a suite name, e.g., bvt-cq, bvt-inline, infra_qual.
    @param build: The build string. This string should have a consistent
        format, e.g.: x86-mario-release/R26-3570.0.0. If the format of this
        string changes such that we can't determine build_type or branch,
        we give up and use the parameters we're sure of instead (suite,
        board), e.g.:
            1. build = x86-alex-pgo-release/R26-3570.0.0
               branch = 26
               build_type = pgo-release
            2. build = lumpy-paladin/R28-3993.0.0-rc5
               branch = 28
               build_type = paladin
    @param board: The board that this suite ran on.
    @return: The key string used for a dictionary.
    """
    try:
        _board, build_type, branch = ParseBuildName(build)[:3]
    except ParseBuildNameException as e:
        logging.error(str(e))
        branch = 'Unknown'
        build_type = 'Unknown'
    else:
        embedded_str = re.search(r'x86-\w+-(.*)', _board)
        if embedded_str:
            build_type = embedded_str.group(1) + '-' + build_type

    data_key_dict = {
            'prefix': prefix,
            'board': board,
            'branch': branch,
            'build_type': build_type,
            'suite': suite,
    }
    return ('%(prefix)s.%(board)s.%(build_type)s.%(branch)s.%(suite)s'
            % data_key_dict)
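

# Illustrative only: the key shape produced by get_data_key(); the prefix and
# suite below are made-up values, not ones used elsewhere in this module.
#
#     get_data_key('chromeos/autotest/errors', 'bvt-cq',
#                  'x86-alex-release/R26-3570.0.0', 'x86-alex')
#     # -> 'chromeos/autotest/errors.x86-alex.release.26.bvt-cq'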


def is_shard():
    """Determines if this instance is running as a shard.

    Reads the global_config value shard_hostname in the section SHARD.

    @return True, if shard_hostname is set, False otherwise.
    """
    hostname = CONFIG.get_config_value('SHARD', 'shard_hostname', default=None)
    return bool(hostname)


def get_global_afe_hostname():
    """Read the hostname of the global AFE from the global configuration."""
    return CONFIG.get_config_value('SERVER', 'global_afe_hostname')


def get_special_task_status(is_complete, success, is_active):
    """Get the status of a special task.

    Emulate a host queue entry status for a special task.
    Although SpecialTasks are not HostQueueEntries, it is helpful to
    the user to present similar statuses.

    @param is_complete Boolean if the task is completed.
    @param success Boolean if the task succeeded.
    @param is_active Boolean if the task is active.

    @return The status of a special task.
    """
    if is_complete:
        if success:
            return host_queue_entry_states.Status.COMPLETED
        return host_queue_entry_states.Status.FAILED
    if is_active:
        return host_queue_entry_states.Status.RUNNING
    return host_queue_entry_states.Status.QUEUED


def get_special_task_exec_path(hostname, task_id, task_name, time_requested):
    """Get the execution path of the SpecialTask.

    This method returns different paths depending on where the task ran:
        * Main: hosts/hostname/task_id-task_type
        * Shard: main_path/time_created
    This is to work around the fact that a shard can fail independently
    of the main, and be replaced by another shard that has the same
    hosts. Without the time_created stamp the logs of the tasks running
    on the second shard will clobber the logs from the first in Google
    Storage, because task ids are not globally unique.

    @param hostname Hostname.
    @param task_id Special task id.
    @param task_name Special task name (e.g., Verify, Repair, etc).
    @param time_requested Special task requested time.

    @return An execution path for the task.
    """
    results_path = 'hosts/%s/%s-%s' % (hostname, task_id, task_name.lower())

    # If we do this on the main it will break backward compatibility,
    # as there are tasks that currently don't have timestamps. If a host
    # or job has been sent to a shard, the rpc for that host/job will
    # be redirected to the shard, so this global_config check will happen
    # on the shard the logs are on.
    if not is_shard():
        return results_path

    # Generate a uid to disambiguate special task result directories
    # in case this shard fails. The simplest uid is the job_id, however
    # in rare cases tasks do not have jobs associated with them (e.g.,
    # frontend verify), so just use the creation timestamp. The clocks
    # between a shard and main should always be in sync. Any discrepancies
    # will be brought to our attention in the form of job timeouts.
    uid = time_requested.strftime('%Y%d%m%H%M%S')

    # TODO: This is a hack, however it is the easiest way to achieve
    # correctness. There is currently some debate over the future of
    # tasks in our infrastructure and refactoring everything right
    # now isn't worth the time.
    return '%s/%s' % (results_path, uid)
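

# Illustrative only: the path shapes produced above (the hostname, task id,
# and timestamp are made up):
#
#     On the main scheduler:  'hosts/chromeos1-row1-host1/12345-verify'
#     On a shard:  'hosts/chromeos1-row1-host1/12345-verify/20221503120000'
#
# The trailing component on shards is time_requested.strftime('%Y%d%m%H%M%S'),
# which keeps result directories from two shards that reuse the same task ids
# from colliding in Google Storage.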


def get_job_tag(id, owner):
    """Returns a string tag for a job.

    @param id Job id.
    @param owner Job owner.

    """
    return '%s-%s' % (id, owner)


def get_hqe_exec_path(tag, execution_subdir):
    """Returns an execution path to a HQE's results.

    @param tag Tag string for a job associated with a HQE.
    @param execution_subdir Execution sub-directory string of a HQE.

    """
    return os.path.join(tag, execution_subdir)


def is_inside_chroot():
    """Check if the process is running inside chroot.

    @return: True if the process is running inside chroot.

    """
    return os.path.exists('/etc/cros_chroot_version')


def parse_job_name(name):
    """Parse job name to get information including build, board and suite etc.

    Suite jobs created by run_suite follow the naming convention of:
    [build]-test_suites/control.[suite]
    For example: lumpy-release/R46-7272.0.0-test_suites/control.bvt
    The naming convention is defined in rpc_interface.create_suite_job.

    Test jobs created by a suite job follow the naming convention of:
    [build]/[suite]/[test name]
    For example: lumpy-release/R46-7272.0.0/bvt/login_LoginSuccess
    The naming convention is defined in
    server/cros/dynamic_suite/tools.create_job_name.

    Note that pgo and chrome-perf builds will fail to parse. Since the lab
    does not run tests for these builds, they can be ignored.
    Also, tests for Launch Control builds follow a different naming
    convention; for those, the build ID is used as build_version.

    @param name: Name of the job.

    @return: A dictionary containing the test information. The keyvals
             include:
             build: Name of the build, e.g., lumpy-release/R46-7272.0.0
             build_version: The version of the build, e.g., R46-7272.0.0
             board: Name of the board, e.g., lumpy
             suite: Name of the test suite, e.g., bvt

    """
    info = {}
    suite_job_regex = r'([^/]*/[^/]*(?:/\d+)?)-test_suites/control\.(.*)'
    test_job_regex = r'([^/]*/[^/]*(?:/\d+)?)/([^/]+)/.*'
    match = re.match(suite_job_regex, name)
    if not match:
        match = re.match(test_job_regex, name)
    if match:
        info['build'] = match.groups()[0]
        info['suite'] = match.groups()[1]
        info['build_version'] = info['build'].split('/')[1]
        try:
            info['board'], _, _, _ = ParseBuildName(info['build'])
        except ParseBuildNameException:
            # Try to parse it as a Launch Control build.
            # Launch Control builds have name format:
            # branch/build_target-build_type/build_id.
            try:
                _, target, build_id = utils.parse_launch_control_build(
                        info['build'])
                build_target, _ = utils.parse_launch_control_target(target)
                if build_target:
                    info['board'] = build_target
                    info['build_version'] = build_id
            except ValueError:
                pass
    return info


def verify_not_root_user():
    """Simple function to error out if running with uid == 0."""
    if os.getuid() == 0:
        raise error.IllegalUser('This script cannot be run as root.')


def get_hostname_from_machine(machine):
    """Look up the hostname from a machine string or dict.

    @returns: Machine hostname in string format.
    """
    hostname, _ = get_host_info_from_machine(machine)
    return hostname


def get_host_info_from_machine(machine):
    """Look up host information from a machine string or dict.

    @returns: Tuple of (hostname, afe_host).
    """
    if isinstance(machine, dict):
        return (machine['hostname'], machine['afe_host'])
    else:
        return (machine, EmptyAFEHost())


def get_afe_host_from_machine(machine):
    """Return the afe_host from the machine dict if possible.

    @returns: AFE host object.
    """
    _, afe_host = get_host_info_from_machine(machine)
    return afe_host


def get_connection_pool_from_machine(machine):
    """Returns the ssh_multiplex.ConnectionPool from machine if possible."""
    if not isinstance(machine, dict):
        return None
    return machine.get('connection_pool')


def SetupTsMonGlobalState(*args, **kwargs):
    """Import-safe wrap around chromite.lib.ts_mon_config's setup function.

    @param *args: Args to pass through.
    @param **kwargs: Kwargs to pass through.
    """
    try:
        # TODO(crbug.com/739466) This module import is delayed because it adds
        # 1-2 seconds to the module import time and most users of site_utils
        # don't need it. The correct fix is to break apart site_utils into more
        # meaningful chunks.
        from autotest_lib.utils.frozen_chromite.lib import ts_mon_config
    except ImportError as e:
        logging.warning('Unable to import chromite. Monarch is disabled: %s',
                        e)
        return TrivialContextManager()

    try:
        context = ts_mon_config.SetupTsMonGlobalState(*args, **kwargs)
        if hasattr(context, '__exit__'):
            return context
    except Exception as e:
        logging.warning('Caught an exception trying to setup ts_mon, '
                        'monitoring is disabled: %s', e, exc_info=True)
    return TrivialContextManager()
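

# Illustrative only: SetupTsMonGlobalState() always hands back something
# usable as a context manager, so callers can write the block below whether or
# not chromite is importable. The service name and keyword argument are
# examples of values forwarded verbatim to chromite's ts_mon_config, not names
# defined in this module.
#
#     with SetupTsMonGlobalState('shard_client', indirect=False):
#         ...  # metrics emitted here reach Monarch when ts_mon is available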


@contextlib.contextmanager
def TrivialContextManager(*args, **kwargs):
    """Context manager that does nothing.

    @param *args: Ignored args.
    @param **kwargs: Ignored kwargs.
    """
    yield


def wait_for_idle_duts(duts, afe, max_wait=IDLE_DUT_WAIT_TIMEOUT):
    """Wait for the hosts to all go idle.

    @param duts: List of duts to check for idle state.
    @param afe: afe instance.
    @param max_wait: Max wait time in seconds to wait for duts to be idle.

    @returns Boolean True if all hosts are idle or False if any hosts did not
             go idle within max_wait.
    """
    start_time = time.time()
    # We make a shallow copy since we're going to be modifying active_dut_list.
    active_dut_list = duts[:]
    while active_dut_list:
        # Let's rate-limit how often we hit the AFE.
        time.sleep(1)

        # Check if we've waited too long.
        if (time.time() - start_time) > max_wait:
            return False

        # Get the status for the duts and see if they're in the idle state.
        afe_hosts = afe.get_hosts(active_dut_list)
        idle_duts = [afe_host.hostname for afe_host in afe_hosts
                     if afe_host.status in host_states.IDLE_STATES]

        # Take out idle duts so we don't needlessly check them
        # next time around.
        for idle_dut in idle_duts:
            active_dut_list.remove(idle_dut)

        logging.info('Still waiting for the following duts to go idle: %s',
                     active_dut_list)
    return True


@contextlib.contextmanager
def lock_duts_and_wait(duts, afe, lock_msg='default lock message',
                       max_wait=IDLE_DUT_WAIT_TIMEOUT):
    """Context manager to lock the duts and wait for them to go idle.

    @param duts: List of duts to lock.
    @param afe: afe instance.
    @param lock_msg: message for afe on locking this host.
    @param max_wait: Max wait time in seconds to wait for duts to be idle.

    @returns Boolean lock_success: True if all duts locked successfully, or
             False if we timed out waiting for the hosts to go idle.
    """
    locked_duts = []
    try:
        duts.sort()
        for dut in duts:
            if afe.lock_host(dut, lock_msg, fail_if_locked=True):
                locked_duts.append(dut)
            else:
                logging.info('%s already locked', dut)
        yield wait_for_idle_duts(locked_duts, afe, max_wait)
    finally:
        afe.unlock_hosts(locked_duts)
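

# Illustrative only: typical use of the context manager above. The dut list
# and lock message are placeholders.
#
#     with lock_duts_and_wait(['host1', 'host2'], afe,
#                             lock_msg='balancing pool') as all_idle:
#         if all_idle:
#             ...  # every locked DUT went idle within max_wait
#
# The locked DUTs are unlocked again when the with-block exits, even on error.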


def _get_default_size_info(path):
    """Get the default result size information.

    If the directory summary fails to build, assume the test results are not
    throttled and all result sizes are the size of the existing test results.

    @return: A namedtuple of result size information, including:
            client_result_collected_KB: The total size (in KB) of test results
                    collected from the test device. Set to be the total size
                    of the given path.
            original_result_total_KB: The original size (in KB) of test results
                    before being trimmed. Set to be the total size of the given
                    path.
            result_uploaded_KB: The total size (in KB) of test results to be
                    uploaded. Set to be the total size of the given path.
            result_throttled: True if test results collection is throttled.
                    It's set to False in this default behavior.
    """
    total_size = file_utils.get_directory_size_kibibytes(path)
    return result_utils_lib.ResultSizeInfo(
            client_result_collected_KB=total_size,
            original_result_total_KB=total_size,
            result_uploaded_KB=total_size,
            result_throttled=False)


def _report_result_size_metrics(result_size_info):
    """Report result size information to metrics.

    @param result_size_info: A ResultSizeInfo namedtuple containing information
                             of test result sizes.
    """
    fields = {'result_throttled': result_size_info.result_throttled}
    metrics.Counter(RESULT_METRICS_PREFIX + 'client_result_collected_KB',
                    description='The total size (in KB) of test results '
                    'collected from test device. Set to be the total size of '
                    'the given path.').increment_by(int(
                            result_size_info.client_result_collected_KB),
                            fields=fields)
    metrics.Counter(RESULT_METRICS_PREFIX + 'original_result_total_KB',
                    description='The original size (in KB) of test results '
                    'before being trimmed.').increment_by(int(
                            result_size_info.original_result_total_KB),
                            fields=fields)
    metrics.Counter(RESULT_METRICS_PREFIX + 'result_uploaded_KB',
                    description='The total size (in KB) of test results to be '
                    'uploaded.').increment_by(int(
                            result_size_info.result_uploaded_KB),
                            fields=fields)


@metrics.SecondsTimerDecorator(
        'chromeos/autotest/result_collection/collect_result_sizes_duration')
def collect_result_sizes(path, log=logging.debug):
    """Collect the result sizes information and build result summary.

    It first tries to merge directory summaries and calculate the result
    sizes, including:
    client_result_collected_KB: The volume in KB that's transferred from the
            test device.
    original_result_total_KB: The volume in KB that's the original size of the
            result files before being trimmed.
    result_uploaded_KB: The volume in KB that will be uploaded.
    result_throttled: Indicating if the result files were throttled.

    If directory summary merging fails for any reason, fall back to using the
    total size of the given result directory.

    @param path: Path of the result directory to get size information.
    @param log: The logging method, default to logging.debug.
    @return: A ResultSizeInfo namedtuple containing information of test result
             sizes.
    """
    try:
        client_collected_bytes, summary, files = result_utils.merge_summaries(
                path)
        result_size_info = result_utils_lib.get_result_size_info(
                client_collected_bytes, summary)
        html_file = os.path.join(path, result_view.DEFAULT_RESULT_SUMMARY_NAME)
        result_view.build(client_collected_bytes, summary, html_file)

        # Delete all summary files after the final view is built.
        for summary_file in files:
            os.remove(summary_file)
    except:
        log('Failed to calculate result sizes based on directory summaries '
            'for directory %s. Falling back to recording the total size.'
            '\nException: %s' % (path, traceback.format_exc()))
        result_size_info = _get_default_size_info(path)

    _report_result_size_metrics(result_size_info)

    return result_size_info
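

# Illustrative only: how a results-offloading caller might use
# collect_result_sizes(). The results path is a made-up example.
#
#     size_info = collect_result_sizes('/usr/local/autotest/results/123-debug')
#     if size_info.result_throttled:
#         logging.info('Results were throttled; uploading %d KB',
#                      size_info.result_uploaded_KB)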