# Copyright (c) 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.


import contextlib
import grp
import httplib
import json
import logging
import os
import random
import re
import time
import traceback
import urllib2

import common
from autotest_lib.client.bin.result_tools import utils as result_utils
from autotest_lib.client.bin.result_tools import utils_lib as result_utils_lib
from autotest_lib.client.bin.result_tools import view as result_view
from autotest_lib.client.common_lib import lsbrelease_utils
from autotest_lib.client.common_lib import utils
from autotest_lib.client.common_lib import error
from autotest_lib.client.common_lib import file_utils
from autotest_lib.client.common_lib import global_config
from autotest_lib.client.common_lib import host_queue_entry_states
from autotest_lib.client.common_lib import host_states
from autotest_lib.server.cros import provision
from autotest_lib.server.cros.dynamic_suite import constants
from autotest_lib.server.cros.dynamic_suite import job_status

try:
    from chromite.lib import metrics
except ImportError:
    metrics = utils.metrics_mock


CONFIG = global_config.global_config

_SHERIFF_JS = CONFIG.get_config_value('NOTIFICATIONS', 'sheriffs', default='')
_LAB_SHERIFF_JS = CONFIG.get_config_value(
        'NOTIFICATIONS', 'lab_sheriffs', default='')
_CHROMIUM_BUILD_URL = CONFIG.get_config_value(
        'NOTIFICATIONS', 'chromium_build_url', default='')

LAB_GOOD_STATES = ('open', 'throttled')

ENABLE_DRONE_IN_RESTRICTED_SUBNET = CONFIG.get_config_value(
        'CROS', 'enable_drone_in_restricted_subnet', type=bool,
        default=False)

# Wait at most 10 mins for duts to go idle.
IDLE_DUT_WAIT_TIMEOUT = 600

# Mapping between board name and build target. This is for special-case
# handling of certain Android boards whose board name and build target name
# do not match.
ANDROID_TARGET_TO_BOARD_MAP = {
        'seed_l8150': 'gm4g_sprout',
        'bat_land': 'bat'
        }
ANDROID_BOARD_TO_TARGET_MAP = {
        'gm4g_sprout': 'seed_l8150',
        'bat': 'bat_land'
        }

# Prefix for the metrics name for result size information.
RESULT_METRICS_PREFIX = 'chromeos/autotest/result_collection/'


class TestLabException(Exception):
    """Exception raised when the Test Lab blocks a test or suite."""
    pass


class ParseBuildNameException(Exception):
    """Raised when ParseBuildName() cannot parse a build name."""
    pass


class Singleton(type):
    """Enforce that only one client class is instantiated per process."""
    _instances = {}

    def __call__(cls, *args, **kwargs):
        """Fetch the instance of a class to use for subsequent calls."""
        if cls not in cls._instances:
            cls._instances[cls] = super(Singleton, cls).__call__(
                    *args, **kwargs)
        return cls._instances[cls]


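# Illustrative note (a sketch, not part of the original module): a client class
# opts into the Singleton metaclass above via Python 2 metaclass syntax. The
# class name below is hypothetical.
#
#     class _ExampleAFEClient(object):
#         __metaclass__ = Singleton
#
#     # Both calls return the same instance within one process.
#     assert _ExampleAFEClient() is _ExampleAFEClient()

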
class EmptyAFEHost(object):
    """Object to represent an AFE host object when there is no AFE."""

    def __init__(self):
        """
        We'll be setting the instance attributes as we use them. Right now
        we only use attributes and labels, but as time goes by and other
        attributes of an actual AFE Host object come into use (check
        rpc_interfaces.get_hosts()), we'll add them here so users won't be
        perplexed why their host's afe_host object complains that an
        attribute doesn't exist.
        """
        self.attributes = {}
        self.labels = []


def ParseBuildName(name):
    """Parse a build name into its board, type, milestone, and manifest number.

    @param name: a build name, e.g. 'x86-alex-release/R20-2015.0.0' or a
                 relative build name, e.g. 'x86-alex-release/LATEST'

    @return board: board the manifest is for, e.g. x86-alex.
    @return type: one of 'release', 'factory', or 'firmware'
    @return milestone: (numeric) milestone the manifest was associated with.
                       Will be None for relative build names.
    @return manifest: manifest number, e.g. '2015.0.0'.
                      Will be None for relative build names.

    """
    match = re.match(r'(trybot-)?(?P<board>[\w-]+?)(?:-chrome)?(?:-chromium)?'
                     r'-(?P<type>\w+)/(R(?P<milestone>\d+)-'
                     r'(?P<manifest>[\d.ab-]+)|LATEST)',
                     name)
    if match and len(match.groups()) >= 5:
        return (match.group('board'), match.group('type'),
                match.group('milestone'), match.group('manifest'))
    raise ParseBuildNameException('%s is a malformed build name.' % name)


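# Illustrative examples (derived from the docstring above): a canonical build
# name splits into four components; a relative build name has no milestone or
# manifest.
#
#     ParseBuildName('x86-alex-release/R20-2015.0.0')
#     # -> ('x86-alex', 'release', '20', '2015.0.0')
#     ParseBuildName('x86-alex-release/LATEST')
#     # -> ('x86-alex', 'release', None, None)
#     ParseBuildName('not-a-build-name')  # raises ParseBuildNameException

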
def get_labels_from_afe(hostname, label_prefix, afe):
    """Retrieve a host's specific labels from the AFE.

    Looks for host labels that have the form <label_prefix>:<value>
    and returns the "<value>" part of each label. None is returned
    if there is no label matching the pattern.

    @param hostname: hostname of given DUT.
    @param label_prefix: prefix of label to be matched, e.g., |board:|
    @param afe: afe instance.

    @returns A list of labels that match the prefix, or None.

    """
    labels = afe.get_labels(name__startswith=label_prefix,
                            host__hostname__in=[hostname])
    if labels:
        return [l.name.split(label_prefix, 1)[1] for l in labels]


def get_label_from_afe(hostname, label_prefix, afe):
    """Retrieve a host's specific label from the AFE.

    Looks for a host label that has the form <label_prefix>:<value>
    and returns the "<value>" part of the label. None is returned
    if there is no label matching the pattern.

    @param hostname: hostname of given DUT.
    @param label_prefix: prefix of label to be matched, e.g., |board:|
    @param afe: afe instance.
    @returns the label that matches the prefix, or None.

    """
    labels = get_labels_from_afe(hostname, label_prefix, afe)
    if labels and len(labels) == 1:
        return labels[0]


def get_board_from_afe(hostname, afe):
    """Retrieve given host's board from its labels in the AFE.

    Looks for a host label of the form "board:<board>", and
    returns the "<board>" part of the label. `None` is returned
    if there is not a single, unique label matching the pattern.

    @param hostname: hostname of given DUT.
    @param afe: afe instance.
    @returns board from label, or `None`.

    """
    return get_label_from_afe(hostname, constants.BOARD_PREFIX, afe)


def get_build_from_afe(hostname, afe):
    """Retrieve the current build for given host from the AFE.

    Looks through the host's labels in the AFE to determine its build.

    @param hostname: hostname of given DUT.
    @param afe: afe instance.
    @returns The current build or None if it could not find it or if there
             were multiple build labels assigned to this host.

    """
    prefix = provision.CROS_VERSION_PREFIX
    build = get_label_from_afe(hostname, prefix + ':', afe)
    if build:
        return build
    return None


# TODO(fdeng): fix get_sheriffs crbug.com/483254
def get_sheriffs(lab_only=False):
    """
    Polls the javascript file that holds the identity of the sheriff and
    parses its output to return a list of chromium sheriff email addresses.
    The javascript file can contain the ldap of more than one sheriff, e.g.:
    document.write('sheriff_one, sheriff_two').

    @param lab_only: if True, only pulls lab sheriff.
    @return: A list of chromium.org sheriff email addresses to cc on the bug.
             An empty list if the javascript could not be parsed.
    """
    sheriff_ids = []
    sheriff_js_list = _LAB_SHERIFF_JS.split(',')
    if not lab_only:
        sheriff_js_list.extend(_SHERIFF_JS.split(','))

    for sheriff_js in sheriff_js_list:
        try:
            url_content = utils.urlopen('%s%s' % (
                    _CHROMIUM_BUILD_URL, sheriff_js)).read()
        except (ValueError, IOError) as e:
            logging.warning('could not parse sheriff from url %s%s: %s',
                            _CHROMIUM_BUILD_URL, sheriff_js, str(e))
        except (urllib2.URLError, httplib.HTTPException) as e:
            logging.warning('unexpected error reading from url "%s%s": %s',
                            _CHROMIUM_BUILD_URL, sheriff_js, str(e))
        else:
            ldaps = re.search(r"document\.write\('(.*)'\)", url_content)
            if not ldaps:
                logging.warning('Could not retrieve sheriff ldaps for: %s',
                                url_content)
                continue
            sheriff_ids += ['%s@chromium.org' % alias.replace(' ', '')
                            for alias in ldaps.group(1).split(',')]
    return sheriff_ids


def remote_wget(source_url, dest_path, ssh_cmd):
    """wget source_url from localhost to dest_path on remote host using ssh.

    @param source_url: The complete url of the source of the package to send.
    @param dest_path: The path on the remote host's file system where we would
                      like to store the package.
    @param ssh_cmd: The ssh command to use in performing the remote wget.
    """
    wget_cmd = ("wget -O - %s | %s 'cat >%s'" %
                (source_url, ssh_cmd, dest_path))
    utils.run(wget_cmd)


_MAX_LAB_STATUS_ATTEMPTS = 5
def _get_lab_status(status_url):
    """Grab the current lab status and message.

    @returns The JSON object obtained from the given URL, or None if it could
             not be retrieved within _MAX_LAB_STATUS_ATTEMPTS attempts.

    """
    retry_waittime = 1
    for _ in range(_MAX_LAB_STATUS_ATTEMPTS):
        try:
            response = urllib2.urlopen(status_url)
        except IOError as e:
            logging.debug('Error occurred when grabbing the lab status: %s.',
                          e)
            time.sleep(retry_waittime)
            continue
        # Check for a successful response code.
        if response.getcode() == 200:
            return json.load(response)
        time.sleep(retry_waittime)
    return None


def _decode_lab_status(lab_status, build):
    """Decode lab status, and report exceptions as needed.

    Take a deserialized JSON object from the lab status page, and
    interpret it to determine the actual lab status. Raise
    exceptions as required to report when the lab is down.

    @param lab_status: The deserialized JSON object for the lab status.
    @param build: build name that we want to check the status of.

    @raises TestLabException Raised if a request to test for the given
                             status and build should be blocked.
    """
    # First check if the lab is up.
    if not lab_status['general_state'] in LAB_GOOD_STATES:
        raise TestLabException('Chromium OS Test Lab is closed: '
                               '%s.' % lab_status['message'])

    # Check if the build we wish to use is disabled.
    # Lab messages should be in the format of:
    #     Lab is 'status' [regex ...] (comment)
    # If the build name matches any regex, it will be blocked.
    build_exceptions = re.search(r'\[(.*)\]', lab_status['message'])
    if not build_exceptions or not build:
        return
    for build_pattern in build_exceptions.group(1).split():
        if re.match(build_pattern, build):
            raise TestLabException('Chromium OS Test Lab is closed: '
                                   '%s matches %s.' % (
                                           build, build_pattern))
    return


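# Illustrative example (values are hypothetical): the lab status message embeds
# blocked-build regexes in square brackets, so a status such as
#
#     {'general_state': 'open',
#      'message': "Lab is 'open' [lumpy-release/R46.*] (experimental blocked)"}
#
# lets most builds through, but _decode_lab_status raises TestLabException for
# a build like 'lumpy-release/R46-7272.0.0'.

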
def is_in_lab():
    """Check if the current Autotest instance is in the lab.

    @return: True if the Autotest instance is in the lab.
    """
    test_server_name = CONFIG.get_config_value('SERVER', 'hostname')
    return test_server_name.startswith('cautotest')


def check_lab_status(build):
    """Check if the lab status allows us to schedule for a build.

    Checks if the lab is down, or if testing for the requested build
    should be blocked.

    @param build: Name of the build to be scheduled for testing.

    @raises TestLabException Raised if a request to test for the given
                             status and build should be blocked.

    """
    # Ensure we are trying to schedule on the actual lab.
    if not is_in_lab():
        return

    # Download the lab status from its home on the web.
    status_url = CONFIG.get_config_value('CROS', 'lab_status_url')
    json_status = _get_lab_status(status_url)
    if json_status is None:
        # We go ahead and say the lab is open if we can't get the status.
        logging.warning('Could not get a status from %s', status_url)
        return
    _decode_lab_status(json_status, build)


def host_in_lab(hostname):
    """Check if a host is an in-lab DUT rather than a Moblab-managed one.

    @param hostname: Hostname of the host to check.
    @return: True if this is not a Moblab instance and the host is in the
             lab zone.
    """
    return (not utils.in_moblab_ssp()
            and not lsbrelease_utils.is_moblab()
            and utils.host_is_in_lab_zone(hostname))


def lock_host_with_labels(afe, lock_manager, labels):
    """Lookup and lock one host that matches the list of input labels.

    @param afe: An instance of the afe class, as defined in server.frontend.
    @param lock_manager: A lock manager capable of locking hosts, e.g. the
                         one defined in server.cros.host_lock_manager.
    @param labels: A list of labels to look for on hosts.

    @return: The hostname of a host matching all labels, and locked through
             the lock_manager. The hostname will be as specified in the
             database the afe object is associated with, i.e. if it exists in
             afe_hosts with a .cros suffix, the hostname returned will contain
             a .cros suffix.

    @raises: error.NoEligibleHostException: If no hosts matching the list of
             input labels are available.
    @raises: error.TestError: If unable to lock a host matching the labels.
    """
    potential_hosts = afe.get_hosts(multiple_labels=labels)
    if not potential_hosts:
        raise error.NoEligibleHostException(
                'No devices found with labels %s.' % labels)

    # This prevents errors where a fault might seem repeatable
    # because we lock, say, the same packet capturer for each test run.
    random.shuffle(potential_hosts)
    for host in potential_hosts:
        if lock_manager.lock([host.hostname]):
            logging.info('Locked device %s with labels %s.',
                         host.hostname, labels)
            return host.hostname
        else:
            logging.info('Unable to lock device %s with labels %s.',
                         host.hostname, labels)

    raise error.TestError('Could not lock a device with labels %s' % labels)


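# Illustrative usage (a sketch; the label values are hypothetical): reserve one
# device that carries every requested label, e.g. a packet capturer for a WiFi
# test; candidates are shuffled so repeated runs do not always pick the same
# host.
#
#     hostname = lock_host_with_labels(
#             afe, lock_manager, labels=['board:lumpy', 'packet_capture'])

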
def get_test_views_from_tko(suite_job_id, tko):
    """Get test name and result for given suite job ID.

    @param suite_job_id: ID of suite job.
    @param tko: an instance of TKO as defined in server/frontend.py.
    @return: A dictionary of test status keyed by test name, e.g.,
             {'dummy_Fail.Error': 'ERROR', 'dummy_Fail.NAError': 'TEST_NA'}
    @raise: Exception when there is no test view found.

    """
    views = tko.run('get_detailed_test_views', afe_job_id=suite_job_id)
    relevant_views = filter(job_status.view_is_relevant, views)
    if not relevant_views:
        raise Exception('Failed to retrieve job results.')

    test_views = {}
    for view in relevant_views:
        test_views[view['test_name']] = view['status']

    return test_views


def get_data_key(prefix, suite, build, board):
    """
    Constructs a key string from parameters.

    @param prefix: Prefix for the generated key.
    @param suite: A suite name, e.g., bvt-cq, bvt-inline, dummy.
    @param build: The build string. This string should have a consistent
                  format, e.g.: x86-mario-release/R26-3570.0.0. If the format
                  of this string changes such that we can't determine
                  build_type or branch, we give up and use the parameters
                  we're sure of instead (suite, board). E.g.:
                      1. build = x86-alex-pgo-release/R26-3570.0.0
                         branch = 26
                         build_type = pgo-release
                      2. build = lumpy-paladin/R28-3993.0.0-rc5
                         branch = 28
                         build_type = paladin
    @param board: The board that this suite ran on.
    @return: The key string used for a dictionary.
    """
    try:
        _board, build_type, branch = ParseBuildName(build)[:3]
    except ParseBuildNameException as e:
        logging.error(str(e))
        branch = 'Unknown'
        build_type = 'Unknown'
    else:
        embedded_str = re.search(r'x86-\w+-(.*)', _board)
        if embedded_str:
            build_type = embedded_str.group(1) + '-' + build_type

    data_key_dict = {
            'prefix': prefix,
            'board': board,
            'branch': branch,
            'build_type': build_type,
            'suite': suite,
    }
    return ('%(prefix)s.%(board)s.%(build_type)s.%(branch)s.%(suite)s'
            % data_key_dict)


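# Illustrative example (based on the docstring above; the prefix value is
# hypothetical): for a pgo-release build, the build type embedded in the board
# name is folded into build_type.
#
#     get_data_key('fail_rates', 'bvt',
#                  'x86-alex-pgo-release/R26-3570.0.0', 'x86-alex')
#     # -> 'fail_rates.x86-alex.pgo-release.26.bvt'

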
def setup_logging(logfile=None, prefix=False):
    """Set up basic logging with all logging info stripped.

    Calls to logging will only show the message. No severity is logged.

    @param logfile: If specified, dump output to a file as well.
    @param prefix: Flag for log prefix. Set to True to add a prefix to log
                   entries that includes timestamp and log level. Default is
                   False.
    """
    # TODO (xixuan): Delete this code when finishing replacing run_suite.py &
    # abort_suite.py in skylab.
    # Remove all existing handlers. client/common_lib/logging_config adds
    # a StreamHandler to logger when modules are imported, e.g.,
    # autotest_lib.client.bin.utils. A new StreamHandler will be added here to
    # log only messages, not severity.
    logging.getLogger().handlers = []

    if prefix:
        log_format = '%(asctime)s %(levelname)-5s| %(message)s'
    else:
        log_format = '%(message)s'

    screen_handler = logging.StreamHandler()
    screen_handler.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(screen_handler)
    logging.getLogger().setLevel(logging.INFO)
    if logfile:
        file_handler = logging.FileHandler(logfile)
        file_handler.setFormatter(logging.Formatter(log_format))
        file_handler.setLevel(logging.DEBUG)
        logging.getLogger().addHandler(file_handler)


def is_shard():
    """Determine if this instance is running as a shard.

    Reads the global_config value shard_hostname in the section SHARD.

    @return True, if shard_hostname is set, False otherwise.
    """
    hostname = CONFIG.get_config_value('SHARD', 'shard_hostname', default=None)
    return bool(hostname)


def get_global_afe_hostname():
    """Read the hostname of the global AFE from the global configuration."""
    return CONFIG.get_config_value('SERVER', 'global_afe_hostname')


def is_restricted_user(username):
    """Determine if a user is in a restricted group.

    Users in a restricted group only have access to the master.

    @param username: A string, representing a username.

    @returns: True if the user is in a restricted group.
    """
    if not username:
        return False

    restricted_groups = CONFIG.get_config_value(
            'AUTOTEST_WEB', 'restricted_groups', default='').split(',')
    for group in restricted_groups:
        try:
            if group and username in grp.getgrnam(group).gr_mem:
                return True
        except KeyError:
            logging.debug("%s is not a valid group.", group)
    return False


def get_special_task_status(is_complete, success, is_active):
    """Get the status of a special task.

    Emulate a host queue entry status for a special task.
    Although SpecialTasks are not HostQueueEntries, it is helpful to
    the user to present similar statuses.

    @param is_complete Boolean if the task is completed.
    @param success Boolean if the task succeeded.
    @param is_active Boolean if the task is active.

    @return The status of a special task.
    """
    if is_complete:
        if success:
            return host_queue_entry_states.Status.COMPLETED
        return host_queue_entry_states.Status.FAILED
    if is_active:
        return host_queue_entry_states.Status.RUNNING
    return host_queue_entry_states.Status.QUEUED


def get_special_task_exec_path(hostname, task_id, task_name, time_requested):
    """Get the execution path of the SpecialTask.

    This method returns different paths depending on where the task ran:
        * Master: hosts/hostname/task_id-task_type
        * Shard: Master_path/time_created
    This is to work around the fact that a shard can fail independently
    of the master, and be replaced by another shard that has the same
    hosts. Without the time_created stamp the logs of the tasks running
    on the second shard will clobber the logs from the first in google
    storage, because task ids are not globally unique.

    @param hostname Hostname
    @param task_id Special task id
    @param task_name Special task name (e.g., Verify, Repair, etc)
    @param time_requested Special task requested time.

    @return An execution path for the task.
    """
    results_path = 'hosts/%s/%s-%s' % (hostname, task_id, task_name.lower())

    # If we do this on the master it will break backward compatibility,
    # as there are tasks that currently don't have timestamps. If a host
    # or job has been sent to a shard, the rpc for that host/job will
    # be redirected to the shard, so this global_config check will happen
    # on the shard the logs are on.
    if not is_shard():
        return results_path

    # Generate a uid to disambiguate special task result directories
    # in case this shard fails. The simplest uid is the job_id, however
    # in rare cases tasks do not have jobs associated with them (e.g.:
    # frontend verify), so just use the creation timestamp. The clocks
    # between a shard and master should always be in sync. Any discrepancies
    # will be brought to our attention in the form of job timeouts.
    uid = time_requested.strftime('%Y%d%m%H%M%S')

    # TODO: This is a hack, however it is the easiest way to achieve
    # correctness. There is currently some debate over the future of
    # tasks in our infrastructure and refactoring everything right
    # now isn't worth the time.
    return '%s/%s' % (results_path, uid)


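# Illustrative example (hostname and task id are hypothetical):
#
#     get_special_task_exec_path('chromeos1-row1-host1', 123, 'Verify',
#                                time_requested)
#     # on the master -> 'hosts/chromeos1-row1-host1/123-verify'
#     # on a shard    -> 'hosts/chromeos1-row1-host1/123-verify/<timestamp>',
#     #                  where <timestamp> is time_requested as %Y%d%m%H%M%S.

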
def get_job_tag(id, owner):
    """Return a string tag for a job.

    @param id Job id
    @param owner Job owner

    """
    return '%s-%s' % (id, owner)


def get_hqe_exec_path(tag, execution_subdir):
    """Return an execution path to a HQE's results.

    @param tag Tag string for a job associated with a HQE.
    @param execution_subdir Execution sub-directory string of a HQE.

    """
    return os.path.join(tag, execution_subdir)


def is_inside_chroot():
    """Check if the process is running inside the chroot.

    This is a wrapper around chromite.lib.cros_build_lib.IsInsideChroot(). The
    method checks if cros_build_lib can be imported first.

    @return: True if the process is running inside the chroot; False if it is
             not, or if cros_build_lib cannot be imported.

    """
    try:
        # TODO(crbug.com/739466) This module import is delayed because it adds
        # 1-2 seconds to the module import time and most users of site_utils
        # don't need it. The correct fix is to break apart site_utils into more
        # meaningful chunks.
        from chromite.lib import cros_build_lib
    except ImportError:
        logging.warning('Unable to import chromite. Can not detect chroot. '
                        'Defaulting to False')
        return False
    return cros_build_lib.IsInsideChroot()


def parse_job_name(name):
    """Parse a job name to get information such as build, board and suite.

    Suite jobs created by run_suite follow the naming convention:
        [build]-test_suites/control.[suite]
    For example: lumpy-release/R46-7272.0.0-test_suites/control.bvt
    The naming convention is defined in rpc_interface.create_suite_job.

    Test jobs created by a suite job follow the naming convention:
        [build]/[suite]/[test name]
    For example: lumpy-release/R46-7272.0.0/bvt/login_LoginSuccess
    The naming convention is defined in
    server/cros/dynamic_suite/tools.create_job_name

    Note that pgo and chrome-perf builds will fail the method. Since the lab
    does not run tests for these builds, they can be ignored.
    Also, tests for Launch Control builds have a different naming convention;
    for those, the build ID is used as build_version.

    @param name: Name of the job.

    @return: A dictionary containing the test information. The keyvals
             include:
             build: Name of the build, e.g., lumpy-release/R46-7272.0.0
             build_version: The version of the build, e.g., R46-7272.0.0
             board: Name of the board, e.g., lumpy
             suite: Name of the test suite, e.g., bvt

    """
    info = {}
    suite_job_regex = r'([^/]*/[^/]*(?:/\d+)?)-test_suites/control\.(.*)'
    test_job_regex = r'([^/]*/[^/]*(?:/\d+)?)/([^/]+)/.*'
    match = re.match(suite_job_regex, name)
    if not match:
        match = re.match(test_job_regex, name)
    if match:
        info['build'] = match.groups()[0]
        info['suite'] = match.groups()[1]
        info['build_version'] = info['build'].split('/')[1]
        try:
            info['board'], _, _, _ = ParseBuildName(info['build'])
        except ParseBuildNameException:
            # Try to parse it as a Launch Control build.
            # Launch Control builds have the name format:
            #     branch/build_target-build_type/build_id.
            try:
                _, target, build_id = utils.parse_launch_control_build(
                        info['build'])
                build_target, _ = utils.parse_launch_control_target(target)
                if build_target:
                    info['board'] = build_target
                    info['build_version'] = build_id
            except ValueError:
                pass
    return info


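# Illustrative example (taken from the docstring above): a suite job name and
# the test job names it spawns parse to the same build information.
#
#     parse_job_name('lumpy-release/R46-7272.0.0-test_suites/control.bvt')
#     # -> {'build': 'lumpy-release/R46-7272.0.0',
#     #     'build_version': 'R46-7272.0.0',
#     #     'board': 'lumpy',
#     #     'suite': 'bvt'}

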
def verify_not_root_user():
    """Simple function to error out if running with uid == 0"""
    if os.getuid() == 0:
        raise error.IllegalUser('This script cannot be run as root.')


def get_hostname_from_machine(machine):
    """Lookup hostname from a machine string or dict.

    @returns: Machine hostname in string format.
    """
    hostname, _ = get_host_info_from_machine(machine)
    return hostname


def get_host_info_from_machine(machine):
    """Lookup host information from a machine string or dict.

    @returns: Tuple of (hostname, afe_host)
    """
    if isinstance(machine, dict):
        return (machine['hostname'], machine['afe_host'])
    else:
        return (machine, EmptyAFEHost())


def get_afe_host_from_machine(machine):
    """Return the afe_host from the machine dict if possible.

    @returns: AFE host object.
    """
    _, afe_host = get_host_info_from_machine(machine)
    return afe_host


def get_connection_pool_from_machine(machine):
    """Return the ssh_multiplex.ConnectionPool from machine if possible."""
    if not isinstance(machine, dict):
        return None
    return machine.get('connection_pool')


def get_creds_abspath(creds_file):
    """Return the abspath of the credentials file.

    If creds_file is already an absolute path, just return it.
    Otherwise, assume it is located in the creds directory
    specified in global_config and return the absolute path.

    @param creds_file: A path to the credentials file.
    @return: An absolute path to the credentials file.
    """
    if not creds_file:
        return None
    if os.path.isabs(creds_file):
        return creds_file
    creds_dir = CONFIG.get_config_value('SERVER', 'creds_dir', default='')
    if not creds_dir or not os.path.exists(creds_dir):
        creds_dir = common.autotest_dir
    return os.path.join(creds_dir, creds_file)


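# Illustrative example (the file names and directories are hypothetical): an
# absolute path passes through unchanged, while a bare file name is resolved
# against the configured creds directory, falling back to the autotest root.
#
#     get_creds_abspath('/abs/path/service_creds.json')
#     # -> '/abs/path/service_creds.json'
#     get_creds_abspath('service_creds.json')
#     # -> os.path.join(<creds_dir or autotest_dir>, 'service_creds.json')

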
def SetupTsMonGlobalState(*args, **kwargs):
    """Import-safe wrapper around chromite.lib.ts_mon_config's setup function.

    @param *args: Args to pass through.
    @param **kwargs: Kwargs to pass through.
    """
    try:
        # TODO(crbug.com/739466) This module import is delayed because it adds
        # 1-2 seconds to the module import time and most users of site_utils
        # don't need it. The correct fix is to break apart site_utils into more
        # meaningful chunks.
        from chromite.lib import ts_mon_config
    except ImportError:
        logging.warning('Unable to import chromite. Monarch is disabled.')
        return TrivialContextManager()

    try:
        context = ts_mon_config.SetupTsMonGlobalState(*args, **kwargs)
        if hasattr(context, '__exit__'):
            return context
    except Exception as e:
        logging.warning('Caught an exception trying to setup ts_mon, '
                        'monitoring is disabled: %s', e, exc_info=True)
    return TrivialContextManager()


@contextlib.contextmanager
def TrivialContextManager(*args, **kwargs):
    """Context manager that does nothing.

    @param *args: Ignored args.
    @param **kwargs: Ignored kwargs.
    """
    yield


def wait_for_idle_duts(duts, afe, max_wait=IDLE_DUT_WAIT_TIMEOUT):
    """Wait for the hosts to all go idle.

    @param duts: List of duts to check for idle state.
    @param afe: afe instance.
    @param max_wait: Max wait time in seconds to wait for duts to be idle.

    @returns Boolean True if all hosts are idle or False if any hosts did not
             go idle within max_wait.
    """
    start_time = time.time()
    # We make a shallow copy since we're going to be modifying active_dut_list.
    active_dut_list = duts[:]
    while active_dut_list:
        # Let's rate-limit how often we hit the AFE.
        time.sleep(1)

        # Check if we've waited too long.
        if (time.time() - start_time) > max_wait:
            return False

        # Get the status for the duts and see if they're in the idle state.
        afe_hosts = afe.get_hosts(active_dut_list)
        idle_duts = [afe_host.hostname for afe_host in afe_hosts
                     if afe_host.status in host_states.IDLE_STATES]

        # Take out idle duts so we don't needlessly check them
        # next time around.
        for idle_dut in idle_duts:
            active_dut_list.remove(idle_dut)

        logging.info('still waiting for following duts to go idle: %s',
                     active_dut_list)
    return True


@contextlib.contextmanager
def lock_duts_and_wait(duts, afe, lock_msg='default lock message',
                       max_wait=IDLE_DUT_WAIT_TIMEOUT):
    """Context manager to lock the duts and wait for them to go idle.

    @param duts: List of duts to lock.
    @param afe: afe instance.
    @param lock_msg: message for afe on locking this host.
    @param max_wait: Max wait time in seconds to wait for duts to be idle.

    @returns (yields) Boolean: True if all the locked duts went idle within
             max_wait, False if we timed out waiting for them to go idle.
    """
    locked_duts = []
    try:
        duts.sort()
        for dut in duts:
            if afe.lock_host(dut, lock_msg, fail_if_locked=True):
                locked_duts.append(dut)
            else:
                logging.info('%s already locked', dut)
        yield wait_for_idle_duts(locked_duts, afe, max_wait)
    finally:
        afe.unlock_hosts(locked_duts)


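# Illustrative usage (a sketch; the hostnames are hypothetical): lock a set of
# DUTs, wait for any in-flight work on them to finish, and unlock them on exit.
#
#     with lock_duts_and_wait(['host1', 'host2'], afe,
#                             lock_msg='locked for maintenance') as all_idle:
#         if all_idle:
#             pass  # every locked DUT went idle within the timeout

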
def _get_default_size_info(path):
    """Get the default result size information.

    In case the directory summary fails to build, assume the test result is
    not throttled and all result sizes are the size of existing test results.

    @return: A namedtuple of result size information, including:
            client_result_collected_KB: The total size (in KB) of test results
                    collected from the test device. Set to be the total size
                    of the given path.
            original_result_total_KB: The original size (in KB) of test
                    results before being trimmed. Set to be the total size of
                    the given path.
            result_uploaded_KB: The total size (in KB) of test results to be
                    uploaded. Set to be the total size of the given path.
            result_throttled: True if test results collection is throttled.
                    It's set to False in this default behavior.
    """
    total_size = file_utils.get_directory_size_kibibytes(path)
    return result_utils_lib.ResultSizeInfo(
            client_result_collected_KB=total_size,
            original_result_total_KB=total_size,
            result_uploaded_KB=total_size,
            result_throttled=False)


def _report_result_size_metrics(result_size_info):
    """Report result size information to metrics.

    @param result_size_info: A ResultSizeInfo namedtuple containing information
                             about test result sizes.
    """
    fields = {'result_throttled' : result_size_info.result_throttled}
    metrics.Counter(RESULT_METRICS_PREFIX + 'client_result_collected_KB',
                    description='The total size (in KB) of test results '
                                'collected from the test device. Set to be '
                                'the total size of the given path.'
                    ).increment_by(result_size_info.client_result_collected_KB,
                                   fields=fields)
    metrics.Counter(RESULT_METRICS_PREFIX + 'original_result_total_KB',
                    description='The original size (in KB) of test results '
                                'before being trimmed.'
                    ).increment_by(result_size_info.original_result_total_KB,
                                   fields=fields)
    metrics.Counter(RESULT_METRICS_PREFIX + 'result_uploaded_KB',
                    description='The total size (in KB) of test results to '
                                'be uploaded.'
                    ).increment_by(result_size_info.result_uploaded_KB,
                                   fields=fields)


@metrics.SecondsTimerDecorator(
        'chromeos/autotest/result_collection/collect_result_sizes_duration')
def collect_result_sizes(path, log=logging.debug):
    """Collect result size information and build the result summary.

    It first tries to merge directory summaries and calculate the result sizes,
    including:
    client_result_collected_KB: The volume in KB that is transferred from the
            test device.
    original_result_total_KB: The volume in KB that is the original size of
            the result files before being trimmed.
    result_uploaded_KB: The volume in KB that will be uploaded.
    result_throttled: Indicating if the result files were throttled.

    If directory summary merging fails for any reason, fall back to using the
    total size of the given result directory.

    @param path: Path of the result directory to get size information for.
    @param log: The logging method, defaults to logging.debug.
    @return: A ResultSizeInfo namedtuple containing information about test
             result sizes.
    """
    try:
        client_collected_bytes, summary, files = result_utils.merge_summaries(
                path)
        result_size_info = result_utils_lib.get_result_size_info(
                client_collected_bytes, summary)
        html_file = os.path.join(path, result_view.DEFAULT_RESULT_SUMMARY_NAME)
        result_view.build(client_collected_bytes, summary, html_file)

        # Delete all summary files after the final view is built.
        for summary_file in files:
            os.remove(summary_file)
    except:
        log('Failed to calculate result sizes based on directory summaries '
            'for directory %s. Fall back to record the total size.\n'
            'Exception: %s' % (path, traceback.format_exc()))
        result_size_info = _get_default_size_info(path)

    _report_result_size_metrics(result_size_info)

    return result_size_info
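

# Illustrative usage (a sketch; the results path is hypothetical): this is
# typically called once per job results directory after results are collected.
#
#     size_info = collect_result_sizes('/usr/local/autotest/results/123-debug')
#     if size_info.result_throttled:
#         logging.warning('Result collection for this job was throttled.')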