# Copyright (c) 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.


import contextlib
import grp
import httplib
import json
import logging
import os
import random
import re
import time
import traceback
import urllib2

import common
from autotest_lib.client.bin.result_tools import utils as result_utils
from autotest_lib.client.bin.result_tools import utils_lib as result_utils_lib
from autotest_lib.client.bin.result_tools import view as result_view
from autotest_lib.client.common_lib import utils
from autotest_lib.client.common_lib import error
from autotest_lib.client.common_lib import file_utils
from autotest_lib.client.common_lib import global_config
from autotest_lib.client.common_lib import host_queue_entry_states
from autotest_lib.client.common_lib import host_states
from autotest_lib.server.cros import provision
from autotest_lib.server.cros.dynamic_suite import constants
from autotest_lib.server.cros.dynamic_suite import job_status

try:
    from chromite.lib import metrics
except ImportError:
    metrics = utils.metrics_mock


CONFIG = global_config.global_config

_SHERIFF_JS = CONFIG.get_config_value('NOTIFICATIONS', 'sheriffs', default='')
_LAB_SHERIFF_JS = CONFIG.get_config_value(
        'NOTIFICATIONS', 'lab_sheriffs', default='')
_CHROMIUM_BUILD_URL = CONFIG.get_config_value(
        'NOTIFICATIONS', 'chromium_build_url', default='')

LAB_GOOD_STATES = ('open', 'throttled')

ENABLE_DRONE_IN_RESTRICTED_SUBNET = CONFIG.get_config_value(
        'CROS', 'enable_drone_in_restricted_subnet', type=bool,
        default=False)

# Wait at most 10 mins for DUTs to go idle.
IDLE_DUT_WAIT_TIMEOUT = 600

# Mapping between board name and build target. This is for special-case
# handling of certain Android boards whose board name and build target name
# do not match.
ANDROID_TARGET_TO_BOARD_MAP = {
        'seed_l8150': 'gm4g_sprout',
        'bat_land': 'bat'
        }
ANDROID_BOARD_TO_TARGET_MAP = {
        'gm4g_sprout': 'seed_l8150',
        'bat': 'bat_land'
        }
# Prefix for the metrics name for result size information.
RESULT_METRICS_PREFIX = 'chromeos/autotest/result_collection/'


class TestLabException(Exception):
    """Exception raised when the Test Lab blocks a test or suite."""
    pass


class ParseBuildNameException(Exception):
    """Raised when ParseBuildName() cannot parse a build name."""
    pass


class Singleton(type):
    """Enforce that only one client class is instantiated per process."""
    _instances = {}

    def __call__(cls, *args, **kwargs):
        """Fetch the instance of a class to use for subsequent calls."""
        if cls not in cls._instances:
            cls._instances[cls] = super(Singleton, cls).__call__(
                    *args, **kwargs)
        return cls._instances[cls]
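

# Illustrative usage sketch: a client class opts into the per-process
# instance cache by declaring Singleton as its metaclass (Python 2 syntax,
# matching this module). The class name below is hypothetical.
#
#     class _CachedAFE(object):
#         __metaclass__ = Singleton
#
#         def __init__(self, server=None):
#             self.server = server
#
#     first = _CachedAFE(server='cautotest')
#     second = _CachedAFE()   # returns the already-constructed instance
#     assert first is second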


class EmptyAFEHost(object):
    """Object to represent an AFE host object when there is no AFE."""

    def __init__(self):
        """
        We'll be setting the instance attributes as we use them. Right now we
        only use attributes and labels, but as other attributes of an actual
        AFE Host object come into use (check rpc_interfaces.get_hosts()),
        we'll add them here so users won't be perplexed why their host's
        afe_host object complains that an attribute doesn't exist.
        """
        self.attributes = {}
        self.labels = []


def ParseBuildName(name):
    """Parse a build name into its board, type, milestone, and manifest number.

    @param name: a build name, e.g. 'x86-alex-release/R20-2015.0.0' or a
                 relative build name, e.g. 'x86-alex-release/LATEST'

    @return board: board the manifest is for, e.g. x86-alex.
    @return type: one of 'release', 'factory', or 'firmware'
    @return milestone: (numeric) milestone the manifest was associated with.
                       Will be None for relative build names.
    @return manifest: manifest number, e.g. '2015.0.0'.
                      Will be None for relative build names.

    """
    match = re.match(r'(trybot-)?(?P<board>[\w-]+?)(?:-chrome)?(?:-chromium)?'
                     r'-(?P<type>\w+)/(R(?P<milestone>\d+)-'
                     r'(?P<manifest>[\d.ab-]+)|LATEST)',
                     name)
    if match and len(match.groups()) >= 5:
        return (match.group('board'), match.group('type'),
                match.group('milestone'), match.group('manifest'))
    raise ParseBuildNameException('%s is a malformed build name.' % name)
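

# Illustrative results, derived from the regex above:
#
#     ParseBuildName('x86-alex-release/R20-2015.0.0')
#         -> ('x86-alex', 'release', '20', '2015.0.0')
#     ParseBuildName('lumpy-paladin/R28-3993.0.0')
#         -> ('lumpy', 'paladin', '28', '3993.0.0')
#     ParseBuildName('x86-alex-release/LATEST')
#         -> ('x86-alex', 'release', None, None)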


def get_labels_from_afe(hostname, label_prefix, afe):
    """Retrieve a host's specific labels from the AFE.

    Looks for host labels that have the form <label_prefix>:<value>
    and returns the "<value>" part of each label. None is returned
    if there is no label matching the pattern.

    @param hostname: hostname of given DUT.
    @param label_prefix: prefix of label to be matched, e.g., |board:|
    @param afe: afe instance.

    @returns A list of labels that match the prefix, or None.

    """
    labels = afe.get_labels(name__startswith=label_prefix,
                            host__hostname__in=[hostname])
    if labels:
        return [l.name.split(label_prefix, 1)[1] for l in labels]


def get_label_from_afe(hostname, label_prefix, afe):
    """Retrieve a host's specific label from the AFE.

    Looks for a host label that has the form <label_prefix>:<value>
    and returns the "<value>" part of the label. None is returned
    if there is no label matching the pattern.

    @param hostname: hostname of given DUT.
    @param label_prefix: prefix of label to be matched, e.g., |board:|
    @param afe: afe instance.
    @returns the label that matches the prefix, or None.

    """
    labels = get_labels_from_afe(hostname, label_prefix, afe)
    if labels and len(labels) == 1:
        return labels[0]


def get_board_from_afe(hostname, afe):
    """Retrieve given host's board from its labels in the AFE.

    Looks for a host label of the form "board:<board>", and
    returns the "<board>" part of the label. `None` is returned
    if there is not a single, unique label matching the pattern.

    @param hostname: hostname of given DUT.
    @param afe: afe instance.
    @returns board from label, or `None`.

    """
    return get_label_from_afe(hostname, constants.BOARD_PREFIX, afe)


def get_build_from_afe(hostname, afe):
    """Retrieve the current build for a given host from the AFE.

    Looks through the host's labels in the AFE to determine its build.

    @param hostname: hostname of given DUT.
    @param afe: afe instance.
    @returns The current build or None if it could not find it or if there
             were multiple build labels assigned to this host.

    """
    for prefix in [provision.CROS_VERSION_PREFIX,
                   provision.ANDROID_BUILD_VERSION_PREFIX]:
        build = get_label_from_afe(hostname, prefix + ':', afe)
        if build:
            return build
    return None


# TODO(fdeng): fix get_sheriffs crbug.com/483254
def get_sheriffs(lab_only=False):
    """
    Polls the javascript file that holds the identity of the sheriff and
    parses its output to return a list of chromium sheriff email addresses.
    The javascript file can contain the ldap of more than one sheriff, e.g.:
    document.write('sheriff_one, sheriff_two').

    @param lab_only: if True, only pulls the lab sheriff.
    @return: A list of chromium.org sheriff email addresses to cc on the bug.
             An empty list if the javascript could not be parsed.
    """
    sheriff_ids = []
    sheriff_js_list = _LAB_SHERIFF_JS.split(',')
    if not lab_only:
        sheriff_js_list.extend(_SHERIFF_JS.split(','))

    for sheriff_js in sheriff_js_list:
        try:
            url_content = utils.urlopen('%s%s' % (
                    _CHROMIUM_BUILD_URL, sheriff_js)).read()
        except (ValueError, IOError) as e:
            logging.warning('could not parse sheriff from url %s%s: %s',
                            _CHROMIUM_BUILD_URL, sheriff_js, str(e))
        except (urllib2.URLError, httplib.HTTPException) as e:
            logging.warning('unexpected error reading from url "%s%s": %s',
                            _CHROMIUM_BUILD_URL, sheriff_js, str(e))
        else:
            ldaps = re.search(r"document.write\('(.*)'\)", url_content)
            if not ldaps:
                logging.warning('Could not retrieve sheriff ldaps for: %s',
                                url_content)
                continue
            sheriff_ids += ['%s@chromium.org' % alias.replace(' ', '')
                            for alias in ldaps.group(1).split(',')]
    return sheriff_ids


def remote_wget(source_url, dest_path, ssh_cmd):
    """wget source_url from localhost to dest_path on remote host using ssh.

    @param source_url: The complete url of the source of the package to send.
    @param dest_path: The path on the remote host's file system where we would
                      like to store the package.
    @param ssh_cmd: The ssh command to use in performing the remote wget.
    """
    wget_cmd = ("wget -O - %s | %s 'cat >%s'" %
                (source_url, ssh_cmd, dest_path))
    utils.run(wget_cmd)


_MAX_LAB_STATUS_ATTEMPTS = 5


def _get_lab_status(status_url):
    """Grabs the current lab status and message.

    @param status_url: URL of the lab status page to poll.

    @returns The JSON object obtained from the given URL, or None if the
             status could not be fetched.

    """
    retry_waittime = 1
    for _ in range(_MAX_LAB_STATUS_ATTEMPTS):
        try:
            response = urllib2.urlopen(status_url)
        except IOError as e:
            logging.debug('Error occurred when grabbing the lab status: %s.',
                          e)
            time.sleep(retry_waittime)
            continue
        # Check for successful response code.
        if response.getcode() == 200:
            return json.load(response)
        time.sleep(retry_waittime)
    return None


def _decode_lab_status(lab_status, build):
    """Decode lab status, and report exceptions as needed.

    Take a deserialized JSON object from the lab status page, and
    interpret it to determine the actual lab status. Raise
    exceptions as required to report when the lab is down.

    @param lab_status: A deserialized JSON object from the lab status page.
    @param build: build name that we want to check the status of.

    @raises TestLabException Raised if a request to test for the given
                             status and build should be blocked.
    """
    # First check if the lab is up.
    if not lab_status['general_state'] in LAB_GOOD_STATES:
        raise TestLabException('Chromium OS Test Lab is closed: '
                               '%s.' % lab_status['message'])

    # Check if the build we wish to use is disabled.
    # Lab messages should be in the format of:
    #     Lab is 'status' [regex ...] (comment)
    # If the build name matches any regex, it will be blocked.
    build_exceptions = re.search(r'\[(.*)\]', lab_status['message'])
    if not build_exceptions or not build:
        return
    for build_pattern in build_exceptions.group(1).split():
        if re.match(build_pattern, build):
            raise TestLabException('Chromium OS Test Lab is closed: '
                                   '%s matches %s.' % (build, build_pattern))
    return
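

# Illustrative payload (the message text is an assumed example of the format
# parsed above):
#
#     {'general_state': 'open',
#      'message': "Lab is 'open' [lumpy-release/R28.* stumpy-.*] (crbug/123)"}
#
# With this payload the lab stays usable, but _decode_lab_status() raises
# TestLabException for any build matching one of the bracketed regexes,
# e.g. 'lumpy-release/R28-3993.0.0'.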


def is_in_lab():
    """Check if the current Autotest instance is in the lab.

    @return: True if the Autotest instance is in the lab.
    """
    test_server_name = CONFIG.get_config_value('SERVER', 'hostname')
    return test_server_name.startswith('cautotest')


def check_lab_status(build):
    """Check if the lab status allows us to schedule for a build.

    Checks if the lab is down, or if testing for the requested build
    should be blocked.

    @param build: Name of the build to be scheduled for testing.

    @raises TestLabException Raised if a request to test for the given
                             status and build should be blocked.

    """
    # Ensure we are trying to schedule on the actual lab.
    if not is_in_lab():
        return

    # Download the lab status from its home on the web.
    status_url = CONFIG.get_config_value('CROS', 'lab_status_url')
    json_status = _get_lab_status(status_url)
    if json_status is None:
        # We go ahead and say the lab is open if we can't get the status.
        logging.warning('Could not get a status from %s', status_url)
        return
    _decode_lab_status(json_status, build)


def lock_host_with_labels(afe, lock_manager, labels):
    """Lookup and lock one host that matches the list of input labels.

    @param afe: An instance of the afe class, as defined in server.frontend.
    @param lock_manager: A lock manager capable of locking hosts, e.g. the
                         one defined in server.cros.host_lock_manager.
    @param labels: A list of labels to look for on hosts.

    @return: The hostname of a host matching all labels, and locked through
             the lock_manager. The hostname will be as specified in the
             database the afe object is associated with, i.e., if it exists
             in afe_hosts with a .cros suffix, the hostname returned will
             contain a .cros suffix.

    @raises: error.NoEligibleHostException: If no hosts matching the list of
             input labels are available.
    @raises: error.TestError: If unable to lock a host matching the labels.
    """
    potential_hosts = afe.get_hosts(multiple_labels=labels)
    if not potential_hosts:
        raise error.NoEligibleHostException(
                'No devices found with labels %s.' % labels)

    # This prevents errors where a fault might seem repeatable
    # because we lock, say, the same packet capturer for each test run.
    random.shuffle(potential_hosts)
    for host in potential_hosts:
        if lock_manager.lock([host.hostname]):
            logging.info('Locked device %s with labels %s.',
                         host.hostname, labels)
            return host.hostname
        else:
            logging.info('Unable to lock device %s with labels %s.',
                         host.hostname, labels)

    raise error.TestError('Could not lock a device with labels %s' % labels)
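

# Minimal usage sketch ('afe' and 'manager' are assumed to be pre-built
# server.frontend.AFE and server.cros.host_lock_manager.HostLockManager
# instances; the labels are made up):
#
#     hostname = lock_host_with_labels(
#             afe, manager, labels=['board:lumpy', 'packet_capture'])
#     # ... run against the locked host; the manager is responsible for
#     # unlocking it afterwards.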


def get_test_views_from_tko(suite_job_id, tko):
    """Get test name and result for given suite job ID.

    @param suite_job_id: ID of suite job.
    @param tko: an instance of TKO as defined in server/frontend.py.
    @return: A dictionary of test status keyed by test name, e.g.,
             {'dummy_Fail.Error': 'ERROR', 'dummy_Fail.NAError': 'TEST_NA'}
    @raise: Exception when there is no test view found.

    """
    views = tko.run('get_detailed_test_views', afe_job_id=suite_job_id)
    relevant_views = filter(job_status.view_is_relevant, views)
    if not relevant_views:
        raise Exception('Failed to retrieve job results.')

    test_views = {}
    for view in relevant_views:
        test_views[view['test_name']] = view['status']

    return test_views


def get_data_key(prefix, suite, build, board):
    """
    Constructs a key string from parameters.

    @param prefix: Prefix for the generated key.
    @param suite: a suite name, e.g., bvt-cq, bvt-inline, dummy
    @param build: The build string. This string should have a consistent
        format, e.g.: x86-mario-release/R26-3570.0.0. If the format of this
        string changes such that we can't determine build_type or branch,
        we give up and use the parameters we're sure of instead (suite,
        board). E.g.:
            1. build = x86-alex-pgo-release/R26-3570.0.0
               branch = 26
               build_type = pgo-release
            2. build = lumpy-paladin/R28-3993.0.0-rc5
               branch = 28
               build_type = paladin
    @param board: The board that this suite ran on.
    @return: The key string used for a dictionary.
    """
    try:
        _board, build_type, branch = ParseBuildName(build)[:3]
    except ParseBuildNameException as e:
        logging.error(str(e))
        branch = 'Unknown'
        build_type = 'Unknown'
    else:
        embedded_str = re.search(r'x86-\w+-(.*)', _board)
        if embedded_str:
            build_type = embedded_str.group(1) + '-' + build_type

    data_key_dict = {
            'prefix': prefix,
            'board': board,
            'branch': branch,
            'build_type': build_type,
            'suite': suite,
    }
    return ('%(prefix)s.%(board)s.%(build_type)s.%(branch)s.%(suite)s'
            % data_key_dict)
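

# Worked example (the 'stats' prefix is hypothetical; the other values follow
# from ParseBuildName above):
#
#     get_data_key('stats', 'bvt', 'x86-alex-pgo-release/R26-3570.0.0',
#                  'x86-alex')
#         -> 'stats.x86-alex.pgo-release.26.bvt'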


def setup_logging(logfile=None, prefix=False):
    """Set up basic logging with all logging info stripped.

    Calls to logging will only show the message. No severity is logged.

    @param logfile: If specified, also dump output to a file.
    @param prefix: Flag for log prefix. Set to True to add a prefix to log
                   entries to include timestamp and log level. Default is
                   False.
    """
    # Remove all existing handlers. client/common_lib/logging_config adds
    # a StreamHandler to logger when modules are imported, e.g.,
    # autotest_lib.client.bin.utils. A new StreamHandler will be added here to
    # log only messages, not severity.
    logging.getLogger().handlers = []

    if prefix:
        log_format = '%(asctime)s %(levelname)-5s| %(message)s'
    else:
        log_format = '%(message)s'

    screen_handler = logging.StreamHandler()
    screen_handler.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(screen_handler)
    logging.getLogger().setLevel(logging.INFO)
    if logfile:
        file_handler = logging.FileHandler(logfile)
        file_handler.setFormatter(logging.Formatter(log_format))
        file_handler.setLevel(logging.DEBUG)
        logging.getLogger().addHandler(file_handler)


def is_shard():
    """Determines if this instance is running as a shard.

    Reads the global_config value shard_hostname in the section SHARD.

    @return True, if shard_hostname is set, False otherwise.
    """
    hostname = CONFIG.get_config_value('SHARD', 'shard_hostname', default=None)
    return bool(hostname)


def get_global_afe_hostname():
    """Read the hostname of the global AFE from the global configuration."""
    return CONFIG.get_config_value('SERVER', 'global_afe_hostname')


def is_restricted_user(username):
    """Determines if a user is in a restricted group.

    Users in restricted groups only have access to the master.

    @param username: A string, representing a username.

    @returns: True if the user is in a restricted group.
    """
    if not username:
        return False

    restricted_groups = CONFIG.get_config_value(
            'AUTOTEST_WEB', 'restricted_groups', default='').split(',')
    for group in restricted_groups:
        try:
            if group and username in grp.getgrnam(group).gr_mem:
                return True
        except KeyError:
            logging.debug("%s is not a valid group.", group)
    return False


def get_special_task_status(is_complete, success, is_active):
    """Get the status of a special task.

    Emulate a host queue entry status for a special task.
    Although SpecialTasks are not HostQueueEntries, it is helpful to
    the user to present similar statuses.

    @param is_complete Boolean if the task is completed.
    @param success Boolean if the task succeeded.
    @param is_active Boolean if the task is active.

    @return The status of a special task.
    """
    if is_complete:
        if success:
            return host_queue_entry_states.Status.COMPLETED
        return host_queue_entry_states.Status.FAILED
    if is_active:
        return host_queue_entry_states.Status.RUNNING
    return host_queue_entry_states.Status.QUEUED


def get_special_task_exec_path(hostname, task_id, task_name, time_requested):
    """Get the execution path of the SpecialTask.

    This method returns different paths depending on where the task ran:
        * Master: hosts/hostname/task_id-task_type
        * Shard: Master_path/time_created
    This is to work around the fact that a shard can fail independently
    of the master, and be replaced by another shard that has the same
    hosts. Without the time_created stamp the logs of the tasks running
    on the second shard will clobber the logs from the first in google
    storage, because task ids are not globally unique.

    @param hostname Hostname
    @param task_id Special task id
    @param task_name Special task name (e.g., Verify, Repair, etc.)
    @param time_requested Special task requested time.

    @return An execution path for the task.
    """
    results_path = 'hosts/%s/%s-%s' % (hostname, task_id, task_name.lower())

    # If we do this on the master it will break backward compatibility,
    # as there are tasks that currently don't have timestamps. If a host
    # or job has been sent to a shard, the rpc for that host/job will
    # be redirected to the shard, so this global_config check will happen
    # on the shard the logs are on.
    if not is_shard():
        return results_path

    # Generate a uid to disambiguate special task result directories
    # in case this shard fails. The simplest uid is the job_id, however
    # in rare cases tasks do not have jobs associated with them (e.g.:
    # frontend verify), so just use the creation timestamp. The clocks
    # between a shard and master should always be in sync. Any discrepancies
    # will be brought to our attention in the form of job timeouts.
    uid = time_requested.strftime('%Y%d%m%H%M%S')

    # TODO: This is a hack, however it is the easiest way to achieve
    # correctness. There is currently some debate over the future of
    # tasks in our infrastructure and refactoring everything right
    # now isn't worth the time.
    return '%s/%s' % (results_path, uid)
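

# Illustrative results (hostname, task id, and timestamp are made up):
#
#     get_special_task_exec_path('chromeos1-row1-rack1-host1', 123, 'Verify',
#                                datetime.datetime(2017, 5, 1, 12, 30, 15))
#         -> 'hosts/chromeos1-row1-rack1-host1/123-verify'    (on the master)
#         -> 'hosts/chromeos1-row1-rack1-host1/123-verify/20170105123015'
#                                                             (on a shard)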


def get_job_tag(id, owner):
    """Returns a string tag for a job.

    @param id Job id
    @param owner Job owner

    """
    return '%s-%s' % (id, owner)


def get_hqe_exec_path(tag, execution_subdir):
    """Returns an execution path to a HQE's results.

    @param tag Tag string for a job associated with a HQE.
    @param execution_subdir Execution sub-directory string of a HQE.

    """
    return os.path.join(tag, execution_subdir)


def is_inside_chroot():
    """Check if the process is running inside the chroot.

    This is a wrapper around chromite.lib.cros_build_lib.IsInsideChroot(). The
    method checks if cros_build_lib can be imported first.

    @return: True if the process is running inside the chroot or if
             cros_build_lib cannot be imported.

    """
    try:
        # TODO(crbug.com/739466) This module import is delayed because it adds
        # 1-2 seconds to the module import time and most users of site_utils
        # don't need it. The correct fix is to break apart site_utils into more
        # meaningful chunks.
        from chromite.lib import cros_build_lib
    except ImportError:
        logging.warning('Unable to import chromite. Can not detect chroot. '
                        'Defaulting to False')
        return False
    return cros_build_lib.IsInsideChroot()


def parse_job_name(name):
    """Parse a job name to get information such as build, board and suite.

    A suite job created by run_suite follows the naming convention of:
        [build]-test_suites/control.[suite]
    For example: lumpy-release/R46-7272.0.0-test_suites/control.bvt
    The naming convention is defined in rpc_interface.create_suite_job.

    A test job created by a suite job follows the naming convention of:
        [build]/[suite]/[test name]
    For example: lumpy-release/R46-7272.0.0/bvt/login_LoginSuccess
    The naming convention is defined in
    server/cros/dynamic_suite/tools.create_job_name

    Note that this method fails for pgo and chrome-perf builds. Since the lab
    does not run tests for these builds, they can be ignored.
    Also, tests for Launch Control builds have a different naming convention.
    The build ID will be used as build_version.

    @param name: Name of the job.

    @return: A dictionary containing the test information. The keyvals include:
             build: Name of the build, e.g., lumpy-release/R46-7272.0.0
             build_version: The version of the build, e.g., R46-7272.0.0
             board: Name of the board, e.g., lumpy
             suite: Name of the test suite, e.g., bvt

    """
    info = {}
    suite_job_regex = r'([^/]*/[^/]*(?:/\d+)?)-test_suites/control\.(.*)'
    test_job_regex = r'([^/]*/[^/]*(?:/\d+)?)/([^/]+)/.*'
    match = re.match(suite_job_regex, name)
    if not match:
        match = re.match(test_job_regex, name)
    if match:
        info['build'] = match.groups()[0]
        info['suite'] = match.groups()[1]
        info['build_version'] = info['build'].split('/')[1]
        try:
            info['board'], _, _, _ = ParseBuildName(info['build'])
        except ParseBuildNameException:
            # Try to parse it as a Launch Control build.
            # Launch Control builds have the name format:
            #     branch/build_target-build_type/build_id.
            try:
                _, target, build_id = utils.parse_launch_control_build(
                        info['build'])
                build_target, _ = utils.parse_launch_control_target(target)
                if build_target:
                    info['board'] = build_target
                    info['build_version'] = build_id
            except ValueError:
                pass
    return info
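

# Illustrative results, derived from the naming conventions documented above:
#
#     parse_job_name('lumpy-release/R46-7272.0.0-test_suites/control.bvt')
#         -> {'build': 'lumpy-release/R46-7272.0.0',
#             'build_version': 'R46-7272.0.0',
#             'board': 'lumpy',
#             'suite': 'bvt'}
#
#     parse_job_name('lumpy-release/R46-7272.0.0/bvt/login_LoginSuccess')
#         -> same build, build_version and board, with 'suite': 'bvt'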


def add_label_detector(label_function_list, label_list=None, label=None):
    """Decorator used to group functions together into the provided list.

    This is a helper function to automatically add label functions that have
    the label decorator. This is to help populate the class list of label
    functions to be retrieved by the get_labels class method.

    @param label_function_list: List of label detecting functions to add
                                the decorated function to.
    @param label_list: List of detectable labels to add detectable labels to.
                       (Default: None)
    @param label: Label string that is detectable by this detection function
                  (Default: None)
    """
    def add_func(func):
        """
        @param func: The function to be added as a detector.
        """
        label_function_list.append(func)
        if label and label_list is not None:
            label_list.append(label)
        return func
    return add_func
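

# Minimal usage sketch (the detector class and label below are hypothetical):
#
#     class ExampleLabelDetector(object):
#         _LABEL_FUNCTIONS = []
#         _LABELS = []
#
#         @add_label_detector(_LABEL_FUNCTIONS, _LABELS, label='lightsensor')
#         def has_lightsensor(self):
#             return True
#
#     # _LABEL_FUNCTIONS now contains has_lightsensor, and _LABELS contains
#     # 'lightsensor'.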


def verify_not_root_user():
    """Simple function to error out if running with uid == 0"""
    if os.getuid() == 0:
        raise error.IllegalUser('This script cannot be run as root.')


def get_hostname_from_machine(machine):
    """Lookup hostname from a machine string or dict.

    @returns: Machine hostname in string format.
    """
    hostname, _ = get_host_info_from_machine(machine)
    return hostname


def get_host_info_from_machine(machine):
    """Lookup host information from a machine string or dict.

    @returns: Tuple of (hostname, afe_host)
    """
    if isinstance(machine, dict):
        return (machine['hostname'], machine['afe_host'])
    else:
        return (machine, EmptyAFEHost())


def get_afe_host_from_machine(machine):
    """Return the afe_host from the machine dict if possible.

    @returns: AFE host object.
    """
    _, afe_host = get_host_info_from_machine(machine)
    return afe_host


def get_connection_pool_from_machine(machine):
    """Returns the ssh_multiplex.ConnectionPool from machine if possible."""
    if not isinstance(machine, dict):
        return None
    return machine.get('connection_pool')


def get_creds_abspath(creds_file):
    """Returns the abspath of the credentials file.

    If creds_file is already an absolute path, just return it.
    Otherwise, assume it is located in the creds directory
    specified in global_config and return the absolute path.

    @param creds_file: A path to the credentials file.
    @return: An absolute path to the credentials file.
    """
    if not creds_file:
        return None
    if os.path.isabs(creds_file):
        return creds_file
    creds_dir = CONFIG.get_config_value('SERVER', 'creds_dir', default='')
    if not creds_dir or not os.path.exists(creds_dir):
        creds_dir = common.autotest_dir
    return os.path.join(creds_dir, creds_file)


def machine_is_testbed(machine):
    """Checks if the machine is a testbed.

    The signal we use to determine if the machine is a testbed
    is if the host attributes contain more than 1 serial.

    @param machine: A machine string or dict, as handled by
                    get_host_info_from_machine.

    @return: True if the machine is a testbed, False otherwise.
    """
    _, afe_host = get_host_info_from_machine(machine)
    return len(afe_host.attributes.get('serials', '').split(',')) > 1


def SetupTsMonGlobalState(*args, **kwargs):
    """Import-safe wrapper around chromite.lib.ts_mon_config's setup function.

    @param *args: Args to pass through.
    @param **kwargs: Kwargs to pass through.
    """
    try:
        # TODO(crbug.com/739466) This module import is delayed because it adds
        # 1-2 seconds to the module import time and most users of site_utils
        # don't need it. The correct fix is to break apart site_utils into more
        # meaningful chunks.
        from chromite.lib import ts_mon_config
    except ImportError:
        logging.warning('Unable to import chromite. Monarch is disabled.')
        return TrivialContextManager()

    try:
        context = ts_mon_config.SetupTsMonGlobalState(*args, **kwargs)
        if hasattr(context, '__exit__'):
            return context
    except Exception as e:
        logging.warning('Caught an exception trying to setup ts_mon, '
                        'monitoring is disabled: %s', e, exc_info=True)
    return TrivialContextManager()


@contextlib.contextmanager
def TrivialContextManager(*args, **kwargs):
    """Context manager that does nothing.

    @param *args: Ignored args
    @param **kwargs: Ignored kwargs.
    """
    yield


def wait_for_idle_duts(duts, afe, max_wait=IDLE_DUT_WAIT_TIMEOUT):
    """Wait for the hosts to all go idle.

    @param duts: List of duts to check for idle state.
    @param afe: afe instance.
    @param max_wait: Max wait time in seconds to wait for duts to be idle.

    @returns Boolean True if all hosts are idle or False if any hosts did not
             go idle within max_wait.
    """
    start_time = time.time()
    # We make a shallow copy since we're going to be modifying active_dut_list.
    active_dut_list = duts[:]
    while active_dut_list:
        # Let's rate-limit how often we hit the AFE.
        time.sleep(1)

        # Check if we've waited too long.
        if (time.time() - start_time) > max_wait:
            return False

        # Get the status for the duts and see if they're in the idle state.
        afe_hosts = afe.get_hosts(active_dut_list)
        idle_duts = [afe_host.hostname for afe_host in afe_hosts
                     if afe_host.status in host_states.IDLE_STATES]

        # Take out idle duts so we don't needlessly check them
        # next time around.
        for idle_dut in idle_duts:
            active_dut_list.remove(idle_dut)

        logging.info('still waiting for following duts to go idle: %s',
                     active_dut_list)
    return True


@contextlib.contextmanager
def lock_duts_and_wait(duts, afe, lock_msg='default lock message',
                       max_wait=IDLE_DUT_WAIT_TIMEOUT):
    """Context manager to lock the duts and wait for them to go idle.

    @param duts: List of duts to lock.
    @param afe: afe instance.
    @param lock_msg: message for afe on locking this host.
    @param max_wait: Max wait time in seconds to wait for duts to be idle.

    @returns Boolean lock_success where True if all duts locked successfully
             or False if we timed out waiting too long for hosts to go idle.
    """
    locked_duts = []
    try:
        duts.sort()
        for dut in duts:
            if afe.lock_host(dut, lock_msg, fail_if_locked=True):
                locked_duts.append(dut)
            else:
                logging.info('%s already locked', dut)
        yield wait_for_idle_duts(locked_duts, afe, max_wait)
    finally:
        afe.unlock_hosts(locked_duts)
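

# Minimal usage sketch (hostnames are made up; 'afe' is assumed to be a
# server.frontend.AFE instance, as in the helpers above):
#
#     with lock_duts_and_wait(['host1', 'host2'], afe,
#                             lock_msg='locked for maintenance') as ok:
#         if ok:
#             pass  # all locked DUTs went idle; do the work here
#         else:
#             pass  # timed out waiting for the DUTs to go idle
#     # The DUTs are unlocked automatically when the block exits.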


def board_labels_allowed(boards):
    """Check if the list of board labels can be set to a single host.

    The only case in which multiple board labels can be set on a single host
    is for a testbed, which may have a list of board labels like
    board:angler-1, board:angler-2, board:angler-3, board:marlin-1

    @param boards: A list of board labels (may include platform label).

    @returns True if the list of boards can be set to a single host.
    """
    # Filter out any non-board labels
    boards = [b for b in boards if re.match(r'board:.*', b)]
    if len(boards) <= 1:
        return True
    for board in boards:
        if not re.match(r'board:[^-]+-\d+', board):
            return False
    return True


def _get_default_size_info(path):
    """Get the default result size information.

    In case the directory summary fails to build, assume the test result is
    not throttled and all result sizes are the size of existing test results.

    @return: A namedtuple of result size information, including:
            client_result_collected_KB: The total size (in KB) of test results
                    collected from the test device. Set to be the total size
                    of the given path.
            original_result_total_KB: The original size (in KB) of test
                    results before being trimmed. Set to be the total size of
                    the given path.
            result_uploaded_KB: The total size (in KB) of test results to be
                    uploaded. Set to be the total size of the given path.
            result_throttled: True if test results collection is throttled.
                    It's set to False in this default behavior.
    """
    total_size = file_utils.get_directory_size_kibibytes(path)
    return result_utils_lib.ResultSizeInfo(
            client_result_collected_KB=total_size,
            original_result_total_KB=total_size,
            result_uploaded_KB=total_size,
            result_throttled=False)


def _report_result_size_metrics(result_size_info):
    """Report result size information to metrics.

    @param result_size_info: A ResultSizeInfo namedtuple containing
                             information about test result sizes.
    """
    fields = {'result_throttled': result_size_info.result_throttled}
    metrics.Counter(RESULT_METRICS_PREFIX + 'client_result_collected_KB',
                    description='The total size (in KB) of test results '
                    'collected from test device. Set to be the total size of '
                    'the given path.'
                    ).increment_by(result_size_info.client_result_collected_KB,
                                   fields=fields)
    metrics.Counter(RESULT_METRICS_PREFIX + 'original_result_total_KB',
                    description='The original size (in KB) of test results '
                    'before being trimmed.'
                    ).increment_by(result_size_info.original_result_total_KB,
                                   fields=fields)
    metrics.Counter(RESULT_METRICS_PREFIX + 'result_uploaded_KB',
                    description='The total size (in KB) of test results to be '
                    'uploaded.'
                    ).increment_by(result_size_info.result_uploaded_KB,
                                   fields=fields)


def collect_result_sizes(path, log=logging.debug):
    """Collect the result size information and build a result summary.

    It first tries to merge directory summaries and calculate the result
    sizes, including:
        client_result_collected_KB: The volume in KB that's transferred from
                the test device.
        original_result_total_KB: The volume in KB that's the original size of
                the result files before being trimmed.
        result_uploaded_KB: The volume in KB that will be uploaded.
        result_throttled: Indicating if the result files were throttled.

    If directory summary merging failed for any reason, fall back to using the
    total size of the given result directory.

    @param path: Path of the result directory to get size information.
    @param log: The logging method, default to logging.debug
    @return: A ResultSizeInfo namedtuple containing information about test
             result sizes.
    """
    try:
        client_collected_bytes, summary = result_utils.merge_summaries(path)
        result_size_info = result_utils_lib.get_result_size_info(
                client_collected_bytes, summary)
        html_file = os.path.join(path, result_view.DEFAULT_RESULT_SUMMARY_NAME)
        result_view.build(client_collected_bytes, summary, html_file)
    except:
        log('Failed to calculate result sizes based on directory summaries '
            'for directory %s. Fall back to record the total size.\n'
            'Exception: %s' % (path, traceback.format_exc()))
        result_size_info = _get_default_size_info(path)

    _report_result_size_metrics(result_size_info)

    return result_size_info
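

# Minimal usage sketch (the results path is hypothetical):
#
#     size_info = collect_result_sizes(
#             '/usr/local/autotest/results/123-debug', log=logging.warning)
#     if size_info.result_throttled:
#         logging.warning('Results were throttled; uploading %d KB',
#                         size_info.result_uploaded_KB)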