1# Lint as: python2, python3 2# Copyright (c) 2012 The Chromium OS Authors. All rights reserved. 3# Use of this source code is governed by a BSD-style license that can be 4# found in the LICENSE file. 5 6from __future__ import absolute_import 7from __future__ import division 8from __future__ import print_function 9 10from distutils import version 11import json 12import logging 13import multiprocessing 14import os 15import re 16import six 17from six.moves import urllib 18import six.moves.html_parser 19import six.moves.http_client 20import six.moves.urllib.parse 21import time 22 23from autotest_lib.client.bin import utils as bin_utils 24from autotest_lib.client.common_lib import android_utils 25from autotest_lib.client.common_lib import error 26from autotest_lib.client.common_lib import global_config 27from autotest_lib.client.common_lib import seven 28from autotest_lib.client.common_lib import utils 29from autotest_lib.client.common_lib.cros import retry 30 31# TODO(cmasone): redo this class using requests module; http://crosbug.com/30107 32 33try: 34 from chromite.lib import metrics 35except ImportError: 36 metrics = utils.metrics_mock 37 38 39CONFIG = global_config.global_config 40# This file is generated at build time and specifies, per suite and per test, 41# the DEPENDENCIES list specified in each control file. It's a dict of dicts: 42# {'bvt': {'/path/to/autotest/control/site_tests/test1/control': ['dep1']} 43# 'suite': {'/path/to/autotest/control/site_tests/test2/control': ['dep2']} 44# 'power': {'/path/to/autotest/control/site_tests/test1/control': ['dep1'], 45# '/path/to/autotest/control/site_tests/test3/control': ['dep3']} 46# } 47DEPENDENCIES_FILE = 'test_suites/dependency_info' 48# Number of seconds for caller to poll devserver's is_staged call to check if 49# artifacts are staged. 50_ARTIFACT_STAGE_POLLING_INTERVAL = 5 51# Artifacts that should be staged when client calls devserver RPC to stage an 52# image. 53_ARTIFACTS_TO_BE_STAGED_FOR_IMAGE = 'full_payload,test_suites,stateful' 54# Artifacts that should be staged when client calls devserver RPC to stage an 55# image with autotest artifact. 56_ARTIFACTS_TO_BE_STAGED_FOR_IMAGE_WITH_AUTOTEST = ('full_payload,test_suites,' 57 'control_files,stateful,' 58 'autotest_packages') 59# Artifacts that should be staged when client calls devserver RPC to stage an 60# Android build. 61SKIP_DEVSERVER_HEALTH_CHECK = CONFIG.get_config_value( 62 'CROS', 'skip_devserver_health_check', type=bool) 63# Number of seconds for the call to get devserver load to time out. 64TIMEOUT_GET_DEVSERVER_LOAD = 2.0 65 66# Android artifact path in devserver 67ANDROID_BUILD_NAME_PATTERN = CONFIG.get_config_value( 68 'CROS', 'android_build_name_pattern', type=str).replace('\\', '') 69 70# Return value from a devserver RPC indicating the call succeeded. 71SUCCESS = 'Success' 72 73# The timeout minutes for a given devserver ssh call. 74DEVSERVER_SSH_TIMEOUT_MINS = 1 75 76# Error message for invalid devserver response. 77ERR_MSG_FOR_INVALID_DEVSERVER_RESPONSE = 'Proxy Error' 78ERR_MSG_FOR_DOWN_DEVSERVER = 'Service Unavailable' 79 80# Error message for devserver call timedout. 81ERR_MSG_FOR_TIMED_OUT_CALL = 'timeout' 82 83# The timeout minutes for waiting a devserver staging. 84DEVSERVER_IS_STAGING_RETRY_MIN = 100 85 86# Provision error patterns. 87# People who see this should know that they shouldn't change these 88# classification strings. These strings are used for monitoring provision 89# failures. Any changes may mess up the stats. 90_EXCEPTION_PATTERNS = [ 91 # Raised when devserver portfile does not exist on host. 92 (r".*Devserver portfile does not exist!.*$", 93 '(1) Devserver portfile does not exist on host'), 94 # Raised when devserver cannot copy packages to host. 95 (r".*Could not copy .* to device.*$", 96 '(2) Cannot copy packages to host'), 97 # Raised when devserver fails to run specific commands on host. 98 (r".*cwd=None, extra env=\{'LC_MESSAGES': 'C'\}.*$", 99 '(3) Fail to run specific command on host'), 100 # Raised when new build fails to boot on the host. 101 (r'.*RootfsUpdateError: Build .* failed to boot on.*$', 102 '(4) Build failed to boot on host'), 103 # Raised when the auto-update process is timed out. 104 (r'.*The CrOS auto-update process is timed out, ' 105 'thus will be terminated.*$', 106 '(5) Auto-update is timed out'), 107 # Raised when the host is not pingable. 108 (r".*DeviceNotPingableError.*$", 109 '(6) Host is not pingable during auto-update'), 110 # Raised when hosts have unexpected status after rootfs update. 111 (r'.*Update failed with unexpected update status: ' 112 'UPDATE_STATUS_IDLE.*$', 113 '(7) Host has unexpected status: UPDATE_STATUS_IDLE after rootfs ' 114 'update'), 115 # Raised when devserver returns non-json response to shard/drone. 116 (r'.*No JSON object could be decoded.*$', 117 '(8) Devserver returned non-json object'), 118 # Raised when devserver loses host's ssh connection 119 (r'.*SSHConnectionError\: .* port 22\: Connection timed out.*$', 120 "(9) Devserver lost host's ssh connection"), 121 # Raised when error happens in writing files to host 122 (r'.*Write failed\: Broken pipe.*$', 123 "(10) Broken pipe while writing or connecting to host")] 124 125PREFER_LOCAL_DEVSERVER = CONFIG.get_config_value( 126 'CROS', 'prefer_local_devserver', type=bool, default=False) 127 128ENABLE_SSH_CONNECTION_FOR_DEVSERVER = CONFIG.get_config_value( 129 'CROS', 'enable_ssh_connection_for_devserver', type=bool, 130 default=False) 131 132DEFAULT_SUBNET_MASKBIT = 19 133 134 135class DevServerException(Exception): 136 """Raised when the dev server returns a non-200 HTTP response.""" 137 pass 138 139 140class DevServerOverloadException(Exception): 141 """Raised when the dev server returns a 502 HTTP response.""" 142 pass 143 144class DevServerFailToLocateException(Exception): 145 """Raised when fail to locate any devserver.""" 146 pass 147 148 149class MarkupStripper(six.moves.html_parser.HTMLParser): 150 """HTML parser that strips HTML tags, coded characters like & 151 152 Works by, basically, not doing anything for any tags, and only recording 153 the content of text nodes in an internal data structure. 154 """ 155 def __init__(self): 156 self.reset() 157 self.fed = [] 158 159 160 def handle_data(self, d): 161 """Consume content of text nodes, store it away.""" 162 self.fed.append(d) 163 164 165 def get_data(self): 166 """Concatenate and return all stored data.""" 167 return ''.join(self.fed) 168 169 170def _strip_http_message(message): 171 """Strip the HTTP marker from the an HTTP message. 172 173 @param message: A string returned by an HTTP call. 174 175 @return: A string with HTTP marker being stripped. 176 """ 177 strip = MarkupStripper() 178 try: 179 strip.feed(seven.ensure_text(message, 'utf_32')) 180 except UnicodeDecodeError: 181 strip.feed(message) 182 return strip.get_data() 183 184 185def _get_image_storage_server(): 186 return CONFIG.get_config_value('CROS', 'image_storage_server', type=str) 187 188 189def _get_canary_channel_server(): 190 """ 191 Get the url of the canary-channel server, 192 eg: gsutil://chromeos-releases/canary-channel/<board>/<release> 193 194 @return: The url to the canary channel server. 195 """ 196 return CONFIG.get_config_value('CROS', 'canary_channel_server', type=str) 197 198 199def _get_storage_server_for_artifacts(artifacts=None): 200 """Gets the appropriate storage server for the given artifacts. 201 202 @param artifacts: A list of artifacts we need to stage. 203 @return: The address of the storage server that has these artifacts. 204 The default image storage server if no artifacts are specified. 205 """ 206 factory_artifact = global_config.global_config.get_config_value( 207 'CROS', 'factory_artifact', type=str, default='') 208 if artifacts and factory_artifact and factory_artifact in artifacts: 209 return _get_canary_channel_server() 210 return _get_image_storage_server() 211 212 213def _gs_or_local_archive_url_args(archive_url): 214 """Infer the devserver call arguments to use with the given archive_url. 215 216 @param archive_url: The archive url to include the in devserver RPC. This 217 can either e a GS path or a local path. 218 @return: A dict of arguments to include in the devserver call. 219 """ 220 if not archive_url: 221 return {} 222 elif archive_url.startswith('gs://'): 223 return {'archive_url': archive_url} 224 else: 225 # For a local path, we direct the devserver to move the files while 226 # staging. This is the fastest way to stage local files, but deletes the 227 # files from the source. This is OK because the files are available on 228 # the devserver once staged. 229 return { 230 'local_path': archive_url, 231 'delete_source': True, 232 } 233 234 235def _reverse_lookup_from_config(address): 236 """Look up hostname for the given IP address. 237 238 This uses the hostname-address map from the config file. 239 240 If multiple hostnames map to the same IP address, the first one 241 defined in the configuration file takes precedence. 242 243 @param address: IP address string 244 @returns: hostname string, or original input if not found 245 """ 246 for hostname, addr in six.iteritems(_get_hostname_addr_map()): 247 if addr == address: 248 return hostname 249 return address 250 251 252def _get_hostname_addr_map(): 253 """Get hostname address mapping from config. 254 255 @return: dict mapping server hostnames to addresses 256 """ 257 return CONFIG.get_section_as_dict('HOSTNAME_ADDR_MAP') 258 259 260def _get_dev_server_list(): 261 return CONFIG.get_config_value('CROS', 'dev_server', type=list, default=[]) 262 263 264def _get_crash_server_list(): 265 return CONFIG.get_config_value('CROS', 'crash_server', type=list, 266 default=[]) 267 268 269def remote_devserver_call(timeout_min=DEVSERVER_IS_STAGING_RETRY_MIN, 270 exception_to_raise=DevServerException): 271 """A decorator to use with remote devserver calls. 272 273 This decorator converts urllib2.HTTPErrors into DevServerExceptions 274 with any embedded error info converted into plain text. The method 275 retries on urllib2.URLError or error.CmdError to avoid devserver flakiness. 276 """ 277 #pylint: disable=C0111 278 279 def inner_decorator(method): 280 label = method.__name__ if hasattr(method, '__name__') else None 281 def metrics_wrapper(*args, **kwargs): 282 @retry.retry((urllib.error.URLError, error.CmdError, 283 DevServerOverloadException), 284 timeout_min=timeout_min, 285 exception_to_raise=exception_to_raise, 286 label=label) 287 def wrapper(): 288 """This wrapper actually catches the HTTPError.""" 289 try: 290 return method(*args, **kwargs) 291 except urllib.error.HTTPError as e: 292 error_markup = e.read() 293 raise DevServerException(_strip_http_message(error_markup)) 294 295 try: 296 return wrapper() 297 except Exception as e: 298 if ERR_MSG_FOR_TIMED_OUT_CALL in str(e): 299 dev_server = None 300 if args and isinstance(args[0], DevServer): 301 dev_server = args[0].hostname 302 elif 'devserver' in kwargs: 303 dev_server = get_hostname(kwargs['devserver']) 304 305 logging.debug('RPC call %s has timed out on devserver %s.', 306 label, dev_server) 307 c = metrics.Counter( 308 'chromeos/autotest/devserver/call_timeout') 309 c.increment(fields={'dev_server': dev_server, 310 'healthy': label}) 311 312 raise 313 314 return metrics_wrapper 315 316 return inner_decorator 317 318 319def get_hostname(url): 320 """Get the hostname portion of a URL 321 322 schema://hostname:port/path 323 324 @param url: a Url string 325 @return: a hostname string 326 """ 327 return six.moves.urllib.parse.urlparse(url).hostname 328 329 330def get_resolved_hostname(url): 331 """Get the symbolic hostname from url. 332 333 If the given `url` uses a numeric IP address, try and find a 334 symbolic name from the hostname map in the config file. 335 336 @param url The URL with which to perform the conversion/lookup. 337 """ 338 return _reverse_lookup_from_config(get_hostname(url)) 339 340 341class DevServer(object): 342 """Base class for all DevServer-like server stubs. 343 344 This is the base class for interacting with all Dev Server-like servers. 345 A caller should instantiate a sub-class of DevServer with: 346 347 host = SubClassServer.resolve(build) 348 server = SubClassServer(host) 349 """ 350 _MIN_FREE_DISK_SPACE_GB = 20 351 _MAX_APACHE_CLIENT_COUNT = 75 352 # Threshold for the CPU load percentage for a devserver to be selected. 353 MAX_CPU_LOAD = 80.0 354 # Threshold for the network IO, set to 80MB/s 355 MAX_NETWORK_IO = 1024 * 1024 * 80 356 DISK_IO = 'disk_total_bytes_per_second' 357 NETWORK_IO = 'network_total_bytes_per_second' 358 CPU_LOAD = 'cpu_percent' 359 FREE_DISK = 'free_disk' 360 AU_PROCESS = 'au_process_count' 361 STAGING_THREAD_COUNT = 'staging_thread_count' 362 APACHE_CLIENT_COUNT = 'apache_client_count' 363 364 365 def __init__(self, devserver): 366 self._devserver = devserver 367 368 369 def url(self): 370 """Returns the url for this devserver.""" 371 return self._devserver 372 373 374 @property 375 def hostname(self): 376 """Return devserver hostname parsed from the devserver URL. 377 378 Note that this is likely parsed from the devserver URL from 379 shadow_config.ini, meaning that the "hostname" part of the 380 devserver URL is actually an IP address. 381 382 @return hostname string 383 """ 384 return get_hostname(self.url()) 385 386 387 @property 388 def resolved_hostname(self): 389 """Return devserver hostname, resolved from its IP address. 390 391 Unlike the hostname property, this property attempts to look up 392 the proper hostname from the devserver IP address. If lookup 393 fails, then fall back to whatever the hostname property would 394 have returned. 395 396 @return hostname string 397 """ 398 return _reverse_lookup_from_config(self.hostname) 399 400 401 @staticmethod 402 def get_server_url(url): 403 """Get the devserver url from a repo url, which includes build info. 404 405 @param url: A job repo url. 406 407 @return A devserver url, e.g., http://127.0.0.10:8080 408 """ 409 res = six.moves.urllib.parse.urlparse(url) 410 if res.netloc: 411 return res.scheme + '://' + res.netloc 412 413 414 @classmethod 415 def get_devserver_load_wrapper(cls, devserver, timeout_sec, output): 416 """A wrapper function to call get_devserver_load in parallel. 417 418 @param devserver: url of the devserver. 419 @param timeout_sec: Number of seconds before time out the devserver 420 call. 421 @param output: An output queue to save results to. 422 """ 423 load = cls.get_devserver_load(devserver, timeout_min=timeout_sec/60.0) 424 if load: 425 load['devserver'] = devserver 426 output.put(load) 427 428 429 @classmethod 430 def get_devserver_load(cls, devserver, 431 timeout_min=DEVSERVER_SSH_TIMEOUT_MINS): 432 """Returns True if the |devserver| is healthy to stage build. 433 434 @param devserver: url of the devserver. 435 @param timeout_min: How long to wait in minutes before deciding the 436 the devserver is not up (float). 437 438 @return: A dictionary of the devserver's load. 439 440 """ 441 call = cls._build_call(devserver, 'check_health') 442 @remote_devserver_call(timeout_min=timeout_min) 443 def get_load(devserver=devserver): 444 """Inner method that makes the call.""" 445 return cls.run_call(call, timeout=timeout_min*60) 446 447 try: 448 return json.load(six.StringIO(get_load(devserver=devserver))) 449 except Exception as e: 450 logging.error('Devserver call failed: "%s", timeout: %s seconds,' 451 ' Error: %s', call, timeout_min * 60, e) 452 453 454 @classmethod 455 def is_free_disk_ok(cls, load): 456 """Check if a devserver has enough free disk. 457 458 @param load: A dict of the load of the devserver. 459 460 @return: True if the devserver has enough free disk or disk check is 461 skipped in global config. 462 463 """ 464 if SKIP_DEVSERVER_HEALTH_CHECK: 465 logging.debug('devserver health check is skipped.') 466 elif load[cls.FREE_DISK] < cls._MIN_FREE_DISK_SPACE_GB: 467 return False 468 469 return True 470 471 472 @classmethod 473 def is_apache_client_count_ok(cls, load): 474 """Check if a devserver has enough Apache connections available. 475 476 Apache server by default has maximum of 150 concurrent connections. If 477 a devserver has too many live connections, it likely indicates the 478 server is busy handling many long running download requests, e.g., 479 downloading stateful partitions. It is better not to add more requests 480 to it. 481 482 @param load: A dict of the load of the devserver. 483 484 @return: True if the devserver has enough Apache connections available, 485 or disk check is skipped in global config. 486 487 """ 488 if SKIP_DEVSERVER_HEALTH_CHECK: 489 logging.debug('devserver health check is skipped.') 490 elif cls.APACHE_CLIENT_COUNT not in load: 491 logging.debug('Apache client count is not collected from devserver.') 492 elif (load[cls.APACHE_CLIENT_COUNT] > 493 cls._MAX_APACHE_CLIENT_COUNT): 494 return False 495 496 return True 497 498 499 @classmethod 500 def devserver_healthy(cls, devserver, 501 timeout_min=DEVSERVER_SSH_TIMEOUT_MINS): 502 """Returns True if the |devserver| is healthy to stage build. 503 504 @param devserver: url of the devserver. 505 @param timeout_min: How long to wait in minutes before deciding the 506 the devserver is not up (float). 507 508 @return: True if devserver is healthy. Return False otherwise. 509 510 """ 511 c = metrics.Counter('chromeos/autotest/devserver/devserver_healthy') 512 reason = '' 513 healthy = False 514 load = cls.get_devserver_load(devserver, timeout_min=timeout_min) 515 try: 516 if not load: 517 # Failed to get the load of devserver. 518 reason = '(1) Failed to get load.' 519 return False 520 521 apache_ok = cls.is_apache_client_count_ok(load) 522 if not apache_ok: 523 reason = '(2) Apache client count too high.' 524 logging.error('Devserver check_health failed. Live Apache client ' 525 'count is too high: %d.', 526 load[cls.APACHE_CLIENT_COUNT]) 527 return False 528 529 disk_ok = cls.is_free_disk_ok(load) 530 if not disk_ok: 531 reason = '(3) Disk space too low.' 532 logging.error('Devserver check_health failed. Free disk space is ' 533 'low. Only %dGB is available.', 534 load[cls.FREE_DISK]) 535 healthy = bool(disk_ok) 536 return disk_ok 537 finally: 538 c.increment(fields={'dev_server': cls(devserver).resolved_hostname, 539 'healthy': healthy, 540 'reason': reason}) 541 # Monitor how many AU processes the devserver is currently running. 542 if load is not None and load.get(DevServer.AU_PROCESS): 543 c_au = metrics.Gauge( 544 'chromeos/autotest/devserver/devserver_au_count') 545 c_au.set( 546 load.get(DevServer.AU_PROCESS), 547 fields={'dev_server': cls(devserver).resolved_hostname}) 548 549 550 @staticmethod 551 def _build_call(host, method, **kwargs): 552 """Build a URL to |host| that calls |method|, passing |kwargs|. 553 554 Builds a URL that calls |method| on the dev server defined by |host|, 555 passing a set of key/value pairs built from the dict |kwargs|. 556 557 @param host: a string that is the host basename e.g. http://server:90. 558 @param method: the dev server method to call. 559 @param kwargs: a dict mapping arg names to arg values. 560 @return the URL string. 561 """ 562 # If the archive_url is a local path, the args expected by the devserver 563 # are a little different. 564 archive_url_args = _gs_or_local_archive_url_args( 565 kwargs.pop('archive_url', None)) 566 kwargs.update(archive_url_args) 567 if 'is_async' in kwargs: 568 f = kwargs.pop('is_async') 569 kwargs['async'] = f 570 argstr = '&'.join(["%s=%s" % x for x in six.iteritems(kwargs)]) 571 return "%(host)s/%(method)s?%(argstr)s" % dict( 572 host=host, method=method, argstr=argstr) 573 574 575 def build_call(self, method, **kwargs): 576 """Builds a devserver RPC string that is used by 'run_call()'. 577 578 @param method: remote devserver method to call. 579 """ 580 return self._build_call(self._devserver, method, **kwargs) 581 582 583 @classmethod 584 def build_all_calls(cls, method, **kwargs): 585 """Builds a list of URLs that makes RPC calls on all devservers. 586 587 Build a URL that calls |method| on the dev server, passing a set 588 of key/value pairs built from the dict |kwargs|. 589 590 @param method: the dev server method to call. 591 @param kwargs: a dict mapping arg names to arg values 592 593 @return the URL string 594 """ 595 calls = [] 596 # Note we use cls.servers as servers is class specific. 597 for server in cls.servers(): 598 if cls.devserver_healthy(server): 599 calls.append(cls._build_call(server, method, **kwargs)) 600 601 return calls 602 603 604 @classmethod 605 def run_call(cls, call, readline=False, timeout=None): 606 """Invoke a given devserver call using urllib.open. 607 608 Open the URL with HTTP, and return the text of the response. Exceptions 609 may be raised as for urllib2.urlopen(). 610 611 @param call: a url string that calls a method to a devserver. 612 @param readline: whether read http response line by line. 613 @param timeout: The timeout seconds for this urlopen call. 614 615 @return the results of this call. 616 """ 617 if timeout is not None: 618 return utils.urlopen_socket_timeout( 619 call, timeout=timeout).read() 620 elif readline: 621 response = urllib.request.urlopen(call) 622 return [line.rstrip() for line in response] 623 else: 624 return urllib.request.urlopen(call).read() 625 626 627 @staticmethod 628 def servers(): 629 """Returns a list of servers that can serve as this type of server.""" 630 raise NotImplementedError() 631 632 633 @classmethod 634 def get_devservers_in_same_subnet(cls, ip, mask_bits=DEFAULT_SUBNET_MASKBIT, 635 unrestricted_only=False): 636 """Get the devservers in the same subnet of the given ip. 637 638 @param ip: The IP address of a dut to look for devserver. 639 @param mask_bits: Number of mask bits. Default is 19. 640 @param unrestricted_only: Set to True to select from devserver in 641 unrestricted subnet only. Default is False. 642 643 @return: A list of devservers in the same subnet of the given ip. 644 645 """ 646 # server from cls.servers() is a URL, e.g., http://10.1.1.10:8082, so 647 # we need a dict to return the full devserver path once the IPs are 648 # filtered in get_servers_in_same_subnet. 649 server_names = {} 650 all_devservers = [] 651 devservers = (cls.get_unrestricted_devservers() if unrestricted_only 652 else cls.servers()) 653 for server in devservers: 654 server_name = get_hostname(server) 655 server_names[server_name] = server 656 all_devservers.append(server_name) 657 if not all_devservers: 658 devserver_type = 'unrestricted only' if unrestricted_only else 'all' 659 raise DevServerFailToLocateException( 660 'Fail to locate a devserver for dut %s in %s devservers' 661 % (ip, devserver_type)) 662 663 devservers = utils.get_servers_in_same_subnet(ip, mask_bits, 664 all_devservers) 665 return [server_names[s] for s in devservers] 666 667 668 @classmethod 669 def get_unrestricted_devservers( 670 cls, restricted_subnets=utils.RESTRICTED_SUBNETS): 671 """Get the devservers not in any restricted subnet specified in 672 restricted_subnets. 673 674 @param restricted_subnets: A list of restriected subnets. 675 676 @return: A list of devservers not in any restricted subnet. 677 678 """ 679 if not restricted_subnets: 680 return cls.servers() 681 682 metrics.Counter('chromeos/autotest/devserver/unrestricted_hotfix') 683 return cls.servers() 684 685 @classmethod 686 def get_healthy_devserver(cls, build, devservers, ban_list=None): 687 """"Get a healthy devserver instance from the list of devservers. 688 689 @param build: The build (e.g. x86-mario-release/R18-1586.0.0-a1-b1514). 690 @param devservers: The devserver list to be chosen out a healthy one. 691 @param ban_list: The ban_list of devservers we don't want to choose. 692 Default is None. 693 694 @return: A DevServer object of a healthy devserver. Return None if no 695 healthy devserver is found. 696 697 """ 698 logging.debug('Pick one healthy devserver from %r', devservers) 699 while devservers: 700 hash_index = hash(build) % len(devservers) 701 devserver = devservers.pop(hash_index) 702 logging.debug('Check health for %s', devserver) 703 if ban_list and devserver in ban_list: 704 continue 705 706 if cls.devserver_healthy(devserver): 707 logging.debug('Pick %s', devserver) 708 return cls(devserver) 709 710 711 @classmethod 712 def get_available_devservers(cls, hostname=None, 713 prefer_local_devserver=PREFER_LOCAL_DEVSERVER, 714 restricted_subnets=utils.RESTRICTED_SUBNETS): 715 """Get devservers in the same subnet of the given hostname. 716 717 @param hostname: Hostname of a DUT to choose devserver for. 718 719 @return: A tuple of (devservers, can_retry), devservers is a list of 720 devservers that's available for the given hostname. can_retry 721 is a flag that indicate if caller can retry the selection of 722 devserver if no devserver in the returned devservers can be 723 used. For example, if hostname is in a restricted subnet, 724 can_retry will be False. 725 """ 726 logging.info('Getting devservers for host: %s', hostname) 727 host_ip = None 728 if hostname: 729 host_ip = bin_utils.get_ip_address(hostname) 730 if not host_ip: 731 logging.error('Failed to get IP address of %s. Will pick a ' 732 'devserver without subnet constraint.', hostname) 733 734 if not host_ip: 735 return cls.get_unrestricted_devservers(restricted_subnets), False 736 737 # Go through all restricted subnet settings and check if the DUT is 738 # inside a restricted subnet. If so, only return the devservers in the 739 # restricted subnet and doesn't allow retry. 740 if host_ip and restricted_subnets: 741 subnet_ip, mask_bits = _get_subnet_for_host_ip( 742 host_ip, restricted_subnets=restricted_subnets) 743 if subnet_ip: 744 logging.debug('The host %s (%s) is in a restricted subnet. ' 745 'Try to locate a devserver inside subnet ' 746 '%s:%d.', hostname, host_ip, subnet_ip, 747 mask_bits) 748 devservers = cls.get_devservers_in_same_subnet( 749 subnet_ip, mask_bits) 750 return devservers, False 751 752 # If prefer_local_devserver is set to True and the host is not in 753 # restricted subnet, pick a devserver in the same subnet if possible. 754 # Set can_retry to True so it can pick a different devserver if all 755 # devservers in the same subnet are down. 756 if prefer_local_devserver: 757 return (cls.get_devservers_in_same_subnet( 758 host_ip, DEFAULT_SUBNET_MASKBIT, True), True) 759 760 return cls.get_unrestricted_devservers(restricted_subnets), False 761 762 763 @classmethod 764 def resolve(cls, build, hostname=None, ban_list=None): 765 """"Resolves a build to a devserver instance. 766 767 @param build: The build (e.g. x86-mario-release/R18-1586.0.0-a1-b1514). 768 @param hostname: The hostname of dut that requests a devserver. It's 769 used to make sure a devserver in the same subnet is 770 preferred. 771 @param ban_list: The ban_list of devservers shouldn't be chosen. 772 773 @raise DevServerException: If no devserver is available. 774 """ 775 tried_devservers = set() 776 devservers, can_retry = cls.get_available_devservers(hostname) 777 if devservers: 778 tried_devservers |= set(devservers) 779 780 devserver = cls.get_healthy_devserver(build, devservers, 781 ban_list=ban_list) 782 783 if not devserver and can_retry: 784 # Find available devservers without dut location constrain. 785 devservers, _ = cls.get_available_devservers() 786 devserver = cls.get_healthy_devserver(build, devservers, 787 ban_list=ban_list) 788 if devservers: 789 tried_devservers |= set(devservers) 790 if devserver: 791 return devserver 792 else: 793 subnet = 'unrestricted subnet' 794 if hostname is not None: 795 host_ip = bin_utils.get_ip_address(hostname) 796 if host_ip: 797 subnet_ip, mask_bits = _get_subnet_for_host_ip(host_ip) 798 subnet = '%s/%s' % (str(subnet_ip), str(mask_bits)) 799 800 error_msg = ('All devservers in subnet: %s are currently down: ' 801 '%s. (dut hostname: %s)' % 802 (subnet, tried_devservers, hostname)) 803 logging.error(error_msg) 804 c = metrics.Counter( 805 'chromeos/autotest/devserver/subnet_without_devservers') 806 c.increment(fields={'subnet': subnet, 'hostname': str(hostname)}) 807 raise DevServerException(error_msg) 808 809 810 @classmethod 811 def random(cls): 812 """Return a random devserver that's available. 813 814 Devserver election in `resolve` method is based on a hash of the 815 build that a caller wants to stage. The purpose is that different 816 callers requesting for the same build can get the same devserver, 817 while the lab is able to distribute different builds across all 818 devservers. That helps to reduce the duplication of builds across 819 all devservers. 820 This function returns a random devserver, by passing a random 821 pseudo build name to `resolve `method. 822 """ 823 return cls.resolve(build=str(time.time())) 824 825 826class CrashServer(DevServer): 827 """Class of DevServer that symbolicates crash dumps.""" 828 829 @staticmethod 830 def servers(): 831 return _get_crash_server_list() 832 833 834 @remote_devserver_call() 835 def symbolicate_dump(self, minidump_path, build): 836 """Ask the devserver to symbolicate the dump at minidump_path. 837 838 Stage the debug symbols for |build| and, if that works, ask the 839 devserver to symbolicate the dump at |minidump_path|. 840 841 @param minidump_path: the on-disk path of the minidump. 842 @param build: The build (e.g. x86-mario-release/R18-1586.0.0-a1-b1514) 843 whose debug symbols are needed for symbolication. 844 @return The contents of the stack trace 845 @raise DevServerException upon any return code that's not HTTP OK. 846 """ 847 try: 848 import requests 849 except ImportError: 850 logging.warning("Can't 'import requests' to connect to dev server.") 851 return '' 852 f = {'dev_server': self.resolved_hostname} 853 c = metrics.Counter('chromeos/autotest/crashserver/symbolicate_dump') 854 c.increment(fields=f) 855 # Symbolicate minidump. 856 m = 'chromeos/autotest/crashserver/symbolicate_dump_duration' 857 with metrics.SecondsTimer(m, fields=f): 858 call = self.build_call('symbolicate_dump', 859 archive_url=_get_image_storage_server() + build) 860 request = requests.post( 861 call, files={'minidump': open(minidump_path, 'rb')}) 862 if request.status_code == requests.codes.OK: 863 return request.text 864 865 error_fd = six.StringIO(request.text) 866 raise urllib.error.HTTPError( 867 call, request.status_code, request.text, request.headers, 868 error_fd) 869 870 871 @classmethod 872 def get_available_devservers(cls, hostname): 873 """Get all available crash servers. 874 875 Crash server election doesn't need to count the location of hostname. 876 877 @param hostname: Hostname of a DUT to choose devserver for. 878 879 @return: A tuple of (all crash servers, False). can_retry is set to 880 False, as all crash servers are returned. There is no point to 881 retry. 882 """ 883 return cls.servers(), False 884 885 886class ImageServerBase(DevServer): 887 """Base class for devservers used to stage builds. 888 889 CrOS and Android builds are staged in different ways as they have different 890 sets of artifacts. This base class abstracts the shared functions between 891 the two types of ImageServer. 892 """ 893 894 @classmethod 895 def servers(cls): 896 """Returns a list of servers that can serve as a desired type of 897 devserver. 898 """ 899 return _get_dev_server_list() 900 901 902 def _get_image_url(self, image): 903 """Returns the url of the directory for this image on the devserver. 904 905 @param image: the image that was fetched. 906 """ 907 image = self.translate(image) 908 url_pattern = CONFIG.get_config_value('CROS', 'image_url_pattern', 909 type=str) 910 return (url_pattern % (self.url(), image)).replace('update', 'static') 911 912 913 @staticmethod 914 def create_metadata(server_name, image, artifacts=None, files=None): 915 """Create a metadata dictionary given the staged items. 916 917 The metadata can be send to metadata db along with stats. 918 919 @param server_name: name of the devserver, e.g 172.22.33.44. 920 @param image: The name of the image. 921 @param artifacts: A list of artifacts. 922 @param files: A list of files. 923 924 @return A metadata dictionary. 925 926 """ 927 metadata = {'devserver': server_name, 928 'image': image, 929 '_type': 'devserver'} 930 if artifacts: 931 metadata['artifacts'] = ' '.join(artifacts) 932 if files: 933 metadata['files'] = ' '.join(files) 934 return metadata 935 936 937 @classmethod 938 def run_ssh_call(cls, call, readline=False, timeout=None): 939 """Construct an ssh-based rpc call, and execute it. 940 941 @param call: a url string that calls a method to a devserver. 942 @param readline: whether read http response line by line. 943 @param timeout: The timeout seconds for ssh call. 944 945 @return the results of this call. 946 """ 947 hostname = get_hostname(call) 948 ssh_call = 'ssh %s \'curl "%s"\'' % (hostname, utils.sh_escape(call)) 949 timeout_seconds = timeout if timeout else DEVSERVER_SSH_TIMEOUT_MINS*60 950 try: 951 result = utils.run(ssh_call, timeout=timeout_seconds) 952 except error.CmdError as e: 953 logging.debug('Error occurred with exit_code %d when executing the ' 954 'ssh call: %s.', e.result_obj.exit_status, 955 e.result_obj.stderr) 956 c = metrics.Counter('chromeos/autotest/devserver/ssh_failure') 957 c.increment(fields={'dev_server': hostname}) 958 raise 959 response = result.stdout 960 961 # If the curl command's returned HTTP response contains certain 962 # exception string, raise the DevServerException of the response. 963 if 'DownloaderException' in response: 964 raise DevServerException(_strip_http_message(response)) 965 966 if readline: 967 # Remove line terminators and trailing whitespace 968 response = response.splitlines() 969 return [line.rstrip() for line in response] 970 971 return response 972 973 974 @classmethod 975 def run_call(cls, call, readline=False, timeout=None): 976 """Invoke a given devserver call using urllib.open or ssh. 977 978 Open the URL with HTTP or SSH-based HTTP, and return the text of the 979 response. Exceptions may be raised as for urllib2.urlopen() or 980 utils.run(). 981 982 @param call: a url string that calls a method to a devserver. 983 @param readline: whether read http response line by line. 984 @param timeout: The timeout seconds for urlopen call or ssh call. 985 986 @return the results of this call. 987 """ 988 server_name = get_hostname(call) 989 is_in_restricted_subnet = utils.get_restricted_subnet( 990 server_name, utils.RESTRICTED_SUBNETS) 991 _EMPTY_SENTINEL_VALUE = object() 992 def kickoff_call(): 993 """Invoke a given devserver call using urllib.open or ssh. 994 995 @param call: a url string that calls a method to a devserver. 996 @param is_in_restricted_subnet: whether the devserver is in subnet. 997 @param readline: whether read http response line by line. 998 @param timeout: The timeout seconds for urlopen call or ssh call. 999 """ 1000 if (not ENABLE_SSH_CONNECTION_FOR_DEVSERVER or 1001 not is_in_restricted_subnet): 1002 response = super(ImageServerBase, cls).run_call( 1003 call, readline=readline, timeout=timeout) 1004 else: 1005 response = cls.run_ssh_call( 1006 call, readline=readline, timeout=timeout) 1007 # Retry if devserver service is temporarily down, e.g. in a 1008 # devserver push. 1009 if ERR_MSG_FOR_DOWN_DEVSERVER in response: 1010 return False 1011 1012 # Don't return response directly since it may be empty string, 1013 # which causes poll_for_condition to retry. 1014 return _EMPTY_SENTINEL_VALUE if not response else response 1015 1016 try: 1017 response = bin_utils.poll_for_condition( 1018 kickoff_call, 1019 exception=bin_utils.TimeoutError(), 1020 timeout=60, 1021 sleep_interval=5) 1022 return '' if response is _EMPTY_SENTINEL_VALUE else response 1023 except bin_utils.TimeoutError: 1024 return ERR_MSG_FOR_DOWN_DEVSERVER 1025 1026 1027 @classmethod 1028 def download_file(cls, remote_file, local_file, timeout=None): 1029 """Download file from devserver. 1030 1031 The format of remote_file should be: 1032 http://devserver_ip:8082/static/board/... 1033 1034 @param remote_file: The URL of the file on devserver that need to be 1035 downloaded. 1036 @param local_file: The path of the file saved to local. 1037 @param timeout: The timeout seconds for this call. 1038 """ 1039 response = cls.run_call(remote_file, timeout=timeout) 1040 with open(local_file, 'w') as out_log: 1041 out_log.write(response) 1042 1043 1044 def _poll_is_staged(self, **kwargs): 1045 """Polling devserver.is_staged until all artifacts are staged. 1046 1047 @param kwargs: keyword arguments to make is_staged devserver call. 1048 1049 @return: True if all artifacts are staged in devserver. 1050 """ 1051 call = self.build_call('is_staged', **kwargs) 1052 1053 def all_staged(): 1054 """Call devserver.is_staged rpc to check if all files are staged. 1055 1056 @return: True if all artifacts are staged in devserver. False 1057 otherwise. 1058 @rasies DevServerException, the exception is a wrapper of all 1059 exceptions that were raised when devserver tried to download 1060 the artifacts. devserver raises an HTTPError or a CmdError 1061 when an exception was raised in the code. Such exception 1062 should be re-raised here to stop the caller from waiting. 1063 If the call to devserver failed for connection issue, a 1064 URLError exception is raised, and caller should retry the 1065 call to avoid such network flakiness. 1066 1067 """ 1068 try: 1069 result = self.run_call(call) 1070 logging.debug('whether artifact is staged: %r', result) 1071 return result == 'True' 1072 except urllib.error.HTTPError as e: 1073 error_markup = e.read() 1074 raise DevServerException(_strip_http_message(error_markup)) 1075 except urllib.error.URLError as e: 1076 # Could be connection issue, retry it. 1077 # For example: <urlopen error [Errno 111] Connection refused> 1078 logging.error('URLError happens in is_stage: %r', e) 1079 return False 1080 except error.CmdError as e: 1081 # Retry if SSH failed to connect to the devserver. 1082 logging.warning('CmdError happens in is_stage: %r, will retry', e) 1083 return False 1084 1085 bin_utils.poll_for_condition( 1086 all_staged, 1087 exception=bin_utils.TimeoutError(), 1088 timeout=DEVSERVER_IS_STAGING_RETRY_MIN * 60, 1089 sleep_interval=_ARTIFACT_STAGE_POLLING_INTERVAL) 1090 1091 return True 1092 1093 1094 def _call_and_wait(self, call_name, error_message, 1095 expected_response=SUCCESS, **kwargs): 1096 """Helper method to make a urlopen call, and wait for artifacts staged. 1097 1098 @param call_name: name of devserver rpc call. 1099 @param error_message: Error message to be thrown if response does not 1100 match expected_response. 1101 @param expected_response: Expected response from rpc, default to 1102 |Success|. If it's set to None, do not compare 1103 the actual response. Any response is consider 1104 to be good. 1105 @param kwargs: keyword arguments to make is_staged devserver call. 1106 1107 @return: The response from rpc. 1108 @raise DevServerException upon any return code that's expected_response. 1109 1110 """ 1111 call = self.build_call(call_name, is_async=True, **kwargs) 1112 try: 1113 response = self.run_call(call) 1114 logging.debug('response for RPC: %r', response) 1115 if ERR_MSG_FOR_INVALID_DEVSERVER_RESPONSE in response: 1116 logging.debug('Proxy error happens in RPC call, ' 1117 'will retry in 30 seconds') 1118 time.sleep(30) 1119 raise DevServerOverloadException() 1120 except six.moves.http_client.BadStatusLine as e: 1121 logging.error(e) 1122 raise DevServerException('Received Bad Status line, Devserver %s ' 1123 'might have gone down while handling ' 1124 'the call: %s' % (self.url(), call)) 1125 1126 if expected_response and not response == expected_response: 1127 raise DevServerException(error_message) 1128 1129 # `os_type` is needed in build a devserver call, but not needed for 1130 # wait_for_artifacts_staged, since that method is implemented by 1131 # each ImageServerBase child class. 1132 if 'os_type' in kwargs: 1133 del kwargs['os_type'] 1134 self.wait_for_artifacts_staged(**kwargs) 1135 return response 1136 1137 1138 def _stage_artifacts(self, build, artifacts, files, archive_url, **kwargs): 1139 """Tell the devserver to download and stage |artifacts| from |image| 1140 specified by kwargs. 1141 1142 This is the main call point for staging any specific artifacts for a 1143 given build. To see the list of artifacts one can stage see: 1144 1145 ~src/platfrom/dev/artifact_info.py. 1146 1147 This is maintained along with the actual devserver code. 1148 1149 @param artifacts: A list of artifacts. 1150 @param files: A list of files to stage. 1151 @param archive_url: Optional parameter that has the archive_url to stage 1152 this artifact from. Default is specified in autotest config + 1153 image. 1154 @param kwargs: keyword arguments that specify the build information, to 1155 make stage devserver call. 1156 1157 @raise DevServerException upon any return code that's not HTTP OK. 1158 """ 1159 if not archive_url: 1160 archive_url = _get_storage_server_for_artifacts(artifacts) + build 1161 1162 artifacts_arg = ','.join(artifacts) if artifacts else '' 1163 files_arg = ','.join(files) if files else '' 1164 error_message = ("staging %s for %s failed;" 1165 "HTTP OK not accompanied by 'Success'." % 1166 ('artifacts=%s files=%s ' % (artifacts_arg, files_arg), 1167 build)) 1168 1169 staging_info = ('build=%s, artifacts=%s, files=%s, archive_url=%s' % 1170 (build, artifacts, files, archive_url)) 1171 logging.info('Staging artifacts on devserver %s: %s', 1172 self.url(), staging_info) 1173 success = False 1174 try: 1175 arguments = {'archive_url': archive_url, 1176 'artifacts': artifacts_arg, 1177 'files': files_arg} 1178 if kwargs: 1179 arguments.update(kwargs) 1180 f = {'artifacts': artifacts_arg, 1181 'dev_server': self.resolved_hostname} 1182 with metrics.SecondsTimer( 1183 'chromeos/autotest/devserver/stage_artifact_duration', 1184 fields=f): 1185 self.call_and_wait(call_name='stage', error_message=error_message, 1186 **arguments) 1187 logging.info('Finished staging artifacts: %s', staging_info) 1188 success = True 1189 except (bin_utils.TimeoutError, error.TimeoutException): 1190 logging.error('stage_artifacts timed out: %s', staging_info) 1191 raise DevServerException( 1192 'stage_artifacts timed out: %s' % staging_info) 1193 finally: 1194 f = {'success': success, 1195 'artifacts': artifacts_arg, 1196 'dev_server': self.resolved_hostname} 1197 metrics.Counter('chromeos/autotest/devserver/stage_artifact' 1198 ).increment(fields=f) 1199 1200 1201 def call_and_wait(self, *args, **kwargs): 1202 """Helper method to make a urlopen call, and wait for artifacts staged. 1203 1204 This method needs to be overridden in the subclass to implement the 1205 logic to call _call_and_wait. 1206 """ 1207 raise NotImplementedError 1208 1209 1210 def _trigger_download(self, build, artifacts, files, synchronous=True, 1211 **kwargs_build_info): 1212 """Tell the devserver to download and stage image specified in 1213 kwargs_build_info. 1214 1215 Tells the devserver to fetch |image| from the image storage server 1216 named by _get_image_storage_server(). 1217 1218 If |synchronous| is True, waits for the entire download to finish 1219 staging before returning. Otherwise only the artifacts necessary 1220 to start installing images onto DUT's will be staged before returning. 1221 A caller can then call finish_download to guarantee the rest of the 1222 artifacts have finished staging. 1223 1224 @param synchronous: if True, waits until all components of the image are 1225 staged before returning. 1226 @param kwargs_build_info: Dictionary of build information. 1227 For CrOS, it is None as build is the CrOS image name. 1228 For Android, it is {'target': target, 1229 'build_id': build_id, 1230 'branch': branch} 1231 1232 @raise DevServerException upon any return code that's not HTTP OK. 1233 1234 """ 1235 if kwargs_build_info: 1236 archive_url = None 1237 else: 1238 archive_url = _get_image_storage_server() + build 1239 error_message = ("trigger_download for %s failed;" 1240 "HTTP OK not accompanied by 'Success'." % build) 1241 kwargs = {'archive_url': archive_url, 1242 'artifacts': artifacts, 1243 'files': files, 1244 'error_message': error_message} 1245 if kwargs_build_info: 1246 kwargs.update(kwargs_build_info) 1247 1248 logging.info('trigger_download starts for %s', build) 1249 try: 1250 response = self.call_and_wait(call_name='stage', **kwargs) 1251 logging.info('trigger_download finishes for %s', build) 1252 except (bin_utils.TimeoutError, error.TimeoutException): 1253 logging.error('trigger_download timed out for %s.', build) 1254 raise DevServerException( 1255 'trigger_download timed out for %s.' % build) 1256 was_successful = response == SUCCESS 1257 if was_successful and synchronous: 1258 self._finish_download(build, artifacts, files, **kwargs_build_info) 1259 1260 1261 def _finish_download(self, build, artifacts, files, **kwargs_build_info): 1262 """Tell the devserver to finish staging image specified in 1263 kwargs_build_info. 1264 1265 If trigger_download is called with synchronous=False, it will return 1266 before all artifacts have been staged. This method contacts the 1267 devserver and blocks until all staging is completed and should be 1268 called after a call to trigger_download. 1269 1270 @param kwargs_build_info: Dictionary of build information. 1271 For CrOS, it is None as build is the CrOS image name. 1272 For Android, it is {'target': target, 1273 'build_id': build_id, 1274 'branch': branch} 1275 1276 @raise DevServerException upon any return code that's not HTTP OK. 1277 """ 1278 archive_url = _get_image_storage_server() + build 1279 error_message = ("finish_download for %s failed;" 1280 "HTTP OK not accompanied by 'Success'." % build) 1281 kwargs = {'archive_url': archive_url, 1282 'artifacts': artifacts, 1283 'files': files, 1284 'error_message': error_message} 1285 if kwargs_build_info: 1286 kwargs.update(kwargs_build_info) 1287 try: 1288 self.call_and_wait(call_name='stage', **kwargs) 1289 except (bin_utils.TimeoutError, error.TimeoutException): 1290 logging.error('finish_download timed out for %s', build) 1291 raise DevServerException( 1292 'finish_download timed out for %s.' % build) 1293 1294 1295 @remote_devserver_call() 1296 def locate_file(self, file_name, artifacts, build, build_info): 1297 """Locate a file with the given file_name on devserver. 1298 1299 This method calls devserver RPC `locate_file` to look up a file with 1300 the given file name inside specified build artifacts. 1301 1302 @param file_name: Name of the file to look for a file. 1303 @param artifacts: A list of artifact names to search for the file. 1304 @param build: Name of the build. For Android, it's None as build_info 1305 should be used. 1306 @param build_info: Dictionary of build information. 1307 For CrOS, it is None as build is the CrOS image name. 1308 For Android, it is {'target': target, 1309 'build_id': build_id, 1310 'branch': branch} 1311 1312 @return: A devserver url to the file. 1313 @raise DevServerException upon any return code that's not HTTP OK. 1314 """ 1315 if not build and not build_info: 1316 raise DevServerException('You must specify build information to ' 1317 'look for file %s in artifacts %s.' % 1318 (file_name, artifacts)) 1319 kwargs = {'file_name': file_name, 1320 'artifacts': artifacts} 1321 if build_info: 1322 build_path = '%(branch)s/%(target)s/%(build_id)s' % build_info 1323 kwargs.update(build_info) 1324 # Devserver treats Android and Brillo build in the same way as they 1325 # are both retrieved from Launch Control and have similar build 1326 # artifacts. Therefore, os_type for devserver calls is `android` for 1327 # both Android and Brillo builds. 1328 kwargs['os_type'] = 'android' 1329 else: 1330 build_path = build 1331 kwargs['build'] = build 1332 call = self.build_call('locate_file', is_async=False, **kwargs) 1333 try: 1334 file_path = self.run_call(call) 1335 return os.path.join(self.url(), 'static', build_path, file_path) 1336 except six.moves.http_client.BadStatusLine as e: 1337 logging.error(e) 1338 raise DevServerException('Received Bad Status line, Devserver %s ' 1339 'might have gone down while handling ' 1340 'the call: %s' % (self.url(), call)) 1341 1342 1343 @remote_devserver_call() 1344 def list_control_files(self, build, suite_name=''): 1345 """Ask the devserver to list all control files for |build|. 1346 1347 @param build: The build (e.g. x86-mario-release/R18-1586.0.0-a1-b1514) 1348 whose control files the caller wants listed. 1349 @param suite_name: The name of the suite for which we require control 1350 files. 1351 @return None on failure, or a list of control file paths 1352 (e.g. server/site_tests/autoupdate/control) 1353 @raise DevServerException upon any return code that's not HTTP OK. 1354 """ 1355 build = self.translate(build) 1356 call = self.build_call('controlfiles', build=build, 1357 suite_name=suite_name) 1358 return self.run_call(call, readline=True) 1359 1360 1361 @remote_devserver_call() 1362 def get_control_file(self, build, control_path): 1363 """Ask the devserver for the contents of a control file. 1364 1365 @param build: The build (e.g. x86-mario-release/R18-1586.0.0-a1-b1514) 1366 whose control file the caller wants to fetch. 1367 @param control_path: The file to fetch 1368 (e.g. server/site_tests/autoupdate/control) 1369 @return The contents of the desired file. 1370 @raise DevServerException upon any return code that's not HTTP OK. 1371 """ 1372 build = self.translate(build) 1373 call = self.build_call('controlfiles', build=build, 1374 control_path=control_path) 1375 return self.run_call(call) 1376 1377 1378 @remote_devserver_call() 1379 def list_suite_controls(self, build, suite_name=''): 1380 """Ask the devserver to list contents of all control files for |build|. 1381 1382 @param build: The build (e.g. x86-mario-release/R18-1586.0.0-a1-b1514) 1383 whose control files' contents the caller wants returned. 1384 @param suite_name: The name of the suite for which we require control 1385 files. 1386 @return None on failure, or a dict of contents of all control files 1387 (e.g. {'path1': "#Copyright controls ***", ..., 1388 pathX': "#Copyright controls ***"} 1389 @raise DevServerException upon any return code that's not HTTP OK. 1390 """ 1391 build = self.translate(build) 1392 call = self.build_call('list_suite_controls', build=build, 1393 suite_name=suite_name) 1394 return json.load(six.StringIO(self.run_call(call))) 1395 1396 1397class ImageServer(ImageServerBase): 1398 """Class for DevServer that handles RPCs related to CrOS images. 1399 1400 The calls to devserver to stage artifacts, including stage and download, are 1401 made in async mode. That is, when caller makes an RPC |stage| to request 1402 devserver to stage certain artifacts, devserver handles the call and starts 1403 staging artifacts in a new thread, and return |Success| without waiting for 1404 staging being completed. When caller receives message |Success|, it polls 1405 devserver's is_staged call until all artifacts are staged. 1406 Such mechanism is designed to prevent cherrypy threads in devserver being 1407 running out, as staging artifacts might take long time, and cherrypy starts 1408 with a fixed number of threads that handle devserver rpc. 1409 """ 1410 1411 class ArtifactUrls(object): 1412 """A container for URLs of staged artifacts. 1413 1414 Attributes: 1415 full_payload: URL for downloading a staged full release update 1416 mton_payload: URL for downloading a staged M-to-N release update 1417 nton_payload: URL for downloading a staged N-to-N release update 1418 1419 """ 1420 def __init__(self, full_payload=None, mton_payload=None, 1421 nton_payload=None): 1422 self.full_payload = full_payload 1423 self.mton_payload = mton_payload 1424 self.nton_payload = nton_payload 1425 1426 1427 def wait_for_artifacts_staged(self, archive_url, artifacts='', files=''): 1428 """Polling devserver.is_staged until all artifacts are staged. 1429 1430 @param archive_url: Google Storage URL for the build. 1431 @param artifacts: Comma separated list of artifacts to download. 1432 @param files: Comma separated list of files to download. 1433 @return: True if all artifacts are staged in devserver. 1434 """ 1435 kwargs = {'archive_url': archive_url, 1436 'artifacts': artifacts, 1437 'files': files} 1438 return self._poll_is_staged(**kwargs) 1439 1440 1441 @remote_devserver_call() 1442 def call_and_wait(self, call_name, archive_url, artifacts, files, 1443 error_message, expected_response=SUCCESS): 1444 """Helper method to make a urlopen call, and wait for artifacts staged. 1445 1446 @param call_name: name of devserver rpc call. 1447 @param archive_url: Google Storage URL for the build.. 1448 @param artifacts: Comma separated list of artifacts to download. 1449 @param files: Comma separated list of files to download. 1450 @param expected_response: Expected response from rpc, default to 1451 |Success|. If it's set to None, do not compare 1452 the actual response. Any response is consider 1453 to be good. 1454 @param error_message: Error message to be thrown if response does not 1455 match expected_response. 1456 1457 @return: The response from rpc. 1458 @raise DevServerException upon any return code that's expected_response. 1459 1460 """ 1461 kwargs = {'archive_url': archive_url, 1462 'artifacts': artifacts, 1463 'files': files} 1464 return self._call_and_wait(call_name, error_message, 1465 expected_response, **kwargs) 1466 1467 1468 @remote_devserver_call() 1469 def stage_artifacts(self, image=None, artifacts=None, files='', 1470 archive_url=None): 1471 """Tell the devserver to download and stage |artifacts| from |image|. 1472 1473 This is the main call point for staging any specific artifacts for a 1474 given build. To see the list of artifacts one can stage see: 1475 1476 ~src/platfrom/dev/artifact_info.py. 1477 1478 This is maintained along with the actual devserver code. 1479 1480 @param image: the image to fetch and stage. 1481 @param artifacts: A list of artifacts. 1482 @param files: A list of files to stage. 1483 @param archive_url: Optional parameter that has the archive_url to stage 1484 this artifact from. Default is specified in autotest config + 1485 image. 1486 1487 @raise DevServerException upon any return code that's not HTTP OK. 1488 """ 1489 if not artifacts and not files: 1490 raise DevServerException('Must specify something to stage.') 1491 image = self.translate(image) 1492 self._stage_artifacts(image, artifacts, files, archive_url) 1493 1494 1495 @remote_devserver_call(timeout_min=DEVSERVER_SSH_TIMEOUT_MINS) 1496 def list_image_dir(self, image): 1497 """List the contents of the image stage directory, on the devserver. 1498 1499 @param image: The image name, eg: <board>-<branch>/<Milestone>-<build>. 1500 1501 @raise DevServerException upon any return code that's not HTTP OK. 1502 """ 1503 image = self.translate(image) 1504 logging.info('Requesting contents from devserver %s for image %s', 1505 self.url(), image) 1506 archive_url = _get_storage_server_for_artifacts() + image 1507 call = self.build_call('list_image_dir', archive_url=archive_url) 1508 response = self.run_call(call, readline=True) 1509 for line in response: 1510 logging.info(line) 1511 1512 1513 def trigger_download(self, image, synchronous=True): 1514 """Tell the devserver to download and stage |image|. 1515 1516 Tells the devserver to fetch |image| from the image storage server 1517 named by _get_image_storage_server(). 1518 1519 If |synchronous| is True, waits for the entire download to finish 1520 staging before returning. Otherwise only the artifacts necessary 1521 to start installing images onto DUT's will be staged before returning. 1522 A caller can then call finish_download to guarantee the rest of the 1523 artifacts have finished staging. 1524 1525 @param image: the image to fetch and stage. 1526 @param synchronous: if True, waits until all components of the image are 1527 staged before returning. 1528 1529 @raise DevServerException upon any return code that's not HTTP OK. 1530 1531 """ 1532 image = self.translate(image) 1533 artifacts = _ARTIFACTS_TO_BE_STAGED_FOR_IMAGE 1534 self._trigger_download(image, artifacts, files='', 1535 synchronous=synchronous) 1536 1537 1538 @remote_devserver_call() 1539 def setup_telemetry(self, build): 1540 """Tell the devserver to setup telemetry for this build. 1541 1542 The devserver will stage autotest and then extract the required files 1543 for telemetry. 1544 1545 @param build: the build to setup telemetry for. 1546 1547 @returns path on the devserver that telemetry is installed to. 1548 """ 1549 build = self.translate(build) 1550 archive_url = _get_image_storage_server() + build 1551 call = self.build_call('setup_telemetry', archive_url=archive_url) 1552 try: 1553 response = self.run_call(call) 1554 except six.moves.http_client.BadStatusLine as e: 1555 logging.error(e) 1556 raise DevServerException('Received Bad Status line, Devserver %s ' 1557 'might have gone down while handling ' 1558 'the call: %s' % (self.url(), call)) 1559 return response 1560 1561 1562 def finish_download(self, image): 1563 """Tell the devserver to finish staging |image|. 1564 1565 If trigger_download is called with synchronous=False, it will return 1566 before all artifacts have been staged. This method contacts the 1567 devserver and blocks until all staging is completed and should be 1568 called after a call to trigger_download. 1569 1570 @param image: the image to fetch and stage. 1571 @raise DevServerException upon any return code that's not HTTP OK. 1572 """ 1573 image = self.translate(image) 1574 artifacts = _ARTIFACTS_TO_BE_STAGED_FOR_IMAGE_WITH_AUTOTEST 1575 self._finish_download(image, artifacts, files='') 1576 1577 1578 def get_update_url(self, image): 1579 """Returns the url that should be passed to the updater. 1580 1581 @param image: the image that was fetched. 1582 """ 1583 image = self.translate(image) 1584 url_pattern = CONFIG.get_config_value('CROS', 'image_url_pattern', 1585 type=str) 1586 return (url_pattern % (self.url(), image)) 1587 1588 1589 def get_staged_file_url(self, filename, image): 1590 """Returns the url of a staged file for this image on the devserver.""" 1591 return '/'.join([self._get_image_url(image), filename]) 1592 1593 1594 def get_test_image_url(self, image): 1595 """Returns a URL to a staged test image. 1596 1597 @param image: the image that was fetched. 1598 1599 @return A fully qualified URL that can be used for downloading the 1600 image. 1601 1602 """ 1603 return self._get_image_url(image) + '/chromiumos_test_image.bin' 1604 1605 1606 def get_recovery_image_url(self, image): 1607 """Returns a URL to a staged recovery image. 1608 1609 @param image: the image that was fetched. 1610 1611 @return A fully qualified URL that can be used for downloading the 1612 image. 1613 1614 """ 1615 return self._get_image_url(image) + '/recovery_image.bin' 1616 1617 1618 @remote_devserver_call() 1619 def get_dependencies_file(self, build): 1620 """Ask the dev server for the contents of the suite dependencies file. 1621 1622 Ask the dev server at |self._dev_server| for the contents of the 1623 pre-processed suite dependencies file (at DEPENDENCIES_FILE) 1624 for |build|. 1625 1626 @param build: The build (e.g. x86-mario-release/R21-2333.0.0) 1627 whose dependencies the caller is interested in. 1628 @return The contents of the dependencies file, which should eval to 1629 a dict of dicts, as per bin_utils/suite_preprocessor.py. 1630 @raise DevServerException upon any return code that's not HTTP OK. 1631 """ 1632 build = self.translate(build) 1633 call = self.build_call('controlfiles', 1634 build=build, control_path=DEPENDENCIES_FILE) 1635 return self.run_call(call) 1636 1637 1638 @remote_devserver_call() 1639 def get_latest_build_in_gs(self, board): 1640 """Ask the devservers for the latest offical build in Google Storage. 1641 1642 @param board: The board for who we want the latest official build. 1643 @return A string of the returned build rambi-release/R37-5868.0.0 1644 @raise DevServerException upon any return code that's not HTTP OK. 1645 """ 1646 call = self.build_call( 1647 'xbuddy_translate/remote/%s/latest-official' % board, 1648 image_dir=_get_image_storage_server()) 1649 image_name = self.run_call(call) 1650 return os.path.dirname(image_name) 1651 1652 1653 def translate(self, build_name): 1654 """Translate the build name if it's in LATEST format. 1655 1656 If the build name is in the format [builder]/LATEST, return the latest 1657 build in Google Storage otherwise return the build name as is. 1658 1659 @param build_name: build_name to check. 1660 1661 @return The actual build name to use. 1662 """ 1663 match = re.match(r'([\w-]+)-(\w+)/LATEST', build_name, re.I) 1664 if not match: 1665 return build_name 1666 translated_build = self.get_latest_build_in_gs(match.groups()[0]) 1667 logging.debug('Translated relative build %s to %s', build_name, 1668 translated_build) 1669 return translated_build 1670 1671 1672 @classmethod 1673 @remote_devserver_call() 1674 def get_latest_build(cls, target, milestone=''): 1675 """Ask all the devservers for the latest build for a given target. 1676 1677 @param target: The build target, typically a combination of the board 1678 and the type of build e.g. x86-mario-release. 1679 @param milestone: For latest build set to '', for builds only in a 1680 specific milestone set to a str of format Rxx 1681 (e.g. R16). Default: ''. Since we are dealing with a 1682 webserver sending an empty string, '', ensures that 1683 the variable in the URL is ignored as if it was set 1684 to None. 1685 @return A string of the returned build e.g. R20-2226.0.0. 1686 @raise DevServerException upon any return code that's not HTTP OK. 1687 """ 1688 calls = cls.build_all_calls('latestbuild', target=target, 1689 milestone=milestone) 1690 latest_builds = [] 1691 for call in calls: 1692 latest_builds.append(cls.run_call(call)) 1693 1694 return max(latest_builds, key=version.LooseVersion) 1695 1696 1697 def _read_json_response_from_devserver(self, response): 1698 """Reads the json response from the devserver. 1699 1700 This is extracted to its own function so that it can be easily mocked. 1701 @param response: the response for a devserver. 1702 """ 1703 try: 1704 return json.loads(response) 1705 except ValueError as e: 1706 logging.debug('Failed to load json response: %s', response) 1707 raise DevServerException(e) 1708 1709 1710 def _check_error_message(self, error_patterns_to_check, error_msg): 1711 """Detect whether specific error pattern exist in error message. 1712 1713 @param error_patterns_to_check: the error patterns to check 1714 @param error_msg: the error message which may include any error 1715 pattern. 1716 1717 @return A boolean variable, True if error_msg contains any error 1718 pattern in error_patterns_to_check, False otherwise. 1719 """ 1720 for err in error_patterns_to_check: 1721 if err in error_msg: 1722 return True 1723 1724 return False 1725 1726 1727class AndroidBuildServer(ImageServerBase): 1728 """Class for DevServer that handles RPCs related to Android builds. 1729 1730 The calls to devserver to stage artifacts, including stage and download, are 1731 made in async mode. That is, when caller makes an RPC |stage| to request 1732 devserver to stage certain artifacts, devserver handles the call and starts 1733 staging artifacts in a new thread, and return |Success| without waiting for 1734 staging being completed. When caller receives message |Success|, it polls 1735 devserver's is_staged call until all artifacts are staged. 1736 Such mechanism is designed to prevent cherrypy threads in devserver being 1737 running out, as staging artifacts might take long time, and cherrypy starts 1738 with a fixed number of threads that handle devserver rpc. 1739 """ 1740 1741 def wait_for_artifacts_staged(self, target, build_id, branch, 1742 archive_url=None, artifacts='', files=''): 1743 """Polling devserver.is_staged until all artifacts are staged. 1744 1745 @param target: Target of the android build to stage, e.g., 1746 shamu-userdebug. 1747 @param build_id: Build id of the android build to stage. 1748 @param branch: Branch of the android build to stage. 1749 @param archive_url: Google Storage URL for the build. 1750 @param artifacts: Comma separated list of artifacts to download. 1751 @param files: Comma separated list of files to download. 1752 1753 @return: True if all artifacts are staged in devserver. 1754 """ 1755 kwargs = {'target': target, 1756 'build_id': build_id, 1757 'branch': branch, 1758 'artifacts': artifacts, 1759 'files': files, 1760 'os_type': 'android'} 1761 if archive_url: 1762 kwargs['archive_url'] = archive_url 1763 return self._poll_is_staged(**kwargs) 1764 1765 1766 @remote_devserver_call() 1767 def call_and_wait(self, call_name, target, build_id, branch, archive_url, 1768 artifacts, files, error_message, 1769 expected_response=SUCCESS): 1770 """Helper method to make a urlopen call, and wait for artifacts staged. 1771 1772 @param call_name: name of devserver rpc call. 1773 @param target: Target of the android build to stage, e.g., 1774 shamu-userdebug. 1775 @param build_id: Build id of the android build to stage. 1776 @param branch: Branch of the android build to stage. 1777 @param archive_url: Google Storage URL for the CrOS build. 1778 @param artifacts: Comma separated list of artifacts to download. 1779 @param files: Comma separated list of files to download. 1780 @param expected_response: Expected response from rpc, default to 1781 |Success|. If it's set to None, do not compare 1782 the actual response. Any response is consider 1783 to be good. 1784 @param error_message: Error message to be thrown if response does not 1785 match expected_response. 1786 1787 @return: The response from rpc. 1788 @raise DevServerException upon any return code that's expected_response. 1789 1790 """ 1791 kwargs = {'target': target, 1792 'build_id': build_id, 1793 'branch': branch, 1794 'artifacts': artifacts, 1795 'files': files, 1796 'os_type': 'android'} 1797 if archive_url: 1798 kwargs['archive_url'] = archive_url 1799 return self._call_and_wait(call_name, error_message, expected_response, 1800 **kwargs) 1801 1802 1803 @remote_devserver_call() 1804 def stage_artifacts(self, target=None, build_id=None, branch=None, 1805 image=None, artifacts=None, files='', archive_url=None): 1806 """Tell the devserver to download and stage |artifacts| from |image|. 1807 1808 This is the main call point for staging any specific artifacts for a 1809 given build. To see the list of artifacts one can stage see: 1810 1811 ~src/platfrom/dev/artifact_info.py. 1812 1813 This is maintained along with the actual devserver code. 1814 1815 @param target: Target of the android build to stage, e.g., 1816 shamu-userdebug. 1817 @param build_id: Build id of the android build to stage. 1818 @param branch: Branch of the android build to stage. 1819 @param image: Name of a build to test, in the format of 1820 branch/target/build_id 1821 @param artifacts: A list of artifacts. 1822 @param files: A list of files to stage. 1823 @param archive_url: Optional parameter that has the archive_url to stage 1824 this artifact from. Default is specified in autotest config + 1825 image. 1826 1827 @raise DevServerException upon any return code that's not HTTP OK. 1828 """ 1829 if image and not target and not build_id and not branch: 1830 branch, target, build_id = utils.parse_launch_control_build(image) 1831 if not target or not build_id or not branch: 1832 raise DevServerException('Must specify all build info (target, ' 1833 'build_id and branch) to stage.') 1834 1835 android_build_info = {'target': target, 1836 'build_id': build_id, 1837 'branch': branch} 1838 if not artifacts and not files: 1839 raise DevServerException('Must specify something to stage.') 1840 if not all(android_build_info.values()): 1841 raise DevServerException( 1842 'To stage an Android build, must specify target, build id ' 1843 'and branch.') 1844 build = ANDROID_BUILD_NAME_PATTERN % android_build_info 1845 self._stage_artifacts(build, artifacts, files, archive_url, 1846 **android_build_info) 1847 1848 def trigger_download(self, target, build_id, branch, artifacts=None, 1849 files='', os='android', synchronous=True): 1850 """Tell the devserver to download and stage an Android build. 1851 1852 Tells the devserver to fetch an Android build from the image storage 1853 server named by _get_image_storage_server(). 1854 1855 If |synchronous| is True, waits for the entire download to finish 1856 staging before returning. Otherwise only the artifacts necessary 1857 to start installing images onto DUT's will be staged before returning. 1858 A caller can then call finish_download to guarantee the rest of the 1859 artifacts have finished staging. 1860 1861 @param target: Target of the android build to stage, e.g., 1862 shamu-userdebug. 1863 @param build_id: Build id of the android build to stage. 1864 @param branch: Branch of the android build to stage. 1865 @param artifacts: A string of artifacts separated by comma. If None, 1866 use the default artifacts for Android or Brillo build. 1867 @param files: String of file seperated by commas. 1868 @param os: OS artifacts to download (android/brillo). 1869 @param synchronous: if True, waits until all components of the image are 1870 staged before returning. 1871 1872 @raise DevServerException upon any return code that's not HTTP OK. 1873 1874 """ 1875 android_build_info = {'target': target, 1876 'build_id': build_id, 1877 'branch': branch} 1878 build = ANDROID_BUILD_NAME_PATTERN % android_build_info 1879 if not artifacts: 1880 board = target.split('-')[0] 1881 artifacts = ( 1882 android_utils.AndroidArtifacts.get_artifacts_for_reimage( 1883 board, os)) 1884 self._trigger_download(build, artifacts, files=files, 1885 synchronous=synchronous, **android_build_info) 1886 1887 1888 def finish_download(self, target, build_id, branch, os='android'): 1889 """Tell the devserver to finish staging an Android build. 1890 1891 If trigger_download is called with synchronous=False, it will return 1892 before all artifacts have been staged. This method contacts the 1893 devserver and blocks until all staging is completed and should be 1894 called after a call to trigger_download. 1895 1896 @param target: Target of the android build to stage, e.g., 1897 shamu-userdebug. 1898 @param build_id: Build id of the android build to stage. 1899 @param branch: Branch of the android build to stage. 1900 @param os: OS artifacts to download (android/brillo). 1901 1902 @raise DevServerException upon any return code that's not HTTP OK. 1903 """ 1904 android_build_info = {'target': target, 1905 'build_id': build_id, 1906 'branch': branch} 1907 build = ANDROID_BUILD_NAME_PATTERN % android_build_info 1908 board = target.split('-')[0] 1909 artifacts = ( 1910 android_utils.AndroidArtifacts.get_artifacts_for_reimage( 1911 board)) 1912 self._finish_download(build, artifacts, files='', **android_build_info) 1913 1914 1915 def get_staged_file_url(self, filename, target, build_id, branch): 1916 """Returns the url of a staged file for this image on the devserver. 1917 1918 @param filename: Name of the file. 1919 @param target: Target of the android build to stage, e.g., 1920 shamu-userdebug. 1921 @param build_id: Build id of the android build to stage. 1922 @param branch: Branch of the android build to stage. 1923 1924 @return: The url of a staged file for this image on the devserver. 1925 """ 1926 android_build_info = {'target': target, 1927 'build_id': build_id, 1928 'branch': branch, 1929 'os_type': 'android'} 1930 build = ANDROID_BUILD_NAME_PATTERN % android_build_info 1931 return '/'.join([self._get_image_url(build), filename]) 1932 1933 1934 @remote_devserver_call() 1935 def translate(self, build_name): 1936 """Translate the build name if it's in LATEST format. 1937 1938 If the build name is in the format [branch]/[target]/LATEST, return the 1939 latest build in Launch Control otherwise return the build name as is. 1940 1941 @param build_name: build_name to check. 1942 1943 @return The actual build name to use. 1944 """ 1945 branch, target, build_id = utils.parse_launch_control_build(build_name) 1946 if build_id.upper() != 'LATEST': 1947 return build_name 1948 call = self.build_call('latestbuild', branch=branch, target=target, 1949 os_type='android') 1950 translated_build_id = self.run_call(call) 1951 translated_build = (ANDROID_BUILD_NAME_PATTERN % 1952 {'branch': branch, 1953 'target': target, 1954 'build_id': translated_build_id}) 1955 logging.debug('Translated relative build %s to %s', build_name, 1956 translated_build) 1957 return translated_build 1958 1959 1960def _is_load_healthy(load): 1961 """Check if devserver's load meets the minimum threshold. 1962 1963 @param load: The devserver's load stats to check. 1964 1965 @return: True if the load meets the minimum threshold. Return False 1966 otherwise. 1967 1968 """ 1969 # Threshold checks, including CPU load. 1970 if load[DevServer.CPU_LOAD] > DevServer.MAX_CPU_LOAD: 1971 logging.debug('CPU load of devserver %s is at %s%%, which is higher ' 1972 'than the threshold of %s%%', load['devserver'], 1973 load[DevServer.CPU_LOAD], DevServer.MAX_CPU_LOAD) 1974 return False 1975 if load[DevServer.NETWORK_IO] > DevServer.MAX_NETWORK_IO: 1976 logging.debug('Network IO of devserver %s is at %i Bps, which is ' 1977 'higher than the threshold of %i bytes per second.', 1978 load['devserver'], load[DevServer.NETWORK_IO], 1979 DevServer.MAX_NETWORK_IO) 1980 return False 1981 return True 1982 1983 1984def _compare_load(devserver1, devserver2): 1985 """Comparator function to compare load between two devservers. 1986 1987 @param devserver1: A dictionary of devserver load stats to be compared. 1988 @param devserver2: A dictionary of devserver load stats to be compared. 1989 1990 @return: Negative value if the load of `devserver1` is less than the load 1991 of `devserver2`. Return positive value otherwise. 1992 1993 """ 1994 return int(devserver1[DevServer.DISK_IO] - devserver2[DevServer.DISK_IO]) 1995 1996 1997def _get_subnet_for_host_ip(host_ip, 1998 restricted_subnets=utils.RESTRICTED_SUBNETS): 1999 """Get the subnet for a given host IP. 2000 2001 @param host_ip: the IP of a DUT. 2002 @param restricted_subnets: A list of restriected subnets. 2003 2004 @return: a (subnet_ip, mask_bits) tuple. If no matched subnet for the 2005 host_ip, return (None, None). 2006 """ 2007 for subnet_ip, mask_bits in restricted_subnets: 2008 if utils.is_in_same_subnet(host_ip, subnet_ip, mask_bits): 2009 return subnet_ip, mask_bits 2010 2011 return None, None 2012 2013 2014def get_least_loaded_devserver(devserver_type=ImageServer, hostname=None): 2015 """Get the devserver with the least load. 2016 2017 Iterate through all devservers and get the one with least load. 2018 2019 TODO(crbug.com/486278): Devserver with required build already staged should 2020 take higher priority. This will need check_health call to be able to verify 2021 existence of a given build/artifact. Also, in case all devservers are 2022 overloaded, the logic here should fall back to the old behavior that randomly 2023 selects a devserver based on the hash of the image name/url. 2024 2025 @param devserver_type: Type of devserver to select from. Default is set to 2026 ImageServer. 2027 @param hostname: Hostname of the dut that the devserver is used for. The 2028 picked devserver needs to respect the location of the host if 2029 `prefer_local_devserver` is set to True or `restricted_subnets` is 2030 set. 2031 2032 @return: Name of the devserver with the least load. 2033 2034 """ 2035 logging.debug('Get the least loaded %r', devserver_type) 2036 devservers, can_retry = devserver_type.get_available_devservers( 2037 hostname) 2038 # If no healthy devservers available and can_retry is False, return None. 2039 # Otherwise, relax the constrain on hostname, allow all devservers to be 2040 # available. 2041 if not devserver_type.get_healthy_devserver('', devservers): 2042 if not can_retry: 2043 return None 2044 else: 2045 devservers, _ = devserver_type.get_available_devservers() 2046 2047 # get_devserver_load call needs to be made in a new process to allow force 2048 # timeout using signal. 2049 output = multiprocessing.Queue() 2050 processes = [] 2051 for devserver in devservers: 2052 processes.append(multiprocessing.Process( 2053 target=devserver_type.get_devserver_load_wrapper, 2054 args=(devserver, TIMEOUT_GET_DEVSERVER_LOAD, output))) 2055 2056 for p in processes: 2057 p.start() 2058 for p in processes: 2059 # The timeout for the process commands aren't reliable. Add 2060 # some extra time to the timeout for potential overhead in the 2061 # subprocesses. crbug.com/913695 2062 p.join(TIMEOUT_GET_DEVSERVER_LOAD + 10) 2063 # Read queue before killing processes to avoid corrupting the queue. 2064 loads = [output.get() for p in processes if not p.is_alive()] 2065 for p in processes: 2066 if p.is_alive(): 2067 p.terminate() 2068 # Filter out any load failed to be retrieved or does not support load check. 2069 loads = [load for load in loads if load and DevServer.CPU_LOAD in load and 2070 DevServer.is_free_disk_ok(load) and 2071 DevServer.is_apache_client_count_ok(load)] 2072 if not loads: 2073 logging.debug('Failed to retrieve load stats from any devserver. No ' 2074 'load balancing can be applied.') 2075 return None 2076 loads = [load for load in loads if _is_load_healthy(load)] 2077 if not loads: 2078 logging.error('No devserver has the capacity to be selected.') 2079 return None 2080 loads = sorted(loads, cmp=_compare_load) 2081 return loads[0]['devserver'] 2082 2083 2084def resolve(build, hostname=None, ban_list=None): 2085 """Resolve a devserver can be used for given build and hostname. 2086 2087 @param build: Name of a build to stage on devserver, e.g., 2088 ChromeOS build: daisy-release/R50-1234.0.0 2089 Launch Control build: git_mnc_release/shamu-eng 2090 @param hostname: Hostname of a devserver for, default is None, which means 2091 devserver is not restricted by the network location of the host. 2092 @param ban_list: The ban_list of devservers shouldn't be chosen. 2093 2094 @return: A DevServer instance that can be used to stage given build for the 2095 given host. 2096 """ 2097 if utils.is_launch_control_build(build): 2098 return AndroidBuildServer.resolve(build, hostname) 2099 else: 2100 return ImageServer.resolve(build, hostname, ban_list=ban_list) 2101