• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Lint as: python2, python3
2# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6from __future__ import absolute_import
7from __future__ import division
8from __future__ import print_function
9
10from distutils import version
11import json
12import logging
13import multiprocessing
14import os
15import re
16import six
17from six.moves import urllib
18import six.moves.html_parser
19import six.moves.http_client
20import six.moves.urllib.parse
21import time
22
23from autotest_lib.client.bin import utils as bin_utils
24from autotest_lib.client.common_lib import android_utils
25from autotest_lib.client.common_lib import error
26from autotest_lib.client.common_lib import global_config
27from autotest_lib.client.common_lib import seven
28from autotest_lib.client.common_lib import utils
29from autotest_lib.client.common_lib.cros import retry
30
31# TODO(cmasone): redo this class using requests module; http://crosbug.com/30107
32
33try:
34    from chromite.lib import metrics
35except ImportError:
36    metrics = utils.metrics_mock
37
38
# Shared global configuration object; the settings below are read from it
# once, when this module is imported.
CONFIG = global_config.global_config
# This file is generated at build time and specifies, per suite and per test,
# the DEPENDENCIES list specified in each control file.  It's a dict of dicts:
# {'bvt':   {'/path/to/autotest/control/site_tests/test1/control': ['dep1']}
#  'suite': {'/path/to/autotest/control/site_tests/test2/control': ['dep2']}
#  'power': {'/path/to/autotest/control/site_tests/test1/control': ['dep1'],
#            '/path/to/autotest/control/site_tests/test3/control': ['dep3']}
# }
DEPENDENCIES_FILE = 'test_suites/dependency_info'
# Number of seconds for caller to poll devserver's is_staged call to check if
# artifacts are staged.
_ARTIFACT_STAGE_POLLING_INTERVAL = 5
# Artifacts that should be staged when client calls devserver RPC to stage an
# image.
_ARTIFACTS_TO_BE_STAGED_FOR_IMAGE = 'full_payload,test_suites,stateful'
# Artifacts that should be staged when client calls devserver RPC to stage an
# image with autotest artifact.
_ARTIFACTS_TO_BE_STAGED_FOR_IMAGE_WITH_AUTOTEST = ('full_payload,test_suites,'
                                                   'control_files,stateful,'
                                                   'autotest_packages')
# Whether the devserver health checks (free disk space, Apache client count)
# are skipped. Read from the CROS/skip_devserver_health_check config value.
SKIP_DEVSERVER_HEALTH_CHECK = CONFIG.get_config_value(
        'CROS', 'skip_devserver_health_check', type=bool)
# Number of seconds for the call to get devserver load to time out.
TIMEOUT_GET_DEVSERVER_LOAD = 2.0

# Android build name pattern, read from the CROS/android_build_name_pattern
# config value with every backslash removed.
ANDROID_BUILD_NAME_PATTERN = CONFIG.get_config_value(
        'CROS', 'android_build_name_pattern', type=str).replace('\\', '')

# Return value from a devserver RPC indicating the call succeeded.
SUCCESS = 'Success'

# The timeout, in minutes, for a given devserver ssh call. Also the default
# timeout used when querying a devserver's load and health.
DEVSERVER_SSH_TIMEOUT_MINS = 1

# Error messages for an invalid devserver response and for a devserver that is
# down.
ERR_MSG_FOR_INVALID_DEVSERVER_RESPONSE = 'Proxy Error'
ERR_MSG_FOR_DOWN_DEVSERVER = 'Service Unavailable'

# Substring searched for in an exception's text to recognize a devserver call
# that timed out.
ERR_MSG_FOR_TIMED_OUT_CALL = 'timeout'

# The timeout, in minutes, for waiting on devserver staging. Also the default
# retry timeout of the remote_devserver_call decorator.
DEVSERVER_IS_STAGING_RETRY_MIN = 100

# Provision error patterns.
# Each entry is a (regular expression, classification string) pair.
# People who see this should know that they shouldn't change these
# classification strings. These strings are used for monitoring provision
# failures. Any changes may mess up the stats.
_EXCEPTION_PATTERNS = [
        # Raised when devserver portfile does not exist on host.
        (r".*Devserver portfile does not exist!.*$",
         '(1) Devserver portfile does not exist on host'),
        # Raised when devserver cannot copy packages to host.
        (r".*Could not copy .* to device.*$",
         '(2) Cannot copy packages to host'),
        # Raised when devserver fails to run specific commands on host.
        (r".*cwd=None, extra env=\{'LC_MESSAGES': 'C'\}.*$",
         '(3) Fail to run specific command on host'),
        # Raised when new build fails to boot on the host.
        (r'.*RootfsUpdateError: Build .* failed to boot on.*$',
         '(4) Build failed to boot on host'),
        # Raised when the auto-update process is timed out.
        (r'.*The CrOS auto-update process is timed out, '
         'thus will be terminated.*$',
         '(5) Auto-update is timed out'),
        # Raised when the host is not pingable.
        (r".*DeviceNotPingableError.*$",
         '(6) Host is not pingable during auto-update'),
        # Raised when hosts have unexpected status after rootfs update.
        (r'.*Update failed with unexpected update status: '
         'UPDATE_STATUS_IDLE.*$',
         '(7) Host has unexpected status: UPDATE_STATUS_IDLE after rootfs '
         'update'),
        # Raised when devserver returns non-json response to shard/drone.
        (r'.*No JSON object could be decoded.*$',
         '(8) Devserver returned non-json object'),
        # Raised when devserver loses host's ssh connection
        (r'.*SSHConnectionError\: .* port 22\: Connection timed out.*$',
         "(9) Devserver lost host's ssh connection"),
        # Raised when error happens in writing files to host
        (r'.*Write failed\: Broken pipe.*$',
         "(10) Broken pipe while writing or connecting to host")]

# Default for the prefer_local_devserver argument of
# DevServer.get_available_devservers; read from CROS/prefer_local_devserver.
PREFER_LOCAL_DEVSERVER = CONFIG.get_config_value(
        'CROS', 'prefer_local_devserver', type=bool, default=False)

# Read from CROS/enable_ssh_connection_for_devserver; defaults to False.
ENABLE_SSH_CONNECTION_FOR_DEVSERVER = CONFIG.get_config_value(
        'CROS', 'enable_ssh_connection_for_devserver', type=bool,
        default=False)

# Default number of subnet mask bits used when looking for devservers in the
# same subnet as a DUT.
DEFAULT_SUBNET_MASKBIT = 19
133
134
class DevServerException(Exception):
    """Error for a failed devserver call, e.g. a non-200 HTTP response."""
138
139
class DevServerOverloadException(Exception):
    """Error used when the dev server answers with a 502 HTTP response."""
143
class DevServerFailToLocateException(Exception):
    """Error used when no devserver at all could be located."""
147
148
class MarkupStripper(six.moves.html_parser.HTMLParser):
    """HTML parser that keeps only the text content of a document.

    Works by, basically, not doing anything for any tags, and only recording
    the content of text nodes in an internal list, which get_data() joins
    into a single string.

    How character references (e.g. &amp;) are treated is left to the
    underlying HTMLParser: Python 3 converts them to text by default, while
    Python 2 reports them through handlers this class does not override.
    """
    def __init__(self):
        # Run the base initializer instead of only calling reset(). On
        # Python 3 the base __init__ sets attributes (convert_charrefs) that
        # feed() reads; without them feed() fails with AttributeError. The
        # base __init__ also calls reset() itself. An explicit base-class
        # call is used rather than super() because Python 2's HTMLParser is
        # an old-style class.
        six.moves.html_parser.HTMLParser.__init__(self)
        self.fed = []


    def handle_data(self, d):
        """Consume content of text nodes, store it away.

        @param d: The text of one text node, as passed in by the parser.
        """
        self.fed.append(d)


    def get_data(self):
        """Concatenate and return all stored data.

        @return: All text collected so far, joined into one string.
        """
        return ''.join(self.fed)
168
169
def _strip_http_message(message):
    """Strip the HTML markup from an HTTP message.

    The message is first interpreted as UTF-32 text via seven.ensure_text.
    If that raises UnicodeDecodeError, the message is fed to the parser as
    is, except that Python 3 bytes are decoded first.

    @param message: A string (or raw bytes) returned by an HTTP call.

    @return: A string with the HTML markup being stripped.
    """
    strip = MarkupStripper()
    try:
        strip.feed(seven.ensure_text(message, 'utf_32'))
    except UnicodeDecodeError:
        # On Python 3 the parser only accepts text; feeding it raw bytes
        # raises TypeError. The condition is never true on Python 2 (where
        # bytes is str), so the Python 2 behavior is unchanged.
        if isinstance(message, bytes) and not isinstance(message, str):
            message = message.decode('utf-8', 'replace')
        strip.feed(message)
    return strip.get_data()
183
184
def _get_image_storage_server():
    """Read the image storage server from the global config.

    @return: The CROS/image_storage_server config value, as a string.
    """
    server = CONFIG.get_config_value('CROS', 'image_storage_server', type=str)
    return server
187
188
def _get_canary_channel_server():
    """Read the canary-channel server url from the global config.

    Example value:
    gsutil://chromeos-releases/canary-channel/<board>/<release>

    @return: The CROS/canary_channel_server config value, as a string.
    """
    server = CONFIG.get_config_value('CROS', 'canary_channel_server',
                                     type=str)
    return server
197
198
def _get_storage_server_for_artifacts(artifacts=None):
    """Pick the storage server that holds the given artifacts.

    The canary channel server is used only when a factory artifact is
    configured (CROS/factory_artifact) and it appears in |artifacts|.

    @param artifacts: A list of artifacts we need to stage.
    @return: The address of the storage server that has these artifacts.
             The default image storage server if no artifacts are specified.
    """
    factory_artifact = global_config.global_config.get_config_value(
            'CROS', 'factory_artifact', type=str, default='')
    needs_canary_server = (artifacts and factory_artifact and
                           factory_artifact in artifacts)
    if not needs_canary_server:
        return _get_image_storage_server()
    return _get_canary_channel_server()
211
212
213def _gs_or_local_archive_url_args(archive_url):
214    """Infer the devserver call arguments to use with the given archive_url.
215
216    @param archive_url: The archive url to include the in devserver RPC. This
217            can either e a GS path or a local path.
218    @return: A dict of arguments to include in the devserver call.
219    """
220    if not archive_url:
221        return {}
222    elif archive_url.startswith('gs://'):
223        return {'archive_url': archive_url}
224    else:
225        # For a local path, we direct the devserver to move the files while
226        # staging. This is the fastest way to stage local files, but deletes the
227        # files from the source. This is OK because the files are available on
228        # the devserver once staged.
229        return {
230                'local_path': archive_url,
231                'delete_source': True,
232        }
233
234
def _reverse_lookup_from_config(address):
    """Find the hostname configured for the given IP address.

    The lookup goes through the hostname-address map from the config file.
    When several hostnames map to the same IP address, the first one met
    while iterating the map wins.

    @param address: IP address string
    @returns: hostname string, or original input if not found
    """
    matching_hostnames = (
            name for name, mapped_addr
            in six.iteritems(_get_hostname_addr_map())
            if mapped_addr == address)
    return next(matching_hostnames, address)
250
251
def _get_hostname_addr_map():
    """Load the HOSTNAME_ADDR_MAP section of the global config.

    @return: dict mapping server hostnames to addresses
    """
    hostname_to_addr = CONFIG.get_section_as_dict('HOSTNAME_ADDR_MAP')
    return hostname_to_addr
258
259
def _get_dev_server_list():
    """Read the configured devservers from the global config.

    @return: The CROS/dev_server config value as a list; empty if unset.
    """
    dev_servers = CONFIG.get_config_value('CROS', 'dev_server', type=list,
                                          default=[])
    return dev_servers
262
263
def _get_crash_server_list():
    """Read the configured crash servers from the global config.

    @return: The CROS/crash_server config value as a list; empty if unset.
    """
    crash_servers = CONFIG.get_config_value('CROS', 'crash_server',
                                            type=list, default=[])
    return crash_servers
267
268
def remote_devserver_call(timeout_min=DEVSERVER_IS_STAGING_RETRY_MIN,
                          exception_to_raise=DevServerException):
    """A decorator to use with remote devserver calls.

    The decorated method is run under retry.retry, which is asked to retry
    on urllib.error.URLError, error.CmdError and DevServerOverloadException
    to avoid devserver flakiness. A urllib.error.HTTPError raised by the
    method is converted into a DevServerException whose message is the
    response body with its markup stripped.

    When the exception that finally escapes mentions a timeout, the event is
    logged and counted in a metric before the exception is re-raised.

    @param timeout_min: Retry timeout in minutes, handed to retry.retry.
    @param exception_to_raise: Exception type handed to retry.retry.

    @return: The decorator to apply to the devserver call.
    """
    #pylint: disable=C0111

    def inner_decorator(method):
        # Not every callable carries a __name__; fall back to None.
        label = getattr(method, '__name__', None)

        def metrics_wrapper(*args, **kwargs):
            retriable_errors = (urllib.error.URLError, error.CmdError,
                                DevServerOverloadException)

            @retry.retry(retriable_errors,
                         timeout_min=timeout_min,
                         exception_to_raise=exception_to_raise,
                         label=label)
            def wrapper():
                """This wrapper actually catches the HTTPError."""
                try:
                    return method(*args, **kwargs)
                except urllib.error.HTTPError as e:
                    error_markup = e.read()
                    raise DevServerException(_strip_http_message(error_markup))

            try:
                return wrapper()
            except Exception as e:
                if ERR_MSG_FOR_TIMED_OUT_CALL not in str(e):
                    raise

                # Work out which devserver was being called: either the
                # DevServer instance the method is bound to, or the
                # 'devserver' keyword argument.
                dev_server = None
                if args and isinstance(args[0], DevServer):
                    dev_server = args[0].hostname
                elif 'devserver' in kwargs:
                    dev_server = get_hostname(kwargs['devserver'])

                logging.debug('RPC call %s has timed out on devserver %s.',
                              label, dev_server)
                timeout_counter = metrics.Counter(
                        'chromeos/autotest/devserver/call_timeout')
                # NOTE(review): the 'healthy' field carries the RPC label.
                # It is kept as is because renaming it would change the
                # emitted metric - confirm before touching.
                timeout_counter.increment(fields={'dev_server': dev_server,
                                                  'healthy': label})
                raise

        return metrics_wrapper

    return inner_decorator
317
318
def get_hostname(url):
    """Extract the hostname portion of a URL.

    The URL has the form schema://hostname:port/path

    @param url: a Url string
    @return: a hostname string
    """
    parsed_url = six.moves.urllib.parse.urlparse(url)
    return parsed_url.hostname
328
329
def get_resolved_hostname(url):
    """Return the symbolic hostname for the given url.

    The host part of `url` is looked up in the hostname map from the config
    file, so that a numeric IP address is replaced by its symbolic name when
    one is configured. Otherwise the host part is returned unchanged.

    @param url  The URL with which to perform the conversion/lookup.
    """
    host_part = get_hostname(url)
    return _reverse_lookup_from_config(host_part)
339
340
341class DevServer(object):
342    """Base class for all DevServer-like server stubs.
343
344    This is the base class for interacting with all Dev Server-like servers.
345    A caller should instantiate a sub-class of DevServer with:
346
347    host = SubClassServer.resolve(build)
348    server = SubClassServer(host)
349    """
    # Minimum free disk space: is_free_disk_ok() rejects a devserver whose
    # reported FREE_DISK value is below this.
    _MIN_FREE_DISK_SPACE_GB = 20
    # Maximum live Apache client count: is_apache_client_count_ok() rejects a
    # devserver whose reported APACHE_CLIENT_COUNT value is above this.
    _MAX_APACHE_CLIENT_COUNT = 75
    # Threshold for the CPU load percentage for a devserver to be selected.
    MAX_CPU_LOAD = 80.0
    # Threshold for the network IO, set to 80MB/s
    MAX_NETWORK_IO = 1024 * 1024 * 80
    # Names of the entries in the load dictionary that
    # get_devserver_load() returns.
    DISK_IO = 'disk_total_bytes_per_second'
    NETWORK_IO = 'network_total_bytes_per_second'
    CPU_LOAD = 'cpu_percent'
    FREE_DISK = 'free_disk'
    AU_PROCESS = 'au_process_count'
    STAGING_THREAD_COUNT = 'staging_thread_count'
    APACHE_CLIENT_COUNT = 'apache_client_count'
363
364
365    def __init__(self, devserver):
366        self._devserver = devserver
367
368
369    def url(self):
370        """Returns the url for this devserver."""
371        return self._devserver
372
373
374    @property
375    def hostname(self):
376        """Return devserver hostname parsed from the devserver URL.
377
378        Note that this is likely parsed from the devserver URL from
379        shadow_config.ini, meaning that the "hostname" part of the
380        devserver URL is actually an IP address.
381
382        @return hostname string
383        """
384        return get_hostname(self.url())
385
386
387    @property
388    def resolved_hostname(self):
389        """Return devserver hostname, resolved from its IP address.
390
391        Unlike the hostname property, this property attempts to look up
392        the proper hostname from the devserver IP address.  If lookup
393        fails, then fall back to whatever the hostname property would
394        have returned.
395
396        @return hostname string
397        """
398        return _reverse_lookup_from_config(self.hostname)
399
400
401    @staticmethod
402    def get_server_url(url):
403        """Get the devserver url from a repo url, which includes build info.
404
405        @param url: A job repo url.
406
407        @return A devserver url, e.g., http://127.0.0.10:8080
408        """
409        res = six.moves.urllib.parse.urlparse(url)
410        if res.netloc:
411            return res.scheme + '://' + res.netloc
412
413
414    @classmethod
415    def get_devserver_load_wrapper(cls, devserver, timeout_sec, output):
416        """A wrapper function to call get_devserver_load in parallel.
417
418        @param devserver: url of the devserver.
419        @param timeout_sec: Number of seconds before time out the devserver
420                            call.
421        @param output: An output queue to save results to.
422        """
423        load = cls.get_devserver_load(devserver, timeout_min=timeout_sec/60.0)
424        if load:
425            load['devserver'] = devserver
426        output.put(load)
427
428
429    @classmethod
430    def get_devserver_load(cls, devserver,
431                           timeout_min=DEVSERVER_SSH_TIMEOUT_MINS):
432        """Returns True if the |devserver| is healthy to stage build.
433
434        @param devserver: url of the devserver.
435        @param timeout_min: How long to wait in minutes before deciding the
436                            the devserver is not up (float).
437
438        @return: A dictionary of the devserver's load.
439
440        """
441        call = cls._build_call(devserver, 'check_health')
442        @remote_devserver_call(timeout_min=timeout_min)
443        def get_load(devserver=devserver):
444            """Inner method that makes the call."""
445            return cls.run_call(call, timeout=timeout_min*60)
446
447        try:
448            return json.load(six.StringIO(get_load(devserver=devserver)))
449        except Exception as e:
450            logging.error('Devserver call failed: "%s", timeout: %s seconds,'
451                          ' Error: %s', call, timeout_min * 60, e)
452
453
454    @classmethod
455    def is_free_disk_ok(cls, load):
456        """Check if a devserver has enough free disk.
457
458        @param load: A dict of the load of the devserver.
459
460        @return: True if the devserver has enough free disk or disk check is
461                 skipped in global config.
462
463        """
464        if SKIP_DEVSERVER_HEALTH_CHECK:
465            logging.debug('devserver health check is skipped.')
466        elif load[cls.FREE_DISK] < cls._MIN_FREE_DISK_SPACE_GB:
467            return False
468
469        return True
470
471
472    @classmethod
473    def is_apache_client_count_ok(cls, load):
474        """Check if a devserver has enough Apache connections available.
475
476        Apache server by default has maximum of 150 concurrent connections. If
477        a devserver has too many live connections, it likely indicates the
478        server is busy handling many long running download requests, e.g.,
479        downloading stateful partitions. It is better not to add more requests
480        to it.
481
482        @param load: A dict of the load of the devserver.
483
484        @return: True if the devserver has enough Apache connections available,
485                 or disk check is skipped in global config.
486
487        """
488        if SKIP_DEVSERVER_HEALTH_CHECK:
489            logging.debug('devserver health check is skipped.')
490        elif cls.APACHE_CLIENT_COUNT not in load:
491            logging.debug('Apache client count is not collected from devserver.')
492        elif (load[cls.APACHE_CLIENT_COUNT] >
493              cls._MAX_APACHE_CLIENT_COUNT):
494            return False
495
496        return True
497
498
499    @classmethod
500    def devserver_healthy(cls, devserver,
501                          timeout_min=DEVSERVER_SSH_TIMEOUT_MINS):
502        """Returns True if the |devserver| is healthy to stage build.
503
504        @param devserver: url of the devserver.
505        @param timeout_min: How long to wait in minutes before deciding the
506                            the devserver is not up (float).
507
508        @return: True if devserver is healthy. Return False otherwise.
509
510        """
511        c = metrics.Counter('chromeos/autotest/devserver/devserver_healthy')
512        reason = ''
513        healthy = False
514        load = cls.get_devserver_load(devserver, timeout_min=timeout_min)
515        try:
516            if not load:
517                # Failed to get the load of devserver.
518                reason = '(1) Failed to get load.'
519                return False
520
521            apache_ok = cls.is_apache_client_count_ok(load)
522            if not apache_ok:
523                reason = '(2) Apache client count too high.'
524                logging.error('Devserver check_health failed. Live Apache client '
525                              'count is too high: %d.',
526                              load[cls.APACHE_CLIENT_COUNT])
527                return False
528
529            disk_ok = cls.is_free_disk_ok(load)
530            if not disk_ok:
531                reason = '(3) Disk space too low.'
532                logging.error('Devserver check_health failed. Free disk space is '
533                              'low. Only %dGB is available.',
534                              load[cls.FREE_DISK])
535            healthy = bool(disk_ok)
536            return disk_ok
537        finally:
538            c.increment(fields={'dev_server': cls(devserver).resolved_hostname,
539                                'healthy': healthy,
540                                'reason': reason})
541            # Monitor how many AU processes the devserver is currently running.
542            if load is not None and load.get(DevServer.AU_PROCESS):
543                c_au = metrics.Gauge(
544                        'chromeos/autotest/devserver/devserver_au_count')
545                c_au.set(
546                    load.get(DevServer.AU_PROCESS),
547                    fields={'dev_server': cls(devserver).resolved_hostname})
548
549
550    @staticmethod
551    def _build_call(host, method, **kwargs):
552        """Build a URL to |host| that calls |method|, passing |kwargs|.
553
554        Builds a URL that calls |method| on the dev server defined by |host|,
555        passing a set of key/value pairs built from the dict |kwargs|.
556
557        @param host: a string that is the host basename e.g. http://server:90.
558        @param method: the dev server method to call.
559        @param kwargs: a dict mapping arg names to arg values.
560        @return the URL string.
561        """
562        # If the archive_url is a local path, the args expected by the devserver
563        # are a little different.
564        archive_url_args = _gs_or_local_archive_url_args(
565                kwargs.pop('archive_url', None))
566        kwargs.update(archive_url_args)
567        if 'is_async' in kwargs:
568            f = kwargs.pop('is_async')
569            kwargs['async'] = f
570        argstr = '&'.join(["%s=%s" % x for x in six.iteritems(kwargs)])
571        return "%(host)s/%(method)s?%(argstr)s" % dict(
572                host=host, method=method, argstr=argstr)
573
574
575    def build_call(self, method, **kwargs):
576        """Builds a devserver RPC string that is used by 'run_call()'.
577
578        @param method: remote devserver method to call.
579        """
580        return self._build_call(self._devserver, method, **kwargs)
581
582
583    @classmethod
584    def build_all_calls(cls, method, **kwargs):
585        """Builds a list of URLs that makes RPC calls on all devservers.
586
587        Build a URL that calls |method| on the dev server, passing a set
588        of key/value pairs built from the dict |kwargs|.
589
590        @param method: the dev server method to call.
591        @param kwargs: a dict mapping arg names to arg values
592
593        @return the URL string
594        """
595        calls = []
596        # Note we use cls.servers as servers is class specific.
597        for server in cls.servers():
598            if cls.devserver_healthy(server):
599                calls.append(cls._build_call(server, method, **kwargs))
600
601        return calls
602
603
604    @classmethod
605    def run_call(cls, call, readline=False, timeout=None):
606        """Invoke a given devserver call using urllib.open.
607
608        Open the URL with HTTP, and return the text of the response. Exceptions
609        may be raised as for urllib2.urlopen().
610
611        @param call: a url string that calls a method to a devserver.
612        @param readline: whether read http response line by line.
613        @param timeout: The timeout seconds for this urlopen call.
614
615        @return the results of this call.
616        """
617        if timeout is not None:
618            return utils.urlopen_socket_timeout(
619                    call, timeout=timeout).read()
620        elif readline:
621            response = urllib.request.urlopen(call)
622            return [line.rstrip() for line in response]
623        else:
624            return urllib.request.urlopen(call).read()
625
626
627    @staticmethod
628    def servers():
629        """Returns a list of servers that can serve as this type of server."""
630        raise NotImplementedError()
631
632
633    @classmethod
634    def get_devservers_in_same_subnet(cls, ip, mask_bits=DEFAULT_SUBNET_MASKBIT,
635                                      unrestricted_only=False):
636        """Get the devservers in the same subnet of the given ip.
637
638        @param ip: The IP address of a dut to look for devserver.
639        @param mask_bits: Number of mask bits. Default is 19.
640        @param unrestricted_only: Set to True to select from devserver in
641                unrestricted subnet only. Default is False.
642
643        @return: A list of devservers in the same subnet of the given ip.
644
645        """
646        # server from cls.servers() is a URL, e.g., http://10.1.1.10:8082, so
647        # we need a dict to return the full devserver path once the IPs are
648        # filtered in get_servers_in_same_subnet.
649        server_names = {}
650        all_devservers = []
651        devservers = (cls.get_unrestricted_devservers() if unrestricted_only
652                      else cls.servers())
653        for server in devservers:
654            server_name = get_hostname(server)
655            server_names[server_name] = server
656            all_devservers.append(server_name)
657        if not all_devservers:
658            devserver_type = 'unrestricted only' if unrestricted_only else 'all'
659            raise DevServerFailToLocateException(
660                'Fail to locate a devserver for dut %s in %s devservers'
661                % (ip, devserver_type))
662
663        devservers = utils.get_servers_in_same_subnet(ip, mask_bits,
664                                                      all_devservers)
665        return [server_names[s] for s in devservers]
666
667
    @classmethod
    def get_unrestricted_devservers(
                cls, restricted_subnets=utils.RESTRICTED_SUBNETS):
        """Get the devservers not in any restricted subnet specified in
        restricted_subnets.

        NOTE(review): as written, every path returns cls.servers() without
        filtering by |restricted_subnets|. The metric name below suggests
        this is a deliberate hotfix - confirm before relying on filtering.

        @param restricted_subnets: A list of restricted subnets.

        @return: A list of devservers; currently all servers of this class.

        """
        if not restricted_subnets:
            return cls.servers()

        # NOTE(review): the counter is created but never incremented here;
        # whether creating it has any effect depends on the metrics library.
        metrics.Counter('chromeos/autotest/devserver/unrestricted_hotfix')
        return cls.servers()
684
685    @classmethod
686    def get_healthy_devserver(cls, build, devservers, ban_list=None):
687        """"Get a healthy devserver instance from the list of devservers.
688
689        @param build: The build (e.g. x86-mario-release/R18-1586.0.0-a1-b1514).
690        @param devservers: The devserver list to be chosen out a healthy one.
691        @param ban_list: The ban_list of devservers we don't want to choose.
692                Default is None.
693
694        @return: A DevServer object of a healthy devserver. Return None if no
695                healthy devserver is found.
696
697        """
698        logging.debug('Pick one healthy devserver from %r', devservers)
699        while devservers:
700            hash_index = hash(build) % len(devservers)
701            devserver = devservers.pop(hash_index)
702            logging.debug('Check health for %s', devserver)
703            if ban_list and devserver in ban_list:
704                continue
705
706            if cls.devserver_healthy(devserver):
707                logging.debug('Pick %s', devserver)
708                return cls(devserver)
709
710
711    @classmethod
712    def get_available_devservers(cls, hostname=None,
713                                 prefer_local_devserver=PREFER_LOCAL_DEVSERVER,
714                                 restricted_subnets=utils.RESTRICTED_SUBNETS):
715        """Get devservers in the same subnet of the given hostname.
716
717        @param hostname: Hostname of a DUT to choose devserver for.
718
719        @return: A tuple of (devservers, can_retry), devservers is a list of
720                 devservers that's available for the given hostname. can_retry
721                 is a flag that indicate if caller can retry the selection of
722                 devserver if no devserver in the returned devservers can be
723                 used. For example, if hostname is in a restricted subnet,
724                 can_retry will be False.
725        """
726        logging.info('Getting devservers for host: %s',  hostname)
727        host_ip = None
728        if hostname:
729            host_ip = bin_utils.get_ip_address(hostname)
730            if not host_ip:
731                logging.error('Failed to get IP address of %s. Will pick a '
732                              'devserver without subnet constraint.', hostname)
733
734        if not host_ip:
735            return cls.get_unrestricted_devservers(restricted_subnets), False
736
737        # Go through all restricted subnet settings and check if the DUT is
738        # inside a restricted subnet. If so, only return the devservers in the
739        # restricted subnet and doesn't allow retry.
740        if host_ip and restricted_subnets:
741            subnet_ip, mask_bits = _get_subnet_for_host_ip(
742                    host_ip, restricted_subnets=restricted_subnets)
743            if subnet_ip:
744                logging.debug('The host %s (%s) is in a restricted subnet. '
745                              'Try to locate a devserver inside subnet '
746                              '%s:%d.', hostname, host_ip, subnet_ip,
747                              mask_bits)
748                devservers = cls.get_devservers_in_same_subnet(
749                        subnet_ip, mask_bits)
750                return devservers, False
751
752        # If prefer_local_devserver is set to True and the host is not in
753        # restricted subnet, pick a devserver in the same subnet if possible.
754        # Set can_retry to True so it can pick a different devserver if all
755        # devservers in the same subnet are down.
756        if prefer_local_devserver:
757            return (cls.get_devservers_in_same_subnet(
758                    host_ip, DEFAULT_SUBNET_MASKBIT, True), True)
759
760        return cls.get_unrestricted_devservers(restricted_subnets), False
761
762
763    @classmethod
764    def resolve(cls, build, hostname=None, ban_list=None):
765        """"Resolves a build to a devserver instance.
766
767        @param build: The build (e.g. x86-mario-release/R18-1586.0.0-a1-b1514).
768        @param hostname: The hostname of dut that requests a devserver. It's
769                         used to make sure a devserver in the same subnet is
770                         preferred.
771        @param ban_list: The ban_list of devservers shouldn't be chosen.
772
773        @raise DevServerException: If no devserver is available.
774        """
775        tried_devservers = set()
776        devservers, can_retry = cls.get_available_devservers(hostname)
777        if devservers:
778            tried_devservers |= set(devservers)
779
780        devserver = cls.get_healthy_devserver(build, devservers,
781                                              ban_list=ban_list)
782
783        if not devserver and can_retry:
784            # Find available devservers without dut location constrain.
785            devservers, _ = cls.get_available_devservers()
786            devserver = cls.get_healthy_devserver(build, devservers,
787                                                  ban_list=ban_list)
788            if devservers:
789                tried_devservers |= set(devservers)
790        if devserver:
791            return devserver
792        else:
793            subnet = 'unrestricted subnet'
794            if hostname is not None:
795                host_ip = bin_utils.get_ip_address(hostname)
796                if host_ip:
797                    subnet_ip, mask_bits = _get_subnet_for_host_ip(host_ip)
798                    subnet = '%s/%s' % (str(subnet_ip), str(mask_bits))
799
800            error_msg = ('All devservers in subnet: %s are currently down: '
801                         '%s. (dut hostname: %s)' %
802                         (subnet, tried_devservers, hostname))
803            logging.error(error_msg)
804            c = metrics.Counter(
805                    'chromeos/autotest/devserver/subnet_without_devservers')
806            c.increment(fields={'subnet': subnet, 'hostname': str(hostname)})
807            raise DevServerException(error_msg)
808
809
810    @classmethod
811    def random(cls):
812        """Return a random devserver that's available.
813
814        Devserver election in `resolve` method is based on a hash of the
815        build that a caller wants to stage. The purpose is that different
816        callers requesting for the same build can get the same devserver,
817        while the lab is able to distribute different builds across all
818        devservers. That helps to reduce the duplication of builds across
819        all devservers.
820        This function returns a random devserver, by passing a random
821        pseudo build name to `resolve `method.
822        """
823        return cls.resolve(build=str(time.time()))
824
825
class CrashServer(DevServer):
    """Class of DevServer that symbolicates crash dumps."""

    @staticmethod
    def servers():
        """Returns the crash servers given by _get_crash_server_list()."""
        return _get_crash_server_list()


    @remote_devserver_call()
    def symbolicate_dump(self, minidump_path, build):
        """Ask the devserver to symbolicate the dump at minidump_path.

        Stage the debug symbols for |build| and, if that works, ask the
        devserver to symbolicate the dump at |minidump_path|.

        @param minidump_path: the on-disk path of the minidump.
        @param build: The build (e.g. x86-mario-release/R18-1586.0.0-a1-b1514)
                      whose debug symbols are needed for symbolication.
        @return The contents of the stack trace, or '' when the `requests`
                module cannot be imported.
        @raise urllib.error.HTTPError from this method body upon any return
               code that's not HTTP OK.
               NOTE(review): the remote_devserver_call decorator may translate
               this into a DevServerException - confirm against its definition.
        """
        try:
            import requests
        except ImportError:
            logging.warning("Can't 'import requests' to connect to dev server.")
            return ''
        f = {'dev_server': self.resolved_hostname}
        c = metrics.Counter('chromeos/autotest/crashserver/symbolicate_dump')
        c.increment(fields=f)
        # Symbolicate minidump.
        m = 'chromeos/autotest/crashserver/symbolicate_dump_duration'
        with metrics.SecondsTimer(m, fields=f):
            call = self.build_call('symbolicate_dump',
                                   archive_url=_get_image_storage_server() + build)
            # Close the minidump as soon as the upload is done instead of
            # leaking the file handle.
            with open(minidump_path, 'rb') as minidump:
                request = requests.post(call, files={'minidump': minidump})
            if request.status_code == requests.codes.OK:
                return request.text

        # Wrap the response body in a file-like object, as HTTPError expects.
        error_fd = six.StringIO(request.text)
        raise urllib.error.HTTPError(
                call, request.status_code, request.text, request.headers,
                error_fd)


    @classmethod
    def get_available_devservers(cls, hostname=None):
        """Get all available crash servers.

        Crash server election doesn't need to count the location of hostname.

        @param hostname: Hostname of a DUT to choose devserver for. Unused;
                         it defaults to None so the method can also be called
                         without arguments, as `resolve` does on retry.

        @return: A tuple of (all crash servers, False). can_retry is set to
                 False, as all crash servers are returned. There is no point to
                 retry.
        """
        return cls.servers(), False
884
885
886class ImageServerBase(DevServer):
887    """Base class for devservers used to stage builds.
888
889    CrOS and Android builds are staged in different ways as they have different
890    sets of artifacts. This base class abstracts the shared functions between
891    the two types of ImageServer.
892    """
893
894    @classmethod
895    def servers(cls):
896        """Returns a list of servers that can serve as a desired type of
897        devserver.
898        """
899        return _get_dev_server_list()
900
901
902    def _get_image_url(self, image):
903        """Returns the url of the directory for this image on the devserver.
904
905        @param image: the image that was fetched.
906        """
907        image = self.translate(image)
908        url_pattern = CONFIG.get_config_value('CROS', 'image_url_pattern',
909                                              type=str)
910        return (url_pattern % (self.url(), image)).replace('update', 'static')
911
912
913    @staticmethod
914    def create_metadata(server_name, image, artifacts=None, files=None):
915        """Create a metadata dictionary given the staged items.
916
917        The metadata can be send to metadata db along with stats.
918
919        @param server_name: name of the devserver, e.g 172.22.33.44.
920        @param image: The name of the image.
921        @param artifacts: A list of artifacts.
922        @param files: A list of files.
923
924        @return A metadata dictionary.
925
926        """
927        metadata = {'devserver': server_name,
928                    'image': image,
929                    '_type': 'devserver'}
930        if artifacts:
931            metadata['artifacts'] = ' '.join(artifacts)
932        if files:
933            metadata['files'] = ' '.join(files)
934        return metadata
935
936
937    @classmethod
938    def run_ssh_call(cls, call, readline=False, timeout=None):
939        """Construct an ssh-based rpc call, and execute it.
940
941        @param call: a url string that calls a method to a devserver.
942        @param readline: whether read http response line by line.
943        @param timeout: The timeout seconds for ssh call.
944
945        @return the results of this call.
946        """
947        hostname = get_hostname(call)
948        ssh_call = 'ssh %s \'curl "%s"\'' % (hostname, utils.sh_escape(call))
949        timeout_seconds = timeout if timeout else DEVSERVER_SSH_TIMEOUT_MINS*60
950        try:
951            result = utils.run(ssh_call, timeout=timeout_seconds)
952        except error.CmdError as e:
953            logging.debug('Error occurred with exit_code %d when executing the '
954                          'ssh call: %s.', e.result_obj.exit_status,
955                          e.result_obj.stderr)
956            c = metrics.Counter('chromeos/autotest/devserver/ssh_failure')
957            c.increment(fields={'dev_server': hostname})
958            raise
959        response = result.stdout
960
961        # If the curl command's returned HTTP response contains certain
962        # exception string, raise the DevServerException of the response.
963        if 'DownloaderException' in response:
964            raise DevServerException(_strip_http_message(response))
965
966        if readline:
967            # Remove line terminators and trailing whitespace
968            response = response.splitlines()
969            return [line.rstrip() for line in response]
970
971        return response
972
973
974    @classmethod
975    def run_call(cls, call, readline=False, timeout=None):
976        """Invoke a given devserver call using urllib.open or ssh.
977
978        Open the URL with HTTP or SSH-based HTTP, and return the text of the
979        response. Exceptions may be raised as for urllib2.urlopen() or
980        utils.run().
981
982        @param call: a url string that calls a method to a devserver.
983        @param readline: whether read http response line by line.
984        @param timeout: The timeout seconds for urlopen call or ssh call.
985
986        @return the results of this call.
987        """
988        server_name = get_hostname(call)
989        is_in_restricted_subnet = utils.get_restricted_subnet(
990                server_name, utils.RESTRICTED_SUBNETS)
991        _EMPTY_SENTINEL_VALUE = object()
992        def kickoff_call():
993            """Invoke a given devserver call using urllib.open or ssh.
994
995            @param call: a url string that calls a method to a devserver.
996            @param is_in_restricted_subnet: whether the devserver is in subnet.
997            @param readline: whether read http response line by line.
998            @param timeout: The timeout seconds for urlopen call or ssh call.
999            """
1000            if (not ENABLE_SSH_CONNECTION_FOR_DEVSERVER or
1001                not is_in_restricted_subnet):
1002                response = super(ImageServerBase, cls).run_call(
1003                        call, readline=readline, timeout=timeout)
1004            else:
1005                response = cls.run_ssh_call(
1006                        call, readline=readline, timeout=timeout)
1007            # Retry if devserver service is temporarily down, e.g. in a
1008            # devserver push.
1009            if ERR_MSG_FOR_DOWN_DEVSERVER in response:
1010                return False
1011
1012            # Don't return response directly since it may be empty string,
1013            # which causes poll_for_condition to retry.
1014            return _EMPTY_SENTINEL_VALUE if not response else response
1015
1016        try:
1017            response = bin_utils.poll_for_condition(
1018                    kickoff_call,
1019                    exception=bin_utils.TimeoutError(),
1020                    timeout=60,
1021                    sleep_interval=5)
1022            return '' if response is _EMPTY_SENTINEL_VALUE else response
1023        except bin_utils.TimeoutError:
1024            return ERR_MSG_FOR_DOWN_DEVSERVER
1025
1026
1027    @classmethod
1028    def download_file(cls, remote_file, local_file, timeout=None):
1029        """Download file from devserver.
1030
1031        The format of remote_file should be:
1032            http://devserver_ip:8082/static/board/...
1033
1034        @param remote_file: The URL of the file on devserver that need to be
1035            downloaded.
1036        @param local_file: The path of the file saved to local.
1037        @param timeout: The timeout seconds for this call.
1038        """
1039        response = cls.run_call(remote_file, timeout=timeout)
1040        with open(local_file, 'w') as out_log:
1041            out_log.write(response)
1042
1043
1044    def _poll_is_staged(self, **kwargs):
1045        """Polling devserver.is_staged until all artifacts are staged.
1046
1047        @param kwargs: keyword arguments to make is_staged devserver call.
1048
1049        @return: True if all artifacts are staged in devserver.
1050        """
1051        call = self.build_call('is_staged', **kwargs)
1052
1053        def all_staged():
1054            """Call devserver.is_staged rpc to check if all files are staged.
1055
1056            @return: True if all artifacts are staged in devserver. False
1057                     otherwise.
1058            @rasies DevServerException, the exception is a wrapper of all
1059                    exceptions that were raised when devserver tried to download
1060                    the artifacts. devserver raises an HTTPError or a CmdError
1061                    when an exception was raised in the code. Such exception
1062                    should be re-raised here to stop the caller from waiting.
1063                    If the call to devserver failed for connection issue, a
1064                    URLError exception is raised, and caller should retry the
1065                    call to avoid such network flakiness.
1066
1067            """
1068            try:
1069                result = self.run_call(call)
1070                logging.debug('whether artifact is staged: %r', result)
1071                return result == 'True'
1072            except urllib.error.HTTPError as e:
1073                error_markup = e.read()
1074                raise DevServerException(_strip_http_message(error_markup))
1075            except urllib.error.URLError as e:
1076                # Could be connection issue, retry it.
1077                # For example: <urlopen error [Errno 111] Connection refused>
1078                logging.error('URLError happens in is_stage: %r', e)
1079                return False
1080            except error.CmdError as e:
1081                # Retry if SSH failed to connect to the devserver.
1082                logging.warning('CmdError happens in is_stage: %r, will retry', e)
1083                return False
1084
1085        bin_utils.poll_for_condition(
1086                all_staged,
1087                exception=bin_utils.TimeoutError(),
1088                timeout=DEVSERVER_IS_STAGING_RETRY_MIN * 60,
1089                sleep_interval=_ARTIFACT_STAGE_POLLING_INTERVAL)
1090
1091        return True
1092
1093
1094    def _call_and_wait(self, call_name, error_message,
1095                       expected_response=SUCCESS, **kwargs):
1096        """Helper method to make a urlopen call, and wait for artifacts staged.
1097
1098        @param call_name: name of devserver rpc call.
1099        @param error_message: Error message to be thrown if response does not
1100                              match expected_response.
1101        @param expected_response: Expected response from rpc, default to
1102                                  |Success|. If it's set to None, do not compare
1103                                  the actual response. Any response is consider
1104                                  to be good.
1105        @param kwargs: keyword arguments to make is_staged devserver call.
1106
1107        @return: The response from rpc.
1108        @raise DevServerException upon any return code that's expected_response.
1109
1110        """
1111        call = self.build_call(call_name, is_async=True, **kwargs)
1112        try:
1113            response = self.run_call(call)
1114            logging.debug('response for RPC: %r', response)
1115            if ERR_MSG_FOR_INVALID_DEVSERVER_RESPONSE in response:
1116                logging.debug('Proxy error happens in RPC call, '
1117                              'will retry in 30 seconds')
1118                time.sleep(30)
1119                raise DevServerOverloadException()
1120        except six.moves.http_client.BadStatusLine as e:
1121            logging.error(e)
1122            raise DevServerException('Received Bad Status line, Devserver %s '
1123                                     'might have gone down while handling '
1124                                     'the call: %s' % (self.url(), call))
1125
1126        if expected_response and not response == expected_response:
1127                raise DevServerException(error_message)
1128
1129        # `os_type` is needed in build a devserver call, but not needed for
1130        # wait_for_artifacts_staged, since that method is implemented by
1131        # each ImageServerBase child class.
1132        if 'os_type' in kwargs:
1133            del kwargs['os_type']
1134        self.wait_for_artifacts_staged(**kwargs)
1135        return response
1136
1137
1138    def _stage_artifacts(self, build, artifacts, files, archive_url, **kwargs):
1139        """Tell the devserver to download and stage |artifacts| from |image|
1140        specified by kwargs.
1141
1142        This is the main call point for staging any specific artifacts for a
1143        given build. To see the list of artifacts one can stage see:
1144
1145        ~src/platfrom/dev/artifact_info.py.
1146
1147        This is maintained along with the actual devserver code.
1148
1149        @param artifacts: A list of artifacts.
1150        @param files: A list of files to stage.
1151        @param archive_url: Optional parameter that has the archive_url to stage
1152                this artifact from. Default is specified in autotest config +
1153                image.
1154        @param kwargs: keyword arguments that specify the build information, to
1155                make stage devserver call.
1156
1157        @raise DevServerException upon any return code that's not HTTP OK.
1158        """
1159        if not archive_url:
1160            archive_url = _get_storage_server_for_artifacts(artifacts) + build
1161
1162        artifacts_arg = ','.join(artifacts) if artifacts else ''
1163        files_arg = ','.join(files) if files else ''
1164        error_message = ("staging %s for %s failed;"
1165                         "HTTP OK not accompanied by 'Success'." %
1166                         ('artifacts=%s files=%s ' % (artifacts_arg, files_arg),
1167                          build))
1168
1169        staging_info = ('build=%s, artifacts=%s, files=%s, archive_url=%s' %
1170                        (build, artifacts, files, archive_url))
1171        logging.info('Staging artifacts on devserver %s: %s',
1172                     self.url(), staging_info)
1173        success = False
1174        try:
1175            arguments = {'archive_url': archive_url,
1176                         'artifacts': artifacts_arg,
1177                         'files': files_arg}
1178            if kwargs:
1179                arguments.update(kwargs)
1180            f = {'artifacts': artifacts_arg,
1181                 'dev_server': self.resolved_hostname}
1182            with metrics.SecondsTimer(
1183                    'chromeos/autotest/devserver/stage_artifact_duration',
1184                    fields=f):
1185                self.call_and_wait(call_name='stage', error_message=error_message,
1186                                   **arguments)
1187            logging.info('Finished staging artifacts: %s', staging_info)
1188            success = True
1189        except (bin_utils.TimeoutError, error.TimeoutException):
1190            logging.error('stage_artifacts timed out: %s', staging_info)
1191            raise DevServerException(
1192                    'stage_artifacts timed out: %s' % staging_info)
1193        finally:
1194            f = {'success': success,
1195                 'artifacts': artifacts_arg,
1196                 'dev_server': self.resolved_hostname}
1197            metrics.Counter('chromeos/autotest/devserver/stage_artifact'
1198                            ).increment(fields=f)
1199
1200
1201    def call_and_wait(self, *args, **kwargs):
1202        """Helper method to make a urlopen call, and wait for artifacts staged.
1203
1204        This method needs to be overridden in the subclass to implement the
1205        logic to call _call_and_wait.
1206        """
1207        raise NotImplementedError
1208
1209
1210    def _trigger_download(self, build, artifacts, files, synchronous=True,
1211                          **kwargs_build_info):
1212        """Tell the devserver to download and stage image specified in
1213        kwargs_build_info.
1214
1215        Tells the devserver to fetch |image| from the image storage server
1216        named by _get_image_storage_server().
1217
1218        If |synchronous| is True, waits for the entire download to finish
1219        staging before returning. Otherwise only the artifacts necessary
1220        to start installing images onto DUT's will be staged before returning.
1221        A caller can then call finish_download to guarantee the rest of the
1222        artifacts have finished staging.
1223
1224        @param synchronous: if True, waits until all components of the image are
1225               staged before returning.
1226        @param kwargs_build_info: Dictionary of build information.
1227                For CrOS, it is None as build is the CrOS image name.
1228                For Android, it is {'target': target,
1229                                    'build_id': build_id,
1230                                    'branch': branch}
1231
1232        @raise DevServerException upon any return code that's not HTTP OK.
1233
1234        """
1235        if kwargs_build_info:
1236            archive_url = None
1237        else:
1238            archive_url = _get_image_storage_server() + build
1239        error_message = ("trigger_download for %s failed;"
1240                         "HTTP OK not accompanied by 'Success'." % build)
1241        kwargs = {'archive_url': archive_url,
1242                  'artifacts': artifacts,
1243                  'files': files,
1244                  'error_message': error_message}
1245        if kwargs_build_info:
1246            kwargs.update(kwargs_build_info)
1247
1248        logging.info('trigger_download starts for %s', build)
1249        try:
1250            response = self.call_and_wait(call_name='stage', **kwargs)
1251            logging.info('trigger_download finishes for %s', build)
1252        except (bin_utils.TimeoutError, error.TimeoutException):
1253            logging.error('trigger_download timed out for %s.', build)
1254            raise DevServerException(
1255                    'trigger_download timed out for %s.' % build)
1256        was_successful = response == SUCCESS
1257        if was_successful and synchronous:
1258            self._finish_download(build, artifacts, files, **kwargs_build_info)
1259
1260
1261    def _finish_download(self, build, artifacts, files, **kwargs_build_info):
1262        """Tell the devserver to finish staging image specified in
1263        kwargs_build_info.
1264
1265        If trigger_download is called with synchronous=False, it will return
1266        before all artifacts have been staged. This method contacts the
1267        devserver and blocks until all staging is completed and should be
1268        called after a call to trigger_download.
1269
1270        @param kwargs_build_info: Dictionary of build information.
1271                For CrOS, it is None as build is the CrOS image name.
1272                For Android, it is {'target': target,
1273                                    'build_id': build_id,
1274                                    'branch': branch}
1275
1276        @raise DevServerException upon any return code that's not HTTP OK.
1277        """
1278        archive_url = _get_image_storage_server() + build
1279        error_message = ("finish_download for %s failed;"
1280                         "HTTP OK not accompanied by 'Success'." % build)
1281        kwargs = {'archive_url': archive_url,
1282                  'artifacts': artifacts,
1283                  'files': files,
1284                  'error_message': error_message}
1285        if kwargs_build_info:
1286            kwargs.update(kwargs_build_info)
1287        try:
1288            self.call_and_wait(call_name='stage', **kwargs)
1289        except (bin_utils.TimeoutError, error.TimeoutException):
1290            logging.error('finish_download timed out for %s', build)
1291            raise DevServerException(
1292                    'finish_download timed out for %s.' % build)
1293
1294
1295    @remote_devserver_call()
1296    def locate_file(self, file_name, artifacts, build, build_info):
1297        """Locate a file with the given file_name on devserver.
1298
1299        This method calls devserver RPC `locate_file` to look up a file with
1300        the given file name inside specified build artifacts.
1301
1302        @param file_name: Name of the file to look for a file.
1303        @param artifacts: A list of artifact names to search for the file.
1304        @param build: Name of the build. For Android, it's None as build_info
1305                should be used.
1306        @param build_info: Dictionary of build information.
1307                For CrOS, it is None as build is the CrOS image name.
1308                For Android, it is {'target': target,
1309                                    'build_id': build_id,
1310                                    'branch': branch}
1311
1312        @return: A devserver url to the file.
1313        @raise DevServerException upon any return code that's not HTTP OK.
1314        """
1315        if not build and not build_info:
1316            raise DevServerException('You must specify build information to '
1317                                     'look for file %s in artifacts %s.' %
1318                                     (file_name, artifacts))
1319        kwargs = {'file_name': file_name,
1320                  'artifacts': artifacts}
1321        if build_info:
1322            build_path = '%(branch)s/%(target)s/%(build_id)s' % build_info
1323            kwargs.update(build_info)
1324            # Devserver treats Android and Brillo build in the same way as they
1325            # are both retrieved from Launch Control and have similar build
1326            # artifacts. Therefore, os_type for devserver calls is `android` for
1327            # both Android and Brillo builds.
1328            kwargs['os_type'] = 'android'
1329        else:
1330            build_path = build
1331            kwargs['build'] = build
1332        call = self.build_call('locate_file', is_async=False, **kwargs)
1333        try:
1334            file_path = self.run_call(call)
1335            return os.path.join(self.url(), 'static', build_path, file_path)
1336        except six.moves.http_client.BadStatusLine as e:
1337            logging.error(e)
1338            raise DevServerException('Received Bad Status line, Devserver %s '
1339                                     'might have gone down while handling '
1340                                     'the call: %s' % (self.url(), call))
1341
1342
1343    @remote_devserver_call()
1344    def list_control_files(self, build, suite_name=''):
1345        """Ask the devserver to list all control files for |build|.
1346
1347        @param build: The build (e.g. x86-mario-release/R18-1586.0.0-a1-b1514)
1348                      whose control files the caller wants listed.
1349        @param suite_name: The name of the suite for which we require control
1350                           files.
1351        @return None on failure, or a list of control file paths
1352                (e.g. server/site_tests/autoupdate/control)
1353        @raise DevServerException upon any return code that's not HTTP OK.
1354        """
1355        build = self.translate(build)
1356        call = self.build_call('controlfiles', build=build,
1357                               suite_name=suite_name)
1358        return self.run_call(call, readline=True)
1359
1360
1361    @remote_devserver_call()
1362    def get_control_file(self, build, control_path):
1363        """Ask the devserver for the contents of a control file.
1364
1365        @param build: The build (e.g. x86-mario-release/R18-1586.0.0-a1-b1514)
1366                      whose control file the caller wants to fetch.
1367        @param control_path: The file to fetch
1368                             (e.g. server/site_tests/autoupdate/control)
1369        @return The contents of the desired file.
1370        @raise DevServerException upon any return code that's not HTTP OK.
1371        """
1372        build = self.translate(build)
1373        call = self.build_call('controlfiles', build=build,
1374                               control_path=control_path)
1375        return self.run_call(call)
1376
1377
1378    @remote_devserver_call()
1379    def list_suite_controls(self, build, suite_name=''):
1380        """Ask the devserver to list contents of all control files for |build|.
1381
1382        @param build: The build (e.g. x86-mario-release/R18-1586.0.0-a1-b1514)
1383                      whose control files' contents the caller wants returned.
1384        @param suite_name: The name of the suite for which we require control
1385                           files.
1386        @return None on failure, or a dict of contents of all control files
1387            (e.g. {'path1': "#Copyright controls ***", ...,
1388                pathX': "#Copyright controls ***"}
1389        @raise DevServerException upon any return code that's not HTTP OK.
1390        """
1391        build = self.translate(build)
1392        call = self.build_call('list_suite_controls', build=build,
1393                               suite_name=suite_name)
1394        return json.load(six.StringIO(self.run_call(call)))
1395
1396
1397class ImageServer(ImageServerBase):
1398    """Class for DevServer that handles RPCs related to CrOS images.
1399
1400    The calls to devserver to stage artifacts, including stage and download, are
1401    made in async mode. That is, when caller makes an RPC |stage| to request
1402    devserver to stage certain artifacts, devserver handles the call and starts
1403    staging artifacts in a new thread, and return |Success| without waiting for
1404    staging being completed. When caller receives message |Success|, it polls
1405    devserver's is_staged call until all artifacts are staged.
1406    Such mechanism is designed to prevent cherrypy threads in devserver being
1407    running out, as staging artifacts might take long time, and cherrypy starts
1408    with a fixed number of threads that handle devserver rpc.
1409    """
1410
1411    class ArtifactUrls(object):
1412        """A container for URLs of staged artifacts.
1413
1414        Attributes:
1415            full_payload: URL for downloading a staged full release update
1416            mton_payload: URL for downloading a staged M-to-N release update
1417            nton_payload: URL for downloading a staged N-to-N release update
1418
1419        """
1420        def __init__(self, full_payload=None, mton_payload=None,
1421                     nton_payload=None):
1422            self.full_payload = full_payload
1423            self.mton_payload = mton_payload
1424            self.nton_payload = nton_payload
1425
1426
1427    def wait_for_artifacts_staged(self, archive_url, artifacts='', files=''):
1428        """Polling devserver.is_staged until all artifacts are staged.
1429
1430        @param archive_url: Google Storage URL for the build.
1431        @param artifacts: Comma separated list of artifacts to download.
1432        @param files: Comma separated list of files to download.
1433        @return: True if all artifacts are staged in devserver.
1434        """
1435        kwargs = {'archive_url': archive_url,
1436                  'artifacts': artifacts,
1437                  'files': files}
1438        return self._poll_is_staged(**kwargs)
1439
1440
1441    @remote_devserver_call()
1442    def call_and_wait(self, call_name, archive_url, artifacts, files,
1443                      error_message, expected_response=SUCCESS):
1444        """Helper method to make a urlopen call, and wait for artifacts staged.
1445
1446        @param call_name: name of devserver rpc call.
1447        @param archive_url: Google Storage URL for the build..
1448        @param artifacts: Comma separated list of artifacts to download.
1449        @param files: Comma separated list of files to download.
1450        @param expected_response: Expected response from rpc, default to
1451                                  |Success|. If it's set to None, do not compare
1452                                  the actual response. Any response is consider
1453                                  to be good.
1454        @param error_message: Error message to be thrown if response does not
1455                              match expected_response.
1456
1457        @return: The response from rpc.
1458        @raise DevServerException upon any return code that's expected_response.
1459
1460        """
1461        kwargs = {'archive_url': archive_url,
1462                  'artifacts': artifacts,
1463                  'files': files}
1464        return self._call_and_wait(call_name, error_message,
1465                                   expected_response, **kwargs)
1466
1467
1468    @remote_devserver_call()
1469    def stage_artifacts(self, image=None, artifacts=None, files='',
1470                        archive_url=None):
1471        """Tell the devserver to download and stage |artifacts| from |image|.
1472
1473         This is the main call point for staging any specific artifacts for a
1474        given build. To see the list of artifacts one can stage see:
1475
1476        ~src/platfrom/dev/artifact_info.py.
1477
1478        This is maintained along with the actual devserver code.
1479
1480        @param image: the image to fetch and stage.
1481        @param artifacts: A list of artifacts.
1482        @param files: A list of files to stage.
1483        @param archive_url: Optional parameter that has the archive_url to stage
1484                this artifact from. Default is specified in autotest config +
1485                image.
1486
1487        @raise DevServerException upon any return code that's not HTTP OK.
1488        """
1489        if not artifacts and not files:
1490            raise DevServerException('Must specify something to stage.')
1491        image = self.translate(image)
1492        self._stage_artifacts(image, artifacts, files, archive_url)
1493
1494
1495    @remote_devserver_call(timeout_min=DEVSERVER_SSH_TIMEOUT_MINS)
1496    def list_image_dir(self, image):
1497        """List the contents of the image stage directory, on the devserver.
1498
1499        @param image: The image name, eg: <board>-<branch>/<Milestone>-<build>.
1500
1501        @raise DevServerException upon any return code that's not HTTP OK.
1502        """
1503        image = self.translate(image)
1504        logging.info('Requesting contents from devserver %s for image %s',
1505                     self.url(), image)
1506        archive_url = _get_storage_server_for_artifacts() + image
1507        call = self.build_call('list_image_dir', archive_url=archive_url)
1508        response = self.run_call(call, readline=True)
1509        for line in response:
1510            logging.info(line)
1511
1512
1513    def trigger_download(self, image, synchronous=True):
1514        """Tell the devserver to download and stage |image|.
1515
1516        Tells the devserver to fetch |image| from the image storage server
1517        named by _get_image_storage_server().
1518
1519        If |synchronous| is True, waits for the entire download to finish
1520        staging before returning. Otherwise only the artifacts necessary
1521        to start installing images onto DUT's will be staged before returning.
1522        A caller can then call finish_download to guarantee the rest of the
1523        artifacts have finished staging.
1524
1525        @param image: the image to fetch and stage.
1526        @param synchronous: if True, waits until all components of the image are
1527               staged before returning.
1528
1529        @raise DevServerException upon any return code that's not HTTP OK.
1530
1531        """
1532        image = self.translate(image)
1533        artifacts = _ARTIFACTS_TO_BE_STAGED_FOR_IMAGE
1534        self._trigger_download(image, artifacts, files='',
1535                               synchronous=synchronous)
1536
1537
1538    @remote_devserver_call()
1539    def setup_telemetry(self, build):
1540        """Tell the devserver to setup telemetry for this build.
1541
1542        The devserver will stage autotest and then extract the required files
1543        for telemetry.
1544
1545        @param build: the build to setup telemetry for.
1546
1547        @returns path on the devserver that telemetry is installed to.
1548        """
1549        build = self.translate(build)
1550        archive_url = _get_image_storage_server() + build
1551        call = self.build_call('setup_telemetry', archive_url=archive_url)
1552        try:
1553            response = self.run_call(call)
1554        except six.moves.http_client.BadStatusLine as e:
1555            logging.error(e)
1556            raise DevServerException('Received Bad Status line, Devserver %s '
1557                                     'might have gone down while handling '
1558                                     'the call: %s' % (self.url(), call))
1559        return response
1560
1561
1562    def finish_download(self, image):
1563        """Tell the devserver to finish staging |image|.
1564
1565        If trigger_download is called with synchronous=False, it will return
1566        before all artifacts have been staged. This method contacts the
1567        devserver and blocks until all staging is completed and should be
1568        called after a call to trigger_download.
1569
1570        @param image: the image to fetch and stage.
1571        @raise DevServerException upon any return code that's not HTTP OK.
1572        """
1573        image = self.translate(image)
1574        artifacts = _ARTIFACTS_TO_BE_STAGED_FOR_IMAGE_WITH_AUTOTEST
1575        self._finish_download(image, artifacts, files='')
1576
1577
1578    def get_update_url(self, image):
1579        """Returns the url that should be passed to the updater.
1580
1581        @param image: the image that was fetched.
1582        """
1583        image = self.translate(image)
1584        url_pattern = CONFIG.get_config_value('CROS', 'image_url_pattern',
1585                                              type=str)
1586        return (url_pattern % (self.url(), image))
1587
1588
1589    def get_staged_file_url(self, filename, image):
1590        """Returns the url of a staged file for this image on the devserver."""
1591        return '/'.join([self._get_image_url(image), filename])
1592
1593
1594    def get_test_image_url(self, image):
1595        """Returns a URL to a staged test image.
1596
1597        @param image: the image that was fetched.
1598
1599        @return A fully qualified URL that can be used for downloading the
1600                image.
1601
1602        """
1603        return self._get_image_url(image) + '/chromiumos_test_image.bin'
1604
1605
1606    def get_recovery_image_url(self, image):
1607        """Returns a URL to a staged recovery image.
1608
1609        @param image: the image that was fetched.
1610
1611        @return A fully qualified URL that can be used for downloading the
1612                image.
1613
1614        """
1615        return self._get_image_url(image) + '/recovery_image.bin'
1616
1617
1618    @remote_devserver_call()
1619    def get_dependencies_file(self, build):
1620        """Ask the dev server for the contents of the suite dependencies file.
1621
1622        Ask the dev server at |self._dev_server| for the contents of the
1623        pre-processed suite dependencies file (at DEPENDENCIES_FILE)
1624        for |build|.
1625
1626        @param build: The build (e.g. x86-mario-release/R21-2333.0.0)
1627                      whose dependencies the caller is interested in.
1628        @return The contents of the dependencies file, which should eval to
1629                a dict of dicts, as per bin_utils/suite_preprocessor.py.
1630        @raise DevServerException upon any return code that's not HTTP OK.
1631        """
1632        build = self.translate(build)
1633        call = self.build_call('controlfiles',
1634                               build=build, control_path=DEPENDENCIES_FILE)
1635        return self.run_call(call)
1636
1637
1638    @remote_devserver_call()
1639    def get_latest_build_in_gs(self, board):
1640        """Ask the devservers for the latest offical build in Google Storage.
1641
1642        @param board: The board for who we want the latest official build.
1643        @return A string of the returned build rambi-release/R37-5868.0.0
1644        @raise DevServerException upon any return code that's not HTTP OK.
1645        """
1646        call = self.build_call(
1647                'xbuddy_translate/remote/%s/latest-official' % board,
1648                image_dir=_get_image_storage_server())
1649        image_name = self.run_call(call)
1650        return os.path.dirname(image_name)
1651
1652
1653    def translate(self, build_name):
1654        """Translate the build name if it's in LATEST format.
1655
1656        If the build name is in the format [builder]/LATEST, return the latest
1657        build in Google Storage otherwise return the build name as is.
1658
1659        @param build_name: build_name to check.
1660
1661        @return The actual build name to use.
1662        """
1663        match = re.match(r'([\w-]+)-(\w+)/LATEST', build_name, re.I)
1664        if not match:
1665            return build_name
1666        translated_build = self.get_latest_build_in_gs(match.groups()[0])
1667        logging.debug('Translated relative build %s to %s', build_name,
1668                      translated_build)
1669        return translated_build
1670
1671
1672    @classmethod
1673    @remote_devserver_call()
1674    def get_latest_build(cls, target, milestone=''):
1675        """Ask all the devservers for the latest build for a given target.
1676
1677        @param target: The build target, typically a combination of the board
1678                       and the type of build e.g. x86-mario-release.
1679        @param milestone:  For latest build set to '', for builds only in a
1680                           specific milestone set to a str of format Rxx
1681                           (e.g. R16). Default: ''. Since we are dealing with a
1682                           webserver sending an empty string, '', ensures that
1683                           the variable in the URL is ignored as if it was set
1684                           to None.
1685        @return A string of the returned build e.g. R20-2226.0.0.
1686        @raise DevServerException upon any return code that's not HTTP OK.
1687        """
1688        calls = cls.build_all_calls('latestbuild', target=target,
1689                                    milestone=milestone)
1690        latest_builds = []
1691        for call in calls:
1692            latest_builds.append(cls.run_call(call))
1693
1694        return max(latest_builds, key=version.LooseVersion)
1695
1696
1697    def _read_json_response_from_devserver(self, response):
1698        """Reads the json response from the devserver.
1699
1700        This is extracted to its own function so that it can be easily mocked.
1701        @param response: the response for a devserver.
1702        """
1703        try:
1704            return json.loads(response)
1705        except ValueError as e:
1706            logging.debug('Failed to load json response: %s', response)
1707            raise DevServerException(e)
1708
1709
1710    def _check_error_message(self, error_patterns_to_check, error_msg):
1711        """Detect whether specific error pattern exist in error message.
1712
1713        @param error_patterns_to_check: the error patterns to check
1714        @param error_msg: the error message which may include any error
1715                          pattern.
1716
1717        @return A boolean variable, True if error_msg contains any error
1718            pattern in error_patterns_to_check, False otherwise.
1719        """
1720        for err in error_patterns_to_check:
1721            if err in error_msg:
1722                return True
1723
1724        return False
1725
1726
class AndroidBuildServer(ImageServerBase):
    """Class for DevServer that handles RPCs related to Android builds.

    The calls to devserver to stage artifacts, including stage and download, are
    made in async mode. That is, when caller makes an RPC |stage| to request
    devserver to stage certain artifacts, devserver handles the call and starts
    staging artifacts in a new thread, and returns |Success| without waiting
    for staging to complete. When caller receives message |Success|, it polls
    devserver's is_staged call until all artifacts are staged.
    This mechanism is designed to prevent devserver from running out of
    cherrypy threads, as staging artifacts might take a long time, and cherrypy
    starts with a fixed number of threads that handle devserver rpc.
    """

    def wait_for_artifacts_staged(self, target, build_id, branch,
                                  archive_url=None, artifacts='', files=''):
        """Polling devserver.is_staged until all artifacts are staged.

        @param target: Target of the android build to stage, e.g.,
                       shamu-userdebug.
        @param build_id: Build id of the android build to stage.
        @param branch: Branch of the android build to stage.
        @param archive_url: Google Storage URL for the build. Only forwarded
                            to the devserver when it is set.
        @param artifacts: Comma separated list of artifacts to download.
        @param files: Comma separated list of files to download.

        @return: True if all artifacts are staged in devserver.
        """
        kwargs = {'target': target,
                  'build_id': build_id,
                  'branch': branch,
                  'artifacts': artifacts,
                  'files': files,
                  'os_type': 'android'}
        if archive_url:
            kwargs['archive_url'] = archive_url
        return self._poll_is_staged(**kwargs)


    @remote_devserver_call()
    def call_and_wait(self, call_name, target, build_id, branch, archive_url,
                      artifacts, files, error_message,
                      expected_response=SUCCESS):
        """Helper method to make a urlopen call, and wait for artifacts staged.

        @param call_name: name of devserver rpc call.
        @param target: Target of the android build to stage, e.g.,
                       shamu-userdebug.
        @param build_id: Build id of the android build to stage.
        @param branch: Branch of the android build to stage.
        @param archive_url: Google Storage URL for the CrOS build. Only
                            forwarded to the devserver when it is set.
        @param artifacts: Comma separated list of artifacts to download.
        @param files: Comma separated list of files to download.
        @param error_message: Error message to be thrown if response does not
                              match expected_response.
        @param expected_response: Expected response from rpc, default to
                                  |Success|. If it's set to None, do not compare
                                  the actual response. Any response is
                                  considered to be good.

        @return: The response from rpc.
        @raise DevServerException upon any return code that's not
               expected_response.

        """
        kwargs = {'target': target,
                  'build_id': build_id,
                  'branch': branch,
                  'artifacts': artifacts,
                  'files': files,
                  'os_type': 'android'}
        if archive_url:
            kwargs['archive_url'] = archive_url
        return self._call_and_wait(call_name, error_message, expected_response,
                                   **kwargs)


    @remote_devserver_call()
    def stage_artifacts(self, target=None, build_id=None, branch=None,
                        image=None, artifacts=None, files='', archive_url=None):
        """Tell the devserver to download and stage |artifacts| from |image|.

        This is the main call point for staging any specific artifacts for a
        given build. To see the list of artifacts one can stage see:

        ~src/platform/dev/artifact_info.py.

        This is maintained along with the actual devserver code.

        @param target: Target of the android build to stage, e.g.,
                               shamu-userdebug.
        @param build_id: Build id of the android build to stage.
        @param branch: Branch of the android build to stage.
        @param image: Name of a build to test, in the format of
                      branch/target/build_id. Only consulted when none of
                      target, build_id and branch is given.
        @param artifacts: A list of artifacts.
        @param files: A list of files to stage.
        @param archive_url: Optional parameter that has the archive_url to stage
                this artifact from. Default is specified in autotest config +
                image.

        @raise DevServerException if the build info is incomplete, if nothing
               is requested for staging, or upon any return code that's not
               HTTP OK.
        """
        if image and not target and not build_id and not branch:
            branch, target, build_id = utils.parse_launch_control_build(image)
        if not target or not build_id or not branch:
            raise DevServerException('Must specify all build info (target, '
                                     'build_id and branch) to stage.')
        if not artifacts and not files:
            raise DevServerException('Must specify something to stage.')

        # All three values are known to be truthy here, so no further
        # validation of the build info is needed.
        android_build_info = {'target': target,
                              'build_id': build_id,
                              'branch': branch}
        build = ANDROID_BUILD_NAME_PATTERN % android_build_info
        self._stage_artifacts(build, artifacts, files, archive_url,
                              **android_build_info)


    def trigger_download(self, target, build_id, branch, artifacts=None,
                         files='', os='android', synchronous=True):
        """Tell the devserver to download and stage an Android build.

        Tells the devserver to fetch an Android build from the image storage
        server named by _get_image_storage_server().

        If |synchronous| is True, waits for the entire download to finish
        staging before returning. Otherwise only the artifacts necessary
        to start installing images onto DUT's will be staged before returning.
        A caller can then call finish_download to guarantee the rest of the
        artifacts have finished staging.

        @param target: Target of the android build to stage, e.g.,
                       shamu-userdebug.
        @param build_id: Build id of the android build to stage.
        @param branch: Branch of the android build to stage.
        @param artifacts: A string of artifacts separated by comma. If None,
               use the default artifacts for Android or Brillo build.
        @param files: String of files separated by commas.
        @param os: OS artifacts to download (android/brillo).
        @param synchronous: if True, waits until all components of the image are
               staged before returning.

        @raise DevServerException upon any return code that's not HTTP OK.

        """
        android_build_info = {'target': target,
                              'build_id': build_id,
                              'branch': branch}
        build = ANDROID_BUILD_NAME_PATTERN % android_build_info
        if not artifacts:
            # The board is the part of the target before the first '-'.
            board = target.split('-')[0]
            artifacts = (
                android_utils.AndroidArtifacts.get_artifacts_for_reimage(
                        board, os))
        self._trigger_download(build, artifacts, files=files,
                               synchronous=synchronous, **android_build_info)


    def finish_download(self, target, build_id, branch, os='android'):
        """Tell the devserver to finish staging an Android build.

        If trigger_download is called with synchronous=False, it will return
        before all artifacts have been staged. This method contacts the
        devserver and blocks until all staging is completed and should be
        called after a call to trigger_download.

        @param target: Target of the android build to stage, e.g.,
                       shamu-userdebug.
        @param build_id: Build id of the android build to stage.
        @param branch: Branch of the android build to stage.
        @param os: OS artifacts to download (android/brillo).

        @raise DevServerException upon any return code that's not HTTP OK.
        """
        android_build_info = {'target': target,
                              'build_id': build_id,
                              'branch': branch}
        build = ANDROID_BUILD_NAME_PATTERN % android_build_info
        # The board is the part of the target before the first '-'.
        board = target.split('-')[0]
        # Forward |os| exactly as trigger_download does, so both methods ask
        # for the same artifact set for a given OS.
        artifacts = (
                android_utils.AndroidArtifacts.get_artifacts_for_reimage(
                        board, os))
        self._finish_download(build, artifacts, files='', **android_build_info)


    def get_staged_file_url(self, filename, target, build_id, branch):
        """Returns the url of a staged file for this image on the devserver.

        @param filename: Name of the file.
        @param target: Target of the android build to stage, e.g.,
                       shamu-userdebug.
        @param build_id: Build id of the android build to stage.
        @param branch: Branch of the android build to stage.

        @return: The url of a staged file for this image on the devserver.
        """
        android_build_info = {'target': target,
                              'build_id': build_id,
                              'branch': branch}
        build = ANDROID_BUILD_NAME_PATTERN % android_build_info
        return '/'.join([self._get_image_url(build), filename])


    @remote_devserver_call()
    def translate(self, build_name):
        """Translate the build name if it's in LATEST format.

        If the build name is in the format [branch]/[target]/LATEST (the
        LATEST part is compared case-insensitively), return the latest build
        in Launch Control; otherwise return the build name as is.

        @param build_name: build_name to check.

        @return The actual build name to use.
        """
        branch, target, build_id = utils.parse_launch_control_build(build_name)
        if build_id.upper() != 'LATEST':
            return build_name
        call = self.build_call('latestbuild', branch=branch, target=target,
                               os_type='android')
        translated_build_id = self.run_call(call)
        translated_build = (ANDROID_BUILD_NAME_PATTERN %
                            {'branch': branch,
                             'target': target,
                             'build_id': translated_build_id})
        logging.debug('Translated relative build %s to %s', build_name,
                      translated_build)
        return translated_build
1958
1959
def _is_load_healthy(load):
    """Check that a devserver's load stays within the allowed thresholds.

    Both the CPU load and the network IO are compared against the maximums
    defined on DevServer; the first violation is logged at debug level.

    @param load: The devserver's load stats to check.

    @return: True if neither threshold is exceeded. Return False otherwise.

    """
    cpu_load = load[DevServer.CPU_LOAD]
    if cpu_load > DevServer.MAX_CPU_LOAD:
        logging.debug('CPU load of devserver %s is at %s%%, which is higher '
                      'than the threshold of %s%%', load['devserver'],
                      cpu_load, DevServer.MAX_CPU_LOAD)
        return False

    network_io = load[DevServer.NETWORK_IO]
    if network_io > DevServer.MAX_NETWORK_IO:
        logging.debug('Network IO of devserver %s is at %i Bps, which is '
                      'higher than the threshold of %i bytes per second.',
                      load['devserver'], network_io,
                      DevServer.MAX_NETWORK_IO)
        return False

    return True
1982
1983
def _compare_load(devserver1, devserver2):
    """Comparator function to compare load between two devservers.

    Only the disk IO stat is compared.

    @param devserver1: A dictionary of devserver load stats to be compared.
    @param devserver2: A dictionary of devserver load stats to be compared.

    @return: The disk IO of `devserver1` minus that of `devserver2`,
             truncated to an int: negative if `devserver1` is less loaded,
             positive if it is more loaded, and 0 when the truncated
             difference is zero.

    """
    disk_io_delta = (devserver1[DevServer.DISK_IO] -
                     devserver2[DevServer.DISK_IO])
    return int(disk_io_delta)
1995
1996
def _get_subnet_for_host_ip(host_ip,
                            restricted_subnets=utils.RESTRICTED_SUBNETS):
    """Find the first restricted subnet that contains a host IP.

    @param host_ip: the IP of a DUT.
    @param restricted_subnets: A list of restricted subnets, each given as a
                               (subnet_ip, mask_bits) pair.

    @return: a (subnet_ip, mask_bits) tuple. If no matched subnet for the
             host_ip, return (None, None).
    """
    matching_subnets = (
            (candidate_ip, candidate_bits)
            for candidate_ip, candidate_bits in restricted_subnets
            if utils.is_in_same_subnet(host_ip, candidate_ip, candidate_bits))
    return next(matching_subnets, (None, None))
2012
2013
def get_least_loaded_devserver(devserver_type=ImageServer, hostname=None):
    """Get the devserver with the least load.

    Iterate through all devservers and get the one with least load, where
    load is ordered by _compare_load.

    TODO(crbug.com/486278): Devserver with required build already staged should
    take higher priority. This will need check_health call to be able to verify
    existence of a given build/artifact. Also, in case all devservers are
    overloaded, the logic here should fall back to the old behavior that randomly
    selects a devserver based on the hash of the image name/url.

    @param devserver_type: Type of devserver to select from. Default is set to
                           ImageServer.
    @param hostname: Hostname of the dut that the devserver is used for. The
            picked devserver needs to respect the location of the host if
            `prefer_local_devserver` is set to True or `restricted_subnets` is
            set.

    @return: Name of the devserver with the least load, or None if no
             devserver is eligible or no load stats could be retrieved.

    """
    # Local import: cmp_to_key is needed because sorted() has no `cmp`
    # argument on Python 3.
    import functools

    logging.debug('Get the least loaded %r', devserver_type)
    devservers, can_retry = devserver_type.get_available_devservers(
            hostname)
    # If no healthy devservers available and can_retry is False, return None.
    # Otherwise, relax the constraint on hostname, allow all devservers to be
    # available.
    if not devserver_type.get_healthy_devserver('', devservers):
        if not can_retry:
            return None
        devservers, _ = devserver_type.get_available_devservers()

    # get_devserver_load call needs to be made in a new process to allow force
    # timeout using signal.
    output = multiprocessing.Queue()
    processes = [
            multiprocessing.Process(
                    target=devserver_type.get_devserver_load_wrapper,
                    args=(devserver, TIMEOUT_GET_DEVSERVER_LOAD, output))
            for devserver in devservers]

    for p in processes:
        p.start()
    for p in processes:
        # The timeout for the process commands aren't reliable.  Add
        # some extra time to the timeout for potential overhead in the
        # subprocesses.  crbug.com/913695
        p.join(TIMEOUT_GET_DEVSERVER_LOAD + 10)
    # Read queue before killing processes to avoid corrupting the queue.
    # One result is read per process that has already exited.
    loads = [output.get() for p in processes if not p.is_alive()]
    for p in processes:
        if p.is_alive():
            p.terminate()
    # Filter out any load failed to be retrieved or does not support load check.
    loads = [load for load in loads if load and DevServer.CPU_LOAD in load and
             DevServer.is_free_disk_ok(load) and
             DevServer.is_apache_client_count_ok(load)]
    if not loads:
        logging.debug('Failed to retrieve load stats from any devserver. No '
                      'load balancing can be applied.')
        return None
    loads = [load for load in loads if _is_load_healthy(load)]
    if not loads:
        logging.error('No devserver has the capacity to be selected.')
        return None
    # sorted(..., cmp=...) only exists on Python 2; cmp_to_key keeps the same
    # comparator-based ordering on both Python 2.7 and Python 3.
    loads = sorted(loads, key=functools.cmp_to_key(_compare_load))
    return loads[0]['devserver']
2082
2083
def resolve(build, hostname=None, ban_list=None):
    """Resolve a devserver that can be used for the given build and hostname.

    Launch Control builds are resolved through AndroidBuildServer, everything
    else through ImageServer.

    @param build: Name of a build to stage on devserver, e.g.,
                  ChromeOS build: daisy-release/R50-1234.0.0
                  Launch Control build: git_mnc_release/shamu-eng
    @param hostname: Hostname of the host the devserver is for. Default is
            None, which means devserver is not restricted by the network
            location of the host.
    @param ban_list: The ban_list of devservers that shouldn't be chosen. It
            is only forwarded when resolving through ImageServer.

    @return: A DevServer instance that can be used to stage given build for the
             given host.
    """
    if not utils.is_launch_control_build(build):
        return ImageServer.resolve(build, hostname, ban_list=ban_list)
    return AndroidBuildServer.resolve(build, hostname)
2101