• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5from distutils import version
6import cStringIO
7import HTMLParser
8import httplib
9import json
10import logging
11import multiprocessing
12import os
13import re
14import socket
15import time
16import urllib2
17import urlparse
18
19from autotest_lib.client.bin import utils as bin_utils
20from autotest_lib.client.common_lib import android_utils
21from autotest_lib.client.common_lib import error
22from autotest_lib.client.common_lib import global_config
23from autotest_lib.client.common_lib import utils
24from autotest_lib.client.common_lib.cros import retry
25from autotest_lib.server import utils as server_utils
26# TODO(cmasone): redo this class using requests module; http://crosbug.com/30107
27
28try:
29    from chromite.lib import metrics
30except ImportError:
31    metrics = utils.metrics_mock
32
33
34CONFIG = global_config.global_config
35# This file is generated at build time and specifies, per suite and per test,
36# the DEPENDENCIES list specified in each control file.  It's a dict of dicts:
37# {'bvt':   {'/path/to/autotest/control/site_tests/test1/control': ['dep1']}
38#  'suite': {'/path/to/autotest/control/site_tests/test2/control': ['dep2']}
39#  'power': {'/path/to/autotest/control/site_tests/test1/control': ['dep1'],
40#            '/path/to/autotest/control/site_tests/test3/control': ['dep3']}
41# }
42DEPENDENCIES_FILE = 'test_suites/dependency_info'
43# Number of seconds for caller to poll devserver's is_staged call to check if
44# artifacts are staged.
45_ARTIFACT_STAGE_POLLING_INTERVAL = 5
46# Artifacts that should be staged when client calls devserver RPC to stage an
47# image.
48_ARTIFACTS_TO_BE_STAGED_FOR_IMAGE = 'full_payload,test_suites,stateful'
49# Artifacts that should be staged when client calls devserver RPC to stage an
50# image with autotest artifact.
51_ARTIFACTS_TO_BE_STAGED_FOR_IMAGE_WITH_AUTOTEST = ('full_payload,test_suites,'
52                                                   'control_files,stateful,'
53                                                   'autotest_packages')
54# Artifacts that should be staged when client calls devserver RPC to stage an
55# Android build.
56_BRILLO_ARTIFACTS_TO_BE_STAGED_FOR_IMAGE = ('zip_images,vendor_partitions')
57SKIP_DEVSERVER_HEALTH_CHECK = CONFIG.get_config_value(
58        'CROS', 'skip_devserver_health_check', type=bool)
59# Number of seconds for the call to get devserver load to time out.
60TIMEOUT_GET_DEVSERVER_LOAD = 2.0
61
62# Android artifact path in devserver
63ANDROID_BUILD_NAME_PATTERN = CONFIG.get_config_value(
64        'CROS', 'android_build_name_pattern', type=str).replace('\\', '')
65
66# Return value from a devserver RPC indicating the call succeeded.
67SUCCESS = 'Success'
68
69# The timeout minutes for a given devserver ssh call.
70DEVSERVER_SSH_TIMEOUT_MINS = 1
71
72# Error message for invalid devserver response.
73ERR_MSG_FOR_INVALID_DEVSERVER_RESPONSE = 'Proxy Error'
74ERR_MSG_FOR_DOWN_DEVSERVER = 'Service Unavailable'
75
76# Error message fragment indicating that a devserver call timed out.
77ERR_MSG_FOR_TIMED_OUT_CALL = 'timeout'
78
79# The timeout, in minutes, to wait for devserver staging to finish.
80DEVSERVER_IS_STAGING_RETRY_MIN = 100
81
82# The timeout, in minutes, to wait for a DUT auto-update to finish.
83DEVSERVER_IS_CROS_AU_FINISHED_TIMEOUT_MIN = 100
84
85# The total number of times the devserver will trigger a CrOS auto-update.
86AU_RETRY_LIMIT = 2
87
88# Number of seconds for caller to poll devserver's get_au_status call to
89# check if cros auto-update is finished.
90CROS_AU_POLLING_INTERVAL = 10
91
92# Number of seconds for intervals between retrying auto-update calls.
93CROS_AU_RETRY_INTERVAL = 20
94
95# The file name for auto-update logs.
96CROS_AU_LOG_FILENAME = 'CrOS_update_%s_%s.log'
97
98# Provision error patterns.
99# People who see this should know that they shouldn't change these
100# classification strings. These strings are used for monitoring provision
101# failures. Any changes may mess up the stats.
102_EXCEPTION_PATTERNS = [
103        # Raised when devserver portfile does not exist on host.
104        (r".*Devserver portfile does not exist!.*$",
105         '(1) Devserver portfile does not exist on host'),
106        # Raised when devserver cannot copy packages to host.
107        (r".*Could not copy .* to device.*$",
108         '(2) Cannot copy packages to host'),
109        # Raised when devserver fails to run specific commands on host.
110        (r".*cwd=None, extra env=\{'LC_MESSAGES': 'C'\}.*$",
111         '(3) Fail to run specific command on host'),
112        # Raised when new build fails to boot on the host.
113        (r'.*RootfsUpdateError: Build .* failed to boot on.*$',
114         '(4) Build failed to boot on host'),
115        # Raised when the auto-update process is timed out.
116        (r'.*The CrOS auto-update process is timed out, '
117         'thus will be terminated.*$',
118         '(5) Auto-update is timed out'),
119        # Raised when the host is not pingable.
120        (r".*DeviceNotPingableError.*$",
121         '(6) Host is not pingable during auto-update'),
122        # Raised when hosts have unexpected status after rootfs update.
123        (r'.*Update failed with unexpected update status: '
124         'UPDATE_STATUS_IDLE.*$',
125         '(7) Host has unexpected status: UPDATE_STATUS_IDLE after rootfs '
126         'update'),
127        # Raised when devserver returns non-json response to shard/drone.
128        (r'.*No JSON object could be decoded.*$',
129         '(8) Devserver returned non-json object'),
130        # Raised when devserver loses host's ssh connection
131        (r'.*SSHConnectionError\: .* port 22\: Connection timed out.*$',
132         "(9) Devserver lost host's ssh connection"),
133        # Raised when error happens in writing files to host
134        (r'.*Write failed\: Broken pipe.*$',
135         "(10) Broken pipe while writing or connecting to host")]
136
137PREFER_LOCAL_DEVSERVER = CONFIG.get_config_value(
138        'CROS', 'prefer_local_devserver', type=bool, default=False)
139
140ENABLE_SSH_CONNECTION_FOR_DEVSERVER = CONFIG.get_config_value(
141        'CROS', 'enable_ssh_connection_for_devserver', type=bool,
142        default=False)
143
144# Directory to save auto-update logs
145AUTO_UPDATE_LOG_DIR = 'autoupdate_logs'
146
147DEFAULT_SUBNET_MASKBIT = 19
148
149# Metrics basepaths.
150METRICS_PATH = 'chromeos/autotest'
151PROVISION_PATH = METRICS_PATH + '/provision'
152
153
class DevServerException(Exception):
    """Error for a failed devserver call, e.g. a non-200 HTTP response."""
157
158
class BadBuildException(DevServerException):
    """Error signalling that a build failed to boot on the DUT."""
162
163
class RetryableProvisionException(DevServerException):
    """Error signalling a provision failure whose cause is retryable."""
167
class DevServerOverloadException(Exception):
    """Error for a 502 HTTP response from the dev server.

    remote_devserver_call treats this exception as retryable.
    """
171
class DevServerFailToLocateException(Exception):
    """Error signalling that no devserver could be located."""
175
176
class DevServerExceptionClassifier(object):
    """A Class represents exceptions raised from DUT by calling auto_update."""

    def __init__(self, err, keep_full_trace=True):
        """
        @param err: A single string representing one time provision
            error happened in auto_update().
        @param keep_full_trace: True to keep the whole stack trace of the
            error. False to keep only its last non-blank line.
        """
        if keep_full_trace:
            self._err = err
        else:
            # A trace usually ends with a newline; picking the last
            # non-blank line avoids ending up with an empty message.
            non_blank = [line for line in err.splitlines() if line.strip()]
            self._err = non_blank[-1] if non_blank else ''
        self._classification = None


    def _classify(self):
        """Find the first pattern in _EXCEPTION_PATTERNS matching the error.

        Each pattern is written as a single-line expression ('.*...*$'),
        so it is tried against every line of the error separately. Matching
        the whole text at once would never succeed for a multi-line trace
        because '.' does not match a newline.

        @return: The classification string of the first matching pattern,
            or '(0) Unknown exception' if no pattern matches.
        """
        err_lines = self._err.splitlines()
        for err_pattern, classification in _EXCEPTION_PATTERNS:
            if any(re.match(err_pattern, line) for line in err_lines):
                return classification

        return '(0) Unknown exception'


    @property
    def classification(self):
        """Classify the error

        The result is computed on first access and cached afterwards.

        @return: return a classified exception type (string) from
            _EXCEPTION_PATTERNS or '(0) Unknown exception'. Patterns are
            tried in the order they are listed and the first match wins.
        """
        if not self._classification:
            self._classification = self._classify()
        return self._classification


    @property
    def summary(self):
        """Use one line to show the error message."""
        return ' '.join(self._err.splitlines())


    @property
    def classified_exception(self):
        """What kind of exception will be raised to higher.

        @return: return a special Exception when the raised error is an
            RootfsUpdateError. Otherwise, return general DevServerException.
        """
        # The classification of RootfsUpdateError in _EXCEPTION_PATTERNS starts
        # with "(4)"
        if self.classification.startswith('(4)'):
            return BadBuildException

        return DevServerException
227
228
class MarkupStripper(HTMLParser.HTMLParser):
    """HTML parser that strips markup and keeps only the text content.

    Works by, basically, not doing anything for any tags, and only recording
    the content of text nodes in an internal data structure.
    """
    def __init__(self):
        # Run the base class initializer instead of calling reset() directly,
        # so that any state the parser sets up in __init__ is in place.
        # HTMLParser is an old-style class in Python 2, hence no super().
        HTMLParser.HTMLParser.__init__(self)
        self.fed = []


    def handle_data(self, d):
        """Consume content of text nodes, store it away."""
        self.fed.append(d)


    def get_data(self):
        """Concatenate and return all stored data."""
        return ''.join(self.fed)
248
249
def _strip_http_message(message):
    """Remove the HTML markup from the body of an HTTP message.

    The message is first fed to the parser decoded as UTF-32; if that raises
    UnicodeDecodeError the raw message is fed instead.

    @param message: A string returned by an HTTP call.

    @return: The text content of the message with markup stripped.
    """
    stripper = MarkupStripper()
    try:
        stripper.feed(message.decode('utf_32'))
    except UnicodeDecodeError:
        stripper.feed(message)
    text = stripper.get_data()
    return text
263
264
def _get_image_storage_server():
    """Read the image storage server from the global config.

    @return: The value of 'image_storage_server' in the CROS section.
    """
    storage_server = CONFIG.get_config_value(
            'CROS', 'image_storage_server', type=str)
    return storage_server
267
268
def _get_canary_channel_server():
    """Read the url of the canary-channel server from the global config.

    An example value is
    gsutil://chromeos-releases/canary-channel/<board>/<release>

    @return: The url to the canary channel server.
    """
    canary_server = CONFIG.get_config_value(
            'CROS', 'canary_channel_server', type=str)
    return canary_server
277
278
def _get_storage_server_for_artifacts(artifacts=None):
    """Pick the storage server that holds the given artifacts.

    The canary channel server is used only when the configured
    'factory_artifact' is non-empty and is contained in |artifacts|.

    @param artifacts: A list of artifacts we need to stage.
    @return: The address of the storage server that has these artifacts.
             The default image storage server if no artifacts are specified.
    """
    factory_artifact = global_config.global_config.get_config_value(
            'CROS', 'factory_artifact', type=str, default='')
    wants_factory = bool(artifacts and factory_artifact
                         and factory_artifact in artifacts)
    if not wants_factory:
        return _get_image_storage_server()
    return _get_canary_channel_server()
291
292
293def _gs_or_local_archive_url_args(archive_url):
294    """Infer the devserver call arguments to use with the given archive_url.
295
296    @param archive_url: The archive url to include the in devserver RPC. This
297            can either e a GS path or a local path.
298    @return: A dict of arguments to include in the devserver call.
299    """
300    if not archive_url:
301        return {}
302    elif archive_url.startswith('gs://'):
303        return {'archive_url': archive_url}
304    else:
305        # For a local path, we direct the devserver to move the files while
306        # staging. This is the fastest way to stage local files, but deletes the
307        # files from the source. This is OK because the files are available on
308        # the devserver once staged.
309        return {
310                'local_path': archive_url,
311                'delete_source': True,
312        }
313
314
def _reverse_lookup_from_config(address):
    """Find the hostname configured for the given IP address.

    The lookup goes through the hostname-address map from the config file
    and returns the first hostname, in the map's iteration order, whose
    address equals |address|.

    @param address: IP address string
    @returns: hostname string, or original input if not found
    """
    matches = (name for name, mapped_addr
               in _get_hostname_addr_map().iteritems()
               if mapped_addr == address)
    return next(matches, address)
330
331
def _get_hostname_addr_map():
    """Load the HOSTNAME_ADDR_MAP section of the global config.

    @return: dict mapping server hostnames to addresses
    """
    hostname_addr_map = CONFIG.get_section_as_dict('HOSTNAME_ADDR_MAP')
    return hostname_addr_map
338
339
def _get_dev_server_list():
    """Read the 'dev_server' list from the CROS config section.

    @return: The configured list of devservers, or [] if it is not set.
    """
    dev_servers = CONFIG.get_config_value(
            'CROS', 'dev_server', type=list, default=[])
    return dev_servers
342
343
def _get_crash_server_list():
    """Read the 'crash_server' list from the CROS config section.

    @return: The configured list of crash servers, or [] if it is not set.
    """
    crash_servers = CONFIG.get_config_value(
            'CROS', 'crash_server', type=list, default=[])
    return crash_servers
347
348
def remote_devserver_call(timeout_min=DEVSERVER_IS_STAGING_RETRY_MIN,
                          exception_to_raise=DevServerException):
    """A decorator to use with remote devserver calls.

    This decorator converts urllib2.HTTPErrors into DevServerExceptions
    with any embedded error info converted into plain text. The method
    retries on urllib2.URLError, error.CmdError or DevServerOverloadException
    to avoid devserver flakiness.

    When the call finally fails with an exception whose text contains
    ERR_MSG_FOR_TIMED_OUT_CALL, a 'call_timeout' metric is incremented
    before the exception is re-raised.

    @param timeout_min: Timeout in minutes handed to retry.retry.
    @param exception_to_raise: Exception class handed to retry.retry.

    @return: A decorator to apply to the method making the devserver call.
    """
    #pylint: disable=C0111

    def inner_decorator(method):
        # Callables without a __name__ are labelled None.
        label = method.__name__ if hasattr(method, '__name__') else None
        def metrics_wrapper(*args, **kwargs):
            @retry.retry((urllib2.URLError, error.CmdError,
                          DevServerOverloadException),
                         timeout_min=timeout_min,
                         exception_to_raise=exception_to_raise,
                        label=label)
            def wrapper():
                """This wrapper actually catches the HTTPError."""
                try:
                    return method(*args, **kwargs)
                except urllib2.HTTPError as e:
                    # The error body is markup; keep only its text.
                    error_markup = e.read()
                    raise DevServerException(_strip_http_message(error_markup))

            try:
                return wrapper()
            except Exception as e:
                if ERR_MSG_FOR_TIMED_OUT_CALL in str(e):
                    # Identify the devserver either from a DevServer instance
                    # passed as the first positional argument or from a
                    # 'devserver' keyword argument holding its url.
                    dev_server = None
                    if args and isinstance(args[0], DevServer):
                        dev_server = args[0].hostname
                    elif 'devserver' in kwargs:
                        dev_server = get_hostname(kwargs['devserver'])

                    logging.debug('RPC call %s has timed out on devserver %s.',
                                  label, dev_server)
                    c = metrics.Counter(
                            'chromeos/autotest/devserver/call_timeout')
                    # NOTE(review): the 'healthy' field carries the method
                    # label here, not a health flag - confirm this is what
                    # the metric consumers expect.
                    c.increment(fields={'dev_server': dev_server,
                                        'healthy': label})

                # Always propagate the original exception to the caller.
                raise

        return metrics_wrapper

    return inner_decorator
397
398
def get_hostname(url):
    """Extract the hostname portion of a URL.

    A URL has the form schema://hostname:port/path

    @param url: a Url string
    @return: a hostname string
    """
    parsed_url = urlparse.urlparse(url)
    return parsed_url.hostname
408
409
def get_resolved_hostname(url):
    """Return the symbolic hostname for a url.

    The host part of `url` is looked up in the hostname map from the config
    file; if it is not found there (e.g. it is not a mapped IP address), it
    is returned as is.

    @param url  The URL with which to perform the conversion/lookup.
    """
    host = get_hostname(url)
    return _reverse_lookup_from_config(host)
419
420
421class DevServer(object):
422    """Base class for all DevServer-like server stubs.
423
424    This is the base class for interacting with all Dev Server-like servers.
425    A caller should instantiate a sub-class of DevServer with:
426
427    host = SubClassServer.resolve(build)
428    server = SubClassServer(host)
429    """
430    _MIN_FREE_DISK_SPACE_GB = 20
431    _MAX_APACHE_CLIENT_COUNT = 75
432    # Threshold for the CPU load percentage for a devserver to be selected.
433    MAX_CPU_LOAD = 80.0
434    # Threshold for the network IO, set to 80MB/s
435    MAX_NETWORK_IO = 1024 * 1024 * 80
436    DISK_IO = 'disk_total_bytes_per_second'
437    NETWORK_IO = 'network_total_bytes_per_second'
438    CPU_LOAD = 'cpu_percent'
439    FREE_DISK = 'free_disk'
440    AU_PROCESS = 'au_process_count'
441    STAGING_THREAD_COUNT = 'staging_thread_count'
442    APACHE_CLIENT_COUNT = 'apache_client_count'
443
444
445    def __init__(self, devserver):
446        self._devserver = devserver
447
448
449    def url(self):
450        """Returns the url for this devserver."""
451        return self._devserver
452
453
454    @property
455    def hostname(self):
456        """Return devserver hostname parsed from the devserver URL.
457
458        Note that this is likely parsed from the devserver URL from
459        shadow_config.ini, meaning that the "hostname" part of the
460        devserver URL is actually an IP address.
461
462        @return hostname string
463        """
464        return get_hostname(self.url())
465
466
467    @property
468    def resolved_hostname(self):
469        """Return devserver hostname, resolved from its IP address.
470
471        Unlike the hostname property, this property attempts to look up
472        the proper hostname from the devserver IP address.  If lookup
473        fails, then fall back to whatever the hostname property would
474        have returned.
475
476        @return hostname string
477        """
478        return _reverse_lookup_from_config(self.hostname)
479
480
481    @staticmethod
482    def get_server_url(url):
483        """Get the devserver url from a repo url, which includes build info.
484
485        @param url: A job repo url.
486
487        @return A devserver url, e.g., http://127.0.0.10:8080
488        """
489        res = urlparse.urlparse(url)
490        if res.netloc:
491            return res.scheme + '://' + res.netloc
492
493
494    @classmethod
495    def get_devserver_load_wrapper(cls, devserver, timeout_sec, output):
496        """A wrapper function to call get_devserver_load in parallel.
497
498        @param devserver: url of the devserver.
499        @param timeout_sec: Number of seconds before time out the devserver
500                            call.
501        @param output: An output queue to save results to.
502        """
503        load = cls.get_devserver_load(devserver, timeout_min=timeout_sec/60.0)
504        if load:
505            load['devserver'] = devserver
506        output.put(load)
507
508
509    @classmethod
510    def get_devserver_load(cls, devserver,
511                           timeout_min=DEVSERVER_SSH_TIMEOUT_MINS):
512        """Returns True if the |devserver| is healthy to stage build.
513
514        @param devserver: url of the devserver.
515        @param timeout_min: How long to wait in minutes before deciding the
516                            the devserver is not up (float).
517
518        @return: A dictionary of the devserver's load.
519
520        """
521        call = cls._build_call(devserver, 'check_health')
522        @remote_devserver_call(timeout_min=timeout_min)
523        def get_load(devserver=devserver):
524            """Inner method that makes the call."""
525            return cls.run_call(call, timeout=timeout_min*60)
526
527        try:
528            return json.load(cStringIO.StringIO(get_load(devserver=devserver)))
529        except Exception as e:
530            logging.error('Devserver call failed: "%s", timeout: %s seconds,'
531                          ' Error: %s', call, timeout_min * 60, e)
532
533
534    @classmethod
535    def is_free_disk_ok(cls, load):
536        """Check if a devserver has enough free disk.
537
538        @param load: A dict of the load of the devserver.
539
540        @return: True if the devserver has enough free disk or disk check is
541                 skipped in global config.
542
543        """
544        if SKIP_DEVSERVER_HEALTH_CHECK:
545            logging.debug('devserver health check is skipped.')
546        elif load[cls.FREE_DISK] < cls._MIN_FREE_DISK_SPACE_GB:
547            return False
548
549        return True
550
551
552    @classmethod
553    def is_apache_client_count_ok(cls, load):
554        """Check if a devserver has enough Apache connections available.
555
556        Apache server by default has maximum of 150 concurrent connections. If
557        a devserver has too many live connections, it likely indicates the
558        server is busy handling many long running download requests, e.g.,
559        downloading stateful partitions. It is better not to add more requests
560        to it.
561
562        @param load: A dict of the load of the devserver.
563
564        @return: True if the devserver has enough Apache connections available,
565                 or disk check is skipped in global config.
566
567        """
568        if SKIP_DEVSERVER_HEALTH_CHECK:
569            logging.debug('devserver health check is skipped.')
570        elif cls.APACHE_CLIENT_COUNT not in load:
571            logging.debug('Apache client count is not collected from devserver.')
572        elif (load[cls.APACHE_CLIENT_COUNT] >
573              cls._MAX_APACHE_CLIENT_COUNT):
574            return False
575
576        return True
577
578
    @classmethod
    def devserver_healthy(cls, devserver,
                          timeout_min=DEVSERVER_SSH_TIMEOUT_MINS):
        """Returns True if the |devserver| is healthy to stage build.

        The devserver is considered healthy when its load can be fetched,
        its Apache client count is acceptable and it has enough free disk.
        The outcome and the failure reason are always reported to the
        'devserver_healthy' metric, whichever way the method returns.

        @param devserver: url of the devserver.
        @param timeout_min: How long to wait in minutes before deciding the
                            the devserver is not up (float).

        @return: True if devserver is healthy. Return False otherwise.

        """
        c = metrics.Counter('chromeos/autotest/devserver/devserver_healthy')
        # reason and healthy are read by the finally clause below, so they
        # must be set before each return.
        reason = ''
        healthy = False
        load = cls.get_devserver_load(devserver, timeout_min=timeout_min)
        try:
            if not load:
                # Failed to get the load of devserver.
                reason = '(1) Failed to get load.'
                return False

            apache_ok = cls.is_apache_client_count_ok(load)
            if not apache_ok:
                reason = '(2) Apache client count too high.'
                logging.error('Devserver check_health failed. Live Apache client '
                              'count is too high: %d.',
                              load[cls.APACHE_CLIENT_COUNT])
                return False

            disk_ok = cls.is_free_disk_ok(load)
            if not disk_ok:
                reason = '(3) Disk space too low.'
                logging.error('Devserver check_health failed. Free disk space is '
                              'low. Only %dGB is available.',
                              load[cls.FREE_DISK])
            # Free disk is the last check, so it decides the final result.
            healthy = bool(disk_ok)
            return disk_ok
        finally:
            # Runs on every exit path above, including the early returns.
            c.increment(fields={'dev_server': cls(devserver).resolved_hostname,
                                'healthy': healthy,
                                'reason': reason})
            # Monitor how many AU processes the devserver is currently running.
            # Nothing is reported when the load is missing or the count is 0
            # or absent.
            if load is not None and load.get(DevServer.AU_PROCESS):
                c_au = metrics.Gauge(
                        'chromeos/autotest/devserver/devserver_au_count')
                c_au.set(
                    load.get(DevServer.AU_PROCESS),
                    fields={'dev_server': cls(devserver).resolved_hostname})
628
629
630    @staticmethod
631    def _build_call(host, method, **kwargs):
632        """Build a URL to |host| that calls |method|, passing |kwargs|.
633
634        Builds a URL that calls |method| on the dev server defined by |host|,
635        passing a set of key/value pairs built from the dict |kwargs|.
636
637        @param host: a string that is the host basename e.g. http://server:90.
638        @param method: the dev server method to call.
639        @param kwargs: a dict mapping arg names to arg values.
640        @return the URL string.
641        """
642        # If the archive_url is a local path, the args expected by the devserver
643        # are a little different.
644        archive_url_args = _gs_or_local_archive_url_args(
645                kwargs.pop('archive_url', None))
646        kwargs.update(archive_url_args)
647
648        argstr = '&'.join(map(lambda x: "%s=%s" % x, kwargs.iteritems()))
649        return "%(host)s/%(method)s?%(argstr)s" % dict(
650                host=host, method=method, argstr=argstr)
651
652
653    def build_call(self, method, **kwargs):
654        """Builds a devserver RPC string that is used by 'run_call()'.
655
656        @param method: remote devserver method to call.
657        """
658        return self._build_call(self._devserver, method, **kwargs)
659
660
661    @classmethod
662    def build_all_calls(cls, method, **kwargs):
663        """Builds a list of URLs that makes RPC calls on all devservers.
664
665        Build a URL that calls |method| on the dev server, passing a set
666        of key/value pairs built from the dict |kwargs|.
667
668        @param method: the dev server method to call.
669        @param kwargs: a dict mapping arg names to arg values
670
671        @return the URL string
672        """
673        calls = []
674        # Note we use cls.servers as servers is class specific.
675        for server in cls.servers():
676            if cls.devserver_healthy(server):
677                calls.append(cls._build_call(server, method, **kwargs))
678
679        return calls
680
681
682    @classmethod
683    def run_call(cls, call, readline=False, timeout=None):
684        """Invoke a given devserver call using urllib.open.
685
686        Open the URL with HTTP, and return the text of the response. Exceptions
687        may be raised as for urllib2.urlopen().
688
689        @param call: a url string that calls a method to a devserver.
690        @param readline: whether read http response line by line.
691        @param timeout: The timeout seconds for this urlopen call.
692
693        @return the results of this call.
694        """
695        if timeout is not None:
696            return utils.urlopen_socket_timeout(
697                    call, timeout=timeout).read()
698        elif readline:
699            response = urllib2.urlopen(call)
700            return [line.rstrip() for line in response]
701        else:
702            return urllib2.urlopen(call).read()
703
704
705    @staticmethod
706    def servers():
707        """Returns a list of servers that can serve as this type of server."""
708        raise NotImplementedError()
709
710
711    @classmethod
712    def get_devservers_in_same_subnet(cls, ip, mask_bits=DEFAULT_SUBNET_MASKBIT,
713                                      unrestricted_only=False):
714        """Get the devservers in the same subnet of the given ip.
715
716        @param ip: The IP address of a dut to look for devserver.
717        @param mask_bits: Number of mask bits. Default is 19.
718        @param unrestricted_only: Set to True to select from devserver in
719                unrestricted subnet only. Default is False.
720
721        @return: A list of devservers in the same subnet of the given ip.
722
723        """
724        # server from cls.servers() is a URL, e.g., http://10.1.1.10:8082, so
725        # we need a dict to return the full devserver path once the IPs are
726        # filtered in get_servers_in_same_subnet.
727        server_names = {}
728        all_devservers = []
729        devservers = (cls.get_unrestricted_devservers() if unrestricted_only
730                      else cls.servers())
731        for server in devservers:
732            server_name = get_hostname(server)
733            server_names[server_name] = server
734            all_devservers.append(server_name)
735        if not all_devservers:
736            devserver_type = 'unrestricted only' if unrestricted_only else 'all'
737            raise DevServerFailToLocateException(
738                'Fail to locate a devserver for dut %s in %s devservers'
739                % (ip, devserver_type))
740
741        devservers = utils.get_servers_in_same_subnet(ip, mask_bits,
742                                                      all_devservers)
743        return [server_names[s] for s in devservers]
744
745
746    @classmethod
747    def get_unrestricted_devservers(
748                cls, restricted_subnets=utils.RESTRICTED_SUBNETS):
749        """Get the devservers not in any restricted subnet specified in
750        restricted_subnets.
751
752        @param restricted_subnets: A list of restriected subnets.
753
754        @return: A list of devservers not in any restricted subnet.
755
756        """
757        if not restricted_subnets:
758            return cls.servers()
759
760        metrics.Counter('chromeos/autotest/devserver/unrestricted_hotfix')
761        return cls.servers()
762
763    @classmethod
764    def get_healthy_devserver(cls, build, devservers, ban_list=None):
765        """"Get a healthy devserver instance from the list of devservers.
766
767        @param build: The build (e.g. x86-mario-release/R18-1586.0.0-a1-b1514).
768        @param devservers: The devserver list to be chosen out a healthy one.
769        @param ban_list: The blacklist of devservers we don't want to choose.
770                Default is None.
771
772        @return: A DevServer object of a healthy devserver. Return None if no
773                healthy devserver is found.
774
775        """
776        logging.debug('Pick one healthy devserver from %r', devservers)
777        while devservers:
778            hash_index = hash(build) % len(devservers)
779            devserver = devservers.pop(hash_index)
780            logging.debug('Check health for %s', devserver)
781            if ban_list and devserver in ban_list:
782                continue
783
784            if cls.devserver_healthy(devserver):
785                logging.debug('Pick %s', devserver)
786                return cls(devserver)
787
788
789    @classmethod
790    def get_available_devservers(cls, hostname=None,
791                                 prefer_local_devserver=PREFER_LOCAL_DEVSERVER,
792                                 restricted_subnets=utils.RESTRICTED_SUBNETS):
793        """Get devservers in the same subnet of the given hostname.
794
795        @param hostname: Hostname of a DUT to choose devserver for.
796
797        @return: A tuple of (devservers, can_retry), devservers is a list of
798                 devservers that's available for the given hostname. can_retry
799                 is a flag that indicate if caller can retry the selection of
800                 devserver if no devserver in the returned devservers can be
801                 used. For example, if hostname is in a restricted subnet,
802                 can_retry will be False.
803        """
804        logging.info('Getting devservers for host: %s',  hostname)
805        host_ip = None
806        if hostname:
807            host_ip = bin_utils.get_ip_address(hostname)
808            if not host_ip:
809                logging.error('Failed to get IP address of %s. Will pick a '
810                              'devserver without subnet constraint.', hostname)
811
812        if not host_ip:
813            return cls.get_unrestricted_devservers(restricted_subnets), False
814
815        # Go through all restricted subnet settings and check if the DUT is
816        # inside a restricted subnet. If so, only return the devservers in the
817        # restricted subnet and doesn't allow retry.
818        if host_ip and restricted_subnets:
819            subnet_ip, mask_bits = _get_subnet_for_host_ip(
820                    host_ip, restricted_subnets=restricted_subnets)
821            if subnet_ip:
822                logging.debug('The host %s (%s) is in a restricted subnet. '
823                              'Try to locate a devserver inside subnet '
824                              '%s:%d.', hostname, host_ip, subnet_ip,
825                              mask_bits)
826                devservers = cls.get_devservers_in_same_subnet(
827                        subnet_ip, mask_bits)
828                return devservers, False
829
830        # If prefer_local_devserver is set to True and the host is not in
831        # restricted subnet, pick a devserver in the same subnet if possible.
832        # Set can_retry to True so it can pick a different devserver if all
833        # devservers in the same subnet are down.
834        if prefer_local_devserver:
835            return (cls.get_devservers_in_same_subnet(
836                    host_ip, DEFAULT_SUBNET_MASKBIT, True), True)
837
838        return cls.get_unrestricted_devservers(restricted_subnets), False
839
840
841    @classmethod
842    def resolve(cls, build, hostname=None, ban_list=None):
843        """"Resolves a build to a devserver instance.
844
845        @param build: The build (e.g. x86-mario-release/R18-1586.0.0-a1-b1514).
846        @param hostname: The hostname of dut that requests a devserver. It's
847                         used to make sure a devserver in the same subnet is
848                         preferred.
849        @param ban_list: The blacklist of devservers shouldn't be chosen.
850
851        @raise DevServerException: If no devserver is available.
852        """
853        tried_devservers = set()
854        devservers, can_retry = cls.get_available_devservers(hostname)
855        if devservers:
856            tried_devservers |= set(devservers)
857
858        devserver = cls.get_healthy_devserver(build, devservers,
859                                              ban_list=ban_list)
860
861        if not devserver and can_retry:
862            # Find available devservers without dut location constrain.
863            devservers, _ = cls.get_available_devservers()
864            devserver = cls.get_healthy_devserver(build, devservers,
865                                                  ban_list=ban_list)
866            if devservers:
867                tried_devservers |= set(devservers)
868        if devserver:
869            return devserver
870        else:
871            subnet = 'unrestricted subnet'
872            if hostname is not None:
873                host_ip = bin_utils.get_ip_address(hostname)
874                if host_ip:
875                    subnet_ip, mask_bits = _get_subnet_for_host_ip(host_ip)
876                    subnet = '%s/%s' % (str(subnet_ip), str(mask_bits))
877
878            error_msg = ('All devservers in subnet: %s are currently down: '
879                         '%s. (dut hostname: %s)' %
880                         (subnet, tried_devservers, hostname))
881            logging.error(error_msg)
882            c = metrics.Counter(
883                    'chromeos/autotest/devserver/subnet_without_devservers')
884            c.increment(fields={'subnet': subnet, 'hostname': str(hostname)})
885            raise DevServerException(error_msg)
886
887
888    @classmethod
889    def random(cls):
890        """Return a random devserver that's available.
891
892        Devserver election in `resolve` method is based on a hash of the
893        build that a caller wants to stage. The purpose is that different
894        callers requesting for the same build can get the same devserver,
895        while the lab is able to distribute different builds across all
896        devservers. That helps to reduce the duplication of builds across
897        all devservers.
898        This function returns a random devserver, by passing a random
899        pseudo build name to `resolve `method.
900        """
901        return cls.resolve(build=str(time.time()))
902
903
class CrashServer(DevServer):
    """Class of DevServer that symbolicates crash dumps."""

    @staticmethod
    def servers():
        """Returns the crash servers given by _get_crash_server_list()."""
        return _get_crash_server_list()


    @remote_devserver_call()
    def symbolicate_dump(self, minidump_path, build):
        """Ask the devserver to symbolicate the dump at minidump_path.

        Posts the minidump to the devserver's symbolicate_dump call, using the
        archive of |build| on the image storage server for the debug symbols.

        @param minidump_path: the on-disk path of the minidump.
        @param build: The build (e.g. x86-mario-release/R18-1586.0.0-a1-b1514)
                      whose debug symbols are needed for symbolication.
        @return The contents of the stack trace. An empty string if the
                requests module can not be imported.
        @raise DevServerException upon any return code that's not HTTP OK.
               NOTE(review): this method itself raises urllib2.HTTPError;
               presumably remote_devserver_call converts it - confirm.
        """
        try:
            import requests
        except ImportError:
            logging.warning("Can't 'import requests' to connect to dev server.")
            return ''
        f = {'dev_server': self.resolved_hostname}
        c = metrics.Counter('chromeos/autotest/crashserver/symbolicate_dump')
        c.increment(fields=f)
        # Symbolicate minidump.
        m = 'chromeos/autotest/crashserver/symbolicate_dump_duration'
        with metrics.SecondsTimer(m, fields=f):
            call = self.build_call('symbolicate_dump',
                                   archive_url=_get_image_storage_server() + build)
            # Close the minidump as soon as the upload is done instead of
            # leaking the open file for the rest of the process lifetime.
            with open(minidump_path, 'rb') as minidump:
                request = requests.post(call, files={'minidump': minidump})
            if request.status_code == requests.codes.OK:
                return request.text

        # Any other status is reported as an HTTPError that carries the
        # response text both as its message and as its readable body.
        error_fd = cStringIO.StringIO(request.text)
        raise urllib2.HTTPError(
                call, request.status_code, request.text, request.headers,
                error_fd)


    @classmethod
    def get_available_devservers(cls, hostname=None):
        """Get all available crash servers.

        Crash server election doesn't need to count the location of hostname.

        @param hostname: Hostname of a DUT to choose devserver for. Not used;
                it defaults to None so that this override can be called
                without arguments, like the DevServer method it replaces.

        @return: A tuple of (all crash servers, False). can_retry is set to
                 False, as all crash servers are returned. There is no point to
                 retry.
        """
        return cls.servers(), False
962
963
964class ImageServerBase(DevServer):
965    """Base class for devservers used to stage builds.
966
967    CrOS and Android builds are staged in different ways as they have different
968    sets of artifacts. This base class abstracts the shared functions between
969    the two types of ImageServer.
970    """
971
972    @classmethod
973    def servers(cls):
974        """Returns a list of servers that can serve as a desired type of
975        devserver.
976        """
977        return _get_dev_server_list()
978
979
980    def _get_image_url(self, image):
981        """Returns the url of the directory for this image on the devserver.
982
983        @param image: the image that was fetched.
984        """
985        image = self.translate(image)
986        url_pattern = CONFIG.get_config_value('CROS', 'image_url_pattern',
987                                              type=str)
988        return (url_pattern % (self.url(), image)).replace('update', 'static')
989
990
991    @staticmethod
992    def create_metadata(server_name, image, artifacts=None, files=None):
993        """Create a metadata dictionary given the staged items.
994
995        The metadata can be send to metadata db along with stats.
996
997        @param server_name: name of the devserver, e.g 172.22.33.44.
998        @param image: The name of the image.
999        @param artifacts: A list of artifacts.
1000        @param files: A list of files.
1001
1002        @return A metadata dictionary.
1003
1004        """
1005        metadata = {'devserver': server_name,
1006                    'image': image,
1007                    '_type': 'devserver'}
1008        if artifacts:
1009            metadata['artifacts'] = ' '.join(artifacts)
1010        if files:
1011            metadata['files'] = ' '.join(files)
1012        return metadata
1013
1014
1015    @classmethod
1016    def run_ssh_call(cls, call, readline=False, timeout=None):
1017        """Construct an ssh-based rpc call, and execute it.
1018
1019        @param call: a url string that calls a method to a devserver.
1020        @param readline: whether read http response line by line.
1021        @param timeout: The timeout seconds for ssh call.
1022
1023        @return the results of this call.
1024        """
1025        hostname = get_hostname(call)
1026        ssh_call = 'ssh %s \'curl "%s"\'' % (hostname, utils.sh_escape(call))
1027        timeout_seconds = timeout if timeout else DEVSERVER_SSH_TIMEOUT_MINS*60
1028        try:
1029            result = utils.run(ssh_call, timeout=timeout_seconds)
1030        except error.CmdError as e:
1031            logging.debug('Error occurred with exit_code %d when executing the '
1032                          'ssh call: %s.', e.result_obj.exit_status,
1033                          e.result_obj.stderr)
1034            c = metrics.Counter('chromeos/autotest/devserver/ssh_failure')
1035            c.increment(fields={'dev_server': hostname})
1036            raise
1037        response = result.stdout
1038
1039        # If the curl command's returned HTTP response contains certain
1040        # exception string, raise the DevServerException of the response.
1041        if 'DownloaderException' in response:
1042            raise DevServerException(_strip_http_message(response))
1043
1044        if readline:
1045            # Remove line terminators and trailing whitespace
1046            response = response.splitlines()
1047            return [line.rstrip() for line in response]
1048
1049        return response
1050
1051
1052    @classmethod
1053    def run_call(cls, call, readline=False, timeout=None):
1054        """Invoke a given devserver call using urllib.open or ssh.
1055
1056        Open the URL with HTTP or SSH-based HTTP, and return the text of the
1057        response. Exceptions may be raised as for urllib2.urlopen() or
1058        utils.run().
1059
1060        @param call: a url string that calls a method to a devserver.
1061        @param readline: whether read http response line by line.
1062        @param timeout: The timeout seconds for urlopen call or ssh call.
1063
1064        @return the results of this call.
1065        """
1066        server_name = get_hostname(call)
1067        is_in_restricted_subnet = utils.get_restricted_subnet(
1068                server_name, utils.RESTRICTED_SUBNETS)
1069        _EMPTY_SENTINEL_VALUE = object()
1070        def kickoff_call():
1071            """Invoke a given devserver call using urllib.open or ssh.
1072
1073            @param call: a url string that calls a method to a devserver.
1074            @param is_in_restricted_subnet: whether the devserver is in subnet.
1075            @param readline: whether read http response line by line.
1076            @param timeout: The timeout seconds for urlopen call or ssh call.
1077            """
1078            if (not ENABLE_SSH_CONNECTION_FOR_DEVSERVER or
1079                not is_in_restricted_subnet):
1080                response = super(ImageServerBase, cls).run_call(
1081                        call, readline=readline, timeout=timeout)
1082            else:
1083                response = cls.run_ssh_call(
1084                        call, readline=readline, timeout=timeout)
1085            # Retry if devserver service is temporarily down, e.g. in a
1086            # devserver push.
1087            if ERR_MSG_FOR_DOWN_DEVSERVER in response:
1088                return False
1089
1090            # Don't return response directly since it may be empty string,
1091            # which causes poll_for_condition to retry.
1092            return _EMPTY_SENTINEL_VALUE if not response else response
1093
1094        try:
1095            response = bin_utils.poll_for_condition(
1096                    kickoff_call,
1097                    exception=bin_utils.TimeoutError(),
1098                    timeout=60,
1099                    sleep_interval=5)
1100            return '' if response is _EMPTY_SENTINEL_VALUE else response
1101        except bin_utils.TimeoutError:
1102            return ERR_MSG_FOR_DOWN_DEVSERVER
1103
1104
1105    @classmethod
1106    def download_file(cls, remote_file, local_file, timeout=None):
1107        """Download file from devserver.
1108
1109        The format of remote_file should be:
1110            http://devserver_ip:8082/static/board/...
1111
1112        @param remote_file: The URL of the file on devserver that need to be
1113            downloaded.
1114        @param local_file: The path of the file saved to local.
1115        @param timeout: The timeout seconds for this call.
1116        """
1117        response = cls.run_call(remote_file, timeout=timeout)
1118        with open(local_file, 'w') as out_log:
1119            out_log.write(response)
1120
1121
1122    def _poll_is_staged(self, **kwargs):
1123        """Polling devserver.is_staged until all artifacts are staged.
1124
1125        @param kwargs: keyword arguments to make is_staged devserver call.
1126
1127        @return: True if all artifacts are staged in devserver.
1128        """
1129        call = self.build_call('is_staged', **kwargs)
1130
1131        def all_staged():
1132            """Call devserver.is_staged rpc to check if all files are staged.
1133
1134            @return: True if all artifacts are staged in devserver. False
1135                     otherwise.
1136            @rasies DevServerException, the exception is a wrapper of all
1137                    exceptions that were raised when devserver tried to download
1138                    the artifacts. devserver raises an HTTPError or a CmdError
1139                    when an exception was raised in the code. Such exception
1140                    should be re-raised here to stop the caller from waiting.
1141                    If the call to devserver failed for connection issue, a
1142                    URLError exception is raised, and caller should retry the
1143                    call to avoid such network flakiness.
1144
1145            """
1146            try:
1147                result = self.run_call(call)
1148                logging.debug('whether artifact is staged: %r', result)
1149                return result == 'True'
1150            except urllib2.HTTPError as e:
1151                error_markup = e.read()
1152                raise DevServerException(_strip_http_message(error_markup))
1153            except urllib2.URLError as e:
1154                # Could be connection issue, retry it.
1155                # For example: <urlopen error [Errno 111] Connection refused>
1156                logging.error('URLError happens in is_stage: %r', e)
1157                return False
1158            except error.CmdError as e:
1159                # Retry if SSH failed to connect to the devserver.
1160                logging.warning('CmdError happens in is_stage: %r, will retry', e)
1161                return False
1162
1163        bin_utils.poll_for_condition(
1164                all_staged,
1165                exception=bin_utils.TimeoutError(),
1166                timeout=DEVSERVER_IS_STAGING_RETRY_MIN * 60,
1167                sleep_interval=_ARTIFACT_STAGE_POLLING_INTERVAL)
1168
1169        return True
1170
1171
1172    def _call_and_wait(self, call_name, error_message,
1173                       expected_response=SUCCESS, **kwargs):
1174        """Helper method to make a urlopen call, and wait for artifacts staged.
1175
1176        @param call_name: name of devserver rpc call.
1177        @param error_message: Error message to be thrown if response does not
1178                              match expected_response.
1179        @param expected_response: Expected response from rpc, default to
1180                                  |Success|. If it's set to None, do not compare
1181                                  the actual response. Any response is consider
1182                                  to be good.
1183        @param kwargs: keyword arguments to make is_staged devserver call.
1184
1185        @return: The response from rpc.
1186        @raise DevServerException upon any return code that's expected_response.
1187
1188        """
1189        call = self.build_call(call_name, async=True, **kwargs)
1190        try:
1191            response = self.run_call(call)
1192            logging.debug('response for RPC: %r', response)
1193            if ERR_MSG_FOR_INVALID_DEVSERVER_RESPONSE in response:
1194                logging.debug('Proxy error happens in RPC call, '
1195                              'will retry in 30 seconds')
1196                time.sleep(30)
1197                raise DevServerOverloadException()
1198        except httplib.BadStatusLine as e:
1199            logging.error(e)
1200            raise DevServerException('Received Bad Status line, Devserver %s '
1201                                     'might have gone down while handling '
1202                                     'the call: %s' % (self.url(), call))
1203
1204        if expected_response and not response == expected_response:
1205                raise DevServerException(error_message)
1206
1207        # `os_type` is needed in build a devserver call, but not needed for
1208        # wait_for_artifacts_staged, since that method is implemented by
1209        # each ImageServerBase child class.
1210        if 'os_type' in kwargs:
1211            del kwargs['os_type']
1212        self.wait_for_artifacts_staged(**kwargs)
1213        return response
1214
1215
1216    def _stage_artifacts(self, build, artifacts, files, archive_url, **kwargs):
1217        """Tell the devserver to download and stage |artifacts| from |image|
1218        specified by kwargs.
1219
1220        This is the main call point for staging any specific artifacts for a
1221        given build. To see the list of artifacts one can stage see:
1222
1223        ~src/platfrom/dev/artifact_info.py.
1224
1225        This is maintained along with the actual devserver code.
1226
1227        @param artifacts: A list of artifacts.
1228        @param files: A list of files to stage.
1229        @param archive_url: Optional parameter that has the archive_url to stage
1230                this artifact from. Default is specified in autotest config +
1231                image.
1232        @param kwargs: keyword arguments that specify the build information, to
1233                make stage devserver call.
1234
1235        @raise DevServerException upon any return code that's not HTTP OK.
1236        """
1237        if not archive_url:
1238            archive_url = _get_storage_server_for_artifacts(artifacts) + build
1239
1240        artifacts_arg = ','.join(artifacts) if artifacts else ''
1241        files_arg = ','.join(files) if files else ''
1242        error_message = ("staging %s for %s failed;"
1243                         "HTTP OK not accompanied by 'Success'." %
1244                         ('artifacts=%s files=%s ' % (artifacts_arg, files_arg),
1245                          build))
1246
1247        staging_info = ('build=%s, artifacts=%s, files=%s, archive_url=%s' %
1248                        (build, artifacts, files, archive_url))
1249        logging.info('Staging artifacts on devserver %s: %s',
1250                     self.url(), staging_info)
1251        success = False
1252        try:
1253            arguments = {'archive_url': archive_url,
1254                         'artifacts': artifacts_arg,
1255                         'files': files_arg}
1256            if kwargs:
1257                arguments.update(kwargs)
1258            f = {'artifacts': artifacts_arg,
1259                 'dev_server': self.resolved_hostname}
1260            with metrics.SecondsTimer(
1261                    'chromeos/autotest/devserver/stage_artifact_duration',
1262                    fields=f):
1263                self.call_and_wait(call_name='stage', error_message=error_message,
1264                                   **arguments)
1265            logging.info('Finished staging artifacts: %s', staging_info)
1266            success = True
1267        except (bin_utils.TimeoutError, error.TimeoutException):
1268            logging.error('stage_artifacts timed out: %s', staging_info)
1269            raise DevServerException(
1270                    'stage_artifacts timed out: %s' % staging_info)
1271        finally:
1272            f = {'success': success,
1273                 'artifacts': artifacts_arg,
1274                 'dev_server': self.resolved_hostname}
1275            metrics.Counter('chromeos/autotest/devserver/stage_artifact'
1276                            ).increment(fields=f)
1277
1278
1279    def call_and_wait(self, *args, **kwargs):
1280        """Helper method to make a urlopen call, and wait for artifacts staged.
1281
1282        This method needs to be overridden in the subclass to implement the
1283        logic to call _call_and_wait.
1284        """
1285        raise NotImplementedError
1286
1287
1288    def _trigger_download(self, build, artifacts, files, synchronous=True,
1289                          **kwargs_build_info):
1290        """Tell the devserver to download and stage image specified in
1291        kwargs_build_info.
1292
1293        Tells the devserver to fetch |image| from the image storage server
1294        named by _get_image_storage_server().
1295
1296        If |synchronous| is True, waits for the entire download to finish
1297        staging before returning. Otherwise only the artifacts necessary
1298        to start installing images onto DUT's will be staged before returning.
1299        A caller can then call finish_download to guarantee the rest of the
1300        artifacts have finished staging.
1301
1302        @param synchronous: if True, waits until all components of the image are
1303               staged before returning.
1304        @param kwargs_build_info: Dictionary of build information.
1305                For CrOS, it is None as build is the CrOS image name.
1306                For Android, it is {'target': target,
1307                                    'build_id': build_id,
1308                                    'branch': branch}
1309
1310        @raise DevServerException upon any return code that's not HTTP OK.
1311
1312        """
1313        if kwargs_build_info:
1314            archive_url = None
1315        else:
1316            archive_url = _get_image_storage_server() + build
1317        error_message = ("trigger_download for %s failed;"
1318                         "HTTP OK not accompanied by 'Success'." % build)
1319        kwargs = {'archive_url': archive_url,
1320                  'artifacts': artifacts,
1321                  'files': files,
1322                  'error_message': error_message}
1323        if kwargs_build_info:
1324            kwargs.update(kwargs_build_info)
1325
1326        logging.info('trigger_download starts for %s', build)
1327        try:
1328            response = self.call_and_wait(call_name='stage', **kwargs)
1329            logging.info('trigger_download finishes for %s', build)
1330        except (bin_utils.TimeoutError, error.TimeoutException):
1331            logging.error('trigger_download timed out for %s.', build)
1332            raise DevServerException(
1333                    'trigger_download timed out for %s.' % build)
1334        was_successful = response == SUCCESS
1335        if was_successful and synchronous:
1336            self._finish_download(build, artifacts, files, **kwargs_build_info)
1337
1338
1339    def _finish_download(self, build, artifacts, files, **kwargs_build_info):
1340        """Tell the devserver to finish staging image specified in
1341        kwargs_build_info.
1342
1343        If trigger_download is called with synchronous=False, it will return
1344        before all artifacts have been staged. This method contacts the
1345        devserver and blocks until all staging is completed and should be
1346        called after a call to trigger_download.
1347
1348        @param kwargs_build_info: Dictionary of build information.
1349                For CrOS, it is None as build is the CrOS image name.
1350                For Android, it is {'target': target,
1351                                    'build_id': build_id,
1352                                    'branch': branch}
1353
1354        @raise DevServerException upon any return code that's not HTTP OK.
1355        """
1356        archive_url = _get_image_storage_server() + build
1357        error_message = ("finish_download for %s failed;"
1358                         "HTTP OK not accompanied by 'Success'." % build)
1359        kwargs = {'archive_url': archive_url,
1360                  'artifacts': artifacts,
1361                  'files': files,
1362                  'error_message': error_message}
1363        if kwargs_build_info:
1364            kwargs.update(kwargs_build_info)
1365        try:
1366            self.call_and_wait(call_name='stage', **kwargs)
1367        except (bin_utils.TimeoutError, error.TimeoutException):
1368            logging.error('finish_download timed out for %s', build)
1369            raise DevServerException(
1370                    'finish_download timed out for %s.' % build)
1371
1372
1373    @remote_devserver_call()
1374    def locate_file(self, file_name, artifacts, build, build_info):
1375        """Locate a file with the given file_name on devserver.
1376
1377        This method calls devserver RPC `locate_file` to look up a file with
1378        the given file name inside specified build artifacts.
1379
1380        @param file_name: Name of the file to look for a file.
1381        @param artifacts: A list of artifact names to search for the file.
1382        @param build: Name of the build. For Android, it's None as build_info
1383                should be used.
1384        @param build_info: Dictionary of build information.
1385                For CrOS, it is None as build is the CrOS image name.
1386                For Android, it is {'target': target,
1387                                    'build_id': build_id,
1388                                    'branch': branch}
1389
1390        @return: A devserver url to the file.
1391        @raise DevServerException upon any return code that's not HTTP OK.
1392        """
1393        if not build and not build_info:
1394            raise DevServerException('You must specify build information to '
1395                                     'look for file %s in artifacts %s.' %
1396                                     (file_name, artifacts))
1397        kwargs = {'file_name': file_name,
1398                  'artifacts': artifacts}
1399        if build_info:
1400            build_path = '%(branch)s/%(target)s/%(build_id)s' % build_info
1401            kwargs.update(build_info)
1402            # Devserver treats Android and Brillo build in the same way as they
1403            # are both retrieved from Launch Control and have similar build
1404            # artifacts. Therefore, os_type for devserver calls is `android` for
1405            # both Android and Brillo builds.
1406            kwargs['os_type'] = 'android'
1407        else:
1408            build_path = build
1409            kwargs['build'] = build
1410        call = self.build_call('locate_file', async=False, **kwargs)
1411        try:
1412            file_path = self.run_call(call)
1413            return os.path.join(self.url(), 'static', build_path, file_path)
1414        except httplib.BadStatusLine as e:
1415            logging.error(e)
1416            raise DevServerException('Received Bad Status line, Devserver %s '
1417                                     'might have gone down while handling '
1418                                     'the call: %s' % (self.url(), call))
1419
1420
1421    @remote_devserver_call()
1422    def list_control_files(self, build, suite_name=''):
1423        """Ask the devserver to list all control files for |build|.
1424
1425        @param build: The build (e.g. x86-mario-release/R18-1586.0.0-a1-b1514)
1426                      whose control files the caller wants listed.
1427        @param suite_name: The name of the suite for which we require control
1428                           files.
1429        @return None on failure, or a list of control file paths
1430                (e.g. server/site_tests/autoupdate/control)
1431        @raise DevServerException upon any return code that's not HTTP OK.
1432        """
1433        build = self.translate(build)
1434        call = self.build_call('controlfiles', build=build,
1435                               suite_name=suite_name)
1436        return self.run_call(call, readline=True)
1437
1438
1439    @remote_devserver_call()
1440    def get_control_file(self, build, control_path):
1441        """Ask the devserver for the contents of a control file.
1442
1443        @param build: The build (e.g. x86-mario-release/R18-1586.0.0-a1-b1514)
1444                      whose control file the caller wants to fetch.
1445        @param control_path: The file to fetch
1446                             (e.g. server/site_tests/autoupdate/control)
1447        @return The contents of the desired file.
1448        @raise DevServerException upon any return code that's not HTTP OK.
1449        """
1450        build = self.translate(build)
1451        call = self.build_call('controlfiles', build=build,
1452                               control_path=control_path)
1453        return self.run_call(call)
1454
1455
1456    @remote_devserver_call()
1457    def list_suite_controls(self, build, suite_name=''):
1458        """Ask the devserver to list contents of all control files for |build|.
1459
1460        @param build: The build (e.g. x86-mario-release/R18-1586.0.0-a1-b1514)
1461                      whose control files' contents the caller wants returned.
1462        @param suite_name: The name of the suite for which we require control
1463                           files.
1464        @return None on failure, or a dict of contents of all control files
1465            (e.g. {'path1': "#Copyright controls ***", ...,
1466                pathX': "#Copyright controls ***"}
1467        @raise DevServerException upon any return code that's not HTTP OK.
1468        """
1469        build = self.translate(build)
1470        call = self.build_call('list_suite_controls', build=build,
1471                               suite_name=suite_name)
1472        return json.load(cStringIO.StringIO(self.run_call(call)))
1473
1474
1475class ImageServer(ImageServerBase):
1476    """Class for DevServer that handles RPCs related to CrOS images.
1477
    The calls to devserver to stage artifacts, including stage and download, are
    made in async mode. That is, when a caller makes an RPC |stage| to request
    that devserver stage certain artifacts, devserver handles the call, starts
    staging the artifacts in a new thread, and returns |Success| without waiting
    for staging to complete. When the caller receives |Success|, it polls
    devserver's is_staged call until all artifacts are staged.
    This mechanism is designed to prevent devserver from running out of
    cherrypy threads, as staging artifacts might take a long time, and cherrypy
    starts with a fixed number of threads that handle devserver RPCs.
1487    """
1488
1489    class ArtifactUrls(object):
1490        """A container for URLs of staged artifacts.
1491
1492        Attributes:
1493            full_payload: URL for downloading a staged full release update
1494            mton_payload: URL for downloading a staged M-to-N release update
1495            nton_payload: URL for downloading a staged N-to-N release update
1496
1497        """
1498        def __init__(self, full_payload=None, mton_payload=None,
1499                     nton_payload=None):
1500            self.full_payload = full_payload
1501            self.mton_payload = mton_payload
1502            self.nton_payload = nton_payload
1503
1504
1505    def wait_for_artifacts_staged(self, archive_url, artifacts='', files=''):
1506        """Polling devserver.is_staged until all artifacts are staged.
1507
1508        @param archive_url: Google Storage URL for the build.
1509        @param artifacts: Comma separated list of artifacts to download.
1510        @param files: Comma separated list of files to download.
1511        @return: True if all artifacts are staged in devserver.
1512        """
1513        kwargs = {'archive_url': archive_url,
1514                  'artifacts': artifacts,
1515                  'files': files}
1516        return self._poll_is_staged(**kwargs)
1517
1518
1519    @remote_devserver_call()
1520    def call_and_wait(self, call_name, archive_url, artifacts, files,
1521                      error_message, expected_response=SUCCESS):
1522        """Helper method to make a urlopen call, and wait for artifacts staged.
1523
1524        @param call_name: name of devserver rpc call.
1525        @param archive_url: Google Storage URL for the build..
1526        @param artifacts: Comma separated list of artifacts to download.
1527        @param files: Comma separated list of files to download.
1528        @param expected_response: Expected response from rpc, default to
1529                                  |Success|. If it's set to None, do not compare
1530                                  the actual response. Any response is consider
1531                                  to be good.
1532        @param error_message: Error message to be thrown if response does not
1533                              match expected_response.
1534
1535        @return: The response from rpc.
1536        @raise DevServerException upon any return code that's expected_response.
1537
1538        """
1539        kwargs = {'archive_url': archive_url,
1540                  'artifacts': artifacts,
1541                  'files': files}
1542        return self._call_and_wait(call_name, error_message,
1543                                   expected_response, **kwargs)
1544
1545
1546    @remote_devserver_call()
1547    def stage_artifacts(self, image=None, artifacts=None, files='',
1548                        archive_url=None):
1549        """Tell the devserver to download and stage |artifacts| from |image|.
1550
1551         This is the main call point for staging any specific artifacts for a
1552        given build. To see the list of artifacts one can stage see:
1553
1554        ~src/platfrom/dev/artifact_info.py.
1555
1556        This is maintained along with the actual devserver code.
1557
1558        @param image: the image to fetch and stage.
1559        @param artifacts: A list of artifacts.
1560        @param files: A list of files to stage.
1561        @param archive_url: Optional parameter that has the archive_url to stage
1562                this artifact from. Default is specified in autotest config +
1563                image.
1564
1565        @raise DevServerException upon any return code that's not HTTP OK.
1566        """
1567        if not artifacts and not files:
1568            raise DevServerException('Must specify something to stage.')
1569        image = self.translate(image)
1570        self._stage_artifacts(image, artifacts, files, archive_url)
1571
1572
1573    @remote_devserver_call(timeout_min=DEVSERVER_SSH_TIMEOUT_MINS)
1574    def list_image_dir(self, image):
1575        """List the contents of the image stage directory, on the devserver.
1576
1577        @param image: The image name, eg: <board>-<branch>/<Milestone>-<build>.
1578
1579        @raise DevServerException upon any return code that's not HTTP OK.
1580        """
1581        image = self.translate(image)
1582        logging.info('Requesting contents from devserver %s for image %s',
1583                     self.url(), image)
1584        archive_url = _get_storage_server_for_artifacts() + image
1585        call = self.build_call('list_image_dir', archive_url=archive_url)
1586        response = self.run_call(call, readline=True)
1587        for line in response:
1588            logging.info(line)
1589
1590
1591    def trigger_download(self, image, synchronous=True):
1592        """Tell the devserver to download and stage |image|.
1593
1594        Tells the devserver to fetch |image| from the image storage server
1595        named by _get_image_storage_server().
1596
1597        If |synchronous| is True, waits for the entire download to finish
1598        staging before returning. Otherwise only the artifacts necessary
1599        to start installing images onto DUT's will be staged before returning.
1600        A caller can then call finish_download to guarantee the rest of the
1601        artifacts have finished staging.
1602
1603        @param image: the image to fetch and stage.
1604        @param synchronous: if True, waits until all components of the image are
1605               staged before returning.
1606
1607        @raise DevServerException upon any return code that's not HTTP OK.
1608
1609        """
1610        image = self.translate(image)
1611        artifacts = _ARTIFACTS_TO_BE_STAGED_FOR_IMAGE
1612        self._trigger_download(image, artifacts, files='',
1613                               synchronous=synchronous)
1614
1615
1616    @remote_devserver_call()
1617    def setup_telemetry(self, build):
1618        """Tell the devserver to setup telemetry for this build.
1619
1620        The devserver will stage autotest and then extract the required files
1621        for telemetry.
1622
1623        @param build: the build to setup telemetry for.
1624
1625        @returns path on the devserver that telemetry is installed to.
1626        """
1627        build = self.translate(build)
1628        archive_url = _get_image_storage_server() + build
1629        call = self.build_call('setup_telemetry', archive_url=archive_url)
1630        try:
1631            response = self.run_call(call)
1632        except httplib.BadStatusLine as e:
1633            logging.error(e)
1634            raise DevServerException('Received Bad Status line, Devserver %s '
1635                                     'might have gone down while handling '
1636                                     'the call: %s' % (self.url(), call))
1637        return response
1638
1639
1640    def finish_download(self, image):
1641        """Tell the devserver to finish staging |image|.
1642
1643        If trigger_download is called with synchronous=False, it will return
1644        before all artifacts have been staged. This method contacts the
1645        devserver and blocks until all staging is completed and should be
1646        called after a call to trigger_download.
1647
1648        @param image: the image to fetch and stage.
1649        @raise DevServerException upon any return code that's not HTTP OK.
1650        """
1651        image = self.translate(image)
1652        artifacts = _ARTIFACTS_TO_BE_STAGED_FOR_IMAGE_WITH_AUTOTEST
1653        self._finish_download(image, artifacts, files='')
1654
1655
1656    def get_update_url(self, image):
1657        """Returns the url that should be passed to the updater.
1658
1659        @param image: the image that was fetched.
1660        """
1661        image = self.translate(image)
1662        url_pattern = CONFIG.get_config_value('CROS', 'image_url_pattern',
1663                                              type=str)
1664        return (url_pattern % (self.url(), image))
1665
1666
1667    def get_staged_file_url(self, filename, image):
1668        """Returns the url of a staged file for this image on the devserver."""
1669        return '/'.join([self._get_image_url(image), filename])
1670
1671
1672    def get_full_payload_url(self, image):
1673        """Returns a URL to a staged full payload.
1674
1675        @param image: the image that was fetched.
1676
1677        @return A fully qualified URL that can be used for downloading the
1678                payload.
1679
1680        """
1681        return self._get_image_url(image) + '/update.gz'
1682
1683
1684    def get_test_image_url(self, image):
1685        """Returns a URL to a staged test image.
1686
1687        @param image: the image that was fetched.
1688
1689        @return A fully qualified URL that can be used for downloading the
1690                image.
1691
1692        """
1693        return self._get_image_url(image) + '/chromiumos_test_image.bin'
1694
1695
1696    def get_recovery_image_url(self, image):
1697        """Returns a URL to a staged recovery image.
1698
1699        @param image: the image that was fetched.
1700
1701        @return A fully qualified URL that can be used for downloading the
1702                image.
1703
1704        """
1705        return self._get_image_url(image) + '/recovery_image.bin'
1706
1707
1708    @remote_devserver_call()
1709    def get_dependencies_file(self, build):
1710        """Ask the dev server for the contents of the suite dependencies file.
1711
1712        Ask the dev server at |self._dev_server| for the contents of the
1713        pre-processed suite dependencies file (at DEPENDENCIES_FILE)
1714        for |build|.
1715
1716        @param build: The build (e.g. x86-mario-release/R21-2333.0.0)
1717                      whose dependencies the caller is interested in.
1718        @return The contents of the dependencies file, which should eval to
1719                a dict of dicts, as per bin_utils/suite_preprocessor.py.
1720        @raise DevServerException upon any return code that's not HTTP OK.
1721        """
1722        build = self.translate(build)
1723        call = self.build_call('controlfiles',
1724                               build=build, control_path=DEPENDENCIES_FILE)
1725        return self.run_call(call)
1726
1727
1728    @remote_devserver_call()
1729    def get_latest_build_in_gs(self, board):
1730        """Ask the devservers for the latest offical build in Google Storage.
1731
1732        @param board: The board for who we want the latest official build.
1733        @return A string of the returned build rambi-release/R37-5868.0.0
1734        @raise DevServerException upon any return code that's not HTTP OK.
1735        """
1736        call = self.build_call(
1737                'xbuddy_translate/remote/%s/latest-official' % board,
1738                image_dir=_get_image_storage_server())
1739        image_name = self.run_call(call)
1740        return os.path.dirname(image_name)
1741
1742
1743    def translate(self, build_name):
1744        """Translate the build name if it's in LATEST format.
1745
1746        If the build name is in the format [builder]/LATEST, return the latest
1747        build in Google Storage otherwise return the build name as is.
1748
1749        @param build_name: build_name to check.
1750
1751        @return The actual build name to use.
1752        """
1753        match = re.match(r'([\w-]+)-(\w+)/LATEST', build_name, re.I)
1754        if not match:
1755            return build_name
1756        translated_build = self.get_latest_build_in_gs(match.groups()[0])
1757        logging.debug('Translated relative build %s to %s', build_name,
1758                      translated_build)
1759        return translated_build
1760
1761
1762    @classmethod
1763    @remote_devserver_call()
1764    def get_latest_build(cls, target, milestone=''):
1765        """Ask all the devservers for the latest build for a given target.
1766
1767        @param target: The build target, typically a combination of the board
1768                       and the type of build e.g. x86-mario-release.
1769        @param milestone:  For latest build set to '', for builds only in a
1770                           specific milestone set to a str of format Rxx
1771                           (e.g. R16). Default: ''. Since we are dealing with a
1772                           webserver sending an empty string, '', ensures that
1773                           the variable in the URL is ignored as if it was set
1774                           to None.
1775        @return A string of the returned build e.g. R20-2226.0.0.
1776        @raise DevServerException upon any return code that's not HTTP OK.
1777        """
1778        calls = cls.build_all_calls('latestbuild', target=target,
1779                                    milestone=milestone)
1780        latest_builds = []
1781        for call in calls:
1782            latest_builds.append(cls.run_call(call))
1783
1784        return max(latest_builds, key=version.LooseVersion)
1785
1786
1787    @remote_devserver_call()
1788    def _kill_au_process_for_host(self, **kwargs):
1789        """Kill the triggerred auto_update process if error happens in cros_au.
1790
1791        @param kwargs: Arguments to make kill_au_proc devserver call.
1792        """
1793        call = self.build_call('kill_au_proc', **kwargs)
1794        response = self.run_call(call)
1795        if not response == 'True':
1796            raise DevServerException(
1797                    'Failed to kill the triggerred CrOS auto_update process'
1798                    'on devserver %s, the response is %s' % (
1799                            self.url(), response))
1800
1801
1802    def kill_au_process_for_host(self, host_name, pid):
1803        """Kill the triggerred auto_update process if error happens.
1804
1805        Usually this function is used to clear all potential left au processes
1806        of the given host name.
1807
1808        If pid is specified, the devserver will further check the given pid to
1809        make sure the process is killed. This is used for the case that the au
1810        process has started in background, but then provision fails due to
1811        some unknown issues very fast. In this case, when 'kill_au_proc' is
1812        called, there's no corresponding background track log created for this
1813        ongoing au process, which prevents this RPC call from killing this au
1814        process.
1815
1816        @param host_name: The DUT's hostname.
1817        @param pid: The ongoing au process's pid.
1818
1819        @return: True if successfully kill the auto-update process for host.
1820        """
1821        kwargs = {'host_name': host_name, 'pid': pid}
1822        try:
1823            self._kill_au_process_for_host(**kwargs)
1824        except DevServerException:
1825            return False
1826
1827        return True
1828
1829
1830    @remote_devserver_call()
1831    def _clean_track_log(self, **kwargs):
1832        """Clean track log for the current auto-update process."""
1833        call = self.build_call('handler_cleanup', **kwargs)
1834        self.run_call(call)
1835
1836
1837    def clean_track_log(self, host_name, pid):
1838        """Clean track log for the current auto-update process.
1839
1840        @param host_name: The host name to be updated.
1841        @param pid: The auto-update process id.
1842
1843        @return: True if track log is successfully cleaned, False otherwise.
1844        """
1845        if not pid:
1846            return False
1847
1848        kwargs = {'host_name': host_name, 'pid': pid}
1849        try:
1850            self._clean_track_log(**kwargs)
1851        except DevServerException as e:
1852            logging.debug('Failed to clean track_status_file on '
1853                          'devserver for host %s and process id %s: %s',
1854                          host_name, pid, str(e))
1855            return False
1856
1857        return True
1858
1859
1860    def _get_au_log_filename(self, log_dir, host_name, pid):
1861        """Return the auto-update log's filename."""
1862        return os.path.join(log_dir, CROS_AU_LOG_FILENAME % (
1863                    host_name, pid))
1864
1865    def _read_json_response_from_devserver(self, response):
1866        """Reads the json response from the devserver.
1867
1868        This is extracted to its own function so that it can be easily mocked.
1869        @param response: the response for a devserver.
1870        """
1871        try:
1872            return json.loads(response)
1873        except ValueError as e:
1874            logging.debug('Failed to load json response: %s', response)
1875            raise DevServerException(e)
1876
1877
1878    @remote_devserver_call()
1879    def _collect_au_log(self, log_dir, **kwargs):
1880        """Collect logs from devserver after cros-update process is finished.
1881
1882        Collect the logs that recording the whole cros-update process, and
1883        write it to sysinfo path of a job.
1884
1885        The example log file name that is stored is like:
1886            '1220-repair/sysinfo/CrOS_update_host_name_pid.log'
1887
1888        @param host_name: the DUT's hostname.
1889        @param pid: the auto-update process id on devserver.
1890        @param log_dir: The directory to save the cros-update process log
1891                        retrieved from devserver.
1892        """
1893        call = self.build_call('collect_cros_au_log', **kwargs)
1894        response = self.run_call(call)
1895        if not os.path.exists(log_dir):
1896            os.mkdir(log_dir)
1897        write_file = self._get_au_log_filename(
1898                log_dir, kwargs['host_name'], kwargs['pid'])
1899        logging.debug('Saving auto-update logs into %s', write_file)
1900
1901        au_logs = self._read_json_response_from_devserver(response)
1902
1903        try:
1904            for k, v in au_logs['host_logs'].items():
1905                log_name = '%s_%s_%s' % (k, kwargs['host_name'], kwargs['pid'])
1906                log_path = os.path.join(log_dir, log_name)
1907                with open(log_path, 'w') as out_log:
1908                    out_log.write(v)
1909        except IOError as e:
1910            raise DevServerException('Failed to write auto-update hostlogs: '
1911                                     '%s' % e)
1912
1913        try:
1914            with open(write_file, 'w') as out_log:
1915                out_log.write(au_logs['cros_au_log'])
1916        except:
1917            raise DevServerException('Failed to write auto-update logs into '
1918                                     '%s' % write_file)
1919
1920
1921    def collect_au_log(self, host_name, pid, log_dir):
1922        """Collect logs from devserver after cros-update process is finished.
1923
1924        @param host_name: the DUT's hostname.
1925        @param pid: the auto-update process id on devserver.
1926        @param log_dir: The directory to save the cros-update process log
1927                        retrieved from devserver.
1928
1929        @return: True if auto-update log is successfully collected, False
1930          otherwise.
1931        """
1932        if not pid:
1933            return False
1934
1935        kwargs = {'host_name': host_name, 'pid': pid}
1936        try:
1937            self._collect_au_log(log_dir, **kwargs)
1938        except DevServerException as e:
1939            logging.debug('Failed to collect auto-update log on '
1940                          'devserver for host %s and process id %s: %s',
1941                          host_name, pid, str(e))
1942            return False
1943
1944        return True
1945
1946
1947    @remote_devserver_call()
1948    def _trigger_auto_update(self, **kwargs):
1949        """Trigger auto-update by calling devserver.cros_au.
1950
1951        @param kwargs:  Arguments to make cros_au devserver call.
1952
1953        @return: a tuple indicates whether the RPC call cros_au succeeds and
1954          the auto-update process id running on devserver.
1955        """
1956        host_name = kwargs['host_name']
1957        call = self.build_call('cros_au', async=True, **kwargs)
1958        try:
1959            response = self.run_call(call)
1960            logging.info(
1961                'Received response from devserver for cros_au call: %r',
1962                response)
1963        except httplib.BadStatusLine as e:
1964            logging.error(e)
1965            raise DevServerException('Received Bad Status line, Devserver %s '
1966                                     'might have gone down while handling '
1967                                     'the call: %s' % (self.url(), call))
1968
1969        return response
1970
1971
    def _check_for_auto_update_finished(self, pid, wait=True, **kwargs):
        """Polling devserver.get_au_status to get current auto-update status.

        The current auto-update status is used to identify whether the update
        process is finished.

        @param pid:    The background process id for auto-update in devserver.
        @param wait:   Should the check wait for completion. When True, the
                       status is polled through bin_utils.poll_for_condition;
                       when False, the status is checked exactly once.
        @param kwargs: keyword arguments to make get_au_status devserver call.
                       |pid| is added to them under the key 'pid'.

        @return: True if auto-update is finished for a given dut. With
                 wait=False, False if it is not finished yet or the status
                 could not be fetched because of a connection problem.
        @raise DevServerException if the devserver reports an error or
               returns a status that is not valid JSON.
        """
        logging.debug('Check the progress for auto-update process %r', pid)
        kwargs['pid'] = pid
        call = self.build_call('get_au_status', **kwargs)

        def all_finished():
            """Call devserver.get_au_status rpc to check if auto-update
               is finished.

            @return: True if auto-update is finished for a given dut. False
                     otherwise, including when the call failed with a
                     URLError, CmdError or socket error (so that a polling
                     caller simply tries again).
            @raises  DevServerException when the status carries a
                     'detailed_error_msg', when the devserver answers with an
                     HTTPError, or when the status is not valid JSON. Such
                     errors are raised to stop the caller from waiting.

            """
            try:
                au_status = self.run_call(call)
                response = json.loads(au_status)
                # This is a temp fix to fit both dict and tuple returning
                # values. The dict check will be removed after a corresponding
                # devserver CL is deployed.
                if isinstance(response, dict):
                    if response.get('detailed_error_msg'):
                        raise DevServerException(
                                response.get('detailed_error_msg'))

                    if response.get('finished'):
                        logging.debug('CrOS auto-update is finished')
                        return True
                    else:
                        logging.debug('Current CrOS auto-update status: %s',
                                      response.get('status'))
                        return False

                # Non-dict response: element 0 is the finished flag and
                # element 1 the status text.
                if not response[0]:
                    logging.debug('Current CrOS auto-update status: %s',
                                  response[1])
                    return False
                else:
                    logging.debug('CrOS auto-update is finished')
                    return True
            except urllib2.HTTPError as e:
                # Must be handled before URLError below, which would
                # otherwise turn this into a retry.
                error_markup = e.read()
                raise DevServerException(_strip_http_message(error_markup))
            except urllib2.URLError as e:
                # Could be connection issue, retry it.
                # For example: <urlopen error [Errno 111] Connection refused>
                logging.warning('URLError (%r): Retrying connection to '
                                'devserver to check auto-update status.', e)
                return False
            except error.CmdError:
                # Retry if SSH failed to connect to the devserver.
                logging.warning('CmdError: Retrying SSH connection to check '
                                'auto-update status.')
                return False
            except socket.error as e:
                # Could be some temporary devserver connection issues.
                logging.warning('Socket Error (%r): Retrying connection to '
                                'devserver to check auto-update status.', e)
                return False
            except ValueError as e:
                # Typically json.loads rejecting the status text.
                raise DevServerException(
                        '%s (Got AU status: %r)' % (str(e), au_status))

        if wait:
            # Keep calling all_finished every CROS_AU_POLLING_INTERVAL seconds
            # until it returns True or the timeout (given in minutes, hence
            # the * 60) expires.
            bin_utils.poll_for_condition(
                    all_finished,
                    exception=bin_utils.TimeoutError(),
                    timeout=DEVSERVER_IS_CROS_AU_FINISHED_TIMEOUT_MIN * 60,
                    sleep_interval=CROS_AU_POLLING_INTERVAL)

            return True
        else:
            return all_finished()
2063
2064
2065    def check_for_auto_update_finished(self, response, wait=True, **kwargs):
2066        """Processing response of 'cros_au' and polling for auto-update status.
2067
2068        Will wait for the whole auto-update process is finished.
2069
2070        @param response: The response from RPC 'cros_au'
2071        @param kwargs: keyword arguments to make get_au_status devserver call.
2072
2073        @return: a tuple includes two elements.
2074          finished: True if the operation has completed.
2075          raised_error: None if everything works well or the raised error.
2076          pid: the auto-update process id on devserver.
2077        """
2078
2079        pid = 0
2080        raised_error = None
2081        finished = False
2082        try:
2083            response = json.loads(response)
2084            if response[0]:
2085                pid = response[1]
2086                # If provision is kicked off asynchronously, pid will be -1.
2087                # If provision is not successfully kicked off , pid continues
2088                # to be 0.
2089                if pid > 0:
2090                    logging.debug('start process %r for auto_update in '
2091                                  'devserver', pid)
2092                    finished = self._check_for_auto_update_finished(
2093                            pid, wait=wait, **kwargs)
2094        except Exception as e:
2095            logging.debug('Failed to trigger auto-update process on devserver')
2096            finished = True
2097            raised_error = e
2098        finally:
2099            return finished, raised_error, pid
2100
2101
2102    def _check_error_message(self, error_patterns_to_check, error_msg):
2103        """Detect whether specific error pattern exist in error message.
2104
2105        @param error_patterns_to_check: the error patterns to check
2106        @param error_msg: the error message which may include any error
2107                          pattern.
2108
2109        @return A boolean variable, True if error_msg contains any error
2110            pattern in error_patterns_to_check, False otherwise.
2111        """
2112        for err in error_patterns_to_check:
2113            if err in error_msg:
2114                return True
2115
2116        return False
2117
2118
2119    def _is_retryable(self, error_msg):
2120        """Detect whether we will retry auto-update based on error_msg.
2121
2122        @param error_msg: The given error message.
2123
2124        @return A boolean variable which indicates whether we will retry
2125            auto_update with another devserver based on the given error_msg.
2126        """
2127        # For now we just hard-code the error message we think it's suspicious.
2128        # When we get more date about what's the json response when devserver
2129        # is overloaded, we can update this part.
2130        retryable_error_patterns = [ERR_MSG_FOR_INVALID_DEVSERVER_RESPONSE,
2131                                    'is not pingable']
2132        return self._check_error_message(retryable_error_patterns, error_msg)
2133
2134
2135    def _should_use_original_payload(self, error_msg):
2136        devserver_error_patterns = ['DevserverCannotStartError']
2137        return self._check_error_message(devserver_error_patterns, error_msg)
2138
2139
2140    def _parse_buildname_safely(self, build_name):
2141        """Parse a given buildname safely.
2142
2143        @param build_name: the build name to be parsed.
2144
2145        @return: a tuple (board, build_type, milestone)
2146        """
2147        try:
2148            board, build_type, milestone, _ = server_utils.ParseBuildName(
2149                    build_name)
2150        except server_utils.ParseBuildNameException:
2151            logging.warning('Unable to parse build name %s for metrics. '
2152                            'Continuing anyway.', build_name)
2153            board, build_type, milestone = ('', '', '')
2154
2155        return board, build_type, milestone
2156
2157
2158    def _emit_auto_update_metrics(self, board, build_type, dut_host_name,
2159                                  build_name, attempt,
2160                                  success, failure_reason, duration):
2161        """Send metrics for a single auto_update attempt.
2162
2163        @param board: a field in metrics representing which board this
2164            auto_update tries to update.
2165        @param build_type: a field in metrics representing which build type this
2166            auto_update tries to update.
2167        @param dut_host_name: a field in metrics representing which DUT this
2168            auto_update tries to update.
2169        @param build_name: auto update build being updated to.
2170        @param attempt: a field in metrics, representing which attempt/retry
2171            this auto_update is.
2172        @param success: a field in metrics, representing whether this
2173            auto_update succeeds or not.
2174        @param failure_reason: DevServerExceptionClassifier object to show
2175            auto update failure reason, or None.
2176        @param duration: auto update duration time, in seconds.
2177        """
2178        # The following is high cardinality, but sparse.
2179        # Each DUT is of a single board type, and likely build type.
2180        # The affinity also results in each DUT being attached to the same
2181        # dev_server as well.
2182        fields = {
2183                'board': board,
2184                'build_type': build_type,
2185                'dut_host_name': dut_host_name,
2186                'dev_server': self.resolved_hostname,
2187                'attempt': attempt,
2188                'success': success,
2189        }
2190
2191        # reset_after=True is required for String gauges events to ensure that
2192        # the metrics are not repeatedly emitted until the server restarts.
2193
2194        metrics.String(PROVISION_PATH + '/auto_update_build_by_devserver_dut',
2195                       reset_after=True).set(build_name, fields=fields)
2196
2197        if not success:
2198            metrics.String(
2199                PROVISION_PATH +
2200                '/auto_update_failure_reason_by_devserver_dut',
2201                reset_after=True).set(
2202                    failure_reason.classification if failure_reason else '',
2203                    fields=fields)
2204
2205        metrics.SecondsDistribution(
2206                PROVISION_PATH + '/auto_update_duration_by_devserver_dut').add(
2207                        duration, fields=fields)
2208
2209
2210    def _emit_provision_metrics(self, error_list, duration_list,
2211                                is_au_success, board, build_type, milestone,
2212                                dut_host_name, is_aue2etest,
2213                                total_duration, build_name):
2214        """Send metrics for provision request.
2215
2216        Provision represents potentially multiple auto update attempts.
2217
2218        Please note: to avoid reaching or exceeding the monarch field
2219        cardinality limit, we avoid a metric that includes both dut hostname
2220        and other high cardinality fields.
2221
2222        @param error_list: a list of DevServerExceptionClassifier objects to
2223            show errors happened in provision. Usually it contains 1 ~
2224            AU_RETRY_LIMIT objects since we only retry provision for several
2225            times.
2226        @param duration_list: a list of provision duration time, counted by
2227            seconds.
2228        @param is_au_success: a field in metrics, representing whether this
2229            auto_update succeeds or not.
2230        @param board: a field in metrics representing which board this
2231            auto_update tries to update.
2232        @param build_type: a field in metrics representing which build type this
2233            auto_update tries to update.
2234        @param milestone: a field in metrics representing which milestone this
2235            auto_update tries to update.
2236        @param dut_host_name: a field in metrics representing which DUT this
2237            auto_update tries to update.
2238        @param is_aue2etest: a field in metrics representing if provision was
2239            done as part of the autoupdate_EndToEndTest.
2240        """
2241        # The following is high cardinality, but sparse.
2242        # Each DUT is of a single board type, and likely build type.
2243        # The affinity also results in each DUT being attached to the same
2244        # dev_server as well.
2245        fields = {
2246                'board': board,
2247                'build_type': build_type,
2248                'dut_host_name': dut_host_name,
2249                'dev_server': self.resolved_hostname,
2250                'success': is_au_success,
2251        }
2252
2253        # reset_after=True is required for String gauges events to ensure that
2254        # the metrics are not repeatedly emitted until the server restarts.
2255
2256        metrics.String(PROVISION_PATH + '/provision_build_by_devserver_dut',
2257                       reset_after=True).set(build_name, fields=fields)
2258
2259        if error_list:
2260            metrics.String(
2261                    PROVISION_PATH +
2262                    '/provision_failure_reason_by_devserver_dut',
2263                    reset_after=True).set(error_list[0].classification,
2264                                          fields=fields)
2265
2266        metrics.SecondsDistribution(
2267                PROVISION_PATH + '/provision_duration_by_devserver_dut').add(
2268                        total_duration, fields=fields)
2269
2270
2271    def _parse_buildname_from_gs_uri(self, uri):
2272        """Get parameters needed for AU metrics when build_name is not known.
2273
2274        autoupdate_EndToEndTest is run with two Google Storage URIs from the
2275        gs://chromeos-releases bucket. URIs in this bucket do not have the
2276        build_name in the format samus-release/R60-0000.0.0.
2277
2278        We can get the milestone and board by checking the instructions.json
2279        file contained in the bucket with the payloads.
2280
2281        @param uri: The partial uri we received from autoupdate_EndToEndTest.
2282        """
2283        try:
2284            # Get the instructions file that contains info about the build.
2285            gs_file = 'gs://chromeos-releases/' + uri + '/*instructions.json'
2286            files = bin_utils.gs_ls(gs_file)
2287            for f in files:
2288                gs_folder, _, instruction_file = f.rpartition('/')
2289                self.stage_artifacts(image=uri,
2290                                     files=[instruction_file],
2291                                     archive_url=gs_folder)
2292                json_file = self.get_staged_file_url(instruction_file, uri)
2293                response = urllib2.urlopen(json_file)
2294                data = json.load(response)
2295                return data['board'], 'release', data['version']['milestone']
2296        except (ValueError, error.CmdError, urllib2.URLError) as e:
2297            logging.debug('Problem getting values for metrics: %s', e)
2298            logging.warning('Unable to parse build name %s from AU test for '
2299                            'metrics. Continuing anyway.', uri)
2300
2301        return '', '', ''
2302
2303
    def auto_update(self, host_name, build_name, original_board=None,
                    original_release_version=None, log_dir=None,
                    force_update=False, full_update=False,
                    payload_filename=None, force_original=False,
                    clobber_stateful=True, quick_provision=False):
        """Auto-update a CrOS host, retrying up to AU_RETRY_LIMIT times.

        Each attempt triggers an auto-update on the devserver, waits for it
        to finish, collects the AU log (when log_dir is given), cleans the
        track log, and emits per-attempt metrics. After the loop, metrics for
        the whole provision are emitted and the result is returned or raised.

        @param host_name: The hostname of the DUT to auto-update.
        @param build_name: The build name to be auto-updated on the DUT.
        @param original_board: The original board of the DUT to auto-update.
        @param original_release_version: The release version of the DUT's
            current build.
        @param log_dir: The log directory to store auto-update logs from
            devserver.
        @param force_update: Force an update even if the version installed
                             is the same. Default: False.
        @param full_update:  If True, do not run stateful update, directly
                             force a full reimage. If False, try stateful
                             update first if the dut is already installed
                             with the same version.
        @param payload_filename: Used to specify the exact file to
                                 use for autoupdating. If None, the payload
                                 will be determined by build_name. You
                                 must have already staged this file before
                                 passing it in here. A non-None value also
                                 marks the run as an autoupdate_EndToEndTest
                                 for metrics purposes.
        @param force_original: Whether to force stateful update with the
                               original payload. May also be switched on
                               between attempts, based on the error seen.
        @param clobber_stateful: If True do a clean install of stateful.
        @param quick_provision: Attempt to use quick provision path first.

        @return A tuple (is_success, pid) in which:
            1. is_success indicates whether this auto_update succeeds.
            2. pid is the process id of the successful autoupdate run.

        @raise DevServerException if auto_update fails and is not retryable.
        @raise RetryableProvisionException if it fails and is retryable.
        """
        # Arguments forwarded to the devserver RPC helpers. 'host_name' may be
        # replaced by the DUT's IP address after a failed attempt (see the
        # finally clause below).
        kwargs = {'host_name': host_name,
                  'build_name': build_name,
                  'force_update': force_update,
                  'full_update': full_update,
                  'clobber_stateful': clobber_stateful,
                  'quick_provision': quick_provision}

        # An explicit payload file is treated as the marker of an
        # autoupdate_EndToEndTest run.
        is_aue2etest = payload_filename is not None

        if is_aue2etest:
            kwargs['payload_filename'] = payload_filename

        error_msg_attempt = 'Exception raised on auto_update attempt #%s:\n%s'
        is_au_success = False
        au_log_dir = os.path.join(log_dir,
                                  AUTO_UPDATE_LOG_DIR) if log_dir else None
        # One DevServerExceptionClassifier per failed attempt that produced
        # an error.
        error_list = []
        retry_with_another_devserver = False
        # Duration in whole seconds of every attempt, successful or not.
        duration_list = []

        # board/build_type/milestone are only used as metric fields. For AU
        # end-to-end tests build_name is a GS URI, so it is parsed differently.
        if is_aue2etest:
            board, build_type, milestone = self._parse_buildname_from_gs_uri(
                build_name)
        else:
            board, build_type, milestone = self._parse_buildname_safely(
                build_name)

        provision_start_time = time.time()
        for au_attempt in range(AU_RETRY_LIMIT):
            logging.debug('Start CrOS auto-update for host %s at %d time(s).',
                          host_name, au_attempt + 1)
            au_start_time = time.time()
            failure_reason = None
            # No matter _trigger_auto_update succeeds or fails, the auto-update
            # track_status_file should be cleaned, and the auto-update execute
            # log should be collected to directory sysinfo. Also, the error
            # raised by _trigger_auto_update should be displayed.
            try:
                # Try update with stateful.tgz of old release version in the
                # last try of auto-update.
                if force_original and original_release_version:
                    # Monitor this case in monarch
                    original_build = '%s/%s' % (original_board,
                                                original_release_version)
                    c = metrics.Counter(
                            'chromeos/autotest/provision/'
                            'cros_update_with_original_build')
                    f = {'dev_server': self.resolved_hostname,
                         'board': board,
                         'build_type': build_type,
                         'milestone': milestone,
                         'original_build': original_build}
                    c.increment(fields=f)

                    logging.debug('Try updating stateful partition of the '
                                  'host with the same version of its current '
                                  'rootfs partition: %s', original_build)
                    response = self._trigger_auto_update(
                            original_build=original_build, **kwargs)
                else:
                    response = self._trigger_auto_update(**kwargs)
            except DevServerException as e:
                # Triggering the update itself failed; record the reason and
                # let the finally clause decide whether to retry.
                logging.debug(error_msg_attempt, au_attempt+1, str(e))
                failure_reason = DevServerExceptionClassifier(str(e))
            else:
                # The trigger RPC succeeded; wait for the update to finish.
                # pid is only bound on this path.
                _, raised_error, pid = self.check_for_auto_update_finished(
                        response, **kwargs)

                # Error happens in _collect_au_log won't be raised.
                if au_log_dir:
                    is_collect_success = self.collect_au_log(
                            kwargs['host_name'], pid, au_log_dir)
                else:
                    is_collect_success = True

                # Error happens in _clean_track_log won't be raised.
                if pid >= 0:
                    is_clean_success = self.clean_track_log(
                            kwargs['host_name'], pid)
                else:
                    is_clean_success = True

                # If any error is raised previously, log it and retry
                # auto-update. Otherwise, claim a successful CrOS auto-update.
                if (not raised_error and is_clean_success and
                    is_collect_success):
                    logging.debug('CrOS auto-update succeed for host %s',
                                  host_name)
                    is_au_success = True
                    break
                else:
                    if not self.kill_au_process_for_host(kwargs['host_name'],
                                                         pid):
                        logging.debug('Failed to kill auto_update process %d',
                                      pid)
                    if raised_error:
                        error_str = str(raised_error)
                        logging.debug(error_msg_attempt, au_attempt + 1,
                                      error_str)
                        if au_log_dir:
                            logging.debug('Please see error details in log %s',
                                          self._get_au_log_filename(
                                                  au_log_dir,
                                                  kwargs['host_name'],
                                                  pid))
                        failure_reason = DevServerExceptionClassifier(
                            error_str, keep_full_trace=False)
                        # A retryable error ends the loop (see finally) and is
                        # surfaced as RetryableProvisionException below.
                        if self._is_retryable(error_str):
                            retry_with_another_devserver = True

                        # Switch the next attempt to the original-build path.
                        if self._should_use_original_payload(error_str):
                            force_original = True

            finally:
                # Runs for every attempt, including the successful one that
                # leaves the loop via 'break' above.
                duration = int(time.time() - au_start_time)
                duration_list.append(duration)
                if failure_reason:
                    error_list.append(failure_reason)
                self._emit_auto_update_metrics(board, build_type, host_name,
                                               build_name, au_attempt + 1,
                                               is_au_success, failure_reason,
                                               duration)
                # NOTE(review): a 'break' inside 'finally' also discards any
                # exception that is propagating at this point.
                if retry_with_another_devserver:
                    break

                # Only sleep and switch to the IP when another attempt will
                # actually follow.
                if not is_au_success and au_attempt < AU_RETRY_LIMIT - 1:
                    time.sleep(CROS_AU_RETRY_INTERVAL)
                    # Use the IP of DUT if the hostname failed.
                    host_name_ip = socket.gethostbyname(host_name)
                    kwargs['host_name'] = host_name_ip
                    logging.debug(
                            'AU failed, trying IP instead of hostname: %s',
                            host_name_ip)

        total_duration = int(time.time() - provision_start_time)
        self._emit_provision_metrics(error_list, duration_list, is_au_success,
                                     board, build_type, milestone, host_name,
                                     is_aue2etest, total_duration, build_name)

        if is_au_success:
            return (is_au_success, pid)

        # If errors happen in the CrOS AU process, report the concatenation
        # of the errors happening in first & second provision.
        # If error happens in RPCs of cleaning track log, collecting
        # auto-update logs, or killing auto-update processes, just report a
        # common error here.
        if error_list:
            # Entries are numbered from 0 in the combined message.
            real_error = ', '.join(['%d) %s' % (i, e.summary)
                                    for i, e in enumerate(error_list)])
            if retry_with_another_devserver:
                raise RetryableProvisionException(real_error)
            else:
                # The exception type is chosen by the first recorded error.
                raise error_list[0].classified_exception(real_error)
        else:
            raise DevServerException('RPC calls after the whole auto-update '
                                     'process failed.')
2498
2499
class AndroidBuildServer(ImageServerBase):
    """Class for DevServer that handles RPCs related to Android builds.

    The calls to devserver to stage artifacts, including stage and download, are
    made in async mode. That is, when caller makes an RPC |stage| to request
    devserver to stage certain artifacts, devserver handles the call and starts
    staging artifacts in a new thread, and return |Success| without waiting for
    staging being completed. When caller receives message |Success|, it polls
    devserver's is_staged call until all artifacts are staged.
    Such mechanism is designed to prevent cherrypy threads in devserver being
    running out, as staging artifacts might take long time, and cherrypy starts
    with a fixed number of threads that handle devserver rpc.
    """

    def wait_for_artifacts_staged(self, target, build_id, branch,
                                  archive_url=None, artifacts='', files=''):
        """Polling devserver.is_staged until all artifacts are staged.

        @param target: Target of the android build to stage, e.g.,
                       shamu-userdebug.
        @param build_id: Build id of the android build to stage.
        @param branch: Branch of the android build to stage.
        @param archive_url: Google Storage URL for the build. Only forwarded
                            when it is not empty.
        @param artifacts: Comma separated list of artifacts to download.
        @param files: Comma separated list of files to download.

        @return: True if all artifacts are staged in devserver.
        """
        kwargs = {'target': target,
                  'build_id': build_id,
                  'branch': branch,
                  'artifacts': artifacts,
                  'files': files,
                  'os_type': 'android'}
        if archive_url:
            kwargs['archive_url'] = archive_url
        return self._poll_is_staged(**kwargs)


    @remote_devserver_call()
    def call_and_wait(self, call_name, target, build_id, branch, archive_url,
                      artifacts, files, error_message,
                      expected_response=SUCCESS):
        """Helper method to make a urlopen call, and wait for artifacts staged.

        @param call_name: name of devserver rpc call.
        @param target: Target of the android build to stage, e.g.,
                       shamu-userdebug.
        @param build_id: Build id of the android build to stage.
        @param branch: Branch of the android build to stage.
        @param archive_url: Google Storage URL for the build. Only forwarded
                            when it is not empty.
        @param artifacts: Comma separated list of artifacts to download.
        @param files: Comma separated list of files to download.
        @param error_message: Error message to be thrown if response does not
                              match expected_response.
        @param expected_response: Expected response from rpc, default to
                                  |Success|. If it's set to None, do not compare
                                  the actual response. Any response is
                                  considered to be good.

        @return: The response from rpc.
        @raise DevServerException upon any return code that's not
               expected_response.

        """
        kwargs = {'target': target,
                  'build_id': build_id,
                  'branch': branch,
                  'artifacts': artifacts,
                  'files': files,
                  'os_type': 'android'}
        if archive_url:
            kwargs['archive_url'] = archive_url
        return self._call_and_wait(call_name, error_message, expected_response,
                                   **kwargs)


    @remote_devserver_call()
    def stage_artifacts(self, target=None, build_id=None, branch=None,
                        image=None, artifacts=None, files='', archive_url=None):
        """Tell the devserver to download and stage |artifacts| from |image|.

        This is the main call point for staging any specific artifacts for a
        given build. To see the list of artifacts one can stage see:

        ~src/platform/dev/artifact_info.py.

        This is maintained along with the actual devserver code.

        @param target: Target of the android build to stage, e.g.,
                               shamu-userdebug.
        @param build_id: Build id of the android build to stage.
        @param branch: Branch of the android build to stage.
        @param image: Name of a build to test, in the format of
                      branch/target/build_id. Only consulted when none of
                      target, build_id and branch is given.
        @param artifacts: A list of artifacts.
        @param files: A list of files to stage.
        @param archive_url: Optional parameter that has the archive_url to stage
                this artifact from. Default is specified in autotest config +
                image.

        @raise DevServerException if the build info is incomplete, if there
               is nothing to stage, or upon any return code that's not
               HTTP OK.
        """
        if image and not target and not build_id and not branch:
            branch, target, build_id = utils.parse_launch_control_build(image)
        # From here on all three build identifiers are guaranteed non-empty,
        # so no further validation of them is needed below.
        if not target or not build_id or not branch:
            raise DevServerException('Must specify all build info (target, '
                                     'build_id and branch) to stage.')

        android_build_info = {'target': target,
                              'build_id': build_id,
                              'branch': branch}
        if not artifacts and not files:
            raise DevServerException('Must specify something to stage.')
        build = ANDROID_BUILD_NAME_PATTERN % android_build_info
        self._stage_artifacts(build, artifacts, files, archive_url,
                              **android_build_info)


    def trigger_download(self, target, build_id, branch, artifacts=None,
                         files='', os='android', synchronous=True):
        """Tell the devserver to download and stage an Android build.

        Tells the devserver to fetch an Android build from the image storage
        server named by _get_image_storage_server().

        If |synchronous| is True, waits for the entire download to finish
        staging before returning. Otherwise only the artifacts necessary
        to start installing images onto DUT's will be staged before returning.
        A caller can then call finish_download to guarantee the rest of the
        artifacts have finished staging.

        @param target: Target of the android build to stage, e.g.,
                       shamu-userdebug.
        @param build_id: Build id of the android build to stage.
        @param branch: Branch of the android build to stage.
        @param artifacts: A string of artifacts separated by comma. If None,
               use the default artifacts for Android or Brillo build.
        @param files: String of files separated by commas.
        @param os: OS artifacts to download (android/brillo).
        @param synchronous: if True, waits until all components of the image are
               staged before returning.

        @raise DevServerException upon any return code that's not HTTP OK.

        """
        android_build_info = {'target': target,
                              'build_id': build_id,
                              'branch': branch}
        build = ANDROID_BUILD_NAME_PATTERN % android_build_info
        if not artifacts:
            # The board is the part of the target before the first '-'.
            board = target.split('-')[0]
            artifacts = (
                android_utils.AndroidArtifacts.get_artifacts_for_reimage(
                        board, os))
        self._trigger_download(build, artifacts, files=files,
                               synchronous=synchronous, **android_build_info)


    def finish_download(self, target, build_id, branch, os='android'):
        """Tell the devserver to finish staging an Android build.

        If trigger_download is called with synchronous=False, it will return
        before all artifacts have been staged. This method contacts the
        devserver and blocks until all staging is completed and should be
        called after a call to trigger_download.

        @param target: Target of the android build to stage, e.g.,
                       shamu-userdebug.
        @param build_id: Build id of the android build to stage.
        @param branch: Branch of the android build to stage.
        @param os: OS artifacts to download (android/brillo).

        @raise DevServerException upon any return code that's not HTTP OK.
        """
        android_build_info = {'target': target,
                              'build_id': build_id,
                              'branch': branch}
        build = ANDROID_BUILD_NAME_PATTERN % android_build_info
        # The board is the part of the target before the first '-'.
        board = target.split('-')[0]
        # Pass |os| through, as trigger_download does, so both methods select
        # the same artifact list for a given build.
        artifacts = (
                android_utils.AndroidArtifacts.get_artifacts_for_reimage(
                        board, os))
        self._finish_download(build, artifacts, files='', **android_build_info)


    def get_staged_file_url(self, filename, target, build_id, branch):
        """Returns the url of a staged file for this image on the devserver.

        @param filename: Name of the file.
        @param target: Target of the android build to stage, e.g.,
                       shamu-userdebug.
        @param build_id: Build id of the android build to stage.
        @param branch: Branch of the android build to stage.

        @return: The url of a staged file for this image on the devserver.
        """
        android_build_info = {'target': target,
                              'build_id': build_id,
                              'branch': branch,
                              'os_type': 'android'}
        build = ANDROID_BUILD_NAME_PATTERN % android_build_info
        return '/'.join([self._get_image_url(build), filename])


    @remote_devserver_call()
    def translate(self, build_name):
        """Translate the build name if it's in LATEST format.

        If the build name is in the format [branch]/[target]/LATEST, return the
        latest build in Launch Control otherwise return the build name as is.
        The LATEST marker is matched case-insensitively.

        @param build_name: build_name to check.

        @return The actual build name to use.
        """
        branch, target, build_id = utils.parse_launch_control_build(build_name)
        if build_id.upper() != 'LATEST':
            return build_name
        call = self.build_call('latestbuild', branch=branch, target=target,
                               os_type='android')
        translated_build_id = self.run_call(call)
        translated_build = (ANDROID_BUILD_NAME_PATTERN %
                            {'branch': branch,
                             'target': target,
                             'build_id': translated_build_id})
        logging.debug('Translated relative build %s to %s', build_name,
                      translated_build)
        return translated_build
2731
2732
def _is_load_healthy(load):
    """Tell whether a devserver's load is within the allowed thresholds.

    The CPU load is checked first; network IO is only looked at when the CPU
    load is acceptable. Each violation is logged at debug level.

    @param load: The devserver's load stats to check.

    @return: False if CPU load or network IO exceeds its maximum, True
             otherwise.

    """
    cpu_load = load[DevServer.CPU_LOAD]
    if cpu_load > DevServer.MAX_CPU_LOAD:
        logging.debug('CPU load of devserver %s is at %s%%, which is higher '
                      'than the threshold of %s%%', load['devserver'],
                      cpu_load, DevServer.MAX_CPU_LOAD)
        return False

    network_io = load[DevServer.NETWORK_IO]
    if network_io > DevServer.MAX_NETWORK_IO:
        logging.debug('Network IO of devserver %s is at %i Bps, which is '
                      'higher than the threshold of %i bytes per second.',
                      load['devserver'], network_io,
                      DevServer.MAX_NETWORK_IO)
        return False

    return True
2755
2756
def _compare_load(devserver1, devserver2):
    """Comparator function to compare load between two devservers.

    Only the disk IO stat is compared. The comparison is done on the raw
    values rather than on a truncated difference, so two loads that differ by
    less than 1 are still ordered correctly.

    @param devserver1: A dictionary of devserver load stats to be compared.
    @param devserver2: A dictionary of devserver load stats to be compared.

    @return: -1 if the disk IO of `devserver1` is less than that of
             `devserver2`, 1 if it is greater, and 0 if they are equal.

    """
    disk_io1 = devserver1[DevServer.DISK_IO]
    disk_io2 = devserver2[DevServer.DISK_IO]
    # Sign of the comparison as an int; same contract as the builtin cmp().
    return (disk_io1 > disk_io2) - (disk_io1 < disk_io2)
2768
2769
def _get_subnet_for_host_ip(host_ip,
                            restricted_subnets=utils.RESTRICTED_SUBNETS):
    """Find the first restricted subnet that contains the given host IP.

    @param host_ip: the IP of a DUT.
    @param restricted_subnets: A list of restricted subnets, each a
            (subnet_ip, mask_bits) pair.

    @return: the matching (subnet_ip, mask_bits) tuple, or (None, None) when
             host_ip is in none of the restricted subnets.
    """
    matches = ((subnet_ip, mask_bits)
               for subnet_ip, mask_bits in restricted_subnets
               if utils.is_in_same_subnet(host_ip, subnet_ip, mask_bits))
    return next(matches, (None, None))
2785
2786
def get_least_loaded_devserver(devserver_type=ImageServer, hostname=None):
    """Pick the devserver that currently reports the least load.

    Load stats are fetched from every candidate devserver in parallel, then
    unusable and overloaded devservers are dropped and the remaining ones are
    ordered with _compare_load.

    TODO(crbug.com/486278): Devserver with required build already staged should
    take higher priority. This will need check_health call to be able to verify
    existence of a given build/artifact. Also, in case all devservers are
    overloaded, the logic here should fall back to the old behavior that randomly
    selects a devserver based on the hash of the image name/url.

    @param devserver_type: Type of devserver to select from. Default is set to
                           ImageServer.
    @param hostname: Hostname of the dut that the devserver is used for. The
            picked devserver needs to respect the location of the host if
            `prefer_local_devserver` is set to True or `restricted_subnets` is
            set.

    @return: Name of the devserver with the least load, or None when no
             devserver can be selected.

    """
    logging.debug('Get the least loaded %r', devserver_type)
    candidates, can_retry = devserver_type.get_available_devservers(
            hostname)
    # With no healthy candidate, either give up (can_retry is False) or drop
    # the hostname constraint and consider every devserver.
    if not devserver_type.get_healthy_devserver('', candidates):
        if not can_retry:
            return None
        candidates, _ = devserver_type.get_available_devservers()

    # Each get_devserver_load call runs in its own process so that a timeout
    # can be enforced with a signal.
    load_queue = multiprocessing.Queue()
    workers = [
            multiprocessing.Process(
                    target=devserver_type.get_devserver_load_wrapper,
                    args=(candidate, TIMEOUT_GET_DEVSERVER_LOAD, load_queue))
            for candidate in candidates]

    for worker in workers:
        worker.start()
    # The timeout for the process commands aren't reliable.  Allow some extra
    # time for potential overhead in the subprocesses.  crbug.com/913695
    join_timeout = TIMEOUT_GET_DEVSERVER_LOAD + 10
    for worker in workers:
        worker.join(join_timeout)

    # Drain one result per finished worker before killing the stragglers, so
    # that terminating a process cannot corrupt the queue.
    reported = []
    for worker in workers:
        if not worker.is_alive():
            reported.append(load_queue.get())
    for worker in workers:
        if worker.is_alive():
            worker.terminate()

    # Drop results that failed to be retrieved or that lack load-check
    # support, plus devservers short on disk or with too many apache clients.
    usable = []
    for load in reported:
        if (load and DevServer.CPU_LOAD in load and
            DevServer.is_free_disk_ok(load) and
            DevServer.is_apache_client_count_ok(load)):
            usable.append(load)
    if not usable:
        logging.debug('Failed to retrieve load stats from any devserver. No '
                      'load balancing can be applied.')
        return None

    healthy = [load for load in usable if _is_load_healthy(load)]
    if not healthy:
        logging.error('No devserver has the capacity to be selected.')
        return None

    healthy.sort(cmp=_compare_load)
    return healthy[0]['devserver']
2855
2856
def resolve(build, hostname=None, ban_list=None):
    """Resolve a devserver that can be used for the given build and hostname.

    Launch Control builds are dispatched to AndroidBuildServer; every other
    build is dispatched to ImageServer.

    @param build: Name of a build to stage on devserver, e.g.,
                  ChromeOS build: daisy-release/R50-1234.0.0
                  Launch Control build: git_mnc_release/shamu-eng
    @param hostname: Hostname of the host the devserver is resolved for.
            Default is None, which means the devserver is not restricted by
            the network location of the host.
    @param ban_list: Devservers that shouldn't be chosen. Only forwarded on
            the ImageServer path.

    @return: A DevServer instance that can be used to stage given build for the
             given host.
    """
    if not utils.is_launch_control_build(build):
        return ImageServer.resolve(build, hostname, ban_list=ban_list)
    return AndroidBuildServer.resolve(build, hostname)
2874