• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5from distutils import version
6import cStringIO
7import HTMLParser
8import httplib
9import json
10import logging
11import multiprocessing
12import os
13import re
14import socket
15import time
16import urllib2
17import urlparse
18
19from autotest_lib.client.bin import utils as bin_utils
20from autotest_lib.client.common_lib import android_utils
21from autotest_lib.client.common_lib import error
22from autotest_lib.client.common_lib import global_config
23from autotest_lib.client.common_lib import utils
24from autotest_lib.client.common_lib.cros import retry
25from autotest_lib.server import utils as server_utils
26# TODO(cmasone): redo this class using requests module; http://crosbug.com/30107
27
28try:
29    from chromite.lib import metrics
30except ImportError:
31    metrics = utils.metrics_mock
32
33
34CONFIG = global_config.global_config
35# This file is generated at build time and specifies, per suite and per test,
36# the DEPENDENCIES list specified in each control file.  It's a dict of dicts:
37# {'bvt':   {'/path/to/autotest/control/site_tests/test1/control': ['dep1']}
38#  'suite': {'/path/to/autotest/control/site_tests/test2/control': ['dep2']}
39#  'power': {'/path/to/autotest/control/site_tests/test1/control': ['dep1'],
40#            '/path/to/autotest/control/site_tests/test3/control': ['dep3']}
41# }
42DEPENDENCIES_FILE = 'test_suites/dependency_info'
43# Number of seconds for caller to poll devserver's is_staged call to check if
44# artifacts are staged.
45_ARTIFACT_STAGE_POLLING_INTERVAL = 5
46# Artifacts that should be staged when client calls devserver RPC to stage an
47# image.
48_ARTIFACTS_TO_BE_STAGED_FOR_IMAGE = 'full_payload,test_suites,stateful'
49# Artifacts that should be staged when client calls devserver RPC to stage an
50# image with autotest artifact.
51_ARTIFACTS_TO_BE_STAGED_FOR_IMAGE_WITH_AUTOTEST = ('full_payload,test_suites,'
52                                                   'control_files,stateful,'
53                                                   'autotest_packages')
54# Artifacts that should be staged when client calls devserver RPC to stage an
55# Android build.
56_BRILLO_ARTIFACTS_TO_BE_STAGED_FOR_IMAGE = ('zip_images,vendor_partitions')
57SKIP_DEVSERVER_HEALTH_CHECK = CONFIG.get_config_value(
58        'CROS', 'skip_devserver_health_check', type=bool)
59# Number of seconds for the call to get devserver load to time out.
60TIMEOUT_GET_DEVSERVER_LOAD = 2.0
61
62# Android artifact path in devserver
63ANDROID_BUILD_NAME_PATTERN = CONFIG.get_config_value(
64        'CROS', 'android_build_name_pattern', type=str).replace('\\', '')
65
66# Return value from a devserver RPC indicating the call succeeded.
67SUCCESS = 'Success'
68
69# The timeout minutes for a given devserver ssh call.
70DEVSERVER_SSH_TIMEOUT_MINS = 1
71
72# Error message for invalid devserver response.
73ERR_MSG_FOR_INVALID_DEVSERVER_RESPONSE = 'Proxy Error'
74ERR_MSG_FOR_DOWN_DEVSERVER = 'Service Unavailable'
75
# Error message for a devserver call that timed out.
77ERR_MSG_FOR_TIMED_OUT_CALL = 'timeout'
78
# Timeout, in minutes, when waiting for a devserver to finish staging.
80DEVSERVER_IS_STAGING_RETRY_MIN = 100
81
# Timeout, in minutes, when waiting for a DUT auto-update to finish.
83DEVSERVER_IS_CROS_AU_FINISHED_TIMEOUT_MIN = 100
84
# Total number of times a devserver triggers a CrOS auto-update.
86AU_RETRY_LIMIT = 2
87
88# Number of seconds for caller to poll devserver's get_au_status call to
89# check if cros auto-update is finished.
90CROS_AU_POLLING_INTERVAL = 10
91
92# Number of seconds for intervals between retrying auto-update calls.
93CROS_AU_RETRY_INTERVAL = 20
94
95# The file name for auto-update logs.
96CROS_AU_LOG_FILENAME = 'CrOS_update_%s_%s.log'
97
98# Provision error patterns.
99# People who see this should know that they shouldn't change these
100# classification strings. These strings are used for monitoring provision
101# failures. Any changes may mess up the stats.
102_EXCEPTION_PATTERNS = [
103        # Raised when devserver portfile does not exist on host.
104        (r".*Devserver portfile does not exist!.*$",
105         '(1) Devserver portfile does not exist on host'),
106        # Raised when devserver cannot copy packages to host.
107        (r".*Could not copy .* to device.*$",
108         '(2) Cannot copy packages to host'),
109        # Raised when devserver fails to run specific commands on host.
110        (r".*cwd=None, extra env=\{'LC_MESSAGES': 'C'\}.*$",
111         '(3) Fail to run specific command on host'),
112        # Raised when new build fails to boot on the host.
113        (r'.*RootfsUpdateError: Build .* failed to boot on.*$',
114         '(4) Build failed to boot on host'),
115        # Raised when the auto-update process is timed out.
116        (r'.*The CrOS auto-update process is timed out, '
117         'thus will be terminated.*$',
118         '(5) Auto-update is timed out'),
119        # Raised when the host is not pingable.
120        (r".*DeviceNotPingableError.*$",
121         '(6) Host is not pingable during auto-update'),
122        # Raised when hosts have unexpected status after rootfs update.
123        (r'.*Update failed with unexpected update status: '
124         'UPDATE_STATUS_IDLE.*$',
125         '(7) Host has unexpected status: UPDATE_STATUS_IDLE after rootfs '
126         'update'),
127        # Raised when devserver returns non-json response to shard/drone.
128        (r'.*No JSON object could be decoded.*$',
129         '(8) Devserver returned non-json object'),
130        # Raised when devserver loses host's ssh connection
131        (r'.*SSHConnectionError\: .* port 22\: Connection timed out.*$',
132         "(9) Devserver lost host's ssh connection"),
133        # Raised when error happens in writing files to host
134        (r'.*Write failed\: Broken pipe.*$',
135         "(10) Broken pipe while writing or connecting to host")]
136
137PREFER_LOCAL_DEVSERVER = CONFIG.get_config_value(
138        'CROS', 'prefer_local_devserver', type=bool, default=False)
139
140ENABLE_SSH_CONNECTION_FOR_DEVSERVER = CONFIG.get_config_value(
141        'CROS', 'enable_ssh_connection_for_devserver', type=bool,
142        default=False)
143
144# Directory to save auto-update logs
145AUTO_UPDATE_LOG_DIR = 'autoupdate_logs'
146
147DEFAULT_SUBNET_MASKBIT = 19
148
149# Metrics basepaths.
150METRICS_PATH = 'chromeos/autotest'
151PROVISION_PATH = METRICS_PATH + '/provision'
152
153
class DevServerException(Exception):
    """Error for a failed devserver call.

    Raised, for example, when the dev server returns a non-200 HTTP response.
    """
157
158
class BadBuildException(DevServerException):
    """Signals that a build failed to boot on the DUT."""
162
163
class RetryableProvisionException(DevServerException):
    """Signals a provision failure whose cause is retryable."""
167
class DevServerOverloadException(Exception):
    """Signals that the dev server returned a 502 HTTP response."""
171
class DevServerFailToLocateException(Exception):
    """Signals that no devserver could be located."""
175
176
class DevServerExceptionClassifier(object):
    """Classifies an error string raised from a DUT by calling auto_update."""

    def __init__(self, err, keep_full_trace=True):
        """
        @param err: A single string representing one provision error that
            happened in auto_update().
        @param keep_full_trace: True to keep the whole stack trace of the
            error. False to keep only its last line.
        """
        if keep_full_trace:
            self._err = err
        else:
            # Drop trailing line breaks first; otherwise a trace that ends
            # with a newline would leave an empty "last line" to classify.
            self._err = err.rstrip('\r\n').split('\n')[-1]
        self._classification = None

    def _classify(self):
        """Match the stored error text against _EXCEPTION_PATTERNS.

        @return: The classification string of the first pattern that matches
            any line of the error, or '(0) Unknown exception' if none does.
        """
        for err_pattern, classification in _EXCEPTION_PATTERNS:
            # The patterns are line oriented: '.*' never crosses a newline
            # and '$' anchors the end. A flag-less re.match() could therefore
            # never match a multi-line trace, so search line by line instead.
            # For single-line errors the result is unchanged.
            if re.search(err_pattern, self._err, re.MULTILINE):
                return classification

        return '(0) Unknown exception'

    @property
    def classification(self):
        """Classify the error

        The result is computed on first access and cached afterwards.

        @return: return a classified exception type (string) from
            _EXCEPTION_PATTERNS or '(0) Unknown exception'. Current patterns
            in _EXCEPTION_PATTERNS are very specific so that errors cannot
            match more than one pattern.
        """
        if not self._classification:
            self._classification = self._classify()
        return self._classification

    @property
    def summary(self):
        """Use one line to show the error message."""
        return ' '.join(self._err.splitlines())

    @property
    def classified_exception(self):
        """What kind of exception will be raised to higher.

        @return: return a special Exception (BadBuildException) when the
            raised error is an RootfsUpdateError. Otherwise, return general
            DevServerException.
        """
        # The classification of RootfsUpdateError in _EXCEPTION_PATTERNS starts
        # with "(4)"
        if self.classification.startswith('(4)'):
            return BadBuildException

        return DevServerException
227
228
class MarkupStripper(HTMLParser.HTMLParser):
    """HTML parser that strips HTML tags and coded characters like &amp;.

    Works by, basically, not doing anything for any tags, and only recording
    the content of text nodes in an internal data structure.
    """
    def __init__(self):
        # Run the base-class initializer instead of calling reset() directly,
        # so any parser state it sets up beyond reset() is not skipped. The
        # explicit base call works whether or not the base is new-style.
        HTMLParser.HTMLParser.__init__(self)
        # Text fragments collected by handle_data(), in arrival order.
        self.fed = []


    def handle_data(self, d):
        """Consume content of text nodes, store it away.

        @param d: The text of one text node.
        """
        self.fed.append(d)


    def get_data(self):
        """Concatenate and return all stored data.

        @return: All text fed so far, joined into a single string.
        """
        return ''.join(self.fed)
248
249
def _strip_http_message(message):
    """Remove the markup from an HTTP message.

    @param message: A string returned by an HTTP call.

    @return: The text content of the message with the markup stripped.
    """
    stripper = MarkupStripper()
    try:
        stripper.feed(message.decode('utf_32'))
    except UnicodeDecodeError:
        # Not decodable as UTF-32: hand the message to the parser unchanged.
        stripper.feed(message)
    return stripper.get_data()
263
264
def _get_image_storage_server():
    """Read the image storage server from the config.

    @return: The 'image_storage_server' setting of the CROS config section,
             as a string.
    """
    server = CONFIG.get_config_value('CROS', 'image_storage_server', type=str)
    return server
267
268
def _get_canary_channel_server():
    """Read the url of the canary-channel server from the config.

    Example value:
    gsutil://chromeos-releases/canary-channel/<board>/<release>

    @return: The url to the canary channel server, i.e. the
             'canary_channel_server' setting of the CROS config section.
    """
    server = CONFIG.get_config_value(
            'CROS', 'canary_channel_server', type=str)
    return server
277
278
def _get_storage_server_for_artifacts(artifacts=None):
    """Pick the storage server that holds the given artifacts.

    @param artifacts: A list of artifacts we need to stage.
    @return: The canary channel server when the configured factory artifact
             is among |artifacts|; otherwise (including when no artifacts
             are specified) the default image storage server.
    """
    factory_artifact = global_config.global_config.get_config_value(
            'CROS', 'factory_artifact', type=str, default='')
    needs_canary_server = (artifacts and factory_artifact
                           and factory_artifact in artifacts)
    if not needs_canary_server:
        return _get_image_storage_server()
    return _get_canary_channel_server()
291
292
293def _gs_or_local_archive_url_args(archive_url):
294    """Infer the devserver call arguments to use with the given archive_url.
295
296    @param archive_url: The archive url to include the in devserver RPC. This
297            can either e a GS path or a local path.
298    @return: A dict of arguments to include in the devserver call.
299    """
300    if not archive_url:
301        return {}
302    elif archive_url.startswith('gs://'):
303        return {'archive_url': archive_url}
304    else:
305        # For a local path, we direct the devserver to move the files while
306        # staging. This is the fastest way to stage local files, but deletes the
307        # files from the source. This is OK because the files are available on
308        # the devserver once staged.
309        return {
310                'local_path': archive_url,
311                'delete_source': True,
312        }
313
314
def _reverse_lookup_from_config(address):
    """Find the hostname configured for the given IP address.

    The hostname-address map from the config file is scanned for an entry
    whose address equals |address|.

    If multiple hostnames map to the same IP address, the first one yielded
    by the map wins. NOTE(review): that is the first one defined in the
    configuration file only if the map preserves file order - confirm.

    @param address: IP address string
    @returns: hostname string, or original input if not found
    """
    matching_hostnames = (
            hostname
            for hostname, addr in _get_hostname_addr_map().iteritems()
            if addr == address)
    return next(matching_hostnames, address)
330
331
def _get_hostname_addr_map():
    """Read the hostname to address mapping from the config.

    @return: dict mapping server hostnames to addresses, built from the
             HOSTNAME_ADDR_MAP config section
    """
    addr_map = CONFIG.get_section_as_dict('HOSTNAME_ADDR_MAP')
    return addr_map
338
339
def _get_dev_server_list():
    """Read the configured devservers.

    @return: The 'dev_server' setting of the CROS config section as a list
             ([] is passed as the default).
    """
    dev_servers = CONFIG.get_config_value(
            'CROS', 'dev_server', type=list, default=[])
    return dev_servers
342
343
def _get_crash_server_list():
    """Read the configured crash servers.

    @return: The 'crash_server' setting of the CROS config section as a list
             ([] is passed as the default).
    """
    crash_servers = CONFIG.get_config_value(
            'CROS', 'crash_server', type=list, default=[])
    return crash_servers
347
348
def remote_devserver_call(timeout_min=DEVSERVER_IS_STAGING_RETRY_MIN,
                          exception_to_raise=DevServerException):
    """A decorator to use with remote devserver calls.

    The decorated method is retried on urllib2.URLError, error.CmdError or
    DevServerOverloadException to avoid devserver flakiness. A
    urllib2.HTTPError is converted into a DevServerException whose message is
    the embedded error info converted into plain text. If the call finally
    fails with a message containing ERR_MSG_FOR_TIMED_OUT_CALL, a
    call_timeout metric is recorded before the exception is re-raised.

    @param timeout_min: Handed to retry.retry as its timeout_min.
    @param exception_to_raise: Handed to retry.retry as its
                               exception_to_raise.

    @return: The decorator to apply to the method making the call.
    """
    #pylint: disable=C0111

    def inner_decorator(method):
        label = getattr(method, '__name__', None)

        def metrics_wrapper(*args, **kwargs):
            @retry.retry((urllib2.URLError, error.CmdError,
                          DevServerOverloadException),
                         timeout_min=timeout_min,
                         exception_to_raise=exception_to_raise,
                         label=label)
            def wrapper():
                """This wrapper actually catches the HTTPError."""
                try:
                    return method(*args, **kwargs)
                except urllib2.HTTPError as http_error:
                    raise DevServerException(
                            _strip_http_message(http_error.read()))

            try:
                return wrapper()
            except Exception as e:
                # Only timed out calls are counted; everything else is
                # passed straight on to the caller.
                if ERR_MSG_FOR_TIMED_OUT_CALL not in str(e):
                    raise

                # Work out which devserver was called: either the DevServer
                # instance the method is bound to, or a 'devserver' url
                # passed by keyword.
                if args and isinstance(args[0], DevServer):
                    dev_server = args[0].hostname
                elif 'devserver' in kwargs:
                    dev_server = get_hostname(kwargs['devserver'])
                else:
                    dev_server = None

                logging.debug('RPC call %s has timed out on devserver %s.',
                              label, dev_server)
                timeout_counter = metrics.Counter(
                        'chromeos/autotest/devserver/call_timeout')
                # NOTE(review): the field named 'healthy' carries the RPC
                # label here, which looks unintended. It is left as is since
                # renaming it would change the reported metric - confirm.
                timeout_counter.increment(fields={'dev_server': dev_server,
                                                  'healthy': label})
                raise

        return metrics_wrapper

    return inner_decorator
397
398
def get_hostname(url):
    """Extract the hostname portion of a URL.

    For a URL of the form schema://hostname:port/path this is "hostname".

    @param url: a Url string
    @return: a hostname string
    """
    parsed_url = urlparse.urlparse(url)
    return parsed_url.hostname
408
409
410class DevServer(object):
411    """Base class for all DevServer-like server stubs.
412
413    This is the base class for interacting with all Dev Server-like servers.
414    A caller should instantiate a sub-class of DevServer with:
415
416    host = SubClassServer.resolve(build)
417    server = SubClassServer(host)
418    """
419    _MIN_FREE_DISK_SPACE_GB = 20
420    _MAX_APACHE_CLIENT_COUNT = 75
421    # Threshold for the CPU load percentage for a devserver to be selected.
422    MAX_CPU_LOAD = 80.0
423    # Threshold for the network IO, set to 80MB/s
424    MAX_NETWORK_IO = 1024 * 1024 * 80
425    DISK_IO = 'disk_total_bytes_per_second'
426    NETWORK_IO = 'network_total_bytes_per_second'
427    CPU_LOAD = 'cpu_percent'
428    FREE_DISK = 'free_disk'
429    AU_PROCESS = 'au_process_count'
430    STAGING_THREAD_COUNT = 'staging_thread_count'
431    APACHE_CLIENT_COUNT = 'apache_client_count'
432
433
434    def __init__(self, devserver):
435        self._devserver = devserver
436
437
438    def url(self):
439        """Returns the url for this devserver."""
440        return self._devserver
441
442
443    @property
444    def hostname(self):
445        """Return devserver hostname parsed from the devserver URL.
446
447        Note that this is likely parsed from the devserver URL from
448        shadow_config.ini, meaning that the "hostname" part of the
449        devserver URL is actually an IP address.
450
451        @return hostname string
452        """
453        return get_hostname(self.url())
454
455
456    @property
457    def resolved_hostname(self):
458        """Return devserver hostname, resolved from its IP address.
459
460        Unlike the hostname property, this property attempts to look up
461        the proper hostname from the devserver IP address.  If lookup
462        fails, then fall back to whatever the hostname property would
463        have returned.
464
465        @return hostname string
466        """
467        return _reverse_lookup_from_config(self.hostname)
468
469
470    @staticmethod
471    def get_server_url(url):
472        """Get the devserver url from a repo url, which includes build info.
473
474        @param url: A job repo url.
475
476        @return A devserver url, e.g., http://127.0.0.10:8080
477        """
478        res = urlparse.urlparse(url)
479        if res.netloc:
480            return res.scheme + '://' + res.netloc
481
482
483    @classmethod
484    def get_devserver_load_wrapper(cls, devserver, timeout_sec, output):
485        """A wrapper function to call get_devserver_load in parallel.
486
487        @param devserver: url of the devserver.
488        @param timeout_sec: Number of seconds before time out the devserver
489                            call.
490        @param output: An output queue to save results to.
491        """
492        load = cls.get_devserver_load(devserver, timeout_min=timeout_sec/60.0)
493        if load:
494            load['devserver'] = devserver
495        output.put(load)
496
497
498    @classmethod
499    def get_devserver_load(cls, devserver,
500                           timeout_min=DEVSERVER_SSH_TIMEOUT_MINS):
501        """Returns True if the |devserver| is healthy to stage build.
502
503        @param devserver: url of the devserver.
504        @param timeout_min: How long to wait in minutes before deciding the
505                            the devserver is not up (float).
506
507        @return: A dictionary of the devserver's load.
508
509        """
510        call = cls._build_call(devserver, 'check_health')
511        @remote_devserver_call(timeout_min=timeout_min)
512        def get_load(devserver=devserver):
513            """Inner method that makes the call."""
514            return cls.run_call(call, timeout=timeout_min*60)
515
516        try:
517            return json.load(cStringIO.StringIO(get_load(devserver=devserver)))
518        except Exception as e:
519            logging.error('Devserver call failed: "%s", timeout: %s seconds,'
520                          ' Error: %s', call, timeout_min * 60, e)
521
522
523    @classmethod
524    def is_free_disk_ok(cls, load):
525        """Check if a devserver has enough free disk.
526
527        @param load: A dict of the load of the devserver.
528
529        @return: True if the devserver has enough free disk or disk check is
530                 skipped in global config.
531
532        """
533        if SKIP_DEVSERVER_HEALTH_CHECK:
534            logging.debug('devserver health check is skipped.')
535        elif load[cls.FREE_DISK] < cls._MIN_FREE_DISK_SPACE_GB:
536            return False
537
538        return True
539
540
541    @classmethod
542    def is_apache_client_count_ok(cls, load):
543        """Check if a devserver has enough Apache connections available.
544
545        Apache server by default has maximum of 150 concurrent connections. If
546        a devserver has too many live connections, it likely indicates the
547        server is busy handling many long running download requests, e.g.,
548        downloading stateful partitions. It is better not to add more requests
549        to it.
550
551        @param load: A dict of the load of the devserver.
552
553        @return: True if the devserver has enough Apache connections available,
554                 or disk check is skipped in global config.
555
556        """
557        if SKIP_DEVSERVER_HEALTH_CHECK:
558            logging.debug('devserver health check is skipped.')
559        elif cls.APACHE_CLIENT_COUNT not in load:
560            logging.debug('Apache client count is not collected from devserver.')
561        elif (load[cls.APACHE_CLIENT_COUNT] >
562              cls._MAX_APACHE_CLIENT_COUNT):
563            return False
564
565        return True
566
567
568    @classmethod
569    def devserver_healthy(cls, devserver,
570                          timeout_min=DEVSERVER_SSH_TIMEOUT_MINS):
571        """Returns True if the |devserver| is healthy to stage build.
572
573        @param devserver: url of the devserver.
574        @param timeout_min: How long to wait in minutes before deciding the
575                            the devserver is not up (float).
576
577        @return: True if devserver is healthy. Return False otherwise.
578
579        """
580        c = metrics.Counter('chromeos/autotest/devserver/devserver_healthy')
581        reason = ''
582        healthy = False
583        load = cls.get_devserver_load(devserver, timeout_min=timeout_min)
584        try:
585            if not load:
586                # Failed to get the load of devserver.
587                reason = '(1) Failed to get load.'
588                return False
589
590            apache_ok = cls.is_apache_client_count_ok(load)
591            if not apache_ok:
592                reason = '(2) Apache client count too high.'
593                logging.error('Devserver check_health failed. Live Apache client '
594                              'count is too high: %d.',
595                              load[cls.APACHE_CLIENT_COUNT])
596                return False
597
598            disk_ok = cls.is_free_disk_ok(load)
599            if not disk_ok:
600                reason = '(3) Disk space too low.'
601                logging.error('Devserver check_health failed. Free disk space is '
602                              'low. Only %dGB is available.',
603                              load[cls.FREE_DISK])
604            healthy = bool(disk_ok)
605            return disk_ok
606        finally:
607            c.increment(fields={'dev_server': cls(devserver).resolved_hostname,
608                                'healthy': healthy,
609                                'reason': reason})
610            # Monitor how many AU processes the devserver is currently running.
611            if load is not None and load.get(DevServer.AU_PROCESS):
612                c_au = metrics.Gauge(
613                        'chromeos/autotest/devserver/devserver_au_count')
614                c_au.set(
615                    load.get(DevServer.AU_PROCESS),
616                    fields={'dev_server': cls(devserver).resolved_hostname})
617
618
619    @staticmethod
620    def _build_call(host, method, **kwargs):
621        """Build a URL to |host| that calls |method|, passing |kwargs|.
622
623        Builds a URL that calls |method| on the dev server defined by |host|,
624        passing a set of key/value pairs built from the dict |kwargs|.
625
626        @param host: a string that is the host basename e.g. http://server:90.
627        @param method: the dev server method to call.
628        @param kwargs: a dict mapping arg names to arg values.
629        @return the URL string.
630        """
631        # If the archive_url is a local path, the args expected by the devserver
632        # are a little different.
633        archive_url_args = _gs_or_local_archive_url_args(
634                kwargs.pop('archive_url', None))
635        kwargs.update(archive_url_args)
636
637        argstr = '&'.join(map(lambda x: "%s=%s" % x, kwargs.iteritems()))
638        return "%(host)s/%(method)s?%(argstr)s" % dict(
639                host=host, method=method, argstr=argstr)
640
641
642    def build_call(self, method, **kwargs):
643        """Builds a devserver RPC string that is used by 'run_call()'.
644
645        @param method: remote devserver method to call.
646        """
647        return self._build_call(self._devserver, method, **kwargs)
648
649
650    @classmethod
651    def build_all_calls(cls, method, **kwargs):
652        """Builds a list of URLs that makes RPC calls on all devservers.
653
654        Build a URL that calls |method| on the dev server, passing a set
655        of key/value pairs built from the dict |kwargs|.
656
657        @param method: the dev server method to call.
658        @param kwargs: a dict mapping arg names to arg values
659
660        @return the URL string
661        """
662        calls = []
663        # Note we use cls.servers as servers is class specific.
664        for server in cls.servers():
665            if cls.devserver_healthy(server):
666                calls.append(cls._build_call(server, method, **kwargs))
667
668        return calls
669
670
671    @classmethod
672    def run_call(cls, call, readline=False, timeout=None):
673        """Invoke a given devserver call using urllib.open.
674
675        Open the URL with HTTP, and return the text of the response. Exceptions
676        may be raised as for urllib2.urlopen().
677
678        @param call: a url string that calls a method to a devserver.
679        @param readline: whether read http response line by line.
680        @param timeout: The timeout seconds for this urlopen call.
681
682        @return the results of this call.
683        """
684        if timeout is not None:
685            return utils.urlopen_socket_timeout(
686                    call, timeout=timeout).read()
687        elif readline:
688            response = urllib2.urlopen(call)
689            return [line.rstrip() for line in response]
690        else:
691            return urllib2.urlopen(call).read()
692
693
694    @staticmethod
695    def servers():
696        """Returns a list of servers that can serve as this type of server."""
697        raise NotImplementedError()
698
699
700    @classmethod
701    def get_devservers_in_same_subnet(cls, ip, mask_bits=DEFAULT_SUBNET_MASKBIT,
702                                      unrestricted_only=False):
703        """Get the devservers in the same subnet of the given ip.
704
705        @param ip: The IP address of a dut to look for devserver.
706        @param mask_bits: Number of mask bits. Default is 19.
707        @param unrestricted_only: Set to True to select from devserver in
708                unrestricted subnet only. Default is False.
709
710        @return: A list of devservers in the same subnet of the given ip.
711
712        """
713        # server from cls.servers() is a URL, e.g., http://10.1.1.10:8082, so
714        # we need a dict to return the full devserver path once the IPs are
715        # filtered in get_servers_in_same_subnet.
716        server_names = {}
717        all_devservers = []
718        devservers = (cls.get_unrestricted_devservers() if unrestricted_only
719                      else cls.servers())
720        for server in devservers:
721            server_name = get_hostname(server)
722            server_names[server_name] = server
723            all_devservers.append(server_name)
724        if not all_devservers:
725            devserver_type = 'unrestricted only' if unrestricted_only else 'all'
726            raise DevServerFailToLocateException(
727                'Fail to locate a devserver for dut %s in %s devservers'
728                % (ip, devserver_type))
729
730        devservers = utils.get_servers_in_same_subnet(ip, mask_bits,
731                                                      all_devservers)
732        return [server_names[s] for s in devservers]
733
734
735    @classmethod
736    def get_unrestricted_devservers(
737                cls, restricted_subnets=utils.RESTRICTED_SUBNETS):
738        """Get the devservers not in any restricted subnet specified in
739        restricted_subnets.
740
741        @param restricted_subnets: A list of restriected subnets.
742
743        @return: A list of devservers not in any restricted subnet.
744
745        """
746        if not restricted_subnets:
747            return cls.servers()
748
749        devservers = []
750        for server in cls.servers():
751            server_name = get_hostname(server)
752            if not utils.get_restricted_subnet(server_name, restricted_subnets):
753                devservers.append(server)
754        return devservers
755
756
757    @classmethod
758    def get_healthy_devserver(cls, build, devservers, ban_list=None):
759        """"Get a healthy devserver instance from the list of devservers.
760
761        @param build: The build (e.g. x86-mario-release/R18-1586.0.0-a1-b1514).
762        @param devservers: The devserver list to be chosen out a healthy one.
763        @param ban_list: The blacklist of devservers we don't want to choose.
764                Default is None.
765
766        @return: A DevServer object of a healthy devserver. Return None if no
767                healthy devserver is found.
768
769        """
770        logging.debug('Pick one healthy devserver from %r', devservers)
771        while devservers:
772            hash_index = hash(build) % len(devservers)
773            devserver = devservers.pop(hash_index)
774            logging.debug('Check health for %s', devserver)
775            if ban_list and devserver in ban_list:
776                continue
777
778            if cls.devserver_healthy(devserver):
779                logging.debug('Pick %s', devserver)
780                return cls(devserver)
781
782
783    @classmethod
784    def get_available_devservers(cls, hostname=None,
785                                 prefer_local_devserver=PREFER_LOCAL_DEVSERVER,
786                                 restricted_subnets=utils.RESTRICTED_SUBNETS):
787        """Get devservers in the same subnet of the given hostname.
788
789        @param hostname: Hostname of a DUT to choose devserver for.
790
791        @return: A tuple of (devservers, can_retry), devservers is a list of
792                 devservers that's available for the given hostname. can_retry
793                 is a flag that indicate if caller can retry the selection of
794                 devserver if no devserver in the returned devservers can be
795                 used. For example, if hostname is in a restricted subnet,
796                 can_retry will be False.
797        """
798        logging.info('Getting devservers for host: %s',  hostname)
799        host_ip = None
800        if hostname:
801            host_ip = bin_utils.get_ip_address(hostname)
802            if not host_ip:
803                logging.error('Failed to get IP address of %s. Will pick a '
804                              'devserver without subnet constraint.', hostname)
805
806        if not host_ip:
807            return cls.get_unrestricted_devservers(restricted_subnets), False
808
809        # Go through all restricted subnet settings and check if the DUT is
810        # inside a restricted subnet. If so, only return the devservers in the
811        # restricted subnet and doesn't allow retry.
812        if host_ip and restricted_subnets:
813            subnet_ip, mask_bits = _get_subnet_for_host_ip(
814                    host_ip, restricted_subnets=restricted_subnets)
815            if subnet_ip:
816                logging.debug('The host %s (%s) is in a restricted subnet. '
817                              'Try to locate a devserver inside subnet '
818                              '%s:%d.', hostname, host_ip, subnet_ip,
819                              mask_bits)
820                devservers = cls.get_devservers_in_same_subnet(
821                        subnet_ip, mask_bits)
822                return devservers, False
823
824        # If prefer_local_devserver is set to True and the host is not in
825        # restricted subnet, pick a devserver in the same subnet if possible.
826        # Set can_retry to True so it can pick a different devserver if all
827        # devservers in the same subnet are down.
828        if prefer_local_devserver:
829            return (cls.get_devservers_in_same_subnet(
830                    host_ip, DEFAULT_SUBNET_MASKBIT, True), True)
831
832        return cls.get_unrestricted_devservers(restricted_subnets), False
833
834
835    @classmethod
836    def resolve(cls, build, hostname=None, ban_list=None):
837        """"Resolves a build to a devserver instance.
838
839        @param build: The build (e.g. x86-mario-release/R18-1586.0.0-a1-b1514).
840        @param hostname: The hostname of dut that requests a devserver. It's
841                         used to make sure a devserver in the same subnet is
842                         preferred.
843        @param ban_list: The blacklist of devservers shouldn't be chosen.
844
845        @raise DevServerException: If no devserver is available.
846        """
847        tried_devservers = set()
848        devservers, can_retry = cls.get_available_devservers(hostname)
849        if devservers:
850            tried_devservers |= set(devservers)
851
852        devserver = cls.get_healthy_devserver(build, devservers,
853                                              ban_list=ban_list)
854
855        if not devserver and can_retry:
856            # Find available devservers without dut location constrain.
857            devservers, _ = cls.get_available_devservers()
858            devserver = cls.get_healthy_devserver(build, devservers,
859                                                  ban_list=ban_list)
860            if devservers:
861                tried_devservers |= set(devservers)
862        if devserver:
863            return devserver
864        else:
865            subnet = 'unrestricted subnet'
866            if hostname is not None:
867                host_ip = bin_utils.get_ip_address(hostname)
868                if host_ip:
869                    subnet_ip, mask_bits = _get_subnet_for_host_ip(host_ip)
870                    subnet = '%s/%s' % (str(subnet_ip), str(mask_bits))
871
872            error_msg = ('All devservers in subnet: %s are currently down: '
873                         '%s. (dut hostname: %s)' %
874                         (subnet, tried_devservers, hostname))
875            logging.error(error_msg)
876            c = metrics.Counter(
877                    'chromeos/autotest/devserver/subnet_without_devservers')
878            c.increment(fields={'subnet': subnet, 'hostname': str(hostname)})
879            raise DevServerException(error_msg)
880
881
882    @classmethod
883    def random(cls):
884        """Return a random devserver that's available.
885
886        Devserver election in `resolve` method is based on a hash of the
887        build that a caller wants to stage. The purpose is that different
888        callers requesting for the same build can get the same devserver,
889        while the lab is able to distribute different builds across all
890        devservers. That helps to reduce the duplication of builds across
891        all devservers.
892        This function returns a random devserver, by passing a random
893        pseudo build name to `resolve `method.
894        """
895        return cls.resolve(build=str(time.time()))
896
897
class CrashServer(DevServer):
    """Class of DevServer that symbolicates crash dumps."""

    @staticmethod
    def servers():
        """Return the crash servers given by _get_crash_server_list()."""
        return _get_crash_server_list()


    @remote_devserver_call()
    def symbolicate_dump(self, minidump_path, build):
        """Ask the devserver to symbolicate the dump at minidump_path.

        Posts the minidump file to the devserver's symbolicate_dump RPC,
        passing the image storage location of |build| as archive_url.

        @param minidump_path: the on-disk path of the minidump.
        @param build: The build (e.g. x86-mario-release/R18-1586.0.0-a1-b1514)
                      whose debug symbols are needed for symbolication.
        @return The contents of the stack trace, or an empty string if the
                `requests` module cannot be imported.
        @raise urllib2.HTTPError built from the response upon any return code
               that's not HTTP OK.
               NOTE(review): the remote_devserver_call decorator may translate
               or retry this error before callers see it -- confirm.
        """
        try:
            import requests
        except ImportError:
            logging.warning("Can't 'import requests' to connect to dev server.")
            return ''
        f = {'dev_server': self.resolved_hostname}
        c = metrics.Counter('chromeos/autotest/crashserver/symbolicate_dump')
        c.increment(fields=f)
        # Symbolicate minidump.
        m = 'chromeos/autotest/crashserver/symbolicate_dump_duration'
        with metrics.SecondsTimer(m, fields=f):
            call = self.build_call('symbolicate_dump',
                                   archive_url=_get_image_storage_server() + build)
            # Hold the minidump open only for the duration of the upload so
            # the file handle is released even when the request fails.
            with open(minidump_path, 'rb') as minidump:
                response = requests.post(call, files={'minidump': minidump})
            if response.status_code == requests.codes.OK:
                return response.text

        error_fd = cStringIO.StringIO(response.text)
        raise urllib2.HTTPError(
                call, response.status_code, response.text, response.headers,
                error_fd)


    @classmethod
    def get_available_devservers(cls, hostname=None):
        """Get all available crash servers.

        Crash server election doesn't need to count the location of hostname.

        @param hostname: Hostname of a DUT to choose devserver for. Unused;
                         it defaults to None so this override can be called
                         the same way as the DevServer version.

        @return: A tuple of (all crash servers, False). can_retry is set to
                 False, as all crash servers are returned. There is no point to
                 retry.
        """
        return cls.servers(), False
956
957
958class ImageServerBase(DevServer):
959    """Base class for devservers used to stage builds.
960
961    CrOS and Android builds are staged in different ways as they have different
962    sets of artifacts. This base class abstracts the shared functions between
963    the two types of ImageServer.
964    """
965
966    @classmethod
967    def servers(cls):
968        """Returns a list of servers that can serve as a desired type of
969        devserver.
970        """
971        return _get_dev_server_list()
972
973
974    def _get_image_url(self, image):
975        """Returns the url of the directory for this image on the devserver.
976
977        @param image: the image that was fetched.
978        """
979        image = self.translate(image)
980        url_pattern = CONFIG.get_config_value('CROS', 'image_url_pattern',
981                                              type=str)
982        return (url_pattern % (self.url(), image)).replace('update', 'static')
983
984
985    @staticmethod
986    def create_metadata(server_name, image, artifacts=None, files=None):
987        """Create a metadata dictionary given the staged items.
988
989        The metadata can be send to metadata db along with stats.
990
991        @param server_name: name of the devserver, e.g 172.22.33.44.
992        @param image: The name of the image.
993        @param artifacts: A list of artifacts.
994        @param files: A list of files.
995
996        @return A metadata dictionary.
997
998        """
999        metadata = {'devserver': server_name,
1000                    'image': image,
1001                    '_type': 'devserver'}
1002        if artifacts:
1003            metadata['artifacts'] = ' '.join(artifacts)
1004        if files:
1005            metadata['files'] = ' '.join(files)
1006        return metadata
1007
1008
1009    @classmethod
1010    def run_ssh_call(cls, call, readline=False, timeout=None):
1011        """Construct an ssh-based rpc call, and execute it.
1012
1013        @param call: a url string that calls a method to a devserver.
1014        @param readline: whether read http response line by line.
1015        @param timeout: The timeout seconds for ssh call.
1016
1017        @return the results of this call.
1018        """
1019        hostname = get_hostname(call)
1020        ssh_call = 'ssh %s \'curl "%s"\'' % (hostname, utils.sh_escape(call))
1021        timeout_seconds = timeout if timeout else DEVSERVER_SSH_TIMEOUT_MINS*60
1022        try:
1023            result = utils.run(ssh_call, timeout=timeout_seconds)
1024        except error.CmdError as e:
1025            logging.debug('Error occurred with exit_code %d when executing the '
1026                          'ssh call: %s.', e.result_obj.exit_status,
1027                          e.result_obj.stderr)
1028            c = metrics.Counter('chromeos/autotest/devserver/ssh_failure')
1029            c.increment(fields={'dev_server': hostname})
1030            raise
1031        response = result.stdout
1032
1033        # If the curl command's returned HTTP response contains certain
1034        # exception string, raise the DevServerException of the response.
1035        if 'DownloaderException' in response:
1036            raise DevServerException(_strip_http_message(response))
1037
1038        if readline:
1039            # Remove line terminators and trailing whitespace
1040            response = response.splitlines()
1041            return [line.rstrip() for line in response]
1042
1043        return response
1044
1045
1046    @classmethod
1047    def run_call(cls, call, readline=False, timeout=None):
1048        """Invoke a given devserver call using urllib.open or ssh.
1049
1050        Open the URL with HTTP or SSH-based HTTP, and return the text of the
1051        response. Exceptions may be raised as for urllib2.urlopen() or
1052        utils.run().
1053
1054        @param call: a url string that calls a method to a devserver.
1055        @param readline: whether read http response line by line.
1056        @param timeout: The timeout seconds for urlopen call or ssh call.
1057
1058        @return the results of this call.
1059        """
1060        server_name = get_hostname(call)
1061        is_in_restricted_subnet = utils.get_restricted_subnet(
1062                server_name, utils.RESTRICTED_SUBNETS)
1063        _EMPTY_SENTINEL_VALUE = object()
1064        def kickoff_call():
1065            """Invoke a given devserver call using urllib.open or ssh.
1066
1067            @param call: a url string that calls a method to a devserver.
1068            @param is_in_restricted_subnet: whether the devserver is in subnet.
1069            @param readline: whether read http response line by line.
1070            @param timeout: The timeout seconds for urlopen call or ssh call.
1071            """
1072            if (not ENABLE_SSH_CONNECTION_FOR_DEVSERVER or
1073                not is_in_restricted_subnet):
1074                response = super(ImageServerBase, cls).run_call(
1075                        call, readline=readline, timeout=timeout)
1076            else:
1077                response = cls.run_ssh_call(
1078                        call, readline=readline, timeout=timeout)
1079            # Retry if devserver service is temporarily down, e.g. in a
1080            # devserver push.
1081            if ERR_MSG_FOR_DOWN_DEVSERVER in response:
1082                return False
1083
1084            # Don't return response directly since it may be empty string,
1085            # which causes poll_for_condition to retry.
1086            return _EMPTY_SENTINEL_VALUE if not response else response
1087
1088        try:
1089            response = bin_utils.poll_for_condition(
1090                    kickoff_call,
1091                    exception=bin_utils.TimeoutError(),
1092                    timeout=60,
1093                    sleep_interval=5)
1094            return '' if response is _EMPTY_SENTINEL_VALUE else response
1095        except bin_utils.TimeoutError:
1096            return ERR_MSG_FOR_DOWN_DEVSERVER
1097
1098
1099    @classmethod
1100    def download_file(cls, remote_file, local_file, timeout=None):
1101        """Download file from devserver.
1102
1103        The format of remote_file should be:
1104            http://devserver_ip:8082/static/board/...
1105
1106        @param remote_file: The URL of the file on devserver that need to be
1107            downloaded.
1108        @param local_file: The path of the file saved to local.
1109        @param timeout: The timeout seconds for this call.
1110        """
1111        response = cls.run_call(remote_file, timeout=timeout)
1112        with open(local_file, 'w') as out_log:
1113            out_log.write(response)
1114
1115
1116    def _poll_is_staged(self, **kwargs):
1117        """Polling devserver.is_staged until all artifacts are staged.
1118
1119        @param kwargs: keyword arguments to make is_staged devserver call.
1120
1121        @return: True if all artifacts are staged in devserver.
1122        """
1123        call = self.build_call('is_staged', **kwargs)
1124
1125        def all_staged():
1126            """Call devserver.is_staged rpc to check if all files are staged.
1127
1128            @return: True if all artifacts are staged in devserver. False
1129                     otherwise.
1130            @rasies DevServerException, the exception is a wrapper of all
1131                    exceptions that were raised when devserver tried to download
1132                    the artifacts. devserver raises an HTTPError or a CmdError
1133                    when an exception was raised in the code. Such exception
1134                    should be re-raised here to stop the caller from waiting.
1135                    If the call to devserver failed for connection issue, a
1136                    URLError exception is raised, and caller should retry the
1137                    call to avoid such network flakiness.
1138
1139            """
1140            try:
1141                result = self.run_call(call)
1142                logging.debug('whether artifact is staged: %r', result)
1143                return result == 'True'
1144            except urllib2.HTTPError as e:
1145                error_markup = e.read()
1146                raise DevServerException(_strip_http_message(error_markup))
1147            except urllib2.URLError as e:
1148                # Could be connection issue, retry it.
1149                # For example: <urlopen error [Errno 111] Connection refused>
1150                logging.error('URLError happens in is_stage: %r', e)
1151                return False
1152            except error.CmdError as e:
1153                # Retry if SSH failed to connect to the devserver.
1154                logging.warning('CmdError happens in is_stage: %r, will retry', e)
1155                return False
1156
1157        bin_utils.poll_for_condition(
1158                all_staged,
1159                exception=bin_utils.TimeoutError(),
1160                timeout=DEVSERVER_IS_STAGING_RETRY_MIN * 60,
1161                sleep_interval=_ARTIFACT_STAGE_POLLING_INTERVAL)
1162
1163        return True
1164
1165
1166    def _call_and_wait(self, call_name, error_message,
1167                       expected_response=SUCCESS, **kwargs):
1168        """Helper method to make a urlopen call, and wait for artifacts staged.
1169
1170        @param call_name: name of devserver rpc call.
1171        @param error_message: Error message to be thrown if response does not
1172                              match expected_response.
1173        @param expected_response: Expected response from rpc, default to
1174                                  |Success|. If it's set to None, do not compare
1175                                  the actual response. Any response is consider
1176                                  to be good.
1177        @param kwargs: keyword arguments to make is_staged devserver call.
1178
1179        @return: The response from rpc.
1180        @raise DevServerException upon any return code that's expected_response.
1181
1182        """
1183        call = self.build_call(call_name, async=True, **kwargs)
1184        try:
1185            response = self.run_call(call)
1186            logging.debug('response for RPC: %r', response)
1187            if ERR_MSG_FOR_INVALID_DEVSERVER_RESPONSE in response:
1188                logging.debug('Proxy error happens in RPC call, '
1189                              'will retry in 30 seconds')
1190                time.sleep(30)
1191                raise DevServerOverloadException()
1192        except httplib.BadStatusLine as e:
1193            logging.error(e)
1194            raise DevServerException('Received Bad Status line, Devserver %s '
1195                                     'might have gone down while handling '
1196                                     'the call: %s' % (self.url(), call))
1197
1198        if expected_response and not response == expected_response:
1199                raise DevServerException(error_message)
1200
1201        # `os_type` is needed in build a devserver call, but not needed for
1202        # wait_for_artifacts_staged, since that method is implemented by
1203        # each ImageServerBase child class.
1204        if 'os_type' in kwargs:
1205            del kwargs['os_type']
1206        self.wait_for_artifacts_staged(**kwargs)
1207        return response
1208
1209
1210    def _stage_artifacts(self, build, artifacts, files, archive_url, **kwargs):
1211        """Tell the devserver to download and stage |artifacts| from |image|
1212        specified by kwargs.
1213
1214        This is the main call point for staging any specific artifacts for a
1215        given build. To see the list of artifacts one can stage see:
1216
1217        ~src/platfrom/dev/artifact_info.py.
1218
1219        This is maintained along with the actual devserver code.
1220
1221        @param artifacts: A list of artifacts.
1222        @param files: A list of files to stage.
1223        @param archive_url: Optional parameter that has the archive_url to stage
1224                this artifact from. Default is specified in autotest config +
1225                image.
1226        @param kwargs: keyword arguments that specify the build information, to
1227                make stage devserver call.
1228
1229        @raise DevServerException upon any return code that's not HTTP OK.
1230        """
1231        if not archive_url:
1232            archive_url = _get_storage_server_for_artifacts(artifacts) + build
1233
1234        artifacts_arg = ','.join(artifacts) if artifacts else ''
1235        files_arg = ','.join(files) if files else ''
1236        error_message = ("staging %s for %s failed;"
1237                         "HTTP OK not accompanied by 'Success'." %
1238                         ('artifacts=%s files=%s ' % (artifacts_arg, files_arg),
1239                          build))
1240
1241        staging_info = ('build=%s, artifacts=%s, files=%s, archive_url=%s' %
1242                        (build, artifacts, files, archive_url))
1243        logging.info('Staging artifacts on devserver %s: %s',
1244                     self.url(), staging_info)
1245        success = False
1246        try:
1247            arguments = {'archive_url': archive_url,
1248                         'artifacts': artifacts_arg,
1249                         'files': files_arg}
1250            if kwargs:
1251                arguments.update(kwargs)
1252            # TODO(akeshet): canonicalize artifacts_arg before using it as a
1253            # metric field (as it stands it is a not-very-well-controlled
1254            # string).
1255            f = {'artifacts': artifacts_arg,
1256                 'dev_server': self.resolved_hostname}
1257            with metrics.SecondsTimer(
1258                    'chromeos/autotest/devserver/stage_artifact_duration',
1259                    fields=f):
1260                self.call_and_wait(call_name='stage', error_message=error_message,
1261                                   **arguments)
1262            logging.info('Finished staging artifacts: %s', staging_info)
1263            success = True
1264        except (bin_utils.TimeoutError, error.TimeoutException):
1265            logging.error('stage_artifacts timed out: %s', staging_info)
1266            raise DevServerException(
1267                    'stage_artifacts timed out: %s' % staging_info)
1268        finally:
1269            f = {'success': success,
1270                 'artifacts': artifacts_arg,
1271                 'dev_server': self.resolved_hostname}
1272            metrics.Counter('chromeos/autotest/devserver/stage_artifact'
1273                            ).increment(fields=f)
1274
1275
1276    def call_and_wait(self, *args, **kwargs):
1277        """Helper method to make a urlopen call, and wait for artifacts staged.
1278
1279        This method needs to be overridden in the subclass to implement the
1280        logic to call _call_and_wait.
1281        """
1282        raise NotImplementedError
1283
1284
1285    def _trigger_download(self, build, artifacts, files, synchronous=True,
1286                          **kwargs_build_info):
1287        """Tell the devserver to download and stage image specified in
1288        kwargs_build_info.
1289
1290        Tells the devserver to fetch |image| from the image storage server
1291        named by _get_image_storage_server().
1292
1293        If |synchronous| is True, waits for the entire download to finish
1294        staging before returning. Otherwise only the artifacts necessary
1295        to start installing images onto DUT's will be staged before returning.
1296        A caller can then call finish_download to guarantee the rest of the
1297        artifacts have finished staging.
1298
1299        @param synchronous: if True, waits until all components of the image are
1300               staged before returning.
1301        @param kwargs_build_info: Dictionary of build information.
1302                For CrOS, it is None as build is the CrOS image name.
1303                For Android, it is {'target': target,
1304                                    'build_id': build_id,
1305                                    'branch': branch}
1306
1307        @raise DevServerException upon any return code that's not HTTP OK.
1308
1309        """
1310        if kwargs_build_info:
1311            archive_url = None
1312        else:
1313            archive_url = _get_image_storage_server() + build
1314        error_message = ("trigger_download for %s failed;"
1315                         "HTTP OK not accompanied by 'Success'." % build)
1316        kwargs = {'archive_url': archive_url,
1317                  'artifacts': artifacts,
1318                  'files': files,
1319                  'error_message': error_message}
1320        if kwargs_build_info:
1321            kwargs.update(kwargs_build_info)
1322
1323        logging.info('trigger_download starts for %s', build)
1324        try:
1325            response = self.call_and_wait(call_name='stage', **kwargs)
1326            logging.info('trigger_download finishes for %s', build)
1327        except (bin_utils.TimeoutError, error.TimeoutException):
1328            logging.error('trigger_download timed out for %s.', build)
1329            raise DevServerException(
1330                    'trigger_download timed out for %s.' % build)
1331        was_successful = response == SUCCESS
1332        if was_successful and synchronous:
1333            self._finish_download(build, artifacts, files, **kwargs_build_info)
1334
1335
1336    def _finish_download(self, build, artifacts, files, **kwargs_build_info):
1337        """Tell the devserver to finish staging image specified in
1338        kwargs_build_info.
1339
1340        If trigger_download is called with synchronous=False, it will return
1341        before all artifacts have been staged. This method contacts the
1342        devserver and blocks until all staging is completed and should be
1343        called after a call to trigger_download.
1344
1345        @param kwargs_build_info: Dictionary of build information.
1346                For CrOS, it is None as build is the CrOS image name.
1347                For Android, it is {'target': target,
1348                                    'build_id': build_id,
1349                                    'branch': branch}
1350
1351        @raise DevServerException upon any return code that's not HTTP OK.
1352        """
1353        archive_url = _get_image_storage_server() + build
1354        error_message = ("finish_download for %s failed;"
1355                         "HTTP OK not accompanied by 'Success'." % build)
1356        kwargs = {'archive_url': archive_url,
1357                  'artifacts': artifacts,
1358                  'files': files,
1359                  'error_message': error_message}
1360        if kwargs_build_info:
1361            kwargs.update(kwargs_build_info)
1362        try:
1363            self.call_and_wait(call_name='stage', **kwargs)
1364        except (bin_utils.TimeoutError, error.TimeoutException):
1365            logging.error('finish_download timed out for %s', build)
1366            raise DevServerException(
1367                    'finish_download timed out for %s.' % build)
1368
1369
1370    @remote_devserver_call()
1371    def locate_file(self, file_name, artifacts, build, build_info):
1372        """Locate a file with the given file_name on devserver.
1373
1374        This method calls devserver RPC `locate_file` to look up a file with
1375        the given file name inside specified build artifacts.
1376
1377        @param file_name: Name of the file to look for a file.
1378        @param artifacts: A list of artifact names to search for the file.
1379        @param build: Name of the build. For Android, it's None as build_info
1380                should be used.
1381        @param build_info: Dictionary of build information.
1382                For CrOS, it is None as build is the CrOS image name.
1383                For Android, it is {'target': target,
1384                                    'build_id': build_id,
1385                                    'branch': branch}
1386
1387        @return: A devserver url to the file.
1388        @raise DevServerException upon any return code that's not HTTP OK.
1389        """
1390        if not build and not build_info:
1391            raise DevServerException('You must specify build information to '
1392                                     'look for file %s in artifacts %s.' %
1393                                     (file_name, artifacts))
1394        kwargs = {'file_name': file_name,
1395                  'artifacts': artifacts}
1396        if build_info:
1397            build_path = '%(branch)s/%(target)s/%(build_id)s' % build_info
1398            kwargs.update(build_info)
1399            # Devserver treats Android and Brillo build in the same way as they
1400            # are both retrieved from Launch Control and have similar build
1401            # artifacts. Therefore, os_type for devserver calls is `android` for
1402            # both Android and Brillo builds.
1403            kwargs['os_type'] = 'android'
1404        else:
1405            build_path = build
1406            kwargs['build'] = build
1407        call = self.build_call('locate_file', async=False, **kwargs)
1408        try:
1409            file_path = self.run_call(call)
1410            return os.path.join(self.url(), 'static', build_path, file_path)
1411        except httplib.BadStatusLine as e:
1412            logging.error(e)
1413            raise DevServerException('Received Bad Status line, Devserver %s '
1414                                     'might have gone down while handling '
1415                                     'the call: %s' % (self.url(), call))
1416
1417
1418    @remote_devserver_call()
1419    def list_control_files(self, build, suite_name=''):
1420        """Ask the devserver to list all control files for |build|.
1421
1422        @param build: The build (e.g. x86-mario-release/R18-1586.0.0-a1-b1514)
1423                      whose control files the caller wants listed.
1424        @param suite_name: The name of the suite for which we require control
1425                           files.
1426        @return None on failure, or a list of control file paths
1427                (e.g. server/site_tests/autoupdate/control)
1428        @raise DevServerException upon any return code that's not HTTP OK.
1429        """
1430        build = self.translate(build)
1431        call = self.build_call('controlfiles', build=build,
1432                               suite_name=suite_name)
1433        return self.run_call(call, readline=True)
1434
1435
1436    @remote_devserver_call()
1437    def get_control_file(self, build, control_path):
1438        """Ask the devserver for the contents of a control file.
1439
1440        @param build: The build (e.g. x86-mario-release/R18-1586.0.0-a1-b1514)
1441                      whose control file the caller wants to fetch.
1442        @param control_path: The file to fetch
1443                             (e.g. server/site_tests/autoupdate/control)
1444        @return The contents of the desired file.
1445        @raise DevServerException upon any return code that's not HTTP OK.
1446        """
1447        build = self.translate(build)
1448        call = self.build_call('controlfiles', build=build,
1449                               control_path=control_path)
1450        return self.run_call(call)
1451
1452
1453    @remote_devserver_call()
1454    def list_suite_controls(self, build, suite_name=''):
1455        """Ask the devserver to list contents of all control files for |build|.
1456
1457        @param build: The build (e.g. x86-mario-release/R18-1586.0.0-a1-b1514)
1458                      whose control files' contents the caller wants returned.
1459        @param suite_name: The name of the suite for which we require control
1460                           files.
1461        @return None on failure, or a dict of contents of all control files
1462            (e.g. {'path1': "#Copyright controls ***", ...,
1463                pathX': "#Copyright controls ***"}
1464        @raise DevServerException upon any return code that's not HTTP OK.
1465        """
1466        build = self.translate(build)
1467        call = self.build_call('list_suite_controls', build=build,
1468                               suite_name=suite_name)
1469        return json.load(cStringIO.StringIO(self.run_call(call)))
1470
1471
1472class ImageServer(ImageServerBase):
1473    """Class for DevServer that handles RPCs related to CrOS images.
1474
1475    The calls to devserver to stage artifacts, including stage and download, are
1476    made in async mode. That is, when caller makes an RPC |stage| to request
1477    devserver to stage certain artifacts, devserver handles the call and starts
1478    staging artifacts in a new thread, and return |Success| without waiting for
1479    staging being completed. When caller receives message |Success|, it polls
1480    devserver's is_staged call until all artifacts are staged.
    This mechanism is designed to keep devserver from running out of cherrypy
    threads, as staging artifacts might take a long time, and cherrypy starts
    with a fixed number of threads that handle devserver rpc.
1484    """
1485
1486    class ArtifactUrls(object):
1487        """A container for URLs of staged artifacts.
1488
1489        Attributes:
1490            full_payload: URL for downloading a staged full release update
1491            mton_payload: URL for downloading a staged M-to-N release update
1492            nton_payload: URL for downloading a staged N-to-N release update
1493
1494        """
1495        def __init__(self, full_payload=None, mton_payload=None,
1496                     nton_payload=None):
1497            self.full_payload = full_payload
1498            self.mton_payload = mton_payload
1499            self.nton_payload = nton_payload
1500
1501
1502    def wait_for_artifacts_staged(self, archive_url, artifacts='', files=''):
1503        """Polling devserver.is_staged until all artifacts are staged.
1504
1505        @param archive_url: Google Storage URL for the build.
1506        @param artifacts: Comma separated list of artifacts to download.
1507        @param files: Comma separated list of files to download.
1508        @return: True if all artifacts are staged in devserver.
1509        """
1510        kwargs = {'archive_url': archive_url,
1511                  'artifacts': artifacts,
1512                  'files': files}
1513        return self._poll_is_staged(**kwargs)
1514
1515
1516    @remote_devserver_call()
1517    def call_and_wait(self, call_name, archive_url, artifacts, files,
1518                      error_message, expected_response=SUCCESS):
1519        """Helper method to make a urlopen call, and wait for artifacts staged.
1520
1521        @param call_name: name of devserver rpc call.
1522        @param archive_url: Google Storage URL for the build..
1523        @param artifacts: Comma separated list of artifacts to download.
1524        @param files: Comma separated list of files to download.
1525        @param expected_response: Expected response from rpc, default to
1526                                  |Success|. If it's set to None, do not compare
1527                                  the actual response. Any response is consider
1528                                  to be good.
1529        @param error_message: Error message to be thrown if response does not
1530                              match expected_response.
1531
1532        @return: The response from rpc.
1533        @raise DevServerException upon any return code that's expected_response.
1534
1535        """
1536        kwargs = {'archive_url': archive_url,
1537                  'artifacts': artifacts,
1538                  'files': files}
1539        return self._call_and_wait(call_name, error_message,
1540                                   expected_response, **kwargs)
1541
1542
1543    @remote_devserver_call()
1544    def stage_artifacts(self, image=None, artifacts=None, files='',
1545                        archive_url=None):
1546        """Tell the devserver to download and stage |artifacts| from |image|.
1547
1548         This is the main call point for staging any specific artifacts for a
1549        given build. To see the list of artifacts one can stage see:
1550
1551        ~src/platfrom/dev/artifact_info.py.
1552
1553        This is maintained along with the actual devserver code.
1554
1555        @param image: the image to fetch and stage.
1556        @param artifacts: A list of artifacts.
1557        @param files: A list of files to stage.
1558        @param archive_url: Optional parameter that has the archive_url to stage
1559                this artifact from. Default is specified in autotest config +
1560                image.
1561
1562        @raise DevServerException upon any return code that's not HTTP OK.
1563        """
1564        if not artifacts and not files:
1565            raise DevServerException('Must specify something to stage.')
1566        image = self.translate(image)
1567        self._stage_artifacts(image, artifacts, files, archive_url)
1568
1569
1570    @remote_devserver_call(timeout_min=DEVSERVER_SSH_TIMEOUT_MINS)
1571    def list_image_dir(self, image):
1572        """List the contents of the image stage directory, on the devserver.
1573
1574        @param image: The image name, eg: <board>-<branch>/<Milestone>-<build>.
1575
1576        @raise DevServerException upon any return code that's not HTTP OK.
1577        """
1578        image = self.translate(image)
1579        logging.info('Requesting contents from devserver %s for image %s',
1580                     self.url(), image)
1581        archive_url = _get_storage_server_for_artifacts() + image
1582        call = self.build_call('list_image_dir', archive_url=archive_url)
1583        response = self.run_call(call, readline=True)
1584        for line in response:
1585            logging.info(line)
1586
1587
1588    def trigger_download(self, image, synchronous=True):
1589        """Tell the devserver to download and stage |image|.
1590
1591        Tells the devserver to fetch |image| from the image storage server
1592        named by _get_image_storage_server().
1593
1594        If |synchronous| is True, waits for the entire download to finish
1595        staging before returning. Otherwise only the artifacts necessary
1596        to start installing images onto DUT's will be staged before returning.
1597        A caller can then call finish_download to guarantee the rest of the
1598        artifacts have finished staging.
1599
1600        @param image: the image to fetch and stage.
1601        @param synchronous: if True, waits until all components of the image are
1602               staged before returning.
1603
1604        @raise DevServerException upon any return code that's not HTTP OK.
1605
1606        """
1607        image = self.translate(image)
1608        artifacts = _ARTIFACTS_TO_BE_STAGED_FOR_IMAGE
1609        self._trigger_download(image, artifacts, files='',
1610                               synchronous=synchronous)
1611
1612
1613    @remote_devserver_call()
1614    def setup_telemetry(self, build):
1615        """Tell the devserver to setup telemetry for this build.
1616
1617        The devserver will stage autotest and then extract the required files
1618        for telemetry.
1619
1620        @param build: the build to setup telemetry for.
1621
1622        @returns path on the devserver that telemetry is installed to.
1623        """
1624        build = self.translate(build)
1625        archive_url = _get_image_storage_server() + build
1626        call = self.build_call('setup_telemetry', archive_url=archive_url)
1627        try:
1628            response = self.run_call(call)
1629        except httplib.BadStatusLine as e:
1630            logging.error(e)
1631            raise DevServerException('Received Bad Status line, Devserver %s '
1632                                     'might have gone down while handling '
1633                                     'the call: %s' % (self.url(), call))
1634        return response
1635
1636
1637    def finish_download(self, image):
1638        """Tell the devserver to finish staging |image|.
1639
1640        If trigger_download is called with synchronous=False, it will return
1641        before all artifacts have been staged. This method contacts the
1642        devserver and blocks until all staging is completed and should be
1643        called after a call to trigger_download.
1644
1645        @param image: the image to fetch and stage.
1646        @raise DevServerException upon any return code that's not HTTP OK.
1647        """
1648        image = self.translate(image)
1649        artifacts = _ARTIFACTS_TO_BE_STAGED_FOR_IMAGE_WITH_AUTOTEST
1650        self._finish_download(image, artifacts, files='')
1651
1652
1653    def get_update_url(self, image):
1654        """Returns the url that should be passed to the updater.
1655
1656        @param image: the image that was fetched.
1657        """
1658        image = self.translate(image)
1659        url_pattern = CONFIG.get_config_value('CROS', 'image_url_pattern',
1660                                              type=str)
1661        return (url_pattern % (self.url(), image))
1662
1663
1664    def get_staged_file_url(self, filename, image):
1665        """Returns the url of a staged file for this image on the devserver."""
1666        return '/'.join([self._get_image_url(image), filename])
1667
1668
1669    def get_full_payload_url(self, image):
1670        """Returns a URL to a staged full payload.
1671
1672        @param image: the image that was fetched.
1673
1674        @return A fully qualified URL that can be used for downloading the
1675                payload.
1676
1677        """
1678        return self._get_image_url(image) + '/update.gz'
1679
1680
1681    def get_test_image_url(self, image):
1682        """Returns a URL to a staged test image.
1683
1684        @param image: the image that was fetched.
1685
1686        @return A fully qualified URL that can be used for downloading the
1687                image.
1688
1689        """
1690        return self._get_image_url(image) + '/chromiumos_test_image.bin'
1691
1692
1693    @remote_devserver_call()
1694    def get_dependencies_file(self, build):
1695        """Ask the dev server for the contents of the suite dependencies file.
1696
1697        Ask the dev server at |self._dev_server| for the contents of the
1698        pre-processed suite dependencies file (at DEPENDENCIES_FILE)
1699        for |build|.
1700
1701        @param build: The build (e.g. x86-mario-release/R21-2333.0.0)
1702                      whose dependencies the caller is interested in.
1703        @return The contents of the dependencies file, which should eval to
1704                a dict of dicts, as per bin_utils/suite_preprocessor.py.
1705        @raise DevServerException upon any return code that's not HTTP OK.
1706        """
1707        build = self.translate(build)
1708        call = self.build_call('controlfiles',
1709                               build=build, control_path=DEPENDENCIES_FILE)
1710        return self.run_call(call)
1711
1712
1713    @remote_devserver_call()
1714    def get_latest_build_in_gs(self, board):
1715        """Ask the devservers for the latest offical build in Google Storage.
1716
1717        @param board: The board for who we want the latest official build.
1718        @return A string of the returned build rambi-release/R37-5868.0.0
1719        @raise DevServerException upon any return code that's not HTTP OK.
1720        """
1721        call = self.build_call(
1722                'xbuddy_translate/remote/%s/latest-official' % board,
1723                image_dir=_get_image_storage_server())
1724        image_name = self.run_call(call)
1725        return os.path.dirname(image_name)
1726
1727
1728    def translate(self, build_name):
1729        """Translate the build name if it's in LATEST format.
1730
1731        If the build name is in the format [builder]/LATEST, return the latest
1732        build in Google Storage otherwise return the build name as is.
1733
1734        @param build_name: build_name to check.
1735
1736        @return The actual build name to use.
1737        """
1738        match = re.match(r'([\w-]+)-(\w+)/LATEST', build_name, re.I)
1739        if not match:
1740            return build_name
1741        translated_build = self.get_latest_build_in_gs(match.groups()[0])
1742        logging.debug('Translated relative build %s to %s', build_name,
1743                      translated_build)
1744        return translated_build
1745
1746
1747    @classmethod
1748    @remote_devserver_call()
1749    def get_latest_build(cls, target, milestone=''):
1750        """Ask all the devservers for the latest build for a given target.
1751
1752        @param target: The build target, typically a combination of the board
1753                       and the type of build e.g. x86-mario-release.
1754        @param milestone:  For latest build set to '', for builds only in a
1755                           specific milestone set to a str of format Rxx
1756                           (e.g. R16). Default: ''. Since we are dealing with a
1757                           webserver sending an empty string, '', ensures that
1758                           the variable in the URL is ignored as if it was set
1759                           to None.
1760        @return A string of the returned build e.g. R20-2226.0.0.
1761        @raise DevServerException upon any return code that's not HTTP OK.
1762        """
1763        calls = cls.build_all_calls('latestbuild', target=target,
1764                                    milestone=milestone)
1765        latest_builds = []
1766        for call in calls:
1767            latest_builds.append(cls.run_call(call))
1768
1769        return max(latest_builds, key=version.LooseVersion)
1770
1771
1772    @remote_devserver_call()
1773    def _kill_au_process_for_host(self, **kwargs):
1774        """Kill the triggerred auto_update process if error happens in cros_au.
1775
1776        @param kwargs: Arguments to make kill_au_proc devserver call.
1777        """
1778        call = self.build_call('kill_au_proc', **kwargs)
1779        response = self.run_call(call)
1780        if not response == 'True':
1781            raise DevServerException(
1782                    'Failed to kill the triggerred CrOS auto_update process'
1783                    'on devserver %s, the response is %s' % (
1784                            self.url(), response))
1785
1786
1787    def kill_au_process_for_host(self, host_name, pid):
1788        """Kill the triggerred auto_update process if error happens.
1789
1790        Usually this function is used to clear all potential left au processes
1791        of the given host name.
1792
1793        If pid is specified, the devserver will further check the given pid to
1794        make sure the process is killed. This is used for the case that the au
1795        process has started in background, but then provision fails due to
1796        some unknown issues very fast. In this case, when 'kill_au_proc' is
1797        called, there's no corresponding background track log created for this
1798        ongoing au process, which prevents this RPC call from killing this au
1799        process.
1800
1801        @param host_name: The DUT's hostname.
1802        @param pid: The ongoing au process's pid.
1803
1804        @return: True if successfully kill the auto-update process for host.
1805        """
1806        kwargs = {'host_name': host_name, 'pid': pid}
1807        try:
1808            self._kill_au_process_for_host(**kwargs)
1809        except DevServerException:
1810            return False
1811
1812        return True
1813
1814
1815    @remote_devserver_call()
1816    def _clean_track_log(self, **kwargs):
1817        """Clean track log for the current auto-update process."""
1818        call = self.build_call('handler_cleanup', **kwargs)
1819        self.run_call(call)
1820
1821
1822    def clean_track_log(self, host_name, pid):
1823        """Clean track log for the current auto-update process.
1824
1825        @param host_name: The host name to be updated.
1826        @param pid: The auto-update process id.
1827
1828        @return: True if track log is successfully cleaned, False otherwise.
1829        """
1830        if not pid:
1831            return False
1832
1833        kwargs = {'host_name': host_name, 'pid': pid}
1834        try:
1835            self._clean_track_log(**kwargs)
1836        except DevServerException as e:
1837            logging.debug('Failed to clean track_status_file on '
1838                          'devserver for host %s and process id %s: %s',
1839                          host_name, pid, str(e))
1840            return False
1841
1842        return True
1843
1844
1845    def _get_au_log_filename(self, log_dir, host_name, pid):
1846        """Return the auto-update log's filename."""
1847        return os.path.join(log_dir, CROS_AU_LOG_FILENAME % (
1848                    host_name, pid))
1849
1850    def _read_json_response_from_devserver(self, response):
1851        """Reads the json response from the devserver.
1852
1853        This is extracted to its own function so that it can be easily mocked.
1854        @param response: the response for a devserver.
1855        """
1856        try:
1857            return json.loads(response)
1858        except ValueError as e:
1859            logging.debug('Failed to load json response: %s', response)
1860            raise DevServerException(e)
1861
1862
1863    @remote_devserver_call()
1864    def _collect_au_log(self, log_dir, **kwargs):
1865        """Collect logs from devserver after cros-update process is finished.
1866
1867        Collect the logs that recording the whole cros-update process, and
1868        write it to sysinfo path of a job.
1869
1870        The example log file name that is stored is like:
1871            '1220-repair/sysinfo/CrOS_update_host_name_pid.log'
1872
1873        @param host_name: the DUT's hostname.
1874        @param pid: the auto-update process id on devserver.
1875        @param log_dir: The directory to save the cros-update process log
1876                        retrieved from devserver.
1877        """
1878        call = self.build_call('collect_cros_au_log', **kwargs)
1879        response = self.run_call(call)
1880        if not os.path.exists(log_dir):
1881            os.mkdir(log_dir)
1882        write_file = self._get_au_log_filename(
1883                log_dir, kwargs['host_name'], kwargs['pid'])
1884        logging.debug('Saving auto-update logs into %s', write_file)
1885
1886        au_logs = self._read_json_response_from_devserver(response)
1887
1888        try:
1889            for k, v in au_logs['host_logs'].items():
1890                log_name = '%s_%s_%s' % (k, kwargs['host_name'], kwargs['pid'])
1891                log_path = os.path.join(log_dir, log_name)
1892                with open(log_path, 'w') as out_log:
1893                    out_log.write(v)
1894        except IOError as e:
1895            raise DevServerException('Failed to write auto-update hostlogs: '
1896                                     '%s' % e)
1897
1898        try:
1899            with open(write_file, 'w') as out_log:
1900                out_log.write(au_logs['cros_au_log'])
1901        except:
1902            raise DevServerException('Failed to write auto-update logs into '
1903                                     '%s' % write_file)
1904
1905
1906    def collect_au_log(self, host_name, pid, log_dir):
1907        """Collect logs from devserver after cros-update process is finished.
1908
1909        @param host_name: the DUT's hostname.
1910        @param pid: the auto-update process id on devserver.
1911        @param log_dir: The directory to save the cros-update process log
1912                        retrieved from devserver.
1913
1914        @return: True if auto-update log is successfully collected, False
1915          otherwise.
1916        """
1917        if not pid:
1918            return False
1919
1920        kwargs = {'host_name': host_name, 'pid': pid}
1921        try:
1922            self._collect_au_log(log_dir, **kwargs)
1923        except DevServerException as e:
1924            logging.debug('Failed to collect auto-update log on '
1925                          'devserver for host %s and process id %s: %s',
1926                          host_name, pid, str(e))
1927            return False
1928
1929        return True
1930
1931
1932    @remote_devserver_call()
1933    def _trigger_auto_update(self, **kwargs):
1934        """Trigger auto-update by calling devserver.cros_au.
1935
1936        @param kwargs:  Arguments to make cros_au devserver call.
1937
1938        @return: a tuple indicates whether the RPC call cros_au succeeds and
1939          the auto-update process id running on devserver.
1940        """
1941        host_name = kwargs['host_name']
1942        call = self.build_call('cros_au', async=True, **kwargs)
1943        try:
1944            response = self.run_call(call)
1945            logging.info(
1946                'Received response from devserver for cros_au call: %r',
1947                response)
1948        except httplib.BadStatusLine as e:
1949            logging.error(e)
1950            raise DevServerException('Received Bad Status line, Devserver %s '
1951                                     'might have gone down while handling '
1952                                     'the call: %s' % (self.url(), call))
1953
1954        return response
1955
1956
    def _check_for_auto_update_finished(self, pid, wait=True, **kwargs):
        """Polling devserver.get_au_status to get current auto-update status.

        The current auto-update status is used to identify whether the update
        process is finished.

        When |wait| is True the status is polled through
        bin_utils.poll_for_condition, with a timeout of
        DEVSERVER_IS_CROS_AU_FINISHED_TIMEOUT_MIN * 60 and a sleep interval of
        CROS_AU_POLLING_INTERVAL. Otherwise the status is queried exactly
        once.

        @param pid:    The background process id for auto-update in devserver.
        @param kwargs: keyword arguments to make get_au_status devserver call.
                       |pid| is added to them before the call is built.
        @param wait:   Should the check wait for completion.

        @return: True if auto-update is finished for a given dut. With
                 wait=False, False if it is not finished yet or the devserver
                 could not be reached.
        @raise DevServerException if the devserver reports an error or
                 returns a status that is not valid json.
        """
        logging.debug('Check the progress for auto-update process %r', pid)
        kwargs['pid'] = pid
        # The call is built once and reused by every poll below.
        call = self.build_call('get_au_status', **kwargs)

        def all_finished():
            """Call devserver.get_au_status rpc to check if auto-update
               is finished.

            @return: True if auto-update is finished for a given dut. False
                     otherwise.
            @raises  DevServerException, the exception is a wrapper of all
                     exceptions that were raised when devserver tried to
                     download the artifacts. devserver raises an HTTPError or
                     a CmdError when an exception was raised in the code. Such
                     exception should be re-raised here to stop the caller from
                     waiting. If the call to devserver failed for connection
                     issue, a URLError exception is raised, and caller should
                     retry the call to avoid such network flakiness.

            """
            try:
                au_status = self.run_call(call)
                response = json.loads(au_status)
                # This is a temp fix to fit both dict and tuple returning
                # values. The dict check will be removed after a corresponding
                # devserver CL is deployed.
                if isinstance(response, dict):
                    # A detailed error message reported by the devserver
                    # aborts the check.
                    if response.get('detailed_error_msg'):
                        raise DevServerException(
                                response.get('detailed_error_msg'))

                    if response.get('finished'):
                        logging.debug('CrOS auto-update is finished')
                        return True
                    else:
                        logging.debug('Current CrOS auto-update status: %s',
                                      response.get('status'))
                        return False

                # Non-dict form: element 0 is the finished flag and element 1
                # is the status that gets logged.
                if not response[0]:
                    logging.debug('Current CrOS auto-update status: %s',
                                  response[1])
                    return False
                else:
                    logging.debug('CrOS auto-update is finished')
                    return True
            # HTTPError is a subclass of URLError, so it must be handled
            # first; it is turned into a DevServerException instead of being
            # retried.
            except urllib2.HTTPError as e:
                error_markup = e.read()
                raise DevServerException(_strip_http_message(error_markup))
            except urllib2.URLError as e:
                # Could be connection issue, retry it.
                # For example: <urlopen error [Errno 111] Connection refused>
                logging.warning('URLError (%r): Retrying connection to '
                                'devserver to check auto-update status.', e)
                return False
            except error.CmdError:
                # Retry if SSH failed to connect to the devserver.
                logging.warning('CmdError: Retrying SSH connection to check '
                                'auto-update status.')
                return False
            except socket.error as e:
                # Could be some temporary devserver connection issues.
                logging.warning('Socket Error (%r): Retrying connection to '
                                'devserver to check auto-update status.', e)
                return False
            except ValueError as e:
                # json.loads raises ValueError when the status is not valid
                # json; report it together with the raw status text.
                raise DevServerException(
                        '%s (Got AU status: %r)' % (str(e), au_status))

        if wait:
            # NOTE(review): presumably poll_for_condition raises the given
            # TimeoutError once the timeout expires - confirm in bin_utils.
            bin_utils.poll_for_condition(
                    all_finished,
                    exception=bin_utils.TimeoutError(),
                    timeout=DEVSERVER_IS_CROS_AU_FINISHED_TIMEOUT_MIN * 60,
                    sleep_interval=CROS_AU_POLLING_INTERVAL)

            return True
        else:
            return all_finished()
2048
2049
2050    def check_for_auto_update_finished(self, response, wait=True, **kwargs):
2051        """Processing response of 'cros_au' and polling for auto-update status.
2052
2053        Will wait for the whole auto-update process is finished.
2054
2055        @param response: The response from RPC 'cros_au'
2056        @param kwargs: keyword arguments to make get_au_status devserver call.
2057
2058        @return: a tuple includes two elements.
2059          finished: True if the operation has completed.
2060          raised_error: None if everything works well or the raised error.
2061          pid: the auto-update process id on devserver.
2062        """
2063
2064        pid = 0
2065        raised_error = None
2066        finished = False
2067        try:
2068            response = json.loads(response)
2069            if response[0]:
2070                pid = response[1]
2071                # If provision is kicked off asynchronously, pid will be -1.
2072                # If provision is not successfully kicked off , pid continues
2073                # to be 0.
2074                if pid > 0:
2075                    logging.debug('start process %r for auto_update in '
2076                                  'devserver', pid)
2077                    finished = self._check_for_auto_update_finished(
2078                            pid, wait=wait, **kwargs)
2079        except Exception as e:
2080            logging.debug('Failed to trigger auto-update process on devserver')
2081            finished = True
2082            raised_error = e
2083        finally:
2084            return finished, raised_error, pid
2085
2086
2087    def _check_error_message(self, error_patterns_to_check, error_msg):
2088        """Detect whether specific error pattern exist in error message.
2089
2090        @param error_patterns_to_check: the error patterns to check
2091        @param error_msg: the error message which may include any error
2092                          pattern.
2093
2094        @return A boolean variable, True if error_msg contains any error
2095            pattern in error_patterns_to_check, False otherwise.
2096        """
2097        for err in error_patterns_to_check:
2098            if err in error_msg:
2099                return True
2100
2101        return False
2102
2103
2104    def _is_retryable(self, error_msg):
2105        """Detect whether we will retry auto-update based on error_msg.
2106
2107        @param error_msg: The given error message.
2108
2109        @return A boolean variable which indicates whether we will retry
2110            auto_update with another devserver based on the given error_msg.
2111        """
2112        # For now we just hard-code the error message we think it's suspicious.
2113        # When we get more date about what's the json response when devserver
2114        # is overloaded, we can update this part.
2115        retryable_error_patterns = [ERR_MSG_FOR_INVALID_DEVSERVER_RESPONSE,
2116                                    'is not pingable']
2117        return self._check_error_message(retryable_error_patterns, error_msg)
2118
2119
2120    def _should_use_original_payload(self, error_msg):
2121        devserver_error_patterns = ['DevserverCannotStartError']
2122        return self._check_error_message(devserver_error_patterns, error_msg)
2123
2124
2125    def _parse_buildname_safely(self, build_name):
2126        """Parse a given buildname safely.
2127
2128        @param build_name: the build name to be parsed.
2129
2130        @return: a tuple (board, build_type, milestone)
2131        """
2132        try:
2133            board, build_type, milestone, _ = server_utils.ParseBuildName(
2134                    build_name)
2135        except server_utils.ParseBuildNameException:
2136            logging.warning('Unable to parse build name %s for metrics. '
2137                            'Continuing anyway.', build_name)
2138            board, build_type, milestone = ('', '', '')
2139
2140        return board, build_type, milestone
2141
2142
2143    def _emit_auto_update_metrics(self, board, build_type, dut_host_name,
2144                                  build_name, attempt,
2145                                  success, failure_reason, duration):
2146        """Send metrics for a single auto_update attempt.
2147
2148        @param board: a field in metrics representing which board this
2149            auto_update tries to update.
2150        @param build_type: a field in metrics representing which build type this
2151            auto_update tries to update.
2152        @param dut_host_name: a field in metrics representing which DUT this
2153            auto_update tries to update.
2154        @param build_name: auto update build being updated to.
2155        @param attempt: a field in metrics, representing which attempt/retry
2156            this auto_update is.
2157        @param success: a field in metrics, representing whether this
2158            auto_update succeeds or not.
2159        @param failure_reason: DevServerExceptionClassifier object to show
2160            auto update failure reason, or None.
2161        @param duration: auto update duration time, in seconds.
2162        """
2163        # The following is high cardinality, but sparse.
2164        # Each DUT is of a single board type, and likely build type.
2165        # The affinity also results in each DUT being attached to the same
2166        # dev_server as well.
2167        fields = {
2168                'board': board,
2169                'build_type': build_type,
2170                'dut_host_name': dut_host_name,
2171                'dev_server': self.resolved_hostname,
2172                'attempt': attempt,
2173                'success': success,
2174        }
2175
2176        # reset_after=True is required for String gauges events to ensure that
2177        # the metrics are not repeatedly emitted until the server restarts.
2178
2179        metrics.String(PROVISION_PATH + '/auto_update_build_by_devserver_dut',
2180                       reset_after=True).set(build_name, fields=fields)
2181
2182        if not success:
2183            metrics.String(
2184                PROVISION_PATH +
2185                '/auto_update_failure_reason_by_devserver_dut',
2186                reset_after=True).set(
2187                    failure_reason.classification if failure_reason else '',
2188                    fields=fields)
2189
2190        metrics.SecondsDistribution(
2191                PROVISION_PATH + '/auto_update_duration_by_devserver_dut').add(
2192                        duration, fields=fields)
2193
2194
2195    def _emit_provision_metrics(self, error_list, duration_list,
2196                                is_au_success, board, build_type, milestone,
2197                                dut_host_name, is_aue2etest,
2198                                total_duration, build_name):
2199        """Send metrics for provision request.
2200
2201        Provision represents potentially multiple auto update attempts.
2202
2203        Please note: to avoid reaching or exceeding the monarch field
2204        cardinality limit, we avoid a metric that includes both dut hostname
2205        and other high cardinality fields.
2206
2207        @param error_list: a list of DevServerExceptionClassifier objects to
2208            show errors happened in provision. Usually it contains 1 ~
2209            AU_RETRY_LIMIT objects since we only retry provision for several
2210            times.
2211        @param duration_list: a list of provision duration time, counted by
2212            seconds.
2213        @param is_au_success: a field in metrics, representing whether this
2214            auto_update succeeds or not.
2215        @param board: a field in metrics representing which board this
2216            auto_update tries to update.
2217        @param build_type: a field in metrics representing which build type this
2218            auto_update tries to update.
2219        @param milestone: a field in metrics representing which milestone this
2220            auto_update tries to update.
2221        @param dut_host_name: a field in metrics representing which DUT this
2222            auto_update tries to update.
2223        @param is_aue2etest: a field in metrics representing if provision was
2224            done as part of the autoupdate_EndToEndTest.
2225        """
2226        # The following is high cardinality, but sparse.
2227        # Each DUT is of a single board type, and likely build type.
2228        # The affinity also results in each DUT being attached to the same
2229        # dev_server as well.
2230        fields = {
2231                'board': board,
2232                'build_type': build_type,
2233                'dut_host_name': dut_host_name,
2234                'dev_server': self.resolved_hostname,
2235                'success': is_au_success,
2236        }
2237
2238        # reset_after=True is required for String gauges events to ensure that
2239        # the metrics are not repeatedly emitted until the server restarts.
2240
2241        metrics.String(PROVISION_PATH + '/provision_build_by_devserver_dut',
2242                       reset_after=True).set(build_name, fields=fields)
2243
2244        if error_list:
2245            metrics.String(
2246                    PROVISION_PATH +
2247                    '/provision_failure_reason_by_devserver_dut',
2248                    reset_after=True).set(error_list[0].classification,
2249                                          fields=fields)
2250
2251        metrics.SecondsDistribution(
2252                PROVISION_PATH + '/provision_duration_by_devserver_dut').add(
2253                        total_duration, fields=fields)
2254
2255
2256    def _parse_buildname_from_gs_uri(self, uri):
2257        """Get parameters needed for AU metrics when build_name is not known.
2258
2259        autoupdate_EndToEndTest is run with two Google Storage URIs from the
2260        gs://chromeos-releases bucket. URIs in this bucket do not have the
2261        build_name in the format samus-release/R60-0000.0.0.
2262
2263        We can get the milestone and board by checking the instructions.json
2264        file contained in the bucket with the payloads.
2265
2266        @param uri: The partial uri we received from autoupdate_EndToEndTest.
2267        """
2268        try:
2269            # Get the instructions file that contains info about the build.
2270            gs_file = 'gs://chromeos-releases/' + uri + '/*instructions.json'
2271            files = bin_utils.gs_ls(gs_file)
2272            for f in files:
2273                gs_folder, _, instruction_file = f.rpartition('/')
2274                self.stage_artifacts(image=uri,
2275                                     files=[instruction_file],
2276                                     archive_url=gs_folder)
2277                json_file = self.get_staged_file_url(instruction_file, uri)
2278                response = urllib2.urlopen(json_file)
2279                data = json.load(response)
2280                return data['board'], 'release', data['version']['milestone']
2281        except (ValueError, error.CmdError, urllib2.URLError) as e:
2282            logging.debug('Problem getting values for metrics: %s', e)
2283            logging.warning('Unable to parse build name %s from AU test for '
2284                            'metrics. Continuing anyway.', uri)
2285
2286        return '', '', ''
2287
2288
    def auto_update(self, host_name, build_name, original_board=None,
                    original_release_version=None, log_dir=None,
                    force_update=False, full_update=False,
                    payload_filename=None, force_original=False,
                    clobber_stateful=True, quick_provision=False):
        """Auto-update a CrOS host.

        Triggers an auto-update on this devserver and waits for it to finish,
        retrying up to AU_RETRY_LIMIT times. Metrics are emitted for every
        attempt and once more for the provision request as a whole.

        @param host_name: The hostname of the DUT to auto-update.
        @param build_name:  The build name to be auto-updated on the DUT.
        @param original_board: The original board of the DUT to auto-update.
        @param original_release_version: The release version of the DUT's
            current build.
        @param log_dir: The log directory to store auto-update logs from
            devserver. If not set, auto-update logs are not collected.
        @param force_update: Force an update even if the version installed
                             is the same. Default: False.
        @param full_update:  If True, do not run stateful update, directly
                             force a full reimage. If False, try stateful
                             update first if the dut is already installed
                             with the same version.
        @param payload_filename: Used to specify the exact file to
                                 use for autoupdating. If None, the payload
                                 will be determined by build_name. You
                                 must have already staged this file before
                                 passing it in here. A non-None value also
                                 marks the run as an autoupdate_EndToEndTest
                                 for metrics purposes.
        @param force_original: Whether to force stateful update with the
                               original payload.
        @param clobber_stateful: If True do a clean install of stateful.
        @param quick_provision: Attempt to use quick provision path first.

        @return A tuple (is_success, pid) in which:
            1. is_success indicates whether this auto_update succeeds.
            2. pid is the process id of the successful autoupdate run.

        @raise DevServerException if auto_update fails and is not retryable.
        @raise RetryableProvisionException if it fails and is retryable.
        """
        # Arguments forwarded to _trigger_auto_update and
        # check_for_auto_update_finished. 'host_name' may be replaced by the
        # DUT's IP address between attempts (see the finally block below).
        kwargs = {'host_name': host_name,
                  'build_name': build_name,
                  'force_update': force_update,
                  'full_update': full_update,
                  'clobber_stateful': clobber_stateful,
                  'quick_provision': quick_provision}

        # An explicit payload file is treated as the marker of an
        # autoupdate_EndToEndTest run.
        is_aue2etest = payload_filename is not None

        if is_aue2etest:
            kwargs['payload_filename'] = payload_filename

        error_msg = 'CrOS auto-update failed for host %s: %s'
        error_msg_attempt = 'Exception raised on auto_update attempt #%s:\n%s'
        is_au_success = False
        au_log_dir = os.path.join(log_dir,
                                  AUTO_UPDATE_LOG_DIR) if log_dir else None
        # One DevServerExceptionClassifier per failed attempt.
        error_list = []
        retry_with_another_devserver = False
        # Duration, in whole seconds, of every attempt.
        duration_list = []

        # board/build_type/milestone are only used as metric fields.
        if is_aue2etest:
            board, build_type, milestone = self._parse_buildname_from_gs_uri(
                build_name)
        else:
            board, build_type, milestone = self._parse_buildname_safely(
                build_name)

        provision_start_time = time.time()
        for au_attempt in range(AU_RETRY_LIMIT):
            logging.debug('Start CrOS auto-update for host %s at %d time(s).',
                          host_name, au_attempt + 1)
            au_start_time = time.time()
            failure_reason = None
            # No matter _trigger_auto_update succeeds or fails, the auto-update
            # track_status_file should be cleaned, and the auto-update execute
            # log should be collected to directory sysinfo. Also, the error
            # raised by _trigger_auto_update should be displayed.
            try:
                # Try update with stateful.tgz of old release version in the
                # last try of auto-update.
                if force_original and original_release_version:
                    # Monitor this case in monarch
                    original_build = '%s/%s' % (original_board,
                                                original_release_version)
                    c = metrics.Counter(
                            'chromeos/autotest/provision/'
                            'cros_update_with_original_build')
                    f = {'dev_server': self.resolved_hostname,
                         'board': board,
                         'build_type': build_type,
                         'milestone': milestone,
                         'original_build': original_build}
                    c.increment(fields=f)

                    logging.debug('Try updating stateful partition of the '
                                  'host with the same version of its current '
                                  'rootfs partition: %s', original_build)
                    response = self._trigger_auto_update(
                            original_build=original_build, **kwargs)
                else:
                    response = self._trigger_auto_update(**kwargs)
            except DevServerException as e:
                # Triggering the update failed; record the reason and let the
                # finally block decide whether to retry.
                logging.debug(error_msg_attempt, au_attempt+1, str(e))
                failure_reason = DevServerExceptionClassifier(str(e))
            else:
                # The update was triggered; wait for it to finish.
                _, raised_error, pid = self.check_for_auto_update_finished(
                        response, **kwargs)

                # Errors that happen in collect_au_log won't be raised; only
                # its return value is checked.
                if au_log_dir:
                    is_collect_success = self.collect_au_log(
                            kwargs['host_name'], pid, au_log_dir)
                else:
                    is_collect_success = True

                # Errors that happen in clean_track_log won't be raised; only
                # its return value is checked.
                if pid >= 0:
                    is_clean_success = self.clean_track_log(
                            kwargs['host_name'], pid)
                else:
                    is_clean_success = True

                # If any error is raised previously, log it and retry
                # auto-update. Otherwise, claim a successful CrOS auto-update.
                if (not raised_error and is_clean_success and
                    is_collect_success):
                    logging.debug('CrOS auto-update succeed for host %s',
                                  host_name)
                    is_au_success = True
                    # The finally block below still runs before the loop is
                    # left.
                    break
                else:
                    if not self.kill_au_process_for_host(kwargs['host_name'],
                                                         pid):
                        logging.debug('Failed to kill auto_update process %d',
                                      pid)
                    if raised_error:
                        error_str = str(raised_error)
                        logging.debug(error_msg_attempt, au_attempt + 1,
                                      error_str)
                        if au_log_dir:
                            logging.debug('Please see error details in log %s',
                                          self._get_au_log_filename(
                                                  au_log_dir,
                                                  kwargs['host_name'],
                                                  pid))
                        failure_reason = DevServerExceptionClassifier(
                            error_str, keep_full_trace=False)
                        # A retryable error stops the loop (see the finally
                        # block) and is later raised as
                        # RetryableProvisionException.
                        if self._is_retryable(error_str):
                            retry_with_another_devserver = True

                        # Subsequent attempts will take the force_original
                        # path above (if original_release_version is set).
                        if self._should_use_original_payload(error_str):
                            force_original = True

            finally:
                # Per-attempt bookkeeping; runs for success and failure alike.
                duration = int(time.time() - au_start_time)
                duration_list.append(duration)
                if failure_reason:
                    error_list.append(failure_reason)
                self._emit_auto_update_metrics(board, build_type, host_name,
                                               build_name, au_attempt + 1,
                                               is_au_success, failure_reason,
                                               duration)
                # NOTE(review): a break inside finally discards any exception
                # that is propagating out of the try statement -- confirm
                # that this is intended.
                if retry_with_another_devserver:
                    break

                if not is_au_success and au_attempt < AU_RETRY_LIMIT - 1:
                    time.sleep(CROS_AU_RETRY_INTERVAL)
                    # Use the IP of DUT if the hostname failed.
                    # NOTE(review): socket.gethostbyname can raise
                    # socket.gaierror; it is not handled here and would
                    # propagate out of auto_update -- confirm.
                    host_name_ip = socket.gethostbyname(host_name)
                    kwargs['host_name'] = host_name_ip
                    logging.debug(
                            'AU failed, trying IP instead of hostname: %s',
                            host_name_ip)

        total_duration = int(time.time() - provision_start_time)
        self._emit_provision_metrics(error_list, duration_list, is_au_success,
                                     board, build_type, milestone, host_name,
                                     is_aue2etest, total_duration, build_name)

        if is_au_success:
            return (is_au_success, pid)

        # If errors happen in the CrOS AU process, report the concatenation
        # of the errors happening in first & second provision.
        # If error happens in RPCs of cleaning track log, collecting
        # auto-update logs, or killing auto-update processes, just report a
        # common error here.
        if error_list:
            # NOTE(review): enumerate() numbers the errors from 0 here, while
            # the per-attempt logs above use au_attempt + 1.
            real_error = ', '.join(['%d) %s' % (i, e.summary)
                                    for i, e in enumerate(error_list)])
            if retry_with_another_devserver:
                raise RetryableProvisionException(
                        error_msg % (host_name, real_error))
            else:
                # The exception type is chosen by the first recorded error.
                raise error_list[0].classified_exception(
                    error_msg % (host_name, real_error))
        else:
            raise DevServerException(error_msg % (
                        host_name, ('RPC calls after the whole auto-update '
                                    'process failed.')))
2487
2488
class AndroidBuildServer(ImageServerBase):
    """Class for DevServer that handles RPCs related to Android builds.

    The calls to devserver to stage artifacts, including stage and download, are
    made in async mode. That is, when caller makes an RPC |stage| to request
    devserver to stage certain artifacts, devserver handles the call and starts
    staging artifacts in a new thread, and returns |Success| without waiting for
    staging to complete. When caller receives message |Success|, it polls
    devserver's is_staged call until all artifacts are staged.
    Such mechanism is designed to prevent devserver from running out of
    cherrypy threads, as staging artifacts might take a long time, and cherrypy
    starts with a fixed number of threads that handle devserver rpc.
    """

    def wait_for_artifacts_staged(self, target, build_id, branch,
                                  archive_url=None, artifacts='', files=''):
        """Polling devserver.is_staged until all artifacts are staged.

        @param target: Target of the android build to stage, e.g.,
                       shamu-userdebug.
        @param build_id: Build id of the android build to stage.
        @param branch: Branch of the android build to stage.
        @param archive_url: Google Storage URL for the build. Only sent to
                            the devserver when set.
        @param artifacts: Comma separated list of artifacts to download.
        @param files: Comma separated list of files to download.

        @return: True if all artifacts are staged in devserver.
        """
        kwargs = {'target': target,
                  'build_id': build_id,
                  'branch': branch,
                  'artifacts': artifacts,
                  'files': files,
                  'os_type': 'android'}
        if archive_url:
            kwargs['archive_url'] = archive_url
        return self._poll_is_staged(**kwargs)


    @remote_devserver_call()
    def call_and_wait(self, call_name, target, build_id, branch, archive_url,
                      artifacts, files, error_message,
                      expected_response=SUCCESS):
        """Helper method to make a urlopen call, and wait for artifacts staged.

        @param call_name: name of devserver rpc call.
        @param target: Target of the android build to stage, e.g.,
                       shamu-userdebug.
        @param build_id: Build id of the android build to stage.
        @param branch: Branch of the android build to stage.
        @param archive_url: Google Storage URL for the build. Only sent to
                            the devserver when set.
        @param artifacts: Comma separated list of artifacts to download.
        @param files: Comma separated list of files to download.
        @param error_message: Error message to be thrown if response does not
                              match expected_response.
        @param expected_response: Expected response from rpc, default to
                                  |Success|. If it's set to None, do not compare
                                  the actual response. Any response is
                                  considered to be good.

        @return: The response from rpc.
        @raise DevServerException if the response does not match
               expected_response.

        """
        kwargs = {'target': target,
                  'build_id': build_id,
                  'branch': branch,
                  'artifacts': artifacts,
                  'files': files,
                  'os_type': 'android'}
        if archive_url:
            kwargs['archive_url'] = archive_url
        return self._call_and_wait(call_name, error_message, expected_response,
                                   **kwargs)


    @remote_devserver_call()
    def stage_artifacts(self, target=None, build_id=None, branch=None,
                        image=None, artifacts=None, files='', archive_url=None):
        """Tell the devserver to download and stage |artifacts| from |image|.

        This is the main call point for staging any specific artifacts for a
        given build. To see the list of artifacts one can stage see:

        ~src/platform/dev/artifact_info.py.

        This is maintained along with the actual devserver code.

        @param target: Target of the android build to stage, e.g.,
                               shamu-userdebug.
        @param build_id: Build id of the android build to stage.
        @param branch: Branch of the android build to stage.
        @param image: Name of a build to test, in the format of
                      branch/target/build_id. Only parsed when none of
                      target, build_id and branch is given.
        @param artifacts: A list of artifacts.
        @param files: A list of files to stage.
        @param archive_url: Optional parameter that has the archive_url to stage
                this artifact from. Default is specified in autotest config +
                image.

        @raise DevServerException if the build info is incomplete, if neither
               artifacts nor files is given, or upon any return code that's
               not HTTP OK.
        """
        if image and not target and not build_id and not branch:
            branch, target, build_id = utils.parse_launch_control_build(image)
        if not target or not build_id or not branch:
            raise DevServerException('Must specify all build info (target, '
                                     'build_id and branch) to stage.')

        # All three values are known to be truthy from here on, so no further
        # validation of the build info is needed.
        android_build_info = {'target': target,
                              'build_id': build_id,
                              'branch': branch}
        if not artifacts and not files:
            raise DevServerException('Must specify something to stage.')
        build = ANDROID_BUILD_NAME_PATTERN % android_build_info
        self._stage_artifacts(build, artifacts, files, archive_url,
                              **android_build_info)

    def get_pull_url(self, target, build_id, branch):
        """Get the url to pull files from the devserver.

        @param target: Target of the android build, e.g., shamu_userdebug
        @param build_id: Build id of the android build.
        @param branch: Branch of the android build.

        @return A url to pull files from the dev server given a specific
                android build.
        """
        return os.path.join(self.url(), 'static', branch, target, build_id)


    def trigger_download(self, target, build_id, branch, artifacts=None,
                         files='', os='android', synchronous=True):
        """Tell the devserver to download and stage an Android build.

        Tells the devserver to fetch an Android build from the image storage
        server named by _get_image_storage_server().

        If |synchronous| is True, waits for the entire download to finish
        staging before returning. Otherwise only the artifacts necessary
        to start installing images onto DUT's will be staged before returning.
        A caller can then call finish_download to guarantee the rest of the
        artifacts have finished staging.

        @param target: Target of the android build to stage, e.g.,
                       shamu-userdebug.
        @param build_id: Build id of the android build to stage.
        @param branch: Branch of the android build to stage.
        @param artifacts: A string of artifacts separated by comma. If None,
               use the default artifacts for Android or Brillo build.
        @param files: String of files separated by commas.
        @param os: OS artifacts to download (android/brillo). Note that this
               parameter shadows the os module inside this method.
        @param synchronous: if True, waits until all components of the image are
               staged before returning.

        @raise DevServerException upon any return code that's not HTTP OK.

        """
        android_build_info = {'target': target,
                              'build_id': build_id,
                              'branch': branch}
        build = ANDROID_BUILD_NAME_PATTERN % android_build_info
        if not artifacts:
            # The board is the part of the target before the first '-'.
            board = target.split('-')[0]
            artifacts = (
                android_utils.AndroidArtifacts.get_artifacts_for_reimage(
                        board, os))
        self._trigger_download(build, artifacts, files=files,
                               synchronous=synchronous, **android_build_info)


    def finish_download(self, target, build_id, branch, os='android'):
        """Tell the devserver to finish staging an Android build.

        If trigger_download is called with synchronous=False, it will return
        before all artifacts have been staged. This method contacts the
        devserver and blocks until all staging is completed and should be
        called after a call to trigger_download.

        @param target: Target of the android build to stage, e.g.,
                       shamu-userdebug.
        @param build_id: Build id of the android build to stage.
        @param branch: Branch of the android build to stage.
        @param os: OS artifacts to download (android/brillo). Note that this
               parameter shadows the os module inside this method.

        @raise DevServerException upon any return code that's not HTTP OK.
        """
        android_build_info = {'target': target,
                              'build_id': build_id,
                              'branch': branch}
        build = ANDROID_BUILD_NAME_PATTERN % android_build_info
        # The board is the part of the target before the first '-'.
        board = target.split('-')[0]
        # Honor |os| so that the artifact list is computed the same way as in
        # trigger_download's default path.
        artifacts = (
                android_utils.AndroidArtifacts.get_artifacts_for_reimage(
                        board, os))
        self._finish_download(build, artifacts, files='', **android_build_info)


    def get_staged_file_url(self, filename, target, build_id, branch):
        """Returns the url of a staged file for this image on the devserver.

        @param filename: Name of the file.
        @param target: Target of the android build to stage, e.g.,
                       shamu-userdebug.
        @param build_id: Build id of the android build to stage.
        @param branch: Branch of the android build to stage.

        @return: The url of a staged file for this image on the devserver.
        """
        android_build_info = {'target': target,
                              'build_id': build_id,
                              'branch': branch,
                              'os_type': 'android'}
        build = ANDROID_BUILD_NAME_PATTERN % android_build_info
        return '/'.join([self._get_image_url(build), filename])


    @remote_devserver_call()
    def translate(self, build_name):
        """Translate the build name if it's in LATEST format.

        If the build name is in the format [branch]/[target]/LATEST, return the
        latest build in Launch Control otherwise return the build name as is.
        The LATEST marker is matched case-insensitively.

        @param build_name: build_name to check.

        @return The actual build name to use.
        """
        branch, target, build_id = utils.parse_launch_control_build(build_name)
        if build_id.upper() != 'LATEST':
            return build_name
        # Ask the devserver which build id LATEST currently resolves to.
        call = self.build_call('latestbuild', branch=branch, target=target,
                               os_type='android')
        translated_build_id = self.run_call(call)
        translated_build = (ANDROID_BUILD_NAME_PATTERN %
                            {'branch': branch,
                             'target': target,
                             'build_id': translated_build_id})
        logging.debug('Translated relative build %s to %s', build_name,
                      translated_build)
        return translated_build
2733
2734
def _is_load_healthy(load):
    """Tell whether a devserver's load is within the allowed thresholds.

    The CPU load is checked first, then the network IO; the first threshold
    that is exceeded is logged at debug level.

    @param load: The devserver's load stats to check.

    @return: False if the CPU load exceeds DevServer.MAX_CPU_LOAD or the
             network IO exceeds DevServer.MAX_NETWORK_IO. Return True
             otherwise.

    """
    cpu_load = load[DevServer.CPU_LOAD]
    cpu_limit = DevServer.MAX_CPU_LOAD
    if cpu_load > cpu_limit:
        logging.debug('CPU load of devserver %s is at %s%%, which is higher '
                      'than the threshold of %s%%', load['devserver'],
                      cpu_load, cpu_limit)
        return False

    network_io = load[DevServer.NETWORK_IO]
    network_limit = DevServer.MAX_NETWORK_IO
    if network_io > network_limit:
        logging.debug('Network IO of devserver %s is at %i Bps, which is '
                      'higher than the threshold of %i bytes per second.',
                      load['devserver'], network_io, network_limit)
        return False

    return True
2757
2758
def _compare_load(devserver1, devserver2):
    """Comparator function to compare load between two devservers.

    Only the disk IO (DevServer.DISK_IO) of the two devservers is compared.
    The values are compared directly instead of truncating their difference
    to an int, so that loads differing by less than 1 are still ordered.

    @param devserver1: A dictionary of devserver load stats to be compared.
    @param devserver2: A dictionary of devserver load stats to be compared.

    @return: -1 if the load of `devserver1` is less than the load of
             `devserver2`, 1 if it is greater, and 0 if they are equal.

    """
    disk_io1 = devserver1[DevServer.DISK_IO]
    disk_io2 = devserver2[DevServer.DISK_IO]
    # Sign of the difference without int() truncation toward zero.
    return (disk_io1 > disk_io2) - (disk_io1 < disk_io2)
2770
2771
def _get_subnet_for_host_ip(host_ip,
                            restricted_subnets=utils.RESTRICTED_SUBNETS):
    """Find the restricted subnet that contains a given host IP.

    The subnets are checked in order and the first match wins.

    @param host_ip: the IP of a DUT.
    @param restricted_subnets: A list of restricted subnets, each one a
            (subnet_ip, mask_bits) pair.

    @return: a (subnet_ip, mask_bits) tuple. If no matched subnet for the
             host_ip, return (None, None).
    """
    # Lazy generator: evaluation stops at the first matching subnet.
    matching_subnets = (
            (ip, bits) for ip, bits in restricted_subnets
            if utils.is_in_same_subnet(host_ip, ip, bits))
    return next(matching_subnets, (None, None))
2787
2788
def get_least_loaded_devserver(devserver_type=ImageServer, hostname=None):
    """Get the devserver with the least load.

    Iterate through all devservers and get the one with least load.

    TODO(crbug.com/486278): Devserver with required build already staged should
    take higher priority. This will need check_health call to be able to verify
    existence of a given build/artifact. Also, in case all devservers are
    overloaded, the logic here should fall back to the old behavior that randomly
    selects a devserver based on the hash of the image name/url.

    @param devserver_type: Type of devserver to select from. Default is set to
                           ImageServer.
    @param hostname: Hostname of the dut that the devserver is used for. The
            picked devserver needs to respect the location of the host if
            `prefer_local_devserver` is set to True or `restricted_subnets` is
            set.

    @return: Name of the devserver with the least load, or None if no
             devserver can be selected (none healthy and no retry allowed,
             no load stats retrievable, or all devservers over threshold).

    """
    logging.debug('Get the least loaded %r', devserver_type)
    devservers, can_retry = devserver_type.get_available_devservers(
            hostname)
    # If no healthy devservers available and can_retry is False, return None.
    # Otherwise, relax the constraint on hostname, allow all devservers to be
    # available.
    if not devserver_type.get_healthy_devserver('', devservers):
        if not can_retry:
            return None
        devservers, _ = devserver_type.get_available_devservers()

    # get_devserver_load call needs to be made in a new process to allow force
    # timeout using signal.
    output = multiprocessing.Queue()
    processes = [
            multiprocessing.Process(
                    target=devserver_type.get_devserver_load_wrapper,
                    args=(devserver, TIMEOUT_GET_DEVSERVER_LOAD, output))
            for devserver in devservers]

    for p in processes:
        p.start()
    # Drain the queue BEFORE joining: a process that has put data on a
    # multiprocessing.Queue may not terminate until that data is consumed,
    # so joining first risks a deadlock. As before, this assumes each worker
    # puts exactly one result on the queue.
    loads = [output.get() for _ in processes]
    for p in processes:
        p.join()

    # Filter out any load failed to be retrieved or does not support load check.
    loads = [load for load in loads if load and DevServer.CPU_LOAD in load and
             DevServer.is_free_disk_ok(load) and
             DevServer.is_apache_client_count_ok(load)]
    if not loads:
        logging.debug('Failed to retrieve load stats from any devserver. No '
                      'load balancing can be applied.')
        return None
    loads = [load for load in loads if _is_load_healthy(load)]
    if not loads:
        logging.error('No devserver has the capacity to be selected.')
        return None
    # Least loaded first, as ordered by _compare_load.
    loads = sorted(loads, cmp=_compare_load)
    return loads[0]['devserver']
2850
2851
def resolve(build, hostname=None, ban_list=None):
    """Pick a devserver that can be used for the given build and hostname.

    Launch Control builds are dispatched to AndroidBuildServer; every other
    build is dispatched to ImageServer.

    @param build: Name of a build to stage on devserver, e.g.,
                  ChromeOS build: daisy-release/R50-1234.0.0
                  Launch Control build: git_mnc_release/shamu-eng
    @param hostname: Hostname of the host the devserver is for. Default is
            None, which means devserver is not restricted by the network
            location of the host.
    @param ban_list: The blacklist of devservers that shouldn't be chosen.
            It is only forwarded for non-Launch-Control builds.

    @return: A DevServer instance that can be used to stage given build for the
             given host.
    """
    if not utils.is_launch_control_build(build):
        return ImageServer.resolve(build, hostname, ban_list=ban_list)
    return AndroidBuildServer.resolve(build, hostname)
2869