• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import glob
6import logging
7import os
8import re
9import sys
10import urllib2
11import urlparse
12
13from autotest_lib.client.bin import utils
14from autotest_lib.client.common_lib import error, global_config
15from autotest_lib.client.common_lib.cros import dev_server
16from autotest_lib.server import autotest
17from autotest_lib.server import utils as server_utils
18from autotest_lib.server.cros.dynamic_suite import constants as ds_constants
19from autotest_lib.server.cros.dynamic_suite import tools
20from chromite.lib import retry_util
21
22try:
23    from chromite.lib import metrics
24except ImportError:
25    metrics = utils.metrics_mock
26
27
28def _metric_name(base_name):
29    return 'chromeos/autotest/provision/' + base_name
30
31
32# Local stateful update path is relative to the CrOS source directory.
33UPDATER_IDLE = 'UPDATE_STATUS_IDLE'
34UPDATER_NEED_REBOOT = 'UPDATE_STATUS_UPDATED_NEED_REBOOT'
35# A list of update engine client states that occur after an update is triggered.
36UPDATER_PROCESSING_UPDATE = ['UPDATE_STATUS_CHECKING_FOR_UPDATE',
37                             'UPDATE_STATUS_UPDATE_AVAILABLE',
38                             'UPDATE_STATUS_DOWNLOADING',
39                             'UPDATE_STATUS_FINALIZING',
40                             'UPDATE_STATUS_VERIFYING',
41                             'UPDATE_STATUS_REPORTING_ERROR_EVENT',
42                             'UPDATE_STATUS_ATTEMPTING_ROLLBACK']
43
44
45_STATEFUL_UPDATE_SCRIPT = 'stateful_update'
46_QUICK_PROVISION_SCRIPT = 'quick-provision'
47
48_UPDATER_BIN = '/usr/bin/update_engine_client'
49_UPDATER_LOGS = ['/var/log/messages', '/var/log/update_engine']
50
51_KERNEL_A = {'name': 'KERN-A', 'kernel': 2, 'root': 3}
52_KERNEL_B = {'name': 'KERN-B', 'kernel': 4, 'root': 5}
53
54# Time to wait for new kernel to be marked successful after
55# auto update.
56_KERNEL_UPDATE_TIMEOUT = 120
57
58
59# PROVISION_FAILED - A flag file to indicate provision failures.  The
60# file is created at the start of any AU procedure (see
61# `ChromiumOSUpdater._prepare_host()`).  The file's location in
# stateful means that on successful update it will be removed.  Thus, if
63# this file exists, it indicates that we've tried and failed in a
64# previous attempt to update.
65PROVISION_FAILED = '/var/tmp/provision_failed'
66
67
68# A flag file used to enable special handling in lab DUTs.  Some
69# parts of the system in Chromium OS test images will behave in ways
70# convenient to the test lab when this file is present.  Generally,
71# we create this immediately after any update completes.
72_LAB_MACHINE_FILE = '/mnt/stateful_partition/.labmachine'
73
74
75# _TARGET_VERSION - A file containing the new version to which we plan
76# to update.  This file is used by the CrOS shutdown code to detect and
77# handle certain version downgrade cases.  Specifically:  Downgrading
78# may trigger an unwanted powerwash in the target build when the
79# following conditions are met:
80#  * Source build is a v4.4 kernel with R69-10756.0.0 or later.
81#  * Target build predates the R69-10756.0.0 cutoff.
82# When this file is present and indicates a downgrade, the OS shutdown
83# code on the DUT knows how to prevent the powerwash.
84_TARGET_VERSION = '/run/update_target_version'
85
86
87# _REBOOT_FAILURE_MESSAGE - This is the standard message text returned
88# when the Host.reboot() method fails.  The source of this text comes
89# from `wait_for_restart()` in client/common_lib/hosts/base_classes.py.
90
91_REBOOT_FAILURE_MESSAGE = 'Host did not return from reboot'
92
93
94DEVSERVER_PORT = '8082'
95GS_CACHE_PORT = '8888'
96
97
class RootFSUpdateError(error.TestFail):
    """Raised when the RootFS fails to update.

    A subclass of `error.TestFail` that adds no behavior of its own; it
    exists so callers can distinguish root filesystem update failures
    from other test failures.
    """
100
101
class StatefulUpdateError(error.TestFail):
    """Raised when the stateful partition fails to update.

    A subclass of `error.TestFail` that adds no behavior of its own; it
    exists so callers can distinguish stateful partition update failures
    from other test failures.
    """
104
105
class _AttributedUpdateError(error.TestFail):
    """Base class for update failures that carry an attributed cause.

    The exception text has the form '<attribution>: <msg>'.  Concrete
    subclasses are expected to provide two class attributes:
      * `_SUMMARY` - base text reported by `failure_summary`.
      * `_CLASSIFIERS` - sequence of `(pattern, classification)` pairs;
        each pattern is tried with `re.match()` against the original
        `msg`.
    """

    def __init__(self, attribution, msg):
        """Initialize the error.

        @param attribution: Text describing what the failure is
                            attributed to.
        @param msg: Description of the failure itself.
        """
        full_text = '%s: %s' % (attribution, msg)
        super(_AttributedUpdateError, self).__init__(full_text)
        self._message = msg

    def _classify(self):
        """Return the classification of the first matching classifier.

        @return The classification paired with the first pattern in
                `_CLASSIFIERS` that matches the start of the message,
                or `None` when no pattern matches.
        """
        matching = (label for regex, label in self._CLASSIFIERS
                    if re.match(regex, self._message))
        return next(matching, None)

    @property
    def failure_summary(self):
        """Summarize this error for metrics reporting."""
        label = self._classify()
        if not label:
            return self._SUMMARY
        return '%s: %s' % (self._SUMMARY, label)
128
129
class HostUpdateError(_AttributedUpdateError):
    """Update failure whose most likely cause is the DUT itself.

    Raise this when the failure most likely stems from a condition that
    existed on the DUT before the update began, such as a hardware
    problem or a bug in the software already on the DUT.
    """

    DUT_DOWN = 'No answer to ssh'

    _SUMMARY = 'DUT failed prior to update'
    _CLASSIFIERS = [
        (DUT_DOWN, DUT_DOWN),
        (_REBOOT_FAILURE_MESSAGE, 'Reboot failed'),
    ]

    def __init__(self, hostname, msg):
        """Initialize the error.

        @param hostname: Name of the DUT the failure is attributed to.
        @param msg: Description of the failure.
        """
        attribution = 'Error on %s prior to update' % hostname
        super(HostUpdateError, self).__init__(attribution, msg)
149
150
class DevServerError(_AttributedUpdateError):
    """Update failure whose most likely cause is the devserver.

    Raise this when the failure most likely stems from the devserver
    that serves the target image for the update.
    """

    _SUMMARY = 'Devserver failed prior to update'
    _CLASSIFIERS = []

    def __init__(self, devserver, msg):
        """Initialize the error.

        @param devserver: The devserver the failure is attributed to.
        @param msg: Description of the failure.
        """
        attribution = 'Devserver error on %s' % devserver
        super(DevServerError, self).__init__(attribution, msg)
164
165
class ImageInstallError(_AttributedUpdateError):
    """Update failure while installing the image from the devserver.

    Raise this when the target DUT fails to download and install the
    target image from the devserver, and either the devserver or the
    DUT might be at fault.
    """

    _SUMMARY = 'Image failed to download and install'
    _CLASSIFIERS = []

    def __init__(self, hostname, devserver, msg):
        """Initialize the error.

        @param hostname: Name of the DUT being updated.
        @param devserver: The devserver the image was served from.
        @param msg: Description of the failure.
        """
        attribution = ('Download and install failed from %s onto %s'
                       % (devserver, hostname))
        super(ImageInstallError, self).__init__(attribution, msg)
181
182
class NewBuildUpdateError(_AttributedUpdateError):
    """Update failure whose most likely cause is the target build.

    Raise this when updating to a new build fails and the most likely
    cause is a bug in the newly installed target build.
    """

    CHROME_FAILURE = 'Chrome failed to reach login screen'
    UPDATE_ENGINE_FAILURE = ('update-engine failed to call '
                             'chromeos-setgoodkernel')
    ROLLBACK_FAILURE = 'System rolled back to previous build'

    _SUMMARY = 'New build failed'
    _CLASSIFIERS = [
        (CHROME_FAILURE, 'Chrome did not start'),
        (UPDATE_ENGINE_FAILURE, 'update-engine did not start'),
        (ROLLBACK_FAILURE, ROLLBACK_FAILURE),
    ]

    def __init__(self, update_version, msg):
        """Initialize the error.

        @param update_version: Version of the build being installed.
        @param msg: Description of the failure.
        """
        attribution = 'Failure in build %s' % update_version
        super(NewBuildUpdateError, self).__init__(attribution, msg)

    @property
    def failure_summary(self):
        """Return one fixed summary, regardless of `_CLASSIFIERS`."""
        return 'Build failed to work after installing'
211
212
213def _url_to_version(update_url):
214    """Return the version based on update_url.
215
216    @param update_url: url to the image to update to.
217
218    """
219    # The Chrome OS version is generally the last element in the URL. The only
220    # exception is delta update URLs, which are rooted under the version; e.g.,
221    # http://.../update/.../0.14.755.0/au/0.14.754.0. In this case we want to
222    # strip off the au section of the path before reading the version.
223    return re.sub('/au/.*', '',
224                  urlparse.urlparse(update_url).path).split('/')[-1].strip()
225
226
def url_to_image_name(update_url):
    """Return the image name embedded in an update URL.

    For example, the URL
        http://172.22.50.205:8082/update/lumpy-release/R27-3837.0.0
    yields lumpy-release/R27-3837.0.0

    @param update_url: url to the image to update to.
    @returns a string made of the last two path elements of the URL,
             joined by '/'.

    """
    path_elements = urlparse.urlparse(update_url).path.split('/')
    return '/'.join(path_elements[-2:])
239
240
def get_update_failure_reason(exception):
    """Translate an exception into a failure reason for metrics.

    `exception` should be one raised by a failure of
    `ChromiumOSUpdater.run_update`.  The result describes the failure.
    A falsy `exception` produces `None`.

    The set of possible return strings is deliberately restricted to a
    limited enumeration so that the value can be used in Monarch
    metrics without worrying about the cardinality of the range of
    string values.

    @param exception  Exception to be converted to a failure reason.

    @return A string suitable for use in Monarch metrics, or `None`.
    """
    if not exception:
        return None
    if isinstance(exception, _AttributedUpdateError):
        return exception.failure_summary
    return 'Unknown Error: %s' % type(exception).__name__
264
265
def _get_devserver_build_from_update_url(update_url):
    """Get the devserver and build from the update url.

    The `image_url_pattern` value from the 'CROS' section of the global
    config is turned into a regular expression by replacing each '%s'
    with a capturing group of non-whitespace characters, and that
    expression is searched for in `update_url`.

    @param update_url: The url for update.
        Eg: http://devserver:port/update/build.

    @return: A tuple of the captured groups, expected to be
        (devserver url, build).

    @raises ValueError: If the update_url doesn't match the expected pattern.
    @raises ValueError: If no global_config was found, or it doesn't contain an
        image_url_pattern.
    """
    pattern = global_config.global_config.get_config_value(
            'CROS', 'image_url_pattern', type=str, default='')
    if not pattern:
        raise ValueError('Cannot parse update_url, the global config needs '
                'an image_url_pattern.')
    # Raw string: '\S' is not a valid string escape, so a plain literal
    # only works by accident and triggers warnings on newer interpreters.
    # NOTE(review): the literal parts of `pattern` are used as regex text
    # without escaping (e.g. '.' matches any character).
    re_pattern = pattern.replace('%s', r'(\S+)')
    parts = re.search(re_pattern, update_url)
    if not parts or len(parts.groups()) < 2:
        raise ValueError('%s is not an update url' % update_url)
    return parts.groups()
289
290
291def _list_image_dir_contents(update_url):
292    """Lists the contents of the devserver for a given build/update_url.
293
294    @param update_url: An update url. Eg: http://devserver:port/update/build.
295    """
296    if not update_url:
297        logging.warning('Need update_url to list contents of the devserver.')
298        return
299    error_msg = 'Cannot check contents of devserver, update url %s' % update_url
300    try:
301        devserver_url, build = _get_devserver_build_from_update_url(update_url)
302    except ValueError as e:
303        logging.warning('%s: %s', error_msg, e)
304        return
305    devserver = dev_server.ImageServer(devserver_url)
306    try:
307        devserver.list_image_dir(build)
308    # The devserver will retry on URLError to avoid flaky connections, but will
309    # eventually raise the URLError if it persists. All HTTPErrors get
310    # converted to DevServerExceptions.
311    except (dev_server.DevServerException, urllib2.URLError) as e:
312        logging.warning('%s: %s', error_msg, e)
313
314
def _get_metric_fields(update_url):
    """Build the dict of metric fields for an update URL.

    The fields are used when sending autoupdate metrics.  If the build
    name in the URL cannot be parsed, a warning is logged and the
    board, build type and milestone fields are empty strings.

    @param update_url  Metrics fields will be calculated from this URL.

    @return A dict with the keys 'dev_server', 'board', 'build_type'
            and 'milestone'.
    """
    build_name = url_to_image_name(update_url)
    try:
        board, build_type, milestone, _ = server_utils.ParseBuildName(
            build_name)
    except server_utils.ParseBuildNameException:
        logging.warning('Unable to parse build name %s for metrics. '
                        'Continuing anyway.', build_name)
        board = build_type = milestone = ''

    fields = {
        'board': board,
        'build_type': build_type,
        'milestone': milestone,
    }
    fields['dev_server'] = dev_server.get_resolved_hostname(update_url)
    return fields
336
337
338# TODO(garnold) This implements shared updater functionality needed for
339# supporting the autoupdate_EndToEnd server-side test. We should probably
340# migrate more of the existing ChromiumOSUpdater functionality to it as we
341# expand non-CrOS support in other tests.
342class ChromiumOSUpdater(object):
343    """Chromium OS specific DUT update functionality."""
344
345    def __init__(self, update_url, host=None, interactive=True,
346                 use_quick_provision=False):
347        """Initializes the object.
348
349        @param update_url: The URL we want the update to use.
350        @param host: A client.common_lib.hosts.Host implementation.
351        @param interactive: Bool whether we are doing an interactive update.
352        @param use_quick_provision: Whether we should attempt to perform
353            the update using the quick-provision script.
354        """
355        self.update_url = update_url
356        self.host = host
357        self.interactive = interactive
358        self.update_version = _url_to_version(update_url)
359        self._use_quick_provision = use_quick_provision
360
361
362    def _run(self, cmd, *args, **kwargs):
363        """Abbreviated form of self.host.run(...)"""
364        return self.host.run(cmd, *args, **kwargs)
365
366
367    def check_update_status(self):
368        """Returns the current update engine state.
369
370        We use the `update_engine_client -status' command and parse the line
371        indicating the update state, e.g. "CURRENT_OP=UPDATE_STATUS_IDLE".
372        """
373        update_status = self.host.run(command='%s -status | grep CURRENT_OP' %
374                                      _UPDATER_BIN)
375        return update_status.stdout.strip().split('=')[-1]
376
377
378    def _rootdev(self, options=''):
379        """Returns the stripped output of rootdev <options>.
380
381        @param options: options to run rootdev.
382
383        """
384        return self._run('rootdev %s' % options).stdout.strip()
385
386
387    def get_kernel_state(self):
388        """Returns the (<active>, <inactive>) kernel state as a pair.
389
390        @raise RootFSUpdateError if the DUT reports a root partition
391                number that isn't one of the known valid values.
392        """
393        active_root = int(re.findall('\d+\Z', self._rootdev('-s'))[0])
394        if active_root == _KERNEL_A['root']:
395            return _KERNEL_A, _KERNEL_B
396        elif active_root == _KERNEL_B['root']:
397            return _KERNEL_B, _KERNEL_A
398        else:
399            raise RootFSUpdateError(
400                    'Encountered unknown root partition: %s' % active_root)
401
402
403    def _cgpt(self, flag, kernel):
404        """Return numeric cgpt value for the specified flag, kernel, device."""
405        return int(self._run('cgpt show -n -i %d %s $(rootdev -s -d)' % (
406            kernel['kernel'], flag)).stdout.strip())
407
408
409    def _get_next_kernel(self):
410        """Return the kernel that has priority for the next boot."""
411        priority_a = self._cgpt('-P', _KERNEL_A)
412        priority_b = self._cgpt('-P', _KERNEL_B)
413        if priority_a > priority_b:
414            return _KERNEL_A
415        else:
416            return _KERNEL_B
417
418
419    def _get_kernel_success(self, kernel):
420        """Return boolean success flag for the specified kernel.
421
422        @param kernel: information of the given kernel, either _KERNEL_A
423            or _KERNEL_B.
424        """
425        return self._cgpt('-S', kernel) != 0
426
427
428    def _get_kernel_tries(self, kernel):
429        """Return tries count for the specified kernel.
430
431        @param kernel: information of the given kernel, either _KERNEL_A
432            or _KERNEL_B.
433        """
434        return self._cgpt('-T', kernel)
435
436
437    def _get_last_update_error(self):
438        """Get the last autoupdate error code."""
439        command_result = self._run(
440                 '%s --last_attempt_error' % _UPDATER_BIN)
441        return command_result.stdout.strip().replace('\n', ', ')
442
443
444    def _base_update_handler_no_retry(self, run_args):
445        """Base function to handle a remote update ssh call.
446
447        @param run_args: Dictionary of args passed to ssh_host.run function.
448
449        @throws: intercepts and re-throws all exceptions
450        """
451        try:
452            self.host.run(**run_args)
453        except Exception as e:
454            logging.debug('exception in update handler: %s', e)
455            raise e
456
457
458    def _base_update_handler(self, run_args, err_msg_prefix=None):
459        """Handle a remote update ssh call, possibly with retries.
460
461        @param run_args: Dictionary of args passed to ssh_host.run function.
462        @param err_msg_prefix: Prefix of the exception error message.
463        """
464        def exception_handler(e):
465            """Examines exceptions and returns True if the update handler
466            should be retried.
467
468            @param e: the exception intercepted by the retry util.
469            """
470            return (isinstance(e, error.AutoservSSHTimeout) or
471                    (isinstance(e, error.GenericHostRunError) and
472                     hasattr(e, 'description') and
473                     (re.search('ERROR_CODE=37', e.description) or
474                      re.search('generic error .255.', e.description))))
475
476        try:
477            # Try the update twice (arg 2 is max_retry, not including the first
478            # call).  Some exceptions may be caught by the retry handler.
479            retry_util.GenericRetry(exception_handler, 1,
480                                    self._base_update_handler_no_retry,
481                                    run_args)
482        except Exception as e:
483            message = err_msg_prefix + ': ' + str(e)
484            raise RootFSUpdateError(message)
485
486
487    def _wait_for_update_service(self):
488        """Ensure that the update engine daemon is running, possibly
489        by waiting for it a bit in case the DUT just rebooted and the
490        service hasn't started yet.
491        """
492        def handler(e):
493            """Retry exception handler.
494
495            Assumes that the error is due to the update service not having
496            started yet.
497
498            @param e: the exception intercepted by the retry util.
499            """
500            if isinstance(e, error.AutoservRunError):
501                logging.debug('update service check exception: %s\n'
502                              'retrying...', e)
503                return True
504            else:
505                return False
506
507        # Retry at most three times, every 5s.
508        status = retry_util.GenericRetry(handler, 3,
509                                         self.check_update_status,
510                                         sleep=5)
511
512        # Expect the update engine to be idle.
513        if status != UPDATER_IDLE:
514            raise RootFSUpdateError(
515                    'Update engine status is %s (%s was expected).'
516                    % (status, UPDATER_IDLE))
517
518
519    def _reset_update_engine(self):
520        """Resets the host to prepare for a clean update regardless of state."""
521        self._run('stop ui || true')
522        self._run('stop update-engine || true')
523        self._run('start update-engine')
524        self._wait_for_update_service()
525
526
527    def _reset_stateful_partition(self):
528        """Clear any pending stateful update request."""
529        self._run('%s --stateful_change=reset 2>&1'
530                  % self._get_stateful_update_script())
531        self._run('rm -f %s' % _TARGET_VERSION)
532
533
534    def _set_target_version(self):
535        """Set the "target version" for the update."""
536        version_number = self.update_version.split('-')[1]
537        self._run('echo %s > %s' % (version_number, _TARGET_VERSION))
538
539
540    def _revert_boot_partition(self):
541        """Revert the boot partition."""
542        part = self._rootdev('-s')
543        logging.warning('Reverting update; Boot partition will be %s', part)
544        return self._run('/postinst %s 2>&1' % part)
545
546
547    def _verify_kernel_state(self):
548        """Verify that the next kernel to boot is correct for update.
549
550        This tests that the kernel state is correct for a successfully
551        downloaded and installed update.  That is, the next kernel to
552        boot must be the currently inactive kernel.
553
554        @raise RootFSUpdateError if the DUT next kernel isn't the
555                expected next kernel.
556        """
557        inactive_kernel = self.get_kernel_state()[1]
558        next_kernel = self._get_next_kernel()
559        if next_kernel != inactive_kernel:
560            raise RootFSUpdateError(
561                    'Update failed.  The kernel for next boot is %s, '
562                    'but %s was expected.'
563                    % (next_kernel['name'], inactive_kernel['name']))
564        return inactive_kernel
565
566
567    def _verify_update_completed(self):
568        """Verifies that an update has completed.
569
570        @raise RootFSUpdateError if the DUT doesn't indicate that
571                download is complete and the DUT is ready for reboot.
572        """
573        status = self.check_update_status()
574        if status != UPDATER_NEED_REBOOT:
575            error_msg = ''
576            if status == UPDATER_IDLE:
577                error_msg = 'Update error: %s' % self._get_last_update_error()
578            raise RootFSUpdateError(
579                    'Update engine status is %s (%s was expected).  %s'
580                    % (status, UPDATER_NEED_REBOOT, error_msg))
581        return self._verify_kernel_state()
582
583
584    def trigger_update(self):
585        """Triggers a background update."""
586        # If this function is called immediately after reboot (which it
587        # can be), there is no guarantee that the update engine is up
588        # and running yet, so wait for it.
589        self._wait_for_update_service()
590
591        autoupdate_cmd = ('%s --check_for_update --omaha_url=%s' %
592                          (_UPDATER_BIN, self.update_url))
593        run_args = {'command': autoupdate_cmd}
594        err_prefix = 'Failed to trigger an update on %s. ' % self.host.hostname
595        logging.info('Triggering update via: %s', autoupdate_cmd)
596        metric_fields = {'success': False}
597        try:
598            self._base_update_handler(run_args, err_prefix)
599            metric_fields['success'] = True
600        finally:
601            c = metrics.Counter('chromeos/autotest/autoupdater/trigger')
602            metric_fields.update(_get_metric_fields(self.update_url))
603            c.increment(fields=metric_fields)
604
605
606    def update_image(self):
607        """Updates the device root FS and kernel and verifies success."""
608        autoupdate_cmd = ('%s --update --omaha_url=%s' %
609                          (_UPDATER_BIN, self.update_url))
610        if not self.interactive:
611            autoupdate_cmd = '%s --interactive=false' % autoupdate_cmd
612        run_args = {'command': autoupdate_cmd, 'timeout': 3600}
613        err_prefix = ('Failed to install device image using payload at %s '
614                      'on %s. ' % (self.update_url, self.host.hostname))
615        logging.info('Updating image via: %s', autoupdate_cmd)
616        metric_fields = {'success': False}
617        try:
618            self._base_update_handler(run_args, err_prefix)
619            metric_fields['success'] = True
620        finally:
621            c = metrics.Counter('chromeos/autotest/autoupdater/update')
622            metric_fields.update(_get_metric_fields(self.update_url))
623            c.increment(fields=metric_fields)
624        return self._verify_update_completed()
625
626
627    def _get_remote_script(self, script_name):
628        """Ensure that `script_name` is present on the DUT.
629
630        The given script (e.g. `stateful_update`) may be present in the
631        stateful partition under /usr/local/bin, or we may have to
632        download it from the devserver.
633
634        Determine whether the script is present or must be downloaded
635        and download if necessary.  Then, return a command fragment
636        sufficient to run the script from whereever it now lives on the
637        DUT.
638
639        @param script_name  The name of the script as expected in
640                            /usr/local/bin and on the devserver.
641        @return A string with the command (minus arguments) that will
642                run the target script.
643        """
644        remote_script = '/usr/local/bin/%s' % script_name
645        if self.host.path_exists(remote_script):
646            return remote_script
647        remote_tmp_script = '/tmp/%s' % script_name
648        server_name = urlparse.urlparse(self.update_url)[1]
649        script_url = 'http://%s/static/%s' % (server_name, script_name)
650        fetch_script = 'curl -Ss -o %s %s && head -1 %s' % (
651            remote_tmp_script, script_url, remote_tmp_script)
652
653        first_line = self._run(fetch_script).stdout.strip()
654
655        if first_line and first_line.startswith('#!'):
656            script_interpreter = first_line.lstrip('#!')
657            if script_interpreter:
658                return '%s %s' % (script_interpreter, remote_tmp_script)
659        return None
660
661    def _get_stateful_update_script(self):
662        """Returns a command to run the stateful update script.
663
664        Find `stateful_update` on the target or install it, as
665        necessary.  If installation fails, raise an exception.
666
667        @raise StatefulUpdateError if the script can't be found or
668            installed.
669        @return A string that can be joined with arguments to run the
670            `stateful_update` command on the DUT.
671        """
672        script_command = self._get_remote_script(_STATEFUL_UPDATE_SCRIPT)
673        if not script_command:
674            raise StatefulUpdateError('Could not install %s on DUT'
675                                      % _STATEFUL_UPDATE_SCRIPT)
676        return script_command
677
678
679    def rollback_rootfs(self, powerwash):
680        """Triggers rollback and waits for it to complete.
681
682        @param powerwash: If true, powerwash as part of rollback.
683
684        @raise RootFSUpdateError if anything went wrong.
685        """
686        version = self.host.get_release_version()
687        # Introduced can_rollback in M36 (build 5772). # etc/lsb-release matches
688        # X.Y.Z. This version split just pulls the first part out.
689        try:
690            build_number = int(version.split('.')[0])
691        except ValueError:
692            logging.error('Could not parse build number.')
693            build_number = 0
694
695        if build_number >= 5772:
696            can_rollback_cmd = '%s --can_rollback' % _UPDATER_BIN
697            logging.info('Checking for rollback.')
698            try:
699                self._run(can_rollback_cmd)
700            except error.AutoservRunError as e:
701                raise RootFSUpdateError("Rollback isn't possible on %s: %s" %
702                                        (self.host.hostname, str(e)))
703
704        rollback_cmd = '%s --rollback --follow' % _UPDATER_BIN
705        if not powerwash:
706            rollback_cmd += ' --nopowerwash'
707
708        logging.info('Performing rollback.')
709        try:
710            self._run(rollback_cmd)
711        except error.AutoservRunError as e:
712            raise RootFSUpdateError('Rollback failed on %s: %s' %
713                                    (self.host.hostname, str(e)))
714
715        self._verify_update_completed()
716
717
718    def update_stateful(self, clobber=True):
719        """Updates the stateful partition.
720
721        @param clobber: If True, a clean stateful installation.
722
723        @raise StatefulUpdateError if the update script fails to
724                complete successfully.
725        """
726        logging.info('Updating stateful partition...')
727        statefuldev_url = self.update_url.replace('update', 'static')
728
729        # Attempt stateful partition update; this must succeed so that the newly
730        # installed host is testable after update.
731        statefuldev_cmd = [self._get_stateful_update_script(), statefuldev_url]
732        if clobber:
733            statefuldev_cmd.append('--stateful_change=clean')
734
735        statefuldev_cmd.append('2>&1')
736        try:
737            self._run(' '.join(statefuldev_cmd), timeout=1200)
738        except error.AutoservRunError:
739            raise StatefulUpdateError(
740                    'Failed to perform stateful update on %s' %
741                    self.host.hostname)
742
743
    def verify_boot_expectations(self, expected_kernel, rollback_message):
        """Verifies that we fully booted given expected kernel state.

        This method both verifies that we booted using the correct kernel
        state and that the OS has marked the kernel as good.

        The check has two stages:
          1. The active kernel must equal `expected_kernel`.  If not,
             debugging information is logged and the failure is raised
             with `rollback_message`.
          2. The active kernel must reach zero tries and a set success
             flag within `_KERNEL_UPDATE_TIMEOUT` seconds.  If not, the
             state of the `system-services` job decides which failure
             message is reported.

        @param expected_kernel: kernel that we are verifying with,
            i.e. I expect to be booted onto partition 4 etc. See output of
            get_kernel_state.
        @param rollback_message: string include in except message text
            if we booted with the wrong partition.

        @raise NewBuildUpdateError if any of the various checks fail.
        """
        # Figure out the newly active kernel.
        active_kernel = self.get_kernel_state()[0]

        # Check for rollback due to a bad build.
        if active_kernel != expected_kernel:

            # Kernel crash reports should be wiped between test runs, but
            # may persist from earlier parts of the test, or from problems
            # with provisioning.
            #
            # Kernel crash reports will NOT be present if the crash happened
            # before encrypted stateful is mounted.
            #
            # TODO(dgarrett): Integrate with server/crashcollect.py at some
            # point.
            #
            # NOTE(review): glob.glob() inspects the filesystem of the
            # machine running this code, whereas the other checks in this
            # method go to the DUT through self._run() - confirm that
            # this is intended.
            kernel_crashes = glob.glob('/var/spool/crash/kernel.*.kcrash')
            if kernel_crashes:
                rollback_message += ': kernel_crash'
                logging.debug('Found %d kernel crash reports:',
                              len(kernel_crashes))
                # The crash names contain timestamps that may be useful:
                #   kernel.20131207.005945.0.kcrash
                for crash in kernel_crashes:
                    logging.debug('  %s', os.path.basename(crash))

            # Print out some information to make it easier to debug
            # the rollback.
            logging.debug('Dumping partition table.')
            self._run('cgpt show $(rootdev -s -d)')
            logging.debug('Dumping crossystem for firmware debugging.')
            self._run('crossystem --all')
            raise NewBuildUpdateError(self.update_version, rollback_message)

        # Make sure chromeos-setgoodkernel runs.
        try:
            # Poll every 5 seconds until the active kernel has no tries
            # left and is flagged successful.  The RootFSUpdateError
            # instance is handed to the poller as the exception to use on
            # timeout, and is caught immediately below.
            utils.poll_for_condition(
                lambda: (self._get_kernel_tries(active_kernel) == 0
                         and self._get_kernel_success(active_kernel)),
                exception=RootFSUpdateError(),
                timeout=_KERNEL_UPDATE_TIMEOUT, sleep_interval=5)
        except RootFSUpdateError:
            # Choose the failure text: if system-services is not
            # reported as running, blame Chrome; otherwise blame
            # update-engine.
            services_status = self._run('status system-services').stdout
            if services_status != 'system-services start/running\n':
                event = NewBuildUpdateError.CHROME_FAILURE
            else:
                event = NewBuildUpdateError.UPDATE_ENGINE_FAILURE
            raise NewBuildUpdateError(self.update_version, event)
805
806
807    def _prepare_host(self):
808        """Make sure the target DUT is working and ready for update.
809
810        Initially, the target DUT's state is unknown.  The DUT is
811        expected to be online, but we strive to be forgiving if Chrome
812        and/or the update engine aren't fully functional.
813        """
814        # Summary of work, and the rationale:
815        #  1. Reboot, because it's a good way to clear out problems.
816        #  2. Touch the PROVISION_FAILED file, to allow repair to detect
817        #     failure later.
818        #  3. Run the hook for host class specific preparation.
819        #  4. Stop Chrome, because the system is designed to eventually
820        #     reboot if Chrome is stuck in a crash loop.
821        #  5. Force `update-engine` to start, because if Chrome failed
822        #     to start properly, the status of the `update-engine` job
823        #     will be uncertain.
824        if not self.host.is_up():
825            raise HostUpdateError(self.host.hostname,
826                                  HostUpdateError.DUT_DOWN)
827        self._reset_stateful_partition()
828        self.host.reboot(timeout=self.host.REBOOT_TIMEOUT)
829        self._run('touch %s' % PROVISION_FAILED)
830        self.host.prepare_for_update()
831        self._reset_update_engine()
832        logging.info('Updating from version %s to %s.',
833                     self.host.get_release_version(),
834                     self.update_version)
835
836
837    def _install_via_update_engine(self):
838        """Install an updating using the production AU flow.
839
840        This uses the standard AU flow and the `stateful_update` script
841        to download and install a root FS, kernel and stateful
842        filesystem content.
843
844        @return The kernel expected to be booted next.
845        """
846        logging.info('Installing image using update_engine.')
847        expected_kernel = self.update_image()
848        self.update_stateful()
849        self._set_target_version()
850        return expected_kernel
851
852
853    def _quick_provision_with_gs_cache(self, provision_command, devserver_name,
854                                       image_name):
855        """Run quick_provision using GsCache server.
856
857        @param provision_command: The path of quick_provision command.
858        @param devserver_name: The devserver name and port (optional).
859        @param image_name: The image to be installed.
860        """
861        logging.info('Try quick provision with gs_cache.')
862        # If enabled, GsCache server listion on different port on the
863        # devserver.
864        gs_cache_server = devserver_name.replace(DEVSERVER_PORT, GS_CACHE_PORT)
865        gs_cache_url = ('http://%s/download/chromeos-image-archive'
866                        % gs_cache_server)
867
868        # Check if GS_Cache server is enabled on the server.
869        self._run('curl -s -o /dev/null %s' % gs_cache_url)
870
871        command = '%s --noreboot %s %s' % (provision_command, image_name,
872                                           gs_cache_url)
873        self._run(command)
874        metrics.Counter(_metric_name('quick_provision')).increment(
875                fields={'devserver': devserver_name, 'gs_cache': True})
876
877
878    def _quick_provision_with_devserver(self, provision_command,
879                                        devserver_name, image_name):
880        """Run quick_provision using legacy devserver.
881
882        @param provision_command: The path of quick_provision command.
883        @param devserver_name: The devserver name and port (optional).
884        @param image_name: The image to be installed.
885        """
886        logging.info('Try quick provision with devserver.')
887        ds = dev_server.ImageServer('http://%s' % devserver_name)
888        try:
889            ds.stage_artifacts(image_name, ['quick_provision', 'stateful'])
890        except dev_server.DevServerException as e:
891            raise error.TestFail, str(e), sys.exc_info()[2]
892
893        static_url = 'http://%s/static' % devserver_name
894        command = '%s --noreboot %s %s' % (provision_command, image_name,
895                                           static_url)
896        self._run(command)
897        metrics.Counter(_metric_name('quick_provision')).increment(
898                fields={'devserver': devserver_name, 'gs_cache': False})
899
900
901    def _install_via_quick_provision(self):
902        """Install an updating using the `quick-provision` script.
903
904        This uses the `quick-provision` script to download and install
905        a root FS, kernel and stateful filesystem content.
906
907        @return The kernel expected to be booted next.
908        """
909        if not self._use_quick_provision:
910            return None
911        image_name = url_to_image_name(self.update_url)
912        logging.info('Installing image using quick-provision.')
913        provision_command = self._get_remote_script(_QUICK_PROVISION_SCRIPT)
914        server_name = urlparse.urlparse(self.update_url)[1]
915        try:
916            try:
917                self._quick_provision_with_gs_cache(provision_command,
918                                                    server_name, image_name)
919            except Exception:
920                self._quick_provision_with_devserver(provision_command,
921                                                     server_name, image_name)
922
923            self._set_target_version()
924            return self._verify_kernel_state()
925        except Exception:
926            # N.B.  We handle only `Exception` here.  Non-Exception
927            # classes (such as KeyboardInterrupt) are handled by our
928            # caller.
929            logging.exception('quick-provision script failed; '
930                              'will fall back to update_engine.')
931            self._revert_boot_partition()
932            self._reset_stateful_partition()
933            self._reset_update_engine()
934            return None
935
936
937    def _install_update(self):
938        """Install the requested image on the DUT, but don't start it.
939
940        This downloads and installs a root FS, kernel and stateful
941        filesystem content.  This does not reboot the DUT, so the update
942        is merely pending when the method returns.
943
944        @return The kernel expected to be booted next.
945        """
946        logging.info('Installing image at %s onto %s',
947                     self.update_url, self.host.hostname)
948        try:
949            return (self._install_via_quick_provision()
950                    or self._install_via_update_engine())
951        except:
952            # N.B. This handling code includes non-Exception classes such
953            # as KeyboardInterrupt.  We need to clean up, but we also must
954            # re-raise.
955            self._revert_boot_partition()
956            self._reset_stateful_partition()
957            self._reset_update_engine()
958            # Collect update engine logs in the event of failure.
959            if self.host.job:
960                logging.info('Collecting update engine logs due to failure...')
961                self.host.get_file(
962                        _UPDATER_LOGS, self.host.job.sysinfo.sysinfodir,
963                        preserve_perm=False)
964            _list_image_dir_contents(self.update_url)
965            raise
966
967
968    def _complete_update(self, expected_kernel):
969        """Finish the update, and confirm that it succeeded.
970
971        Initial condition is that the target build has been downloaded
972        and installed on the DUT, but has not yet been booted.  This
973        function is responsible for rebooting the DUT, and checking that
974        the new build is running successfully.
975
976        @param expected_kernel: kernel expected to be active after reboot.
977        """
978        # Regarding the 'crossystem' command below: In some cases,
979        # the update flow puts the TPM into a state such that it
980        # fails verification.  We don't know why.  However, this
981        # call papers over the problem by clearing the TPM during
982        # the reboot.
983        #
984        # We ignore failures from 'crossystem'.  Although failure
985        # here is unexpected, and could signal a bug, the point of
986        # the exercise is to paper over problems; allowing this to
987        # fail would defeat the purpose.
988        self._run('crossystem clear_tpm_owner_request=1',
989                  ignore_status=True)
990        self.host.reboot(timeout=self.host.REBOOT_TIMEOUT)
991
992        # Touch the lab machine file to leave a marker that
993        # distinguishes this image from other test images.
994        # Afterwards, we must re-run the autoreboot script because
995        # it depends on the _LAB_MACHINE_FILE.
996        autoreboot_cmd = ('FILE="%s" ; [ -f "$FILE" ] || '
997                          '( touch "$FILE" ; start autoreboot )')
998        self._run(autoreboot_cmd % _LAB_MACHINE_FILE)
999        self.verify_boot_expectations(
1000                expected_kernel, NewBuildUpdateError.ROLLBACK_FAILURE)
1001
1002        logging.debug('Cleaning up old autotest directories.')
1003        try:
1004            installed_autodir = autotest.Autotest.get_installed_autodir(
1005                    self.host)
1006            self._run('rm -rf ' + installed_autodir)
1007        except autotest.AutodirNotFoundError:
1008            logging.debug('No autotest installed directory found.')
1009
1010
1011    def run_update(self):
1012        """Perform a full update of a DUT in the test lab.
1013
1014        This downloads and installs the root FS and stateful partition
1015        content needed for the update specified in `self.host` and
1016        `self.update_url`.  The update is performed according to the
1017        requirements for provisioning a DUT for testing the requested
1018        build.
1019
1020        At the end of the procedure, metrics are reported describing the
1021        outcome of the operation.
1022
1023        @returns A tuple of the form `(image_name, attributes)`, where
1024            `image_name` is the name of the image installed, and
1025            `attributes` is new attributes to be applied to the DUT.
1026        """
1027        server_name = dev_server.get_resolved_hostname(self.update_url)
1028        metrics.Counter(_metric_name('install')).increment(
1029                fields={'devserver': server_name})
1030
1031        try:
1032            self._prepare_host()
1033        except _AttributedUpdateError:
1034            raise
1035        except Exception as e:
1036            logging.exception('Failure preparing host prior to update.')
1037            raise HostUpdateError(self.host.hostname, str(e))
1038
1039        try:
1040            expected_kernel = self._install_update()
1041        except _AttributedUpdateError:
1042            raise
1043        except Exception as e:
1044            logging.exception('Failure during download and install.')
1045            raise ImageInstallError(self.host.hostname, server_name, str(e))
1046
1047        try:
1048            self._complete_update(expected_kernel)
1049        except _AttributedUpdateError:
1050            raise
1051        except Exception as e:
1052            logging.exception('Failure from build after update.')
1053            raise NewBuildUpdateError(self.update_version, str(e))
1054
1055        image_name = url_to_image_name(self.update_url)
1056        # update_url is different from devserver url needed to stage autotest
1057        # packages, therefore, resolve a new devserver url here.
1058        devserver_url = dev_server.ImageServer.resolve(
1059                image_name, self.host.hostname).url()
1060        repo_url = tools.get_package_url(devserver_url, image_name)
1061        return image_name, {ds_constants.JOB_REPO_URL: repo_url}
1062