• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2016 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5# repohooks/pre-upload.py currently does not run pylint. But for developers who
6# want to check their code manually we disable several harmless pylint warnings
7# which just distract from more serious remaining issues.
8#
9# The instance variable _android_cts is not defined in __init__().
10# pylint: disable=attribute-defined-outside-init
11#
12# Many short variable names don't follow the naming convention.
13# pylint: disable=invalid-name
14
15import logging
16import os
17import shutil
18
19from autotest_lib.client.common_lib import error
20from autotest_lib.server import utils
21from autotest_lib.server.cros import tradefed_test
22
# Notice if there are only a few failures each RETRY step currently (08/01/2016)
# takes a bit more than 6 minutes (mostly for reboot, login, starting ARC).
# In other words a RETRY of 10 takes about 1h, which is well within the limit of
# the 4h TIMEOUT. Nevertheless RETRY steps will cause the test to end with a
# test warning and should be treated as serious bugs.
# Finally, if we have a true test hang or even reboot, tradefed currently will
# likely hang until the TIMEOUT hits and no RETRY steps will happen.
# Maximum number of RETRY steps, keyed by the release channel of the DUT image.
_CTS_MAX_RETRY = {'dev': 9, 'beta': 9, 'stable': 9}
# Maximum default time allowed for each individual CTS package.
_CTS_TIMEOUT_SECONDS = (4 * 3600)

# Public download locations for android cts bundles.
_DL_CTS = 'https://dl.google.com/dl/android/cts/'
_CTS_URI = {
    'arm' : _DL_CTS + 'android-cts-6.0_r16-linux_x86-arm.zip',
    'x86' : _DL_CTS + 'android-cts-6.0_r16-linux_x86-x86.zip',
    'media' : _DL_CTS + 'android-cts-media-1.3.zip'
}
41
42
class cheets_CTS(tradefed_test.TradefedTest):
    """Sets up tradefed to run CTS tests."""
    version = 1

    def setup(self, bundle=None, uri=None):
        """Download and install a zipfile bundle from Google Storage.

        @param bundle: bundle name, which needs to be key of the _CTS_URI
                       dictionary. Can be 'arm', 'x86' and undefined.
        @param uri: URI of CTS bundle. Required if |bundle| is undefined.
        """
        if bundle in _CTS_URI:
            self._android_cts = self._install_bundle(_CTS_URI[bundle])
        else:
            self._android_cts = self._install_bundle(uri)

        self._cts_tradefed = os.path.join(
                self._android_cts,
                'android-cts',
                'tools',
                'cts-tradefed')
        logging.info('CTS-tradefed path: %s', self._cts_tradefed)

        # Load waivers and manual tests so TF doesn't re-run them.
        self.waivers_and_manual_tests = self._get_expected_failures(
                                                'expectations')
        # Load packages with no tests.
        self.notest_packages = self._get_expected_failures('notest_packages')

    def _clean_repository(self):
        """Ensures all old logs, results and plans are deleted.

        This function should be called at the start of each autotest iteration.
        """
        logging.info('Cleaning up repository.')
        repository = os.path.join(self._android_cts, 'android-cts',
                'repository')
        for directory in ['logs', 'results']:
            path = os.path.join(repository, directory)
            if os.path.exists(path):
                shutil.rmtree(path)
            self._safe_makedirs(path)

    def _install_plan(self, plan):
        """Copies a test plan from the test's bindir into the repository.

        @param plan: basename (without .xml) of the plan file to install.
        """
        logging.info('Install plan: %s', plan)
        plans_dir = os.path.join(self._android_cts, 'android-cts',
                'repository', 'plans')
        src_plan_file = os.path.join(self.bindir, 'plans', '%s.xml' % plan)
        shutil.copy(src_plan_file, plans_dir)

    def _tradefed_run_command(self,
                              package=None,
                              plan=None,
                              session_id=None,
                              test_class=None,
                              test_method=None):

        """Builds the CTS tradefed 'run' command line.

        There are five different usages:

        1. Test a package: assign the package name via |package|.
        2. Test a plan: assign the plan name via |plan|.
        3. Continue a session: assign the session ID via |session_id|.
        4. Run all test cases of a class: assign the class name via
           |test_class|.
        5. Run a specific test case: assign the class name and method name in
           |test_class| and |test_method|.

        @param package: the name of test package to be run.
        @param plan: name of the plan to be run.
        @param session_id: tradefed session id to continue.
        @param test_class: the name of the class of which all test cases will
                           be run.
        @param test_method: the name of the method of |test_class| to be run.
                            Must be used with |test_class|.
        @return: list of command tokens for the 'run' command.
        """
        if package is not None:
            cmd = ['run', 'commandAndExit', 'cts', '--package', package]
        elif plan is not None:
            cmd = ['run', 'commandAndExit', 'cts', '--plan', plan]
        elif session_id is not None:
            cmd = ['run', 'commandAndExit', 'cts', '--continue-session',
                   '%d' % session_id]
        elif test_class is not None:
            cmd = ['run', 'commandAndExit', 'cts', '-c', test_class]
            if test_method is not None:
                cmd += ['-m', test_method]
        else:
            logging.warning('Running all tests. This can take several days.')
            cmd = ['run', 'commandAndExit', 'cts', '--plan', 'CTS']
        # Automated media download is broken, so disable it. Instead we handle
        # this explicitly via _push_media(). This has the benefit of being
        # cached on the dev server. b/27245577
        cmd.append('--skip-media-download')

        # If we are running outside of the lab we can collect more data.
        if not utils.is_in_container():
            logging.info('Running outside of lab, adding extra debug options.')
            cmd.append('--log-level-display=DEBUG')
            cmd.append('--screenshot-on-failure')
            cmd.append('--collect-deqp-logs')
        # At early stage, cts-tradefed tries to reboot the device by
        # "adb reboot" command. In a real Android device case, when the
        # rebooting is completed, adb connection is re-established
        # automatically, and cts-tradefed expects that behavior.
        # However, in ARC, it doesn't work, so the whole test process
        # is just stuck. Here, disable the feature.
        cmd.append('--disable-reboot')
        # Create a logcat file for each individual failure.
        cmd.append('--logcat-on-failure')
        return cmd

    def _run_cts_tradefed(self, commands, datetime_id=None):
        """Runs tradefed, collects logs and returns the result counts.

        Assumes that only last entry of |commands| actually runs tests and has
        interesting output (results, logs) for collection. Ignores all other
        commands for this purpose. |commands| must be non-empty.

        @param commands: List of lists of command tokens.
        @param datetime_id: For 'continue' datetime of previous run is known.
                            Knowing it makes collecting logs more robust.
        @return: tuple of (tests, pass, fail, notexecuted, waived) counts.
        """
        for command in commands:
            # Assume only last command actually runs tests and has interesting
            # output (results, logs) for collection.
            logging.info('RUN: ./cts-tradefed %s', ' '.join(command))
            output = self._run(
                self._cts_tradefed,
                args=tuple(command),
                timeout=self._timeout,
                verbose=True,
                ignore_status=False,
                # Make sure to tee tradefed stdout/stderr to autotest logs
                # continuously during the test run.
                stdout_tee=utils.TEE_TO_LOGS,
                stderr_tee=utils.TEE_TO_LOGS)
            logging.info('END: ./cts-tradefed %s\n', ' '.join(command))
        result_destination = os.path.join(self.resultsdir, 'android-cts')
        # Gather the global log first. Datetime parsing below can abort the test
        # if tradefed startup had failed. Even then the global log is useful.
        self._collect_tradefed_global_log(output, result_destination)
        if not datetime_id:
            # Parse stdout to obtain datetime of the session. This is needed to
            # locate result xml files and logs.
            datetime_id = self._parse_tradefed_datetime(output, self.summary)
        # Collect tradefed logs for autotest.
        tradefed = os.path.join(self._android_cts, 'android-cts', 'repository')
        self._collect_logs(tradefed, datetime_id, result_destination)
        return self._parse_result(output, self.waivers_and_manual_tests)

    def _tradefed_continue(self, session_id, datetime_id=None):
        """Continues a previously started session.

        Attempts to run all 'notexecuted' tests.
        @param session_id: tradefed session id to continue.
        @param datetime_id: datetime of run to continue.
        @return: tuple of (tests, pass, fail, notexecuted, waived) counts.
        """
        # The list command is not required. It allows the reader to inspect the
        # tradefed state when examining the autotest logs.
        commands = [
                ['list', 'results'],
                self._tradefed_run_command(session_id=session_id)]
        return self._run_cts_tradefed(commands, datetime_id)

    def _tradefed_retry(self, test_name, session_id):
        """Retries failing tests in session.

        It is assumed that there are no notexecuted tests of session_id,
        otherwise some tests will be missed and never run.

        @param test_name: the name of test to be retried.
        @param session_id: tradefed session id to retry.
        @return: tuple of (new session_id, counts), where counts is the
                 (tests, pass, fail, notexecuted, waived) tuple of the run.
        """
        # Creating new test plan for retry.
        derivedplan = 'retry.%s.%s' % (test_name, session_id)
        logging.info('Retrying failures using derived plan %s.', derivedplan)
        # The list commands are not required. It allows the reader to inspect
        # the tradefed state when examining the autotest logs.
        commands = [
                ['list', 'plans'],
                ['add', 'derivedplan', '--plan', derivedplan, '--session', '%d'
                        % session_id, '-r', 'fail'],
                ['list', 'plans'],
                ['list', 'results'],
                self._tradefed_run_command(plan=derivedplan)]
        counts = self._run_cts_tradefed(commands)
        # TODO(ihf): Consider if diffing/parsing output of "list results" for
        # new session_id might be more reliable. For now just assume simple
        # increment. This works if only one tradefed instance is active and
        # only a single run command is executing at any moment.
        session_id += 1
        return (session_id, counts)

    def _get_release_channel(self):
        """Returns the DUT channel of the image ('dev', 'beta', 'stable')."""
        # TODO(ihf): check CHROMEOS_RELEASE_DESCRIPTION and return channel.
        return 'dev'

    def _get_channel_retry(self):
        """Returns the maximum number of retries for DUT image channel."""
        channel = self._get_release_channel()
        if channel in _CTS_MAX_RETRY:
            return _CTS_MAX_RETRY[channel]
        retry = _CTS_MAX_RETRY['dev']
        logging.warning('Could not establish channel. Using retry=%d.', retry)
        return retry

    def run_once(self,
                 target_package=None,
                 target_plan=None,
                 target_class=None,
                 target_method=None,
                 needs_push_media=False,
                 max_retry=None,
                 timeout=_CTS_TIMEOUT_SECONDS):
        """Runs the specified CTS once, but with several retries.

        There are four usages:
        1. Test the whole package named |target_package|.
        2. Test with a plan named |target_plan|.
        3. Run all the test cases of class named |target_class|.
        4. Run a specific test method named |target_method| of class
           |target_class|.

        @param target_package: the name of test package to run.
        @param target_plan: the name of the test plan to run.
        @param target_class: the name of the class to be tested.
        @param target_method: the name of the method to be tested.
        @param needs_push_media: need to push test media streams.
        @param max_retry: number of retry steps before reporting results.
        @param timeout: time after which tradefed can be interrupted.
        """
        # Don't download media for tests that don't need it. b/29371037
        # TODO(ihf): This can be removed once the control file generator is
        # aware of this constraint.
        if target_package is not None and target_package.startswith(
                'android.mediastress'):
            needs_push_media = True

        # On dev and beta channels timeouts are sharp, lenient on stable.
        self._timeout = timeout
        # Fixed: the method must be called; comparing the bound method object
        # itself to 'stable' was always False, so the extra hour was never
        # granted on the stable channel.
        if self._get_release_channel() == 'stable':
            self._timeout += 3600
        # Retries depend on channel.
        self._max_retry = (
            max_retry if max_retry is not None else self._get_channel_retry())
        session_id = 0

        steps = -1  # For historic reasons the first iteration is not counted.
        pushed_media = False
        total_tests = 0
        self.summary = ''
        if target_package is not None:
            test_name = 'package.%s' % target_package
            test_command = self._tradefed_run_command(package=target_package)
        elif target_plan is not None:
            test_name = 'plan.%s' % target_plan
            test_command = self._tradefed_run_command(plan=target_plan)
        elif target_class is not None:
            test_name = 'testcase.%s' % target_class
            if target_method is not None:
                test_name += '.' + target_method
            test_command = self._tradefed_run_command(
                test_class=target_class, test_method=target_method)
        else:
            test_command = self._tradefed_run_command()
            test_name = 'all_CTS'

        # Unconditionally run CTS package until we see some tests executed.
        while steps < self._max_retry and total_tests == 0:
            steps += 1
            with self._login_chrome(dont_override_profile=pushed_media):
                self._ready_arc()

                # Only push media for tests that need it. b/29371037
                if needs_push_media and not pushed_media:
                    self._push_media(_CTS_URI)
                    # copy_media.sh is not lazy, but we try to be.
                    pushed_media = True

                # Start each valid iteration with a clean repository. This
                # allows us to track session_id blindly.
                self._clean_repository()
                if target_plan is not None:
                    self._install_plan(target_plan)
                logging.info('Running %s:', test_name)

                # The list command is not required. It allows the reader to
                # inspect the tradefed state when examining the autotest logs.
                commands = [['list', 'results'], test_command]
                counts = self._run_cts_tradefed(commands)
                tests, passed, failed, notexecuted, waived = counts
                logging.info('RESULT: tests=%d, passed=%d, failed=%d, '
                        'notexecuted=%d, waived=%d', *counts)
                self.summary += 'run(t=%d, p=%d, f=%d, ne=%d, w=%d)' % counts
                if tests == 0 and target_package in self.notest_packages:
                    logging.info('Package has no tests as expected.')
                    return
                if tests > 0 and target_package in self.notest_packages:
                    # We expected no tests, but the new bundle drop must have
                    # added some for us. Alert us to the situation.
                    raise error.TestFail('Failed: Remove package %s from '
                                         'notest_packages directory!' %
                                         target_package)
                if tests == 0 and target_package not in self.notest_packages:
                    logging.error('Did not find any tests in package. Hoping '
                                  'this is transient. Retry after reboot.')
                # An internal self-check. We really should never hit this.
                if tests != passed + failed + notexecuted:
                    raise error.TestFail('Error: Test count inconsistent. %s' %
                                        self.summary)
                # Keep track of global counts as each continue/retry step below
                # works on local failures.
                total_tests = tests
                total_passed = passed
            # The DUT has rebooted at this point and is in a clean state.
        if total_tests == 0:
            raise error.TestFail('Error: Could not find any tests in package.')

        # If the results were not completed or were failing then continue or
        # retry them iteratively MAX_RETRY times.
        while steps < self._max_retry and (notexecuted > 0 or failed > waived):
            # First retry until there is no test is left that was not executed.
            while notexecuted > 0 and steps < self._max_retry:
                steps += 1
                with self._login_chrome(dont_override_profile=pushed_media):
                    self._ready_arc()
                    logging.info('Continuing session %d:', session_id)
                    # 'Continue' reports as passed all passing results in the
                    # current session (including all tests passing before
                    # continue). Hence first subtract the old count before
                    # adding the new count. (Same for failed.)
                    previously_passed = passed
                    previously_failed = failed
                    previously_notexecuted = notexecuted
                    # TODO(ihf): For increased robustness pass in datetime_id of
                    # session we are continuing.
                    counts = self._tradefed_continue(session_id)
                    tests, passed, failed, notexecuted, waived = counts
                    # Unfortunately tradefed sometimes encounters an error
                    # running the tests for instance timing out on downloading
                    # the media files. Check for this condition and give it one
                    # extra chance.
                    if not (tests == previously_notexecuted and
                            tests == passed + failed + notexecuted):
                        logging.warning('Tradefed inconsistency - retrying.')
                        counts = self._tradefed_continue(session_id)
                        tests, passed, failed, notexecuted, waived = counts
                    newly_passed = passed - previously_passed
                    newly_failed = failed - previously_failed
                    total_passed += newly_passed
                    logging.info('RESULT: total_tests=%d, total_passed=%d, step'
                            '(tests=%d, passed=%d, failed=%d, notexecuted=%d,'
                            ' waived=%d)', total_tests, total_passed, tests,
                            newly_passed, newly_failed, notexecuted, waived)
                    self.summary += ' cont(t=%d, p=%d, f=%d, ne=%d, w=%d)' % (
                            tests, newly_passed, newly_failed, notexecuted,
                            waived)
                    # An internal self-check. We really should never hit this.
                    if not (tests == previously_notexecuted and
                            tests == newly_passed + newly_failed + notexecuted):
                        logging.warning('Test count inconsistent. %s',
                                self.summary)
                # The DUT has rebooted at this point and is in a clean state.

            if notexecuted > 0:
                # This likely means there were too many crashes/reboots to
                # attempt running all tests. Don't attempt to retry as it is
                # impossible to pass at this stage (and also inconsistent).
                raise error.TestFail(
                    'Failed: Ran out of steps with %d total '
                    'passed and %d remaining not executed tests. %s' %
                    (total_passed, notexecuted, self.summary))

            # Managed to reduce notexecuted to zero. Now create a new test plan
            # to rerun only the failures we did encounter.
            if failed > waived:
                with self._login_chrome(dont_override_profile=pushed_media):
                    steps += 1
                    self._ready_arc()
                    logging.info('Retrying failures of %s with session_id %d:',
                            test_name, session_id)
                    previously_failed = failed
                    session_id, counts = self._tradefed_retry(
                            test_name, session_id)
                    tests, passed, failed, notexecuted, waived = counts
                    # Unfortunately tradefed sometimes encounters an error
                    # running the tests for instance timing out on downloading
                    # the media files. Check for this condition and give it one
                    # extra chance.
                    if not (tests == previously_failed and
                            tests == passed + failed + notexecuted):
                        logging.warning('Tradefed inconsistency - retrying.')
                        session_id, counts = self._tradefed_retry(
                                test_name, session_id)
                        tests, passed, failed, notexecuted, waived = counts
                    total_passed += passed
                    logging.info('RESULT: total_tests=%d, total_passed=%d, step'
                            '(tests=%d, passed=%d, failed=%d, notexecuted=%d,'
                            ' waived=%d)', total_tests, total_passed, *counts)
                    self.summary += (
                            ' retry(t=%d, p=%d, f=%d, ne=%d, w=%d)' % counts)
                    # An internal self-check. We really should never hit this.
                    if not (previously_failed == tests and
                            tests == passed + failed + notexecuted):
                        logging.warning('Test count inconsistent. %s',
                                self.summary)
                # The DUT has rebooted at this point and is in a clean state.

        # Final classification of test results.
        if total_passed + waived == 0 or notexecuted > 0 or failed > waived:
            raise error.TestFail(
                'Failed: after %d retries giving up. '
                'total_passed=%d, failed=%d, notexecuted=%d, waived=%d. %s' %
                (steps, total_passed, failed, notexecuted, waived,
                 self.summary))
        if steps > 0:
            # TODO(ihf): Make this error.TestPass('...') once available.
            raise error.TestWarn(
                'Passed: after %d retries passing %d tests, waived=%d. %s' %
                (steps, total_passed, waived, self.summary))
470