• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2018 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import contextlib
6import logging
7import os
8import random
9import re
10from xml.etree import ElementTree
11
12from autotest_lib.client.bin import utils as client_utils
13from autotest_lib.client.common_lib import utils as common_utils
14from autotest_lib.client.common_lib import error
15from autotest_lib.server import utils
16from autotest_lib.server.cros import lockfile
17
# Prefix prepended to a module name when building the per-module 'resultsdir'
# entry in get_perf_metrics_from_test_result_xml().
PERF_MODULE_NAME_PREFIX = 'CTS.'
19
@contextlib.contextmanager
def lock(filename):
    """Holds an exclusive file lock on the cache for the duration of a block.

    Keeps other autotest/tradefed instances from accessing the cache while
    the body of the `with` statement runs. The lock is released when the body
    exits, whether normally or through an exception.

    @param filename: The file to be locked.
    @raises error.TestFail: if the lock still cannot be acquired after more
                            than 11 attempts. The lock is broken before
                            raising so that following tests can proceed.
    """
    cache_lock = lockfile.FileLock(filename)
    # Calling cache_lock.acquire(3600) once looks simpler, but that
    # implementation has very poor temporal granularity (timeout/10), which is
    # unsuitable here. See /usr/lib64/python2.7/site-packages/lockfile/
    tries = 0
    while not cache_lock.i_am_locking():
        tries += 1
        logging.info('Waiting for cache lock...')
        # Use a float timeout rather than a random integer, as the filelock
        # implementations may underflow an integer division.
        timeout = random.uniform(0.0, 2.0 ** tries)
        try:
            cache_lock.acquire(timeout)
        except (lockfile.AlreadyLocked, lockfile.LockTimeout):
            # Wait long enough to be sure something very bad happened to the
            # locking thread. 11 attempts is between 15 and 30 minutes.
            if tries <= 11:
                continue
            # Normally the lock is acquired immediately. After waiting on the
            # order of 10 minutes either the dev server IO is overloaded or a
            # lock didn't get cleaned up. Break the lock and report a failure;
            # this should fix the lock for following tests. If the failure
            # affects more than one job look for a deadlock or dev server
            # overload.
            logging.error('Permanent lock failure. Trying to break lock.')
            # TODO(ihf): Think how to do this cleaner without having a
            # recursive lock breaking problem. We may have to kill every
            # job that is currently waiting. The main goal though really is
            # to have a cache that does not corrupt. And cache updates
            # only happen once a month or so, everything else are reads.
            cache_lock.break_lock()
            raise error.TestFail('Error: permanent cache lock failure.')
        else:
            logging.info('Acquired cache lock after %d attempts.', tries)
    try:
        yield
    finally:
        cache_lock.release()
        logging.info('Released cache lock.')
64
65
@contextlib.contextmanager
def adb_keepalive(targets, extra_paths):
    """A context manager that keeps the adb connection alive.

    AdbKeepalive will spin off one new process per target that will
    continuously poll for adb's connected state, and will attempt to
    reconnect if it ever goes down. This is the only way we can currently
    recover safely from (intentional) reboots.

    All spawned processes are killed and joined when the context exits, and
    also when starting one of them fails part-way through.

    @param targets: the hostnames and ports of the DUTs; one keepalive
                    process is started for each entry.
    @param extra_paths: any additional components to the PATH environment
                        variable.
    """
    from autotest_lib.client.common_lib.cros import adb_keepalive as module
    # |__file__| may point at the compiled bytecode of the module (.pyc/.pyo).
    # We want to run the original .py file, so swap only the extension rather
    # than substituting text anywhere in the path.
    script_filename = os.path.splitext(module.__file__)[0] + '.py'
    jobs = []
    try:
        # Start the jobs inside the try block so that a failure while
        # launching a later job still cleans up the ones already running.
        for target in targets:
            jobs.append(common_utils.BgJob(
                [script_filename, target],
                nickname='adb_keepalive',
                stderr_level=logging.DEBUG,
                stdout_tee=common_utils.TEE_TO_LOGS,
                stderr_tee=common_utils.TEE_TO_LOGS,
                extra_paths=extra_paths))
        yield
    finally:
        # The adb_keepalive.py script runs forever until SIGTERM is sent.
        for job in jobs:
            common_utils.nuke_subprocess(job.sp)
        common_utils.join_bg_jobs(jobs)
99
100
@contextlib.contextmanager
def pushd(d):
    """Temporarily switches the working directory, like the shell's pushd.

    The working directory in effect on entry is restored on exit, even if
    the body raises.

    @param d: the directory to change to.
    """
    previous_dir = os.getcwd()
    os.chdir(d)
    try:
        yield
    finally:
        os.chdir(previous_dir)
112
113
def parse_tradefed_result(result, waivers=None):
    """Scans tradefed console output for waived failures.

    @param result: The result stdout string from the tradefed command.
    @param waivers: a set() of tests which are permitted to fail.
    @return A tuple (waived, accurate). |waived| is a list holding each
            waived test name once per counted failure, capped at the number
            of ABIs seen. |accurate| is False when the output contains the
            notice that test counts may be inaccurate.
    """
    # Regular expressions for start/end messages of each test-run chunk.
    abi_pattern = r'arm\S*|x86\S*'
    # TODO(kinaba): use the current running module name.
    module_pattern = r'\S+'
    patterns = (abi_pattern, module_pattern)
    chunk_start = re.compile(
        r'(?:Start|Continu)ing (%s) %s with (\d+(?:,\d+)?) test' % patterns)
    chunk_end = re.compile(
        r'(%s) %s (?:complet|fail)ed in .*\. (\d+) passed, (\d+) failed, '
        r'(\d+) not executed' % patterns)
    test_failure = re.compile(r'I/ConsoleReporter.* (\S+) fail:')
    inaccurate_notice = re.compile(
        r'IMPORTANT: Some modules failed to run to completion, '
        r'tests counts may be inaccurate')

    abis = set()
    waived_count = {}
    failed_tests = set()
    accurate = True
    for line in result.splitlines():
        started = chunk_start.search(line)
        if started:
            abis.add(started.group(1))
            continue

        ended = chunk_end.search(line)
        if ended:
            ended_abi = ended.group(1)
            if ended_abi not in abis:
                logging.error('Trunk end with %s abi but have not seen '
                              'any trunk start with this abi.(%s)',
                              ended_abi, line)
            continue

        failure = test_failure.search(line)
        if failure:
            name = failure.group(1)
            if waivers and name in waivers:
                waived_count[name] = waived_count.get(name, 0) + 1
            else:
                failed_tests.add(name)
            continue

        # b/66899135, tradefed may report inaccurately with `list results`.
        # Remember if the summary section says that the result is inaccurate.
        if inaccurate_notice.search(line):
            accurate = False

    logging.info('Total ABIs: %s', abis)
    if failed_tests:
        logging.error('Failed (but not waived) tests:\n%s',
                      '\n'.join(sorted(failed_tests)))

    # TODO(dhaddock): Find a more robust way to apply waivers.
    waived = []
    for name, failures in waived_count.items():
        if failures > len(abis):
            # This should be an error.TestFail, but unfortunately tradefed
            # has a bug that emits "fail" twice when a test failed during
            # teardown. It will anyway cause a test count inconsistency that
            # is visible on the dashboard.
            logging.error('Found %d failures for %s but there are only %d '
                          'abis: %s', failures, name, len(abis), abis)
            failures = len(abis)
        waived.extend([name] * failures)
        logging.info('Waived failure for %s %d time(s)', name, failures)
    logging.info('Total waived = %s', waived)
    return waived, accurate
183
184
def select_32bit_java():
    """Points JAVA_HOME and PATH at the 32 bit java, if it is installed.

    The 32 bit java (as found in lab lxc images) saves about 30-40%
    server/shard memory during the run. Nothing is changed outside of a
    container, on moblab, or when the 32 bit java directory does not exist.
    """
    if not utils.is_in_container() or client_utils.is_moblab():
        return
    java_home = '/usr/lib/jvm/java-8-openjdk-i386'
    if not os.path.exists(java_home):
        return
    logging.info('Found 32 bit java, switching to use it.')
    os.environ['JAVA_HOME'] = java_home
    # Put the 32 bit java binaries ahead of everything already on PATH.
    java_bin = os.path.join(java_home, 'bin')
    os.environ['PATH'] = java_bin + os.pathsep + os.environ['PATH']
195
196# A similar implementation in Java can be found at
197# https://android.googlesource.com/platform/test/suite_harness/+/refs/heads/\
198# pie-cts-dev/common/util/src/com/android/compatibility/common/util/\
199# ResultHandler.java
def get_test_result_xml_path(results_destination):
    """Get the path of test_result.xml from the last session.

    @param results_destination: directory containing one subdirectory per
                                session.
    @return The lexicographically largest <session>/test_result.xml path
            that exists inside a real (non-symlink) subdirectory, or None if
            there is no such file.
    """
    candidates = []
    for entry in os.listdir(results_destination):
        session_dir = os.path.join(results_destination, entry)
        # `islink` is checked explicitly because `isdir` returns true if
        # |session_dir| is a symbolic link to a directory.
        if os.path.islink(session_dir) or not os.path.isdir(session_dir):
            continue
        xml_path = os.path.join(session_dir, 'test_result.xml')
        if os.path.exists(xml_path):
            candidates.append(xml_path)
    # Session directories are named YYYY.MM.DD_HH.MM.SS, so the last session
    # always has the lexicographically largest path.
    return max(candidates) if candidates else None
220
221
def get_perf_metrics_from_test_result_xml(result_path, resultsdir):
    """Yields one dict per supported <Metric /> found in test_result.xml.

    Each dict can be used as kwargs of |TradefedTest.output_perf_value|.
    Metrics whose score_type is neither 'higher_better' nor 'lower_better'
    are skipped with a warning. Any exception raised while parsing is logged
    as a warning and ends the iteration.

    @param result_path: path of the test_result.xml file to parse.
    @param resultsdir: base directory; each yielded 'resultsdir' is
                       <resultsdir>/tests/<PERF_MODULE_NAME_PREFIX + module>.
    """
    supported_score_types = ('higher_better', 'lower_better')
    try:
        tree = ElementTree.parse(result_path)
        for module_node in tree.iter('Module'):
            module_name = module_node.get('name')
            for testcase_node in module_node.iter('TestCase'):
                testcase_name = testcase_node.get('name')
                for test_node in testcase_node.iter('Test'):
                    test_name = test_node.get('name')
                    for metric_node in test_node.iter('Metric'):
                        score_type = metric_node.get('score_type')
                        if score_type in supported_score_types:
                            higher_is_better = score_type == 'higher_better'
                            units = metric_node.get('score_unit')
                            yield {
                                'description': testcase_name + '#' + test_name,
                                # The value is the text of the metric's
                                # first child element.
                                'value': metric_node[0].text,
                                'units': units,
                                'higher_is_better': higher_is_better,
                                'resultsdir': os.path.join(
                                    resultsdir, 'tests',
                                    PERF_MODULE_NAME_PREFIX + module_name),
                            }
                        else:
                            logging.warning(
                                'Unsupported score_type in %s/%s/%s',
                                module_name, testcase_name, test_name)
    except Exception as e:
        # Best effort: a broken or missing result file must not fail the
        # caller, so the problem is only reported.
        message = ('Exception raised in |tradefed_utils.'
                   'get_perf_metrics_from_test_result_xml|: {0}')
        logging.warning(message.format(e))
255