• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2018 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import contextlib
6import logging
7import os
8import random
9import re
10
11from autotest_lib.client.bin import utils as client_utils
12from autotest_lib.client.common_lib import utils as common_utils
13from autotest_lib.client.common_lib import error
14from autotest_lib.server import utils
15from autotest_lib.server.cros import lockfile
16
17
@contextlib.contextmanager
def lock(filename):
    """Serializes access to the cache across autotest/tradefed instances.

    Holds a file lock on |filename| while the with-body runs and releases it
    on exit, whether or not the body raised.

    @param filename: The file to be locked.
    @raise error.TestFail: if acquiring the lock still fails on an attempt
                           past the 11th. The existing lock is broken before
                           raising so that following tests can proceed.
    """
    cache_lock = lockfile.FileLock(filename)
    # A single cache_lock.acquire(3600) looks simpler, but the implementation
    # has very poor temporal granularity (timeout/10), which is unsuitable
    # here. See /usr/lib64/python2.7/site-packages/lockfile/
    # Instead, retry with a randomized timeout whose ceiling doubles on every
    # attempt.
    tries = 0
    while not cache_lock.i_am_locking():
        tries += 1
        logging.info('Waiting for cache lock...')
        # The timeout must be a float rather than a random integer, as the
        # filelock implementations may underflow an integer division.
        timeout = random.uniform(0.0, pow(2.0, tries))
        try:
            cache_lock.acquire(timeout)
        except (lockfile.AlreadyLocked, lockfile.LockTimeout):
            # Keep retrying until we have waited long enough to be sure
            # something very bad happened to whoever holds the lock.
            if tries <= 11:
                continue
            # Normally the lock is acquired immediately. After waiting this
            # long either the dev server IO is overloaded or a lock didn't
            # get cleaned up. Take one for the team: break the lock and
            # report a failure, which should fix the lock for following
            # tests. If the failure affects more than one job look for a
            # deadlock or dev server overload.
            logging.error('Permanent lock failure. Trying to break lock.')
            # TODO(ihf): Think how to do this cleaner without having a
            # recursive lock breaking problem. We may have to kill every
            # job that is currently waiting. The main goal though really is
            # to have a cache that does not corrupt. And cache updates
            # only happen once a month or so, everything else are reads.
            cache_lock.break_lock()
            raise error.TestFail('Error: permanent cache lock failure.')
        logging.info('Acquired cache lock after %d attempts.', tries)
    try:
        yield
    finally:
        cache_lock.release()
        logging.info('Released cache lock.')
62
63
@contextlib.contextmanager
def adb_keepalive(target, extra_paths):
    """Runs the adb_keepalive script in the background for the with-body.

    The adb_keepalive.py script is started as a background job on entry and
    is killed and joined on exit, even if the body raised. The script itself
    is not visible from here; it is expected to continuously poll adb's
    connected state and reconnect whenever the connection goes down, which
    is currently the only safe way to recover from (intentional) reboots.

    @param target: the hostname and port of the DUT.
    @param extra_paths: any additional components to the PATH environment
                        variable.
    """
    from autotest_lib.client.common_lib.cros import adb_keepalive as module
    # |__file__| may point at the compiled bytecode of the module. The
    # original .py source is what has to be executed, so map the extension
    # back.
    keepalive_script = module.__file__.replace('.pyc', '.py')
    command = [keepalive_script, target]
    keepalive_job = common_utils.BgJob(
        command,
        nickname='adb_keepalive',
        stderr_level=logging.DEBUG,
        stdout_tee=common_utils.TEE_TO_LOGS,
        stderr_tee=common_utils.TEE_TO_LOGS,
        extra_paths=extra_paths)

    try:
        yield
    finally:
        # The script never exits on its own (it runs until it is signalled),
        # so terminate it explicitly before collecting the job.
        common_utils.nuke_subprocess(keepalive_job.sp)
        common_utils.join_bg_jobs([keepalive_job])
96
97
@contextlib.contextmanager
def pushd(d):
    """Temporarily switches the working directory, like the shell's pushd.

    The directory that was current on entry is restored when the with-body
    finishes, including when it exits with an exception.

    @param d: the directory to change to.
    """
    previous_dir = os.getcwd()
    os.chdir(d)
    try:
        yield
    finally:
        # Always return to where we started.
        os.chdir(previous_dir)
109
110
def _add_chunk_total(total_test, last_notexec, abi, ntest):
    """Adds the announced test count of one chunk to the per-ABI total.

    The number of tests the previous chunk of the same ABI reported as not
    executed is subtracted, presumably because a continued run announces
    those tests again and they would otherwise be counted twice.

    @param total_test: dict mapping ABI to accumulated test count; updated
                       in place.
    @param last_notexec: dict mapping ABI to the not-executed count of the
                         most recent chunk of that ABI.
    @param abi: the ABI the chunk belongs to.
    @param ntest: number of tests announced at the start of the chunk.
    """
    total_test[abi] = (
        total_test.get(abi, 0) + ntest - last_notexec.get(abi, 0))


def parse_tradefed_result(result, waivers=None):
    """Check the result from the tradefed output.

    Scans the output line by line for the start and end messages of each
    test-run chunk, accumulates the counts per ABI and finally sums them
    over all ABIs.

    @param result: The result stdout string from the tradefed command.
    @param waivers: a set() of tests which are permitted to fail.
    @return 5-tuple (tests, passed, failed, notexecuted, waived)
    @raise error.TestFail: if a start message appears while a chunk is still
                           open, if an end message does not belong to the
                           open chunk (and is not a tolerated duplicate), or
                           if more failures were waived than were counted.
    """
    # Regular expressions for start/end messages of each test-run chunk.
    abi_re = r'arm\S*|x86\S*'
    # TODO(kinaba): use the current running module name.
    module_re = r'\S+'
    # The test count may carry any number of thousands separators (e.g.
    # "1,234,567"); they are stripped before conversion below.
    start_re = re.compile(r'(?:Start|Continu)ing (%s) %s with'
                          r' (\d+(?:,\d+)*) test' % (abi_re, module_re))
    end_re = re.compile(r'(%s) %s (?:complet|fail)ed in .*\.'
                        r' (\d+) passed, (\d+) failed, (\d+) not executed' %
                        (abi_re, module_re))

    # Records the result per each ABI.
    total_test = dict()
    total_pass = dict()
    total_fail = dict()
    last_notexec = dict()

    # ABI and the test count for the current chunk. |abi| is None whenever no
    # chunk is open.
    abi = None
    ntest = None
    # Counts of the most recent end message, used to recognize duplicates.
    prev_npass = prev_nfail = prev_nnotexec = None

    for line in result.splitlines():
        # Beginning of a chunk of tests.
        match = start_re.search(line)
        if match:
            if abi:
                raise error.TestFail('Error: Unexpected test start: ' + line)
            abi = match.group(1)
            ntest = int(match.group(2).replace(',', ''))
            prev_npass = prev_nfail = prev_nnotexec = None
        else:
            # End of the current chunk.
            match = end_re.search(line)
            if not match:
                continue

            npass, nfail, nnotexec = map(int, match.group(2, 3, 4))
            if abi != match.group(1):
                # When the last case crashed during teardown, tradefed emits two
                # end-messages with possibly increased fail count. Ignore it.
                if (prev_npass == npass and
                    (prev_nfail == nfail or prev_nfail == nfail - 1) and
                        prev_nnotexec == nnotexec):
                    continue
                raise error.TestFail('Error: Unexpected test end: ' + line)
            prev_npass, prev_nfail, prev_nnotexec = npass, nfail, nnotexec

            # When the test crashes too often, tradefed seems to finish the
            # iteration by running "0 tests, 0 passed, ...". Do not count
            # that in.
            if ntest > 0:
                _add_chunk_total(total_test, last_notexec, abi, ntest)
                total_pass[abi] = total_pass.get(abi, 0) + npass
                total_fail[abi] = total_fail.get(abi, 0) + nfail
                last_notexec[abi] = nnotexec
            abi = None

    if abi:
        # When tradefed crashes badly, it may exit without printing the counts
        # from the last chunk. Regard them as not executed and retry (rather
        # than aborting the test cycle at this point.)
        if ntest > 0:
            _add_chunk_total(total_test, last_notexec, abi, ntest)
            last_notexec[abi] = ntest
        logging.warning('No result reported for the last chunk. ' +
                        'Assuming all not executed.')

    # TODO(rohitbm): make failure parsing more robust by extracting the list
    # of failing tests instead of searching in the result blob. As well as
    # only parse for waivers for the running ABI.
    waived = 0
    if waivers:
        abis = list(total_test)
        for testname in waivers:
            # TODO(dhaddock): Find a more robust way to apply waivers.
            fail_count = (
                result.count(testname + ' FAIL') +
                result.count(testname + ' fail'))
            if fail_count:
                if fail_count > len(abis):
                    # This should be an error.TestFail, but unfortunately
                    # tradefed has a bug that emits "fail" twice when a
                    # test failed during teardown. It will anyway cause
                    # a test count inconsistency and be visible on the
                    # dashboard.
                    logging.error('Found %d failures for %s '
                                  'but there are only %d abis: %s', fail_count,
                                  testname, len(abis), abis)
                waived += fail_count
                logging.info('Waived failure for %s %d time(s)', testname,
                             fail_count)
    # Sum every per-ABI counter over all ABIs and append the waived count.
    counts = tuple(
        sum(count_per_abi.values())
        for count_per_abi in (total_test, total_pass, total_fail,
                              last_notexec)) + (waived,)
    msg = (
        'tests=%d, passed=%d, failed=%d, not_executed=%d, waived=%d' % counts)
    logging.info(msg)
    # Waiving more failures than were counted means the bookkeeping is off.
    if counts[2] - waived < 0:
        raise error.TestFail('Error: Internal waiver bookkeeping has '
                             'become inconsistent (%s)' % msg)
    return counts
221
222
def select_32bit_java():
    """Points JAVA_HOME and PATH at the 32 bit java installation, if any.

    Only applies when running inside a container that is not a moblab, and
    only when the 32 bit JVM directory exists (like in lab lxc images). Using
    it is meant to save about 30-40% server/shard memory during the run.
    """
    if not utils.is_in_container() or client_utils.is_moblab():
        return
    java = '/usr/lib/jvm/java-8-openjdk-i386'
    if not os.path.exists(java):
        return
    logging.info('Found 32 bit java, switching to use it.')
    os.environ['JAVA_HOME'] = java
    # Put the 32 bit java binaries in front of everything already on PATH.
    java_bin = os.path.join(java, 'bin')
    os.environ['PATH'] = java_bin + os.pathsep + os.environ['PATH']
233