# Copyright 2018 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import contextlib
import logging
import os
import random
import re
from xml.etree import ElementTree

from autotest_lib.client.bin import utils as client_utils
from autotest_lib.client.common_lib import utils as common_utils
from autotest_lib.client.common_lib import error
from autotest_lib.server import utils
from autotest_lib.server.cros import lockfile

PERF_MODULE_NAME_PREFIX = 'CTS.'


@contextlib.contextmanager
def lock(filename):
    """Prevents other autotest/tradefed instances from accessing cache.

    The lock is acquired with randomized, exponentially growing timeouts and
    is released when the managed block exits.

    @param filename: The file to be locked.
    @raises error.TestFail: if the lock could not be acquired after more
                            than 11 attempts. The lock is broken before
                            raising.
    """
    filelock = lockfile.FileLock(filename)
    # It is tempting just to call filelock.acquire(3600). But the implementation
    # has very poor temporal granularity (timeout/10), which is unsuitable for
    # our needs. See /usr/lib64/python2.7/site-packages/lockfile/
    attempts = 0
    while not filelock.i_am_locking():
        try:
            attempts += 1
            logging.info('Waiting for cache lock...')
            # We must not use a random integer as the filelock implementations
            # may underflow an integer division.
            filelock.acquire(random.uniform(0.0, pow(2.0, attempts)))
        except (lockfile.AlreadyLocked, lockfile.LockTimeout):
            # Our goal is to wait long enough to be sure something very bad
            # happened to the locking thread. 11 attempts is between 15 and
            # 30 minutes.
            if attempts > 11:
                # Normally we should acquire the lock immediately. Once we
                # wait on the order of 10 minutes either the dev server IO is
                # overloaded or a lock didn't get cleaned up. Take one for the
                # team, break the lock and report a failure. This should fix
                # the lock for following tests. If the failure affects more than
                # one job look for a deadlock or dev server overload.
                logging.error('Permanent lock failure. Trying to break lock.')
                # TODO(ihf): Think how to do this cleaner without having a
                # recursive lock breaking problem. We may have to kill every
                # job that is currently waiting. The main goal though really is
                # to have a cache that does not corrupt. And cache updates
                # only happen once a month or so, everything else are reads.
                filelock.break_lock()
                raise error.TestFail('Error: permanent cache lock failure.')
        else:
            logging.info('Acquired cache lock after %d attempts.', attempts)
    try:
        yield
    finally:
        filelock.release()
        logging.info('Released cache lock.')


@contextlib.contextmanager
def adb_keepalive(targets, extra_paths):
    """A context manager that keeps the adb connection alive.

    AdbKeepalive will spin off a new process that will continuously poll for
    adb's connected state, and will attempt to reconnect if it ever goes down.
    This is the only way we can currently recover safely from (intentional)
    reboots.

    @param targets: an iterable of DUT targets (hostname and port). One
                    keepalive background job is started per target.
    @param extra_paths: any additional components to the PATH environment
                        variable.
    """
    from autotest_lib.client.common_lib.cros import adb_keepalive as module
    # |__file__| returns the absolute path of the compiled bytecode of the
    # module. We want to run the original .py file, so we need to change the
    # extension back.
    script_filename = module.__file__.replace('.pyc', '.py')
    jobs = [common_utils.BgJob(
        [script_filename, target],
        nickname='adb_keepalive',
        stderr_level=logging.DEBUG,
        stdout_tee=common_utils.TEE_TO_LOGS,
        stderr_tee=common_utils.TEE_TO_LOGS,
        extra_paths=extra_paths) for target in targets]

    try:
        yield
    finally:
        # The adb_keepalive.py script runs forever until SIGTERM is sent.
        for job in jobs:
            common_utils.nuke_subprocess(job.sp)
        common_utils.join_bg_jobs(jobs)


@contextlib.contextmanager
def pushd(d):
    """Defines pushd.

    Changes the working directory to |d| for the duration of the managed
    block and restores the previous working directory afterwards, even if
    the block raises.

    @param d: the directory to change to.
    """
    current = os.getcwd()
    os.chdir(d)
    try:
        yield
    finally:
        os.chdir(current)


def parse_tradefed_result(result, waivers=None):
    """Check the result from the tradefed output.

    @param result: The result stdout string from the tradefed command.
    @param waivers: a set() of tests which are permitted to fail.
    @return A tuple (waived, accurate). |waived| is a list of waived test
            names, each repeated once per observed failure (capped at the
            number of ABIs seen). |accurate| is False if the output
            contained the notice that test counts may be inaccurate.
    """
    # Regular expressions for start/end messages of each test-run chunk.
    abi_re = r'arm\S*|x86\S*'
    # TODO(kinaba): use the current running module name.
    module_re = r'\S+'
    start_re = re.compile(r'(?:Start|Continu)ing (%s) %s with'
                          r' (\d+(?:,\d+)?) test' % (abi_re, module_re))
    end_re = re.compile(r'(%s) %s (?:complet|fail)ed in .*\.'
                        r' (\d+) passed, (\d+) failed, (\d+) not executed' %
                        (abi_re, module_re))
    fail_re = re.compile(r'I/ConsoleReporter.* (\S+) fail:')
    inaccurate_re = re.compile(r'IMPORTANT: Some modules failed to run to '
                               'completion, tests counts may be inaccurate')
    abis = set()
    waived_count = {}
    failed_tests = set()
    accurate = True
    for line in result.splitlines():
        match = start_re.search(line)
        if match:
            abis.add(match.group(1))
            continue
        match = end_re.search(line)
        if match:
            abi = match.group(1)
            if abi not in abis:
                logging.error('Trunk end with %s abi but have not seen '
                              'any trunk start with this abi.(%s)', abi, line)
            continue
        match = fail_re.search(line)
        if match:
            testname = match.group(1)
            if waivers and testname in waivers:
                waived_count[testname] = waived_count.get(testname, 0) + 1
            else:
                failed_tests.add(testname)
            continue
        # b/66899135, tradefed may report inaccurately with `list results`.
        # Add warning if summary section shows that the result is inaccurate.
        match = inaccurate_re.search(line)
        if match:
            accurate = False

    logging.info('Total ABIs: %s', abis)
    if failed_tests:
        logging.error('Failed (but not waived) tests:\n%s',
                      '\n'.join(sorted(failed_tests)))

    # TODO(dhaddock): Find a more robust way to apply waivers.
    waived = []
    for testname, fail_count in waived_count.items():
        if fail_count > len(abis):
            # This should be an error.TestFail, but unfortunately
            # tradefed has a bug that emits "fail" twice when a
            # test failed during teardown. It will anyway causes
            # a test count inconsistency and visible on the dashboard.
            logging.error('Found %d failures for %s but there are only %d '
                          'abis: %s', fail_count, testname, len(abis), abis)
            fail_count = len(abis)
        waived += [testname] * fail_count
        logging.info('Waived failure for %s %d time(s)', testname, fail_count)
    logging.info('Total waived = %s', waived)
    return waived, accurate


def select_32bit_java():
    """Switches to 32 bit java if installed (like in lab lxc images).

    This saves about 30-40% server/shard memory during the run. JAVA_HOME is
    set and the java bin directory is prepended to PATH, but only when
    running in a container that is not moblab and the 32 bit JVM directory
    exists.
    """
    if utils.is_in_container() and not client_utils.is_moblab():
        java = '/usr/lib/jvm/java-8-openjdk-i386'
        if os.path.exists(java):
            logging.info('Found 32 bit java, switching to use it.')
            os.environ['JAVA_HOME'] = java
            os.environ['PATH'] = (
                os.path.join(java, 'bin') + os.pathsep + os.environ['PATH'])


# A similar implementation in Java can be found at
# https://android.googlesource.com/platform/test/suite_harness/+/refs/heads/\
# pie-cts-dev/common/util/src/com/android/compatibility/common/util/\
# ResultHandler.java
def get_test_result_xml_path(results_destination):
    """Get the path of test_result.xml from the last session.

    @param results_destination: directory containing one sub-directory per
                                session.
    @return The lexicographically largest path to a test_result.xml that
            lives in a real (non-symlink) sub-directory, or None if there
            is no such file.
    """
    last_result_path = None
    # |entry| rather than |dir| so the builtin is not shadowed.
    for entry in os.listdir(results_destination):
        result_dir = os.path.join(results_destination, entry)
        result_path = os.path.join(result_dir, 'test_result.xml')
        # We use the lexicographically largest path, because |entry| are
        # of format YYYY.MM.DD_HH.MM.SS. The last session will always
        # have the latest date which leads to the lexicographically
        # largest path.
        if last_result_path and last_result_path > result_path:
            continue
        # We need to check for `islink` as `isdir` returns true if |result_dir|
        # is a symbolic link to a directory.
        if not os.path.isdir(result_dir) or os.path.islink(result_dir):
            continue
        if not os.path.exists(result_path):
            continue
        last_result_path = result_path
    return last_result_path


def get_perf_metrics_from_test_result_xml(result_path, resultsdir):
    """Parse test_result.xml and yield one dict per supported <Metric />.

    Each yielded dict can be used as kwargs of
    |TradefedTest.output_perf_value|. Metrics with an unsupported score_type
    or without a child element are skipped with a warning. Any other
    exception is logged as a warning and ends the iteration; it is never
    propagated to the caller.

    @param result_path: path to the test_result.xml file to parse.
    @param resultsdir: base results directory; each metric's |resultsdir| is
                       <resultsdir>/tests/<PERF_MODULE_NAME_PREFIX + module>.
    @return A generator of dicts with the keys description, value, units,
            higher_is_better and resultsdir.
    """
    try:
        root = ElementTree.parse(result_path)
        for module in root.iter('Module'):
            module_name = module.get('name')
            for testcase in module.iter('TestCase'):
                testcase_name = testcase.get('name')
                for test in testcase.iter('Test'):
                    test_name = test.get('name')
                    for metric in test.iter('Metric'):
                        score_type = metric.get('score_type')
                        if score_type not in ('higher_better',
                                              'lower_better'):
                            logging.warning(
                                'Unsupported score_type in %s/%s/%s',
                                module_name, testcase_name, test_name)
                            continue
                        # The value is read from the first child element. A
                        # childless <Metric> must only skip this metric, not
                        # abort the remaining ones via the handler below.
                        if len(metric) == 0:
                            logging.warning(
                                'Metric without a value in %s/%s/%s',
                                module_name, testcase_name, test_name)
                            continue
                        higher_is_better = (score_type == 'higher_better')
                        units = metric.get('score_unit')
                        yield dict(
                            description=testcase_name + '#' + test_name,
                            value=metric[0].text,
                            units=units,
                            higher_is_better=higher_is_better,
                            resultsdir=os.path.join(
                                resultsdir, 'tests',
                                PERF_MODULE_NAME_PREFIX + module_name)
                        )
    except Exception as e:
        # Deliberately best-effort: perf metric extraction must not fail the
        # caller, so the problem is only logged.
        logging.warning(
            'Exception raised in '
            '|tradefed_utils.get_perf_metrics_from_test_result_xml|: %s', e)