1# Copyright 2018 The Chromium OS Authors. All rights reserved. 2# Use of this source code is governed by a BSD-style license that can be 3# found in the LICENSE file. 4 5import contextlib 6import logging 7import os 8import random 9import re 10from xml.etree import ElementTree 11 12from autotest_lib.client.common_lib import utils as common_utils 13from autotest_lib.client.common_lib import error 14from autotest_lib.server.cros import lockfile 15 16PERF_MODULE_NAME_PREFIX = 'CTS.' 17 18@contextlib.contextmanager 19def lock(filename): 20 """Prevents other autotest/tradefed instances from accessing cache. 21 22 @param filename: The file to be locked. 23 """ 24 filelock = lockfile.FileLock(filename) 25 # It is tempting just to call filelock.acquire(3600). But the implementation 26 # has very poor temporal granularity (timeout/10), which is unsuitable for 27 # our needs. See /usr/lib64/python2.7/site-packages/lockfile/ 28 attempts = 0 29 while not filelock.i_am_locking(): 30 try: 31 attempts += 1 32 logging.info('Waiting for cache lock...') 33 # We must not use a random integer as the filelock implementations 34 # may underflow an integer division. 35 filelock.acquire(random.uniform(0.0, pow(2.0, attempts))) 36 except (lockfile.AlreadyLocked, lockfile.LockTimeout): 37 # Our goal is to wait long enough to be sure something very bad 38 # happened to the locking thread. 11 attempts is between 15 and 39 # 30 minutes. 40 if attempts > 11: 41 # Normally we should aqcuire the lock immediately. Once we 42 # wait on the order of 10 minutes either the dev server IO is 43 # overloaded or a lock didn't get cleaned up. Take one for the 44 # team, break the lock and report a failure. This should fix 45 # the lock for following tests. If the failure affects more than 46 # one job look for a deadlock or dev server overload. 47 age = filelock.age_of_lock() 48 logging.error('Permanent lock failure. Lock age = %d.', age) 49 # Breaking a lock is a destructive operation that causes 50 # abort of locking jobs, cleanup and redowloading of cache 51 # contents. When the error was due to overloaded server, 52 # it cause even more cascade of lock errors. Wait 4 hours 53 # before breaking the lock. Tasks inside the critical section 54 # is about downloading a few gigabytes of files. Taking 4 hours 55 # strongly implies the job had went wrong. 56 if age > 4 * 60 * 60: 57 logging.error('Trying to break lock.') 58 filelock.break_lock() 59 raise error.TestFail('Error: permanent cache lock failure.') 60 else: 61 logging.info('Acquired cache lock after %d attempts.', attempts) 62 try: 63 yield 64 finally: 65 filelock.release() 66 logging.info('Released cache lock.') 67 68 69@contextlib.contextmanager 70def adb_keepalive(targets, extra_paths): 71 """A context manager that keeps the adb connection alive. 72 73 AdbKeepalive will spin off a new process that will continuously poll for 74 adb's connected state, and will attempt to reconnect if it ever goes down. 75 This is the only way we can currently recover safely from (intentional) 76 reboots. 77 78 @param target: the hostname and port of the DUT. 79 @param extra_paths: any additional components to the PATH environment 80 variable. 81 """ 82 from autotest_lib.client.common_lib.cros import adb_keepalive as module 83 # |__file__| returns the absolute path of the compiled bytecode of the 84 # module. We want to run the original .py file, so we need to change the 85 # extension back. 86 script_filename = module.__file__.replace('.pyc', '.py') 87 jobs = [common_utils.BgJob( 88 [script_filename, target], 89 nickname='adb_keepalive', 90 stderr_level=logging.DEBUG, 91 stdout_tee=common_utils.TEE_TO_LOGS, 92 stderr_tee=common_utils.TEE_TO_LOGS, 93 extra_paths=extra_paths) for target in targets] 94 95 try: 96 yield 97 finally: 98 # The adb_keepalive.py script runs forever until SIGTERM is sent. 99 for job in jobs: 100 common_utils.nuke_subprocess(job.sp) 101 common_utils.join_bg_jobs(jobs) 102 103 104def parse_tradefed_testresults_xml(test_result_xml_path, waivers=None): 105 """ Check the result from tradefed through test_results.xml 106 @param waivers: a set() of tests which are permitted to fail. 107 """ 108 waived_count = dict() 109 failed_tests = set() 110 try: 111 root = ElementTree.parse(test_result_xml_path) 112 for module in root.iter('Module'): 113 module_name = module.get('name') 114 for testcase in module.iter('TestCase'): 115 testcase_name = testcase.get('name') 116 for test in testcase.iter('Test'): 117 test_case = test.get('name') 118 test_res = test.get('result') 119 test_name = '%s#%s' % (testcase_name, test_case) 120 121 if test_res == "fail": 122 test_fail = test.find('Failure') 123 failed_message = test_fail.get('message') 124 failed_stacktrace = test_fail.find('StackTrace').text 125 126 if waivers and test_name in waivers: 127 waived_count[test_name] = ( 128 waived_count.get(test_name, 0) + 1) 129 else: 130 failed_tests.add(test_name) 131 132 # Check for test completion. 133 for summary in root.iter('Summary'): 134 modules_done = summary.get('modules_done') 135 modules_total = summary.get('modules_total') 136 137 if failed_tests: 138 logging.error('Failed (but not waived) tests:\n%s', 139 '\n'.join(sorted(failed_tests))) 140 141 waived = [] 142 for testname, fail_count in waived_count.items(): 143 waived += [testname] * fail_count 144 logging.info('Waived failure for %s %d time(s)', 145 testname, fail_count) 146 logging.info('>> Total waived = %s', waived) 147 return waived, True 148 149 except Exception as e: 150 logging.warning( 151 'Exception raised in ' 152 '|tradefed_utils.parse_tradefed_result_xml|: {' 153 '0}'.format(e)) 154 155def parse_tradefed_result(result, waivers=None): 156 """Check the result from the tradefed output. 157 158 @param result: The result stdout string from the tradefed command. 159 @param waivers: a set() of tests which are permitted to fail. 160 @return List of the waived tests. 161 """ 162 # Regular expressions for start/end messages of each test-run chunk. 163 abi_re = r'arm\S*|x86\S*' 164 # TODO(kinaba): use the current running module name. 165 module_re = r'\S+' 166 start_re = re.compile(r'(?:Start|Continu)ing (%s) %s with' 167 r' (\d+(?:,\d+)?) test' % (abi_re, module_re)) 168 end_re = re.compile(r'(%s) %s (?:complet|fail)ed in .*\.' 169 r' (\d+) passed, (\d+) failed, (\d+) not executed' % 170 (abi_re, module_re)) 171 fail_re = re.compile(r'I/ConsoleReporter.* (\S+) fail:') 172 inaccurate_re = re.compile(r'IMPORTANT: Some modules failed to run to ' 173 'completion, tests counts may be inaccurate') 174 abis = set() 175 waived_count = dict() 176 failed_tests = set() 177 accurate = True 178 for line in result.splitlines(): 179 match = start_re.search(line) 180 if match: 181 abis = abis.union([match.group(1)]) 182 continue 183 match = end_re.search(line) 184 if match: 185 abi = match.group(1) 186 if abi not in abis: 187 logging.error('Trunk end with %s abi but have not seen ' 188 'any trunk start with this abi.(%s)', abi, line) 189 continue 190 match = fail_re.search(line) 191 if match: 192 testname = match.group(1) 193 if waivers and testname in waivers: 194 waived_count[testname] = waived_count.get(testname, 0) + 1 195 else: 196 failed_tests.add(testname) 197 continue 198 # b/66899135, tradefed may reported inaccuratly with `list results`. 199 # Add warning if summary section shows that the result is inacurrate. 200 match = inaccurate_re.search(line) 201 if match: 202 accurate = False 203 204 logging.info('Total ABIs: %s', abis) 205 if failed_tests: 206 logging.error('Failed (but not waived) tests:\n%s', 207 '\n'.join(sorted(failed_tests))) 208 209 # TODO(dhaddock): Find a more robust way to apply waivers. 210 waived = [] 211 for testname, fail_count in waived_count.items(): 212 if fail_count > len(abis): 213 # This should be an error.TestFail, but unfortunately 214 # tradefed has a bug that emits "fail" twice when a 215 # test failed during teardown. It will anyway causes 216 # a test count inconsistency and visible on the dashboard. 217 logging.error('Found %d failures for %s but there are only %d ' 218 'abis: %s', fail_count, testname, len(abis), abis) 219 fail_count = len(abis) 220 waived += [testname] * fail_count 221 logging.info('Waived failure for %s %d time(s)', testname, fail_count) 222 logging.info('Total waived = %s', waived) 223 return waived, accurate 224 225 226# A similar implementation in Java can be found at 227# https://android.googlesource.com/platform/test/suite_harness/+/refs/heads/\ 228# pie-cts-dev/common/util/src/com/android/compatibility/common/util/\ 229# ResultHandler.java 230def get_test_result_xml_path(results_destination): 231 """Get the path of test_result.xml from the last session. 232 Raises: 233 Should never raise! 234 """ 235 try: 236 last_result_path = None 237 for dir in os.listdir(results_destination): 238 result_dir = os.path.join(results_destination, dir) 239 result_path = os.path.join(result_dir, 'test_result.xml') 240 # We use the lexicographically largest path, because |dir| are 241 # of format YYYY.MM.DD_HH.MM.SS. The last session will always 242 # have the latest date which leads to the lexicographically 243 # largest path. 244 if last_result_path and last_result_path > result_path: 245 continue 246 # We need to check for `islink` as `isdir` returns true if 247 # |result_dir| is a symbolic link to a directory. 248 if not os.path.isdir(result_dir) or os.path.islink(result_dir): 249 continue 250 if not os.path.exists(result_path): 251 continue 252 last_result_path = result_path 253 return last_result_path 254 except Exception as e: 255 logging.warning( 256 'Exception raised in ' 257 '|tradefed_utils.get_test_result_xml_path|: {' 258 '0}'.format(e)) 259 260 261def get_perf_metrics_from_test_result_xml(result_path, resultsdir): 262 """Parse test_result.xml and each <Metric /> is mapped to a dict that 263 can be used as kwargs of |TradefedTest.output_perf_value|. 264 Raises: 265 Should never raise! 266 """ 267 try: 268 root = ElementTree.parse(result_path) 269 for module in root.iter('Module'): 270 module_name = module.get('name') 271 for testcase in module.iter('TestCase'): 272 testcase_name = testcase.get('name') 273 for test in testcase.iter('Test'): 274 test_name = test.get('name') 275 for metric in test.iter('Metric'): 276 score_type = metric.get('score_type') 277 if score_type not in ['higher_better', 'lower_better']: 278 logging.warning( 279 'Unsupported score_type in %s/%s/%s', 280 module_name, testcase_name, test_name) 281 continue 282 higher_is_better = (score_type == 'higher_better') 283 units = metric.get('score_unit') 284 yield dict( 285 description=testcase_name + '#' + test_name, 286 value=metric[0].text, 287 units=units, 288 higher_is_better=higher_is_better, 289 resultsdir=os.path.join(resultsdir, 'tests', 290 PERF_MODULE_NAME_PREFIX + module_name) 291 ) 292 except Exception as e: 293 logging.warning( 294 'Exception raised in ' 295 '|tradefed_utils.get_perf_metrics_from_test_result_xml|: {' 296 '0}'.format(e)) 297