#!/usr/bin/env python3
# Copyright 2019, The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Atest indexing module."""

from __future__ import annotations
from __future__ import print_function

from dataclasses import dataclass
import functools
import logging
import os
from pathlib import Path
import pickle
import shutil
import subprocess
import sys
import tempfile
import time
from typing import List

from atest import atest_utils as au
from atest import atest_utils
from atest import constants
from atest.atest_enum import DetectType
from atest.metrics import metrics, metrics_utils

UPDATEDB = 'updatedb'
LOCATE = 'locate'
# The list was generated by the command:
# find `gettop` -type d -wholename `gettop`/out -prune -o -type d -name '.*'
# -print | awk -F/ '{print $NF}' | sort -u
PRUNENAMES = [
    '.abc',
    '.appveyor',
    '.azure-pipelines',
    '.bazelci',
    '.build-id',
    '.buildkite',
    '.buildscript',
    '.cargo',
    '.ci',
    '.circleci',
    '.clusterfuzzlite',
    '.conan',
    '.devcontainer',
    '.dwz',
    '.externalToolBuilders',
    '.git',
    '.githooks',
    '.github',
    '.gitlab',
    '.gitlab-ci',
    '.google',
    '.hidden',
    '.idea',
    '.intermediates',
    '.jenkins',
    '.kokoro',
    '.libs_cffi_backend',
    '.more',
    '.mvn',
    '.prebuilt_info',
    '.private',
    '__pycache__',
    '.repo',
    '.settings',
    '.static',
    '.svn',
    '.test',
    '.travis',
    '.travis_scripts',
    '.tx',
    '.vscode',
]
PRUNEPATHS = ['prebuilts']


def debug_log(func):
  """Decorator that logs entry and exit of a function in debug mode."""

  @functools.wraps(func)
  def wrapper(*args, **kwargs):
    logging.debug('Running %s...', func.__name__)
    result = func(*args, **kwargs)
    logging.debug('%s done.', func.__name__)
    return result

  return wrapper


def run_updatedb(output_cache: Path, prunepaths: List[str] = None) -> bool:
  """Run updatedb and generate the cache in $ANDROID_HOST_OUT/indices/plocate.db.

  Args:
    output_cache: The file path of the updatedb cache.
    prunepaths: A list of paths that are relative to the build top.

  Returns:
    True for success; False otherwise.
  """
  search_root = str(au.get_build_top())
  prunepaths = prunepaths if prunepaths else PRUNEPATHS
  prunepaths = [os.path.join(search_root, p) for p in prunepaths]
  prunepaths.append(str(au.get_build_out_dir()))
  updatedb_cmd = [UPDATEDB, '-l0']
  updatedb_cmd.append('-U%s' % search_root)
  updatedb_cmd.append('-n%s' % ' '.join(PRUNENAMES))
  updatedb_cmd.append('-o%s' % output_cache)
  # (b/206866627) /etc/updatedb.conf excludes /mnt from scanning on Linux.
  # Use --prunepaths to override the default configuration.
  updatedb_cmd.append('--prunepaths')
  updatedb_cmd.append(' '.join(prunepaths))
  # Support scanning bind mounts as well.
  updatedb_cmd.extend(['--prune-bind-mounts', 'no'])

  logging.debug('Running updatedb... ')
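  # For illustration only (the paths below are assumptions, not real output),
  # the assembled command resembles:
  #   updatedb -l0 -U/path/to/src -n'.abc .appveyor ... .vscode' \
  #     -o/path/to/out/host/linux-x86/indices/plocate.db \
  #     --prunepaths '/path/to/src/prebuilts /path/to/out' --prune-bind-mounts no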
  try:
    full_env_vars = os.environ.copy()
    logging.debug('Executing: %s', updatedb_cmd)
    result = subprocess.run(
        updatedb_cmd, env=full_env_vars, capture_output=True, check=True
    )
    logging.debug('Completed executing updatedb: %s', result.stdout)
    return True
  except (KeyboardInterrupt, SystemExit):
    atest_utils.print_and_log_error('Process interrupted or failure.')
  # Delete the indices when plocate.db is locked or on any other
  # CalledProcessError. (b/141588997)
  except subprocess.CalledProcessError as err:
    atest_utils.print_and_log_error(
        '%s\nstdout: %s\nstderr: %s', err, err.stdout, err.stderr
    )
    metrics.LocalDetectEvent(
        detect_type=DetectType.IS_PLOCATEDB_LOCKED, result=1
    )
    output_cache.unlink(missing_ok=True)
  except FileNotFoundError:
    atest_utils.print_and_log_error('updatedb is not available on this host.')

  return False


def _dump_index(dump_file, output, output_re, key, value):
  """Dump indexed data with pickle.

  Args:
    dump_file: A string of the absolute path of the index file.
    output: A string generated by locate and grep.
    output_re: A regex used for grouping patterns.
    key: A string of the regex group name used as the dictionary key, e.g.
      classname, package, cc_class, etc.
    value: A string of the regex group name used as the dictionary value,
      e.g. the file path.

  The data structure will be like:
  {
    'Foo': {'/path/to/Foo.java', '/path2/to/Foo.kt'},
    'Boo': {'/path3/to/Boo.java'}
  }
  """
  _dict = {}
  with tempfile.NamedTemporaryFile() as temp_file:
    with open(temp_file.name, 'wb') as cache_file:
      if isinstance(output, bytes):
        output = output.decode()
      for entry in output.splitlines():
        match = output_re.match(entry)
        if match:
          _dict.setdefault(match.group(key), set()).add(match.group(value))
      try:
        pickle.dump(_dict, cache_file, protocol=2)
      except IOError:
        atest_utils.print_and_log_error('Failed in dumping %s', dump_file)
    shutil.copy(temp_file.name, dump_file)


# pylint: disable=anomalous-backslash-in-string
def get_cc_result(indices: Indices):
  """Search all testable cc/cpp files and grep TEST(), TEST_F() or TEST_P().

  After searching cc/cpp files, index corresponding data types in parallel.

  Args:
    indices: An Indices object.
  """
  find_cc_cmd = (
      f"{LOCATE} -id{indices.locate_db} --regex '/*.test.*\.(cc|cpp)$'"
      f"| xargs egrep -sH '{constants.CC_GREP_RE}' 2>/dev/null || true"
  )
  logging.debug('Probing CC classes:\n %s', find_cc_cmd)
  result = subprocess.getoutput(find_cc_cmd)

  au.start_threading(
      target=_index_cc_classes, args=[result, indices.cc_classes_idx]
  )


# pylint: disable=anomalous-backslash-in-string
def get_java_result(indices: Indices):
  """Search all testable java/kt files and grep the package.

  After searching java/kt files, index corresponding data types in parallel.

  Args:
    indices: An Indices object.
  """
  package_grep_re = r'^\s*package\s+[a-z][[:alnum:]]+[^{]'
  find_java_cmd = (
      f"{LOCATE} -id{indices.locate_db} --regex '/*.test.*\.(java|kt)$' "
      # (b/204398677) suppress stderr when the indexing target is terminated.
221 f"| xargs egrep -sH '{package_grep_re}' 2>/dev/null|| true" 222 ) 223 logging.debug('Probing Java classes:\n %s', find_java_cmd) 224 result = subprocess.getoutput(find_java_cmd) 225 226 au.start_threading( 227 target=_index_java_classes, args=[result, indices.classes_idx] 228 ) 229 au.start_threading( 230 target=_index_qualified_classes, args=[result, indices.fqcn_idx] 231 ) 232 au.start_threading( 233 target=_index_packages, args=[result, indices.packages_idx] 234 ) 235 236 237@debug_log 238def _index_cc_classes(output, index): 239 """Index CC classes. 240 241 The data structure is like: 242 { 243 'FooTestCase': {'/path1/to/the/FooTestCase.cpp', 244 '/path2/to/the/FooTestCase.cc'} 245 } 246 247 Args: 248 output: A string object generated by get_cc_result(). 249 index: A string path of the index file. 250 """ 251 _dump_index( 252 dump_file=index, 253 output=output, 254 output_re=constants.CC_OUTPUT_RE, 255 key='test_name', 256 value='file_path', 257 ) 258 259 260@debug_log 261def _index_java_classes(output, index): 262 """Index Java classes. 263 264 The data structure is like: { 265 266 'FooTestCase': {'/path1/to/the/FooTestCase.java', 267 '/path2/to/the/FooTestCase.kt'} 268 } 269 270 Args: 271 output: A string object generated by get_java_result(). 272 index: A string path of the index file. 273 """ 274 _dump_index( 275 dump_file=index, 276 output=output, 277 output_re=constants.CLASS_OUTPUT_RE, 278 key='class', 279 value='java_path', 280 ) 281 282 283@debug_log 284def _index_packages(output, index): 285 """Index Java packages. 286 287 The data structure is like: { 288 289 'a.b.c.d': {'/path1/to/a/b/c/d/', 290 '/path2/to/a/b/c/d/' 291 } 292 293 Args: 294 output: A string object generated by get_java_result(). 295 index: A string path of the index file. 296 """ 297 _dump_index( 298 dump_file=index, 299 output=output, 300 output_re=constants.PACKAGE_OUTPUT_RE, 301 key='package', 302 value='java_dir', 303 ) 304 305 306@debug_log 307def _index_qualified_classes(output, index): 308 """Index Fully Qualified Java Classes(FQCN). 309 310 The data structure is like: { 311 312 'a.b.c.d.FooTestCase': {'/path1/to/a/b/c/d/FooTestCase.java', 313 '/path2/to/a/b/c/d/FooTestCase.kt'} 314 } 315 316 Args: 317 output: A string object generated by get_java_result(). 318 index: A string path of the index file. 319 """ 320 _dict = {} 321 with tempfile.NamedTemporaryFile() as temp_file: 322 with open(temp_file.name, 'wb') as cache_file: 323 if isinstance(output, bytes): 324 output = output.decode() 325 for entry in output.split('\n'): 326 match = constants.QCLASS_OUTPUT_RE.match(entry) 327 if match: 328 fqcn = match.group('package') + '.' + match.group('class') 329 _dict.setdefault(fqcn, set()).add(match.group('java_path')) 330 try: 331 pickle.dump(_dict, cache_file, protocol=2) 332 except (KeyboardInterrupt, SystemExit): 333 atest_utils.print_and_log_error('Process interrupted or failure.') 334 except IOError: 335 atest_utils.print_and_log_error('Failed in dumping %s', index) 336 shutil.copy(temp_file.name, index) 337 338 339def index_targets(): 340 """The entrypoint of indexing targets. 341 342 Utilise plocate database to index reference types of CLASS, CC_CLASS, 343 PACKAGE and QUALIFIED_CLASS. 
344 """ 345 start = time.time() 346 unavailable_cmds = [ 347 cmd for cmd in [UPDATEDB, LOCATE] if not au.has_command(cmd) 348 ] 349 if unavailable_cmds: 350 logging.debug( 351 'command %s is unavailable; skip indexing...', 352 ' '.join(unavailable_cmds), 353 ) 354 return None 355 356 indices = Indices() 357 output_cache = indices.locate_db 358 get_num_cmd = f'{LOCATE} -d{output_cache} --count /' 359 pre_number = 0 360 if output_cache.exists(): 361 ret, pre_number = subprocess.getstatusoutput(get_num_cmd) 362 if ret != 0: 363 logging.debug('Found a broken db: %s', output_cache) 364 pre_number = sys.maxsize 365 366 if run_updatedb(output_cache): 367 if not indices.has_all_indices(): 368 logging.debug('Missing essential indices; will re-index targets.') 369 return _index_targets(indices, start) 370 371 # (b/206886222) The checksum and plocate.db file size are not indicators 372 # to determining whether the source tree had changed. Therefore, when 373 # fulfilling the following conditions, Atest will trigger indexing: 374 # 1. different file numbers in current and previous plocate.db. 375 same_number_of_files = pre_number == subprocess.getoutput(get_num_cmd) 376 if not same_number_of_files: 377 logging.debug('Found file number changed; will re-index targets.') 378 return _index_targets(indices, start) 379 380 # 2. had issued `repo sync` before running atest. 381 checksum_file = au.get_index_path('repo_sync.md5') 382 repo_syncd = not au.check_md5(checksum_file, missing_ok=False) 383 if repo_syncd: 384 logging.debug('Found repo syncd; will re-index targets.') 385 repo_file = au.get_build_top('.repo/.repo_fetchtimes.json') 386 au.start_threading(target=au.save_md5, args=[[repo_file], checksum_file]) 387 return _index_targets(indices, start) 388 logging.debug('Indices remains the same. Ignore indexing...') 389 else: 390 atest_utils.print_and_log_warning( 391 'Unable to run %s. Search targets will be very slow.', output_cache 392 ) 393 return None 394 395 396def _index_targets(indices: Indices, start_from: float): 397 """The actual index_targets function.""" 398 logging.debug('Indexing targets... ') 399 proc_java = au.start_threading(target=get_java_result, args=[indices]) 400 proc_cc = au.start_threading(target=get_cc_result, args=[indices]) 401 proc_java.join() 402 proc_cc.join() 403 elapsed_time = time.time() - start_from 404 logging.debug('Indexing targets took %ss', elapsed_time) 405 metrics.LocalDetectEvent( 406 detect_type=DetectType.INDEX_TARGETS_MS, result=int(elapsed_time * 1000) 407 ) 408 409 410@dataclass 411class Indices: 412 """Class that stores index files.""" 413 414 locate_db: Path 415 classes_idx: Path 416 cc_classes_idx: Path 417 packages_idx: Path 418 fqcn_idx: Path 419 420 def __init__(self): 421 """initiation of Indices object.""" 422 self.locate_db = au.get_index_path('plocate.db') 423 self.classes_idx = au.get_index_path('classes.idx') 424 self.cc_classes_idx = au.get_index_path('cc_classes.idx') 425 self.packages_idx = au.get_index_path('packages.idx') 426 self.fqcn_idx = au.get_index_path('fqcn.idx') 427 au.get_index_path().mkdir(parents=True, exist_ok=True) 428 429 def has_all_indices(self): 430 """Whether all indices files exist.""" 431 exists = [ 432 self.locate_db.exists(), 433 self.classes_idx.exists(), 434 self.cc_classes_idx.exists(), 435 self.packages_idx.exists(), 436 self.fqcn_idx.exists(), 437 ] 438 if not all(exists): 439 logging.debug("Some index file doesn't exist: %s", exists) 440 return all(exists) 441