#!/usr/bin/env python3
# Copyright 2019, The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Atest indexing module."""

from __future__ import annotations
from __future__ import print_function

from dataclasses import dataclass
import functools
import logging
import os
from pathlib import Path
import pickle
import shutil
import subprocess
import sys
import tempfile
import time
from typing import List

from atest import atest_utils as au
from atest import atest_utils
from atest import constants
from atest.atest_enum import DetectType
from atest.metrics import metrics, metrics_utils

# External binaries used to build (updatedb) and query (locate) the
# plocate file-name database.
UPDATEDB = 'updatedb'
LOCATE = 'locate'
# Directory basenames excluded from indexing (passed to updatedb -n).
# The list was generated by command:
# find `gettop` -type d -wholename `gettop`/out -prune  -o -type d -name '.*'
# -print | awk -F/ '{{print $NF}}'| sort -u
PRUNENAMES = [
    '.abc',
    '.appveyor',
    '.azure-pipelines',
    '.bazelci',
    '.build-id',
    '.buildkite',
    '.buildscript',
    '.cargo',
    '.ci',
    '.circleci',
    '.clusterfuzzlite',
    '.conan',
    '.devcontainer',
    '.dwz',
    '.externalToolBuilders',
    '.git',
    '.githooks',
    '.github',
    '.gitlab',
    '.gitlab-ci',
    '.google',
    '.hidden',
    '.idea',
    '.intermediates',
    '.jenkins',
    '.kokoro',
    '.libs_cffi_backend',
    '.more',
    '.mvn',
    '.prebuilt_info',
    '.private',
    '__pycache__',
    '.repo',
    '.settings',
    '.static',
    '.svn',
    '.test',
    '.travis',
    '.travis_scripts',
    '.tx',
    '.vscode',
]
# Default paths (relative to the build top) skipped when indexing.
PRUNEPATHS = ['prebuilts']
89
90
def debug_log(func):
  """Decorator that logs entry/exit of *func* at debug level.

  Fixes two defects in the original wrapper: the function name was passed
  wrapped in a set literal (rendering as ``{'name'}`` in logs), and the
  wrapped function's return value was discarded.
  """

  @functools.wraps(func)
  def wrapper(*args, **kwargs):
    logging.debug('Running %s...', func.__name__)
    result = func(*args, **kwargs)
    logging.debug('%s done.', func.__name__)
    # Propagate the wrapped function's result to the caller.
    return result

  return wrapper
101
102
def run_updatedb(output_cache: Path, prunepaths: List[str] | None = None) -> bool:
  """Run updatedb and generate cache in $ANDROID_HOST_OUT/indices/plocate.db

  Args:
    output_cache: The file path of the updatedb cache.
    prunepaths: a list of paths that are relative to the build top.

  Returns:
    True for success; false otherwise.
  """
  search_root = str(au.get_build_top())
  prunepaths = prunepaths if prunepaths else PRUNEPATHS
  # Prune paths must be absolute for updatedb; anchor them at the build top.
  prunepaths = [os.path.join(search_root, p) for p in prunepaths]
  # Never index the build output directory.
  prunepaths.append(str(au.get_build_out_dir()))
  updatedb_cmd = [UPDATEDB, '-l0']  # -l0: skip per-user visibility checks.
  updatedb_cmd.append('-U%s' % search_root)  # -U: root of the tree to scan.
  updatedb_cmd.append('-n%s' % ' '.join(PRUNENAMES))  # -n: basenames to skip.
  updatedb_cmd.append('-o%s' % output_cache)  # -o: database output path.
  # (b/206866627) /etc/updatedb.conf excludes /mnt from scanning on Linux.
  # Use --prunepaths to override the default configuration.
  updatedb_cmd.append('--prunepaths')
  updatedb_cmd.append(' '.join(prunepaths))
  # Support scanning bind mounts as well.
  updatedb_cmd.extend(['--prune-bind-mounts', 'no'])

  logging.debug('Running updatedb... ')
  try:
    full_env_vars = os.environ.copy()
    logging.debug('Executing: %s', updatedb_cmd)
    result = subprocess.run(
        updatedb_cmd, env=full_env_vars, capture_output=True, check=True
    )
    logging.debug('Completed executing updatedb: %s', result.stdout)
    return True
  except (KeyboardInterrupt, SystemExit):
    atest_utils.print_and_log_error('Process interrupted or failure.')
  # Delete indices when plocate.db is locked() or other CalledProcessError.
  # (b/141588997)
  except subprocess.CalledProcessError as err:
    atest_utils.print_and_log_error(
        '%s\nStdout: %s\nstderr: %s', err, err.stdout, err.stderr
    )
    metrics.LocalDetectEvent(
        detect_type=DetectType.IS_PLOCATEDB_LOCKED, result=1
    )
    # Drop the (possibly corrupt/locked) db so the next run starts clean.
    output_cache.unlink(missing_ok=True)
  except FileNotFoundError:
    atest_utils.print_and_log_error('updatedb is not available on this host.')

  return False
153
154
155def _dump_index(dump_file, output, output_re, key, value):
156  """Dump indexed data with pickle.
157
158  Args:
159      dump_file: A string of absolute path of the index file.
160      output: A string generated by locate and grep.
161      output_re: An regex which is used for grouping patterns.
162      key: A string for dictionary key, e.g. classname, package, cc_class, etc.
163      value: A set of path.
164
165  The data structure will be like:
166  {
167    'Foo': {'/path/to/Foo.java', '/path2/to/Foo.kt'},
168    'Boo': {'/path3/to/Boo.java'}
169  }
170  """
171  _dict = {}
172  with tempfile.NamedTemporaryFile() as temp_file:
173    with open(temp_file.name, 'wb') as cache_file:
174      if isinstance(output, bytes):
175        output = output.decode()
176      for entry in output.splitlines():
177        match = output_re.match(entry)
178        if match:
179          _dict.setdefault(match.group(key), set()).add(match.group(value))
180      try:
181        pickle.dump(_dict, cache_file, protocol=2)
182      except IOError:
183        atest_utils.print_and_log_error('Failed in dumping %s', dump_file)
184    shutil.copy(temp_file.name, dump_file)
185
186
def get_cc_result(indices: Indices):
  """Search all testable cc/cpp and grep TEST(), TEST_F() or TEST_P().

  After searching cc/cpp files, index corresponding data types in parallel.

  Args:
      indices: an Indices object.
  """
  # rf-string keeps the regex backslash (\.) literal, so the former
  # "pylint: disable=anomalous-backslash-in-string" pragma is unnecessary.
  find_cc_cmd = (
      rf"{LOCATE} -id{indices.locate_db} --regex '/*.test.*\.(cc|cpp)$'"
      f"| xargs egrep -sH '{constants.CC_GREP_RE}' 2>/dev/null || true"
  )
  logging.debug('Probing CC classes:\n %s', find_cc_cmd)
  result = subprocess.getoutput(find_cc_cmd)

  # Index in a background thread so CC and Java scans can overlap.
  au.start_threading(
      target=_index_cc_classes, args=[result, indices.cc_classes_idx]
  )
206
207
def get_java_result(indices: Indices):
  """Search all testable java/kt and grep package.

  After searching java/kt files, index corresponding data types in parallel.

  Args:
      indices: an Indices object.
  """
  package_grep_re = r'^\s*package\s+[a-z][[:alnum:]]+[^{]'
  # rf-string keeps the regex backslash (\.) literal, so the former
  # "pylint: disable=anomalous-backslash-in-string" pragma is unnecessary.
  find_java_cmd = (
      rf"{LOCATE} -id{indices.locate_db} --regex '/*.test.*\.(java|kt)$' "
      # (b/204398677) suppress stderr when indexing target terminated.
      f"| xargs egrep -sH '{package_grep_re}' 2>/dev/null|| true"
  )
  logging.debug('Probing Java classes:\n %s', find_java_cmd)
  result = subprocess.getoutput(find_java_cmd)

  # Index class names, fully-qualified class names and packages in parallel.
  au.start_threading(
      target=_index_java_classes, args=[result, indices.classes_idx]
  )
  au.start_threading(
      target=_index_qualified_classes, args=[result, indices.fqcn_idx]
  )
  au.start_threading(
      target=_index_packages, args=[result, indices.packages_idx]
  )
235
236
@debug_log
def _index_cc_classes(output, index):
  """Index CC test classes into a pickled dict.

  Resulting structure:
  {
    'FooTestCase': {'/path1/to/the/FooTestCase.cpp',
                    '/path2/to/the/FooTestCase.cc'}
  }

  Args:
      output: A string object generated by get_cc_result().
      index: A string path of the index file.
  """
  _dump_index(index, output, constants.CC_OUTPUT_RE, 'test_name', 'file_path')
258
259
@debug_log
def _index_java_classes(output, index):
  """Index Java/Kotlin test classes into a pickled dict.

  Resulting structure:
  {
      'FooTestCase': {'/path1/to/the/FooTestCase.java',
                      '/path2/to/the/FooTestCase.kt'}
  }

  Args:
      output: A string object generated by get_java_result().
      index: A string path of the index file.
  """
  _dump_index(index, output, constants.CLASS_OUTPUT_RE, 'class', 'java_path')
281
282
@debug_log
def _index_packages(output, index):
  """Index Java packages into a pickled dict.

  Resulting structure:
  {
      'a.b.c.d': {'/path1/to/a/b/c/d/',
                  '/path2/to/a/b/c/d/'
  }

  Args:
      output: A string object generated by get_java_result().
      index: A string path of the index file.
  """
  _dump_index(index, output, constants.PACKAGE_OUTPUT_RE, 'package', 'java_dir')
304
305
@debug_log
def _index_qualified_classes(output, index):
  """Index Fully Qualified Java Classes(FQCN).

  Resulting structure:
  {
      'a.b.c.d.FooTestCase': {'/path1/to/a/b/c/d/FooTestCase.java',
                              '/path2/to/a/b/c/d/FooTestCase.kt'}
  }

  Args:
      output: A string object generated by get_java_result().
      index: A string path of the index file.
  """
  text = output.decode() if isinstance(output, bytes) else output
  # Map "<package>.<class>" to the set of source paths defining it.
  fqcn_map = {}
  for line in text.split('\n'):
    matched = constants.QCLASS_OUTPUT_RE.match(line)
    if not matched:
      continue
    qualified = matched.group('package') + '.' + matched.group('class')
    fqcn_map.setdefault(qualified, set()).add(matched.group('java_path'))
  # Stage the pickle in a temp file, then copy into place atomically-ish.
  with tempfile.NamedTemporaryFile() as temp_file:
    with open(temp_file.name, 'wb') as cache_file:
      try:
        pickle.dump(fqcn_map, cache_file, protocol=2)
      except (KeyboardInterrupt, SystemExit):
        atest_utils.print_and_log_error('Process interrupted or failure.')
      except IOError:
        atest_utils.print_and_log_error('Failed in dumping %s', index)
    shutil.copy(temp_file.name, index)
337
338
def index_targets():
  """The entrypoint of indexing targets.

  Utilise plocate database to index reference types of CLASS, CC_CLASS,
  PACKAGE and QUALIFIED_CLASS.
  """
  start = time.time()
  # Bail out early when updatedb or locate is missing on this host.
  unavailable_cmds = [
      cmd for cmd in [UPDATEDB, LOCATE] if not au.has_command(cmd)
  ]
  if unavailable_cmds:
    logging.debug(
        'command %s is unavailable; skip indexing...',
        ' '.join(unavailable_cmds),
    )
    return None

  indices = Indices()
  output_cache = indices.locate_db
  # Counts every db entry; compared before/after updatedb to detect changes.
  get_num_cmd = f'{LOCATE} -d{output_cache} --count /'
  pre_number = 0
  if output_cache.exists():
    # NOTE: getstatusoutput() returns the count as a string, so the later
    # equality check is string-vs-string when the db already existed.
    ret, pre_number = subprocess.getstatusoutput(get_num_cmd)
    if ret != 0:
      logging.debug('Found a broken db: %s', output_cache)
      # Guarantees a mismatch below, forcing a re-index.
      pre_number = sys.maxsize

  if run_updatedb(output_cache):
    if not indices.has_all_indices():
      logging.debug('Missing essential indices; will re-index targets.')
      return _index_targets(indices, start)

    # (b/206886222) The checksum and plocate.db file size are not indicators
    # to determining whether the source tree had changed. Therefore, when
    # fulfilling the following conditions, Atest will trigger indexing:
    #  1. different file numbers in current and previous plocate.db.
    same_number_of_files = pre_number == subprocess.getoutput(get_num_cmd)
    if not same_number_of_files:
      logging.debug('Found file number changed; will re-index targets.')
      return _index_targets(indices, start)

    #  2. had issued `repo sync` before running atest.
    checksum_file = au.get_index_path('repo_sync.md5')
    repo_syncd = not au.check_md5(checksum_file, missing_ok=False)
    if repo_syncd:
      logging.debug('Found repo syncd; will re-index targets.')
      repo_file = au.get_build_top('.repo/.repo_fetchtimes.json')
      # Refresh the checksum in the background for the next invocation.
      au.start_threading(target=au.save_md5, args=[[repo_file], checksum_file])
      return _index_targets(indices, start)
    logging.debug('Indices remains the same. Ignore indexing...')
  else:
    atest_utils.print_and_log_warning(
        'Unable to run %s. Search targets will be very slow.', output_cache
    )
  return None
394
395
def _index_targets(indices: Indices, start_from: float):
  """Run the Java and CC indexing passes in parallel and report timing."""
  logging.debug('Indexing targets... ')
  # Start both scans before joining either, so they run concurrently.
  workers = [
      au.start_threading(target=get_java_result, args=[indices]),
      au.start_threading(target=get_cc_result, args=[indices]),
  ]
  for worker in workers:
    worker.join()
  elapsed_time = time.time() - start_from
  logging.debug('Indexing targets took %ss', elapsed_time)
  metrics.LocalDetectEvent(
      detect_type=DetectType.INDEX_TARGETS_MS, result=int(elapsed_time * 1000)
  )
408
409
@dataclass
class Indices:
  """Container for the paths of every Atest index file."""

  locate_db: Path
  classes_idx: Path
  cc_classes_idx: Path
  packages_idx: Path
  fqcn_idx: Path

  def __init__(self):
    """Resolve every index path and ensure the index directory exists."""
    index_files = {
        'locate_db': 'plocate.db',
        'classes_idx': 'classes.idx',
        'cc_classes_idx': 'cc_classes.idx',
        'packages_idx': 'packages.idx',
        'fqcn_idx': 'fqcn.idx',
    }
    for attr, filename in index_files.items():
      setattr(self, attr, au.get_index_path(filename))
    au.get_index_path().mkdir(parents=True, exist_ok=True)

  def has_all_indices(self):
    """Whether all indices files exist."""
    exists = [
        index.exists()
        for index in (
            self.locate_db,
            self.classes_idx,
            self.cc_classes_idx,
            self.packages_idx,
            self.fqcn_idx,
        )
    ]
    if not all(exists):
      logging.debug("Some index file doesn't exist: %s", exists)
    return all(exists)
441