# Copyright 2023 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Methods to run tools over jars and cache their output."""

import dataclasses
import functools
import logging
import pathlib
import zipfile
from typing import List, Optional

from util import build_utils

_SRC_PATH = pathlib.Path(__file__).resolve().parents[4]
_JDEPS_PATH = _SRC_PATH / 'third_party/jdk/current/bin/jdeps'

_IGNORED_JAR_PATHS = [
    # This matches org_ow2_asm_asm_commons and org_ow2_asm_asm_analysis, both of
    # which fail jdeps (not sure why).
    'third_party/android_deps/libs/org_ow2_asm_asm',
]

_CLASS_SUFFIX = '.class'


def _is_relative_to(path: pathlib.Path, other_path: pathlib.Path) -> bool:
  """This replicates pathlib.Path.is_relative_to.

  Since bots still run python3.8, they do not have access to is_relative_to,
  which was introduced in python3.9.

  Args:
    path: The path to test.
    other_path: The candidate ancestor of |path|.

  Returns:
    True if |path| is |other_path| or is located underneath it.
  """
  try:
    path.relative_to(other_path)
  except ValueError:
    # This error is expected when path is not a subpath of other_path.
    return False
  return True


@dataclasses.dataclass
class CacheFile:
  """A file-based cache entry holding a tool's output for a single jar.

  The cache entry is considered valid only while it is newer than the jar it
  was generated from.
  """
  # The jar whose tool output is cached.
  jar_path: pathlib.Path
  # Suffix appended to the jar's file name to form the cache file name.
  cache_suffix: str
  # Build output directory; the cache file always lives underneath it.
  build_output_dir: pathlib.Path
  # Source root used to re-parent jars that live outside the output dir.
  src_dir: pathlib.Path = _SRC_PATH

  def __post_init__(self):
    # Ensure that all paths are absolute so that relative_to works correctly.
    self.jar_path = self.jar_path.resolve()
    self.build_output_dir = self.build_output_dir.resolve()
    self.src_dir = self.src_dir.resolve()

  @functools.cached_property
  def cache_path(self) -> pathlib.Path:
    """Return a cache path for the jar that is always in the output dir.

    Example:
    - Given:
      src_dir = /cr/src
      build_output_dir = /cr/src/out/Debug
      cache_suffix = .jdeps
    - filepath = /cr/src/out/Debug/a/d/file.jar
      Returns: /cr/src/out/Debug/a/d/file.jar.jdeps
    - filepath = /cr/src/out/b/c/file.jar
      Returns: /cr/src/out/Debug/gen/b/c/file.jar.jdeps
    - filepath = /random/path/file.jar
      Returns: /cr/src/out/Debug/gen/abs/random/path/file.jar.jdeps
    """
    path = self.jar_path.with_suffix(self.jar_path.suffix + self.cache_suffix)
    if _is_relative_to(path, self.build_output_dir):
      # already in the outdir, no need to adjust cache path
      return path
    # Use the instance's src_dir (which defaults to _SRC_PATH) rather than the
    # module global so that a caller-supplied source root is honored.
    if _is_relative_to(self.jar_path, self.src_dir):
      return self.build_output_dir / 'gen' / path.relative_to(self.src_dir)
    return self.build_output_dir / 'gen/abs' / path.relative_to(path.anchor)

  def is_valid(self) -> bool:
    """Returns True if the cache file exists and is newer than the jar."""
    return (self.cache_path.exists() and self.jar_path.exists()
            and self.cache_path.stat().st_mtime > self.jar_path.stat().st_mtime)

  def read(self) -> str:
    """Returns the full text content of the cache file."""
    with open(self.cache_path) as f:
      return f.read()

  def write(self, content: str):
    """Writes |content| to the cache file, creating parent dirs as needed."""
    # If the jar file is in //src but not in the output dir or outside //src
    # then the reparented dirs within the output dir need to be created first.
    self.cache_path.parent.mkdir(parents=True, exist_ok=True)
    with open(self.cache_path, 'w') as f:
      f.write(content)


def _should_ignore(jar_path: pathlib.Path) -> bool:
  """Returns True if |jar_path| contains any entry of _IGNORED_JAR_PATHS."""
  # The ignore list uses forward slashes, so compare against the POSIX form of
  # the path; str() would use backslashes on Windows and never match.
  posix_jar_path = jar_path.as_posix()
  return any(ignored in posix_jar_path for ignored in _IGNORED_JAR_PATHS)


def run_jdeps(filepath: pathlib.Path,
              *,
              build_output_dir: pathlib.Path,
              jdeps_path: pathlib.Path = _JDEPS_PATH,
              src_path: pathlib.Path = _SRC_PATH) -> Optional[str]:
  """Runs jdeps on the given filepath and returns the output.

  Uses a simple file cache for the output of jdeps. If the jar file's mtime is
  older than the jdeps cache then just use the cached content instead.
  Otherwise jdeps is run again and the output used to update the file cache.

  Tested Nov 2nd, 2022:
  - With all cache hits, script takes 13 seconds.
  - Without the cache, script takes 1 minute 14 seconds.

  Args:
    filepath: Path to the jar to analyze.
    build_output_dir: Build output directory under which the cache is stored.
    jdeps_path: Path to the jdeps binary.
    src_path: Source root, forwarded to CacheFile as its src_dir.

  Returns:
    The jdeps output, or None if the jar does not exist or is ignored.
  """
  # Some __compile_java targets do not generate a .jar file, skipping these
  # does not affect correctness.
  if not filepath.exists() or _should_ignore(filepath):
    return None

  cache_file = CacheFile(jar_path=filepath,
                         cache_suffix='.jdeps_cache',
                         build_output_dir=build_output_dir,
                         src_dir=src_path)
  if cache_file.is_valid():
    return cache_file.read()

  # Cache either doesn't exist or is older than the jar file.
  output = build_utils.CheckOutput([
      str(jdeps_path),
      '-verbose:class',
      '--multi-release',  # Some jars support multiple JDK releases.
      'base',
      str(filepath),
  ])

  cache_file.write(output)
  return output


def extract_full_class_names_from_jar(build_output_dir: pathlib.Path,
                                      jar_path: pathlib.Path) -> List[str]:
  """Returns the fully qualified class names found in the passed-in jar.

  Inner classes are folded into their outermost class. Results are cached in
  a file next to (or re-parented under) the build output dir.

  Args:
    build_output_dir: Build output directory under which the cache is stored.
    jar_path: Path to the jar to inspect.

  Returns:
    A list of unique class names; sorted when freshly computed, and in the
    order stored in the cache file when served from the cache.
  """
  cache_file = CacheFile(jar_path=jar_path,
                         cache_suffix='.class_name_cache',
                         build_output_dir=build_output_dir)
  if cache_file.is_valid():
    return cache_file.read().splitlines()

  class_names = set()
  with zipfile.ZipFile(jar_path) as z:
    for zip_entry_name in z.namelist():
      if not zip_entry_name.endswith(_CLASS_SUFFIX):
        continue
      # Remove .class suffix and convert the zip entry path to a dotted name.
      full_java_class = zip_entry_name[:-len(_CLASS_SUFFIX)]
      full_java_class = full_java_class.replace('/', '.')
      # Remove inner class names after the first $.
      dollar_index = full_java_class.find('$')
      if dollar_index >= 0:
        full_java_class = full_java_class[:dollar_index]
      class_names.add(full_java_class)

  sorted_class_names = sorted(class_names)
  cache_file.write('\n'.join(sorted_class_names))
  return sorted_class_names


def parse_full_java_class(source_path: pathlib.Path) -> str:
  """Guess the fully qualified class name from the path to the source file.

  Walks the parent directories from the innermost outwards, collecting package
  components until a "java" directory (excluded) or a "com"/"org" directory
  (included) is reached.

  Args:
    source_path: Path to a .java or .kt source file.

  Returns:
    The guessed fully qualified class name, or '' if the file is not a
    .java/.kt file or no package root could be detected.
  """
  if source_path.suffix not in ('.java', '.kt'):
    logging.warning('"%s" does not end in .java or .kt.', source_path)
    return ''

  directory_path = source_path.parent
  package_list_reversed = []
  for part in reversed(directory_path.parts):
    if part == 'java':
      break
    package_list_reversed.append(part)
    if part in ('com', 'org'):
      break
  else:
    logging.debug(
        'File %s not in a subdir of "org" or "com", cannot detect '
        'package heuristically.', source_path)
    return ''

  package = '.'.join(reversed(package_list_reversed))
  class_name = source_path.stem
  if not package:
    # The file sits directly under "java/": avoid emitting a leading dot.
    return class_name
  return f'{package}.{class_name}'