# Copyright 2023 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Methods to run tools over jars and cache their output."""

import dataclasses
import functools
import logging
import pathlib
import zipfile
from typing import List, Optional

from util import build_utils

_SRC_PATH = pathlib.Path(__file__).resolve().parents[4]
_JDEPS_PATH = _SRC_PATH / 'third_party/jdk/current/bin/jdeps'

_IGNORED_JAR_PATHS = [
    # This matches org_ow2_asm_asm_commons and org_ow2_asm_asm_analysis, both of
    # which fail jdeps (not sure why).
    'third_party/android_deps/libs/org_ow2_asm_asm',
]


def _is_relative_to(path: pathlib.Path, other_path: pathlib.Path) -> bool:
  """Returns whether |path| is |other_path| or located underneath it.

  This replicates pathlib.Path.is_relative_to. Since bots still run python3.8,
  they do not have access to is_relative_to, which was introduced in python3.9.
  """
  try:
    path.relative_to(other_path)
    return True
  except ValueError:
    # This error is expected when path is not a subpath of other_path.
    return False


@dataclasses.dataclass
class CacheFile:
  """A text cache file, stored in the output dir, holding tool output for a jar.

  Attributes:
    jar_path: The jar whose tool output is cached.
    cache_suffix: Suffix appended to the jar's file name to name the cache.
    build_output_dir: Output directory that every cache file is placed under.
    src_dir: Source root. Jars under it (but outside the output dir) have
      their path relative to it mirrored under <build_output_dir>/gen.
  """
  jar_path: pathlib.Path
  cache_suffix: str
  build_output_dir: pathlib.Path
  src_dir: pathlib.Path = _SRC_PATH

  def __post_init__(self):
    # Ensure that all paths are absolute so that relative_to works correctly.
    self.jar_path = self.jar_path.resolve()
    self.build_output_dir = self.build_output_dir.resolve()
    self.src_dir = self.src_dir.resolve()

  @functools.cached_property
  def cache_path(self) -> pathlib.Path:
    """Return a cache path for the jar that is always in the output dir.

    Example:
    - Given:
      src_dir = /cr/src
      build_output_dir = /cr/src/out/Debug
      cache_suffix = .jdeps
    - filepath = /cr/src/out/Debug/a/d/file.jar
      Returns: /cr/src/out/Debug/a/d/file.jar.jdeps
    - filepath = /cr/src/out/b/c/file.jar
      Returns: /cr/src/out/Debug/gen/b/c/file.jar.jdeps
    - filepath = /random/path/file.jar
      Returns: /cr/src/out/Debug/gen/abs/random/path/file.jar.jdeps
    """
    path = self.jar_path.with_suffix(self.jar_path.suffix + self.cache_suffix)
    if _is_relative_to(path, self.build_output_dir):
      # already in the outdir, no need to adjust cache path
      return path
    # Use the instance's src_dir rather than the module-level default so that
    # a caller-supplied source root is honored.
    if _is_relative_to(self.jar_path, self.src_dir):
      return self.build_output_dir / 'gen' / path.relative_to(self.src_dir)
    return self.build_output_dir / 'gen/abs' / path.relative_to(path.anchor)

  def is_valid(self) -> bool:
    """Returns True if the cache exists and is strictly newer than the jar."""
    return (self.cache_path.exists() and self.jar_path.exists()
            and self.cache_path.stat().st_mtime > self.jar_path.stat().st_mtime)

  def read(self) -> str:
    """Returns the full text content of the cache file."""
    with open(self.cache_path) as f:
      return f.read()

  def write(self, content: str):
    """Writes |content| to the cache file, replacing any previous content."""
    # If the jar file is in //src but not in the output dir or outside //src
    # then the reparented dirs within the output dir need to be created first.
    self.cache_path.parent.mkdir(parents=True, exist_ok=True)
    with open(self.cache_path, 'w') as f:
      f.write(content)


def _should_ignore(jar_path: pathlib.Path) -> bool:
  """Returns True if |jar_path| contains one of the ignored path fragments."""
  # The ignore list is written with forward slashes, so compare against the
  # forward-slash form of the path regardless of the host's separator.
  jar_path_str = jar_path.as_posix()
  return any(ignored in jar_path_str for ignored in _IGNORED_JAR_PATHS)


def run_jdeps(filepath: pathlib.Path,
              *,
              build_output_dir: pathlib.Path,
              jdeps_path: pathlib.Path = _JDEPS_PATH,
              src_path: pathlib.Path = _SRC_PATH) -> Optional[str]:
  """Runs jdeps on the given filepath and returns the output.

  Uses a simple file cache for the output of jdeps. If the jar file's mtime is
  older than the jdeps cache then just use the cached content instead.
  Otherwise jdeps is run again and the output used to update the file cache.

  Tested Nov 2nd, 2022:
  - With all cache hits, script takes 13 seconds.
  - Without the cache, script takes 1 minute 14 seconds.

  Args:
    filepath: Path to the jar to analyze.
    build_output_dir: Output directory under which the cache file is stored.
    jdeps_path: Path to the jdeps binary to run.
    src_path: Source root used to compute the cache location for jars that are
      outside of |build_output_dir|.

  Returns:
    The jdeps output, or None if the jar does not exist or is ignored.
  """
  # Some __compile_java targets do not generate a .jar file, skipping these
  # does not affect correctness.
  if not filepath.exists() or _should_ignore(filepath):
    return None

  cache_file = CacheFile(jar_path=filepath,
                         cache_suffix='.jdeps_cache',
                         build_output_dir=build_output_dir,
                         src_dir=src_path)
  if cache_file.is_valid():
    return cache_file.read()

  # Cache either doesn't exist or is older than the jar file.
  output = build_utils.CheckOutput([
      str(jdeps_path),
      '-verbose:class',
      '--multi-release',  # Some jars support multiple JDK releases.
      'base',  # Value for --multi-release.
      str(filepath),
  ])

  cache_file.write(output)
  return output


def extract_full_class_names_from_jar(build_output_dir: pathlib.Path,
                                      jar_path: pathlib.Path) -> List[str]:
  """Returns sorted list of fully qualified class names in passed-in jar.

  Inner classes are folded into their outermost class, so each name appears
  once. The result is cached in a file next to the jar's other cache files and
  reused while that cache is newer than the jar.
  """
  cache_file = CacheFile(jar_path=jar_path,
                         cache_suffix='.class_name_cache',
                         build_output_dir=build_output_dir)
  if cache_file.is_valid():
    return cache_file.read().splitlines()

  class_names = set()
  with zipfile.ZipFile(jar_path) as z:
    for zip_entry_name in z.namelist():
      if not zip_entry_name.endswith('.class'):
        continue
      # Remove .class suffix
      full_java_class = zip_entry_name[:-len('.class')]

      # Zip entries use '/' between package components.
      full_java_class = full_java_class.replace('/', '.')

      # Remove inner class names after the first $.
      full_java_class = full_java_class.partition('$')[0]

      class_names.add(full_java_class)
  sorted_class_names = sorted(class_names)

  cache_file.write('\n'.join(sorted_class_names))
  return sorted_class_names


def parse_full_java_class(source_path: pathlib.Path) -> str:
  """Guess the fully qualified class name from the path to the source file.

  The package is taken from the directory components walking up from the
  file until a "java" directory (excluded) or a "com"/"org" directory
  (included) is reached.

  Returns:
    The guessed class name, or '' if the file is not a .java/.kt file or no
    "java", "com" or "org" directory is found among its parent directories.
  """
  if source_path.suffix not in ('.java', '.kt'):
    logging.warning('"%s" does not end in .java or .kt.', source_path)
    return ''

  directory_path = source_path.parent
  package_list_reversed = []
  for part in reversed(directory_path.parts):
    if part == 'java':
      break
    package_list_reversed.append(part)
    if part in ('com', 'org'):
      break
  else:
    logging.debug(
        'File %s not in a subdir of "org" or "com", cannot detect '
        'package heuristically.', source_path)
    return ''

  class_name = source_path.stem
  if not package_list_reversed:
    # The file sits directly in a "java" directory, i.e. the default package;
    # avoid producing a name with a leading dot.
    return class_name
  package = '.'.join(reversed(package_list_reversed))
  return f'{package}.{class_name}'