#!/usr/bin/env python3
#
# Copyright (C) 2021 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Provides useful diff information for build artifacts.

Uses collected build artifacts from two separate build invocations to
compare output artifacts of these builds and/or the commands executed
to generate them.

See the directory-level README for information about full usage, including
the collection step: a preparatory step required before invocation of this
tool.

Use `difftool.py --help` for full usage information of this tool.

Example Usage:
  ./difftool.py [left_dir] [right_dir] -l [left_output_file] \
      -r [right_output_file]

The -l/-r options may be omitted when collect.py recorded a target file for
the corresponding directory.

Difftool will compare [left_dir]/[left_output_file] and
[right_dir]/[right_output_file] and provide its best insightful analysis on the
differences between these files. The content and depth of this analysis depends
on the types of these files, and also on Difftool's verbosity mode. Difftool
may also use command data present in the left and right directories as part of
its analysis.
"""

import argparse
import enum
import functools
import json
import os
import pathlib
import re
import subprocess
import sys
from typing import Callable, Optional

import clangcompile
import commands
from collect import COLLECTION_INFO_FILENAME

DiffFunction = Callable[[pathlib.Path, pathlib.Path], list[str]]
"""Given two files, produces a list of differences."""


@functools.total_ordering
class DiffLevel(enum.Enum):
  """Defines the level of differences that should trigger a failure.

  E.g. when set to WARNING, differences deemed WARNING or SEVERE are taken into
  account while other differences (INFO, FINE etc.) will be ignored.
  """
  SEVERE = 1
  WARNING = 2
  INFO = 3
  FINE = 4

  def __lt__(self, other):
    """Orders levels by numeric value; total_ordering derives the rest."""
    if self.__class__ is other.__class__:
      return self.value < other.value
    return NotImplemented


class EnumAction(argparse.Action):
  """Parses command line options into Enum types.

  The enum class is passed through the `type` keyword of `add_argument`; the
  accepted command line values are the member names of that enum.
  """

  def __init__(self, **kwargs):
    enum_type = kwargs.pop("type", None)
    # Fail with an explicit message instead of the opaque "NoneType is not
    # iterable" raised when `type` is forgotten.
    if not (isinstance(enum_type, type) and issubclass(enum_type, enum.Enum)):
      raise TypeError("EnumAction requires `type` to be an Enum class, got %r" %
                      (enum_type,))
    kwargs.setdefault("choices", [e.name for e in enum_type])
    super().__init__(**kwargs)
    self._enum = enum_type

  def __call__(self, parser, namespace, values, option_string=None):
    # `choices` restricts `values` to member names, so the lookup succeeds.
    value = self._enum[values]
    setattr(namespace, self.dest, value)


class ArtifactType(enum.Enum):
  """Kinds of build artifact; selects which diff functions are applied."""
  AUTO_INFER_FROM_SUFFIX = 0
  CC_OBJECT = 1
  CC_SHARED_LIBRARY = 2
  CC_OBJECT_WITH_DEBUG_SYMBOLS = 3
  OTHER = 99


# Maps the values accepted by --file_type to artifact types.
FILE_TYPE_CHOICES = {
    "auto": ArtifactType.AUTO_INFER_FROM_SUFFIX,
    "object": ArtifactType.CC_OBJECT,
    "object_with_debug_symbols": ArtifactType.CC_OBJECT_WITH_DEBUG_SYMBOLS,
    "shared_library": ArtifactType.CC_SHARED_LIBRARY,
}


def _artifact_type(file_path: pathlib.Path) -> ArtifactType:
  """Infers the artifact type of a file from its suffix."""
  ext = file_path.suffix
  if ext in (".o", ".a"):
    return ArtifactType.CC_OBJECT
  if ext == ".so":
    return ArtifactType.CC_SHARED_LIBRARY
  return ArtifactType.OTHER


# TODO(usta) use libdiff
def literal_diff(left_path: pathlib.Path,
                 right_path: pathlib.Path) -> list[str]:
  """Returns the output lines of `diff left_path right_path`.

  Returns the empty list when the files are identical. If `diff` itself fails,
  a single line describing the failure is returned so that the failure is not
  mistaken for "no differences".
  """
  proc = subprocess.run(
      ["diff", str(left_path), str(right_path)],
      check=False,
      capture_output=True,
      encoding="utf-8",
      # Artifacts are not guaranteed to be valid UTF-8; never crash on decode.
      errors="replace")
  # diff exits with 0 (identical) or 1 (different); anything else is trouble.
  if proc.returncode not in (0, 1):
    return [
        "diff exited with status %d: %s" %
        (proc.returncode, proc.stderr.strip())
    ]
  return proc.stdout.splitlines()


@functools.cache
def _diff_fns(artifact_type: ArtifactType,
              level: DiffLevel) -> list[DiffFunction]:
  """Returns the diff functions to apply for the artifact type and level.

  The result is cached and shared between calls; callers must not mutate it.
  """
  fns = []

  if artifact_type in [
      ArtifactType.CC_OBJECT, ArtifactType.CC_OBJECT_WITH_DEBUG_SYMBOLS
  ]:
    fns.append(clangcompile.nm_differences)
    # NOTE(review): the source's indentation was lost; the bloaty checks are
    # assumed to be nested under this level check -- confirm against history.
    if level >= DiffLevel.WARNING:
      fns.append(clangcompile.elf_differences)
      if artifact_type == ArtifactType.CC_OBJECT_WITH_DEBUG_SYMBOLS:
        fns.append(clangcompile.bloaty_differences_compileunits)
      else:
        fns.append(clangcompile.bloaty_differences)
  else:
    fns.append(literal_diff)

  return fns


def collect_commands_bazel(expr: str, config: str, mnemonic: str, *args):
  """Returns the bazel aquery actions for `expr` with the given mnemonic.

  Runs `bazel aquery` (relative to the current working directory) with
  --config=bp2build plus --config=<config> and any extra `args`, echoes
  bazel's stderr, and filters the JSON actions by `mnemonic`.
  """
  bazel_tool_path = pathlib.Path("build/bazel/bin/bazel").resolve()
  bazel_proc = subprocess.run(
      [
          str(bazel_tool_path),
          "aquery",
          "--curses=no",
          "--config=bp2build",
          "--output=jsonproto",
          f"--config={config}",
          *args,
          str(expr),
      ],
      capture_output=True,
      encoding="utf-8",
  )
  print(bazel_proc.stderr)
  actions_json = json.loads(bazel_proc.stdout)
  # An aquery matching no actions may omit the "actions" field entirely.
  return [
      a for a in actions_json.get("actions", []) if a["mnemonic"] == mnemonic
  ]


def collect_commands_ninja(ninja_file_path: pathlib.Path,
                           output_file_path: pathlib.Path,
                           ninja_tool_path: pathlib.Path) -> list[str]:
  """Returns a list of all command lines required to build an output file.

  The commands are those that ninja reports (`ninja -t commands`) for building
  output_file_path, as described by the ninja file at ninja_file_path.
  """

  result = subprocess.check_output([
      str(ninja_tool_path), "-f",
      str(ninja_file_path), "-t", "commands",
      str(output_file_path)
  ]).decode("utf-8")
  return result.splitlines()


def collect_commands(ninja_file_path: pathlib.Path,
                     output_file_path: pathlib.Path) -> list[str]:
  """Returns the ninja command lines needed to build output_file_path.

  ninja is run from the directory containing the ninja file; the working
  directory is restored afterwards, even on failure. The ninja tool path is
  resolved against the working directory in effect when this is called.
  """
  ninja_tool_path = pathlib.Path(
      "prebuilts/build-tools/linux-x86/bin/ninja").resolve()
  wd = os.getcwd()
  try:
    os.chdir(ninja_file_path.parent.absolute())
    return collect_commands_ninja(
        ninja_file_path.name,
        output_file_path,
        ninja_tool_path,
    )
  finally:
    os.chdir(wd)


def file_differences(
    left_path: pathlib.Path,
    right_path: pathlib.Path,
    level: DiffLevel = DiffLevel.SEVERE,
    file_type: ArtifactType = ArtifactType.AUTO_INFER_FROM_SUFFIX
) -> list[str]:
  """Returns differences between the two given files.

  Returns the empty list if these files are deemed "similar enough". A missing
  file, or (when the type is inferred) mismatching file types, is reported as
  a difference without running any diff function.
  """

  errors = []
  if not left_path.is_file():
    errors += ["%s does not exist" % left_path]
  if not right_path.is_file():
    errors += ["%s does not exist" % right_path]
  if errors:
    return errors

  if file_type is ArtifactType.AUTO_INFER_FROM_SUFFIX:
    file_type = _artifact_type(left_path)
    right_type = _artifact_type(right_path)
    if file_type != right_type:
      errors += ["file types differ: %s and %s" % (file_type, right_type)]
      return errors

  for fn in _diff_fns(file_type, level):
    errors += fn(left_path, right_path)

  return errors


def parse_collection_info(
    info_file_path: pathlib.Path) -> tuple[pathlib.Path, Optional[str]]:
  """Parses the collection info file at the given path and returns details.

  Returns:
    A (ninja_path, target_file) pair. The first line of the file is the ninja
    file path; the optional second line is the target file, which is None when
    absent or empty.

  Raises:
    Exception: if the info file does not exist or is empty.
  """
  if not info_file_path.is_file():
    raise Exception("Expected file %s was not found. " % info_file_path +
                    "Did you run collect.py for this directory?")

  info_contents = info_file_path.read_text().splitlines()
  if not info_contents:
    raise Exception("Collection info file %s is empty. " % info_file_path +
                    "Did you run collect.py for this directory?")
  ninja_path = pathlib.Path(info_contents[0])
  target_file = None

  if len(info_contents) > 1 and info_contents[1]:
    target_file = info_contents[1]

  return ninja_path, target_file


# Pattern to parse out env-setting command prefix, for example:
#
# FOO=BAR KEY=VALUE {main_command_args}
env_set_prefix_pattern = re.compile(r"^(( )*([^ =]+=[^ =]+)( )*)+(.*)$")

# Pattern to parse out command prefixes which cd into the execroot and
# then remove the old output. For example:
#
# cd path/to/execroot && rm old_output && {main_command}
cd_rm_prefix_pattern = re.compile(r"^cd [^&]* &&( )+rm [^&]* && (.*)$")

# Pattern to parse out any trailing comment suffix. For example:
#
# {main_command} # This comment should be removed.
comment_suffix_pattern = re.compile(r"(.*) # .*")


def _remove_rbe_tokens(tokens, tool_endings):
  """Drops every token before the first one ending with a known tool name.

  Returns the tokens starting at the first token that ends with one of
  tool_endings, or None if there is no such token.
  """
  endings = tuple(tool_endings)
  for i, token in enumerate(tokens):
    if token.endswith(endings):
      return tokens[i:]
  return None


def rich_command_info(raw_command):
  """Returns a command info object describing the raw command string.

  Raises:
    ValueError: if no command remains once prefixes and comments are removed.
  """
  cmd = raw_command.strip()
  # Remove things unrelated to the core command.
  m = env_set_prefix_pattern.fullmatch(cmd)
  if m is not None:
    cmd = m.group(5)
  m = cd_rm_prefix_pattern.fullmatch(cmd)
  if m is not None:
    cmd = m.group(2)
  m = comment_suffix_pattern.fullmatch(cmd)
  if m is not None:
    cmd = m.group(1)
  tokens = cmd.split()
  if not tokens:
    raise ValueError("No command found in %r" % (raw_command,))
  # Skip any wrapper tokens that precede the clang invocation.
  tokens_without_rbe = _remove_rbe_tokens(tokens, ["clang", "clang++"])
  if tokens_without_rbe:
    tokens = tokens_without_rbe
  tool = tokens[0]
  args = tokens[1:]

  if tool.endswith(("clang", "clang++")):
    # TODO(cparsons): Disambiguate between clang compile and other clang
    # commands.
    return clangcompile.ClangCompileInfo(tool=tool, args=args)
  return commands.CommandInfo(tool=tool, args=args)


def _build_arg_parser() -> argparse.ArgumentParser:
  """Builds the command line parser for this tool."""
  parser = argparse.ArgumentParser(description="")
  parser.add_argument(
      "--level",
      action=EnumAction,
      default=DiffLevel.SEVERE,
      type=DiffLevel,
      help="the level of differences to be considered. "
      "Diffs below the specified level are ignored.")
  parser.add_argument(
      "--verbose",
      "-v",
      action=argparse.BooleanOptionalAction,
      default=False,
      help="log verbosely.")
  parser.add_argument(
      "left_dir",
      help="the 'left' directory to compare build outputs "
      "from. This must be the target of an invocation of collect.py.")
  parser.add_argument(
      "--left_file",
      "-l",
      dest="left_file",
      default=None,
      help="the output file (relative to execution root) for "
      "the 'left' build invocation.")
  parser.add_argument(
      "right_dir",
      help="the 'right' directory to compare build outputs "
      "from. This must be the target of an invocation of collect.py.")
  parser.add_argument(
      "--right_file",
      "-r",
      dest="right_file",
      default=None,
      help="the output file (relative to execution root) "
      "for the 'right' build invocation.")
  parser.add_argument(
      "--file_type",
      dest="file_type",
      default="auto",
      choices=list(FILE_TYPE_CHOICES),
      help="the type of file being diffed (overrides automatic "
      "filetype resolution)")
  parser.add_argument(
      "--allow_missing_file",
      action=argparse.BooleanOptionalAction,
      default=False,
      help="allow a missing output file; this is useful to "
      "compare actions even in the absence of an output file.")
  return parser


def _last_command_info(collection_dir, ninja_name, output_file):
  """Describes the last command ninja runs to build output_file, if any.

  Returns None when ninja reports no commands for the output file.
  """
  ninja_path = pathlib.Path(collection_dir).joinpath(ninja_name)
  build_commands = collect_commands(ninja_path, output_file)
  if not build_commands:
    return None
  return rich_command_info(build_commands[-1])


def main():
  """Compares the two collected outputs; exits 0 on match, 1 otherwise."""
  args = _build_arg_parser().parse_args()

  level = args.level
  left_diffinfo = pathlib.Path(args.left_dir).joinpath(COLLECTION_INFO_FILENAME)
  right_diffinfo = pathlib.Path(
      args.right_dir).joinpath(COLLECTION_INFO_FILENAME)

  left_ninja_name, left_file = parse_collection_info(left_diffinfo)
  right_ninja_name, right_file = parse_collection_info(right_diffinfo)
  # Files given on the command line override those recorded by collect.py.
  if args.left_file:
    left_file = pathlib.Path(args.left_file)
  if args.right_file:
    right_file = pathlib.Path(args.right_file)

  if left_file is None:
    raise Exception("No left file specified. Either run collect.py with a " +
                    "target file, or specify --left_file.")
  if right_file is None:
    raise Exception("No right file specified. Either run collect.py with a " +
                    "target file, or specify --right_file.")

  left_path = pathlib.Path(args.left_dir).joinpath(left_file)
  right_path = pathlib.Path(args.right_dir).joinpath(right_file)
  if not args.allow_missing_file:
    if not left_path.is_file():
      raise RuntimeError("Expected file %s was not found. " % left_path)
    if not right_path.is_file():
      raise RuntimeError("Expected file %s was not found. " % right_path)

  file_diff_errors = file_differences(left_path, right_path, level,
                                      FILE_TYPE_CHOICES[args.file_type])

  if file_diff_errors:
    for err in file_diff_errors:
      print(err)
    if args.verbose:
      left_command_info = _last_command_info(args.left_dir, left_ninja_name,
                                             left_file)
      right_command_info = _last_command_info(args.right_dir, right_ninja_name,
                                              right_file)
      if left_command_info is None or right_command_info is None:
        # Nothing to compare; still report the file differences via exit code.
        print("No build commands found for one or both output files; "
              "skipping action comparison.")
      else:
        print("======== ACTION COMPARISON: ========")
        print("=== LEFT ONLY:\n")
        print(left_command_info.compare(right_command_info))
        print()
        print("=== RIGHT ONLY:\n")
        print(right_command_info.compare(left_command_info))
        print()
    sys.exit(1)
  else:
    print(f"{left_file} matches\n{right_file}")
  sys.exit(0)


if __name__ == "__main__":
  main()