• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python3
2#
3# Copyright (C) 2021 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#   http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16"""Provides useful diff information for build artifacts.
17
18Uses collected build artifacts from two separate build invocations to
19compare output artifacts of these builds and/or the commands executed
20to generate them.
21
22See the directory-level README for information about full usage, including
23the collection step: a preparatory step required before invocation of this
24tool.
25
26Use `difftool.py --help` for full usage information of this tool.
27
28Example Usage:
29  ./difftool.py [left_dir] [left_output_file] [right_dir] [right_output_file]
30
31Difftool will compare [left_dir]/[left_output_file] and
32[right_dir]/[right_output_file] and provide its best insightful analysis on the
33differences between these files. The content and depth of this analysis depends
34on the types of these files, and also on Difftool's verbosity mode. Difftool
35may also use command data present in the left and right directories as part of
36its analysis.
37"""
38
39import argparse
40import enum
41import functools
42import json
43import os
44import pathlib
45import re
46import subprocess
47import sys
48from typing import Callable
49
50import clangcompile
51import commands
52from collect import COLLECTION_INFO_FILENAME
53
# Common signature of the diff routines used in this file: each takes the
# left and right file paths and returns the differences as a list of strings.
DiffFunction = Callable[[pathlib.Path, pathlib.Path], list[str]]
"""Given two files, produces a list of differences."""
56
57
@functools.total_ordering
class DiffLevel(enum.Enum):
  """Severity threshold deciding which differences count as a failure.

  Members are ordered by their integer value, SEVERE being the smallest and
  FINE the largest. For example, when the level is WARNING, differences
  deemed WARNING or SEVERE are taken into account while the remaining ones
  (INFO, FINE) are ignored.
  """
  SEVERE = 1
  WARNING = 2
  INFO = 3
  FINE = 4

  def __lt__(self, other):
    # Only members of the very same enum are comparable; for anything else
    # defer to Python's default handling of unsupported comparisons.
    if type(other) is not type(self):
      return NotImplemented
    return self.value < other.value
74
75
class EnumAction(argparse.Action):
  """Parses command line options into Enum types.

  Intended use:
    parser.add_argument("--flag", action=EnumAction, type=SomeEnum)

  The option's accepted values are the enum member names; the parsed value
  stored on the namespace is the corresponding enum member.
  """

  def __init__(self, **kwargs):
    """Creates the action.

    Args:
      **kwargs: the keyword arguments argparse passes to actions. `type` must
        be the Enum class to parse into. `choices` defaults to the names of
        that enum's members unless given explicitly.

    Raises:
      TypeError: if `type` is missing.
    """
    # `type` is removed from kwargs so it is not forwarded to the base
    # class; the name -> member lookup is done in __call__ instead.
    enum_type = kwargs.pop("type", None)
    if enum_type is None:
      raise TypeError("EnumAction requires the type= argument to be set to "
                      "an Enum class")
    kwargs.setdefault("choices", [e.name for e in enum_type])
    super().__init__(**kwargs)
    self._enum = enum_type

  def __call__(self, parser, namespace, values, option_string=None):
    """Stores the enum member named `values` on the namespace."""
    setattr(namespace, self.dest, self._enum[values])
88
89
class ArtifactType(enum.Enum):
  """Kinds of build artifacts that this tool distinguishes when diffing."""
  # Placeholder meaning "derive the type from the file suffix"; it is
  # replaced by a concrete type in file_differences().
  AUTO_INFER_FROM_SUFFIX = 0
  CC_OBJECT = 1
  CC_SHARED_LIBRARY = 2
  CC_OBJECT_WITH_DEBUG_SYMBOLS = 3
  # Returned by _artifact_type() for any suffix it does not recognize.
  OTHER = 99
96
97
# Maps the values accepted by the --file_type command line flag to the
# artifact type they select. ArtifactType.OTHER has no entry here.
FILE_TYPE_CHOICES = {
    "auto": ArtifactType.AUTO_INFER_FROM_SUFFIX,
    "object": ArtifactType.CC_OBJECT,
    "object_with_debug_symbols": ArtifactType.CC_OBJECT_WITH_DEBUG_SYMBOLS,
    "shared_library": ArtifactType.CC_SHARED_LIBRARY,
}
104
105
def _artifact_type(file_path):
  """Infers the artifact type of a file from its filename suffix.

  Args:
    file_path: a path object exposing a `suffix` attribute.

  Returns:
    CC_SHARED_LIBRARY for ".so", CC_OBJECT for ".o" and ".a", and OTHER for
    every other suffix.
  """
  suffix = file_path.suffix
  if suffix == ".so":
    return ArtifactType.CC_SHARED_LIBRARY
  if suffix in (".o", ".a"):
    return ArtifactType.CC_OBJECT
  return ArtifactType.OTHER
114
115
# TODO(usta) use libdiff
def literal_diff(left_path: pathlib.Path,
                 right_path: pathlib.Path) -> list[str]:
  """Compares two files with the external `diff` utility.

  Args:
    left_path: path of the first file.
    right_path: path of the second file.

  Returns:
    The lines printed by `diff`; the empty list when it reports no
    differences. If `diff` itself fails (any exit status other than 0 or 1),
    its error output is included so that the failure is not mistaken for
    "the files are identical".
  """
  proc = subprocess.run(
      ["diff", str(left_path), str(right_path)],
      check=False,
      capture_output=True,
      encoding="utf-8",
      # diff echoes file content, which is not guaranteed to be valid UTF-8;
      # substitute undecodable bytes instead of raising UnicodeDecodeError.
      errors="replace")
  lines = proc.stdout.splitlines()
  # Exit status 0 means "same" and 1 means "different"; everything else is
  # trouble (e.g. an unreadable file) and is described on stderr.
  if proc.returncode not in (0, 1):
    lines += proc.stderr.splitlines() or [
        "diff exited with status %d" % proc.returncode
    ]
  return lines
124
125
@functools.cache
def _diff_fns(artifact_type: ArtifactType,
              level: DiffLevel) -> list[DiffFunction]:
  """Selects the diff functions to run for an artifact type and level.

  Object files (with or without debug symbols) are always compared with
  clangcompile.nm_differences; when `level` is WARNING or a greater level
  they are additionally compared with clangcompile.elf_differences and one
  of the bloaty-based comparisons. Every other artifact type is compared
  with literal_diff only.

  The result is memoized per (artifact_type, level) pair, so the returned
  list is shared between calls.
  """
  object_types = (
      ArtifactType.CC_OBJECT,
      ArtifactType.CC_OBJECT_WITH_DEBUG_SYMBOLS,
  )
  if artifact_type not in object_types:
    return [literal_diff]

  selected = [clangcompile.nm_differences]
  if level >= DiffLevel.WARNING:
    selected.append(clangcompile.elf_differences)
    selected.append(
        clangcompile.bloaty_differences_compileunits
        if artifact_type == ArtifactType.CC_OBJECT_WITH_DEBUG_SYMBOLS
        else clangcompile.bloaty_differences)
  return selected
145
146
def collect_commands_bazel(expr: str, config: str, mnemonic: str, *args):
  """Runs `bazel aquery` and returns the actions with the given mnemonic.

  Args:
    expr: the query expression passed to `bazel aquery`.
    config: name of a bazel config, passed as `--config=<config>` in addition
      to the always-present `--config=bp2build`.
    mnemonic: only actions whose "mnemonic" equals this value are returned.
    *args: extra command line arguments inserted before the expression.

  Returns:
    A list of action dictionaries taken from the "actions" field of the
    jsonproto output; empty if the output has no such field.

  Raises:
    json.JSONDecodeError: if bazel did not print valid JSON (typically
      because the invocation failed); the message includes the exit code.
  """
  # The tool path is relative to the current working directory.
  bazel_tool_path = pathlib.Path("build/bazel/bin/bazel").resolve()
  bazel_proc = subprocess.run(
      [
          bazel_tool_path,
          "aquery",
          "--curses=no",
          "--config=bp2build",
          "--output=jsonproto",
          f"--config={config}",
          *args,
          f"{expr}",
      ],
      capture_output=True,
      encoding="utf-8",
  )
  print(bazel_proc.stderr)
  try:
    actions_json = json.loads(bazel_proc.stdout)
  except json.JSONDecodeError as e:
    # Re-raise the same exception type, but say where the bad input came
    # from; bazel's own diagnostics were printed just above.
    raise json.JSONDecodeError(
        "bazel aquery (exit code %d) did not print valid JSON: %s" %
        (bazel_proc.returncode, e.msg), e.doc, e.pos) from e
  # The "actions" field may be absent when no action matched the query.
  return [
      a for a in actions_json.get("actions", [])
      if a.get("mnemonic") == mnemonic
  ]
166
167
def collect_commands_ninja(ninja_file_path: pathlib.Path,
                           output_file_path: pathlib.Path,
                           ninja_tool_path: pathlib.Path) -> list[str]:
  """Lists the command lines ninja reports for building an output file.

  Runs `<ninja_tool_path> -f <ninja_file_path> -t commands
  <output_file_path>` and splits what it prints into lines.

  Args:
    ninja_file_path: the ninja file describing the build.
    output_file_path: the output file whose build commands are requested.
    ninja_tool_path: the ninja executable to invoke.

  Returns:
    One string per line printed by ninja.

  Raises:
    subprocess.CalledProcessError: if ninja exits with a non-zero status.
  """
  ninja_invocation = [
      str(ninja_tool_path),
      "-f",
      ninja_file_path,
      "-t",
      "commands",
      str(output_file_path),
  ]
  raw_output = subprocess.check_output(ninja_invocation)
  return raw_output.decode("utf-8").splitlines()
182
183
def collect_commands(ninja_file_path: pathlib.Path,
                     output_file_path: pathlib.Path) -> list[str]:
  """Returns the ninja command lines required to build output_file_path.

  ninja is run from the directory containing the ninja file; the previous
  working directory is restored afterwards, also when ninja fails.

  Args:
    ninja_file_path: path to the ninja file describing the build.
    output_file_path: the output file whose build commands are requested.

  Returns:
    One string per command line reported by ninja.
  """
  # Resolved before changing directory: the tool path is relative to the
  # caller's working directory, not to the ninja file's directory.
  ninja_tool_path = pathlib.Path(
      "prebuilts/build-tools/linux-x86/bin/ninja").resolve()
  wd = os.getcwd()
  os.chdir(ninja_file_path.parent.absolute())
  try:
    return collect_commands_ninja(
        ninja_file_path.name,
        output_file_path,
        ninja_tool_path,
    )
  finally:
    os.chdir(wd)
200
201
def file_differences(
    left_path: pathlib.Path,
    right_path: pathlib.Path,
    level=DiffLevel.SEVERE,
    file_type=ArtifactType.AUTO_INFER_FROM_SUFFIX) -> list[str]:
  """Returns differences between the two given files.

  Args:
    left_path: the first file to compare.
    right_path: the second file to compare.
    level: the diff level used to select which diff functions are run.
    file_type: the artifact type of both files. With AUTO_INFER_FROM_SUFFIX
      the type is derived from each file's suffix.

  Returns:
    A list of human-readable difference descriptions. The empty list means
    the files are deemed "similar enough". A missing file, or a mismatch
    between the inferred types of the two files, is reported as a difference
    without running any diff function.
  """
  missing = [
      "%s does not exist" % path
      for path in (left_path, right_path)
      if not path.is_file()
  ]
  if missing:
    return missing

  if file_type is ArtifactType.AUTO_INFER_FROM_SUFFIX:
    file_type = _artifact_type(left_path)
    right_type = _artifact_type(right_path)
    if file_type != right_type:
      return ["file types differ: %s and %s" % (file_type, right_type)]

  differences = []
  for diff_fn in _diff_fns(file_type, level):
    differences.extend(diff_fn(left_path, right_path))
  return differences
231
232
def parse_collection_info(info_file_path: pathlib.Path):
  """Parses the collection info file at the given path and returns details.

  The first line of the file is the path of the ninja file; the optional
  second line names the target file.

  Args:
    info_file_path: path of the collection info file.

  Returns:
    A (ninja_path, target_file) tuple. ninja_path is a pathlib.Path;
    target_file is the second line as a string, or None if that line is
    absent or empty.

  Raises:
    Exception: if the file does not exist, or if it does not start with a
      ninja file path.
  """
  if not info_file_path.is_file():
    raise Exception("Expected file %s was not found. " % info_file_path +
                    "Did you run collect.py for this directory?")

  info_contents = info_file_path.read_text().splitlines()
  # Without this check an empty file fails with a bare IndexError, and a
  # blank first line silently turns into the path ".".
  if not info_contents or not info_contents[0].strip():
    raise Exception("Collection info file %s is empty or does not start "
                    "with a ninja file path. " % info_file_path +
                    "Did you run collect.py for this directory?")

  ninja_path = pathlib.Path(info_contents[0])
  target_file = None

  if len(info_contents) > 1 and info_contents[1]:
    target_file = info_contents[1]

  return ninja_path, target_file
247
248
# Pattern to parse out env-setting command prefix, for example:
#
# FOO=BAR KEY=VALUE {main_command_args}
#
# Group 5 captures what follows the KEY=VALUE assignments.
env_set_prefix_pattern = re.compile("^(( )*([^ =]+=[^ =]+)( )*)+(.*)$")

# Pattern to parse out command prefixes which cd into the execroot and
# then remove the old output. For example:
#
# cd path/to/execroot && rm old_output && {main_command}
#
# Group 2 captures {main_command}.
cd_rm_prefix_pattern = re.compile("^cd [^&]* &&( )+rm [^&]* && (.*)$")

# Pattern to parse out any trailing comment suffix. For example:
#
# {main_command} # This comment should be removed.
#
# Group 1 captures the text before the comment. The leading (.*) is greedy,
# so when " # " occurs several times the split happens at the last one.
comment_suffix_pattern = re.compile("(.*) # .*")
264
265
266def _remove_rbe_tokens(tokens, tool_endings):
267  for i in range(len(tokens)):
268    for ending in tool_endings:
269      if tokens[i].endswith(ending):
270        return tokens[i:]
271  return None
272
273
def rich_command_info(raw_command):
  """Returns a command info object describing the raw command string.

  The command is first reduced to its core: an env-setting prefix, a
  "cd ... && rm ... &&" prefix and a trailing comment are removed, in that
  order, and any tokens preceding a clang/clang++ token are dropped. The
  first remaining token is the tool, the rest are its arguments.

  Args:
    raw_command: the full command line as a single string.

  Returns:
    A clangcompile.ClangCompileInfo if the tool ends with "clang" or
    "clang++", otherwise a commands.CommandInfo.
  """
  core = raw_command.strip()
  # Remove things unrelated to the core command. Each entry pairs a pattern
  # with the group that holds the remaining command when the pattern matches.
  strippers = (
      (env_set_prefix_pattern, 5),
      (cd_rm_prefix_pattern, 2),
      (comment_suffix_pattern, 1),
  )
  for pattern, group in strippers:
    matched = pattern.fullmatch(core)
    if matched is not None:
      core = matched.group(group)

  words = core.split()
  words = _remove_rbe_tokens(words, ["clang", "clang++"]) or words
  tool = words[0]
  args = words[1:]

  if not tool.endswith(("clang", "clang++")):
    return commands.CommandInfo(tool=tool, args=args)
  # TODO(cparsons): Disambiguate between clang compile and other clang
  # commands.
  return clangcompile.ClangCompileInfo(tool=tool, args=args)
300
301
def _build_arg_parser() -> argparse.ArgumentParser:
  """Builds the command line parser of this tool."""
  parser = argparse.ArgumentParser(description="")
  parser.add_argument(
      "--level",
      action=EnumAction,
      default=DiffLevel.SEVERE,
      type=DiffLevel,
      help="the level of differences to be considered. " +
      "Diffs below the specified level are ignored.")
  parser.add_argument(
      "--verbose",
      "-v",
      action=argparse.BooleanOptionalAction,
      default=False,
      help="log verbosely.")
  parser.add_argument(
      "left_dir",
      help="the 'left' directory to compare build outputs " +
      "from. This must be the target of an invocation of collect.py.")
  parser.add_argument(
      "--left_file",
      "-l",
      dest="left_file",
      default=None,
      help="the output file (relative to execution root) for " +
      "the 'left' build invocation.")
  parser.add_argument(
      "right_dir",
      help="the 'right' directory to compare build outputs " +
      "from. This must be the target of an invocation of collect.py.")
  parser.add_argument(
      "--right_file",
      "-r",
      dest="right_file",
      default=None,
      help="the output file (relative to execution root) " +
      "for the 'right' build invocation.")
  parser.add_argument(
      "--file_type",
      dest="file_type",
      default="auto",
      choices=FILE_TYPE_CHOICES.keys(),
      help="the type of file being diffed (overrides automatic " +
      "filetype resolution)")
  parser.add_argument(
      "--allow_missing_file",
      action=argparse.BooleanOptionalAction,
      default=False,
      help="allow a missing output file; this is useful to " +
      "compare actions even in the absence of an output file.")
  return parser


def _final_command_info(collection_dir, ninja_name, output_file):
  """Returns info on the last command ninja lists for output_file, or None.

  None is returned when ninja reports no commands at all for the file.
  """
  ninja_path = pathlib.Path(collection_dir).joinpath(ninja_name)
  build_commands = collect_commands(ninja_path, output_file)
  if not build_commands:
    return None
  return rich_command_info(build_commands[-1])


def main():
  """Command line entry point: diffs two collected build outputs.

  Prints the differences between the left and right files, plus a comparison
  of the commands that built them when --verbose is given. Exits with status
  1 if any differences were found and with status 0 otherwise.

  Raises:
    Exception: if a collection info file is missing, or if no left/right
      file was specified either at collection time or on the command line.
    RuntimeError: if an output file is missing and --allow_missing_file was
      not given.
  """
  args = _build_arg_parser().parse_args()

  level = args.level
  left_diffinfo = pathlib.Path(args.left_dir).joinpath(COLLECTION_INFO_FILENAME)
  right_diffinfo = pathlib.Path(
      args.right_dir).joinpath(COLLECTION_INFO_FILENAME)

  left_ninja_name, left_file = parse_collection_info(left_diffinfo)
  right_ninja_name, right_file = parse_collection_info(right_diffinfo)
  # Files given on the command line take precedence over the collected ones.
  if args.left_file:
    left_file = pathlib.Path(args.left_file)
  if args.right_file:
    right_file = pathlib.Path(args.right_file)

  if left_file is None:
    raise Exception("No left file specified. Either run collect.py with a " +
                    "target file, or specify --left_file.")
  if right_file is None:
    raise Exception("No right file specified. Either run collect.py with a " +
                    "target file, or specify --right_file.")

  left_path = pathlib.Path(args.left_dir).joinpath(left_file)
  right_path = pathlib.Path(args.right_dir).joinpath(right_file)
  if not args.allow_missing_file:
    if not left_path.is_file():
      raise RuntimeError("Expected file %s was not found. " % left_path)
    if not right_path.is_file():
      raise RuntimeError("Expected file %s was not found. " % right_path)

  file_diff_errors = file_differences(left_path, right_path, level,
                                      FILE_TYPE_CHOICES[args.file_type])

  if file_diff_errors:
    for err in file_diff_errors:
      print(err)
    if args.verbose:
      left_command_info = _final_command_info(args.left_dir, left_ninja_name,
                                              left_file)
      right_command_info = _final_command_info(args.right_dir,
                                               right_ninja_name, right_file)
      if left_command_info is None or right_command_info is None:
        # Nothing to compare; still report the file differences via exit 1.
        print("No build commands found for one or both files; " +
              "skipping action comparison.")
      else:
        print("======== ACTION COMPARISON: ========")
        print("=== LEFT ONLY:\n")
        print(left_command_info.compare(right_command_info))
        print()
        print("=== RIGHT ONLY:\n")
        print(right_command_info.compare(left_command_info))
        print()
    sys.exit(1)
  else:
    print(f"{left_file} matches\n{right_file}")
  sys.exit(0)
405
406
407if __name__ == "__main__":
408  main()
409