• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python3
2#
3# Copyright (C) 2021 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#   http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
17"""Provides useful diff information for build artifacts.
18
19Uses collected build artifacts from two separate build invocations to
20compare output artifacts of these builds and/or the commands executed
21to generate them.
22
23See the directory-level README for information about full usage, including
24the collection step: a preparatory step required before invocation of this
25tool.
26
27Use `difftool.py --help` for full usage information of this tool.
28
29Example Usage:
30  ./difftool.py [left_dir] [left_output_file] [right_dir] [right_output_file]
31
32Difftool will compare [left_dir]/[left_output_file] and
33[right_dir]/[right_output_file] and provide its best insightful analysis on the
34differences between these files. The content and depth of this analysis depends
on the types of these files, and also on Difftool's verbosity mode. Difftool
36may also use command data present in the left and right directories as part of
37its analysis.
38"""
39
40import argparse
41import enum
42import functools
43import os
44import pathlib
45import re
46import subprocess
47import sys
48from typing import Callable
49
50import clangcompile
51import commands
52from collect import COLLECTION_INFO_FILENAME
53
54DiffFunction = Callable[[pathlib.Path, pathlib.Path], list[str]]
55"""Given two files, produces a list of differences."""
56
57
@functools.total_ordering
class DiffLevel(enum.Enum):
  """Severity threshold at which a difference counts as a failure.

  Members are ordered by their numeric value, i.e. SEVERE < WARNING < INFO <
  FINE. For example, with a threshold of WARNING, differences deemed SEVERE or
  WARNING are taken into account, while INFO and FINE differences are ignored.
  """
  SEVERE = 1
  WARNING = 2
  INFO = 3
  FINE = 4

  def __lt__(self, other):
    # Only members of this enum are comparable with each other;
    # functools.total_ordering derives the other rich comparisons from this
    # method together with the enum's equality.
    if type(other) is not type(self):
      return NotImplemented
    return self.value < other.value
74
75
class EnumAction(argparse.Action):
  """Parses command line options into Enum types.

  The enum class is supplied through the `type` keyword of `add_argument`.
  Unless `choices` is given explicitly, the accepted command line values are
  the names of the enum's members, and the parsed value stored on the
  namespace is the member with that name.
  """

  def __init__(self, **kwargs):
    """Creates the action.

    Args:
      **kwargs: the usual argparse.Action keyword arguments; `type` must be an
        enum.Enum subclass.

    Raises:
      TypeError: if `type` is missing or is not an enum.Enum subclass.
    """
    # `type` is consumed here and not forwarded to argparse.Action; the
    # conversion from name to member happens in __call__ instead.
    enum_type = kwargs.pop("type", None)
    if not (isinstance(enum_type, type) and issubclass(enum_type, enum.Enum)):
      raise TypeError(
          "EnumAction requires type= to be an Enum class, got %r"
          % (enum_type,))
    kwargs.setdefault("choices", [member.name for member in enum_type])
    super().__init__(**kwargs)
    self._enum = enum_type

  def __call__(self, parser, namespace, values, option_string=None):
    # Look the member up by name, matching the names offered as choices.
    setattr(namespace, self.dest, self._enum[values])
88
89
@enum.unique
class ArtifactType(enum.Enum):
  """Kinds of build artifacts that are told apart when choosing a diff."""
  # C/C++ object file.
  CC_OBJECT = 1
  # C/C++ shared library.
  CC_SHARED_LIBRARY = 2
  # Anything that is not one of the kinds above.
  OTHER = 99
94
95
def _artifact_type(file_path):
  """Classifies the file at the given path by its suffix.

  A ".o" suffix maps to CC_OBJECT and ".so" to CC_SHARED_LIBRARY; any other
  suffix (or none at all) maps to OTHER.
  """
  by_suffix = {
      ".o": ArtifactType.CC_OBJECT,
      ".so": ArtifactType.CC_SHARED_LIBRARY,
  }
  return by_suffix.get(file_path.suffix, ArtifactType.OTHER)
104
105
# TODO(usta) use libdiff
def literal_diff(left_path: pathlib.Path, right_path: pathlib.Path) -> list[
  str]:
  """Returns the lines printed by the `diff` tool for the two given files.

  The list is empty when `diff` reports no differences. If `diff` itself fails
  (any exit status other than 0 or 1), the lines it wrote to stderr are
  appended, so that a failed comparison is never mistaken for identical files.

  Args:
    left_path: the first file to compare.
    right_path: the second file to compare.

  Returns:
    The output of `diff`, one list entry per line.
  """
  # errors="replace": the compared files are arbitrary build outputs, so the
  # diff output is not guaranteed to be valid UTF-8.
  result = subprocess.run(["diff", str(left_path), str(right_path)],
                          check=False, capture_output=True,
                          encoding="utf-8", errors="replace")
  lines = result.stdout.splitlines()
  if result.returncode not in (0, 1):
    lines += result.stderr.splitlines()
  return lines
112
113
@functools.cache
def _diff_fns(artifact_type: ArtifactType, level: DiffLevel) -> list[
  DiffFunction]:
  """Selects the diff functions to run for an artifact type and diff level.

  Object files are compared with clangcompile.nm_differences, and additionally
  with clangcompile.elf_differences when level is WARNING or greater. All other
  artifact types are compared with literal_diff only.

  The result is memoized per (artifact_type, level) pair.
  """
  if artifact_type != ArtifactType.CC_OBJECT:
    return [literal_diff]

  selected = [clangcompile.nm_differences]
  if level >= DiffLevel.WARNING:
    selected.append(clangcompile.elf_differences)
  return selected
127
128
def collect_commands(ninja_file_path: pathlib.Path,
    output_file_path: pathlib.Path) -> list[str]:
  """Returns a list of all command lines required to build the file at given
  output_file_path, as described by the ninja file present at
  ninja_file_path.

  Runs `ninja -f <ninja file name> -t commands <output file>` with the ninja
  file's parent directory as the working directory of the child process. The
  working directory of the calling process is left untouched, including when
  ninja cannot be started or fails.

  Args:
    ninja_file_path: path of the ninja file describing the build.
    output_file_path: the output file whose build commands are wanted.

  Returns:
    The lines printed by ninja, one command line per entry.

  Raises:
    subprocess.CalledProcessError: if ninja exits with a non-zero status.
    OSError: if the ninja binary or the ninja file's directory is missing.
  """
  # The tool path is relative, so it is resolved against the caller's current
  # working directory before the child is started in a different directory.
  ninja_tool_path = pathlib.Path(
      "prebuilts/build-tools/linux-x86/bin/ninja").resolve()
  result = subprocess.check_output(
      [str(ninja_tool_path),
       "-f", ninja_file_path.name,
       "-t", "commands",
       str(output_file_path)],
      cwd=ninja_file_path.parent.absolute()).decode("utf-8")
  return result.splitlines()
145
146
def file_differences(left_path: pathlib.Path, right_path: pathlib.Path,
    level=DiffLevel.SEVERE) -> list[str]:
  """Returns differences between the two given files.

  Returns the empty list if these files are deemed "similar enough".

  If either path is not an existing file, only the "does not exist" messages
  are returned. If the two files are of different artifact types, only that
  mismatch is returned. Otherwise the result is the concatenated output of
  every diff function selected for the artifact type and the given level.
  """
  missing = ["%s does not exist" % path
             for path in (left_path, right_path)
             if not path.is_file()]
  if missing:
    return missing

  left_type, right_type = _artifact_type(left_path), _artifact_type(right_path)
  if left_type != right_type:
    return ["file types differ: %s and %s" % (left_type, right_type)]

  differences = []
  for diff_fn in _diff_fns(left_type, level):
    differences.extend(diff_fn(left_path, right_path))
  return differences
170
171
def parse_collection_info(info_file_path: pathlib.Path):
  """Parses the collection info file at the given path and returns details.

  The first line of the file is taken as the path of the ninja file. The
  second line, when present and non-empty, is taken as the target file.

  Args:
    info_file_path: path of the collection info file.

  Returns:
    A (ninja_path, target_file) tuple. ninja_path is a pathlib.Path;
    target_file is a string, or None if the file does not name a target.

  Raises:
    Exception: if the info file does not exist.
    ValueError: if the info file does not name a ninja file on its first line.
  """
  if not info_file_path.is_file():
    raise Exception("Expected file %s was not found. " % info_file_path +
                    "Did you run collect.py for this directory?")

  info_contents = info_file_path.read_text().splitlines()
  # An empty or blank first line would otherwise surface as a bare IndexError
  # or as a meaningless ninja path further down the line.
  if not info_contents or not info_contents[0].strip():
    raise ValueError("Collection info file %s does not name a ninja file. "
                     % info_file_path +
                     "Did collect.py finish successfully for this directory?")

  ninja_path = pathlib.Path(info_contents[0])
  target_file = None

  if len(info_contents) > 1 and info_contents[1]:
    target_file = info_contents[1]

  return ninja_path, target_file
186
187
# Matches a command that starts with one or more environment assignments,
# for example:
#
# FOO=BAR KEY=VALUE {main_command_args}
#
# Group 5 holds whatever follows the assignments.
env_set_prefix_pattern = re.compile(
    r"^(( )*([^ =]+=[^ =]+)( )*)+(.*)$")

# Matches a command that first changes into the execroot and removes the old
# output before running the main command, for example:
#
# cd path/to/execroot && rm old_output && {main_command}
#
# Group 2 holds the main command.
cd_rm_prefix_pattern = re.compile(
    r"^cd [^&]* &&( )+rm [^&]* && (.*)$")

# Matches a command that ends in a comment, for example:
#
# {main_command} # This comment should be removed.
#
# Group 1 holds the command without the comment.
comment_suffix_pattern = re.compile(
    r"(.*) # .*")
203
204
def rich_command_info(raw_command):
  """Returns a command info object describing the raw command string.

  Before the command is split into a tool and its arguments, an env-setting
  prefix, a "cd ... && rm ... &&" prefix and a trailing comment are stripped,
  in that order. A tool whose name ends in "clang" or "clang++" yields a
  clangcompile.ClangCompileInfo; any other tool yields a commands.CommandInfo.
  """
  command = raw_command.strip()
  # Each entry pairs a pattern with the group that holds the part of the
  # command to keep when the pattern matches.
  strippers = (
      (env_set_prefix_pattern, 5),
      (cd_rm_prefix_pattern, 2),
      (comment_suffix_pattern, 1),
  )
  for pattern, kept_group in strippers:
    match = pattern.fullmatch(command)
    if match is not None:
      command = match.group(kept_group)

  words = command.split()
  tool, args = words[0], words[1:]

  # TODO(cparsons): Disambiguate between clang compile and other clang
  # commands.
  if tool.endswith(("clang", "clang++")):
    return clangcompile.ClangCompileInfo(tool=tool, args=args)
  return commands.CommandInfo(tool=tool, args=args)
228
229
def main():
  """Command line entry point: compares one output file of two builds.

  Reads the collection info of the left and right directories, determines the
  file to compare on each side (the --left_file/--right_file options take
  precedence over the collection info), and prints the differences found at
  the requested level. With --verbose, the last command that ninja reports for
  each output file is printed as well when the files differ.

  Exits with status 1 if differences were found and with status 0 otherwise.

  Raises:
    Exception: if no file to compare is known for one of the sides.
    RuntimeError: if a file to compare is missing and --allow_missing_file was
      not given.
  """
  parser = argparse.ArgumentParser(description="")
  parser.add_argument("--level",
                      action=EnumAction,
                      default=DiffLevel.SEVERE,
                      type=DiffLevel,
                      help="the level of differences to be considered. " +
                           "Diffs below the specified level are ignored.")
  parser.add_argument("--verbose", "-v",
                      action=argparse.BooleanOptionalAction,
                      default=False,
                      help="log verbosely.")
  parser.add_argument("left_dir",
                      help="the 'left' directory to compare build outputs " +
                           "from. This must be the target of an invocation " +
                           "of collect.py.")
  parser.add_argument("--left_file", "-l", dest="left_file", default=None,
                      help="the output file (relative to execution root) for " +
                           "the 'left' build invocation.")
  parser.add_argument("right_dir",
                      help="the 'right' directory to compare build outputs " +
                           "from. This must be the target of an invocation " +
                           "of collect.py.")
  parser.add_argument("--right_file", "-r", dest="right_file", default=None,
                      help="the output file (relative to execution root) " +
                           "for the 'right' build invocation.")
  parser.add_argument("--allow_missing_file",
                      action=argparse.BooleanOptionalAction,
                      default=False,
                      help="allow a missing output file; this is useful to " +
                           "compare actions even in the absence of " +
                           "an output file.")
  args = parser.parse_args()

  level = args.level
  left_dir = pathlib.Path(args.left_dir)
  right_dir = pathlib.Path(args.right_dir)

  left_ninja_name, left_file = parse_collection_info(
      left_dir.joinpath(COLLECTION_INFO_FILENAME))
  right_ninja_name, right_file = parse_collection_info(
      right_dir.joinpath(COLLECTION_INFO_FILENAME))
  # Files given on the command line override those recorded by collect.py.
  if args.left_file:
    left_file = pathlib.Path(args.left_file)
  if args.right_file:
    right_file = pathlib.Path(args.right_file)

  if left_file is None:
    raise Exception("No left file specified. Either run collect.py with a " +
                    "target file, or specify --left_file.")
  if right_file is None:
    raise Exception("No right file specified. Either run collect.py with a " +
                    "target file, or specify --right_file.")

  left_path = left_dir.joinpath(left_file)
  right_path = right_dir.joinpath(right_file)
  if not args.allow_missing_file:
    if not left_path.is_file():
      raise RuntimeError("Expected file %s was not found. " % left_path)
    if not right_path.is_file():
      raise RuntimeError("Expected file %s was not found. " % right_path)

  file_diff_errors = file_differences(left_path, right_path, level)

  if file_diff_errors:
    for err in file_diff_errors:
      print(err)
    if args.verbose:
      # Only the last command reported by ninja for each output file is
      # compared.
      left_ninja_path = left_dir.joinpath(left_ninja_name)
      left_commands = collect_commands(left_ninja_path, left_file)
      left_command_info = rich_command_info(left_commands[-1])
      right_ninja_path = right_dir.joinpath(right_ninja_name)
      right_commands = collect_commands(right_ninja_path, right_file)
      right_command_info = rich_command_info(right_commands[-1])
      print("======== ACTION COMPARISON: ========")
      print("=== LEFT:\n")
      print(left_command_info)
      print()
      print("=== RIGHT:\n")
      print(right_command_info)
      print()
    sys.exit(1)
  else:
    print(f"{left_file} matches\n{right_file}")
  sys.exit(0)


if __name__ == "__main__":
  main()
318