• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2013 The Chromium Authors
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5
6import difflib
7import hashlib
8import itertools
9import json
10import os
11import sys
12import zipfile
13
14from util import build_utils
15import action_helpers  # build_utils adds //build to sys.path.
16import print_python_deps
17
18# When set and a difference is detected, a diff of what changed is printed.
19PRINT_EXPLANATIONS = int(os.environ.get('PRINT_BUILD_EXPLANATIONS', 0))
20
21# An escape hatch that causes all targets to be rebuilt.
22_FORCE_REBUILD = int(os.environ.get('FORCE_REBUILD', 0))
23
24
def CallAndWriteDepfileIfStale(on_stale_md5,
                               options,
                               record_path=None,
                               input_paths=None,
                               input_strings=None,
                               output_paths=None,
                               force=False,
                               pass_changes=False,
                               track_subpaths_allowlist=None,
                               depfile_deps=None):
  """Wrapper around CallAndRecordIfStale() that also emits a depfile.

  When |options| carries a depfile path, the depfile is (re)written after
  |on_stale_md5| runs, listing this script's python dependencies plus any
  extra paths given via |depfile_deps|.

  Only paths not already captured by GN deps belong in depfile_deps: GN args
  can cause GN deps to change, and such changes are not immediately reflected
  in depfiles (http://crbug.com/589311).
  """
  if not output_paths:
    raise Exception('At least one output_path must be specified.')
  # Copy the caller's lists so they are never mutated.
  input_paths = list(input_paths or [])
  input_strings = list(input_strings or [])
  output_paths = list(output_paths or [])

  # This script's transitive .py imports are implicit inputs.
  input_paths.extend(print_python_deps.ComputePythonDependencies())

  CallAndRecordIfStale(on_stale_md5,
                       record_path=record_path,
                       input_paths=input_paths,
                       input_strings=input_strings,
                       output_paths=output_paths,
                       force=force,
                       pass_changes=pass_changes,
                       track_subpaths_allowlist=track_subpaths_allowlist)

  # The depfile is written even when inputs have not changed, for correctness
  # on bots that build with & without a patch when the patch moves the depfile
  # location.
  if getattr(options, 'depfile', None):
    action_helpers.write_depfile(options.depfile, output_paths[0], depfile_deps)
69
70
def CallAndRecordIfStale(function,
                         record_path=None,
                         input_paths=None,
                         input_strings=None,
                         output_paths=None,
                         force=False,
                         pass_changes=False,
                         track_subpaths_allowlist=None):
  """Calls function if outputs are stale.

  Outputs are considered stale if:
  - any output_paths are missing, or
  - the contents of any file within input_paths has changed, or
  - the contents of input_strings has changed.

  To debug which files are out-of-date, set the environment variable:
      PRINT_BUILD_EXPLANATIONS=1

  Args:
    function: The function to call.
    record_path: Path to record metadata.
      Defaults to output_paths[0] + '.md5.stamp'
    input_paths: List of paths to calculate an md5 sum on.
    input_strings: List of strings to record verbatim.
    output_paths: List of output paths.
    force: Whether to treat outputs as missing regardless of whether they
      actually are.
    pass_changes: Whether to pass a Changes instance to |function|.
    track_subpaths_allowlist: Relevant only when pass_changes=True. List of .zip
      files from |input_paths| to make subpath information available for.
  """
  assert record_path or output_paths
  input_paths = input_paths or []
  input_strings = input_strings or []
  output_paths = output_paths or []
  record_path = record_path or output_paths[0] + '.md5.stamp'

  assert record_path.endswith('.stamp'), (
      'record paths must end in \'.stamp\' so that they are easy to find '
      'and delete')

  # Per-entry tracking is only needed when someone will inspect the diff.
  new_metadata = _Metadata(track_entries=pass_changes or PRINT_EXPLANATIONS)
  new_metadata.AddStrings(input_strings)

  zip_allowlist = set(track_subpaths_allowlist or [])
  for path in input_paths:
    if os.path.isabs(path):
      path = os.path.relpath(path)
    # It's faster to md5 an entire zip file than it is to just locate & hash
    # its central directory (which is what this used to do).
    if path in zip_allowlist:
      entries = _ExtractZipEntries(path)
      new_metadata.AddZipFile(path, entries)
    else:
      new_metadata.AddFile(path, _ComputeTagForPath(path))

  force = force or _FORCE_REBUILD
  missing_outputs = [x for x in output_paths if force or not os.path.exists(x)]
  old_metadata = None

  if not missing_outputs and os.path.exists(record_path):
    with open(record_path, 'r') as jsonfile:
      try:
        old_metadata = _Metadata.FromFile(jsonfile)
      except Exception:  # pylint: disable=broad-except
        # A corrupt or old-format stamp file just means the target is stale.
        # Narrowed from a bare "except:" so that KeyboardInterrupt and
        # SystemExit are not silently swallowed here.
        pass

  changes = Changes(old_metadata, new_metadata, force, missing_outputs)
  if not changes.HasChanges():
    return

  if PRINT_EXPLANATIONS:
    print('=' * 80)
    print('Target is stale: %s' % record_path)
    print(changes.DescribeDifference())
    print('=' * 80)

  args = (changes,) if pass_changes else ()
  function(*args)

  # Record the new metadata only after |function| succeeds, so a failed run
  # stays stale on the next invocation.
  with open(record_path, 'w') as f:
    new_metadata.ToFile(f)
153
154
class Changes:
  """Provides an API for querying what changed between runs."""

  def __init__(self, old_metadata, new_metadata, force, missing_outputs):
    # old_metadata is None when no (readable) stamp file existed.
    self.old_metadata = old_metadata
    self.new_metadata = new_metadata
    self.force = force
    self.missing_outputs = missing_outputs

  def _GetOldTag(self, path, subpath=None):
    # Returns None (falsy) when there is no old metadata at all.
    return self.old_metadata and self.old_metadata.GetTag(path, subpath)

  def HasChanges(self):
    """Returns whether any changes exist."""
    # HasStringChanges() is True whenever old_metadata is None, so the
    # short-circuit below makes the old_metadata access safe.
    return (self.HasStringChanges()
            or self.old_metadata.FilesMd5() != self.new_metadata.FilesMd5())

  def HasStringChanges(self):
    """Returns whether string metadata changed."""
    return (self.force or not self.old_metadata
            or self.old_metadata.StringsMd5() != self.new_metadata.StringsMd5())

  def AddedOrModifiedOnly(self):
    """Returns whether the only changes were from added or modified (sub)files.

    No missing outputs, no removed paths/subpaths.
    """
    if self.HasStringChanges():
      return False
    if any(self.IterRemovedPaths()):
      return False
    for path in self.IterModifiedPaths():
      if any(self.IterRemovedSubpaths(path)):
        return False
    return True

  def IterAllPaths(self):
    """Generator for paths."""
    return self.new_metadata.IterPaths()

  def IterAllSubpaths(self, path):
    """Generator for subpaths."""
    return self.new_metadata.IterSubpaths(path)

  def IterAddedPaths(self):
    """Generator for paths that were added."""
    for path in self.new_metadata.IterPaths():
      if self._GetOldTag(path) is None:
        yield path

  def IterAddedSubpaths(self, path):
    """Generator for paths that were added within the given zip file."""
    for subpath in self.new_metadata.IterSubpaths(path):
      if self._GetOldTag(path, subpath) is None:
        yield subpath

  def IterRemovedPaths(self):
    """Generator for paths that were removed."""
    if self.old_metadata:
      for path in self.old_metadata.IterPaths():
        if self.new_metadata.GetTag(path) is None:
          yield path

  def IterRemovedSubpaths(self, path):
    """Generator for paths that were removed within the given zip file."""
    if self.old_metadata:
      for subpath in self.old_metadata.IterSubpaths(path):
        if self.new_metadata.GetTag(path, subpath) is None:
          yield subpath

  def IterModifiedPaths(self):
    """Generator for paths whose contents have changed."""
    for path in self.new_metadata.IterPaths():
      old_tag = self._GetOldTag(path)
      new_tag = self.new_metadata.GetTag(path)
      # old_tag is None means added, not modified; handled elsewhere.
      if old_tag is not None and old_tag != new_tag:
        yield path

  def IterModifiedSubpaths(self, path):
    """Generator for paths within a zip file whose contents have changed."""
    for subpath in self.new_metadata.IterSubpaths(path):
      old_tag = self._GetOldTag(path, subpath)
      new_tag = self.new_metadata.GetTag(path, subpath)
      if old_tag is not None and old_tag != new_tag:
        yield subpath

  def IterChangedPaths(self):
    """Generator for all changed paths (added/removed/modified)."""
    return itertools.chain(self.IterRemovedPaths(),
                           self.IterModifiedPaths(),
                           self.IterAddedPaths())

  def IterChangedSubpaths(self, path):
    """Generator for paths within a zip that were added/removed/modified."""
    return itertools.chain(self.IterRemovedSubpaths(path),
                           self.IterModifiedSubpaths(path),
                           self.IterAddedSubpaths(path))

  def DescribeDifference(self):
    """Returns a human-readable description of what changed."""
    if self.force:
      return 'force=True'
    if self.missing_outputs:
      return 'Outputs do not exist:\n  ' + '\n  '.join(self.missing_outputs)
    if self.old_metadata is None:
      return 'Previous stamp file not found.'

    if self.old_metadata.StringsMd5() != self.new_metadata.StringsMd5():
      ndiff = difflib.ndiff(self.old_metadata.GetStrings(),
                            self.new_metadata.GetStrings())
      changed = [s for s in ndiff if not s.startswith(' ')]
      return 'Input strings changed:\n  ' + '\n  '.join(changed)

    if self.old_metadata.FilesMd5() == self.new_metadata.FilesMd5():
      return "There's no difference."

    lines = []
    lines.extend('Added: ' + p for p in self.IterAddedPaths())
    lines.extend('Removed: ' + p for p in self.IterRemovedPaths())
    for path in self.IterModifiedPaths():
      lines.append('Modified: ' + path)
      lines.extend('  -> Subpath added: ' + p
                   for p in self.IterAddedSubpaths(path))
      lines.extend('  -> Subpath removed: ' + p
                   for p in self.IterRemovedSubpaths(path))
      lines.extend('  -> Subpath modified: ' + p
                   for p in self.IterModifiedSubpaths(path))
    if lines:
      return 'Input files changed:\n  ' + '\n  '.join(lines)
    return 'I have no idea what changed (there is a bug).'
285
286
287class _Metadata:
288  """Data model for tracking change metadata.
289
290  Args:
291    track_entries: Enables per-file change tracking. Slower, but required for
292        Changes functionality.
293  """
294  # Schema:
295  # {
296  #   "files-md5": "VALUE",
297  #   "strings-md5": "VALUE",
298  #   "input-files": [
299  #     {
300  #       "path": "path.jar",
301  #       "tag": "{MD5 of entries}",
302  #       "entries": [
303  #         { "path": "org/chromium/base/Foo.class", "tag": "{CRC32}" }, ...
304  #       ]
305  #     }, {
306  #       "path": "path.txt",
307  #       "tag": "{MD5}",
308  #     }
309  #   ],
310  #   "input-strings": ["a", "b", ...],
311  # }
312  def __init__(self, track_entries=False):
313    self._track_entries = track_entries
314    self._files_md5 = None
315    self._strings_md5 = None
316    self._files = []
317    self._strings = []
318    # Map of (path, subpath) -> entry. Created upon first call to _GetEntry().
319    self._file_map = None
320
321  @classmethod
322  def FromFile(cls, fileobj):
323    """Returns a _Metadata initialized from a file object."""
324    ret = cls()
325    obj = json.load(fileobj)
326    ret._files_md5 = obj['files-md5']
327    ret._strings_md5 = obj['strings-md5']
328    ret._files = obj.get('input-files', [])
329    ret._strings = obj.get('input-strings', [])
330    return ret
331
332  def ToFile(self, fileobj):
333    """Serializes metadata to the given file object."""
334    obj = {
335        'files-md5': self.FilesMd5(),
336        'strings-md5': self.StringsMd5(),
337    }
338    if self._track_entries:
339      obj['input-files'] = sorted(self._files, key=lambda e: e['path'])
340      obj['input-strings'] = self._strings
341
342    json.dump(obj, fileobj, indent=2)
343
344  def _AssertNotQueried(self):
345    assert self._files_md5 is None
346    assert self._strings_md5 is None
347    assert self._file_map is None
348
349  def AddStrings(self, values):
350    self._AssertNotQueried()
351    self._strings.extend(str(v) for v in values)
352
353  def AddFile(self, path, tag):
354    """Adds metadata for a non-zip file.
355
356    Args:
357      path: Path to the file.
358      tag: A short string representative of the file contents.
359    """
360    self._AssertNotQueried()
361    self._files.append({
362        'path': path,
363        'tag': tag,
364    })
365
366  def AddZipFile(self, path, entries):
367    """Adds metadata for a zip file.
368
369    Args:
370      path: Path to the file.
371      entries: List of (subpath, tag) tuples for entries within the zip.
372    """
373    self._AssertNotQueried()
374    tag = _ComputeInlineMd5(itertools.chain((e[0] for e in entries),
375                                            (e[1] for e in entries)))
376    self._files.append({
377        'path': path,
378        'tag': tag,
379        'entries': [{"path": e[0], "tag": e[1]} for e in entries],
380    })
381
382  def GetStrings(self):
383    """Returns the list of input strings."""
384    return self._strings
385
386  def FilesMd5(self):
387    """Lazily computes and returns the aggregate md5 of input files."""
388    if self._files_md5 is None:
389      # Omit paths from md5 since temporary files have random names.
390      self._files_md5 = _ComputeInlineMd5(
391          self.GetTag(p) for p in sorted(self.IterPaths()))
392    return self._files_md5
393
394  def StringsMd5(self):
395    """Lazily computes and returns the aggregate md5 of input strings."""
396    if self._strings_md5 is None:
397      self._strings_md5 = _ComputeInlineMd5(self._strings)
398    return self._strings_md5
399
400  def _GetEntry(self, path, subpath=None):
401    """Returns the JSON entry for the given path / subpath."""
402    if self._file_map is None:
403      self._file_map = {}
404      for entry in self._files:
405        self._file_map[(entry['path'], None)] = entry
406        for subentry in entry.get('entries', ()):
407          self._file_map[(entry['path'], subentry['path'])] = subentry
408    return self._file_map.get((path, subpath))
409
410  def GetTag(self, path, subpath=None):
411    """Returns the tag for the given path / subpath."""
412    ret = self._GetEntry(path, subpath)
413    return ret and ret['tag']
414
415  def IterPaths(self):
416    """Returns a generator for all top-level paths."""
417    return (e['path'] for e in self._files)
418
419  def IterSubpaths(self, path):
420    """Returns a generator for all subpaths in the given zip.
421
422    If the given path is not a zip file or doesn't exist, returns an empty
423    iterable.
424    """
425    outer_entry = self._GetEntry(path)
426    if not outer_entry:
427      return ()
428    subentries = outer_entry.get('entries', [])
429    return (entry['path'] for entry in subentries)
430
431
432def _ComputeTagForPath(path):
433  stat = os.stat(path)
434  if stat.st_size > 1 * 1024 * 1024:
435    # Fallback to mtime for large files so that md5_check does not take too long
436    # to run.
437    return stat.st_mtime
438  md5 = hashlib.md5()
439  with open(path, 'rb') as f:
440    md5.update(f.read())
441  return md5.hexdigest()
442
443
444def _ComputeInlineMd5(iterable):
445  """Computes the md5 of the concatenated parameters."""
446  md5 = hashlib.md5()
447  for item in iterable:
448    md5.update(str(item).encode('ascii'))
449  return md5.hexdigest()
450
451
452def _ExtractZipEntries(path):
453  """Returns a list of (path, CRC32) of all files within |path|."""
454  entries = []
455  with zipfile.ZipFile(path) as zip_file:
456    for zip_info in zip_file.infolist():
457      # Skip directories and empty files.
458      if zip_info.CRC:
459        entries.append(
460            (zip_info.filename, zip_info.CRC + zip_info.compress_type))
461  return entries
462