# Copyright 2013 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.


import difflib
import hashlib
import itertools
import json
import os
import sys
import zipfile

from util import build_utils
import action_helpers  # build_utils adds //build to sys.path.
import print_python_deps

# When set and a difference is detected, a diff of what changed is printed.
PRINT_EXPLANATIONS = int(os.environ.get('PRINT_BUILD_EXPLANATIONS', 0))

# An escape hatch that causes all targets to be rebuilt.
_FORCE_REBUILD = int(os.environ.get('FORCE_REBUILD', 0))


def CallAndWriteDepfileIfStale(on_stale_md5,
                               options,
                               record_path=None,
                               input_paths=None,
                               input_strings=None,
                               output_paths=None,
                               force=False,
                               pass_changes=False,
                               track_subpaths_allowlist=None,
                               depfile_deps=None):
  """Wraps CallAndRecordIfStale() and writes a depfile if applicable.

  Depfiles are automatically added to output_paths when present in the |options|
  argument. They are then created after |on_stale_md5| is called.

  By default, only python dependencies are added to the depfile. If there are
  other input paths that are not captured by GN deps, then they should be listed
  in depfile_deps. It's important to write paths to the depfile that are already
  captured by GN deps since GN args can cause GN deps to change, and such
  changes are not immediately reflected in depfiles (http://crbug.com/589311).
  """
  if not output_paths:
    raise Exception('At least one output_path must be specified.')
  input_paths = list(input_paths or [])
  input_strings = list(input_strings or [])
  output_paths = list(output_paths or [])

  # The script's own python dependencies are always inputs: a change to any of
  # them must trigger a rebuild.
  input_paths += print_python_deps.ComputePythonDependencies()

  CallAndRecordIfStale(
      on_stale_md5,
      record_path=record_path,
      input_paths=input_paths,
      input_strings=input_strings,
      output_paths=output_paths,
      force=force,
      pass_changes=pass_changes,
      track_subpaths_allowlist=track_subpaths_allowlist)

  # Write depfile even when inputs have not changed to ensure build correctness
  # on bots that build with & without patch, and the patch changes the depfile
  # location.
  if hasattr(options, 'depfile') and options.depfile:
    action_helpers.write_depfile(options.depfile, output_paths[0], depfile_deps)


def CallAndRecordIfStale(function,
                         record_path=None,
                         input_paths=None,
                         input_strings=None,
                         output_paths=None,
                         force=False,
                         pass_changes=False,
                         track_subpaths_allowlist=None):
  """Calls function if outputs are stale.

  Outputs are considered stale if:
  - any output_paths are missing, or
  - the contents of any file within input_paths has changed, or
  - the contents of input_strings has changed.

  To debug which files are out-of-date, set the environment variable:
  PRINT_BUILD_EXPLANATIONS=1

  Args:
    function: The function to call.
    record_path: Path to record metadata.
      Defaults to output_paths[0] + '.md5.stamp'
    input_paths: List of paths to calculate an md5 sum on.
    input_strings: List of strings to record verbatim.
    output_paths: List of output paths.
    force: Whether to treat outputs as missing regardless of whether they
      actually are.
    pass_changes: Whether to pass a Changes instance to |function|.
    track_subpaths_allowlist: Relevant only when pass_changes=True. List of .zip
      files from |input_paths| to make subpath information available for.
  """
  assert record_path or output_paths
  input_paths = input_paths or []
  input_strings = input_strings or []
  output_paths = output_paths or []
  record_path = record_path or output_paths[0] + '.md5.stamp'

  assert record_path.endswith('.stamp'), (
      'record paths must end in \'.stamp\' so that they are easy to find '
      'and delete')

  new_metadata = _Metadata(track_entries=pass_changes or PRINT_EXPLANATIONS)
  new_metadata.AddStrings(input_strings)

  zip_allowlist = set(track_subpaths_allowlist or [])
  for path in input_paths:
    if os.path.isabs(path):
      path = os.path.relpath(path)
    # It's faster to md5 an entire zip file than it is to just locate & hash
    # its central directory (which is what this used to do).
    if path in zip_allowlist:
      entries = _ExtractZipEntries(path)
      new_metadata.AddZipFile(path, entries)
    else:
      new_metadata.AddFile(path, _ComputeTagForPath(path))

  force = force or _FORCE_REBUILD
  missing_outputs = [x for x in output_paths if force or not os.path.exists(x)]
  old_metadata = None

  if not missing_outputs and os.path.exists(record_path):
    with open(record_path, 'r') as jsonfile:
      try:
        old_metadata = _Metadata.FromFile(jsonfile)
      except Exception:  # pylint: disable=broad-except
        pass  # Not yet using new file format (treat as stale).

  changes = Changes(old_metadata, new_metadata, force, missing_outputs)
  if not changes.HasChanges():
    return

  if PRINT_EXPLANATIONS:
    print('=' * 80)
    print('Target is stale: %s' % record_path)
    print(changes.DescribeDifference())
    print('=' * 80)

  args = (changes,) if pass_changes else ()
  function(*args)

  with open(record_path, 'w') as f:
    new_metadata.ToFile(f)


class Changes:
  """Provides an API for querying what changed between runs."""

  def __init__(self, old_metadata, new_metadata, force, missing_outputs):
    self.old_metadata = old_metadata
    self.new_metadata = new_metadata
    self.force = force
    self.missing_outputs = missing_outputs

  def _GetOldTag(self, path, subpath=None):
    return self.old_metadata and self.old_metadata.GetTag(path, subpath)

  def HasChanges(self):
    """Returns whether any changes exist."""
    # HasStringChanges() returns True when old_metadata is None, so the
    # second clause only runs when old_metadata is present.
    return (self.HasStringChanges()
            or self.old_metadata.FilesMd5() != self.new_metadata.FilesMd5())

  def HasStringChanges(self):
    """Returns whether string metadata changed."""
    return (self.force or not self.old_metadata
            or self.old_metadata.StringsMd5() != self.new_metadata.StringsMd5())

  def AddedOrModifiedOnly(self):
    """Returns whether the only changes were from added or modified (sub)files.

    No missing outputs, no removed paths/subpaths.
    """
    if self.HasStringChanges():
      return False
    if any(self.IterRemovedPaths()):
      return False
    for path in self.IterModifiedPaths():
      if any(self.IterRemovedSubpaths(path)):
        return False
    return True

  def IterAllPaths(self):
    """Generator for paths."""
    return self.new_metadata.IterPaths()

  def IterAllSubpaths(self, path):
    """Generator for subpaths."""
    return self.new_metadata.IterSubpaths(path)

  def IterAddedPaths(self):
    """Generator for paths that were added."""
    for path in self.new_metadata.IterPaths():
      if self._GetOldTag(path) is None:
        yield path

  def IterAddedSubpaths(self, path):
    """Generator for paths that were added within the given zip file."""
    for subpath in self.new_metadata.IterSubpaths(path):
      if self._GetOldTag(path, subpath) is None:
        yield subpath

  def IterRemovedPaths(self):
    """Generator for paths that were removed."""
    if self.old_metadata:
      for path in self.old_metadata.IterPaths():
        if self.new_metadata.GetTag(path) is None:
          yield path

  def IterRemovedSubpaths(self, path):
    """Generator for paths that were removed within the given zip file."""
    if self.old_metadata:
      for subpath in self.old_metadata.IterSubpaths(path):
        if self.new_metadata.GetTag(path, subpath) is None:
          yield subpath

  def IterModifiedPaths(self):
    """Generator for paths whose contents have changed."""
    for path in self.new_metadata.IterPaths():
      old_tag = self._GetOldTag(path)
      new_tag = self.new_metadata.GetTag(path)
      if old_tag is not None and old_tag != new_tag:
        yield path

  def IterModifiedSubpaths(self, path):
    """Generator for paths within a zip file whose contents have changed."""
    for subpath in self.new_metadata.IterSubpaths(path):
      old_tag = self._GetOldTag(path, subpath)
      new_tag = self.new_metadata.GetTag(path, subpath)
      if old_tag is not None and old_tag != new_tag:
        yield subpath

  def IterChangedPaths(self):
    """Generator for all changed paths (added/removed/modified)."""
    return itertools.chain(self.IterRemovedPaths(),
                           self.IterModifiedPaths(),
                           self.IterAddedPaths())

  def IterChangedSubpaths(self, path):
    """Generator for paths within a zip that were added/removed/modified."""
    return itertools.chain(self.IterRemovedSubpaths(path),
                           self.IterModifiedSubpaths(path),
                           self.IterAddedSubpaths(path))

  def DescribeDifference(self):
    """Returns a human-readable description of what changed."""
    if self.force:
      return 'force=True'
    if self.missing_outputs:
      return 'Outputs do not exist:\n  ' + '\n  '.join(self.missing_outputs)
    if self.old_metadata is None:
      return 'Previous stamp file not found.'

    if self.old_metadata.StringsMd5() != self.new_metadata.StringsMd5():
      ndiff = difflib.ndiff(self.old_metadata.GetStrings(),
                            self.new_metadata.GetStrings())
      changed = [s for s in ndiff if not s.startswith(' ')]
      return 'Input strings changed:\n  ' + '\n  '.join(changed)

    if self.old_metadata.FilesMd5() == self.new_metadata.FilesMd5():
      return "There's no difference."

    lines = []
    lines.extend('Added: ' + p for p in self.IterAddedPaths())
    lines.extend('Removed: ' + p for p in self.IterRemovedPaths())
    for path in self.IterModifiedPaths():
      lines.append('Modified: ' + path)
      lines.extend('  -> Subpath added: ' + p
                   for p in self.IterAddedSubpaths(path))
      lines.extend('  -> Subpath removed: ' + p
                   for p in self.IterRemovedSubpaths(path))
      lines.extend('  -> Subpath modified: ' + p
                   for p in self.IterModifiedSubpaths(path))
    if lines:
      return 'Input files changed:\n  ' + '\n  '.join(lines)
    return 'I have no idea what changed (there is a bug).'


class _Metadata:
  """Data model for tracking change metadata.

  Args:
    track_entries: Enables per-file change tracking. Slower, but required for
        Changes functionality.
  """
  # Schema:
  # {
  #   "files-md5": "VALUE",
  #   "strings-md5": "VALUE",
  #   "input-files": [
  #     {
  #       "path": "path.jar",
  #       "tag": "{MD5 of entries}",
  #       "entries": [
  #         { "path": "org/chromium/base/Foo.class", "tag": "{CRC32}" }, ...
  #       ]
  #     }, {
  #       "path": "path.txt",
  #       "tag": "{MD5}",
  #     }
  #   ],
  #   "input-strings": ["a", "b", ...],
  # }
  def __init__(self, track_entries=False):
    self._track_entries = track_entries
    self._files_md5 = None
    self._strings_md5 = None
    self._files = []
    self._strings = []
    # Map of (path, subpath) -> entry. Created upon first call to _GetEntry().
    self._file_map = None

  @classmethod
  def FromFile(cls, fileobj):
    """Returns a _Metadata initialized from a file object."""
    ret = cls()
    obj = json.load(fileobj)
    ret._files_md5 = obj['files-md5']
    ret._strings_md5 = obj['strings-md5']
    ret._files = obj.get('input-files', [])
    ret._strings = obj.get('input-strings', [])
    return ret

  def ToFile(self, fileobj):
    """Serializes metadata to the given file object."""
    obj = {
        'files-md5': self.FilesMd5(),
        'strings-md5': self.StringsMd5(),
    }
    if self._track_entries:
      obj['input-files'] = sorted(self._files, key=lambda e: e['path'])
      obj['input-strings'] = self._strings

    json.dump(obj, fileobj, indent=2)

  def _AssertNotQueried(self):
    # Adding inputs after an md5 has been computed would silently invalidate
    # the cached digests, so forbid it.
    assert self._files_md5 is None
    assert self._strings_md5 is None
    assert self._file_map is None

  def AddStrings(self, values):
    self._AssertNotQueried()
    self._strings.extend(str(v) for v in values)

  def AddFile(self, path, tag):
    """Adds metadata for a non-zip file.

    Args:
      path: Path to the file.
      tag: A short string representative of the file contents.
    """
    self._AssertNotQueried()
    self._files.append({
        'path': path,
        'tag': tag,
    })

  def AddZipFile(self, path, entries):
    """Adds metadata for a zip file.

    Args:
      path: Path to the file.
      entries: List of (subpath, tag) tuples for entries within the zip.
    """
    self._AssertNotQueried()
    tag = _ComputeInlineMd5(itertools.chain((e[0] for e in entries),
                                            (e[1] for e in entries)))
    self._files.append({
        'path': path,
        'tag': tag,
        'entries': [{'path': e[0], 'tag': e[1]} for e in entries],
    })

  def GetStrings(self):
    """Returns the list of input strings."""
    return self._strings

  def FilesMd5(self):
    """Lazily computes and returns the aggregate md5 of input files."""
    if self._files_md5 is None:
      # Omit paths from md5 since temporary files have random names.
      self._files_md5 = _ComputeInlineMd5(
          self.GetTag(p) for p in sorted(self.IterPaths()))
    return self._files_md5

  def StringsMd5(self):
    """Lazily computes and returns the aggregate md5 of input strings."""
    if self._strings_md5 is None:
      self._strings_md5 = _ComputeInlineMd5(self._strings)
    return self._strings_md5

  def _GetEntry(self, path, subpath=None):
    """Returns the JSON entry for the given path / subpath."""
    if self._file_map is None:
      self._file_map = {}
      for entry in self._files:
        self._file_map[(entry['path'], None)] = entry
        for subentry in entry.get('entries', ()):
          self._file_map[(entry['path'], subentry['path'])] = subentry
    return self._file_map.get((path, subpath))

  def GetTag(self, path, subpath=None):
    """Returns the tag for the given path / subpath."""
    ret = self._GetEntry(path, subpath)
    return ret and ret['tag']

  def IterPaths(self):
    """Returns a generator for all top-level paths."""
    return (e['path'] for e in self._files)

  def IterSubpaths(self, path):
    """Returns a generator for all subpaths in the given zip.

    If the given path is not a zip file or doesn't exist, returns an empty
    iterable.
    """
    outer_entry = self._GetEntry(path)
    if not outer_entry:
      return ()
    subentries = outer_entry.get('entries', [])
    return (entry['path'] for entry in subentries)


def _ComputeTagForPath(path):
  """Returns a content tag for |path|: an md5, or mtime for large files."""
  stat = os.stat(path)
  if stat.st_size > 1 * 1024 * 1024:
    # Fallback to mtime for large files so that md5_check does not take too long
    # to run. Note: this is a float, unlike the hexdigest string below; both
    # serialize fine through json and compare consistently across runs.
    return stat.st_mtime
  md5 = hashlib.md5()
  with open(path, 'rb') as f:
    md5.update(f.read())
  return md5.hexdigest()


def _ComputeInlineMd5(iterable):
  """Computes the md5 of the concatenated parameters."""
  md5 = hashlib.md5()
  for item in iterable:
    md5.update(str(item).encode('ascii'))
  return md5.hexdigest()


def _ExtractZipEntries(path):
  """Returns a list of (path, CRC32) of all files within |path|."""
  entries = []
  with zipfile.ZipFile(path) as zip_file:
    for zip_info in zip_file.infolist():
      # Skip directories and empty files.
      if zip_info.CRC:
        entries.append(
            (zip_info.filename, zip_info.CRC + zip_info.compress_type))
  return entries