# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import difflib
import hashlib
import itertools
import json
import os
import sys
import zipfile


# When set and a difference is detected, a diff of what changed is printed.
PRINT_EXPLANATIONS = int(os.environ.get('PRINT_BUILD_EXPLANATIONS', 0))

# An escape hatch that causes all targets to be rebuilt.
_FORCE_REBUILD = int(os.environ.get('FORCE_REBUILD', 0))


def CallAndRecordIfStale(
    function, record_path=None, input_paths=None, input_strings=None,
    output_paths=None, force=False, pass_changes=False):
  """Calls function if outputs are stale.

  Outputs are considered stale if:
  - any output_paths are missing, or
  - the contents of any file within input_paths has changed, or
  - the contents of input_strings has changed, or
  - no readable record exists at record_path, or
  - |force| is set (directly or via the FORCE_REBUILD environment variable).

  After |function| returns, the new metadata is written to record_path.

  To debug which files are out-of-date, set the environment variable:
      PRINT_BUILD_EXPLANATIONS=1

  Args:
    function: The function to call.
    record_path: Path to record metadata.
      Defaults to output_paths[0] + '.md5.stamp'
    input_paths: List of paths to calculate an md5 sum on.
    input_strings: List of strings to record verbatim.
    output_paths: List of output paths.
    force: Whether to treat outputs as missing regardless of whether they
      actually are.
    pass_changes: Whether to pass a Changes instance to |function|.
  """
  assert record_path or output_paths
  input_paths = input_paths or []
  input_strings = input_strings or []
  output_paths = output_paths or []
  record_path = record_path or output_paths[0] + '.md5.stamp'

  assert record_path.endswith('.stamp'), (
      'record paths must end in \'.stamp\' so that they are easy to find '
      'and delete')

  new_metadata = _Metadata()
  new_metadata.AddStrings(input_strings)

  for path in input_paths:
    if _IsZipFile(path):
      entries = _ExtractZipEntries(path)
      new_metadata.AddZipFile(path, entries)
    else:
      new_metadata.AddFile(path, _Md5ForPath(path))

  old_metadata = None
  force = force or _FORCE_REBUILD
  # When forcing, every output is reported as missing.
  missing_outputs = [x for x in output_paths if force or not os.path.exists(x)]
  # When outputs are missing, don't bother gathering change information.
  if not missing_outputs and os.path.exists(record_path):
    with open(record_path, 'r') as jsonfile:
      try:
        old_metadata = _Metadata.FromFile(jsonfile)
      except Exception:  # pylint: disable=broad-except
        # Not yet using new file format (or the stamp is corrupt). Treat it
        # as if there were no previous record. KeyboardInterrupt and
        # SystemExit are deliberately allowed to propagate.
        pass

  changes = Changes(old_metadata, new_metadata, force, missing_outputs)
  if not changes.HasChanges():
    return

  if PRINT_EXPLANATIONS:
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('=' * 80)
    print('Target is stale: %s' % record_path)
    print(changes.DescribeDifference())
    print('=' * 80)

  args = (changes,) if pass_changes else ()
  function(*args)

  # Only record the new state once |function| has returned without raising.
  with open(record_path, 'w') as f:
    new_metadata.ToFile(f)


class Changes(object):
  """Provides an API for querying what changed between runs."""

  def __init__(self, old_metadata, new_metadata, force, missing_outputs):
    """Stores the state to compare.

    Args:
      old_metadata: Metadata from the previous run, or None if unavailable.
      new_metadata: Metadata describing the current inputs.
      force: Whether a rebuild was forced.
      missing_outputs: List of output paths considered missing.
    """
    self.old_metadata = old_metadata
    self.new_metadata = new_metadata
    self.force = force
    self.missing_outputs = missing_outputs

  def _GetOldTag(self, path, subpath=None):
    """Returns the previous tag for path / subpath (falsy if unavailable)."""
    return self.old_metadata and self.old_metadata.GetTag(path, subpath)

  def HasChanges(self):
    """Returns whether any changes exist."""
    return (self.force or
            not self.old_metadata or
            self.old_metadata.StringsMd5() != self.new_metadata.StringsMd5() or
            self.old_metadata.FilesMd5() != self.new_metadata.FilesMd5())

  def AddedOrModifiedOnly(self):
    """Returns whether the only changes were from added or modified (sub)files.

    Returns False when the build was forced, when there is no old metadata,
    when the input strings changed, or when any path (or any subpath of a
    modified path) was removed. Returns True otherwise.
    """
    if (self.force or
        not self.old_metadata or
        self.old_metadata.StringsMd5() != self.new_metadata.StringsMd5()):
      return False
    if any(self.IterRemovedPaths()):
      return False
    for path in self.IterModifiedPaths():
      if any(self.IterRemovedSubpaths(path)):
        return False
    return True

  def IterAddedPaths(self):
    """Generator for paths that were added."""
    for path in self.new_metadata.IterPaths():
      if self._GetOldTag(path) is None:
        yield path

  def IterAddedSubpaths(self, path):
    """Generator for paths that were added within the given zip file."""
    for subpath in self.new_metadata.IterSubpaths(path):
      if self._GetOldTag(path, subpath) is None:
        yield subpath

  def IterRemovedPaths(self):
    """Generator for paths that were removed."""
    if self.old_metadata:
      for path in self.old_metadata.IterPaths():
        if self.new_metadata.GetTag(path) is None:
          yield path

  def IterRemovedSubpaths(self, path):
    """Generator for paths that were removed within the given zip file."""
    if self.old_metadata:
      for subpath in self.old_metadata.IterSubpaths(path):
        if self.new_metadata.GetTag(path, subpath) is None:
          yield subpath

  def IterModifiedPaths(self):
    """Generator for paths whose contents have changed."""
    for path in self.new_metadata.IterPaths():
      old_tag = self._GetOldTag(path)
      new_tag = self.new_metadata.GetTag(path)
      # A path with no old tag is "added", not "modified".
      if old_tag is not None and old_tag != new_tag:
        yield path

  def IterModifiedSubpaths(self, path):
    """Generator for paths within a zip file whose contents have changed."""
    for subpath in self.new_metadata.IterSubpaths(path):
      old_tag = self._GetOldTag(path, subpath)
      new_tag = self.new_metadata.GetTag(path, subpath)
      if old_tag is not None and old_tag != new_tag:
        yield subpath

  def IterChangedPaths(self):
    """Generator for all changed paths (added/removed/modified)."""
    return itertools.chain(self.IterRemovedPaths(),
                           self.IterModifiedPaths(),
                           self.IterAddedPaths())

  def IterChangedSubpaths(self, path):
    """Generator for paths within a zip that were added/removed/modified."""
    return itertools.chain(self.IterRemovedSubpaths(path),
                           self.IterModifiedSubpaths(path),
                           self.IterAddedSubpaths(path))

  def DescribeDifference(self):
    """Returns a human-readable description of what changed."""
    if self.force:
      return 'force=True'
    elif self.missing_outputs:
      return 'Outputs do not exist:\n ' + '\n '.join(self.missing_outputs)
    elif self.old_metadata is None:
      return 'Previous stamp file not found.'

    if self.old_metadata.StringsMd5() != self.new_metadata.StringsMd5():
      ndiff = difflib.ndiff(self.old_metadata.GetStrings(),
                            self.new_metadata.GetStrings())
      # Keep only the lines ndiff marks as differing.
      changed = [s for s in ndiff if not s.startswith(' ')]
      return 'Input strings changed:\n ' + '\n '.join(changed)

    if self.old_metadata.FilesMd5() == self.new_metadata.FilesMd5():
      return "There's no difference."

    lines = []
    lines.extend('Added: ' + p for p in self.IterAddedPaths())
    lines.extend('Removed: ' + p for p in self.IterRemovedPaths())
    for path in self.IterModifiedPaths():
      lines.append('Modified: ' + path)
      lines.extend(' -> Subpath added: ' + p
                   for p in self.IterAddedSubpaths(path))
      lines.extend(' -> Subpath removed: ' + p
                   for p in self.IterRemovedSubpaths(path))
      lines.extend(' -> Subpath modified: ' + p
                   for p in self.IterModifiedSubpaths(path))
    if lines:
      return 'Input files changed:\n ' + '\n '.join(lines)
    return 'I have no idea what changed (there is a bug).'
class _Metadata(object):
  """Data model for tracking change metadata."""
  # Schema:
  # {
  #   "files-md5": "VALUE",
  #   "strings-md5": "VALUE",
  #   "input-files": [
  #     {
  #       "path": "path.jar",
  #       "tag": "{MD5 of entries}",
  #       "entries": [
  #         { "path": "org/chromium/base/Foo.class", "tag": "{CRC32}" }, ...
  #       ]
  #     }, {
  #       "path": "path.txt",
  #       "tag": "{MD5}",
  #     }
  #   ],
  #   "input-strings": ["a", "b", ...],
  # }
  def __init__(self):
    self._files_md5 = None
    self._strings_md5 = None
    self._files = []
    self._strings = []
    # Map of (path, subpath) -> entry. Created upon first call to _GetEntry().
    self._file_map = None

  @classmethod
  def FromFile(cls, fileobj):
    """Returns a _Metadata initialized from a file object.

    Raises whatever json.load() raises on malformed input, and KeyError if a
    required top-level key is absent.
    """
    ret = cls()
    obj = json.load(fileobj)
    ret._files_md5 = obj['files-md5']
    ret._strings_md5 = obj['strings-md5']
    ret._files = obj['input-files']
    ret._strings = obj['input-strings']
    return ret

  def ToFile(self, fileobj):
    """Serializes metadata to the given file object."""
    obj = {
        "files-md5": self.FilesMd5(),
        "strings-md5": self.StringsMd5(),
        "input-files": self._files,
        "input-strings": self._strings,
    }
    json.dump(obj, fileobj, indent=2)

  def _AssertNotQueried(self):
    """Asserts that no cached md5 / lookup map has been computed yet.

    The caches are never invalidated, so inputs must all be added before the
    first query.
    """
    assert self._files_md5 is None
    assert self._strings_md5 is None
    assert self._file_map is None

  def AddStrings(self, values):
    """Appends str() of each value to the list of input strings."""
    self._AssertNotQueried()
    self._strings.extend(str(v) for v in values)

  def AddFile(self, path, tag):
    """Adds metadata for a non-zip file.

    Args:
      path: Path to the file.
      tag: A short string representative of the file contents.
    """
    self._AssertNotQueried()
    self._files.append({
        'path': path,
        'tag': tag,
    })

  def AddZipFile(self, path, entries):
    """Adds metadata for a zip file.

    Args:
      path: Path to the file.
      entries: List of (subpath, tag) tuples for entries within the zip.
    """
    self._AssertNotQueried()
    # The zip's own tag covers all subpaths followed by all entry tags.
    tag = _ComputeInlineMd5(itertools.chain((e[0] for e in entries),
                                            (e[1] for e in entries)))
    self._files.append({
        'path': path,
        'tag': tag,
        'entries': [{"path": e[0], "tag": e[1]} for e in entries],
    })

  def GetStrings(self):
    """Returns the list of input strings."""
    return self._strings

  def FilesMd5(self):
    """Lazily computes and returns the aggregate md5 of input files."""
    if self._files_md5 is None:
      # Omit paths from md5 since temporary files have random names.
      self._files_md5 = _ComputeInlineMd5(
          self.GetTag(p) for p in sorted(self.IterPaths()))
    return self._files_md5

  def StringsMd5(self):
    """Lazily computes and returns the aggregate md5 of input strings."""
    if self._strings_md5 is None:
      self._strings_md5 = _ComputeInlineMd5(self._strings)
    return self._strings_md5

  def _GetEntry(self, path, subpath=None):
    """Returns the JSON entry for the given path / subpath, or None."""
    if self._file_map is None:
      self._file_map = {}
      for entry in self._files:
        self._file_map[(entry['path'], None)] = entry
        for subentry in entry.get('entries', ()):
          self._file_map[(entry['path'], subentry['path'])] = subentry
    return self._file_map.get((path, subpath))

  def GetTag(self, path, subpath=None):
    """Returns the tag for the given path / subpath (None if not present)."""
    ret = self._GetEntry(path, subpath)
    return ret and ret['tag']

  def IterPaths(self):
    """Returns a generator for all top-level paths."""
    return (e['path'] for e in self._files)

  def IterSubpaths(self, path):
    """Returns a generator for all subpaths in the given zip.

    If the given path is not a zip file or doesn't exist, returns an empty
    iterable.
    """
    outer_entry = self._GetEntry(path)
    if not outer_entry:
      return ()
    subentries = outer_entry.get('entries', [])
    return (entry['path'] for entry in subentries)


def _UpdateMd5ForFile(md5, path, block_size=2**16):
  """Feeds the contents of the file at |path| into |md5| in chunks."""
  with open(path, 'rb') as infile:
    while True:
      data = infile.read(block_size)
      if not data:
        break
      md5.update(data)


def _UpdateMd5ForDirectory(md5, dir_path):
  """Feeds the contents of every file under |dir_path| into |md5|.

  os.walk() yields names in arbitrary order, so directories and files are
  visited in sorted order to make the digest deterministic.
  """
  for root, dirs, files in os.walk(dir_path):
    # Sorting in place controls the order in which os.walk() descends.
    dirs.sort()
    for f in sorted(files):
      _UpdateMd5ForFile(md5, os.path.join(root, f))


def _Md5ForPath(path):
  """Returns the hex md5 of a file, or of all files within a directory."""
  md5 = hashlib.md5()
  if os.path.isdir(path):
    _UpdateMd5ForDirectory(md5, path)
  else:
    _UpdateMd5ForFile(md5, path)
  return md5.hexdigest()


def _ComputeInlineMd5(iterable):
  """Computes the md5 of the concatenated parameters.

  Each item is converted with str() before hashing.
  """
  md5 = hashlib.md5()
  for item in iterable:
    data = str(item)
    # hashlib requires bytes. On Python 2 str() already yields bytes (so the
    # digest is unchanged there); on Python 3 the text must be encoded.
    if not isinstance(data, bytes):
      data = data.encode('utf-8')
    md5.update(data)
  return md5.hexdigest()


def _IsZipFile(path):
  """Returns whether to treat the given file as a zip file."""
  # ijar doesn't set the CRC32 field.
  if path.endswith('.interface.jar'):
    return False
  return path.endswith(('.zip', '.apk', '.jar', '.srcjar'))


def _ExtractZipEntries(path):
  """Returns a list of (path, tag) for all files within |path|.

  The tag is the entry's CRC32 plus its compress_type value.
  """
  entries = []
  with zipfile.ZipFile(path) as zip_file:
    for zip_info in zip_file.infolist():
      # Skip directories and empty files.
      if zip_info.CRC:
        entries.append(
            (zip_info.filename, zip_info.CRC + zip_info.compress_type))
  return entries