# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import difflib
import hashlib
import itertools
import json
import os
import sys
import zipfile


# When set and a difference is detected, a diff of what changed is printed.
PRINT_EXPLANATIONS = int(os.environ.get('PRINT_BUILD_EXPLANATIONS', 0))

# An escape hatch that causes all targets to be rebuilt.
_FORCE_REBUILD = int(os.environ.get('FORCE_REBUILD', 0))


def CallAndRecordIfStale(
    function, record_path=None, input_paths=None, input_strings=None,
    output_paths=None, force=False, pass_changes=False):
  """Calls function if outputs are stale.

  Outputs are considered stale if:
  - any output_paths are missing, or
  - the contents of any file within input_paths has changed, or
  - the contents of input_strings has changed.

  To debug which files are out-of-date, set the environment variable:
      PRINT_BUILD_EXPLANATIONS=1

  The record file is only (re)written after |function| returns, so an
  exception raised by |function| leaves the target stale.

  Args:
    function: The function to call.
    record_path: Path to record metadata.
      Defaults to output_paths[0] + '.md5.stamp'
    input_paths: List of paths to calculate an md5 sum on.
    input_strings: List of strings to record verbatim.
    output_paths: List of output paths.
    force: Whether to treat outputs as missing regardless of whether they
      actually are.
    pass_changes: Whether to pass a Changes instance to |function|.
  """
  assert record_path or output_paths
  input_paths = input_paths or []
  input_strings = input_strings or []
  output_paths = output_paths or []
  record_path = record_path or output_paths[0] + '.md5.stamp'

  assert record_path.endswith('.stamp'), (
      'record paths must end in \'.stamp\' so that they are easy to find '
      'and delete')

  new_metadata = _Metadata()
  new_metadata.AddStrings(input_strings)

  for path in input_paths:
    if _IsZipFile(path):
      entries = _ExtractZipEntries(path)
      new_metadata.AddZipFile(path, entries)
    else:
      new_metadata.AddFile(path, _Md5ForPath(path))

  old_metadata = None
  force = force or _FORCE_REBUILD
  missing_outputs = [x for x in output_paths if force or not os.path.exists(x)]
  # When outputs are missing, don't bother gathering change information.
  if not missing_outputs and os.path.exists(record_path):
    with open(record_path, 'r') as jsonfile:
      try:
        old_metadata = _Metadata.FromFile(jsonfile)
      except (ValueError, KeyError, TypeError):
        # Not yet using new file format (not JSON, or JSON with a different
        # shape). Treat it the same as having no previous record.
        pass

  changes = Changes(old_metadata, new_metadata, force, missing_outputs)
  if not changes.HasChanges():
    return

  if PRINT_EXPLANATIONS:
    # Single-argument print(...) behaves identically on Python 2 and 3.
    print('=' * 80)
    print('Target is stale: %s' % record_path)
    print(changes.DescribeDifference())
    print('=' * 80)

  args = (changes,) if pass_changes else ()
  function(*args)

  with open(record_path, 'w') as f:
    new_metadata.ToFile(f)


class Changes(object):
  """Provides an API for querying what changed between runs."""

  def __init__(self, old_metadata, new_metadata, force, missing_outputs):
    # old_metadata is None when there is no usable previous record.
    self.old_metadata = old_metadata
    self.new_metadata = new_metadata
    self.force = force
    self.missing_outputs = missing_outputs

  def _GetOldTag(self, path, subpath=None):
    """Returns the previous tag for path / subpath (falsy if unavailable)."""
    return self.old_metadata and self.old_metadata.GetTag(path, subpath)

  def HasChanges(self):
    """Returns whether any changes exist."""
    return (self.force or
            not self.old_metadata or
            self.old_metadata.StringsMd5() != self.new_metadata.StringsMd5() or
            self.old_metadata.FilesMd5() != self.new_metadata.FilesMd5())

  def AddedOrModifiedOnly(self):
    """Returns whether the only changes were from added or modified (sub)files.

    No missing outputs, no removed paths/subpaths.
    """
    if (self.force or
        not self.old_metadata or
        self.old_metadata.StringsMd5() != self.new_metadata.StringsMd5()):
      return False
    if any(self.IterRemovedPaths()):
      return False
    for path in self.IterModifiedPaths():
      if any(self.IterRemovedSubpaths(path)):
        return False
    return True

  def IterAllPaths(self):
    """Generator for paths."""
    return self.new_metadata.IterPaths()

  def IterAllSubpaths(self, path):
    """Generator for subpaths."""
    return self.new_metadata.IterSubpaths(path)

  def IterAddedPaths(self):
    """Generator for paths that were added."""
    for path in self.new_metadata.IterPaths():
      if self._GetOldTag(path) is None:
        yield path

  def IterAddedSubpaths(self, path):
    """Generator for paths that were added within the given zip file."""
    for subpath in self.new_metadata.IterSubpaths(path):
      if self._GetOldTag(path, subpath) is None:
        yield subpath

  def IterRemovedPaths(self):
    """Generator for paths that were removed."""
    if self.old_metadata:
      for path in self.old_metadata.IterPaths():
        if self.new_metadata.GetTag(path) is None:
          yield path

  def IterRemovedSubpaths(self, path):
    """Generator for paths that were removed within the given zip file."""
    if self.old_metadata:
      for subpath in self.old_metadata.IterSubpaths(path):
        if self.new_metadata.GetTag(path, subpath) is None:
          yield subpath

  def IterModifiedPaths(self):
    """Generator for paths whose contents have changed."""
    for path in self.new_metadata.IterPaths():
      old_tag = self._GetOldTag(path)
      new_tag = self.new_metadata.GetTag(path)
      if old_tag is not None and old_tag != new_tag:
        yield path

  def IterModifiedSubpaths(self, path):
    """Generator for paths within a zip file whose contents have changed."""
    for subpath in self.new_metadata.IterSubpaths(path):
      old_tag = self._GetOldTag(path, subpath)
      new_tag = self.new_metadata.GetTag(path, subpath)
      if old_tag is not None and old_tag != new_tag:
        yield subpath

  def IterChangedPaths(self):
    """Generator for all changed paths (added/removed/modified)."""
    return itertools.chain(self.IterRemovedPaths(),
                           self.IterModifiedPaths(),
                           self.IterAddedPaths())

  def IterChangedSubpaths(self, path):
    """Generator for paths within a zip that were added/removed/modified."""
    return itertools.chain(self.IterRemovedSubpaths(path),
                           self.IterModifiedSubpaths(path),
                           self.IterAddedSubpaths(path))

  def DescribeDifference(self):
    """Returns a human-readable description of what changed."""
    if self.force:
      return 'force=True'
    elif self.missing_outputs:
      return 'Outputs do not exist:\n ' + '\n '.join(self.missing_outputs)
    elif self.old_metadata is None:
      return 'Previous stamp file not found.'

    if self.old_metadata.StringsMd5() != self.new_metadata.StringsMd5():
      ndiff = difflib.ndiff(self.old_metadata.GetStrings(),
                            self.new_metadata.GetStrings())
      # ndiff prefixes unchanged lines with a space; keep only the changes.
      changed = [s for s in ndiff if not s.startswith(' ')]
      return 'Input strings changed:\n ' + '\n '.join(changed)

    if self.old_metadata.FilesMd5() == self.new_metadata.FilesMd5():
      return "There's no difference."

    lines = []
    lines.extend('Added: ' + p for p in self.IterAddedPaths())
    lines.extend('Removed: ' + p for p in self.IterRemovedPaths())
    for path in self.IterModifiedPaths():
      lines.append('Modified: ' + path)
      lines.extend(' -> Subpath added: ' + p
                   for p in self.IterAddedSubpaths(path))
      lines.extend(' -> Subpath removed: ' + p
                   for p in self.IterRemovedSubpaths(path))
      lines.extend(' -> Subpath modified: ' + p
                   for p in self.IterModifiedSubpaths(path))
    if lines:
      return 'Input files changed:\n ' + '\n '.join(lines)
    return 'I have no idea what changed (there is a bug).'


class _Metadata(object):
  """Data model for tracking change metadata."""
  # Schema:
  # {
  #   "files-md5": "VALUE",
  #   "strings-md5": "VALUE",
  #   "input-files": [
  #     {
  #       "path": "path.jar",
  #       "tag": "{MD5 of entries}",
  #       "entries": [
  #         { "path": "org/chromium/base/Foo.class", "tag": "{CRC32}" }, ...
  #       ]
  #     }, {
  #       "path": "path.txt",
  #       "tag": "{MD5}",
  #     }
  #   ],
  #   "input-strings": ["a", "b", ...],
  # }
  def __init__(self):
    self._files_md5 = None
    self._strings_md5 = None
    self._files = []
    self._strings = []
    # Map of (path, subpath) -> entry. Created upon first call to _GetEntry().
    self._file_map = None

  @classmethod
  def FromFile(cls, fileobj):
    """Returns a _Metadata initialized from a file object.

    Raises:
      ValueError: If the file does not contain valid JSON.
      KeyError: If a required top-level key is missing.
      TypeError: If the top-level JSON value cannot be indexed by string key.
    """
    ret = cls()
    obj = json.load(fileobj)
    ret._files_md5 = obj['files-md5']
    ret._strings_md5 = obj['strings-md5']
    ret._files = obj['input-files']
    ret._strings = obj['input-strings']
    return ret

  def ToFile(self, fileobj):
    """Serializes metadata to the given file object."""
    obj = {
        "files-md5": self.FilesMd5(),
        "strings-md5": self.StringsMd5(),
        "input-files": self._files,
        "input-strings": self._strings,
    }
    json.dump(obj, fileobj, indent=2)

  def _AssertNotQueried(self):
    # The md5s and the lookup map are cached on first use, so inputs must not
    # be added once any of them has been computed.
    assert self._files_md5 is None
    assert self._strings_md5 is None
    assert self._file_map is None

  def AddStrings(self, values):
    """Appends str() of each value to the list of input strings."""
    self._AssertNotQueried()
    self._strings.extend(str(v) for v in values)

  def AddFile(self, path, tag):
    """Adds metadata for a non-zip file.

    Args:
      path: Path to the file.
      tag: A short string representative of the file contents.
    """
    self._AssertNotQueried()
    self._files.append({
        'path': path,
        'tag': tag,
    })

  def AddZipFile(self, path, entries):
    """Adds metadata for a zip file.

    Args:
      path: Path to the file.
      entries: List of (subpath, tag) tuples for entries within the zip.
    """
    self._AssertNotQueried()
    # The zip's own tag hashes all entry names followed by all entry tags.
    tag = _ComputeInlineMd5(itertools.chain((e[0] for e in entries),
                                            (e[1] for e in entries)))
    self._files.append({
        'path': path,
        'tag': tag,
        'entries': [{"path": e[0], "tag": e[1]} for e in entries],
    })

  def GetStrings(self):
    """Returns the list of input strings."""
    return self._strings

  def FilesMd5(self):
    """Lazily computes and returns the aggregate md5 of input files."""
    if self._files_md5 is None:
      # Omit paths from md5 since temporary files have random names.
      self._files_md5 = _ComputeInlineMd5(
          self.GetTag(p) for p in sorted(self.IterPaths()))
    return self._files_md5

  def StringsMd5(self):
    """Lazily computes and returns the aggregate md5 of input strings."""
    if self._strings_md5 is None:
      self._strings_md5 = _ComputeInlineMd5(self._strings)
    return self._strings_md5

  def _GetEntry(self, path, subpath=None):
    """Returns the JSON entry for the given path / subpath."""
    if self._file_map is None:
      self._file_map = {}
      for entry in self._files:
        self._file_map[(entry['path'], None)] = entry
        for subentry in entry.get('entries', ()):
          self._file_map[(entry['path'], subentry['path'])] = subentry
    return self._file_map.get((path, subpath))

  def GetTag(self, path, subpath=None):
    """Returns the tag for the given path / subpath (None if not present)."""
    ret = self._GetEntry(path, subpath)
    return ret and ret['tag']

  def IterPaths(self):
    """Returns a generator for all top-level paths."""
    return (e['path'] for e in self._files)

  def IterSubpaths(self, path):
    """Returns a generator for all subpaths in the given zip.

    If the given path is not a zip file or doesn't exist, returns an empty
    iterable.
    """
    outer_entry = self._GetEntry(path)
    if not outer_entry:
      return ()
    subentries = outer_entry.get('entries', [])
    return (entry['path'] for entry in subentries)


def _UpdateMd5ForFile(md5, path, block_size=2**16):
  """Feeds the contents of the file at |path| into |md5| in chunks."""
  with open(path, 'rb') as infile:
    while True:
      data = infile.read(block_size)
      if not data:
        break
      md5.update(data)


def _UpdateMd5ForDirectory(md5, dir_path):
  """Feeds the contents of every file under |dir_path| into |md5|.

  Only file contents are hashed (not names). Directories and files are visited
  in sorted order so the digest does not depend on the order in which the
  filesystem happens to list them.
  """
  for root, dirs, files in os.walk(dir_path):
    # Sorting in place steers the order in which os.walk descends.
    dirs.sort()
    for f in sorted(files):
      _UpdateMd5ForFile(md5, os.path.join(root, f))


def _Md5ForPath(path):
  """Returns the md5 hex digest of a file, or of all files in a directory."""
  md5 = hashlib.md5()
  if os.path.isdir(path):
    _UpdateMd5ForDirectory(md5, path)
  else:
    _UpdateMd5ForFile(md5, path)
  return md5.hexdigest()


def _ComputeInlineMd5(iterable):
  """Computes the md5 of the concatenated parameters."""
  md5 = hashlib.md5()
  for item in iterable:
    data = str(item)
    # hashlib only accepts bytes. On Python 2 str already is bytes; on
    # Python 3 it is text and has to be encoded first.
    if not isinstance(data, bytes):
      data = data.encode('utf-8')
    md5.update(data)
  return md5.hexdigest()


def _IsZipFile(path):
  """Returns whether to treat the given file as a zip file."""
  # ijar doesn't set the CRC32 field.
  if path.endswith('.interface.jar'):
    return False
  return path.endswith(('.zip', '.apk', '.jar', '.srcjar'))


def _ExtractZipEntries(path):
  """Returns a list of (path, CRC32) of all files within |path|.

  The numeric compress_type is added to the CRC32 to form each entry's tag.
  """
  entries = []
  with zipfile.ZipFile(path) as zip_file:
    for zip_info in zip_file.infolist():
      # Skip directories and empty files.
      if zip_info.CRC:
        entries.append(
            (zip_info.filename, zip_info.CRC + zip_info.compress_type))
  return entries