#!/usr/bin/env -S python3 -u

"""
This script helps find various build behaviors that make builds less hermetic
and repeatable. Depending on the flags, it runs a sequence of builds and looks
for files that have changed or have been improperly regenerated, updating
their timestamps incorrectly. It also looks for changes that the build has
done to the source tree, and for files whose contents are dependent on the
location of the out directory.

This utility has two major modes, full and incremental. By default, this tool
runs in full mode. To run in incremental mode, pass the --incremental flag.


FULL MODE

In full mode, this tool helps verify BUILD CORRECTNESS by examining its
REPEATABILITY. In full mode, this tool runs two complete builds in different
directories and compares the CONTENTS of the two directories. Lists of any
files that are added, removed or changed are printed, sorted by the timestamp
of that file, to aid finding which dependencies trigger the rebuilding of
other files.


INCREMENTAL MODE

In incremental mode, this tool helps verify the SPEED of the build. It runs two
builds and looks at the TIMESTAMPS of the generated files, and reports files
that were changed by the second build. In theory, an incremental build with no
source files touched should not have any generated targets changed. As in full
builds, the file list is returned sorted by timestamp.


OTHER CHECKS

In both full and incremental mode, this tool looks at the timestamps of all
source files in the tree, and reports on files that have been touched. In the
output, these are labeled with the header "Source files touched after start of
build."

In addition, by default, this tool sets the OUT_DIR environment variable to
something other than "out" in order to find build rules that are not respecting
the OUT_DIR. If you see these, you should fix them, but if your build cannot
complete for some reason because of this, you can pass the --no-check-out-dir
flag to suppress this check.


OTHER FLAGS

In full mode, the --detect-embedded-paths flag does the two builds in different
directories, to help in finding rules that embed the out directory path into
the targets.

The --hide-build-output flag hides the output of successful builds, to make
script output cleaner. The output of builds that fail is still shown.

The --no-build flag is useful if you have already done a build and would
just like to re-run the analysis.

The --target flag lets you specify a build target other than the default
full build (droid). You can pass "nothing" as in the example below, or a
specific target, to reduce the scope of the checks performed.

The --touch flag lets you specify a list of source files to touch between
the builds, to examine the consequences of editing a particular file.


EXAMPLE COMMANDLINES

Please run build/make/tools/compare_builds.py --help for a full listing
of the commandline flags. Here is a sampling of useful combinations.

  1. Find files changed during an incremental build that doesn't build
     any targets.

       build/make/tools/compare_builds.py --incremental --target nothing

     Long incremental build times, or consecutive builds that re-run build actions
     are usually caused by files being touched as part of loading the makefiles.

     The nothing build (m nothing) loads the make and blueprint files, generates
     the dependency graph, but then doesn't actually build any targets. Checking
     against this build is the fastest and easiest way to find files that are
     modified while makefiles are read, for example with $(shell) invocations.

  2. Find packaging targets that are different, ignoring intermediate files.

       build/make/tools/compare_builds.py --subdirs --detect-embedded-paths

     These flags will compare the final staging directories for partitions,
     as well as the APKs, apexes, testcases, and the like (the full directory
     list is in the DEFAULT_DIRS variable below). Since these are the files
     that are ultimately released, it is more important that these files be
     replicable, even if the intermediates that went into them are not (for
     example, when debugging symbols are stripped).

  3. Check that all targets are repeatable.

       build/make/tools/compare_builds.py --detect-embedded-paths

     This check will list all of the differences in built targets that it can
     find. Be aware that the AOSP tree still has quite a few targets that
     are flagged by this check, so OEM changes might be lost in that list.
     That said, each file shown here is a potential blocker for a repeatable
     build.

  4. See what targets are rebuilt when a file is touched between builds.

       build/make/tools/compare_builds.py --incremental \
           --touch frameworks/base/core/java/android/app/Activity.java

     This check simulates the common engineer workflow of touching a single
     file and rebuilding the whole system. To see a restricted view, consider
     also passing a --target option for a common use case. For example:

       build/make/tools/compare_builds.py --incremental --target framework \
           --touch frameworks/base/core/java/android/app/Activity.java
"""

import argparse
import itertools
import os
import shutil
import stat
import subprocess
import sys


# Soong
SOONG_UI = "build/soong/soong_ui.bash"


# Which directories to use if --subdirs is supplied without explicit directories.
DEFAULT_DIRS = (
  "apex",
  "data",
  "product",
  "ramdisk",
  "recovery",
  "root",
  "system",
  "system_ext",
  "system_other",
  "testcases",
  "vendor",
)


# Files to skip for incremental timestamp checking
BUILD_INTERNALS_PREFIX_SKIP = (
  "soong/.glob/",
  ".path/",
)


BUILD_INTERNALS_SUFFIX_SKIP = (
  "/soong/soong_build_metrics.pb",
  "/.installable_test_files",
  "/files.db",
  "/.blueprint.bootstrap",
  "/build_number.txt",
  "/build.ninja",
  "/.out-dir",
  "/build_fingerprint.txt",
  "/build_thumbprint.txt",
  "/.copied_headers_list",
  "/.installable_files",
)


# Chunk size used when comparing file contents.
_CHUNK_SIZE = 32 * 1024


class DiffType(object):
  """A kind of difference between two files, as returned by DiffFiles().

  The DIFF_* module constants below are the only instances; compare against
  them by identity.
  """

  def __init__(self, code, message):
    self.code = code
    self.message = message

  def __repr__(self):
    return "DiffType(%r, %r)" % (self.code, self.message)


DIFF_NONE = DiffType("DIFF_NONE", "Files are the same")
DIFF_MODE = DiffType("DIFF_MODE", "Stat mode bits differ")
DIFF_SIZE = DiffType("DIFF_SIZE", "File size differs")
DIFF_SYMLINK = DiffType("DIFF_SYMLINK", "Symlinks point to different locations")
DIFF_CONTENTS = DiffType("DIFF_CONTENTS", "File contents differ")


def main():
  """Parses the command line, runs the two builds (unless --no-build was
  passed) and prints the lists of suspicious files."""
  argparser = argparse.ArgumentParser(
      description="Diff build outputs from two builds.",
      epilog="Run this command from the root of the tree."
             " Before running this command, the build environment"
             " must be set up, including sourcing build/envsetup.sh"
             " and running lunch.")
  argparser.add_argument("--detect-embedded-paths", action="store_true",
      help="Use unique out dirs to detect paths embedded in binaries.")
  argparser.add_argument("--incremental", action="store_true",
      help="Compare which files are touched in two consecutive builds without a clean in between.")
  argparser.add_argument("--hide-build-output", action="store_true",
      help="Don't print the build output for successful builds")
  argparser.add_argument("--no-build", dest="run_build", action="store_false",
      help="Don't build or clean, but do everything else.")
  argparser.add_argument("--no-check-out-dir", dest="check_out_dir", action="store_false",
      help="Don't check for rules not honoring movable out directories.")
  argparser.add_argument("--subdirs", nargs="*",
      help="Only scan these subdirs of $PRODUCT_OUT instead of the whole out directory."
           " The --subdirs argument with no listed directories will give a default list.")
  argparser.add_argument("--target", default="droid",
      help="Make target to run. The default is droid")
  argparser.add_argument("--touch", nargs="+", default=[],
      help="Files to touch between builds. Must pair with --incremental.")
  args = argparser.parse_args(sys.argv[1:])

  if args.detect_embedded_paths and args.incremental:
    sys.stderr.write("Can't pass --detect-embedded-paths and --incremental together.\n")
    sys.exit(1)
  if args.detect_embedded_paths and not args.check_out_dir:
    sys.stderr.write("Can't pass --detect-embedded-paths and --no-check-out-dir together.\n")
    sys.exit(1)
  if args.touch and not args.incremental:
    sys.stderr.write("The --incremental flag is required if the --touch flag is passed.\n")
    sys.exit(1)

  AssertAtTop()
  RequireEnvVar("TARGET_PRODUCT")
  RequireEnvVar("TARGET_BUILD_VARIANT")

  # Out dir file names:
  #   - dir_prefix  - The directory we'll put everything in (except for maybe the top level
  #     out/ dir).
  #   - *work_dir   - The directory that we will build directly into. This is in dir_prefix
  #     unless --no-check-out-dir is set.
  #   - *out_dir    - After building, if work_dir is different from out_dir, we move the out
  #     directory to here so we can do the comparisons.
  #   - timestamp_* - Files we touch so we know the various phases between the builds, so we
  #     can compare timestamps of files.
  if args.incremental:
    dir_prefix = "out_incremental"
    if args.check_out_dir:
      first_work_dir = first_out_dir = dir_prefix + "/out"
      second_work_dir = second_out_dir = dir_prefix + "/out"
    else:
      first_work_dir = first_out_dir = "out"
      second_work_dir = second_out_dir = "out"
  else:
    dir_prefix = "out_full"
    first_out_dir = dir_prefix + "/out_1"
    second_out_dir = dir_prefix + "/out_2"
    if not args.check_out_dir:
      first_work_dir = second_work_dir = "out"
    elif args.detect_embedded_paths:
      first_work_dir = first_out_dir
      second_work_dir = second_out_dir
    else:
      first_work_dir = dir_prefix + "/work"
      second_work_dir = dir_prefix + "/work"
  timestamp_start = dir_prefix + "/timestamp_start"
  timestamp_between = dir_prefix + "/timestamp_between"
  timestamp_end = dir_prefix + "/timestamp_end"

  if args.run_build:
    # Initial clean, if necessary
    print("Cleaning " + dir_prefix + "/")
    Clean(dir_prefix)
    print("Cleaning out/")
    Clean("out")
    CreateEmptyFile(timestamp_start)
    print("Running the first build in " + first_work_dir)
    RunBuild(first_work_dir, first_out_dir, args.target, args.hide_build_output)
    for f in args.touch:
      print("Touching " + f)
      TouchFile(f)
    CreateEmptyFile(timestamp_between)
    print("Running the second build in " + second_work_dir)
    RunBuild(second_work_dir, second_out_dir, args.target, args.hide_build_output)
    CreateEmptyFile(timestamp_end)
    print("Done building")
    print()

  # Which out directories to scan
  if args.subdirs is not None:
    # An empty list means --subdirs was passed without any directories.
    subdirs = args.subdirs if args.subdirs else DEFAULT_DIRS
    first_files = ProductFiles(RequireBuildVar(first_out_dir, "PRODUCT_OUT"), subdirs)
    second_files = ProductFiles(RequireBuildVar(second_out_dir, "PRODUCT_OUT"), subdirs)
  else:
    first_files = OutFiles(first_out_dir)
    second_files = OutFiles(second_out_dir)

  printer = Printer()

  if args.incremental:
    # Find files that were rebuilt unnecessarily
    touched_incrementally = FindOutFilesTouchedAfter(first_files,
        GetFileTimestamp(timestamp_between))
    printer.PrintList("Touched in incremental build", touched_incrementally)
  else:
    # Compare the two out dirs
    added, removed, changed = DiffFileList(first_files, second_files)
    printer.PrintList("Added", added)
    printer.PrintList("Removed", removed)
    printer.PrintList("Changed", changed, "%s %s")

  # Find files in the source tree that were touched
  touched_during = FindSourceFilesTouchedAfter(GetFileTimestamp(timestamp_start))
  printer.PrintList("Source files touched after start of build", touched_during)

  # Find files and dirs that were output to "out" and didn't respect $OUT_DIR
  if args.check_out_dir:
    bad_out_dir_contents = FindFilesAndDirectories("out")
    printer.PrintList("Files and directories created by rules that didn't respect $OUT_DIR",
        bad_out_dir_contents)

  # If we didn't find anything, print success message
  if not printer.printed_anything:
    print("No bad behaviors found.")


def AssertAtTop():
  """If the current directory is not the top of an android source tree, print an error
  message and exit."""
  if not os.access(SOONG_UI, os.X_OK):
    sys.stderr.write("FAILED: Please run from the root of the tree.\n")
    sys.exit(1)


def RequireEnvVar(name):
  """Gets an environment variable. If it is unset or empty, print an error message
  and exit."""
  result = os.environ.get(name)
  if not result:
    sys.stderr.write("error: Can't determine %s. Please run lunch first.\n" % name)
    sys.exit(1)
  return result


def RunSoong(out_dir, args, capture_output):
  """Runs soong_ui with OUT_DIR set to out_dir.

  Args:
    out_dir - value for the OUT_DIR environment variable of the child process
    args - list of arguments to pass to soong_ui
    capture_output - if true, stdout and stderr are captured together and returned
        instead of being passed through to the terminal

  Returns:
    (returncode, output) where output is the combined stdout/stderr as bytes if
    capture_output is set, and None otherwise.
  """
  env = dict(os.environ)
  env["OUT_DIR"] = out_dir
  command = [SOONG_UI] + list(args)
  if capture_output:
    result = subprocess.run(command, env=env, stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT)
    return result.returncode, result.stdout
  result = subprocess.run(command, env=env)
  return result.returncode, None


def GetBuildVar(out_dir, name):
  """Gets a variable from the build system. Returns None if soong_ui fails."""
  returncode, output = RunSoong(out_dir, ["--dumpvar-mode", name], True)
  if returncode != 0:
    return None
  return output.decode("utf-8").strip()


def RequireBuildVar(out_dir, name):
  """Gets a variable from the build system. If that fails, or the variable is
  empty, then print an error message and exit."""
  value = GetBuildVar(out_dir, name)
  if not value:
    sys.stderr.write("error: Can't determine %s. Please run lunch first.\n" % name)
    sys.exit(1)
  return value


def Clean(directory):
  """Deletes the supplied directory. It is not an error if it doesn't exist."""
  try:
    shutil.rmtree(directory)
  except FileNotFoundError:
    pass


def RunBuild(work_dir, out_dir, target, hide_build_output):
  """Runs a build in work_dir and then moves the result to out_dir if they differ.
  If the build fails, prints a message (and the hidden build output) and exits."""
  returncode, output = RunSoong(work_dir,
      ["--build-mode", "--all-modules", "--dir=" + os.getcwd(), target],
      hide_build_output)
  if work_dir != out_dir:
    try:
      os.replace(work_dir, out_dir)
    except FileNotFoundError:
      # A build that failed early may never have created work_dir. Don't let
      # that hide the failure report below; for a successful build it is a
      # real error.
      if returncode == 0:
        raise
  if returncode != 0:
    if hide_build_output:
      # The build output was hidden, so print it now for debugging
      sys.stderr.buffer.write(output)
    sys.stderr.write("FAILED: Build failed. Stopping.\n")
    sys.exit(1)


def DiffFileList(first_files, second_files):
  """Compares two collections of files, matching them up by relative filename.

  Args:
    first_files - iterable of (full, relative) filename pairs from the first build
    second_files - iterable of (full, relative) filename pairs from the second build

  Returns a 3-tuple, each element sorted by file timestamp:
    Filenames of files in second_files but not first_files (added files)
    Filenames of files in first_files but not second_files (removed files)
    2-Tuples of (first, second) filenames for the files that are in both but
      are different (changed files)
  """
  # Map from path relative to the scanned directory to path relative to source root
  first = {relative: full for full, relative in first_files}
  second = {relative: full for full, relative in second_files}

  # Walk the names in sorted order so that files with equal timestamps come
  # out in a deterministic order after the (stable) timestamp sort below.
  added = [second[name] for name in sorted(second.keys() - first.keys())]
  removed = [first[name] for name in sorted(first.keys() - second.keys())]
  changed = []
  for name in sorted(first.keys() & second.keys()):
    if DiffFiles(first[name], second[name]) is not DIFF_NONE:
      changed.append((first[name], second[name]))

  return (SortByTimestamp(added),
          SortByTimestamp(removed),
          SortByTimestamp(changed, key=lambda item: item[1]))


def FindOutFilesTouchedAfter(files, timestamp):
  """Find files in the given file iterator that were touched after timestamp.
  Returns the full filenames, sorted by timestamp."""
  result = []
  for full, relative in files:
    ts = GetFileTimestamp(full)
    if ts > timestamp:
      result.append(TouchedFile(full, ts))
  return [f.filename for f in sorted(result, key=lambda f: f.timestamp)]


def GetFileTimestamp(filename):
  """Get the modification time for a file (just wraps stat). Symlinks are not
  followed."""
  st = os.stat(filename, follow_symlinks=False)
  return st.st_mtime


def SortByTimestamp(items, key=lambda item: item):
  """Sort the list by timestamp of files.
  Args:
    items - the list of items to sort
    key - a function to extract a filename from each element in items
  """
  return sorted(items, key=lambda item: GetFileTimestamp(key(item)))


def FindSourceFilesTouchedAfter(timestamp):
  """Find files in the source tree that have changed after timestamp. Ignores
  the out directories and .repo. Must be run from the root of the tree."""
  result = []
  for root, dirs, files in os.walk(".", followlinks=False):
    if root == ".":
      # Editing dirs in place stops os.walk from descending into them.
      RemoveItemsFromList(dirs, (".repo", "out", "out_full", "out_incremental"))
    for f in files:
      # Strip the leading "./" that comes from walking "."
      full = os.path.sep.join((root, f))[2:]
      ts = GetFileTimestamp(full)
      if ts > timestamp:
        result.append(TouchedFile(full, ts))
  return [f.filename for f in sorted(result, key=lambda f: f.timestamp)]


def FindFilesAndDirectories(directory):
  """Finds all files and directories inside a directory. Directories are
  returned with a trailing path separator."""
  result = []
  for root, dirs, files in os.walk(directory, followlinks=False):
    result += [os.path.sep.join((root, x, "")) for x in dirs]
    result += [os.path.sep.join((root, x)) for x in files]
  return result


def CreateEmptyFile(filename):
  """Create an empty file with now as the timestamp at filename, creating the
  parent directory if needed."""
  directory = os.path.dirname(filename)
  if directory:
    # dirname is "" for a bare filename, which makedirs rejects.
    os.makedirs(directory, exist_ok=True)
  with open(filename, "w"):
    pass
  os.utime(filename)


def TouchFile(filename):
  """Sets the timestamps of an existing file to now."""
  os.utime(filename)


def _AreFileContentsSame(first_filename, second_filename):
  """Returns whether the two files have the same bytes, comparing chunk by chunk."""
  with open(first_filename, "rb") as first_file, open(second_filename, "rb") as second_file:
    while True:
      first_chunk = first_file.read(_CHUNK_SIZE)
      if first_chunk != second_file.read(_CHUNK_SIZE):
        return False
      if not first_chunk:
        # Both files hit EOF at the same time.
        return True


def DiffFiles(first_filename, second_filename):
  """Compares two files without following symlinks.

  Returns one of the DIFF_* constants: DIFF_MODE if the stat mode bits differ,
  DIFF_SIZE if the sizes differ, DIFF_SYMLINK if both are symlinks with
  different targets, DIFF_CONTENTS if both are regular files with different
  contents, and DIFF_NONE otherwise.
  """
  first_stat = os.stat(first_filename, follow_symlinks=False)
  second_stat = os.stat(second_filename, follow_symlinks=False)

  # Mode bits
  if first_stat.st_mode != second_stat.st_mode:
    return DIFF_MODE

  # File size
  if first_stat.st_size != second_stat.st_size:
    return DIFF_SIZE

  # Contents. The modes are equal here, so checking the first file's type is enough.
  if stat.S_ISLNK(first_stat.st_mode):
    if os.readlink(first_filename) != os.readlink(second_filename):
      return DIFF_SYMLINK
  elif stat.S_ISREG(first_stat.st_mode):
    if not _AreFileContentsSame(first_filename, second_filename):
      return DIFF_CONTENTS

  return DIFF_NONE


class FileIterator(object):
  """Object that produces an iterator containing all files in a given directory.

  Each iteration yields a tuple containing:

  [0] (full) Path to file relative to source tree.
  [1] (relative) Path to the file relative to the base directory given in the
      constructor.

  Subclasses override ShouldIncludeFile to choose which files are produced.
  """

  def __init__(self, base_dir):
    self._base_dir = base_dir

  def __iter__(self):
    # Length of base_dir with exactly one trailing separator, so that the
    # relative path is right whether or not base_dir was given with one.
    prefix_len = len(os.path.join(self._base_dir, ""))
    for root, dirs, files in os.walk(self._base_dir, followlinks=False):
      for f in files:
        full = os.path.join(root, f)
        relative = full[prefix_len:]
        if self.ShouldIncludeFile(root, relative):
          yield full, relative

  def ShouldIncludeFile(self, root, path):
    """Returns whether the file at path (relative to the base directory), found
    in directory root, should be produced. The base class includes nothing."""
    return False


class OutFiles(FileIterator):
  """Object that produces an iterator containing all files in a given out directory,
  except for files which are known to be touched as part of build setup.
  """
  def __init__(self, out_dir):
    super().__init__(out_dir)
    self._out_dir = out_dir

  def ShouldIncludeFile(self, root, relative):
    # Skip files in root, although note that this could actually skip
    # files that are sadly generated directly into that directory.
    if root == self._out_dir:
      return False
    # Skiplist
    if relative.startswith(BUILD_INTERNALS_PREFIX_SKIP):
      return False
    if relative.endswith(BUILD_INTERNALS_SUFFIX_SKIP):
      return False
    return True


class ProductFiles(FileIterator):
  """Object that produces an iterator containing files in listed subdirectories of $PRODUCT_OUT.
  """
  def __init__(self, product_out, subdirs):
    super().__init__(product_out)
    self._subdirs = subdirs
    # Match on a directory boundary so that "system" selects system/... but
    # not system_ext/... or system.img.
    self._prefixes = tuple(subdir.rstrip(os.path.sep) + os.path.sep for subdir in subdirs)

  def ShouldIncludeFile(self, root, relative):
    return relative.startswith(self._prefixes)


class TouchedFile(object):
  """A file in the out directory with a timestamp."""
  def __init__(self, filename, timestamp):
    self.filename = filename
    self.timestamp = timestamp


def RemoveItemsFromList(haystack, needles):
  """Removes the first occurrence of each needle from haystack, in place.
  Needles that are not present are ignored."""
  for needle in needles:
    try:
      haystack.remove(needle)
    except ValueError:
      pass


class Printer(object):
  """Prints titled lists to stdout and remembers whether anything was printed."""

  def __init__(self):
    self.printed_anything = False

  def PrintList(self, title, items, fmt="%s"):
    """Prints title followed by each item formatted with fmt. Prints nothing
    if items is empty. Lists are separated from each other by a blank line."""
    if items:
      if self.printed_anything:
        sys.stdout.write("\n")
      sys.stdout.write("%s:\n" % title)
      for item in items:
        # fmt is first substituted into the line template, and the result is
        # then applied to the item (a string, or a tuple for multi-%s formats).
        sys.stdout.write(" %s\n" % fmt % item)
      self.printed_anything = True


if __name__ == "__main__":
  try:
    main()
  except KeyboardInterrupt:
    pass


# vim: ts=2 sw=2 sts=2 nocindent