#!/usr/bin/env python
# Copyright 2017 The PDFium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Compares the performance of two versions of the pdfium code."""

import argparse
import functools
import json
import multiprocessing
import os
import re
import shutil
import subprocess
import sys
import tempfile

from common import GetBooleanGnArg
from common import PrintErr
from common import RunCommandPropagateErr
from githelper import GitHelper
from safetynet_conclusions import ComparisonConclusions
from safetynet_conclusions import PrintConclusionsDictHumanReadable
from safetynet_conclusions import RATING_IMPROVEMENT
from safetynet_conclusions import RATING_REGRESSION


def RunSingleTestCaseParallel(this, run_label, build_dir, test_case):
  """Runs a single test case and pairs the result with its test case.

  Module-level wrapper around CompareRun.RunSingleTestCase, used as the worker
  function for the multiprocessing pool in CompareRun._RunAsync.

  Args:
    this: The CompareRun instance to run the test case with.
    run_label: String to differentiate this version of the code in output
        files from other versions.
    build_dir: String with path to build directory
    test_case: Path to the test case.

  Returns:
    A tuple (test_case, result), where result is whatever
    CompareRun.RunSingleTestCase returned for that test case.
  """
  result = this.RunSingleTestCase(run_label, build_dir, test_case)
  return (test_case, result)


class CompareRun(object):
  """A comparison between two branches of pdfium."""

  def __init__(self, args):
    """Stores the parsed script arguments and resolves the paths to use.

    Args:
      args: Namespace with the parsed command line arguments, as built by
          main().
    """
    self.git = GitHelper()
    self.args = args
    self._InitPaths()

  def _InitPaths(self):
    """Resolves script, test case and build directory paths from the args.

    Sets safe_script_dir, safe_measure_script_path, test_cases,
    after_build_dir and before_build_dir.
    """
    # With --this-repo the measuring script is run from a copy placed in the
    # build dir (see _FreezeMeasureScript), otherwise from its usual location.
    if self.args.this_repo:
      self.safe_script_dir = self.args.build_dir
    else:
      self.safe_script_dir = os.path.join('testing', 'tools')

    self.safe_measure_script_path = os.path.abspath(
        os.path.join(self.safe_script_dir,
                     'safetynet_measure.py'))

    input_file_re = re.compile('^.+[.]pdf$')
    self.test_cases = []
    for input_path in self.args.input_paths:
      if os.path.isfile(input_path):
        # Files given explicitly are used as test cases regardless of their
        # extension.
        self.test_cases.append(input_path)
      elif os.path.isdir(input_path):
        # Directories are searched recursively for .pdf files.
        for file_dir, _, filename_list in os.walk(input_path):
          for input_filename in filename_list:
            if input_file_re.match(input_filename):
              file_path = os.path.join(file_dir, input_filename)
              if os.path.isfile(file_path):
                self.test_cases.append(file_path)
      else:
        # Report paths that are skipped instead of ignoring them silently.
        PrintErr('Warning: "%s" is not a file or directory, skipping'
                 % input_path)

    self.after_build_dir = self.args.build_dir
    if self.args.build_dir_before:
      self.before_build_dir = self.args.build_dir_before
    else:
      self.before_build_dir = self.after_build_dir

  def Run(self):
    """Runs comparison by checking out branches, building and measuring them.

    Returns:
      Exit code for the script.
    """
    if self.args.this_repo:
      self._FreezeMeasureScript()

    if self.args.branch_after:
      if self.args.this_repo:
        before, after = self._ProfileTwoOtherBranchesInThisRepo(
            self.args.branch_before,
            self.args.branch_after)
      else:
        before, after = self._ProfileTwoOtherBranches(
            self.args.branch_before,
            self.args.branch_after)
    elif self.args.branch_before:
      if self.args.this_repo:
        before, after = self._ProfileCurrentAndOtherBranchInThisRepo(
            self.args.branch_before)
      else:
        before, after = self._ProfileCurrentAndOtherBranch(
            self.args.branch_before)
    else:
      if self.args.this_repo:
        before, after = self._ProfileLocalChangesAndCurrentBranchInThisRepo()
      else:
        before, after = self._ProfileLocalChangesAndCurrentBranch()

    conclusions = self._DrawConclusions(before, after)
    conclusions_dict = conclusions.GetOutputDict()
    conclusions_dict.setdefault('metadata', {})['profiler'] = self.args.profiler

    self._PrintConclusions(conclusions_dict)

    self._CleanUp(conclusions)

    return 0

  def _FreezeMeasureScript(self):
    """Freezes a version of the measuring script.

    This is needed to make sure we are comparing the pdfium library changes and
    not script changes that may happen between the two branches.
    """
    self.__FreezeFile(os.path.join('testing', 'tools', 'safetynet_measure.py'))
    self.__FreezeFile(os.path.join('testing', 'tools', 'common.py'))

  def __FreezeFile(self, file_path):
    """Copies a file into the safe script directory.

    Args:
      file_path: Path to the file to copy into self.safe_script_dir.
    """
    RunCommandPropagateErr(['cp', file_path, self.safe_script_dir],
                           exit_status_on_error=1)

  def _ProfileTwoOtherBranchesInThisRepo(self, before_branch, after_branch):
    """Profiles two branches that are not the current branch.

    This is done in the local repository and changes may not be restored if the
    script fails or is interrupted.

    after_branch does not need to descend from before_branch, they will be
    measured the same way.

    Args:
      before_branch: One branch to profile.
      after_branch: Other branch to profile.

    Returns:
      A tuple (before, after), where each of before and after is a dict
      mapping a test case name to the profiling values for that test case
      in the given branch.
    """
    branch_to_restore = self.git.GetCurrentBranchName()

    self._StashLocalChanges()

    self._CheckoutBranch(after_branch)
    self._BuildCurrentBranch(self.after_build_dir)
    after = self._MeasureCurrentBranch('after', self.after_build_dir)

    self._CheckoutBranch(before_branch)
    self._BuildCurrentBranch(self.before_build_dir)
    before = self._MeasureCurrentBranch('before', self.before_build_dir)

    self._CheckoutBranch(branch_to_restore)
    self._RestoreLocalChanges()

    return before, after

  def _ProfileTwoOtherBranches(self, before_branch, after_branch):
    """Profiles two branches that are not the current branch.

    This is done in new, cloned repositories, therefore it is safer but slower
    and requires downloads.

    after_branch does not need to descend from before_branch, they will be
    measured the same way.

    Args:
      before_branch: One branch to profile.
      after_branch: Other branch to profile.

    Returns:
      A tuple (before, after), where each of before and after is a dict
      mapping a test case name to the profiling values for that test case
      in the given branch.
    """
    after = self._ProfileSeparateRepo('after',
                                      self.after_build_dir,
                                      after_branch)
    before = self._ProfileSeparateRepo('before',
                                       self.before_build_dir,
                                       before_branch)
    return before, after

  def _ProfileCurrentAndOtherBranchInThisRepo(self, other_branch):
    """Profiles the current branch (with uncommitted changes) and another one.

    This is done in the local repository and changes may not be restored if the
    script fails or is interrupted.

    The current branch does not need to descend from other_branch.

    Args:
      other_branch: Other branch to profile that is not the current.

    Returns:
      A tuple (before, after), where each of before and after is a dict
      mapping a test case name to the profiling values for that test case
      in the given branch. The current branch is considered to be "after" and
      the other branch is considered to be "before".
    """
    branch_to_restore = self.git.GetCurrentBranchName()

    # Measure the working tree as it is before stashing anything.
    self._BuildCurrentBranch(self.after_build_dir)
    after = self._MeasureCurrentBranch('after', self.after_build_dir)

    self._StashLocalChanges()

    self._CheckoutBranch(other_branch)
    self._BuildCurrentBranch(self.before_build_dir)
    before = self._MeasureCurrentBranch('before', self.before_build_dir)

    self._CheckoutBranch(branch_to_restore)
    self._RestoreLocalChanges()

    return before, after

  def _ProfileCurrentAndOtherBranch(self, other_branch):
    """Profiles the current branch (with uncommitted changes) and another one.

    This is done in new, cloned repositories, therefore it is safer but slower
    and requires downloads.

    The current branch does not need to descend from other_branch.

    Args:
      other_branch: Other branch to profile that is not the current. None will
          compare to the same branch.

    Returns:
      A tuple (before, after), where each of before and after is a dict
      mapping a test case name to the profiling values for that test case
      in the given branch. The current branch is considered to be "after" and
      the other branch is considered to be "before".
    """
    self._BuildCurrentBranch(self.after_build_dir)
    after = self._MeasureCurrentBranch('after', self.after_build_dir)

    before = self._ProfileSeparateRepo('before',
                                       self.before_build_dir,
                                       other_branch)

    return before, after

  def _ProfileLocalChangesAndCurrentBranchInThisRepo(self):
    """Profiles the current branch with and without uncommitted changes.

    This is done in the local repository and changes may not be restored if the
    script fails or is interrupted.

    Returns:
      A tuple (before, after), where each of before and after is a dict
      mapping a test case name to the profiling values for that test case
      using the given version. The current branch without uncommitted changes is
      considered to be "before" and with uncommitted changes is considered to be
      "after".
    """
    self._BuildCurrentBranch(self.after_build_dir)
    after = self._MeasureCurrentBranch('after', self.after_build_dir)

    pushed = self._StashLocalChanges()
    # Without local changes the comparison is only meaningful when a different
    # build dir is used for the "before" measurement.
    if not pushed and not self.args.build_dir_before:
      PrintErr('Warning: No local changes to compare')

    self._BuildCurrentBranch(self.before_build_dir)
    before = self._MeasureCurrentBranch('before', self.before_build_dir)

    self._RestoreLocalChanges()

    return before, after

  def _ProfileLocalChangesAndCurrentBranch(self):
    """Profiles the current branch with and without uncommitted changes.

    This is done in new, cloned repositories, therefore it is safer but slower
    and requires downloads.

    Returns:
      A tuple (before, after), where each of before and after is a dict
      mapping a test case name to the profiling values for that test case
      using the given version. The current branch without uncommitted changes is
      considered to be "before" and with uncommitted changes is considered to be
      "after".
    """
    return self._ProfileCurrentAndOtherBranch(other_branch=None)

  def _ProfileSeparateRepo(self, run_label, relative_build_dir, branch):
    """Profiles a branch in a temporary git repository.

    Args:
      run_label: String to differentiate this version of the code in output
          files from other versions.
      relative_build_dir: Path to the build dir in the current working dir to
          clone build args from.
      branch: Branch to checkout in the new repository. None will
          profile the same branch checked out in the original repo.
    Returns:
      A dict mapping each test case name to the profiling values for that
      test case.
    """
    build_dir = self._CreateTempRepo('repo_%s' % run_label,
                                     relative_build_dir,
                                     branch)

    self._BuildCurrentBranch(build_dir)
    return self._MeasureCurrentBranch(run_label, build_dir)

  def _CreateTempRepo(self, dir_name, relative_build_dir, branch):
    """Clones a temporary git repository out of the current working dir.

    Args:
      dir_name: Name for the temporary repository directory
      relative_build_dir: Path to the build dir in the current working dir to
          clone build args from.
      branch: Branch to checkout in the new repository. None will keep checked
          out the same branch as the local repo.
    Returns:
      Path to the build directory of the new repository.
    """
    # Remembered so it can be used as the clone source and restored at the end.
    cwd = os.getcwd()

    repo_dir = tempfile.mkdtemp(suffix='-%s' % dir_name)
    src_dir = os.path.join(repo_dir, 'pdfium')

    self.git.CloneLocal(cwd, src_dir)

    if branch is not None:
      os.chdir(src_dir)
      self.git.Checkout(branch)

    os.chdir(repo_dir)
    PrintErr('Syncing...')

    cmd = ['gclient', 'config', '--unmanaged',
           'https://pdfium.googlesource.com/pdfium.git']
    if self.args.cache_dir:
      cmd.append('--cache-dir=%s' % self.args.cache_dir)
    RunCommandPropagateErr(cmd, exit_status_on_error=1)

    RunCommandPropagateErr(['gclient', 'sync'], exit_status_on_error=1)

    PrintErr('Done.')

    build_dir = os.path.join(src_dir, relative_build_dir)
    os.makedirs(build_dir)
    os.chdir(src_dir)

    # Reuse the GN args of the original build dir in the new repository.
    source_gn_args = os.path.join(cwd, relative_build_dir, 'args.gn')
    dest_gn_args = os.path.join(build_dir, 'args.gn')
    shutil.copy(source_gn_args, dest_gn_args)

    RunCommandPropagateErr(['gn', 'gen', relative_build_dir],
                           exit_status_on_error=1)

    os.chdir(cwd)

    return build_dir

  def _CheckoutBranch(self, branch):
    """Checks out a branch in the current repository.

    Args:
      branch: Name of the branch to check out.
    """
    PrintErr("Checking out branch '%s'" % branch)
    self.git.Checkout(branch)

  def _StashLocalChanges(self):
    """Stashes the local changes of the current repository.

    Returns:
      The value returned by GitHelper.StashPush(). Callers treat it as a
      boolean telling whether anything was stashed.
    """
    PrintErr('Stashing local changes')
    return self.git.StashPush()

  def _RestoreLocalChanges(self):
    """Restores the changes stashed through the git helper."""
    PrintErr('Restoring local changes')
    self.git.StashPopAll()

  def _BuildCurrentBranch(self, build_dir):
    """Synchronizes and builds the current version of pdfium.

    Args:
      build_dir: String with path to build directory
    """
    PrintErr('Syncing...')
    RunCommandPropagateErr(['gclient', 'sync'], exit_status_on_error=1)
    PrintErr('Done.')

    PrintErr('Building...')
    cmd = ['ninja', '-C', build_dir, 'pdfium_test']
    if GetBooleanGnArg('use_goma', build_dir):
      cmd.extend(['-j', '250'])
    RunCommandPropagateErr(cmd, stdout_has_errors=True, exit_status_on_error=1)
    PrintErr('Done.')

  def _MeasureCurrentBranch(self, run_label, build_dir):
    """Profiles all test cases using the given build.

    Runs in parallel only when more than one worker is configured and there is
    more than one test case.

    Args:
      run_label: String to differentiate this version of the code in output
          files from other versions.
      build_dir: String with path to build directory

    Returns:
      A dict mapping each test case name to the profiling values for that
      test case.
    """
    PrintErr('Measuring...')
    if self.args.num_workers > 1 and len(self.test_cases) > 1:
      results = self._RunAsync(run_label, build_dir)
    else:
      results = self._RunSync(run_label, build_dir)
    PrintErr('Done.')

    return results

  def _RunSync(self, run_label, build_dir):
    """Profiles the test cases synchronously.

    Args:
      run_label: String to differentiate this version of the code in output
          files from other versions.
      build_dir: String with path to build directory

    Returns:
      A dict mapping each test case name to the profiling values for that
      test case.
    """
    results = {}

    for test_case in self.test_cases:
      result = self.RunSingleTestCase(run_label, build_dir, test_case)
      # Test cases that could not be measured are left out of the results.
      if result is not None:
        results[test_case] = result

    return results

  def _RunAsync(self, run_label, build_dir):
    """Profiles the test cases asynchronously.

    Uses as many workers as configured by --num-workers.

    Args:
      run_label: String to differentiate this version of the code in output
          files from other versions.
      build_dir: String with path to build directory

    Returns:
      A dict mapping each test case name to the profiling values for that
      test case.
    """
    results = {}
    pool = multiprocessing.Pool(self.args.num_workers)
    worker_func = functools.partial(
        RunSingleTestCaseParallel, self, run_label, build_dir)

    try:
      # The timeout is a workaround for http://bugs.python.org/issue8296
      # which prevents KeyboardInterrupt from working.
      one_year_in_seconds = 3600 * 24 * 365
      worker_results = (pool.map_async(worker_func, self.test_cases)
                        .get(one_year_in_seconds))
      for worker_result in worker_results:
        test_case, result = worker_result
        # Test cases that could not be measured are left out of the results.
        if result is not None:
          results[test_case] = result
    except KeyboardInterrupt:
      pool.terminate()
      sys.exit(1)
    else:
      pool.close()

    pool.join()

    return results

  def RunSingleTestCase(self, run_label, build_dir, test_case):
    """Profiles a single test case.

    Args:
      run_label: String to differentiate this version of the code in output
          files from other versions.
      build_dir: String with path to build directory
      test_case: Path to the test case.

    Returns:
      The measured profiling value for that test case as an int, or None if
      the measuring command produced no output or its output is not made only
      of digits.
    """
    command = [self.safe_measure_script_path, test_case,
               '--build-dir=%s' % build_dir]

    if self.args.interesting_section:
      command.append('--interesting-section')

    if self.args.profiler:
      command.append('--profiler=%s' % self.args.profiler)

    profile_file_path = self._GetProfileFilePath(run_label, test_case)
    if profile_file_path:
      command.append('--output-path=%s' % profile_file_path)

    output = RunCommandPropagateErr(command)

    if output is None:
      return None

    # Get the time number as output, making sure it's just a number
    output = output.strip()
    if re.match('^[0-9]+$', output):
      return int(output)

    return None

  def _GetProfileFilePath(self, run_label, test_case):
    """Builds the path of the profile output file for a test case.

    Args:
      run_label: String to differentiate this version of the code in output
          files from other versions.
      test_case: Path to the test case.

    Returns:
      Path to the output file inside --output-dir, or None if no output
      directory was given.
    """
    if not self.args.output_dir:
      return None

    # Flatten the test case path so it can be used as a single file name.
    output_filename = ('callgrind.out.%s.%s'
                       % (test_case.replace('/', '_'),
                          run_label))
    return os.path.join(self.args.output_dir, output_filename)

  def _DrawConclusions(self, times_before_branch, times_after_branch):
    """Draws conclusions comparing results of test runs in two branches.

    Args:
      times_before_branch: A dict mapping each test case name to the
          profiling values for that test case in the branch to be considered
          as the baseline.
      times_after_branch: A dict mapping each test case name to the
          profiling values for that test case in the branch to be considered
          as the new version.

    Returns:
      ComparisonConclusions with all test cases processed.
    """
    conclusions = ComparisonConclusions(self.args.threshold_significant)

    for test_case in sorted(self.test_cases):
      # Missing measurements are passed on as None.
      before = times_before_branch.get(test_case)
      after = times_after_branch.get(test_case)
      conclusions.ProcessCase(test_case, before, after)

    return conclusions

  def _PrintConclusions(self, conclusions_dict):
    """Prints the conclusions as the script output.

    Depending on the script args, this can output a human or a machine-readable
    version of the conclusions.

    Args:
      conclusions_dict: Dict to print returned from
          ComparisonConclusions.GetOutputDict().
    """
    if self.args.machine_readable:
      # Function-call form with a single argument prints the same text on
      # Python 2 and Python 3.
      print(json.dumps(conclusions_dict))
    else:
      PrintConclusionsDictHumanReadable(
          conclusions_dict, colored=True, key=self.args.case_order)

  def _CleanUp(self, conclusions):
    """Removes profile output files for uninteresting cases.

    Cases without significant regressions or improvements are considered
    uninteresting.

    Args:
      conclusions: A ComparisonConclusions.
    """
    if not self.args.output_dir:
      return

    if self.args.profiler != 'callgrind':
      return

    for case_result in conclusions.GetCaseResults().values():
      if case_result.rating not in [RATING_REGRESSION, RATING_IMPROVEMENT]:
        self._CleanUpOutputFile('before', case_result.case_name)
        self._CleanUpOutputFile('after', case_result.case_name)

  def _CleanUpOutputFile(self, run_label, case_name):
    """Removes one profile output file.

    If the output file does not exist, fails silently.

    Args:
      run_label: String to differentiate a version of the code in output
          files from other versions.
      case_name: String identifying test case for which to remove the output
          file.
    """
    try:
      os.remove(self._GetProfileFilePath(run_label, case_name))
    except OSError:
      pass


def main():
  """Parses and validates the command line arguments and runs the comparison.

  Returns:
    Exit code for the script: 1 if the arguments are invalid, otherwise the
    value returned by CompareRun.Run().
  """
  parser = argparse.ArgumentParser()
  parser.add_argument('input_paths', nargs='+',
                      help='pdf files or directories to search for pdf files '
                           'to run as test cases')
  parser.add_argument('--branch-before',
                      help='git branch to use as "before" for comparison. '
                           'Omitting this will use the current branch '
                           'without uncommitted changes as the baseline.')
  parser.add_argument('--branch-after',
                      help='git branch to use as "after" for comparison. '
                           'Omitting this will use the current branch '
                           'with uncommitted changes.')
  parser.add_argument('--build-dir', default=os.path.join('out', 'Release'),
                      help='relative path from the base source directory '
                           'to the build directory')
  parser.add_argument('--build-dir-before',
                      help='relative path from the base source directory '
                           'to the build directory for the "before" branch, if '
                           'different from the build directory for the '
                           '"after" branch')
  parser.add_argument('--cache-dir', default=None,
                      help='directory with a new or preexisting cache for '
                           'downloads. Default is to not use a cache.')
  parser.add_argument('--this-repo', action='store_true',
                      help='use the repository where the script is instead of '
                           'checking out a temporary one. This is faster and '
                           'does not require downloads, but although it '
                           'restores the state of the local repo, if the '
                           'script is killed or crashes the changes can remain '
                           'stashed and you may be on another branch.')
  parser.add_argument('--profiler', default='callgrind',
                      help='which profiler to use. Supports callgrind and '
                           'perfstat for now. Default is callgrind.')
  parser.add_argument('--interesting-section', action='store_true',
                      help='whether to measure just the interesting section or '
                           'the whole test harness. Limiting to only the '
                           'interesting section does not work on Release since '
                           'the delimiters are optimized out')
  parser.add_argument('--num-workers', default=multiprocessing.cpu_count(),
                      type=int, help='run NUM_WORKERS jobs in parallel')
  parser.add_argument('--output-dir',
                      help='directory to write the profile data output files')
  parser.add_argument('--threshold-significant', default=0.02, type=float,
                      help='variations in performance above this factor are '
                           'considered significant')
  parser.add_argument('--machine-readable', action='store_true',
                      help='whether to get output for machines. If enabled the '
                           'output will be a json with the format specified in '
                           'ComparisonConclusions.GetOutputDict(). Default is '
                           'human-readable.')
  parser.add_argument('--case-order', default=None,
                      help='what key to use when sorting test cases in the '
                           'output. Accepted values are "after", "before", '
                           '"ratio" and "rating". Default is sorting by test '
                           'case path.')

  args = parser.parse_args()

  # Always start at the pdfium src dir, which is assumed to be two levels above
  # this script.
  pdfium_src_dir = os.path.join(
      os.path.dirname(__file__),
      os.path.pardir,
      os.path.pardir)
  os.chdir(pdfium_src_dir)

  git = GitHelper()

  if args.branch_after and not args.branch_before:
    PrintErr('--branch-after requires --branch-before to be specified.')
    return 1

  if args.branch_after and not git.BranchExists(args.branch_after):
    PrintErr('Branch "%s" does not exist' % args.branch_after)
    return 1

  if args.branch_before and not git.BranchExists(args.branch_before):
    PrintErr('Branch "%s" does not exist' % args.branch_before)
    return 1

  if args.output_dir:
    args.output_dir = os.path.expanduser(args.output_dir)
    if not os.path.isdir(args.output_dir):
      PrintErr('"%s" is not a directory' % args.output_dir)
      return 1

  if args.threshold_significant <= 0.0:
    PrintErr('--threshold-significant should receive a positive float')
    return 1

  run = CompareRun(args)
  return run.Run()


if __name__ == '__main__':
  sys.exit(main())