1# Copyright (C) 2007, 2008, 2009 Apple Inc. All rights reserved. 2# Copyright (C) 2009, 2010 Chris Jerdonek (chris.jerdonek@gmail.com) 3# Copyright (C) 2010, 2011 Research In Motion Limited. All rights reserved. 4# 5# Redistribution and use in source and binary forms, with or without 6# modification, are permitted provided that the following conditions 7# are met: 8# 9# 1. Redistributions of source code must retain the above copyright 10# notice, this list of conditions and the following disclaimer. 11# 2. Redistributions in binary form must reproduce the above copyright 12# notice, this list of conditions and the following disclaimer in the 13# documentation and/or other materials provided with the distribution. 14# 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of 15# its contributors may be used to endorse or promote products derived 16# from this software without specific prior written permission. 17# 18# THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY 19# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 20# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21# DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY 22# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 23# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 24# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 25# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 27# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 29# Module to share code to work with various version control systems. 30package VCSUtils; 31 32use strict; 33use warnings; 34 35use Cwd qw(); # "qw()" prevents warnings about redefining getcwd() with "use POSIX;" 36use English; # for $POSTMATCH, etc. 37use File::Basename; 38use File::Spec; 39use POSIX; 40 41BEGIN { 42 use Exporter (); 43 our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS); 44 $VERSION = 1.00; 45 @ISA = qw(Exporter); 46 @EXPORT = qw( 47 &applyGitBinaryPatchDelta 48 &callSilently 49 &canonicalizePath 50 &changeLogEmailAddress 51 &changeLogName 52 &chdirReturningRelativePath 53 &decodeGitBinaryChunk 54 &decodeGitBinaryPatch 55 &determineSVNRoot 56 &determineVCSRoot 57 &exitStatus 58 &fixChangeLogPatch 59 &gitBranch 60 &gitdiff2svndiff 61 &isGit 62 &isGitSVN 63 &isGitBranchBuild 64 &isGitDirectory 65 &isSVN 66 &isSVNDirectory 67 &isSVNVersion16OrNewer 68 &makeFilePathRelative 69 &mergeChangeLogs 70 &normalizePath 71 &parseFirstEOL 72 &parsePatch 73 &pathRelativeToSVNRepositoryRootForPath 74 &prepareParsedPatch 75 &removeEOL 76 &runPatchCommand 77 &scmMoveOrRenameFile 78 &scmToggleExecutableBit 79 &setChangeLogDateAndReviewer 80 &svnRevisionForDirectory 81 &svnStatus 82 &toWindowsLineEndings 83 ); 84 %EXPORT_TAGS = ( ); 85 @EXPORT_OK = (); 86} 87 88our @EXPORT_OK; 89 90my $gitBranch; 91my $gitRoot; 92my $isGit; 93my $isGitSVN; 94my $isGitBranchBuild; 95my $isSVN; 96my $svnVersion; 97 98# Project time zone for Cupertino, CA, US 99my $changeLogTimeZone = "PST8PDT"; 100 101my $chunkRangeRegEx = qr#^\@\@ -(\d+),(\d+) \+\d+,(\d+) \@\@$#; # e.g. @@ -2,6 +2,18 @@ 102my $gitDiffStartRegEx = qr#^diff --git (\w/)?(.+) (\w/)?([^\r\n]+)#; 103my $svnDiffStartRegEx = qr#^Index: ([^\r\n]+)#; 104my $svnPropertiesStartRegEx = qr#^Property changes on: ([^\r\n]+)#; # $1 is normally the same as the index path. 105my $svnPropertyStartRegEx = qr#^(Modified|Name|Added|Deleted): ([^\r\n]+)#; # $2 is the name of the property. 106my $svnPropertyValueStartRegEx = qr#^ (\+|-|Merged|Reverse-merged) ([^\r\n]+)#; # $2 is the start of the property's value (which may span multiple lines). 107 108# This method is for portability. Return the system-appropriate exit 109# status of a child process. 110# 111# Args: pass the child error status returned by the last pipe close, 112# for example "$?". 113sub exitStatus($) 114{ 115 my ($returnvalue) = @_; 116 if ($^O eq "MSWin32") { 117 return $returnvalue >> 8; 118 } 119 return WEXITSTATUS($returnvalue); 120} 121 122# Call a function while suppressing STDERR, and return the return values 123# as an array. 124sub callSilently($@) { 125 my ($func, @args) = @_; 126 127 # The following pattern was taken from here: 128 # http://www.sdsc.edu/~moreland/courses/IntroPerl/docs/manual/pod/perlfunc/open.html 129 # 130 # Also see this Perl documentation (search for "open OLDERR"): 131 # http://perldoc.perl.org/functions/open.html 132 open(OLDERR, ">&STDERR"); 133 close(STDERR); 134 my @returnValue = &$func(@args); 135 open(STDERR, ">&OLDERR"); 136 close(OLDERR); 137 138 return @returnValue; 139} 140 141sub toWindowsLineEndings 142{ 143 my ($text) = @_; 144 $text =~ s/\n/\r\n/g; 145 return $text; 146} 147 148# Note, this method will not error if the file corresponding to the $source path does not exist. 149sub scmMoveOrRenameFile 150{ 151 my ($source, $destination) = @_; 152 return if ! -e $source; 153 if (isSVN()) { 154 system("svn", "move", $source, $destination); 155 } elsif (isGit()) { 156 system("git", "mv", $source, $destination); 157 } 158} 159 160# Note, this method will not error if the file corresponding to the path does not exist. 161sub scmToggleExecutableBit 162{ 163 my ($path, $executableBitDelta) = @_; 164 return if ! -e $path; 165 if ($executableBitDelta == 1) { 166 scmAddExecutableBit($path); 167 } elsif ($executableBitDelta == -1) { 168 scmRemoveExecutableBit($path); 169 } 170} 171 172sub scmAddExecutableBit($) 173{ 174 my ($path) = @_; 175 176 if (isSVN()) { 177 system("svn", "propset", "svn:executable", "on", $path) == 0 or die "Failed to run 'svn propset svn:executable on $path'."; 178 } elsif (isGit()) { 179 chmod(0755, $path); 180 } 181} 182 183sub scmRemoveExecutableBit($) 184{ 185 my ($path) = @_; 186 187 if (isSVN()) { 188 system("svn", "propdel", "svn:executable", $path) == 0 or die "Failed to run 'svn propdel svn:executable $path'."; 189 } elsif (isGit()) { 190 chmod(0664, $path); 191 } 192} 193 194sub isGitDirectory($) 195{ 196 my ($dir) = @_; 197 return system("cd $dir && git rev-parse > " . File::Spec->devnull() . " 2>&1") == 0; 198} 199 200sub isGit() 201{ 202 return $isGit if defined $isGit; 203 204 $isGit = isGitDirectory("."); 205 return $isGit; 206} 207 208sub isGitSVN() 209{ 210 return $isGitSVN if defined $isGitSVN; 211 212 # There doesn't seem to be an officially documented way to determine 213 # if you're in a git-svn checkout. The best suggestions seen so far 214 # all use something like the following: 215 my $output = `git config --get svn-remote.svn.fetch 2>& 1`; 216 $isGitSVN = $output ne ''; 217 return $isGitSVN; 218} 219 220sub gitBranch() 221{ 222 unless (defined $gitBranch) { 223 chomp($gitBranch = `git symbolic-ref -q HEAD`); 224 $gitBranch = "" if exitStatus($?); 225 $gitBranch =~ s#^refs/heads/##; 226 $gitBranch = "" if $gitBranch eq "master"; 227 } 228 229 return $gitBranch; 230} 231 232sub isGitBranchBuild() 233{ 234 my $branch = gitBranch(); 235 chomp(my $override = `git config --bool branch.$branch.webKitBranchBuild`); 236 return 1 if $override eq "true"; 237 return 0 if $override eq "false"; 238 239 unless (defined $isGitBranchBuild) { 240 chomp(my $gitBranchBuild = `git config --bool core.webKitBranchBuild`); 241 $isGitBranchBuild = $gitBranchBuild eq "true"; 242 } 243 244 return $isGitBranchBuild; 245} 246 247sub isSVNDirectory($) 248{ 249 my ($dir) = @_; 250 251 return -d File::Spec->catdir($dir, ".svn"); 252} 253 254sub isSVN() 255{ 256 return $isSVN if defined $isSVN; 257 258 $isSVN = isSVNDirectory("."); 259 return $isSVN; 260} 261 262sub svnVersion() 263{ 264 return $svnVersion if defined $svnVersion; 265 266 if (!isSVN()) { 267 $svnVersion = 0; 268 } else { 269 chomp($svnVersion = `svn --version --quiet`); 270 } 271 return $svnVersion; 272} 273 274sub isSVNVersion16OrNewer() 275{ 276 my $version = svnVersion(); 277 return eval "v$version" ge v1.6; 278} 279 280sub chdirReturningRelativePath($) 281{ 282 my ($directory) = @_; 283 my $previousDirectory = Cwd::getcwd(); 284 chdir $directory; 285 my $newDirectory = Cwd::getcwd(); 286 return "." if $newDirectory eq $previousDirectory; 287 return File::Spec->abs2rel($previousDirectory, $newDirectory); 288} 289 290sub determineGitRoot() 291{ 292 chomp(my $gitDir = `git rev-parse --git-dir`); 293 return dirname($gitDir); 294} 295 296sub determineSVNRoot() 297{ 298 my $last = ''; 299 my $path = '.'; 300 my $parent = '..'; 301 my $repositoryRoot; 302 my $repositoryUUID; 303 while (1) { 304 my $thisRoot; 305 my $thisUUID; 306 # Ignore error messages in case we've run past the root of the checkout. 307 open INFO, "svn info '$path' 2> " . File::Spec->devnull() . " |" or die; 308 while (<INFO>) { 309 if (/^Repository Root: (.+)/) { 310 $thisRoot = $1; 311 } 312 if (/^Repository UUID: (.+)/) { 313 $thisUUID = $1; 314 } 315 if ($thisRoot && $thisUUID) { 316 local $/ = undef; 317 <INFO>; # Consume the rest of the input. 318 } 319 } 320 close INFO; 321 322 # It's possible (e.g. for developers of some ports) to have a WebKit 323 # checkout in a subdirectory of another checkout. So abort if the 324 # repository root or the repository UUID suddenly changes. 325 last if !$thisUUID; 326 $repositoryUUID = $thisUUID if !$repositoryUUID; 327 last if $thisUUID ne $repositoryUUID; 328 329 last if !$thisRoot; 330 $repositoryRoot = $thisRoot if !$repositoryRoot; 331 last if $thisRoot ne $repositoryRoot; 332 333 $last = $path; 334 $path = File::Spec->catdir($parent, $path); 335 } 336 337 return File::Spec->rel2abs($last); 338} 339 340sub determineVCSRoot() 341{ 342 if (isGit()) { 343 return determineGitRoot(); 344 } 345 346 if (!isSVN()) { 347 # Some users have a workflow where svn-create-patch, svn-apply and 348 # svn-unapply are used outside of multiple svn working directores, 349 # so warn the user and assume Subversion is being used in this case. 350 warn "Unable to determine VCS root; assuming Subversion"; 351 $isSVN = 1; 352 } 353 354 return determineSVNRoot(); 355} 356 357sub svnRevisionForDirectory($) 358{ 359 my ($dir) = @_; 360 my $revision; 361 362 if (isSVNDirectory($dir)) { 363 my $svnInfo = `LC_ALL=C svn info $dir | grep Revision:`; 364 ($revision) = ($svnInfo =~ m/Revision: (\d+).*/g); 365 } elsif (isGitDirectory($dir)) { 366 my $gitLog = `cd $dir && LC_ALL=C git log --grep='git-svn-id: ' -n 1 | grep git-svn-id:`; 367 ($revision) = ($gitLog =~ m/ +git-svn-id: .+@(\d+) /g); 368 } 369 die "Unable to determine current SVN revision in $dir" unless (defined $revision); 370 return $revision; 371} 372 373sub pathRelativeToSVNRepositoryRootForPath($) 374{ 375 my ($file) = @_; 376 my $relativePath = File::Spec->abs2rel($file); 377 378 my $svnInfo; 379 if (isSVN()) { 380 $svnInfo = `LC_ALL=C svn info $relativePath`; 381 } elsif (isGit()) { 382 $svnInfo = `LC_ALL=C git svn info $relativePath`; 383 } 384 385 $svnInfo =~ /.*^URL: (.*?)$/m; 386 my $svnURL = $1; 387 388 $svnInfo =~ /.*^Repository Root: (.*?)$/m; 389 my $repositoryRoot = $1; 390 391 $svnURL =~ s/$repositoryRoot\///; 392 return $svnURL; 393} 394 395sub makeFilePathRelative($) 396{ 397 my ($path) = @_; 398 return $path unless isGit(); 399 400 unless (defined $gitRoot) { 401 chomp($gitRoot = `git rev-parse --show-cdup`); 402 } 403 return $gitRoot . $path; 404} 405 406sub normalizePath($) 407{ 408 my ($path) = @_; 409 $path =~ s/\\/\//g; 410 return $path; 411} 412 413sub adjustPathForRecentRenamings($) 414{ 415 my ($fullPath) = @_; 416 417 if ($fullPath =~ m|^WebCore/| 418 || $fullPath =~ m|^JavaScriptCore/| 419 || $fullPath =~ m|^WebKit/| 420 || $fullPath =~ m|^WebKit2/|) { 421 return "Source/$fullPath"; 422 } 423 return $fullPath; 424} 425 426sub canonicalizePath($) 427{ 428 my ($file) = @_; 429 430 # Remove extra slashes and '.' directories in path 431 $file = File::Spec->canonpath($file); 432 433 # Remove '..' directories in path 434 my @dirs = (); 435 foreach my $dir (File::Spec->splitdir($file)) { 436 if ($dir eq '..' && $#dirs >= 0 && $dirs[$#dirs] ne '..') { 437 pop(@dirs); 438 } else { 439 push(@dirs, $dir); 440 } 441 } 442 return ($#dirs >= 0) ? File::Spec->catdir(@dirs) : "."; 443} 444 445sub removeEOL($) 446{ 447 my ($line) = @_; 448 return "" unless $line; 449 450 $line =~ s/[\r\n]+$//g; 451 return $line; 452} 453 454sub parseFirstEOL($) 455{ 456 my ($fileHandle) = @_; 457 458 # Make input record separator the new-line character to simplify regex matching below. 459 my $savedInputRecordSeparator = $INPUT_RECORD_SEPARATOR; 460 $INPUT_RECORD_SEPARATOR = "\n"; 461 my $firstLine = <$fileHandle>; 462 $INPUT_RECORD_SEPARATOR = $savedInputRecordSeparator; 463 464 return unless defined($firstLine); 465 466 my $eol; 467 if ($firstLine =~ /\r\n/) { 468 $eol = "\r\n"; 469 } elsif ($firstLine =~ /\r/) { 470 $eol = "\r"; 471 } elsif ($firstLine =~ /\n/) { 472 $eol = "\n"; 473 } 474 return $eol; 475} 476 477sub firstEOLInFile($) 478{ 479 my ($file) = @_; 480 my $eol; 481 if (open(FILE, $file)) { 482 $eol = parseFirstEOL(*FILE); 483 close(FILE); 484 } 485 return $eol; 486} 487 488sub svnStatus($) 489{ 490 my ($fullPath) = @_; 491 my $svnStatus; 492 open SVN, "svn status --non-interactive --non-recursive '$fullPath' |" or die; 493 if (-d $fullPath) { 494 # When running "svn stat" on a directory, we can't assume that only one 495 # status will be returned (since any files with a status below the 496 # directory will be returned), and we can't assume that the directory will 497 # be first (since any files with unknown status will be listed first). 498 my $normalizedFullPath = File::Spec->catdir(File::Spec->splitdir($fullPath)); 499 while (<SVN>) { 500 # Input may use a different EOL sequence than $/, so avoid chomp. 501 $_ = removeEOL($_); 502 my $normalizedStatPath = File::Spec->catdir(File::Spec->splitdir(substr($_, 7))); 503 if ($normalizedFullPath eq $normalizedStatPath) { 504 $svnStatus = "$_\n"; 505 last; 506 } 507 } 508 # Read the rest of the svn command output to avoid a broken pipe warning. 509 local $/ = undef; 510 <SVN>; 511 } 512 else { 513 # Files will have only one status returned. 514 $svnStatus = removeEOL(<SVN>) . "\n"; 515 } 516 close SVN; 517 return $svnStatus; 518} 519 520# Return whether the given file mode is executable in the source control 521# sense. We make this determination based on whether the executable bit 522# is set for "others" rather than the stronger condition that it be set 523# for the user, group, and others. This is sufficient for distinguishing 524# the default behavior in Git and SVN. 525# 526# Args: 527# $fileMode: A number or string representing a file mode in octal notation. 528sub isExecutable($) 529{ 530 my $fileMode = shift; 531 532 return $fileMode % 2; 533} 534 535# Parse the next Git diff header from the given file handle, and advance 536# the handle so the last line read is the first line after the header. 537# 538# This subroutine dies if given leading junk. 539# 540# Args: 541# $fileHandle: advanced so the last line read from the handle is the first 542# line of the header to parse. This should be a line 543# beginning with "diff --git". 544# $line: the line last read from $fileHandle 545# 546# Returns ($headerHashRef, $lastReadLine): 547# $headerHashRef: a hash reference representing a diff header, as follows-- 548# copiedFromPath: the path from which the file was copied or moved if 549# the diff is a copy or move. 550# executableBitDelta: the value 1 or -1 if the executable bit was added or 551# removed, respectively. New and deleted files have 552# this value only if the file is executable, in which 553# case the value is 1 and -1, respectively. 554# indexPath: the path of the target file. 555# isBinary: the value 1 if the diff is for a binary file. 556# isDeletion: the value 1 if the diff is a file deletion. 557# isCopyWithChanges: the value 1 if the file was copied or moved and 558# the target file was changed in some way after being 559# copied or moved (e.g. if its contents or executable 560# bit were changed). 561# isNew: the value 1 if the diff is for a new file. 562# shouldDeleteSource: the value 1 if the file was copied or moved and 563# the source file was deleted -- i.e. if the copy 564# was actually a move. 565# svnConvertedText: the header text with some lines converted to SVN 566# format. Git-specific lines are preserved. 567# $lastReadLine: the line last read from $fileHandle. 568sub parseGitDiffHeader($$) 569{ 570 my ($fileHandle, $line) = @_; 571 572 $_ = $line; 573 574 my $indexPath; 575 if (/$gitDiffStartRegEx/) { 576 # The first and second paths can differ in the case of copies 577 # and renames. We use the second file path because it is the 578 # destination path. 579 $indexPath = adjustPathForRecentRenamings($4); 580 # Use $POSTMATCH to preserve the end-of-line character. 581 $_ = "Index: $indexPath$POSTMATCH"; # Convert to SVN format. 582 } else { 583 die("Could not parse leading \"diff --git\" line: \"$line\"."); 584 } 585 586 my $copiedFromPath; 587 my $foundHeaderEnding; 588 my $isBinary; 589 my $isDeletion; 590 my $isNew; 591 my $newExecutableBit = 0; 592 my $oldExecutableBit = 0; 593 my $shouldDeleteSource = 0; 594 my $similarityIndex = 0; 595 my $svnConvertedText; 596 while (1) { 597 # Temporarily strip off any end-of-line characters to simplify 598 # regex matching below. 599 s/([\n\r]+)$//; 600 my $eol = $1; 601 602 if (/^(deleted file|old) mode (\d+)/) { 603 $oldExecutableBit = (isExecutable($2) ? 1 : 0); 604 $isDeletion = 1 if $1 eq "deleted file"; 605 } elsif (/^new( file)? mode (\d+)/) { 606 $newExecutableBit = (isExecutable($2) ? 1 : 0); 607 $isNew = 1 if $1; 608 } elsif (/^similarity index (\d+)%/) { 609 $similarityIndex = $1; 610 } elsif (/^copy from (\S+)/) { 611 $copiedFromPath = $1; 612 } elsif (/^rename from (\S+)/) { 613 # FIXME: Record this as a move rather than as a copy-and-delete. 614 # This will simplify adding rename support to svn-unapply. 615 # Otherwise, the hash for a deletion would have to know 616 # everything about the file being deleted in order to 617 # support undoing itself. Recording as a move will also 618 # permit us to use "svn move" and "git move". 619 $copiedFromPath = $1; 620 $shouldDeleteSource = 1; 621 } elsif (/^--- \S+/) { 622 $_ = "--- $indexPath"; # Convert to SVN format. 623 } elsif (/^\+\+\+ \S+/) { 624 $_ = "+++ $indexPath"; # Convert to SVN format. 625 $foundHeaderEnding = 1; 626 } elsif (/^GIT binary patch$/ ) { 627 $isBinary = 1; 628 $foundHeaderEnding = 1; 629 # The "git diff" command includes a line of the form "Binary files 630 # <path1> and <path2> differ" if the --binary flag is not used. 631 } elsif (/^Binary files / ) { 632 die("Error: the Git diff contains a binary file without the binary data in ". 633 "line: \"$_\". Be sure to use the --binary flag when invoking \"git diff\" ". 634 "with diffs containing binary files."); 635 } 636 637 $svnConvertedText .= "$_$eol"; # Also restore end-of-line characters. 638 639 $_ = <$fileHandle>; # Not defined if end-of-file reached. 640 641 last if (!defined($_) || /$gitDiffStartRegEx/ || $foundHeaderEnding); 642 } 643 644 my $executableBitDelta = $newExecutableBit - $oldExecutableBit; 645 646 my %header; 647 648 $header{copiedFromPath} = $copiedFromPath if $copiedFromPath; 649 $header{executableBitDelta} = $executableBitDelta if $executableBitDelta; 650 $header{indexPath} = $indexPath; 651 $header{isBinary} = $isBinary if $isBinary; 652 $header{isCopyWithChanges} = 1 if ($copiedFromPath && ($similarityIndex != 100 || $executableBitDelta)); 653 $header{isDeletion} = $isDeletion if $isDeletion; 654 $header{isNew} = $isNew if $isNew; 655 $header{shouldDeleteSource} = $shouldDeleteSource if $shouldDeleteSource; 656 $header{svnConvertedText} = $svnConvertedText; 657 658 return (\%header, $_); 659} 660 661# Parse the next SVN diff header from the given file handle, and advance 662# the handle so the last line read is the first line after the header. 663# 664# This subroutine dies if given leading junk or if it could not detect 665# the end of the header block. 666# 667# Args: 668# $fileHandle: advanced so the last line read from the handle is the first 669# line of the header to parse. This should be a line 670# beginning with "Index:". 671# $line: the line last read from $fileHandle 672# 673# Returns ($headerHashRef, $lastReadLine): 674# $headerHashRef: a hash reference representing a diff header, as follows-- 675# copiedFromPath: the path from which the file was copied if the diff 676# is a copy. 677# indexPath: the path of the target file, which is the path found in 678# the "Index:" line. 679# isBinary: the value 1 if the diff is for a binary file. 680# isNew: the value 1 if the diff is for a new file. 681# sourceRevision: the revision number of the source, if it exists. This 682# is the same as the revision number the file was copied 683# from, in the case of a file copy. 684# svnConvertedText: the header text converted to a header with the paths 685# in some lines corrected. 686# $lastReadLine: the line last read from $fileHandle. 687sub parseSvnDiffHeader($$) 688{ 689 my ($fileHandle, $line) = @_; 690 691 $_ = $line; 692 693 my $indexPath; 694 if (/$svnDiffStartRegEx/) { 695 $indexPath = adjustPathForRecentRenamings($1); 696 } else { 697 die("First line of SVN diff does not begin with \"Index \": \"$_\""); 698 } 699 700 my $copiedFromPath; 701 my $foundHeaderEnding; 702 my $isBinary; 703 my $isNew; 704 my $sourceRevision; 705 my $svnConvertedText; 706 while (1) { 707 # Temporarily strip off any end-of-line characters to simplify 708 # regex matching below. 709 s/([\n\r]+)$//; 710 my $eol = $1; 711 712 # Fix paths on ""---" and "+++" lines to match the leading 713 # index line. 714 if (s/^--- \S+/--- $indexPath/) { 715 # --- 716 if (/^--- .+\(revision (\d+)\)/) { 717 $sourceRevision = $1; 718 $isNew = 1 if !$sourceRevision; # if revision 0. 719 if (/\(from (\S+):(\d+)\)$/) { 720 # The "from" clause is created by svn-create-patch, in 721 # which case there is always also a "revision" clause. 722 $copiedFromPath = $1; 723 die("Revision number \"$2\" in \"from\" clause does not match " . 724 "source revision number \"$sourceRevision\".") if ($2 != $sourceRevision); 725 } 726 } 727 } elsif (s/^\+\+\+ \S+/+++ $indexPath/) { 728 $foundHeaderEnding = 1; 729 } elsif (/^Cannot display: file marked as a binary type.$/) { 730 $isBinary = 1; 731 $foundHeaderEnding = 1; 732 } 733 734 $svnConvertedText .= "$_$eol"; # Also restore end-of-line characters. 735 736 $_ = <$fileHandle>; # Not defined if end-of-file reached. 737 738 last if (!defined($_) || /$svnDiffStartRegEx/ || $foundHeaderEnding); 739 } 740 741 if (!$foundHeaderEnding) { 742 die("Did not find end of header block corresponding to index path \"$indexPath\"."); 743 } 744 745 my %header; 746 747 $header{copiedFromPath} = $copiedFromPath if $copiedFromPath; 748 $header{indexPath} = $indexPath; 749 $header{isBinary} = $isBinary if $isBinary; 750 $header{isNew} = $isNew if $isNew; 751 $header{sourceRevision} = $sourceRevision if $sourceRevision; 752 $header{svnConvertedText} = $svnConvertedText; 753 754 return (\%header, $_); 755} 756 757# Parse the next diff header from the given file handle, and advance 758# the handle so the last line read is the first line after the header. 759# 760# This subroutine dies if given leading junk or if it could not detect 761# the end of the header block. 762# 763# Args: 764# $fileHandle: advanced so the last line read from the handle is the first 765# line of the header to parse. For SVN-formatted diffs, this 766# is a line beginning with "Index:". For Git, this is a line 767# beginning with "diff --git". 768# $line: the line last read from $fileHandle 769# 770# Returns ($headerHashRef, $lastReadLine): 771# $headerHashRef: a hash reference representing a diff header 772# copiedFromPath: the path from which the file was copied if the diff 773# is a copy. 774# executableBitDelta: the value 1 or -1 if the executable bit was added or 775# removed, respectively. New and deleted files have 776# this value only if the file is executable, in which 777# case the value is 1 and -1, respectively. 778# indexPath: the path of the target file. 779# isBinary: the value 1 if the diff is for a binary file. 780# isGit: the value 1 if the diff is Git-formatted. 781# isSvn: the value 1 if the diff is SVN-formatted. 782# sourceRevision: the revision number of the source, if it exists. This 783# is the same as the revision number the file was copied 784# from, in the case of a file copy. 785# svnConvertedText: the header text with some lines converted to SVN 786# format. Git-specific lines are preserved. 787# $lastReadLine: the line last read from $fileHandle. 788sub parseDiffHeader($$) 789{ 790 my ($fileHandle, $line) = @_; 791 792 my $header; # This is a hash ref. 793 my $isGit; 794 my $isSvn; 795 my $lastReadLine; 796 797 if ($line =~ $svnDiffStartRegEx) { 798 $isSvn = 1; 799 ($header, $lastReadLine) = parseSvnDiffHeader($fileHandle, $line); 800 } elsif ($line =~ $gitDiffStartRegEx) { 801 $isGit = 1; 802 ($header, $lastReadLine) = parseGitDiffHeader($fileHandle, $line); 803 } else { 804 die("First line of diff does not begin with \"Index:\" or \"diff --git\": \"$line\""); 805 } 806 807 $header->{isGit} = $isGit if $isGit; 808 $header->{isSvn} = $isSvn if $isSvn; 809 810 return ($header, $lastReadLine); 811} 812 813# FIXME: The %diffHash "object" should not have an svnConvertedText property. 814# Instead, the hash object should store its information in a 815# structured way as properties. This should be done in a way so 816# that, if necessary, the text of an SVN or Git patch can be 817# reconstructed from the information in those hash properties. 818# 819# A %diffHash is a hash representing a source control diff of a single 820# file operation (e.g. a file modification, copy, or delete). 821# 822# These hashes appear, for example, in the parseDiff(), parsePatch(), 823# and prepareParsedPatch() subroutines of this package. 824# 825# The corresponding values are-- 826# 827# copiedFromPath: the path from which the file was copied if the diff 828# is a copy. 829# executableBitDelta: the value 1 or -1 if the executable bit was added or 830# removed from the target file, respectively. 831# indexPath: the path of the target file. For SVN-formatted diffs, 832# this is the same as the path in the "Index:" line. 833# isBinary: the value 1 if the diff is for a binary file. 834# isDeletion: the value 1 if the diff is known from the header to be a deletion. 835# isGit: the value 1 if the diff is Git-formatted. 836# isNew: the value 1 if the dif is known from the header to be a new file. 837# isSvn: the value 1 if the diff is SVN-formatted. 838# sourceRevision: the revision number of the source, if it exists. This 839# is the same as the revision number the file was copied 840# from, in the case of a file copy. 841# svnConvertedText: the diff with some lines converted to SVN format. 842# Git-specific lines are preserved. 843 844# Parse one diff from a patch file created by svn-create-patch, and 845# advance the file handle so the last line read is the first line 846# of the next header block. 847# 848# This subroutine preserves any leading junk encountered before the header. 849# 850# Composition of an SVN diff 851# 852# There are three parts to an SVN diff: the header, the property change, and 853# the binary contents, in that order. Either the header or the property change 854# may be ommitted, but not both. If there are binary changes, then you always 855# have all three. 856# 857# Args: 858# $fileHandle: a file handle advanced to the first line of the next 859# header block. Leading junk is okay. 860# $line: the line last read from $fileHandle. 861# $optionsHashRef: a hash reference representing optional options to use 862# when processing a diff. 863# shouldNotUseIndexPathEOL: whether to use the line endings in the diff instead 864# instead of the line endings in the target file; the 865# value of 1 if svnConvertedText should use the line 866# endings in the diff. 867# 868# Returns ($diffHashRefs, $lastReadLine): 869# $diffHashRefs: A reference to an array of references to %diffHash hashes. 870# See the %diffHash documentation above. 871# $lastReadLine: the line last read from $fileHandle 872sub parseDiff($$;$) 873{ 874 # FIXME: Adjust this method so that it dies if the first line does not 875 # match the start of a diff. This will require a change to 876 # parsePatch() so that parsePatch() skips over leading junk. 877 my ($fileHandle, $line, $optionsHashRef) = @_; 878 879 my $headerStartRegEx = $svnDiffStartRegEx; # SVN-style header for the default 880 881 my $headerHashRef; # Last header found, as returned by parseDiffHeader(). 882 my $svnPropertiesHashRef; # Last SVN properties diff found, as returned by parseSvnDiffProperties(). 883 my $svnText; 884 my $indexPathEOL; 885 while (defined($line)) { 886 if (!$headerHashRef && ($line =~ $gitDiffStartRegEx)) { 887 # Then assume all diffs in the patch are Git-formatted. This 888 # block was made to be enterable at most once since we assume 889 # all diffs in the patch are formatted the same (SVN or Git). 890 $headerStartRegEx = $gitDiffStartRegEx; 891 } 892 893 if ($line =~ $svnPropertiesStartRegEx) { 894 my $propertyPath = $1; 895 if ($svnPropertiesHashRef || $headerHashRef && ($propertyPath ne $headerHashRef->{indexPath})) { 896 # This is the start of the second diff in the while loop, which happens to 897 # be a property diff. If $svnPropertiesHasRef is defined, then this is the 898 # second consecutive property diff, otherwise it's the start of a property 899 # diff for a file that only has property changes. 900 last; 901 } 902 ($svnPropertiesHashRef, $line) = parseSvnDiffProperties($fileHandle, $line); 903 next; 904 } 905 if ($line !~ $headerStartRegEx) { 906 # Then we are in the body of the diff. 907 if ($indexPathEOL && $line !~ /$chunkRangeRegEx/) { 908 # The chunk range is part of the body of the diff, but its line endings should't be 909 # modified or patch(1) will complain. So, we only modify non-chunk range lines. 910 $line =~ s/\r\n|\r|\n/$indexPathEOL/g; 911 } 912 $svnText .= $line; 913 $line = <$fileHandle>; 914 next; 915 } # Otherwise, we found a diff header. 916 917 if ($svnPropertiesHashRef || $headerHashRef) { 918 # Then either we just processed an SVN property change or this 919 # is the start of the second diff header of this while loop. 920 last; 921 } 922 923 ($headerHashRef, $line) = parseDiffHeader($fileHandle, $line); 924 if (!$optionsHashRef || !$optionsHashRef->{shouldNotUseIndexPathEOL}) { 925 $indexPathEOL = firstEOLInFile($headerHashRef->{indexPath}) if !$headerHashRef->{isNew} && !$headerHashRef->{isBinary}; 926 } 927 928 $svnText .= $headerHashRef->{svnConvertedText}; 929 } 930 931 my @diffHashRefs; 932 933 if ($headerHashRef->{shouldDeleteSource}) { 934 my %deletionHash; 935 $deletionHash{indexPath} = $headerHashRef->{copiedFromPath}; 936 $deletionHash{isDeletion} = 1; 937 push @diffHashRefs, \%deletionHash; 938 } 939 if ($headerHashRef->{copiedFromPath}) { 940 my %copyHash; 941 $copyHash{copiedFromPath} = $headerHashRef->{copiedFromPath}; 942 $copyHash{indexPath} = $headerHashRef->{indexPath}; 943 $copyHash{sourceRevision} = $headerHashRef->{sourceRevision} if $headerHashRef->{sourceRevision}; 944 if ($headerHashRef->{isSvn}) { 945 $copyHash{executableBitDelta} = $svnPropertiesHashRef->{executableBitDelta} if $svnPropertiesHashRef->{executableBitDelta}; 946 } 947 push @diffHashRefs, \%copyHash; 948 } 949 950 # Note, the order of evaluation for the following if conditional has been explicitly chosen so that 951 # it evaluates to false when there is no headerHashRef (e.g. a property change diff for a file that 952 # only has property changes). 953 if ($headerHashRef->{isCopyWithChanges} || (%$headerHashRef && !$headerHashRef->{copiedFromPath})) { 954 # Then add the usual file modification. 955 my %diffHash; 956 # FIXME: We should expand this code to support other properties. In the future, 957 # parseSvnDiffProperties may return a hash whose keys are the properties. 958 if ($headerHashRef->{isSvn}) { 959 # SVN records the change to the executable bit in a separate property change diff 960 # that follows the contents of the diff, except for binary diffs. For binary 961 # diffs, the property change diff follows the diff header. 962 $diffHash{executableBitDelta} = $svnPropertiesHashRef->{executableBitDelta} if $svnPropertiesHashRef->{executableBitDelta}; 963 } elsif ($headerHashRef->{isGit}) { 964 # Git records the change to the executable bit in the header of a diff. 965 $diffHash{executableBitDelta} = $headerHashRef->{executableBitDelta} if $headerHashRef->{executableBitDelta}; 966 } 967 $diffHash{indexPath} = $headerHashRef->{indexPath}; 968 $diffHash{isBinary} = $headerHashRef->{isBinary} if $headerHashRef->{isBinary}; 969 $diffHash{isDeletion} = $headerHashRef->{isDeletion} if $headerHashRef->{isDeletion}; 970 $diffHash{isGit} = $headerHashRef->{isGit} if $headerHashRef->{isGit}; 971 $diffHash{isNew} = $headerHashRef->{isNew} if $headerHashRef->{isNew}; 972 $diffHash{isSvn} = $headerHashRef->{isSvn} if $headerHashRef->{isSvn}; 973 if (!$headerHashRef->{copiedFromPath}) { 974 # If the file was copied, then we have already incorporated the 975 # sourceRevision information into the change. 976 $diffHash{sourceRevision} = $headerHashRef->{sourceRevision} if $headerHashRef->{sourceRevision}; 977 } 978 # FIXME: Remove the need for svnConvertedText. See the %diffHash 979 # code comments above for more information. 980 # 981 # Note, we may not always have SVN converted text since we intend 982 # to deprecate it in the future. For example, a property change 983 # diff for a file that only has property changes will not return 984 # any SVN converted text. 985 $diffHash{svnConvertedText} = $svnText if $svnText; 986 push @diffHashRefs, \%diffHash; 987 } 988 989 if (!%$headerHashRef && $svnPropertiesHashRef) { 990 # A property change diff for a file that only has property changes. 991 my %propertyChangeHash; 992 $propertyChangeHash{executableBitDelta} = $svnPropertiesHashRef->{executableBitDelta} if $svnPropertiesHashRef->{executableBitDelta}; 993 $propertyChangeHash{indexPath} = $svnPropertiesHashRef->{propertyPath}; 994 $propertyChangeHash{isSvn} = 1; 995 push @diffHashRefs, \%propertyChangeHash; 996 } 997 998 return (\@diffHashRefs, $line); 999} 1000 1001# Parse an SVN property change diff from the given file handle, and advance 1002# the handle so the last line read is the first line after this diff. 1003# 1004# For the case of an SVN binary diff, the binary contents will follow the 1005# the property changes. 1006# 1007# This subroutine dies if the first line does not begin with "Property changes on" 1008# or if the separator line that follows this line is missing. 1009# 1010# Args: 1011# $fileHandle: advanced so the last line read from the handle is the first 1012# line of the footer to parse. This line begins with 1013# "Property changes on". 1014# $line: the line last read from $fileHandle. 1015# 1016# Returns ($propertyHashRef, $lastReadLine): 1017# $propertyHashRef: a hash reference representing an SVN diff footer. 1018# propertyPath: the path of the target file. 1019# executableBitDelta: the value 1 or -1 if the executable bit was added or 1020# removed from the target file, respectively. 1021# $lastReadLine: the line last read from $fileHandle. 1022sub parseSvnDiffProperties($$) 1023{ 1024 my ($fileHandle, $line) = @_; 1025 1026 $_ = $line; 1027 1028 my %footer; 1029 if (/$svnPropertiesStartRegEx/) { 1030 $footer{propertyPath} = $1; 1031 } else { 1032 die("Failed to find start of SVN property change, \"Property changes on \": \"$_\""); 1033 } 1034 1035 # We advance $fileHandle two lines so that the next line that 1036 # we process is $svnPropertyStartRegEx in a well-formed footer. 1037 # A well-formed footer has the form: 1038 # Property changes on: FileA 1039 # ___________________________________________________________________ 1040 # Added: svn:executable 1041 # + * 1042 $_ = <$fileHandle>; # Not defined if end-of-file reached. 1043 my $separator = "_" x 67; 1044 if (defined($_) && /^$separator[\r\n]+$/) { 1045 $_ = <$fileHandle>; 1046 } else { 1047 die("Failed to find separator line: \"$_\"."); 1048 } 1049 1050 # FIXME: We should expand this to support other SVN properties 1051 # (e.g. return a hash of property key-values that represents 1052 # all properties). 1053 # 1054 # Notice, we keep processing until we hit end-of-file or some 1055 # line that does not resemble $svnPropertyStartRegEx, such as 1056 # the empty line that precedes the start of the binary contents 1057 # of a patch, or the start of the next diff (e.g. "Index:"). 1058 my $propertyHashRef; 1059 while (defined($_) && /$svnPropertyStartRegEx/) { 1060 ($propertyHashRef, $_) = parseSvnProperty($fileHandle, $_); 1061 if ($propertyHashRef->{name} eq "svn:executable") { 1062 # Notice, for SVN properties, propertyChangeDelta is always non-zero 1063 # because a property can only be added or removed. 1064 $footer{executableBitDelta} = $propertyHashRef->{propertyChangeDelta}; 1065 } 1066 } 1067 1068 return(\%footer, $_); 1069} 1070 1071# Parse the next SVN property from the given file handle, and advance the handle so the last 1072# line read is the first line after the property. 1073# 1074# This subroutine dies if the first line is not a valid start of an SVN property, 1075# or the property is missing a value, or the property change type (e.g. "Added") 1076# does not correspond to the property value type (e.g. "+"). 1077# 1078# Args: 1079# $fileHandle: advanced so the last line read from the handle is the first 1080# line of the property to parse. This should be a line 1081# that matches $svnPropertyStartRegEx. 1082# $line: the line last read from $fileHandle. 1083# 1084# Returns ($propertyHashRef, $lastReadLine): 1085# $propertyHashRef: a hash reference representing a SVN property. 1086# name: the name of the property. 1087# value: the last property value. For instance, suppose the property is "Modified". 1088# Then it has both a '-' and '+' property value in that order. Therefore, 1089# the value of this key is the value of the '+' property by ordering (since 1090# it is the last value). 1091# propertyChangeDelta: the value 1 or -1 if the property was added or 1092# removed, respectively. 1093# $lastReadLine: the line last read from $fileHandle. 1094sub parseSvnProperty($$) 1095{ 1096 my ($fileHandle, $line) = @_; 1097 1098 $_ = $line; 1099 1100 my $propertyName; 1101 my $propertyChangeType; 1102 if (/$svnPropertyStartRegEx/) { 1103 $propertyChangeType = $1; 1104 $propertyName = $2; 1105 } else { 1106 die("Failed to find SVN property: \"$_\"."); 1107 } 1108 1109 $_ = <$fileHandle>; # Not defined if end-of-file reached. 1110 1111 # The "svn diff" command neither inserts newline characters between property values 1112 # nor between successive properties. 1113 # 1114 # FIXME: We do not support property values that contain tailing newline characters 1115 # as it is difficult to disambiguate these trailing newlines from the empty 1116 # line that precedes the contents of a binary patch. 1117 my $propertyValue; 1118 my $propertyValueType; 1119 while (defined($_) && /$svnPropertyValueStartRegEx/) { 1120 # Note, a '-' property may be followed by a '+' property in the case of a "Modified" 1121 # or "Name" property. We only care about the ending value (i.e. the '+' property) 1122 # in such circumstances. So, we take the property value for the property to be its 1123 # last parsed property value. 1124 # 1125 # FIXME: We may want to consider strictly enforcing a '-', '+' property ordering or 1126 # add error checking to prevent '+', '+', ..., '+' and other invalid combinations. 1127 $propertyValueType = $1; 1128 ($propertyValue, $_) = parseSvnPropertyValue($fileHandle, $_); 1129 } 1130 1131 if (!$propertyValue) { 1132 die("Failed to find the property value for the SVN property \"$propertyName\": \"$_\"."); 1133 } 1134 1135 my $propertyChangeDelta; 1136 if ($propertyValueType eq "+" || $propertyValueType eq "Merged") { 1137 $propertyChangeDelta = 1; 1138 } elsif ($propertyValueType eq "-" || $propertyValueType eq "Reverse-merged") { 1139 $propertyChangeDelta = -1; 1140 } else { 1141 die("Not reached."); 1142 } 1143 1144 # We perform a simple validation that an "Added" or "Deleted" property 1145 # change type corresponds with a "+" and "-" value type, respectively. 1146 my $expectedChangeDelta; 1147 if ($propertyChangeType eq "Added") { 1148 $expectedChangeDelta = 1; 1149 } elsif ($propertyChangeType eq "Deleted") { 1150 $expectedChangeDelta = -1; 1151 } 1152 1153 if ($expectedChangeDelta && $propertyChangeDelta != $expectedChangeDelta) { 1154 die("The final property value type found \"$propertyValueType\" does not " . 1155 "correspond to the property change type found \"$propertyChangeType\"."); 1156 } 1157 1158 my %propertyHash; 1159 $propertyHash{name} = $propertyName; 1160 $propertyHash{propertyChangeDelta} = $propertyChangeDelta; 1161 $propertyHash{value} = $propertyValue; 1162 return (\%propertyHash, $_); 1163} 1164 1165# Parse the value of an SVN property from the given file handle, and advance 1166# the handle so the last line read is the first line after the property value. 1167# 1168# This subroutine dies if the first line is an invalid SVN property value line 1169# (i.e. a line that does not begin with " +" or " -"). 1170# 1171# Args: 1172# $fileHandle: advanced so the last line read from the handle is the first 1173# line of the property value to parse. This should be a line 1174# beginning with " +" or " -". 1175# $line: the line last read from $fileHandle. 1176# 1177# Returns ($propertyValue, $lastReadLine): 1178# $propertyValue: the value of the property. 1179# $lastReadLine: the line last read from $fileHandle. 1180sub parseSvnPropertyValue($$) 1181{ 1182 my ($fileHandle, $line) = @_; 1183 1184 $_ = $line; 1185 1186 my $propertyValue; 1187 my $eol; 1188 if (/$svnPropertyValueStartRegEx/) { 1189 $propertyValue = $2; # Does not include the end-of-line character(s). 1190 $eol = $POSTMATCH; 1191 } else { 1192 die("Failed to find property value beginning with '+', '-', 'Merged', or 'Reverse-merged': \"$_\"."); 1193 } 1194 1195 while (<$fileHandle>) { 1196 if (/^[\r\n]+$/ || /$svnPropertyValueStartRegEx/ || /$svnPropertyStartRegEx/) { 1197 # Note, we may encounter an empty line before the contents of a binary patch. 1198 # Also, we check for $svnPropertyValueStartRegEx because a '-' property may be 1199 # followed by a '+' property in the case of a "Modified" or "Name" property. 1200 # We check for $svnPropertyStartRegEx because it indicates the start of the 1201 # next property to parse. 1202 last; 1203 } 1204 1205 # Temporarily strip off any end-of-line characters. We add the end-of-line characters 1206 # from the previously processed line to the start of this line so that the last line 1207 # of the property value does not end in end-of-line characters. 1208 s/([\n\r]+)$//; 1209 $propertyValue .= "$eol$_"; 1210 $eol = $1; 1211 } 1212 1213 return ($propertyValue, $_); 1214} 1215 1216# Parse a patch file created by svn-create-patch. 1217# 1218# Args: 1219# $fileHandle: A file handle to the patch file that has not yet been 1220# read from. 1221# $optionsHashRef: a hash reference representing optional options to use 1222# when processing a diff. 1223# shouldNotUseIndexPathEOL: whether to use the line endings in the diff instead 1224# instead of the line endings in the target file; the 1225# value of 1 if svnConvertedText should use the line 1226# endings in the diff. 1227# 1228# Returns: 1229# @diffHashRefs: an array of diff hash references. 1230# See the %diffHash documentation above. 1231sub parsePatch($;$) 1232{ 1233 my ($fileHandle, $optionsHashRef) = @_; 1234 1235 my $newDiffHashRefs; 1236 my @diffHashRefs; # return value 1237 1238 my $line = <$fileHandle>; 1239 1240 while (defined($line)) { # Otherwise, at EOF. 1241 1242 ($newDiffHashRefs, $line) = parseDiff($fileHandle, $line, $optionsHashRef); 1243 1244 push @diffHashRefs, @$newDiffHashRefs; 1245 } 1246 1247 return @diffHashRefs; 1248} 1249 1250# Prepare the results of parsePatch() for use in svn-apply and svn-unapply. 1251# 1252# Args: 1253# $shouldForce: Whether to continue processing if an unexpected 1254# state occurs. 1255# @diffHashRefs: An array of references to %diffHashes. 1256# See the %diffHash documentation above. 1257# 1258# Returns $preparedPatchHashRef: 1259# copyDiffHashRefs: A reference to an array of the $diffHashRefs in 1260# @diffHashRefs that represent file copies. The original 1261# ordering is preserved. 1262# nonCopyDiffHashRefs: A reference to an array of the $diffHashRefs in 1263# @diffHashRefs that do not represent file copies. 1264# The original ordering is preserved. 1265# sourceRevisionHash: A reference to a hash of source path to source 1266# revision number. 1267sub prepareParsedPatch($@) 1268{ 1269 my ($shouldForce, @diffHashRefs) = @_; 1270 1271 my %copiedFiles; 1272 1273 # Return values 1274 my @copyDiffHashRefs = (); 1275 my @nonCopyDiffHashRefs = (); 1276 my %sourceRevisionHash = (); 1277 for my $diffHashRef (@diffHashRefs) { 1278 my $copiedFromPath = $diffHashRef->{copiedFromPath}; 1279 my $indexPath = $diffHashRef->{indexPath}; 1280 my $sourceRevision = $diffHashRef->{sourceRevision}; 1281 my $sourcePath; 1282 1283 if (defined($copiedFromPath)) { 1284 # Then the diff is a copy operation. 1285 $sourcePath = $copiedFromPath; 1286 1287 # FIXME: Consider printing a warning or exiting if 1288 # exists($copiedFiles{$indexPath}) is true -- i.e. if 1289 # $indexPath appears twice as a copy target. 1290 $copiedFiles{$indexPath} = $sourcePath; 1291 1292 push @copyDiffHashRefs, $diffHashRef; 1293 } else { 1294 # Then the diff is not a copy operation. 1295 $sourcePath = $indexPath; 1296 1297 push @nonCopyDiffHashRefs, $diffHashRef; 1298 } 1299 1300 if (defined($sourceRevision)) { 1301 if (exists($sourceRevisionHash{$sourcePath}) && 1302 ($sourceRevisionHash{$sourcePath} != $sourceRevision)) { 1303 if (!$shouldForce) { 1304 die "Two revisions of the same file required as a source:\n". 1305 " $sourcePath:$sourceRevisionHash{$sourcePath}\n". 1306 " $sourcePath:$sourceRevision"; 1307 } 1308 } 1309 $sourceRevisionHash{$sourcePath} = $sourceRevision; 1310 } 1311 } 1312 1313 my %preparedPatchHash; 1314 1315 $preparedPatchHash{copyDiffHashRefs} = \@copyDiffHashRefs; 1316 $preparedPatchHash{nonCopyDiffHashRefs} = \@nonCopyDiffHashRefs; 1317 $preparedPatchHash{sourceRevisionHash} = \%sourceRevisionHash; 1318 1319 return \%preparedPatchHash; 1320} 1321 1322# Return localtime() for the project's time zone, given an integer time as 1323# returned by Perl's time() function. 1324sub localTimeInProjectTimeZone($) 1325{ 1326 my $epochTime = shift; 1327 1328 # Change the time zone temporarily for the localtime() call. 1329 my $savedTimeZone = $ENV{'TZ'}; 1330 $ENV{'TZ'} = $changeLogTimeZone; 1331 my @localTime = localtime($epochTime); 1332 if (defined $savedTimeZone) { 1333 $ENV{'TZ'} = $savedTimeZone; 1334 } else { 1335 delete $ENV{'TZ'}; 1336 } 1337 1338 return @localTime; 1339} 1340 1341# Set the reviewer and date in a ChangeLog patch, and return the new patch. 1342# 1343# Args: 1344# $patch: a ChangeLog patch as a string. 1345# $reviewer: the name of the reviewer, or undef if the reviewer should not be set. 1346# $epochTime: an integer time as returned by Perl's time() function. 1347sub setChangeLogDateAndReviewer($$$) 1348{ 1349 my ($patch, $reviewer, $epochTime) = @_; 1350 1351 my @localTime = localTimeInProjectTimeZone($epochTime); 1352 my $newDate = strftime("%Y-%m-%d", @localTime); 1353 1354 my $firstChangeLogLineRegEx = qr#(\n\+)\d{4}-[^-]{2}-[^-]{2}( )#; 1355 $patch =~ s/$firstChangeLogLineRegEx/$1$newDate$2/; 1356 1357 if (defined($reviewer)) { 1358 # We include a leading plus ("+") in the regular expression to make 1359 # the regular expression less likely to match text in the leading junk 1360 # for the patch, if the patch has leading junk. 1361 $patch =~ s/(\n\+.*)NOBODY \(OOPS!\)/$1$reviewer/; 1362 } 1363 1364 return $patch; 1365} 1366 1367# If possible, returns a ChangeLog patch equivalent to the given one, 1368# but with the newest ChangeLog entry inserted at the top of the 1369# file -- i.e. no leading context and all lines starting with "+". 1370# 1371# If given a patch string not representable as a patch with the above 1372# properties, it returns the input back unchanged. 1373# 1374# WARNING: This subroutine can return an inequivalent patch string if 1375# both the beginning of the new ChangeLog file matches the beginning 1376# of the source ChangeLog, and the source beginning was modified. 1377# Otherwise, it is guaranteed to return an equivalent patch string, 1378# if it returns. 1379# 1380# Applying this subroutine to ChangeLog patches allows svn-apply to 1381# insert new ChangeLog entries at the top of the ChangeLog file. 1382# svn-apply uses patch with --fuzz=3 to do this. We need to apply 1383# this subroutine because the diff(1) command is greedy when matching 1384# lines. A new ChangeLog entry with the same date and author as the 1385# previous will match and cause the diff to have lines of starting 1386# context. 1387# 1388# This subroutine has unit tests in VCSUtils_unittest.pl. 1389# 1390# Returns $changeLogHashRef: 1391# $changeLogHashRef: a hash reference representing a change log patch. 1392# patch: a ChangeLog patch equivalent to the given one, but with the 1393# newest ChangeLog entry inserted at the top of the file, if possible. 1394sub fixChangeLogPatch($) 1395{ 1396 my $patch = shift; # $patch will only contain patch fragments for ChangeLog. 1397 1398 $patch =~ /(\r?\n)/; 1399 my $lineEnding = $1; 1400 my @lines = split(/$lineEnding/, $patch); 1401 1402 my $i = 0; # We reuse the same index throughout. 1403 1404 # Skip to beginning of first chunk. 1405 for (; $i < @lines; ++$i) { 1406 if (substr($lines[$i], 0, 1) eq "@") { 1407 last; 1408 } 1409 } 1410 my $chunkStartIndex = ++$i; 1411 my %changeLogHashRef; 1412 1413 # Optimization: do not process if new lines already begin the chunk. 1414 if (substr($lines[$i], 0, 1) eq "+") { 1415 $changeLogHashRef{patch} = $patch; 1416 return \%changeLogHashRef; 1417 } 1418 1419 # Skip to first line of newly added ChangeLog entry. 1420 # For example, +2009-06-03 Eric Seidel <eric@webkit.org> 1421 my $dateStartRegEx = '^\+(\d{4}-\d{2}-\d{2})' # leading "+" and date 1422 . '\s+(.+)\s+' # name 1423 . '<([^<>]+)>$'; # e-mail address 1424 1425 for (; $i < @lines; ++$i) { 1426 my $line = $lines[$i]; 1427 my $firstChar = substr($line, 0, 1); 1428 if ($line =~ /$dateStartRegEx/) { 1429 last; 1430 } elsif ($firstChar eq " " or $firstChar eq "+") { 1431 next; 1432 } 1433 $changeLogHashRef{patch} = $patch; # Do not change if, for example, "-" or "@" found. 1434 return \%changeLogHashRef; 1435 } 1436 if ($i >= @lines) { 1437 $changeLogHashRef{patch} = $patch; # Do not change if date not found. 1438 return \%changeLogHashRef; 1439 } 1440 my $dateStartIndex = $i; 1441 1442 # Rewrite overlapping lines to lead with " ". 1443 my @overlappingLines = (); # These will include a leading "+". 1444 for (; $i < @lines; ++$i) { 1445 my $line = $lines[$i]; 1446 if (substr($line, 0, 1) ne "+") { 1447 last; 1448 } 1449 push(@overlappingLines, $line); 1450 $lines[$i] = " " . substr($line, 1); 1451 } 1452 1453 # Remove excess ending context, if necessary. 1454 my $shouldTrimContext = 1; 1455 for (; $i < @lines; ++$i) { 1456 my $firstChar = substr($lines[$i], 0, 1); 1457 if ($firstChar eq " ") { 1458 next; 1459 } elsif ($firstChar eq "@") { 1460 last; 1461 } 1462 $shouldTrimContext = 0; # For example, if "+" or "-" encountered. 1463 last; 1464 } 1465 my $deletedLineCount = 0; 1466 if ($shouldTrimContext) { # Also occurs if end of file reached. 1467 splice(@lines, $i - @overlappingLines, @overlappingLines); 1468 $deletedLineCount = @overlappingLines; 1469 } 1470 1471 # Work backwards, shifting overlapping lines towards front 1472 # while checking that patch stays equivalent. 1473 for ($i = $dateStartIndex - 1; @overlappingLines && $i >= $chunkStartIndex; --$i) { 1474 my $line = $lines[$i]; 1475 if (substr($line, 0, 1) ne " ") { 1476 next; 1477 } 1478 my $text = substr($line, 1); 1479 my $newLine = pop(@overlappingLines); 1480 if ($text ne substr($newLine, 1)) { 1481 $changeLogHashRef{patch} = $patch; # Unexpected difference. 1482 return \%changeLogHashRef; 1483 } 1484 $lines[$i] = "+$text"; 1485 } 1486 1487 # If @overlappingLines > 0, this is where we make use of the 1488 # assumption that the beginning of the source file was not modified. 1489 splice(@lines, $chunkStartIndex, 0, @overlappingLines); 1490 1491 # Update the date start index as it may have changed after shifting 1492 # the overlapping lines towards the front. 1493 for ($i = $chunkStartIndex; $i < $dateStartIndex; ++$i) { 1494 $dateStartIndex = $i if $lines[$i] =~ /$dateStartRegEx/; 1495 } 1496 splice(@lines, $chunkStartIndex, $dateStartIndex - $chunkStartIndex); # Remove context of later entry. 1497 $deletedLineCount += $dateStartIndex - $chunkStartIndex; 1498 1499 # Update the initial chunk range. 1500 if ($lines[$chunkStartIndex - 1] !~ /$chunkRangeRegEx/) { 1501 # FIXME: Handle errors differently from ChangeLog files that 1502 # are okay but should not be altered. That way we can find out 1503 # if improvements to the script ever become necessary. 1504 $changeLogHashRef{patch} = $patch; # Error: unexpected patch string format. 1505 return \%changeLogHashRef; 1506 } 1507 my $oldSourceLineCount = $2; 1508 my $oldTargetLineCount = $3; 1509 1510 my $sourceLineCount = $oldSourceLineCount + @overlappingLines - $deletedLineCount; 1511 my $targetLineCount = $oldTargetLineCount + @overlappingLines - $deletedLineCount; 1512 $lines[$chunkStartIndex - 1] = "@@ -1,$sourceLineCount +1,$targetLineCount @@"; 1513 1514 $changeLogHashRef{patch} = join($lineEnding, @lines) . "\n"; # patch(1) expects an extra trailing newline. 1515 return \%changeLogHashRef; 1516} 1517 1518# This is a supporting method for runPatchCommand. 1519# 1520# Arg: the optional $args parameter passed to runPatchCommand (can be undefined). 1521# 1522# Returns ($patchCommand, $isForcing). 1523# 1524# This subroutine has unit tests in VCSUtils_unittest.pl. 1525sub generatePatchCommand($) 1526{ 1527 my ($passedArgsHashRef) = @_; 1528 1529 my $argsHashRef = { # Defaults 1530 ensureForce => 0, 1531 shouldReverse => 0, 1532 options => [] 1533 }; 1534 1535 # Merges hash references. It's okay here if passed hash reference is undefined. 1536 @{$argsHashRef}{keys %{$passedArgsHashRef}} = values %{$passedArgsHashRef}; 1537 1538 my $ensureForce = $argsHashRef->{ensureForce}; 1539 my $shouldReverse = $argsHashRef->{shouldReverse}; 1540 my $options = $argsHashRef->{options}; 1541 1542 if (! $options) { 1543 $options = []; 1544 } else { 1545 $options = [@{$options}]; # Copy to avoid side effects. 1546 } 1547 1548 my $isForcing = 0; 1549 if (grep /^--force$/, @{$options}) { 1550 $isForcing = 1; 1551 } elsif ($ensureForce) { 1552 push @{$options}, "--force"; 1553 $isForcing = 1; 1554 } 1555 1556 if ($shouldReverse) { # No check: --reverse should never be passed explicitly. 1557 push @{$options}, "--reverse"; 1558 } 1559 1560 @{$options} = sort(@{$options}); # For easier testing. 1561 1562 my $patchCommand = join(" ", "patch -p0", @{$options}); 1563 1564 return ($patchCommand, $isForcing); 1565} 1566 1567# Apply the given patch using the patch(1) command. 1568# 1569# On success, return the resulting exit status. Otherwise, exit with the 1570# exit status. If "--force" is passed as an option, however, then never 1571# exit and always return the exit status. 1572# 1573# Args: 1574# $patch: a patch string. 1575# $repositoryRootPath: an absolute path to the repository root. 1576# $pathRelativeToRoot: the path of the file to be patched, relative to the 1577# repository root. This should normally be the path 1578# found in the patch's "Index:" line. It is passed 1579# explicitly rather than reparsed from the patch 1580# string for optimization purposes. 1581# This is used only for error reporting. The 1582# patch command gleans the actual file to patch 1583# from the patch string. 1584# $args: a reference to a hash of optional arguments. The possible 1585# keys are -- 1586# ensureForce: whether to ensure --force is passed (defaults to 0). 1587# shouldReverse: whether to pass --reverse (defaults to 0). 1588# options: a reference to an array of options to pass to the 1589# patch command. The subroutine passes the -p0 option 1590# no matter what. This should not include --reverse. 1591# 1592# This subroutine has unit tests in VCSUtils_unittest.pl. 1593sub runPatchCommand($$$;$) 1594{ 1595 my ($patch, $repositoryRootPath, $pathRelativeToRoot, $args) = @_; 1596 1597 my ($patchCommand, $isForcing) = generatePatchCommand($args); 1598 1599 # Temporarily change the working directory since the path found 1600 # in the patch's "Index:" line is relative to the repository root 1601 # (i.e. the same as $pathRelativeToRoot). 1602 my $cwd = Cwd::getcwd(); 1603 chdir $repositoryRootPath; 1604 1605 open PATCH, "| $patchCommand" or die "Could not call \"$patchCommand\" for file \"$pathRelativeToRoot\": $!"; 1606 print PATCH $patch; 1607 close PATCH; 1608 my $exitStatus = exitStatus($?); 1609 1610 chdir $cwd; 1611 1612 if ($exitStatus && !$isForcing) { 1613 print "Calling \"$patchCommand\" for file \"$pathRelativeToRoot\" returned " . 1614 "status $exitStatus. Pass --force to ignore patch failures.\n"; 1615 exit $exitStatus; 1616 } 1617 1618 return $exitStatus; 1619} 1620 1621# Merge ChangeLog patches using a three-file approach. 1622# 1623# This is used by resolve-ChangeLogs when it's operated as a merge driver 1624# and when it's used to merge conflicts after a patch is applied or after 1625# an svn update. 1626# 1627# It's also used for traditional rejected patches. 1628# 1629# Args: 1630# $fileMine: The merged version of the file. Also known in git as the 1631# other branch's version (%B) or "ours". 1632# For traditional patch rejects, this is the *.rej file. 1633# $fileOlder: The base version of the file. Also known in git as the 1634# ancestor version (%O) or "base". 1635# For traditional patch rejects, this is the *.orig file. 1636# $fileNewer: The current version of the file. Also known in git as the 1637# current version (%A) or "theirs". 1638# For traditional patch rejects, this is the original-named 1639# file. 1640# 1641# Returns 1 if merge was successful, else 0. 1642sub mergeChangeLogs($$$) 1643{ 1644 my ($fileMine, $fileOlder, $fileNewer) = @_; 1645 1646 my $traditionalReject = $fileMine =~ /\.rej$/ ? 1 : 0; 1647 1648 local $/ = undef; 1649 1650 my $patch; 1651 if ($traditionalReject) { 1652 open(DIFF, "<", $fileMine) or die $!; 1653 $patch = <DIFF>; 1654 close(DIFF); 1655 rename($fileMine, "$fileMine.save"); 1656 rename($fileOlder, "$fileOlder.save"); 1657 } else { 1658 open(DIFF, "-|", qw(diff -u -a --binary), $fileOlder, $fileMine) or die $!; 1659 $patch = <DIFF>; 1660 close(DIFF); 1661 } 1662 1663 unlink("${fileNewer}.orig"); 1664 unlink("${fileNewer}.rej"); 1665 1666 open(PATCH, "| patch --force --fuzz=3 --binary $fileNewer > " . File::Spec->devnull()) or die $!; 1667 if ($traditionalReject) { 1668 print PATCH $patch; 1669 } else { 1670 my $changeLogHash = fixChangeLogPatch($patch); 1671 print PATCH $changeLogHash->{patch}; 1672 } 1673 close(PATCH); 1674 1675 my $result = !exitStatus($?); 1676 1677 # Refuse to merge the patch if it did not apply cleanly 1678 if (-e "${fileNewer}.rej") { 1679 unlink("${fileNewer}.rej"); 1680 if (-f "${fileNewer}.orig") { 1681 unlink($fileNewer); 1682 rename("${fileNewer}.orig", $fileNewer); 1683 } 1684 } else { 1685 unlink("${fileNewer}.orig"); 1686 } 1687 1688 if ($traditionalReject) { 1689 rename("$fileMine.save", $fileMine); 1690 rename("$fileOlder.save", $fileOlder); 1691 } 1692 1693 return $result; 1694} 1695 1696sub gitConfig($) 1697{ 1698 return unless $isGit; 1699 1700 my ($config) = @_; 1701 1702 my $result = `git config $config`; 1703 if (($? >> 8)) { 1704 $result = `git repo-config $config`; 1705 } 1706 chomp $result; 1707 return $result; 1708} 1709 1710sub changeLogNameError($) 1711{ 1712 my ($message) = @_; 1713 print STDERR "$message\nEither:\n"; 1714 print STDERR " set CHANGE_LOG_NAME in your environment\n"; 1715 print STDERR " OR pass --name= on the command line\n"; 1716 print STDERR " OR set REAL_NAME in your environment"; 1717 print STDERR " OR git users can set 'git config user.name'\n"; 1718 exit(1); 1719} 1720 1721sub changeLogName() 1722{ 1723 my $name = $ENV{CHANGE_LOG_NAME} || $ENV{REAL_NAME} || gitConfig("user.name") || (split /\s*,\s*/, (getpwuid $<)[6])[0]; 1724 1725 changeLogNameError("Failed to determine ChangeLog name.") unless $name; 1726 # getpwuid seems to always succeed on windows, returning the username instead of the full name. This check will catch that case. 1727 changeLogNameError("'$name' does not contain a space! ChangeLogs should contain your full name.") unless ($name =~ /\w \w/); 1728 1729 return $name; 1730} 1731 1732sub changeLogEmailAddressError($) 1733{ 1734 my ($message) = @_; 1735 print STDERR "$message\nEither:\n"; 1736 print STDERR " set CHANGE_LOG_EMAIL_ADDRESS in your environment\n"; 1737 print STDERR " OR pass --email= on the command line\n"; 1738 print STDERR " OR set EMAIL_ADDRESS in your environment\n"; 1739 print STDERR " OR git users can set 'git config user.email'\n"; 1740 exit(1); 1741} 1742 1743sub changeLogEmailAddress() 1744{ 1745 my $emailAddress = $ENV{CHANGE_LOG_EMAIL_ADDRESS} || $ENV{EMAIL_ADDRESS} || gitConfig("user.email"); 1746 1747 changeLogEmailAddressError("Failed to determine email address for ChangeLog.") unless $emailAddress; 1748 changeLogEmailAddressError("Email address '$emailAddress' does not contain '\@' and is likely invalid.") unless ($emailAddress =~ /\@/); 1749 1750 return $emailAddress; 1751} 1752 1753# http://tools.ietf.org/html/rfc1924 1754sub decodeBase85($) 1755{ 1756 my ($encoded) = @_; 1757 my %table; 1758 my @characters = ('0'..'9', 'A'..'Z', 'a'..'z', '!', '#', '$', '%', '&', '(', ')', '*', '+', '-', ';', '<', '=', '>', '?', '@', '^', '_', '`', '{', '|', '}', '~'); 1759 for (my $i = 0; $i < 85; $i++) { 1760 $table{$characters[$i]} = $i; 1761 } 1762 1763 my $decoded = ''; 1764 my @encodedChars = $encoded =~ /./g; 1765 1766 for (my $encodedIter = 0; defined($encodedChars[$encodedIter]);) { 1767 my $digit = 0; 1768 for (my $i = 0; $i < 5; $i++) { 1769 $digit *= 85; 1770 my $char = $encodedChars[$encodedIter]; 1771 $digit += $table{$char}; 1772 $encodedIter++; 1773 } 1774 1775 for (my $i = 0; $i < 4; $i++) { 1776 $decoded .= chr(($digit >> (3 - $i) * 8) & 255); 1777 } 1778 } 1779 1780 return $decoded; 1781} 1782 1783sub decodeGitBinaryChunk($$) 1784{ 1785 my ($contents, $fullPath) = @_; 1786 1787 # Load this module lazily in case the user don't have this module 1788 # and won't handle git binary patches. 1789 require Compress::Zlib; 1790 1791 my $encoded = ""; 1792 my $compressedSize = 0; 1793 while ($contents =~ /^([A-Za-z])(.*)$/gm) { 1794 my $line = $2; 1795 next if $line eq ""; 1796 die "$fullPath: unexpected size of a line: $&" if length($2) % 5 != 0; 1797 my $actualSize = length($2) / 5 * 4; 1798 my $encodedExpectedSize = ord($1); 1799 my $expectedSize = $encodedExpectedSize <= ord("Z") ? $encodedExpectedSize - ord("A") + 1 : $encodedExpectedSize - ord("a") + 27; 1800 1801 die "$fullPath: unexpected size of a line: $&" if int(($expectedSize + 3) / 4) * 4 != $actualSize; 1802 $compressedSize += $expectedSize; 1803 $encoded .= $line; 1804 } 1805 1806 my $compressed = decodeBase85($encoded); 1807 $compressed = substr($compressed, 0, $compressedSize); 1808 return Compress::Zlib::uncompress($compressed); 1809} 1810 1811sub decodeGitBinaryPatch($$) 1812{ 1813 my ($contents, $fullPath) = @_; 1814 1815 # Git binary patch has two chunks. One is for the normal patching 1816 # and another is for the reverse patching. 1817 # 1818 # Each chunk a line which starts from either "literal" or "delta", 1819 # followed by a number which specifies decoded size of the chunk. 1820 # 1821 # Then, content of the chunk comes. To decode the content, we 1822 # need decode it with base85 first, and then zlib. 1823 my $gitPatchRegExp = '(literal|delta) ([0-9]+)\n([A-Za-z0-9!#$%&()*+-;<=>?@^_`{|}~\\n]*?)\n\n'; 1824 if ($contents !~ m"\nGIT binary patch\n$gitPatchRegExp$gitPatchRegExp\Z") { 1825 die "$fullPath: unknown git binary patch format" 1826 } 1827 1828 my $binaryChunkType = $1; 1829 my $binaryChunkExpectedSize = $2; 1830 my $encodedChunk = $3; 1831 my $reverseBinaryChunkType = $4; 1832 my $reverseBinaryChunkExpectedSize = $5; 1833 my $encodedReverseChunk = $6; 1834 1835 my $binaryChunk = decodeGitBinaryChunk($encodedChunk, $fullPath); 1836 my $binaryChunkActualSize = length($binaryChunk); 1837 my $reverseBinaryChunk = decodeGitBinaryChunk($encodedReverseChunk, $fullPath); 1838 my $reverseBinaryChunkActualSize = length($reverseBinaryChunk); 1839 1840 die "$fullPath: unexpected size of the first chunk (expected $binaryChunkExpectedSize but was $binaryChunkActualSize" if ($binaryChunkType eq "literal" and $binaryChunkExpectedSize != $binaryChunkActualSize); 1841 die "$fullPath: unexpected size of the second chunk (expected $reverseBinaryChunkExpectedSize but was $reverseBinaryChunkActualSize" if ($reverseBinaryChunkType eq "literal" and $reverseBinaryChunkExpectedSize != $reverseBinaryChunkActualSize); 1842 1843 return ($binaryChunkType, $binaryChunk, $reverseBinaryChunkType, $reverseBinaryChunk); 1844} 1845 1846sub readByte($$) 1847{ 1848 my ($data, $location) = @_; 1849 1850 # Return the byte at $location in $data as a numeric value. 1851 return ord(substr($data, $location, 1)); 1852} 1853 1854# The git binary delta format is undocumented, except in code: 1855# - https://github.com/git/git/blob/master/delta.h:get_delta_hdr_size is the source 1856# of the algorithm in decodeGitBinaryPatchDeltaSize. 1857# - https://github.com/git/git/blob/master/patch-delta.c:patch_delta is the source 1858# of the algorithm in applyGitBinaryPatchDelta. 1859sub decodeGitBinaryPatchDeltaSize($) 1860{ 1861 my ($binaryChunk) = @_; 1862 1863 # Source and destination buffer sizes are stored in 7-bit chunks at the 1864 # start of the binary delta patch data. The highest bit in each byte 1865 # except the last is set; the remaining 7 bits provide the next 1866 # chunk of the size. The chunks are stored in ascending significance 1867 # order. 1868 my $cmd; 1869 my $size = 0; 1870 my $shift = 0; 1871 for (my $i = 0; $i < length($binaryChunk);) { 1872 $cmd = readByte($binaryChunk, $i++); 1873 $size |= ($cmd & 0x7f) << $shift; 1874 $shift += 7; 1875 if (!($cmd & 0x80)) { 1876 return ($size, $i); 1877 } 1878 } 1879} 1880 1881sub applyGitBinaryPatchDelta($$) 1882{ 1883 my ($binaryChunk, $originalContents) = @_; 1884 1885 # Git delta format consists of two headers indicating source buffer size 1886 # and result size, then a series of commands. Each command is either 1887 # a copy-from-old-version (the 0x80 bit is set) or a copy-from-delta 1888 # command. Commands are applied sequentially to generate the result. 1889 # 1890 # A copy-from-old-version command encodes an offset and size to copy 1891 # from in subsequent bits, while a copy-from-delta command consists only 1892 # of the number of bytes to copy from the delta. 1893 1894 # We don't use these values, but we need to know how big they are so that 1895 # we can skip to the diff data. 1896 my ($size, $bytesUsed) = decodeGitBinaryPatchDeltaSize($binaryChunk); 1897 $binaryChunk = substr($binaryChunk, $bytesUsed); 1898 ($size, $bytesUsed) = decodeGitBinaryPatchDeltaSize($binaryChunk); 1899 $binaryChunk = substr($binaryChunk, $bytesUsed); 1900 1901 my $out = ""; 1902 for (my $i = 0; $i < length($binaryChunk); ) { 1903 my $cmd = ord(substr($binaryChunk, $i++, 1)); 1904 if ($cmd & 0x80) { 1905 # Extract an offset and size from the delta data, then copy 1906 # $size bytes from $offset in the original data into the output. 1907 my $offset = 0; 1908 my $size = 0; 1909 if ($cmd & 0x01) { $offset = readByte($binaryChunk, $i++); } 1910 if ($cmd & 0x02) { $offset |= readByte($binaryChunk, $i++) << 8; } 1911 if ($cmd & 0x04) { $offset |= readByte($binaryChunk, $i++) << 16; } 1912 if ($cmd & 0x08) { $offset |= readByte($binaryChunk, $i++) << 24; } 1913 if ($cmd & 0x10) { $size = readByte($binaryChunk, $i++); } 1914 if ($cmd & 0x20) { $size |= readByte($binaryChunk, $i++) << 8; } 1915 if ($cmd & 0x40) { $size |= readByte($binaryChunk, $i++) << 16; } 1916 if ($size == 0) { $size = 0x10000; } 1917 $out .= substr($originalContents, $offset, $size); 1918 } elsif ($cmd) { 1919 # Copy $cmd bytes from the delta data into the output. 1920 $out .= substr($binaryChunk, $i, $cmd); 1921 $i += $cmd; 1922 } else { 1923 die "unexpected delta opcode 0"; 1924 } 1925 } 1926 1927 return $out; 1928} 1929 19301; 1931