• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright (C) 2007, 2008, 2009 Apple Inc.  All rights reserved.
2# Copyright (C) 2009, 2010 Chris Jerdonek (chris.jerdonek@gmail.com)
3# Copyright (C) 2010, 2011 Research In Motion Limited. All rights reserved.
4#
5# Redistribution and use in source and binary forms, with or without
6# modification, are permitted provided that the following conditions
7# are met:
8#
9# 1.  Redistributions of source code must retain the above copyright
10#     notice, this list of conditions and the following disclaimer.
11# 2.  Redistributions in binary form must reproduce the above copyright
12#     notice, this list of conditions and the following disclaimer in the
13#     documentation and/or other materials provided with the distribution.
14# 3.  Neither the name of Apple Computer, Inc. ("Apple") nor the names of
15#     its contributors may be used to endorse or promote products derived
16#     from this software without specific prior written permission.
17#
18# THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
19# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21# DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
22# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
25# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29# Module to share code to work with various version control systems.
30package VCSUtils;
31
32use strict;
33use warnings;
34
35use Cwd qw();  # "qw()" prevents warnings about redefining getcwd() with "use POSIX;"
36use English; # for $POSTMATCH, etc.
37use File::Basename;
38use File::Spec;
39use POSIX;
40
41BEGIN {
42    use Exporter   ();
43    our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);
44    $VERSION     = 1.00;
45    @ISA         = qw(Exporter);
46    @EXPORT      = qw(
47        &applyGitBinaryPatchDelta
48        &callSilently
49        &canonicalizePath
50        &changeLogEmailAddress
51        &changeLogName
52        &chdirReturningRelativePath
53        &decodeGitBinaryChunk
54        &decodeGitBinaryPatch
55        &determineSVNRoot
56        &determineVCSRoot
57        &exitStatus
58        &fixChangeLogPatch
59        &gitBranch
60        &gitdiff2svndiff
61        &isGit
62        &isGitSVN
63        &isGitBranchBuild
64        &isGitDirectory
65        &isSVN
66        &isSVNDirectory
67        &isSVNVersion16OrNewer
68        &makeFilePathRelative
69        &mergeChangeLogs
70        &normalizePath
71        &parseFirstEOL
72        &parsePatch
73        &pathRelativeToSVNRepositoryRootForPath
74        &prepareParsedPatch
75        &removeEOL
76        &runPatchCommand
77        &scmMoveOrRenameFile
78        &scmToggleExecutableBit
79        &setChangeLogDateAndReviewer
80        &svnRevisionForDirectory
81        &svnStatus
82        &toWindowsLineEndings
83    );
84    %EXPORT_TAGS = ( );
85    @EXPORT_OK   = ();
86}
87
88our @EXPORT_OK;
89
90my $gitBranch;
91my $gitRoot;
92my $isGit;
93my $isGitSVN;
94my $isGitBranchBuild;
95my $isSVN;
96my $svnVersion;
97
98# Project time zone for Cupertino, CA, US
99my $changeLogTimeZone = "PST8PDT";
100
101my $chunkRangeRegEx = qr#^\@\@ -(\d+),(\d+) \+\d+,(\d+) \@\@$#; # e.g. @@ -2,6 +2,18 @@
102my $gitDiffStartRegEx = qr#^diff --git (\w/)?(.+) (\w/)?([^\r\n]+)#;
103my $svnDiffStartRegEx = qr#^Index: ([^\r\n]+)#;
104my $svnPropertiesStartRegEx = qr#^Property changes on: ([^\r\n]+)#; # $1 is normally the same as the index path.
105my $svnPropertyStartRegEx = qr#^(Modified|Name|Added|Deleted): ([^\r\n]+)#; # $2 is the name of the property.
106my $svnPropertyValueStartRegEx = qr#^   (\+|-|Merged|Reverse-merged) ([^\r\n]+)#; # $2 is the start of the property's value (which may span multiple lines).
107
108# This method is for portability. Return the system-appropriate exit
109# status of a child process.
110#
111# Args: pass the child error status returned by the last pipe close,
112#       for example "$?".
113sub exitStatus($)
114{
115    my ($returnvalue) = @_;
116    if ($^O eq "MSWin32") {
117        return $returnvalue >> 8;
118    }
119    return WEXITSTATUS($returnvalue);
120}
121
122# Call a function while suppressing STDERR, and return the return values
123# as an array.
124sub callSilently($@) {
125    my ($func, @args) = @_;
126
127    # The following pattern was taken from here:
128    #   http://www.sdsc.edu/~moreland/courses/IntroPerl/docs/manual/pod/perlfunc/open.html
129    #
130    # Also see this Perl documentation (search for "open OLDERR"):
131    #   http://perldoc.perl.org/functions/open.html
132    open(OLDERR, ">&STDERR");
133    close(STDERR);
134    my @returnValue = &$func(@args);
135    open(STDERR, ">&OLDERR");
136    close(OLDERR);
137
138    return @returnValue;
139}
140
141sub toWindowsLineEndings
142{
143    my ($text) = @_;
144    $text =~ s/\n/\r\n/g;
145    return $text;
146}
147
148# Note, this method will not error if the file corresponding to the $source path does not exist.
149sub scmMoveOrRenameFile
150{
151    my ($source, $destination) = @_;
152    return if ! -e $source;
153    if (isSVN()) {
154        system("svn", "move", $source, $destination);
155    } elsif (isGit()) {
156        system("git", "mv", $source, $destination);
157    }
158}
159
160# Note, this method will not error if the file corresponding to the path does not exist.
161sub scmToggleExecutableBit
162{
163    my ($path, $executableBitDelta) = @_;
164    return if ! -e $path;
165    if ($executableBitDelta == 1) {
166        scmAddExecutableBit($path);
167    } elsif ($executableBitDelta == -1) {
168        scmRemoveExecutableBit($path);
169    }
170}
171
172sub scmAddExecutableBit($)
173{
174    my ($path) = @_;
175
176    if (isSVN()) {
177        system("svn", "propset", "svn:executable", "on", $path) == 0 or die "Failed to run 'svn propset svn:executable on $path'.";
178    } elsif (isGit()) {
179        chmod(0755, $path);
180    }
181}
182
183sub scmRemoveExecutableBit($)
184{
185    my ($path) = @_;
186
187    if (isSVN()) {
188        system("svn", "propdel", "svn:executable", $path) == 0 or die "Failed to run 'svn propdel svn:executable $path'.";
189    } elsif (isGit()) {
190        chmod(0664, $path);
191    }
192}
193
194sub isGitDirectory($)
195{
196    my ($dir) = @_;
197    return system("cd $dir && git rev-parse > " . File::Spec->devnull() . " 2>&1") == 0;
198}
199
200sub isGit()
201{
202    return $isGit if defined $isGit;
203
204    $isGit = isGitDirectory(".");
205    return $isGit;
206}
207
208sub isGitSVN()
209{
210    return $isGitSVN if defined $isGitSVN;
211
212    # There doesn't seem to be an officially documented way to determine
213    # if you're in a git-svn checkout. The best suggestions seen so far
214    # all use something like the following:
215    my $output = `git config --get svn-remote.svn.fetch 2>& 1`;
216    $isGitSVN = $output ne '';
217    return $isGitSVN;
218}
219
220sub gitBranch()
221{
222    unless (defined $gitBranch) {
223        chomp($gitBranch = `git symbolic-ref -q HEAD`);
224        $gitBranch = "" if exitStatus($?);
225        $gitBranch =~ s#^refs/heads/##;
226        $gitBranch = "" if $gitBranch eq "master";
227    }
228
229    return $gitBranch;
230}
231
232sub isGitBranchBuild()
233{
234    my $branch = gitBranch();
235    chomp(my $override = `git config --bool branch.$branch.webKitBranchBuild`);
236    return 1 if $override eq "true";
237    return 0 if $override eq "false";
238
239    unless (defined $isGitBranchBuild) {
240        chomp(my $gitBranchBuild = `git config --bool core.webKitBranchBuild`);
241        $isGitBranchBuild = $gitBranchBuild eq "true";
242    }
243
244    return $isGitBranchBuild;
245}
246
247sub isSVNDirectory($)
248{
249    my ($dir) = @_;
250
251    return -d File::Spec->catdir($dir, ".svn");
252}
253
254sub isSVN()
255{
256    return $isSVN if defined $isSVN;
257
258    $isSVN = isSVNDirectory(".");
259    return $isSVN;
260}
261
262sub svnVersion()
263{
264    return $svnVersion if defined $svnVersion;
265
266    if (!isSVN()) {
267        $svnVersion = 0;
268    } else {
269        chomp($svnVersion = `svn --version --quiet`);
270    }
271    return $svnVersion;
272}
273
274sub isSVNVersion16OrNewer()
275{
276    my $version = svnVersion();
277    return eval "v$version" ge v1.6;
278}
279
280sub chdirReturningRelativePath($)
281{
282    my ($directory) = @_;
283    my $previousDirectory = Cwd::getcwd();
284    chdir $directory;
285    my $newDirectory = Cwd::getcwd();
286    return "." if $newDirectory eq $previousDirectory;
287    return File::Spec->abs2rel($previousDirectory, $newDirectory);
288}
289
290sub determineGitRoot()
291{
292    chomp(my $gitDir = `git rev-parse --git-dir`);
293    return dirname($gitDir);
294}
295
296sub determineSVNRoot()
297{
298    my $last = '';
299    my $path = '.';
300    my $parent = '..';
301    my $repositoryRoot;
302    my $repositoryUUID;
303    while (1) {
304        my $thisRoot;
305        my $thisUUID;
306        # Ignore error messages in case we've run past the root of the checkout.
307        open INFO, "svn info '$path' 2> " . File::Spec->devnull() . " |" or die;
308        while (<INFO>) {
309            if (/^Repository Root: (.+)/) {
310                $thisRoot = $1;
311            }
312            if (/^Repository UUID: (.+)/) {
313                $thisUUID = $1;
314            }
315            if ($thisRoot && $thisUUID) {
316                local $/ = undef;
317                <INFO>; # Consume the rest of the input.
318            }
319        }
320        close INFO;
321
322        # It's possible (e.g. for developers of some ports) to have a WebKit
323        # checkout in a subdirectory of another checkout.  So abort if the
324        # repository root or the repository UUID suddenly changes.
325        last if !$thisUUID;
326        $repositoryUUID = $thisUUID if !$repositoryUUID;
327        last if $thisUUID ne $repositoryUUID;
328
329        last if !$thisRoot;
330        $repositoryRoot = $thisRoot if !$repositoryRoot;
331        last if $thisRoot ne $repositoryRoot;
332
333        $last = $path;
334        $path = File::Spec->catdir($parent, $path);
335    }
336
337    return File::Spec->rel2abs($last);
338}
339
340sub determineVCSRoot()
341{
342    if (isGit()) {
343        return determineGitRoot();
344    }
345
346    if (!isSVN()) {
347        # Some users have a workflow where svn-create-patch, svn-apply and
348        # svn-unapply are used outside of multiple svn working directores,
349        # so warn the user and assume Subversion is being used in this case.
350        warn "Unable to determine VCS root; assuming Subversion";
351        $isSVN = 1;
352    }
353
354    return determineSVNRoot();
355}
356
357sub svnRevisionForDirectory($)
358{
359    my ($dir) = @_;
360    my $revision;
361
362    if (isSVNDirectory($dir)) {
363        my $svnInfo = `LC_ALL=C svn info $dir | grep Revision:`;
364        ($revision) = ($svnInfo =~ m/Revision: (\d+).*/g);
365    } elsif (isGitDirectory($dir)) {
366        my $gitLog = `cd $dir && LC_ALL=C git log --grep='git-svn-id: ' -n 1 | grep git-svn-id:`;
367        ($revision) = ($gitLog =~ m/ +git-svn-id: .+@(\d+) /g);
368    }
369    die "Unable to determine current SVN revision in $dir" unless (defined $revision);
370    return $revision;
371}
372
373sub pathRelativeToSVNRepositoryRootForPath($)
374{
375    my ($file) = @_;
376    my $relativePath = File::Spec->abs2rel($file);
377
378    my $svnInfo;
379    if (isSVN()) {
380        $svnInfo = `LC_ALL=C svn info $relativePath`;
381    } elsif (isGit()) {
382        $svnInfo = `LC_ALL=C git svn info $relativePath`;
383    }
384
385    $svnInfo =~ /.*^URL: (.*?)$/m;
386    my $svnURL = $1;
387
388    $svnInfo =~ /.*^Repository Root: (.*?)$/m;
389    my $repositoryRoot = $1;
390
391    $svnURL =~ s/$repositoryRoot\///;
392    return $svnURL;
393}
394
395sub makeFilePathRelative($)
396{
397    my ($path) = @_;
398    return $path unless isGit();
399
400    unless (defined $gitRoot) {
401        chomp($gitRoot = `git rev-parse --show-cdup`);
402    }
403    return $gitRoot . $path;
404}
405
406sub normalizePath($)
407{
408    my ($path) = @_;
409    $path =~ s/\\/\//g;
410    return $path;
411}
412
413sub adjustPathForRecentRenamings($)
414{
415    my ($fullPath) = @_;
416
417    if ($fullPath =~ m|^WebCore/|
418        || $fullPath =~ m|^JavaScriptCore/|
419        || $fullPath =~ m|^WebKit/|
420        || $fullPath =~ m|^WebKit2/|) {
421        return "Source/$fullPath";
422    }
423    return $fullPath;
424}
425
426sub canonicalizePath($)
427{
428    my ($file) = @_;
429
430    # Remove extra slashes and '.' directories in path
431    $file = File::Spec->canonpath($file);
432
433    # Remove '..' directories in path
434    my @dirs = ();
435    foreach my $dir (File::Spec->splitdir($file)) {
436        if ($dir eq '..' && $#dirs >= 0 && $dirs[$#dirs] ne '..') {
437            pop(@dirs);
438        } else {
439            push(@dirs, $dir);
440        }
441    }
442    return ($#dirs >= 0) ? File::Spec->catdir(@dirs) : ".";
443}
444
445sub removeEOL($)
446{
447    my ($line) = @_;
448    return "" unless $line;
449
450    $line =~ s/[\r\n]+$//g;
451    return $line;
452}
453
454sub parseFirstEOL($)
455{
456    my ($fileHandle) = @_;
457
458    # Make input record separator the new-line character to simplify regex matching below.
459    my $savedInputRecordSeparator = $INPUT_RECORD_SEPARATOR;
460    $INPUT_RECORD_SEPARATOR = "\n";
461    my $firstLine  = <$fileHandle>;
462    $INPUT_RECORD_SEPARATOR = $savedInputRecordSeparator;
463
464    return unless defined($firstLine);
465
466    my $eol;
467    if ($firstLine =~ /\r\n/) {
468        $eol = "\r\n";
469    } elsif ($firstLine =~ /\r/) {
470        $eol = "\r";
471    } elsif ($firstLine =~ /\n/) {
472        $eol = "\n";
473    }
474    return $eol;
475}
476
477sub firstEOLInFile($)
478{
479    my ($file) = @_;
480    my $eol;
481    if (open(FILE, $file)) {
482        $eol = parseFirstEOL(*FILE);
483        close(FILE);
484    }
485    return $eol;
486}
487
488sub svnStatus($)
489{
490    my ($fullPath) = @_;
491    my $svnStatus;
492    open SVN, "svn status --non-interactive --non-recursive '$fullPath' |" or die;
493    if (-d $fullPath) {
494        # When running "svn stat" on a directory, we can't assume that only one
495        # status will be returned (since any files with a status below the
496        # directory will be returned), and we can't assume that the directory will
497        # be first (since any files with unknown status will be listed first).
498        my $normalizedFullPath = File::Spec->catdir(File::Spec->splitdir($fullPath));
499        while (<SVN>) {
500            # Input may use a different EOL sequence than $/, so avoid chomp.
501            $_ = removeEOL($_);
502            my $normalizedStatPath = File::Spec->catdir(File::Spec->splitdir(substr($_, 7)));
503            if ($normalizedFullPath eq $normalizedStatPath) {
504                $svnStatus = "$_\n";
505                last;
506            }
507        }
508        # Read the rest of the svn command output to avoid a broken pipe warning.
509        local $/ = undef;
510        <SVN>;
511    }
512    else {
513        # Files will have only one status returned.
514        $svnStatus = removeEOL(<SVN>) . "\n";
515    }
516    close SVN;
517    return $svnStatus;
518}
519
520# Return whether the given file mode is executable in the source control
521# sense.  We make this determination based on whether the executable bit
522# is set for "others" rather than the stronger condition that it be set
523# for the user, group, and others.  This is sufficient for distinguishing
524# the default behavior in Git and SVN.
525#
526# Args:
527#   $fileMode: A number or string representing a file mode in octal notation.
528sub isExecutable($)
529{
530    my $fileMode = shift;
531
532    return $fileMode % 2;
533}
534
535# Parse the next Git diff header from the given file handle, and advance
536# the handle so the last line read is the first line after the header.
537#
538# This subroutine dies if given leading junk.
539#
540# Args:
541#   $fileHandle: advanced so the last line read from the handle is the first
542#                line of the header to parse.  This should be a line
543#                beginning with "diff --git".
544#   $line: the line last read from $fileHandle
545#
546# Returns ($headerHashRef, $lastReadLine):
547#   $headerHashRef: a hash reference representing a diff header, as follows--
548#     copiedFromPath: the path from which the file was copied or moved if
549#                     the diff is a copy or move.
550#     executableBitDelta: the value 1 or -1 if the executable bit was added or
551#                         removed, respectively.  New and deleted files have
552#                         this value only if the file is executable, in which
553#                         case the value is 1 and -1, respectively.
554#     indexPath: the path of the target file.
555#     isBinary: the value 1 if the diff is for a binary file.
556#     isDeletion: the value 1 if the diff is a file deletion.
557#     isCopyWithChanges: the value 1 if the file was copied or moved and
558#                        the target file was changed in some way after being
559#                        copied or moved (e.g. if its contents or executable
560#                        bit were changed).
561#     isNew: the value 1 if the diff is for a new file.
562#     shouldDeleteSource: the value 1 if the file was copied or moved and
563#                         the source file was deleted -- i.e. if the copy
564#                         was actually a move.
565#     svnConvertedText: the header text with some lines converted to SVN
566#                       format.  Git-specific lines are preserved.
567#   $lastReadLine: the line last read from $fileHandle.
568sub parseGitDiffHeader($$)
569{
570    my ($fileHandle, $line) = @_;
571
572    $_ = $line;
573
574    my $indexPath;
575    if (/$gitDiffStartRegEx/) {
576        # The first and second paths can differ in the case of copies
577        # and renames.  We use the second file path because it is the
578        # destination path.
579        $indexPath = adjustPathForRecentRenamings($4);
580        # Use $POSTMATCH to preserve the end-of-line character.
581        $_ = "Index: $indexPath$POSTMATCH"; # Convert to SVN format.
582    } else {
583        die("Could not parse leading \"diff --git\" line: \"$line\".");
584    }
585
586    my $copiedFromPath;
587    my $foundHeaderEnding;
588    my $isBinary;
589    my $isDeletion;
590    my $isNew;
591    my $newExecutableBit = 0;
592    my $oldExecutableBit = 0;
593    my $shouldDeleteSource = 0;
594    my $similarityIndex = 0;
595    my $svnConvertedText;
596    while (1) {
597        # Temporarily strip off any end-of-line characters to simplify
598        # regex matching below.
599        s/([\n\r]+)$//;
600        my $eol = $1;
601
602        if (/^(deleted file|old) mode (\d+)/) {
603            $oldExecutableBit = (isExecutable($2) ? 1 : 0);
604            $isDeletion = 1 if $1 eq "deleted file";
605        } elsif (/^new( file)? mode (\d+)/) {
606            $newExecutableBit = (isExecutable($2) ? 1 : 0);
607            $isNew = 1 if $1;
608        } elsif (/^similarity index (\d+)%/) {
609            $similarityIndex = $1;
610        } elsif (/^copy from (\S+)/) {
611            $copiedFromPath = $1;
612        } elsif (/^rename from (\S+)/) {
613            # FIXME: Record this as a move rather than as a copy-and-delete.
614            #        This will simplify adding rename support to svn-unapply.
615            #        Otherwise, the hash for a deletion would have to know
616            #        everything about the file being deleted in order to
617            #        support undoing itself.  Recording as a move will also
618            #        permit us to use "svn move" and "git move".
619            $copiedFromPath = $1;
620            $shouldDeleteSource = 1;
621        } elsif (/^--- \S+/) {
622            $_ = "--- $indexPath"; # Convert to SVN format.
623        } elsif (/^\+\+\+ \S+/) {
624            $_ = "+++ $indexPath"; # Convert to SVN format.
625            $foundHeaderEnding = 1;
626        } elsif (/^GIT binary patch$/ ) {
627            $isBinary = 1;
628            $foundHeaderEnding = 1;
629        # The "git diff" command includes a line of the form "Binary files
630        # <path1> and <path2> differ" if the --binary flag is not used.
631        } elsif (/^Binary files / ) {
632            die("Error: the Git diff contains a binary file without the binary data in ".
633                "line: \"$_\".  Be sure to use the --binary flag when invoking \"git diff\" ".
634                "with diffs containing binary files.");
635        }
636
637        $svnConvertedText .= "$_$eol"; # Also restore end-of-line characters.
638
639        $_ = <$fileHandle>; # Not defined if end-of-file reached.
640
641        last if (!defined($_) || /$gitDiffStartRegEx/ || $foundHeaderEnding);
642    }
643
644    my $executableBitDelta = $newExecutableBit - $oldExecutableBit;
645
646    my %header;
647
648    $header{copiedFromPath} = $copiedFromPath if $copiedFromPath;
649    $header{executableBitDelta} = $executableBitDelta if $executableBitDelta;
650    $header{indexPath} = $indexPath;
651    $header{isBinary} = $isBinary if $isBinary;
652    $header{isCopyWithChanges} = 1 if ($copiedFromPath && ($similarityIndex != 100 || $executableBitDelta));
653    $header{isDeletion} = $isDeletion if $isDeletion;
654    $header{isNew} = $isNew if $isNew;
655    $header{shouldDeleteSource} = $shouldDeleteSource if $shouldDeleteSource;
656    $header{svnConvertedText} = $svnConvertedText;
657
658    return (\%header, $_);
659}
660
661# Parse the next SVN diff header from the given file handle, and advance
662# the handle so the last line read is the first line after the header.
663#
664# This subroutine dies if given leading junk or if it could not detect
665# the end of the header block.
666#
667# Args:
668#   $fileHandle: advanced so the last line read from the handle is the first
669#                line of the header to parse.  This should be a line
670#                beginning with "Index:".
671#   $line: the line last read from $fileHandle
672#
673# Returns ($headerHashRef, $lastReadLine):
674#   $headerHashRef: a hash reference representing a diff header, as follows--
675#     copiedFromPath: the path from which the file was copied if the diff
676#                     is a copy.
677#     indexPath: the path of the target file, which is the path found in
678#                the "Index:" line.
679#     isBinary: the value 1 if the diff is for a binary file.
680#     isNew: the value 1 if the diff is for a new file.
681#     sourceRevision: the revision number of the source, if it exists.  This
682#                     is the same as the revision number the file was copied
683#                     from, in the case of a file copy.
684#     svnConvertedText: the header text converted to a header with the paths
685#                       in some lines corrected.
686#   $lastReadLine: the line last read from $fileHandle.
687sub parseSvnDiffHeader($$)
688{
689    my ($fileHandle, $line) = @_;
690
691    $_ = $line;
692
693    my $indexPath;
694    if (/$svnDiffStartRegEx/) {
695        $indexPath = adjustPathForRecentRenamings($1);
696    } else {
697        die("First line of SVN diff does not begin with \"Index \": \"$_\"");
698    }
699
700    my $copiedFromPath;
701    my $foundHeaderEnding;
702    my $isBinary;
703    my $isNew;
704    my $sourceRevision;
705    my $svnConvertedText;
706    while (1) {
707        # Temporarily strip off any end-of-line characters to simplify
708        # regex matching below.
709        s/([\n\r]+)$//;
710        my $eol = $1;
711
712        # Fix paths on ""---" and "+++" lines to match the leading
713        # index line.
714        if (s/^--- \S+/--- $indexPath/) {
715            # ---
716            if (/^--- .+\(revision (\d+)\)/) {
717                $sourceRevision = $1;
718                $isNew = 1 if !$sourceRevision; # if revision 0.
719                if (/\(from (\S+):(\d+)\)$/) {
720                    # The "from" clause is created by svn-create-patch, in
721                    # which case there is always also a "revision" clause.
722                    $copiedFromPath = $1;
723                    die("Revision number \"$2\" in \"from\" clause does not match " .
724                        "source revision number \"$sourceRevision\".") if ($2 != $sourceRevision);
725                }
726            }
727        } elsif (s/^\+\+\+ \S+/+++ $indexPath/) {
728            $foundHeaderEnding = 1;
729        } elsif (/^Cannot display: file marked as a binary type.$/) {
730            $isBinary = 1;
731            $foundHeaderEnding = 1;
732        }
733
734        $svnConvertedText .= "$_$eol"; # Also restore end-of-line characters.
735
736        $_ = <$fileHandle>; # Not defined if end-of-file reached.
737
738        last if (!defined($_) || /$svnDiffStartRegEx/ || $foundHeaderEnding);
739    }
740
741    if (!$foundHeaderEnding) {
742        die("Did not find end of header block corresponding to index path \"$indexPath\".");
743    }
744
745    my %header;
746
747    $header{copiedFromPath} = $copiedFromPath if $copiedFromPath;
748    $header{indexPath} = $indexPath;
749    $header{isBinary} = $isBinary if $isBinary;
750    $header{isNew} = $isNew if $isNew;
751    $header{sourceRevision} = $sourceRevision if $sourceRevision;
752    $header{svnConvertedText} = $svnConvertedText;
753
754    return (\%header, $_);
755}
756
757# Parse the next diff header from the given file handle, and advance
758# the handle so the last line read is the first line after the header.
759#
760# This subroutine dies if given leading junk or if it could not detect
761# the end of the header block.
762#
763# Args:
764#   $fileHandle: advanced so the last line read from the handle is the first
765#                line of the header to parse.  For SVN-formatted diffs, this
766#                is a line beginning with "Index:".  For Git, this is a line
767#                beginning with "diff --git".
768#   $line: the line last read from $fileHandle
769#
770# Returns ($headerHashRef, $lastReadLine):
771#   $headerHashRef: a hash reference representing a diff header
772#     copiedFromPath: the path from which the file was copied if the diff
773#                     is a copy.
774#     executableBitDelta: the value 1 or -1 if the executable bit was added or
775#                         removed, respectively.  New and deleted files have
776#                         this value only if the file is executable, in which
777#                         case the value is 1 and -1, respectively.
778#     indexPath: the path of the target file.
779#     isBinary: the value 1 if the diff is for a binary file.
780#     isGit: the value 1 if the diff is Git-formatted.
781#     isSvn: the value 1 if the diff is SVN-formatted.
782#     sourceRevision: the revision number of the source, if it exists.  This
783#                     is the same as the revision number the file was copied
784#                     from, in the case of a file copy.
785#     svnConvertedText: the header text with some lines converted to SVN
786#                       format.  Git-specific lines are preserved.
787#   $lastReadLine: the line last read from $fileHandle.
788sub parseDiffHeader($$)
789{
790    my ($fileHandle, $line) = @_;
791
792    my $header;  # This is a hash ref.
793    my $isGit;
794    my $isSvn;
795    my $lastReadLine;
796
797    if ($line =~ $svnDiffStartRegEx) {
798        $isSvn = 1;
799        ($header, $lastReadLine) = parseSvnDiffHeader($fileHandle, $line);
800    } elsif ($line =~ $gitDiffStartRegEx) {
801        $isGit = 1;
802        ($header, $lastReadLine) = parseGitDiffHeader($fileHandle, $line);
803    } else {
804        die("First line of diff does not begin with \"Index:\" or \"diff --git\": \"$line\"");
805    }
806
807    $header->{isGit} = $isGit if $isGit;
808    $header->{isSvn} = $isSvn if $isSvn;
809
810    return ($header, $lastReadLine);
811}
812
813# FIXME: The %diffHash "object" should not have an svnConvertedText property.
814#        Instead, the hash object should store its information in a
815#        structured way as properties.  This should be done in a way so
816#        that, if necessary, the text of an SVN or Git patch can be
817#        reconstructed from the information in those hash properties.
818#
819# A %diffHash is a hash representing a source control diff of a single
820# file operation (e.g. a file modification, copy, or delete).
821#
822# These hashes appear, for example, in the parseDiff(), parsePatch(),
823# and prepareParsedPatch() subroutines of this package.
824#
825# The corresponding values are--
826#
827#   copiedFromPath: the path from which the file was copied if the diff
828#                   is a copy.
829#   executableBitDelta: the value 1 or -1 if the executable bit was added or
830#                       removed from the target file, respectively.
831#   indexPath: the path of the target file.  For SVN-formatted diffs,
832#              this is the same as the path in the "Index:" line.
833#   isBinary: the value 1 if the diff is for a binary file.
834#   isDeletion: the value 1 if the diff is known from the header to be a deletion.
835#   isGit: the value 1 if the diff is Git-formatted.
836#   isNew: the value 1 if the dif is known from the header to be a new file.
837#   isSvn: the value 1 if the diff is SVN-formatted.
838#   sourceRevision: the revision number of the source, if it exists.  This
839#                   is the same as the revision number the file was copied
840#                   from, in the case of a file copy.
841#   svnConvertedText: the diff with some lines converted to SVN format.
842#                     Git-specific lines are preserved.
843
844# Parse one diff from a patch file created by svn-create-patch, and
845# advance the file handle so the last line read is the first line
846# of the next header block.
847#
848# This subroutine preserves any leading junk encountered before the header.
849#
850# Composition of an SVN diff
851#
852# There are three parts to an SVN diff: the header, the property change, and
853# the binary contents, in that order. Either the header or the property change
854# may be ommitted, but not both. If there are binary changes, then you always
855# have all three.
856#
857# Args:
858#   $fileHandle: a file handle advanced to the first line of the next
859#                header block. Leading junk is okay.
860#   $line: the line last read from $fileHandle.
861#   $optionsHashRef: a hash reference representing optional options to use
862#                    when processing a diff.
863#     shouldNotUseIndexPathEOL: whether to use the line endings in the diff instead
864#                               instead of the line endings in the target file; the
865#                               value of 1 if svnConvertedText should use the line
866#                               endings in the diff.
867#
868# Returns ($diffHashRefs, $lastReadLine):
869#   $diffHashRefs: A reference to an array of references to %diffHash hashes.
870#                  See the %diffHash documentation above.
871#   $lastReadLine: the line last read from $fileHandle
872sub parseDiff($$;$)
873{
874    # FIXME: Adjust this method so that it dies if the first line does not
875    #        match the start of a diff.  This will require a change to
876    #        parsePatch() so that parsePatch() skips over leading junk.
877    my ($fileHandle, $line, $optionsHashRef) = @_;
878
879    my $headerStartRegEx = $svnDiffStartRegEx; # SVN-style header for the default
880
881    my $headerHashRef; # Last header found, as returned by parseDiffHeader().
882    my $svnPropertiesHashRef; # Last SVN properties diff found, as returned by parseSvnDiffProperties().
883    my $svnText;
884    my $indexPathEOL;
885    while (defined($line)) {
886        if (!$headerHashRef && ($line =~ $gitDiffStartRegEx)) {
887            # Then assume all diffs in the patch are Git-formatted. This
888            # block was made to be enterable at most once since we assume
889            # all diffs in the patch are formatted the same (SVN or Git).
890            $headerStartRegEx = $gitDiffStartRegEx;
891        }
892
893        if ($line =~ $svnPropertiesStartRegEx) {
894            my $propertyPath = $1;
895            if ($svnPropertiesHashRef || $headerHashRef && ($propertyPath ne $headerHashRef->{indexPath})) {
896                # This is the start of the second diff in the while loop, which happens to
897                # be a property diff.  If $svnPropertiesHasRef is defined, then this is the
898                # second consecutive property diff, otherwise it's the start of a property
899                # diff for a file that only has property changes.
900                last;
901            }
902            ($svnPropertiesHashRef, $line) = parseSvnDiffProperties($fileHandle, $line);
903            next;
904        }
905        if ($line !~ $headerStartRegEx) {
906            # Then we are in the body of the diff.
907            if ($indexPathEOL && $line !~ /$chunkRangeRegEx/) {
908                # The chunk range is part of the body of the diff, but its line endings should't be
909                # modified or patch(1) will complain. So, we only modify non-chunk range lines.
910                $line =~ s/\r\n|\r|\n/$indexPathEOL/g;
911            }
912            $svnText .= $line;
913            $line = <$fileHandle>;
914            next;
915        } # Otherwise, we found a diff header.
916
917        if ($svnPropertiesHashRef || $headerHashRef) {
918            # Then either we just processed an SVN property change or this
919            # is the start of the second diff header of this while loop.
920            last;
921        }
922
923        ($headerHashRef, $line) = parseDiffHeader($fileHandle, $line);
924        if (!$optionsHashRef || !$optionsHashRef->{shouldNotUseIndexPathEOL}) {
925            $indexPathEOL = firstEOLInFile($headerHashRef->{indexPath}) if !$headerHashRef->{isNew} && !$headerHashRef->{isBinary};
926        }
927
928        $svnText .= $headerHashRef->{svnConvertedText};
929    }
930
931    my @diffHashRefs;
932
933    if ($headerHashRef->{shouldDeleteSource}) {
934        my %deletionHash;
935        $deletionHash{indexPath} = $headerHashRef->{copiedFromPath};
936        $deletionHash{isDeletion} = 1;
937        push @diffHashRefs, \%deletionHash;
938    }
939    if ($headerHashRef->{copiedFromPath}) {
940        my %copyHash;
941        $copyHash{copiedFromPath} = $headerHashRef->{copiedFromPath};
942        $copyHash{indexPath} = $headerHashRef->{indexPath};
943        $copyHash{sourceRevision} = $headerHashRef->{sourceRevision} if $headerHashRef->{sourceRevision};
944        if ($headerHashRef->{isSvn}) {
945            $copyHash{executableBitDelta} = $svnPropertiesHashRef->{executableBitDelta} if $svnPropertiesHashRef->{executableBitDelta};
946        }
947        push @diffHashRefs, \%copyHash;
948    }
949
950    # Note, the order of evaluation for the following if conditional has been explicitly chosen so that
951    # it evaluates to false when there is no headerHashRef (e.g. a property change diff for a file that
952    # only has property changes).
953    if ($headerHashRef->{isCopyWithChanges} || (%$headerHashRef && !$headerHashRef->{copiedFromPath})) {
954        # Then add the usual file modification.
955        my %diffHash;
956        # FIXME: We should expand this code to support other properties.  In the future,
957        #        parseSvnDiffProperties may return a hash whose keys are the properties.
958        if ($headerHashRef->{isSvn}) {
959            # SVN records the change to the executable bit in a separate property change diff
960            # that follows the contents of the diff, except for binary diffs.  For binary
961            # diffs, the property change diff follows the diff header.
962            $diffHash{executableBitDelta} = $svnPropertiesHashRef->{executableBitDelta} if $svnPropertiesHashRef->{executableBitDelta};
963        } elsif ($headerHashRef->{isGit}) {
964            # Git records the change to the executable bit in the header of a diff.
965            $diffHash{executableBitDelta} = $headerHashRef->{executableBitDelta} if $headerHashRef->{executableBitDelta};
966        }
967        $diffHash{indexPath} = $headerHashRef->{indexPath};
968        $diffHash{isBinary} = $headerHashRef->{isBinary} if $headerHashRef->{isBinary};
969        $diffHash{isDeletion} = $headerHashRef->{isDeletion} if $headerHashRef->{isDeletion};
970        $diffHash{isGit} = $headerHashRef->{isGit} if $headerHashRef->{isGit};
971        $diffHash{isNew} = $headerHashRef->{isNew} if $headerHashRef->{isNew};
972        $diffHash{isSvn} = $headerHashRef->{isSvn} if $headerHashRef->{isSvn};
973        if (!$headerHashRef->{copiedFromPath}) {
974            # If the file was copied, then we have already incorporated the
975            # sourceRevision information into the change.
976            $diffHash{sourceRevision} = $headerHashRef->{sourceRevision} if $headerHashRef->{sourceRevision};
977        }
978        # FIXME: Remove the need for svnConvertedText.  See the %diffHash
979        #        code comments above for more information.
980        #
981        # Note, we may not always have SVN converted text since we intend
982        # to deprecate it in the future.  For example, a property change
983        # diff for a file that only has property changes will not return
984        # any SVN converted text.
985        $diffHash{svnConvertedText} = $svnText if $svnText;
986        push @diffHashRefs, \%diffHash;
987    }
988
989    if (!%$headerHashRef && $svnPropertiesHashRef) {
990        # A property change diff for a file that only has property changes.
991        my %propertyChangeHash;
992        $propertyChangeHash{executableBitDelta} = $svnPropertiesHashRef->{executableBitDelta} if $svnPropertiesHashRef->{executableBitDelta};
993        $propertyChangeHash{indexPath} = $svnPropertiesHashRef->{propertyPath};
994        $propertyChangeHash{isSvn} = 1;
995        push @diffHashRefs, \%propertyChangeHash;
996    }
997
998    return (\@diffHashRefs, $line);
999}
1000
1001# Parse an SVN property change diff from the given file handle, and advance
1002# the handle so the last line read is the first line after this diff.
1003#
1004# For the case of an SVN binary diff, the binary contents will follow the
1005# the property changes.
1006#
1007# This subroutine dies if the first line does not begin with "Property changes on"
1008# or if the separator line that follows this line is missing.
1009#
1010# Args:
1011#   $fileHandle: advanced so the last line read from the handle is the first
1012#                line of the footer to parse.  This line begins with
1013#                "Property changes on".
1014#   $line: the line last read from $fileHandle.
1015#
1016# Returns ($propertyHashRef, $lastReadLine):
1017#   $propertyHashRef: a hash reference representing an SVN diff footer.
1018#     propertyPath: the path of the target file.
1019#     executableBitDelta: the value 1 or -1 if the executable bit was added or
1020#                         removed from the target file, respectively.
1021#   $lastReadLine: the line last read from $fileHandle.
1022sub parseSvnDiffProperties($$)
1023{
1024    my ($fileHandle, $line) = @_;
1025
1026    $_ = $line;
1027
1028    my %footer;
1029    if (/$svnPropertiesStartRegEx/) {
1030        $footer{propertyPath} = $1;
1031    } else {
1032        die("Failed to find start of SVN property change, \"Property changes on \": \"$_\"");
1033    }
1034
1035    # We advance $fileHandle two lines so that the next line that
1036    # we process is $svnPropertyStartRegEx in a well-formed footer.
1037    # A well-formed footer has the form:
1038    # Property changes on: FileA
1039    # ___________________________________________________________________
1040    # Added: svn:executable
1041    #    + *
1042    $_ = <$fileHandle>; # Not defined if end-of-file reached.
1043    my $separator = "_" x 67;
1044    if (defined($_) && /^$separator[\r\n]+$/) {
1045        $_ = <$fileHandle>;
1046    } else {
1047        die("Failed to find separator line: \"$_\".");
1048    }
1049
1050    # FIXME: We should expand this to support other SVN properties
1051    #        (e.g. return a hash of property key-values that represents
1052    #        all properties).
1053    #
1054    # Notice, we keep processing until we hit end-of-file or some
1055    # line that does not resemble $svnPropertyStartRegEx, such as
1056    # the empty line that precedes the start of the binary contents
1057    # of a patch, or the start of the next diff (e.g. "Index:").
1058    my $propertyHashRef;
1059    while (defined($_) && /$svnPropertyStartRegEx/) {
1060        ($propertyHashRef, $_) = parseSvnProperty($fileHandle, $_);
1061        if ($propertyHashRef->{name} eq "svn:executable") {
1062            # Notice, for SVN properties, propertyChangeDelta is always non-zero
1063            # because a property can only be added or removed.
1064            $footer{executableBitDelta} = $propertyHashRef->{propertyChangeDelta};
1065        }
1066    }
1067
1068    return(\%footer, $_);
1069}
1070
1071# Parse the next SVN property from the given file handle, and advance the handle so the last
1072# line read is the first line after the property.
1073#
1074# This subroutine dies if the first line is not a valid start of an SVN property,
1075# or the property is missing a value, or the property change type (e.g. "Added")
1076# does not correspond to the property value type (e.g. "+").
1077#
1078# Args:
1079#   $fileHandle: advanced so the last line read from the handle is the first
1080#                line of the property to parse.  This should be a line
1081#                that matches $svnPropertyStartRegEx.
1082#   $line: the line last read from $fileHandle.
1083#
1084# Returns ($propertyHashRef, $lastReadLine):
1085#   $propertyHashRef: a hash reference representing a SVN property.
1086#     name: the name of the property.
1087#     value: the last property value.  For instance, suppose the property is "Modified".
1088#            Then it has both a '-' and '+' property value in that order.  Therefore,
1089#            the value of this key is the value of the '+' property by ordering (since
1090#            it is the last value).
1091#     propertyChangeDelta: the value 1 or -1 if the property was added or
1092#                          removed, respectively.
1093#   $lastReadLine: the line last read from $fileHandle.
1094sub parseSvnProperty($$)
1095{
1096    my ($fileHandle, $line) = @_;
1097
1098    $_ = $line;
1099
1100    my $propertyName;
1101    my $propertyChangeType;
1102    if (/$svnPropertyStartRegEx/) {
1103        $propertyChangeType = $1;
1104        $propertyName = $2;
1105    } else {
1106        die("Failed to find SVN property: \"$_\".");
1107    }
1108
1109    $_ = <$fileHandle>; # Not defined if end-of-file reached.
1110
1111    # The "svn diff" command neither inserts newline characters between property values
1112    # nor between successive properties.
1113    #
1114    # FIXME: We do not support property values that contain tailing newline characters
1115    #        as it is difficult to disambiguate these trailing newlines from the empty
1116    #        line that precedes the contents of a binary patch.
1117    my $propertyValue;
1118    my $propertyValueType;
1119    while (defined($_) && /$svnPropertyValueStartRegEx/) {
1120        # Note, a '-' property may be followed by a '+' property in the case of a "Modified"
1121        # or "Name" property.  We only care about the ending value (i.e. the '+' property)
1122        # in such circumstances.  So, we take the property value for the property to be its
1123        # last parsed property value.
1124        #
1125        # FIXME: We may want to consider strictly enforcing a '-', '+' property ordering or
1126        #        add error checking to prevent '+', '+', ..., '+' and other invalid combinations.
1127        $propertyValueType = $1;
1128        ($propertyValue, $_) = parseSvnPropertyValue($fileHandle, $_);
1129    }
1130
1131    if (!$propertyValue) {
1132        die("Failed to find the property value for the SVN property \"$propertyName\": \"$_\".");
1133    }
1134
1135    my $propertyChangeDelta;
1136    if ($propertyValueType eq "+" || $propertyValueType eq "Merged") {
1137        $propertyChangeDelta = 1;
1138    } elsif ($propertyValueType eq "-" || $propertyValueType eq "Reverse-merged") {
1139        $propertyChangeDelta = -1;
1140    } else {
1141        die("Not reached.");
1142    }
1143
1144    # We perform a simple validation that an "Added" or "Deleted" property
1145    # change type corresponds with a "+" and "-" value type, respectively.
1146    my $expectedChangeDelta;
1147    if ($propertyChangeType eq "Added") {
1148        $expectedChangeDelta = 1;
1149    } elsif ($propertyChangeType eq "Deleted") {
1150        $expectedChangeDelta = -1;
1151    }
1152
1153    if ($expectedChangeDelta && $propertyChangeDelta != $expectedChangeDelta) {
1154        die("The final property value type found \"$propertyValueType\" does not " .
1155            "correspond to the property change type found \"$propertyChangeType\".");
1156    }
1157
1158    my %propertyHash;
1159    $propertyHash{name} = $propertyName;
1160    $propertyHash{propertyChangeDelta} = $propertyChangeDelta;
1161    $propertyHash{value} = $propertyValue;
1162    return (\%propertyHash, $_);
1163}
1164
1165# Parse the value of an SVN property from the given file handle, and advance
1166# the handle so the last line read is the first line after the property value.
1167#
1168# This subroutine dies if the first line is an invalid SVN property value line
1169# (i.e. a line that does not begin with "   +" or "   -").
1170#
1171# Args:
1172#   $fileHandle: advanced so the last line read from the handle is the first
1173#                line of the property value to parse.  This should be a line
1174#                beginning with "   +" or "   -".
1175#   $line: the line last read from $fileHandle.
1176#
1177# Returns ($propertyValue, $lastReadLine):
1178#   $propertyValue: the value of the property.
1179#   $lastReadLine: the line last read from $fileHandle.
1180sub parseSvnPropertyValue($$)
1181{
1182    my ($fileHandle, $line) = @_;
1183
1184    $_ = $line;
1185
1186    my $propertyValue;
1187    my $eol;
1188    if (/$svnPropertyValueStartRegEx/) {
1189        $propertyValue = $2; # Does not include the end-of-line character(s).
1190        $eol = $POSTMATCH;
1191    } else {
1192        die("Failed to find property value beginning with '+', '-', 'Merged', or 'Reverse-merged': \"$_\".");
1193    }
1194
1195    while (<$fileHandle>) {
1196        if (/^[\r\n]+$/ || /$svnPropertyValueStartRegEx/ || /$svnPropertyStartRegEx/) {
1197            # Note, we may encounter an empty line before the contents of a binary patch.
1198            # Also, we check for $svnPropertyValueStartRegEx because a '-' property may be
1199            # followed by a '+' property in the case of a "Modified" or "Name" property.
1200            # We check for $svnPropertyStartRegEx because it indicates the start of the
1201            # next property to parse.
1202            last;
1203        }
1204
1205        # Temporarily strip off any end-of-line characters. We add the end-of-line characters
1206        # from the previously processed line to the start of this line so that the last line
1207        # of the property value does not end in end-of-line characters.
1208        s/([\n\r]+)$//;
1209        $propertyValue .= "$eol$_";
1210        $eol = $1;
1211    }
1212
1213    return ($propertyValue, $_);
1214}
1215
1216# Parse a patch file created by svn-create-patch.
1217#
1218# Args:
1219#   $fileHandle: A file handle to the patch file that has not yet been
1220#                read from.
1221#   $optionsHashRef: a hash reference representing optional options to use
1222#                    when processing a diff.
1223#     shouldNotUseIndexPathEOL: whether to use the line endings in the diff instead
1224#                               instead of the line endings in the target file; the
1225#                               value of 1 if svnConvertedText should use the line
1226#                               endings in the diff.
1227#
1228# Returns:
1229#   @diffHashRefs: an array of diff hash references.
1230#                  See the %diffHash documentation above.
1231sub parsePatch($;$)
1232{
1233    my ($fileHandle, $optionsHashRef) = @_;
1234
1235    my $newDiffHashRefs;
1236    my @diffHashRefs; # return value
1237
1238    my $line = <$fileHandle>;
1239
1240    while (defined($line)) { # Otherwise, at EOF.
1241
1242        ($newDiffHashRefs, $line) = parseDiff($fileHandle, $line, $optionsHashRef);
1243
1244        push @diffHashRefs, @$newDiffHashRefs;
1245    }
1246
1247    return @diffHashRefs;
1248}
1249
1250# Prepare the results of parsePatch() for use in svn-apply and svn-unapply.
1251#
1252# Args:
1253#   $shouldForce: Whether to continue processing if an unexpected
1254#                 state occurs.
1255#   @diffHashRefs: An array of references to %diffHashes.
1256#                  See the %diffHash documentation above.
1257#
1258# Returns $preparedPatchHashRef:
1259#   copyDiffHashRefs: A reference to an array of the $diffHashRefs in
1260#                     @diffHashRefs that represent file copies. The original
1261#                     ordering is preserved.
1262#   nonCopyDiffHashRefs: A reference to an array of the $diffHashRefs in
1263#                        @diffHashRefs that do not represent file copies.
1264#                        The original ordering is preserved.
1265#   sourceRevisionHash: A reference to a hash of source path to source
1266#                       revision number.
1267sub prepareParsedPatch($@)
1268{
1269    my ($shouldForce, @diffHashRefs) = @_;
1270
1271    my %copiedFiles;
1272
1273    # Return values
1274    my @copyDiffHashRefs = ();
1275    my @nonCopyDiffHashRefs = ();
1276    my %sourceRevisionHash = ();
1277    for my $diffHashRef (@diffHashRefs) {
1278        my $copiedFromPath = $diffHashRef->{copiedFromPath};
1279        my $indexPath = $diffHashRef->{indexPath};
1280        my $sourceRevision = $diffHashRef->{sourceRevision};
1281        my $sourcePath;
1282
1283        if (defined($copiedFromPath)) {
1284            # Then the diff is a copy operation.
1285            $sourcePath = $copiedFromPath;
1286
1287            # FIXME: Consider printing a warning or exiting if
1288            #        exists($copiedFiles{$indexPath}) is true -- i.e. if
1289            #        $indexPath appears twice as a copy target.
1290            $copiedFiles{$indexPath} = $sourcePath;
1291
1292            push @copyDiffHashRefs, $diffHashRef;
1293        } else {
1294            # Then the diff is not a copy operation.
1295            $sourcePath = $indexPath;
1296
1297            push @nonCopyDiffHashRefs, $diffHashRef;
1298        }
1299
1300        if (defined($sourceRevision)) {
1301            if (exists($sourceRevisionHash{$sourcePath}) &&
1302                ($sourceRevisionHash{$sourcePath} != $sourceRevision)) {
1303                if (!$shouldForce) {
1304                    die "Two revisions of the same file required as a source:\n".
1305                        "    $sourcePath:$sourceRevisionHash{$sourcePath}\n".
1306                        "    $sourcePath:$sourceRevision";
1307                }
1308            }
1309            $sourceRevisionHash{$sourcePath} = $sourceRevision;
1310        }
1311    }
1312
1313    my %preparedPatchHash;
1314
1315    $preparedPatchHash{copyDiffHashRefs} = \@copyDiffHashRefs;
1316    $preparedPatchHash{nonCopyDiffHashRefs} = \@nonCopyDiffHashRefs;
1317    $preparedPatchHash{sourceRevisionHash} = \%sourceRevisionHash;
1318
1319    return \%preparedPatchHash;
1320}
1321
1322# Return localtime() for the project's time zone, given an integer time as
1323# returned by Perl's time() function.
1324sub localTimeInProjectTimeZone($)
1325{
1326    my $epochTime = shift;
1327
1328    # Change the time zone temporarily for the localtime() call.
1329    my $savedTimeZone = $ENV{'TZ'};
1330    $ENV{'TZ'} = $changeLogTimeZone;
1331    my @localTime = localtime($epochTime);
1332    if (defined $savedTimeZone) {
1333         $ENV{'TZ'} = $savedTimeZone;
1334    } else {
1335         delete $ENV{'TZ'};
1336    }
1337
1338    return @localTime;
1339}
1340
1341# Set the reviewer and date in a ChangeLog patch, and return the new patch.
1342#
1343# Args:
1344#   $patch: a ChangeLog patch as a string.
1345#   $reviewer: the name of the reviewer, or undef if the reviewer should not be set.
1346#   $epochTime: an integer time as returned by Perl's time() function.
1347sub setChangeLogDateAndReviewer($$$)
1348{
1349    my ($patch, $reviewer, $epochTime) = @_;
1350
1351    my @localTime = localTimeInProjectTimeZone($epochTime);
1352    my $newDate = strftime("%Y-%m-%d", @localTime);
1353
1354    my $firstChangeLogLineRegEx = qr#(\n\+)\d{4}-[^-]{2}-[^-]{2}(  )#;
1355    $patch =~ s/$firstChangeLogLineRegEx/$1$newDate$2/;
1356
1357    if (defined($reviewer)) {
1358        # We include a leading plus ("+") in the regular expression to make
1359        # the regular expression less likely to match text in the leading junk
1360        # for the patch, if the patch has leading junk.
1361        $patch =~ s/(\n\+.*)NOBODY \(OOPS!\)/$1$reviewer/;
1362    }
1363
1364    return $patch;
1365}
1366
1367# If possible, returns a ChangeLog patch equivalent to the given one,
1368# but with the newest ChangeLog entry inserted at the top of the
1369# file -- i.e. no leading context and all lines starting with "+".
1370#
1371# If given a patch string not representable as a patch with the above
1372# properties, it returns the input back unchanged.
1373#
1374# WARNING: This subroutine can return an inequivalent patch string if
1375# both the beginning of the new ChangeLog file matches the beginning
1376# of the source ChangeLog, and the source beginning was modified.
1377# Otherwise, it is guaranteed to return an equivalent patch string,
1378# if it returns.
1379#
1380# Applying this subroutine to ChangeLog patches allows svn-apply to
1381# insert new ChangeLog entries at the top of the ChangeLog file.
1382# svn-apply uses patch with --fuzz=3 to do this. We need to apply
1383# this subroutine because the diff(1) command is greedy when matching
1384# lines. A new ChangeLog entry with the same date and author as the
1385# previous will match and cause the diff to have lines of starting
1386# context.
1387#
1388# This subroutine has unit tests in VCSUtils_unittest.pl.
1389#
1390# Returns $changeLogHashRef:
1391#   $changeLogHashRef: a hash reference representing a change log patch.
1392#     patch: a ChangeLog patch equivalent to the given one, but with the
1393#            newest ChangeLog entry inserted at the top of the file, if possible.
1394sub fixChangeLogPatch($)
1395{
1396    my $patch = shift; # $patch will only contain patch fragments for ChangeLog.
1397
1398    $patch =~ /(\r?\n)/;
1399    my $lineEnding = $1;
1400    my @lines = split(/$lineEnding/, $patch);
1401
1402    my $i = 0; # We reuse the same index throughout.
1403
1404    # Skip to beginning of first chunk.
1405    for (; $i < @lines; ++$i) {
1406        if (substr($lines[$i], 0, 1) eq "@") {
1407            last;
1408        }
1409    }
1410    my $chunkStartIndex = ++$i;
1411    my %changeLogHashRef;
1412
1413    # Optimization: do not process if new lines already begin the chunk.
1414    if (substr($lines[$i], 0, 1) eq "+") {
1415        $changeLogHashRef{patch} = $patch;
1416        return \%changeLogHashRef;
1417    }
1418
1419    # Skip to first line of newly added ChangeLog entry.
1420    # For example, +2009-06-03  Eric Seidel  <eric@webkit.org>
1421    my $dateStartRegEx = '^\+(\d{4}-\d{2}-\d{2})' # leading "+" and date
1422                         . '\s+(.+)\s+' # name
1423                         . '<([^<>]+)>$'; # e-mail address
1424
1425    for (; $i < @lines; ++$i) {
1426        my $line = $lines[$i];
1427        my $firstChar = substr($line, 0, 1);
1428        if ($line =~ /$dateStartRegEx/) {
1429            last;
1430        } elsif ($firstChar eq " " or $firstChar eq "+") {
1431            next;
1432        }
1433        $changeLogHashRef{patch} = $patch; # Do not change if, for example, "-" or "@" found.
1434        return \%changeLogHashRef;
1435    }
1436    if ($i >= @lines) {
1437        $changeLogHashRef{patch} = $patch; # Do not change if date not found.
1438        return \%changeLogHashRef;
1439    }
1440    my $dateStartIndex = $i;
1441
1442    # Rewrite overlapping lines to lead with " ".
1443    my @overlappingLines = (); # These will include a leading "+".
1444    for (; $i < @lines; ++$i) {
1445        my $line = $lines[$i];
1446        if (substr($line, 0, 1) ne "+") {
1447          last;
1448        }
1449        push(@overlappingLines, $line);
1450        $lines[$i] = " " . substr($line, 1);
1451    }
1452
1453    # Remove excess ending context, if necessary.
1454    my $shouldTrimContext = 1;
1455    for (; $i < @lines; ++$i) {
1456        my $firstChar = substr($lines[$i], 0, 1);
1457        if ($firstChar eq " ") {
1458            next;
1459        } elsif ($firstChar eq "@") {
1460            last;
1461        }
1462        $shouldTrimContext = 0; # For example, if "+" or "-" encountered.
1463        last;
1464    }
1465    my $deletedLineCount = 0;
1466    if ($shouldTrimContext) { # Also occurs if end of file reached.
1467        splice(@lines, $i - @overlappingLines, @overlappingLines);
1468        $deletedLineCount = @overlappingLines;
1469    }
1470
1471    # Work backwards, shifting overlapping lines towards front
1472    # while checking that patch stays equivalent.
1473    for ($i = $dateStartIndex - 1; @overlappingLines && $i >= $chunkStartIndex; --$i) {
1474        my $line = $lines[$i];
1475        if (substr($line, 0, 1) ne " ") {
1476            next;
1477        }
1478        my $text = substr($line, 1);
1479        my $newLine = pop(@overlappingLines);
1480        if ($text ne substr($newLine, 1)) {
1481            $changeLogHashRef{patch} = $patch; # Unexpected difference.
1482            return \%changeLogHashRef;
1483        }
1484        $lines[$i] = "+$text";
1485    }
1486
1487    # If @overlappingLines > 0, this is where we make use of the
1488    # assumption that the beginning of the source file was not modified.
1489    splice(@lines, $chunkStartIndex, 0, @overlappingLines);
1490
1491    # Update the date start index as it may have changed after shifting
1492    # the overlapping lines towards the front.
1493    for ($i = $chunkStartIndex; $i < $dateStartIndex; ++$i) {
1494        $dateStartIndex = $i if $lines[$i] =~ /$dateStartRegEx/;
1495    }
1496    splice(@lines, $chunkStartIndex, $dateStartIndex - $chunkStartIndex); # Remove context of later entry.
1497    $deletedLineCount += $dateStartIndex - $chunkStartIndex;
1498
1499    # Update the initial chunk range.
1500    if ($lines[$chunkStartIndex - 1] !~ /$chunkRangeRegEx/) {
1501        # FIXME: Handle errors differently from ChangeLog files that
1502        # are okay but should not be altered. That way we can find out
1503        # if improvements to the script ever become necessary.
1504        $changeLogHashRef{patch} = $patch; # Error: unexpected patch string format.
1505        return \%changeLogHashRef;
1506    }
1507    my $oldSourceLineCount = $2;
1508    my $oldTargetLineCount = $3;
1509
1510    my $sourceLineCount = $oldSourceLineCount + @overlappingLines - $deletedLineCount;
1511    my $targetLineCount = $oldTargetLineCount + @overlappingLines - $deletedLineCount;
1512    $lines[$chunkStartIndex - 1] = "@@ -1,$sourceLineCount +1,$targetLineCount @@";
1513
1514    $changeLogHashRef{patch} = join($lineEnding, @lines) . "\n"; # patch(1) expects an extra trailing newline.
1515    return \%changeLogHashRef;
1516}
1517
1518# This is a supporting method for runPatchCommand.
1519#
1520# Arg: the optional $args parameter passed to runPatchCommand (can be undefined).
1521#
1522# Returns ($patchCommand, $isForcing).
1523#
1524# This subroutine has unit tests in VCSUtils_unittest.pl.
1525sub generatePatchCommand($)
1526{
1527    my ($passedArgsHashRef) = @_;
1528
1529    my $argsHashRef = { # Defaults
1530        ensureForce => 0,
1531        shouldReverse => 0,
1532        options => []
1533    };
1534
1535    # Merges hash references. It's okay here if passed hash reference is undefined.
1536    @{$argsHashRef}{keys %{$passedArgsHashRef}} = values %{$passedArgsHashRef};
1537
1538    my $ensureForce = $argsHashRef->{ensureForce};
1539    my $shouldReverse = $argsHashRef->{shouldReverse};
1540    my $options = $argsHashRef->{options};
1541
1542    if (! $options) {
1543        $options = [];
1544    } else {
1545        $options = [@{$options}]; # Copy to avoid side effects.
1546    }
1547
1548    my $isForcing = 0;
1549    if (grep /^--force$/, @{$options}) {
1550        $isForcing = 1;
1551    } elsif ($ensureForce) {
1552        push @{$options}, "--force";
1553        $isForcing = 1;
1554    }
1555
1556    if ($shouldReverse) { # No check: --reverse should never be passed explicitly.
1557        push @{$options}, "--reverse";
1558    }
1559
1560    @{$options} = sort(@{$options}); # For easier testing.
1561
1562    my $patchCommand = join(" ", "patch -p0", @{$options});
1563
1564    return ($patchCommand, $isForcing);
1565}
1566
1567# Apply the given patch using the patch(1) command.
1568#
1569# On success, return the resulting exit status. Otherwise, exit with the
1570# exit status. If "--force" is passed as an option, however, then never
1571# exit and always return the exit status.
1572#
1573# Args:
1574#   $patch: a patch string.
1575#   $repositoryRootPath: an absolute path to the repository root.
1576#   $pathRelativeToRoot: the path of the file to be patched, relative to the
1577#                        repository root. This should normally be the path
1578#                        found in the patch's "Index:" line. It is passed
1579#                        explicitly rather than reparsed from the patch
1580#                        string for optimization purposes.
1581#                            This is used only for error reporting. The
1582#                        patch command gleans the actual file to patch
1583#                        from the patch string.
1584#   $args: a reference to a hash of optional arguments. The possible
1585#          keys are --
1586#            ensureForce: whether to ensure --force is passed (defaults to 0).
1587#            shouldReverse: whether to pass --reverse (defaults to 0).
1588#            options: a reference to an array of options to pass to the
1589#                     patch command. The subroutine passes the -p0 option
1590#                     no matter what. This should not include --reverse.
1591#
1592# This subroutine has unit tests in VCSUtils_unittest.pl.
1593sub runPatchCommand($$$;$)
1594{
1595    my ($patch, $repositoryRootPath, $pathRelativeToRoot, $args) = @_;
1596
1597    my ($patchCommand, $isForcing) = generatePatchCommand($args);
1598
1599    # Temporarily change the working directory since the path found
1600    # in the patch's "Index:" line is relative to the repository root
1601    # (i.e. the same as $pathRelativeToRoot).
1602    my $cwd = Cwd::getcwd();
1603    chdir $repositoryRootPath;
1604
1605    open PATCH, "| $patchCommand" or die "Could not call \"$patchCommand\" for file \"$pathRelativeToRoot\": $!";
1606    print PATCH $patch;
1607    close PATCH;
1608    my $exitStatus = exitStatus($?);
1609
1610    chdir $cwd;
1611
1612    if ($exitStatus && !$isForcing) {
1613        print "Calling \"$patchCommand\" for file \"$pathRelativeToRoot\" returned " .
1614              "status $exitStatus.  Pass --force to ignore patch failures.\n";
1615        exit $exitStatus;
1616    }
1617
1618    return $exitStatus;
1619}
1620
1621# Merge ChangeLog patches using a three-file approach.
1622#
1623# This is used by resolve-ChangeLogs when it's operated as a merge driver
1624# and when it's used to merge conflicts after a patch is applied or after
1625# an svn update.
1626#
1627# It's also used for traditional rejected patches.
1628#
1629# Args:
1630#   $fileMine:  The merged version of the file.  Also known in git as the
1631#               other branch's version (%B) or "ours".
1632#               For traditional patch rejects, this is the *.rej file.
1633#   $fileOlder: The base version of the file.  Also known in git as the
1634#               ancestor version (%O) or "base".
1635#               For traditional patch rejects, this is the *.orig file.
1636#   $fileNewer: The current version of the file.  Also known in git as the
1637#               current version (%A) or "theirs".
1638#               For traditional patch rejects, this is the original-named
1639#               file.
1640#
1641# Returns 1 if merge was successful, else 0.
1642sub mergeChangeLogs($$$)
1643{
1644    my ($fileMine, $fileOlder, $fileNewer) = @_;
1645
1646    my $traditionalReject = $fileMine =~ /\.rej$/ ? 1 : 0;
1647
1648    local $/ = undef;
1649
1650    my $patch;
1651    if ($traditionalReject) {
1652        open(DIFF, "<", $fileMine) or die $!;
1653        $patch = <DIFF>;
1654        close(DIFF);
1655        rename($fileMine, "$fileMine.save");
1656        rename($fileOlder, "$fileOlder.save");
1657    } else {
1658        open(DIFF, "-|", qw(diff -u -a --binary), $fileOlder, $fileMine) or die $!;
1659        $patch = <DIFF>;
1660        close(DIFF);
1661    }
1662
1663    unlink("${fileNewer}.orig");
1664    unlink("${fileNewer}.rej");
1665
1666    open(PATCH, "| patch --force --fuzz=3 --binary $fileNewer > " . File::Spec->devnull()) or die $!;
1667    if ($traditionalReject) {
1668        print PATCH $patch;
1669    } else {
1670        my $changeLogHash = fixChangeLogPatch($patch);
1671        print PATCH $changeLogHash->{patch};
1672    }
1673    close(PATCH);
1674
1675    my $result = !exitStatus($?);
1676
1677    # Refuse to merge the patch if it did not apply cleanly
1678    if (-e "${fileNewer}.rej") {
1679        unlink("${fileNewer}.rej");
1680        if (-f "${fileNewer}.orig") {
1681            unlink($fileNewer);
1682            rename("${fileNewer}.orig", $fileNewer);
1683        }
1684    } else {
1685        unlink("${fileNewer}.orig");
1686    }
1687
1688    if ($traditionalReject) {
1689        rename("$fileMine.save", $fileMine);
1690        rename("$fileOlder.save", $fileOlder);
1691    }
1692
1693    return $result;
1694}
1695
1696sub gitConfig($)
1697{
1698    return unless $isGit;
1699
1700    my ($config) = @_;
1701
1702    my $result = `git config $config`;
1703    if (($? >> 8)) {
1704        $result = `git repo-config $config`;
1705    }
1706    chomp $result;
1707    return $result;
1708}
1709
1710sub changeLogNameError($)
1711{
1712    my ($message) = @_;
1713    print STDERR "$message\nEither:\n";
1714    print STDERR "  set CHANGE_LOG_NAME in your environment\n";
1715    print STDERR "  OR pass --name= on the command line\n";
1716    print STDERR "  OR set REAL_NAME in your environment";
1717    print STDERR "  OR git users can set 'git config user.name'\n";
1718    exit(1);
1719}
1720
1721sub changeLogName()
1722{
1723    my $name = $ENV{CHANGE_LOG_NAME} || $ENV{REAL_NAME} || gitConfig("user.name") || (split /\s*,\s*/, (getpwuid $<)[6])[0];
1724
1725    changeLogNameError("Failed to determine ChangeLog name.") unless $name;
1726    # getpwuid seems to always succeed on windows, returning the username instead of the full name.  This check will catch that case.
1727    changeLogNameError("'$name' does not contain a space!  ChangeLogs should contain your full name.") unless ($name =~ /\w \w/);
1728
1729    return $name;
1730}
1731
1732sub changeLogEmailAddressError($)
1733{
1734    my ($message) = @_;
1735    print STDERR "$message\nEither:\n";
1736    print STDERR "  set CHANGE_LOG_EMAIL_ADDRESS in your environment\n";
1737    print STDERR "  OR pass --email= on the command line\n";
1738    print STDERR "  OR set EMAIL_ADDRESS in your environment\n";
1739    print STDERR "  OR git users can set 'git config user.email'\n";
1740    exit(1);
1741}
1742
1743sub changeLogEmailAddress()
1744{
1745    my $emailAddress = $ENV{CHANGE_LOG_EMAIL_ADDRESS} || $ENV{EMAIL_ADDRESS} || gitConfig("user.email");
1746
1747    changeLogEmailAddressError("Failed to determine email address for ChangeLog.") unless $emailAddress;
1748    changeLogEmailAddressError("Email address '$emailAddress' does not contain '\@' and is likely invalid.") unless ($emailAddress =~ /\@/);
1749
1750    return $emailAddress;
1751}
1752
1753# http://tools.ietf.org/html/rfc1924
1754sub decodeBase85($)
1755{
1756    my ($encoded) = @_;
1757    my %table;
1758    my @characters = ('0'..'9', 'A'..'Z', 'a'..'z', '!', '#', '$', '%', '&', '(', ')', '*', '+', '-', ';', '<', '=', '>', '?', '@', '^', '_', '`', '{', '|', '}', '~');
1759    for (my $i = 0; $i < 85; $i++) {
1760        $table{$characters[$i]} = $i;
1761    }
1762
1763    my $decoded = '';
1764    my @encodedChars = $encoded =~ /./g;
1765
1766    for (my $encodedIter = 0; defined($encodedChars[$encodedIter]);) {
1767        my $digit = 0;
1768        for (my $i = 0; $i < 5; $i++) {
1769            $digit *= 85;
1770            my $char = $encodedChars[$encodedIter];
1771            $digit += $table{$char};
1772            $encodedIter++;
1773        }
1774
1775        for (my $i = 0; $i < 4; $i++) {
1776            $decoded .= chr(($digit >> (3 - $i) * 8) & 255);
1777        }
1778    }
1779
1780    return $decoded;
1781}
1782
1783sub decodeGitBinaryChunk($$)
1784{
1785    my ($contents, $fullPath) = @_;
1786
1787    # Load this module lazily in case the user don't have this module
1788    # and won't handle git binary patches.
1789    require Compress::Zlib;
1790
1791    my $encoded = "";
1792    my $compressedSize = 0;
1793    while ($contents =~ /^([A-Za-z])(.*)$/gm) {
1794        my $line = $2;
1795        next if $line eq "";
1796        die "$fullPath: unexpected size of a line: $&" if length($2) % 5 != 0;
1797        my $actualSize = length($2) / 5 * 4;
1798        my $encodedExpectedSize = ord($1);
1799        my $expectedSize = $encodedExpectedSize <= ord("Z") ? $encodedExpectedSize - ord("A") + 1 : $encodedExpectedSize - ord("a") + 27;
1800
1801        die "$fullPath: unexpected size of a line: $&" if int(($expectedSize + 3) / 4) * 4 != $actualSize;
1802        $compressedSize += $expectedSize;
1803        $encoded .= $line;
1804    }
1805
1806    my $compressed = decodeBase85($encoded);
1807    $compressed = substr($compressed, 0, $compressedSize);
1808    return Compress::Zlib::uncompress($compressed);
1809}
1810
1811sub decodeGitBinaryPatch($$)
1812{
1813    my ($contents, $fullPath) = @_;
1814
1815    # Git binary patch has two chunks. One is for the normal patching
1816    # and another is for the reverse patching.
1817    #
1818    # Each chunk a line which starts from either "literal" or "delta",
1819    # followed by a number which specifies decoded size of the chunk.
1820    #
1821    # Then, content of the chunk comes. To decode the content, we
1822    # need decode it with base85 first, and then zlib.
1823    my $gitPatchRegExp = '(literal|delta) ([0-9]+)\n([A-Za-z0-9!#$%&()*+-;<=>?@^_`{|}~\\n]*?)\n\n';
1824    if ($contents !~ m"\nGIT binary patch\n$gitPatchRegExp$gitPatchRegExp\Z") {
1825        die "$fullPath: unknown git binary patch format"
1826    }
1827
1828    my $binaryChunkType = $1;
1829    my $binaryChunkExpectedSize = $2;
1830    my $encodedChunk = $3;
1831    my $reverseBinaryChunkType = $4;
1832    my $reverseBinaryChunkExpectedSize = $5;
1833    my $encodedReverseChunk = $6;
1834
1835    my $binaryChunk = decodeGitBinaryChunk($encodedChunk, $fullPath);
1836    my $binaryChunkActualSize = length($binaryChunk);
1837    my $reverseBinaryChunk = decodeGitBinaryChunk($encodedReverseChunk, $fullPath);
1838    my $reverseBinaryChunkActualSize = length($reverseBinaryChunk);
1839
1840    die "$fullPath: unexpected size of the first chunk (expected $binaryChunkExpectedSize but was $binaryChunkActualSize" if ($binaryChunkType eq "literal" and $binaryChunkExpectedSize != $binaryChunkActualSize);
1841    die "$fullPath: unexpected size of the second chunk (expected $reverseBinaryChunkExpectedSize but was $reverseBinaryChunkActualSize" if ($reverseBinaryChunkType eq "literal" and $reverseBinaryChunkExpectedSize != $reverseBinaryChunkActualSize);
1842
1843    return ($binaryChunkType, $binaryChunk, $reverseBinaryChunkType, $reverseBinaryChunk);
1844}
1845
1846sub readByte($$)
1847{
1848    my ($data, $location) = @_;
1849
1850    # Return the byte at $location in $data as a numeric value.
1851    return ord(substr($data, $location, 1));
1852}
1853
1854# The git binary delta format is undocumented, except in code:
1855# - https://github.com/git/git/blob/master/delta.h:get_delta_hdr_size is the source
1856#   of the algorithm in decodeGitBinaryPatchDeltaSize.
1857# - https://github.com/git/git/blob/master/patch-delta.c:patch_delta is the source
1858#   of the algorithm in applyGitBinaryPatchDelta.
1859sub decodeGitBinaryPatchDeltaSize($)
1860{
1861    my ($binaryChunk) = @_;
1862
1863    # Source and destination buffer sizes are stored in 7-bit chunks at the
1864    # start of the binary delta patch data.  The highest bit in each byte
1865    # except the last is set; the remaining 7 bits provide the next
1866    # chunk of the size.  The chunks are stored in ascending significance
1867    # order.
1868    my $cmd;
1869    my $size = 0;
1870    my $shift = 0;
1871    for (my $i = 0; $i < length($binaryChunk);) {
1872        $cmd = readByte($binaryChunk, $i++);
1873        $size |= ($cmd & 0x7f) << $shift;
1874        $shift += 7;
1875        if (!($cmd & 0x80)) {
1876            return ($size, $i);
1877        }
1878    }
1879}
1880
1881sub applyGitBinaryPatchDelta($$)
1882{
1883    my ($binaryChunk, $originalContents) = @_;
1884
1885    # Git delta format consists of two headers indicating source buffer size
1886    # and result size, then a series of commands.  Each command is either
1887    # a copy-from-old-version (the 0x80 bit is set) or a copy-from-delta
1888    # command.  Commands are applied sequentially to generate the result.
1889    #
1890    # A copy-from-old-version command encodes an offset and size to copy
1891    # from in subsequent bits, while a copy-from-delta command consists only
1892    # of the number of bytes to copy from the delta.
1893
1894    # We don't use these values, but we need to know how big they are so that
1895    # we can skip to the diff data.
1896    my ($size, $bytesUsed) = decodeGitBinaryPatchDeltaSize($binaryChunk);
1897    $binaryChunk = substr($binaryChunk, $bytesUsed);
1898    ($size, $bytesUsed) = decodeGitBinaryPatchDeltaSize($binaryChunk);
1899    $binaryChunk = substr($binaryChunk, $bytesUsed);
1900
1901    my $out = "";
1902    for (my $i = 0; $i < length($binaryChunk); ) {
1903        my $cmd = ord(substr($binaryChunk, $i++, 1));
1904        if ($cmd & 0x80) {
1905            # Extract an offset and size from the delta data, then copy
1906            # $size bytes from $offset in the original data into the output.
1907            my $offset = 0;
1908            my $size = 0;
1909            if ($cmd & 0x01) { $offset = readByte($binaryChunk, $i++); }
1910            if ($cmd & 0x02) { $offset |= readByte($binaryChunk, $i++) << 8; }
1911            if ($cmd & 0x04) { $offset |= readByte($binaryChunk, $i++) << 16; }
1912            if ($cmd & 0x08) { $offset |= readByte($binaryChunk, $i++) << 24; }
1913            if ($cmd & 0x10) { $size = readByte($binaryChunk, $i++); }
1914            if ($cmd & 0x20) { $size |= readByte($binaryChunk, $i++) << 8; }
1915            if ($cmd & 0x40) { $size |= readByte($binaryChunk, $i++) << 16; }
1916            if ($size == 0) { $size = 0x10000; }
1917            $out .= substr($originalContents, $offset, $size);
1918        } elsif ($cmd) {
1919            # Copy $cmd bytes from the delta data into the output.
1920            $out .= substr($binaryChunk, $i, $cmd);
1921            $i += $cmd;
1922        } else {
1923            die "unexpected delta opcode 0";
1924        }
1925    }
1926
1927    return $out;
1928}
1929
19301;
1931