• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 # Copyright (C) 2007, 2008, 2009 Apple Inc.  All rights reserved.
2 # Copyright (C) 2009, 2010 Chris Jerdonek (chris.jerdonek@gmail.com)
3 # Copyright (C) 2010, 2011 Research In Motion Limited. All rights reserved.
4 #
5 # Redistribution and use in source and binary forms, with or without
6 # modification, are permitted provided that the following conditions
7 # are met:
8 #
9 # 1.  Redistributions of source code must retain the above copyright
10 #     notice, this list of conditions and the following disclaimer.
11 # 2.  Redistributions in binary form must reproduce the above copyright
12 #     notice, this list of conditions and the following disclaimer in the
13 #     documentation and/or other materials provided with the distribution.
14 # 3.  Neither the name of Apple Computer, Inc. ("Apple") nor the names of
15 #     its contributors may be used to endorse or promote products derived
16 #     from this software without specific prior written permission.
17 #
18 # THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
19 # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 # DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
22 # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23 # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24 # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
25 # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 
29 # Module to share code to work with various version control systems.
30 package VCSUtils;
31 
32 use strict;
33 use warnings;
34 
35 use Cwd qw();  # "qw()" prevents warnings about redefining getcwd() with "use POSIX;"
36 use English; # for $POSTMATCH, etc.
37 use File::Basename;
38 use File::Spec;
39 use POSIX;
40 
41 BEGIN {
42     use Exporter   ();
43     our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);
44     $VERSION     = 1.00;
45     @ISA         = qw(Exporter);
46     @EXPORT      = qw(
47         &applyGitBinaryPatchDelta
48         &callSilently
49         &canonicalizePath
50         &changeLogEmailAddress
51         &changeLogName
52         &chdirReturningRelativePath
53         &decodeGitBinaryChunk
54         &decodeGitBinaryPatch
55         &determineSVNRoot
56         &determineVCSRoot
57         &exitStatus
58         &fixChangeLogPatch
59         &gitBranch
60         &gitdiff2svndiff
61         &isGit
62         &isGitSVN
63         &isGitBranchBuild
64         &isGitDirectory
65         &isSVN
66         &isSVNDirectory
67         &isSVNVersion16OrNewer
68         &makeFilePathRelative
69         &mergeChangeLogs
70         &normalizePath
71         &parseFirstEOL
72         &parsePatch
73         &pathRelativeToSVNRepositoryRootForPath
74         &prepareParsedPatch
75         &removeEOL
76         &runPatchCommand
77         &scmMoveOrRenameFile
78         &scmToggleExecutableBit
79         &setChangeLogDateAndReviewer
80         &svnRevisionForDirectory
81         &svnStatus
82         &toWindowsLineEndings
83     );
84     %EXPORT_TAGS = ( );
85     @EXPORT_OK   = ();
86 }
87 
88 our @EXPORT_OK;
89 
90 my $gitBranch;
91 my $gitRoot;
92 my $isGit;
93 my $isGitSVN;
94 my $isGitBranchBuild;
95 my $isSVN;
96 my $svnVersion;
97 
98 # Project time zone for Cupertino, CA, US
99 my $changeLogTimeZone = "PST8PDT";
100 
101 my $chunkRangeRegEx = qr#^\@\@ -(\d+),(\d+) \+\d+,(\d+) \@\@$#; # e.g. @@ -2,6 +2,18 @@
102 my $gitDiffStartRegEx = qr#^diff --git (\w/)?(.+) (\w/)?([^\r\n]+)#;
103 my $svnDiffStartRegEx = qr#^Index: ([^\r\n]+)#;
104 my $svnPropertiesStartRegEx = qr#^Property changes on: ([^\r\n]+)#; # $1 is normally the same as the index path.
105 my $svnPropertyStartRegEx = qr#^(Modified|Name|Added|Deleted): ([^\r\n]+)#; # $2 is the name of the property.
106 my $svnPropertyValueStartRegEx = qr#^   (\+|-|Merged|Reverse-merged) ([^\r\n]+)#; # $2 is the start of the property's value (which may span multiple lines).
107 
108 # This method is for portability. Return the system-appropriate exit
109 # status of a child process.
110 #
111 # Args: pass the child error status returned by the last pipe close,
112 #       for example "$?".
113 sub exitStatus($)
114 {
115     my ($returnvalue) = @_;
116     if ($^O eq "MSWin32") {
117         return $returnvalue >> 8;
118     }
119     return WEXITSTATUS($returnvalue);
120 }
121 
122 # Call a function while suppressing STDERR, and return the return values
123 # as an array.
124 sub callSilently($@) {
125     my ($func, @args) = @_;
126 
127     # The following pattern was taken from here:
128     #   http://www.sdsc.edu/~moreland/courses/IntroPerl/docs/manual/pod/perlfunc/open.html
129     #
130     # Also see this Perl documentation (search for "open OLDERR"):
131     #   http://perldoc.perl.org/functions/open.html
132     open(OLDERR, ">&STDERR");
133     close(STDERR);
134     my @returnValue = &$func(@args);
135     open(STDERR, ">&OLDERR");
136     close(OLDERR);
137 
138     return @returnValue;
139 }
140 
141 sub toWindowsLineEndings
142 {
143     my ($text) = @_;
144     $text =~ s/\n/\r\n/g;
145     return $text;
146 }
147 
148 # Note, this method will not error if the file corresponding to the $source path does not exist.
149 sub scmMoveOrRenameFile
150 {
151     my ($source, $destination) = @_;
152     return if ! -e $source;
153     if (isSVN()) {
154         system("svn", "move", $source, $destination);
155     } elsif (isGit()) {
156         system("git", "mv", $source, $destination);
157     }
158 }
159 
160 # Note, this method will not error if the file corresponding to the path does not exist.
161 sub scmToggleExecutableBit
162 {
163     my ($path, $executableBitDelta) = @_;
164     return if ! -e $path;
165     if ($executableBitDelta == 1) {
166         scmAddExecutableBit($path);
167     } elsif ($executableBitDelta == -1) {
168         scmRemoveExecutableBit($path);
169     }
170 }
171 
172 sub scmAddExecutableBit($)
173 {
174     my ($path) = @_;
175 
176     if (isSVN()) {
177         system("svn", "propset", "svn:executable", "on", $path) == 0 or die "Failed to run 'svn propset svn:executable on $path'.";
178     } elsif (isGit()) {
179         chmod(0755, $path);
180     }
181 }
182 
183 sub scmRemoveExecutableBit($)
184 {
185     my ($path) = @_;
186 
187     if (isSVN()) {
188         system("svn", "propdel", "svn:executable", $path) == 0 or die "Failed to run 'svn propdel svn:executable $path'.";
189     } elsif (isGit()) {
190         chmod(0664, $path);
191     }
192 }
193 
194 sub isGitDirectory($)
195 {
196     my ($dir) = @_;
197     return system("cd $dir && git rev-parse > " . File::Spec->devnull() . " 2>&1") == 0;
198 }
199 
200 sub isGit()
201 {
202     return $isGit if defined $isGit;
203 
204     $isGit = isGitDirectory(".");
205     return $isGit;
206 }
207 
208 sub isGitSVN()
209 {
210     return $isGitSVN if defined $isGitSVN;
211 
212     # There doesn't seem to be an officially documented way to determine
213     # if you're in a git-svn checkout. The best suggestions seen so far
214     # all use something like the following:
215     my $output = `git config --get svn-remote.svn.fetch 2>& 1`;
216     $isGitSVN = $output ne '';
217     return $isGitSVN;
218 }
219 
220 sub gitBranch()
221 {
222     unless (defined $gitBranch) {
223         chomp($gitBranch = `git symbolic-ref -q HEAD`);
224         $gitBranch = "" if exitStatus($?);
225         $gitBranch =~ s#^refs/heads/##;
226         $gitBranch = "" if $gitBranch eq "master";
227     }
228 
229     return $gitBranch;
230 }
231 
232 sub isGitBranchBuild()
233 {
234     my $branch = gitBranch();
235     chomp(my $override = `git config --bool branch.$branch.webKitBranchBuild`);
236     return 1 if $override eq "true";
237     return 0 if $override eq "false";
238 
239     unless (defined $isGitBranchBuild) {
240         chomp(my $gitBranchBuild = `git config --bool core.webKitBranchBuild`);
241         $isGitBranchBuild = $gitBranchBuild eq "true";
242     }
243 
244     return $isGitBranchBuild;
245 }
246 
247 sub isSVNDirectory($)
248 {
249     my ($dir) = @_;
250 
251     return -d File::Spec->catdir($dir, ".svn");
252 }
253 
254 sub isSVN()
255 {
256     return $isSVN if defined $isSVN;
257 
258     $isSVN = isSVNDirectory(".");
259     return $isSVN;
260 }
261 
262 sub svnVersion()
263 {
264     return $svnVersion if defined $svnVersion;
265 
266     if (!isSVN()) {
267         $svnVersion = 0;
268     } else {
269         chomp($svnVersion = `svn --version --quiet`);
270     }
271     return $svnVersion;
272 }
273 
274 sub isSVNVersion16OrNewer()
275 {
276     my $version = svnVersion();
277     return eval "v$version" ge v1.6;
278 }
279 
280 sub chdirReturningRelativePath($)
281 {
282     my ($directory) = @_;
283     my $previousDirectory = Cwd::getcwd();
284     chdir $directory;
285     my $newDirectory = Cwd::getcwd();
286     return "." if $newDirectory eq $previousDirectory;
287     return File::Spec->abs2rel($previousDirectory, $newDirectory);
288 }
289 
290 sub determineGitRoot()
291 {
292     chomp(my $gitDir = `git rev-parse --git-dir`);
293     return dirname($gitDir);
294 }
295 
296 sub determineSVNRoot()
297 {
298     my $last = '';
299     my $path = '.';
300     my $parent = '..';
301     my $repositoryRoot;
302     my $repositoryUUID;
303     while (1) {
304         my $thisRoot;
305         my $thisUUID;
306         # Ignore error messages in case we've run past the root of the checkout.
307         open INFO, "svn info '$path' 2> " . File::Spec->devnull() . " |" or die;
308         while (<INFO>) {
309             if (/^Repository Root: (.+)/) {
310                 $thisRoot = $1;
311             }
312             if (/^Repository UUID: (.+)/) {
313                 $thisUUID = $1;
314             }
315             if ($thisRoot && $thisUUID) {
316                 local $/ = undef;
317                 <INFO>; # Consume the rest of the input.
318             }
319         }
320         close INFO;
321 
322         # It's possible (e.g. for developers of some ports) to have a WebKit
323         # checkout in a subdirectory of another checkout.  So abort if the
324         # repository root or the repository UUID suddenly changes.
325         last if !$thisUUID;
326         $repositoryUUID = $thisUUID if !$repositoryUUID;
327         last if $thisUUID ne $repositoryUUID;
328 
329         last if !$thisRoot;
330         $repositoryRoot = $thisRoot if !$repositoryRoot;
331         last if $thisRoot ne $repositoryRoot;
332 
333         $last = $path;
334         $path = File::Spec->catdir($parent, $path);
335     }
336 
337     return File::Spec->rel2abs($last);
338 }
339 
340 sub determineVCSRoot()
341 {
342     if (isGit()) {
343         return determineGitRoot();
344     }
345 
346     if (!isSVN()) {
347         # Some users have a workflow where svn-create-patch, svn-apply and
348         # svn-unapply are used outside of multiple svn working directores,
349         # so warn the user and assume Subversion is being used in this case.
350         warn "Unable to determine VCS root; assuming Subversion";
351         $isSVN = 1;
352     }
353 
354     return determineSVNRoot();
355 }
356 
357 sub svnRevisionForDirectory($)
358 {
359     my ($dir) = @_;
360     my $revision;
361 
362     if (isSVNDirectory($dir)) {
363         my $svnInfo = `LC_ALL=C svn info $dir | grep Revision:`;
364         ($revision) = ($svnInfo =~ m/Revision: (\d+).*/g);
365     } elsif (isGitDirectory($dir)) {
366         my $gitLog = `cd $dir && LC_ALL=C git log --grep='git-svn-id: ' -n 1 | grep git-svn-id:`;
367         ($revision) = ($gitLog =~ m/ +git-svn-id: .+@(\d+) /g);
368     }
369     die "Unable to determine current SVN revision in $dir" unless (defined $revision);
370     return $revision;
371 }
372 
373 sub pathRelativeToSVNRepositoryRootForPath($)
374 {
375     my ($file) = @_;
376     my $relativePath = File::Spec->abs2rel($file);
377 
378     my $svnInfo;
379     if (isSVN()) {
380         $svnInfo = `LC_ALL=C svn info $relativePath`;
381     } elsif (isGit()) {
382         $svnInfo = `LC_ALL=C git svn info $relativePath`;
383     }
384 
385     $svnInfo =~ /.*^URL: (.*?)$/m;
386     my $svnURL = $1;
387 
388     $svnInfo =~ /.*^Repository Root: (.*?)$/m;
389     my $repositoryRoot = $1;
390 
391     $svnURL =~ s/$repositoryRoot\///;
392     return $svnURL;
393 }
394 
395 sub makeFilePathRelative($)
396 {
397     my ($path) = @_;
398     return $path unless isGit();
399 
400     unless (defined $gitRoot) {
401         chomp($gitRoot = `git rev-parse --show-cdup`);
402     }
403     return $gitRoot . $path;
404 }
405 
406 sub normalizePath($)
407 {
408     my ($path) = @_;
409     $path =~ s/\\/\//g;
410     return $path;
411 }
412 
413 sub adjustPathForRecentRenamings($)
414 {
415     my ($fullPath) = @_;
416 
417     if ($fullPath =~ m|^WebCore/|
418         || $fullPath =~ m|^JavaScriptCore/|
419         || $fullPath =~ m|^WebKit/|
420         || $fullPath =~ m|^WebKit2/|) {
421         return "Source/$fullPath";
422     }
423     return $fullPath;
424 }
425 
426 sub canonicalizePath($)
427 {
428     my ($file) = @_;
429 
430     # Remove extra slashes and '.' directories in path
431     $file = File::Spec->canonpath($file);
432 
433     # Remove '..' directories in path
434     my @dirs = ();
435     foreach my $dir (File::Spec->splitdir($file)) {
436         if ($dir eq '..' && $#dirs >= 0 && $dirs[$#dirs] ne '..') {
437             pop(@dirs);
438         } else {
439             push(@dirs, $dir);
440         }
441     }
442     return ($#dirs >= 0) ? File::Spec->catdir(@dirs) : ".";
443 }
444 
445 sub removeEOL($)
446 {
447     my ($line) = @_;
448     return "" unless $line;
449 
450     $line =~ s/[\r\n]+$//g;
451     return $line;
452 }
453 
454 sub parseFirstEOL($)
455 {
456     my ($fileHandle) = @_;
457 
458     # Make input record separator the new-line character to simplify regex matching below.
459     my $savedInputRecordSeparator = $INPUT_RECORD_SEPARATOR;
460     $INPUT_RECORD_SEPARATOR = "\n";
461     my $firstLine  = <$fileHandle>;
462     $INPUT_RECORD_SEPARATOR = $savedInputRecordSeparator;
463 
464     return unless defined($firstLine);
465 
466     my $eol;
467     if ($firstLine =~ /\r\n/) {
468         $eol = "\r\n";
469     } elsif ($firstLine =~ /\r/) {
470         $eol = "\r";
471     } elsif ($firstLine =~ /\n/) {
472         $eol = "\n";
473     }
474     return $eol;
475 }
476 
477 sub firstEOLInFile($)
478 {
479     my ($file) = @_;
480     my $eol;
481     if (open(FILE, $file)) {
482         $eol = parseFirstEOL(*FILE);
483         close(FILE);
484     }
485     return $eol;
486 }
487 
488 sub svnStatus($)
489 {
490     my ($fullPath) = @_;
491     my $svnStatus;
492     open SVN, "svn status --non-interactive --non-recursive '$fullPath' |" or die;
493     if (-d $fullPath) {
494         # When running "svn stat" on a directory, we can't assume that only one
495         # status will be returned (since any files with a status below the
496         # directory will be returned), and we can't assume that the directory will
497         # be first (since any files with unknown status will be listed first).
498         my $normalizedFullPath = File::Spec->catdir(File::Spec->splitdir($fullPath));
499         while (<SVN>) {
500             # Input may use a different EOL sequence than $/, so avoid chomp.
501             $_ = removeEOL($_);
502             my $normalizedStatPath = File::Spec->catdir(File::Spec->splitdir(substr($_, 7)));
503             if ($normalizedFullPath eq $normalizedStatPath) {
504                 $svnStatus = "$_\n";
505                 last;
506             }
507         }
508         # Read the rest of the svn command output to avoid a broken pipe warning.
509         local $/ = undef;
510         <SVN>;
511     }
512     else {
513         # Files will have only one status returned.
514         $svnStatus = removeEOL(<SVN>) . "\n";
515     }
516     close SVN;
517     return $svnStatus;
518 }
519 
520 # Return whether the given file mode is executable in the source control
521 # sense.  We make this determination based on whether the executable bit
522 # is set for "others" rather than the stronger condition that it be set
523 # for the user, group, and others.  This is sufficient for distinguishing
524 # the default behavior in Git and SVN.
525 #
526 # Args:
527 #   $fileMode: A number or string representing a file mode in octal notation.
528 sub isExecutable($)
529 {
530     my $fileMode = shift;
531 
532     return $fileMode % 2;
533 }
534 
535 # Parse the next Git diff header from the given file handle, and advance
536 # the handle so the last line read is the first line after the header.
537 #
538 # This subroutine dies if given leading junk.
539 #
540 # Args:
541 #   $fileHandle: advanced so the last line read from the handle is the first
542 #                line of the header to parse.  This should be a line
543 #                beginning with "diff --git".
544 #   $line: the line last read from $fileHandle
545 #
546 # Returns ($headerHashRef, $lastReadLine):
547 #   $headerHashRef: a hash reference representing a diff header, as follows--
548 #     copiedFromPath: the path from which the file was copied or moved if
549 #                     the diff is a copy or move.
550 #     executableBitDelta: the value 1 or -1 if the executable bit was added or
551 #                         removed, respectively.  New and deleted files have
552 #                         this value only if the file is executable, in which
553 #                         case the value is 1 and -1, respectively.
554 #     indexPath: the path of the target file.
555 #     isBinary: the value 1 if the diff is for a binary file.
556 #     isDeletion: the value 1 if the diff is a file deletion.
557 #     isCopyWithChanges: the value 1 if the file was copied or moved and
558 #                        the target file was changed in some way after being
559 #                        copied or moved (e.g. if its contents or executable
560 #                        bit were changed).
561 #     isNew: the value 1 if the diff is for a new file.
562 #     shouldDeleteSource: the value 1 if the file was copied or moved and
563 #                         the source file was deleted -- i.e. if the copy
564 #                         was actually a move.
565 #     svnConvertedText: the header text with some lines converted to SVN
566 #                       format.  Git-specific lines are preserved.
567 #   $lastReadLine: the line last read from $fileHandle.
568 sub parseGitDiffHeader($$)
569 {
570     my ($fileHandle, $line) = @_;
571 
572     $_ = $line;
573 
574     my $indexPath;
575     if (/$gitDiffStartRegEx/) {
576         # The first and second paths can differ in the case of copies
577         # and renames.  We use the second file path because it is the
578         # destination path.
579         $indexPath = adjustPathForRecentRenamings($4);
580         # Use $POSTMATCH to preserve the end-of-line character.
581         $_ = "Index: $indexPath$POSTMATCH"; # Convert to SVN format.
582     } else {
583         die("Could not parse leading \"diff --git\" line: \"$line\".");
584     }
585 
586     my $copiedFromPath;
587     my $foundHeaderEnding;
588     my $isBinary;
589     my $isDeletion;
590     my $isNew;
591     my $newExecutableBit = 0;
592     my $oldExecutableBit = 0;
593     my $shouldDeleteSource = 0;
594     my $similarityIndex = 0;
595     my $svnConvertedText;
596     while (1) {
597         # Temporarily strip off any end-of-line characters to simplify
598         # regex matching below.
599         s/([\n\r]+)$//;
600         my $eol = $1;
601 
602         if (/^(deleted file|old) mode (\d+)/) {
603             $oldExecutableBit = (isExecutable($2) ? 1 : 0);
604             $isDeletion = 1 if $1 eq "deleted file";
605         } elsif (/^new( file)? mode (\d+)/) {
606             $newExecutableBit = (isExecutable($2) ? 1 : 0);
607             $isNew = 1 if $1;
608         } elsif (/^similarity index (\d+)%/) {
609             $similarityIndex = $1;
610         } elsif (/^copy from (\S+)/) {
611             $copiedFromPath = $1;
612         } elsif (/^rename from (\S+)/) {
613             # FIXME: Record this as a move rather than as a copy-and-delete.
614             #        This will simplify adding rename support to svn-unapply.
615             #        Otherwise, the hash for a deletion would have to know
616             #        everything about the file being deleted in order to
617             #        support undoing itself.  Recording as a move will also
618             #        permit us to use "svn move" and "git move".
619             $copiedFromPath = $1;
620             $shouldDeleteSource = 1;
621         } elsif (/^--- \S+/) {
622             $_ = "--- $indexPath"; # Convert to SVN format.
623         } elsif (/^\+\+\+ \S+/) {
624             $_ = "+++ $indexPath"; # Convert to SVN format.
625             $foundHeaderEnding = 1;
626         } elsif (/^GIT binary patch$/ ) {
627             $isBinary = 1;
628             $foundHeaderEnding = 1;
629         # The "git diff" command includes a line of the form "Binary files
630         # <path1> and <path2> differ" if the --binary flag is not used.
631         } elsif (/^Binary files / ) {
632             die("Error: the Git diff contains a binary file without the binary data in ".
633                 "line: \"$_\".  Be sure to use the --binary flag when invoking \"git diff\" ".
634                 "with diffs containing binary files.");
635         }
636 
637         $svnConvertedText .= "$_$eol"; # Also restore end-of-line characters.
638 
639         $_ = <$fileHandle>; # Not defined if end-of-file reached.
640 
641         last if (!defined($_) || /$gitDiffStartRegEx/ || $foundHeaderEnding);
642     }
643 
644     my $executableBitDelta = $newExecutableBit - $oldExecutableBit;
645 
646     my %header;
647 
648     $header{copiedFromPath} = $copiedFromPath if $copiedFromPath;
649     $header{executableBitDelta} = $executableBitDelta if $executableBitDelta;
650     $header{indexPath} = $indexPath;
651     $header{isBinary} = $isBinary if $isBinary;
652     $header{isCopyWithChanges} = 1 if ($copiedFromPath && ($similarityIndex != 100 || $executableBitDelta));
653     $header{isDeletion} = $isDeletion if $isDeletion;
654     $header{isNew} = $isNew if $isNew;
655     $header{shouldDeleteSource} = $shouldDeleteSource if $shouldDeleteSource;
656     $header{svnConvertedText} = $svnConvertedText;
657 
658     return (\%header, $_);
659 }
660 
661 # Parse the next SVN diff header from the given file handle, and advance
662 # the handle so the last line read is the first line after the header.
663 #
664 # This subroutine dies if given leading junk or if it could not detect
665 # the end of the header block.
666 #
667 # Args:
668 #   $fileHandle: advanced so the last line read from the handle is the first
669 #                line of the header to parse.  This should be a line
670 #                beginning with "Index:".
671 #   $line: the line last read from $fileHandle
672 #
673 # Returns ($headerHashRef, $lastReadLine):
674 #   $headerHashRef: a hash reference representing a diff header, as follows--
675 #     copiedFromPath: the path from which the file was copied if the diff
676 #                     is a copy.
677 #     indexPath: the path of the target file, which is the path found in
678 #                the "Index:" line.
679 #     isBinary: the value 1 if the diff is for a binary file.
680 #     isNew: the value 1 if the diff is for a new file.
681 #     sourceRevision: the revision number of the source, if it exists.  This
682 #                     is the same as the revision number the file was copied
683 #                     from, in the case of a file copy.
684 #     svnConvertedText: the header text converted to a header with the paths
685 #                       in some lines corrected.
686 #   $lastReadLine: the line last read from $fileHandle.
687 sub parseSvnDiffHeader($$)
688 {
689     my ($fileHandle, $line) = @_;
690 
691     $_ = $line;
692 
693     my $indexPath;
694     if (/$svnDiffStartRegEx/) {
695         $indexPath = adjustPathForRecentRenamings($1);
696     } else {
697         die("First line of SVN diff does not begin with \"Index \": \"$_\"");
698     }
699 
700     my $copiedFromPath;
701     my $foundHeaderEnding;
702     my $isBinary;
703     my $isNew;
704     my $sourceRevision;
705     my $svnConvertedText;
706     while (1) {
707         # Temporarily strip off any end-of-line characters to simplify
708         # regex matching below.
709         s/([\n\r]+)$//;
710         my $eol = $1;
711 
712         # Fix paths on ""---" and "+++" lines to match the leading
713         # index line.
714         if (s/^--- \S+/--- $indexPath/) {
715             # ---
716             if (/^--- .+\(revision (\d+)\)/) {
717                 $sourceRevision = $1;
718                 $isNew = 1 if !$sourceRevision; # if revision 0.
719                 if (/\(from (\S+):(\d+)\)$/) {
720                     # The "from" clause is created by svn-create-patch, in
721                     # which case there is always also a "revision" clause.
722                     $copiedFromPath = $1;
723                     die("Revision number \"$2\" in \"from\" clause does not match " .
724                         "source revision number \"$sourceRevision\".") if ($2 != $sourceRevision);
725                 }
726             }
727         } elsif (s/^\+\+\+ \S+/+++ $indexPath/) {
728             $foundHeaderEnding = 1;
729         } elsif (/^Cannot display: file marked as a binary type.$/) {
730             $isBinary = 1;
731             $foundHeaderEnding = 1;
732         }
733 
734         $svnConvertedText .= "$_$eol"; # Also restore end-of-line characters.
735 
736         $_ = <$fileHandle>; # Not defined if end-of-file reached.
737 
738         last if (!defined($_) || /$svnDiffStartRegEx/ || $foundHeaderEnding);
739     }
740 
741     if (!$foundHeaderEnding) {
742         die("Did not find end of header block corresponding to index path \"$indexPath\".");
743     }
744 
745     my %header;
746 
747     $header{copiedFromPath} = $copiedFromPath if $copiedFromPath;
748     $header{indexPath} = $indexPath;
749     $header{isBinary} = $isBinary if $isBinary;
750     $header{isNew} = $isNew if $isNew;
751     $header{sourceRevision} = $sourceRevision if $sourceRevision;
752     $header{svnConvertedText} = $svnConvertedText;
753 
754     return (\%header, $_);
755 }
756 
757 # Parse the next diff header from the given file handle, and advance
758 # the handle so the last line read is the first line after the header.
759 #
760 # This subroutine dies if given leading junk or if it could not detect
761 # the end of the header block.
762 #
763 # Args:
764 #   $fileHandle: advanced so the last line read from the handle is the first
765 #                line of the header to parse.  For SVN-formatted diffs, this
766 #                is a line beginning with "Index:".  For Git, this is a line
767 #                beginning with "diff --git".
768 #   $line: the line last read from $fileHandle
769 #
770 # Returns ($headerHashRef, $lastReadLine):
771 #   $headerHashRef: a hash reference representing a diff header
772 #     copiedFromPath: the path from which the file was copied if the diff
773 #                     is a copy.
774 #     executableBitDelta: the value 1 or -1 if the executable bit was added or
775 #                         removed, respectively.  New and deleted files have
776 #                         this value only if the file is executable, in which
777 #                         case the value is 1 and -1, respectively.
778 #     indexPath: the path of the target file.
779 #     isBinary: the value 1 if the diff is for a binary file.
780 #     isGit: the value 1 if the diff is Git-formatted.
781 #     isSvn: the value 1 if the diff is SVN-formatted.
782 #     sourceRevision: the revision number of the source, if it exists.  This
783 #                     is the same as the revision number the file was copied
784 #                     from, in the case of a file copy.
785 #     svnConvertedText: the header text with some lines converted to SVN
786 #                       format.  Git-specific lines are preserved.
787 #   $lastReadLine: the line last read from $fileHandle.
788 sub parseDiffHeader($$)
789 {
790     my ($fileHandle, $line) = @_;
791 
792     my $header;  # This is a hash ref.
793     my $isGit;
794     my $isSvn;
795     my $lastReadLine;
796 
797     if ($line =~ $svnDiffStartRegEx) {
798         $isSvn = 1;
799         ($header, $lastReadLine) = parseSvnDiffHeader($fileHandle, $line);
800     } elsif ($line =~ $gitDiffStartRegEx) {
801         $isGit = 1;
802         ($header, $lastReadLine) = parseGitDiffHeader($fileHandle, $line);
803     } else {
804         die("First line of diff does not begin with \"Index:\" or \"diff --git\": \"$line\"");
805     }
806 
807     $header->{isGit} = $isGit if $isGit;
808     $header->{isSvn} = $isSvn if $isSvn;
809 
810     return ($header, $lastReadLine);
811 }
812 
813 # FIXME: The %diffHash "object" should not have an svnConvertedText property.
814 #        Instead, the hash object should store its information in a
815 #        structured way as properties.  This should be done in a way so
816 #        that, if necessary, the text of an SVN or Git patch can be
817 #        reconstructed from the information in those hash properties.
818 #
819 # A %diffHash is a hash representing a source control diff of a single
820 # file operation (e.g. a file modification, copy, or delete).
821 #
822 # These hashes appear, for example, in the parseDiff(), parsePatch(),
823 # and prepareParsedPatch() subroutines of this package.
824 #
825 # The corresponding values are--
826 #
827 #   copiedFromPath: the path from which the file was copied if the diff
828 #                   is a copy.
829 #   executableBitDelta: the value 1 or -1 if the executable bit was added or
830 #                       removed from the target file, respectively.
831 #   indexPath: the path of the target file.  For SVN-formatted diffs,
832 #              this is the same as the path in the "Index:" line.
833 #   isBinary: the value 1 if the diff is for a binary file.
834 #   isDeletion: the value 1 if the diff is known from the header to be a deletion.
835 #   isGit: the value 1 if the diff is Git-formatted.
836 #   isNew: the value 1 if the dif is known from the header to be a new file.
837 #   isSvn: the value 1 if the diff is SVN-formatted.
838 #   sourceRevision: the revision number of the source, if it exists.  This
839 #                   is the same as the revision number the file was copied
840 #                   from, in the case of a file copy.
841 #   svnConvertedText: the diff with some lines converted to SVN format.
842 #                     Git-specific lines are preserved.
843 
844 # Parse one diff from a patch file created by svn-create-patch, and
845 # advance the file handle so the last line read is the first line
846 # of the next header block.
847 #
848 # This subroutine preserves any leading junk encountered before the header.
849 #
850 # Composition of an SVN diff
851 #
852 # There are three parts to an SVN diff: the header, the property change, and
853 # the binary contents, in that order. Either the header or the property change
854 # may be ommitted, but not both. If there are binary changes, then you always
855 # have all three.
856 #
857 # Args:
858 #   $fileHandle: a file handle advanced to the first line of the next
859 #                header block. Leading junk is okay.
860 #   $line: the line last read from $fileHandle.
861 #   $optionsHashRef: a hash reference representing optional options to use
862 #                    when processing a diff.
863 #     shouldNotUseIndexPathEOL: whether to use the line endings in the diff instead
864 #                               instead of the line endings in the target file; the
865 #                               value of 1 if svnConvertedText should use the line
866 #                               endings in the diff.
867 #
868 # Returns ($diffHashRefs, $lastReadLine):
869 #   $diffHashRefs: A reference to an array of references to %diffHash hashes.
870 #                  See the %diffHash documentation above.
871 #   $lastReadLine: the line last read from $fileHandle
872 sub parseDiff($$;$)
873 {
874     # FIXME: Adjust this method so that it dies if the first line does not
875     #        match the start of a diff.  This will require a change to
876     #        parsePatch() so that parsePatch() skips over leading junk.
877     my ($fileHandle, $line, $optionsHashRef) = @_;
878 
879     my $headerStartRegEx = $svnDiffStartRegEx; # SVN-style header for the default
880 
881     my $headerHashRef; # Last header found, as returned by parseDiffHeader().
882     my $svnPropertiesHashRef; # Last SVN properties diff found, as returned by parseSvnDiffProperties().
883     my $svnText;
884     my $indexPathEOL;
885     while (defined($line)) {
886         if (!$headerHashRef && ($line =~ $gitDiffStartRegEx)) {
887             # Then assume all diffs in the patch are Git-formatted. This
888             # block was made to be enterable at most once since we assume
889             # all diffs in the patch are formatted the same (SVN or Git).
890             $headerStartRegEx = $gitDiffStartRegEx;
891         }
892 
893         if ($line =~ $svnPropertiesStartRegEx) {
894             my $propertyPath = $1;
895             if ($svnPropertiesHashRef || $headerHashRef && ($propertyPath ne $headerHashRef->{indexPath})) {
896                 # This is the start of the second diff in the while loop, which happens to
897                 # be a property diff.  If $svnPropertiesHasRef is defined, then this is the
898                 # second consecutive property diff, otherwise it's the start of a property
899                 # diff for a file that only has property changes.
900                 last;
901             }
902             ($svnPropertiesHashRef, $line) = parseSvnDiffProperties($fileHandle, $line);
903             next;
904         }
905         if ($line !~ $headerStartRegEx) {
906             # Then we are in the body of the diff.
907             if ($indexPathEOL && $line !~ /$chunkRangeRegEx/) {
908                 # The chunk range is part of the body of the diff, but its line endings should't be
909                 # modified or patch(1) will complain. So, we only modify non-chunk range lines.
910                 $line =~ s/\r\n|\r|\n/$indexPathEOL/g;
911             }
912             $svnText .= $line;
913             $line = <$fileHandle>;
914             next;
915         } # Otherwise, we found a diff header.
916 
917         if ($svnPropertiesHashRef || $headerHashRef) {
918             # Then either we just processed an SVN property change or this
919             # is the start of the second diff header of this while loop.
920             last;
921         }
922 
923         ($headerHashRef, $line) = parseDiffHeader($fileHandle, $line);
924         if (!$optionsHashRef || !$optionsHashRef->{shouldNotUseIndexPathEOL}) {
925             $indexPathEOL = firstEOLInFile($headerHashRef->{indexPath}) if !$headerHashRef->{isNew} && !$headerHashRef->{isBinary};
926         }
927 
928         $svnText .= $headerHashRef->{svnConvertedText};
929     }
930 
931     my @diffHashRefs;
932 
933     if ($headerHashRef->{shouldDeleteSource}) {
934         my %deletionHash;
935         $deletionHash{indexPath} = $headerHashRef->{copiedFromPath};
936         $deletionHash{isDeletion} = 1;
937         push @diffHashRefs, \%deletionHash;
938     }
939     if ($headerHashRef->{copiedFromPath}) {
940         my %copyHash;
941         $copyHash{copiedFromPath} = $headerHashRef->{copiedFromPath};
942         $copyHash{indexPath} = $headerHashRef->{indexPath};
943         $copyHash{sourceRevision} = $headerHashRef->{sourceRevision} if $headerHashRef->{sourceRevision};
944         if ($headerHashRef->{isSvn}) {
945             $copyHash{executableBitDelta} = $svnPropertiesHashRef->{executableBitDelta} if $svnPropertiesHashRef->{executableBitDelta};
946         }
947         push @diffHashRefs, \%copyHash;
948     }
949 
950     # Note, the order of evaluation for the following if conditional has been explicitly chosen so that
951     # it evaluates to false when there is no headerHashRef (e.g. a property change diff for a file that
952     # only has property changes).
953     if ($headerHashRef->{isCopyWithChanges} || (%$headerHashRef && !$headerHashRef->{copiedFromPath})) {
954         # Then add the usual file modification.
955         my %diffHash;
956         # FIXME: We should expand this code to support other properties.  In the future,
957         #        parseSvnDiffProperties may return a hash whose keys are the properties.
958         if ($headerHashRef->{isSvn}) {
959             # SVN records the change to the executable bit in a separate property change diff
960             # that follows the contents of the diff, except for binary diffs.  For binary
961             # diffs, the property change diff follows the diff header.
962             $diffHash{executableBitDelta} = $svnPropertiesHashRef->{executableBitDelta} if $svnPropertiesHashRef->{executableBitDelta};
963         } elsif ($headerHashRef->{isGit}) {
964             # Git records the change to the executable bit in the header of a diff.
965             $diffHash{executableBitDelta} = $headerHashRef->{executableBitDelta} if $headerHashRef->{executableBitDelta};
966         }
967         $diffHash{indexPath} = $headerHashRef->{indexPath};
968         $diffHash{isBinary} = $headerHashRef->{isBinary} if $headerHashRef->{isBinary};
969         $diffHash{isDeletion} = $headerHashRef->{isDeletion} if $headerHashRef->{isDeletion};
970         $diffHash{isGit} = $headerHashRef->{isGit} if $headerHashRef->{isGit};
971         $diffHash{isNew} = $headerHashRef->{isNew} if $headerHashRef->{isNew};
972         $diffHash{isSvn} = $headerHashRef->{isSvn} if $headerHashRef->{isSvn};
973         if (!$headerHashRef->{copiedFromPath}) {
974             # If the file was copied, then we have already incorporated the
975             # sourceRevision information into the change.
976             $diffHash{sourceRevision} = $headerHashRef->{sourceRevision} if $headerHashRef->{sourceRevision};
977         }
978         # FIXME: Remove the need for svnConvertedText.  See the %diffHash
979         #        code comments above for more information.
980         #
981         # Note, we may not always have SVN converted text since we intend
982         # to deprecate it in the future.  For example, a property change
983         # diff for a file that only has property changes will not return
984         # any SVN converted text.
985         $diffHash{svnConvertedText} = $svnText if $svnText;
986         push @diffHashRefs, \%diffHash;
987     }
988 
989     if (!%$headerHashRef && $svnPropertiesHashRef) {
990         # A property change diff for a file that only has property changes.
991         my %propertyChangeHash;
992         $propertyChangeHash{executableBitDelta} = $svnPropertiesHashRef->{executableBitDelta} if $svnPropertiesHashRef->{executableBitDelta};
993         $propertyChangeHash{indexPath} = $svnPropertiesHashRef->{propertyPath};
994         $propertyChangeHash{isSvn} = 1;
995         push @diffHashRefs, \%propertyChangeHash;
996     }
997 
998     return (\@diffHashRefs, $line);
999 }
1000 
1001 # Parse an SVN property change diff from the given file handle, and advance
1002 # the handle so the last line read is the first line after this diff.
1003 #
1004 # For the case of an SVN binary diff, the binary contents will follow the
1005 # the property changes.
1006 #
1007 # This subroutine dies if the first line does not begin with "Property changes on"
1008 # or if the separator line that follows this line is missing.
1009 #
1010 # Args:
1011 #   $fileHandle: advanced so the last line read from the handle is the first
1012 #                line of the footer to parse.  This line begins with
1013 #                "Property changes on".
1014 #   $line: the line last read from $fileHandle.
1015 #
1016 # Returns ($propertyHashRef, $lastReadLine):
1017 #   $propertyHashRef: a hash reference representing an SVN diff footer.
1018 #     propertyPath: the path of the target file.
1019 #     executableBitDelta: the value 1 or -1 if the executable bit was added or
1020 #                         removed from the target file, respectively.
1021 #   $lastReadLine: the line last read from $fileHandle.
1022 sub parseSvnDiffProperties($$)
1023 {
1024     my ($fileHandle, $line) = @_;
1025 
1026     $_ = $line;
1027 
1028     my %footer;
1029     if (/$svnPropertiesStartRegEx/) {
1030         $footer{propertyPath} = $1;
1031     } else {
1032         die("Failed to find start of SVN property change, \"Property changes on \": \"$_\"");
1033     }
1034 
1035     # We advance $fileHandle two lines so that the next line that
1036     # we process is $svnPropertyStartRegEx in a well-formed footer.
1037     # A well-formed footer has the form:
1038     # Property changes on: FileA
1039     # ___________________________________________________________________
1040     # Added: svn:executable
1041     #    + *
1042     $_ = <$fileHandle>; # Not defined if end-of-file reached.
1043     my $separator = "_" x 67;
1044     if (defined($_) && /^$separator[\r\n]+$/) {
1045         $_ = <$fileHandle>;
1046     } else {
1047         die("Failed to find separator line: \"$_\".");
1048     }
1049 
1050     # FIXME: We should expand this to support other SVN properties
1051     #        (e.g. return a hash of property key-values that represents
1052     #        all properties).
1053     #
1054     # Notice, we keep processing until we hit end-of-file or some
1055     # line that does not resemble $svnPropertyStartRegEx, such as
1056     # the empty line that precedes the start of the binary contents
1057     # of a patch, or the start of the next diff (e.g. "Index:").
1058     my $propertyHashRef;
1059     while (defined($_) && /$svnPropertyStartRegEx/) {
1060         ($propertyHashRef, $_) = parseSvnProperty($fileHandle, $_);
1061         if ($propertyHashRef->{name} eq "svn:executable") {
1062             # Notice, for SVN properties, propertyChangeDelta is always non-zero
1063             # because a property can only be added or removed.
1064             $footer{executableBitDelta} = $propertyHashRef->{propertyChangeDelta};
1065         }
1066     }
1067 
1068     return(\%footer, $_);
1069 }
1070 
1071 # Parse the next SVN property from the given file handle, and advance the handle so the last
1072 # line read is the first line after the property.
1073 #
1074 # This subroutine dies if the first line is not a valid start of an SVN property,
1075 # or the property is missing a value, or the property change type (e.g. "Added")
1076 # does not correspond to the property value type (e.g. "+").
1077 #
1078 # Args:
1079 #   $fileHandle: advanced so the last line read from the handle is the first
1080 #                line of the property to parse.  This should be a line
1081 #                that matches $svnPropertyStartRegEx.
1082 #   $line: the line last read from $fileHandle.
1083 #
1084 # Returns ($propertyHashRef, $lastReadLine):
1085 #   $propertyHashRef: a hash reference representing a SVN property.
1086 #     name: the name of the property.
1087 #     value: the last property value.  For instance, suppose the property is "Modified".
1088 #            Then it has both a '-' and '+' property value in that order.  Therefore,
1089 #            the value of this key is the value of the '+' property by ordering (since
1090 #            it is the last value).
1091 #     propertyChangeDelta: the value 1 or -1 if the property was added or
1092 #                          removed, respectively.
1093 #   $lastReadLine: the line last read from $fileHandle.
1094 sub parseSvnProperty($$)
1095 {
1096     my ($fileHandle, $line) = @_;
1097 
1098     $_ = $line;
1099 
1100     my $propertyName;
1101     my $propertyChangeType;
1102     if (/$svnPropertyStartRegEx/) {
1103         $propertyChangeType = $1;
1104         $propertyName = $2;
1105     } else {
1106         die("Failed to find SVN property: \"$_\".");
1107     }
1108 
1109     $_ = <$fileHandle>; # Not defined if end-of-file reached.
1110 
1111     # The "svn diff" command neither inserts newline characters between property values
1112     # nor between successive properties.
1113     #
1114     # FIXME: We do not support property values that contain tailing newline characters
1115     #        as it is difficult to disambiguate these trailing newlines from the empty
1116     #        line that precedes the contents of a binary patch.
1117     my $propertyValue;
1118     my $propertyValueType;
1119     while (defined($_) && /$svnPropertyValueStartRegEx/) {
1120         # Note, a '-' property may be followed by a '+' property in the case of a "Modified"
1121         # or "Name" property.  We only care about the ending value (i.e. the '+' property)
1122         # in such circumstances.  So, we take the property value for the property to be its
1123         # last parsed property value.
1124         #
1125         # FIXME: We may want to consider strictly enforcing a '-', '+' property ordering or
1126         #        add error checking to prevent '+', '+', ..., '+' and other invalid combinations.
1127         $propertyValueType = $1;
1128         ($propertyValue, $_) = parseSvnPropertyValue($fileHandle, $_);
1129     }
1130 
1131     if (!$propertyValue) {
1132         die("Failed to find the property value for the SVN property \"$propertyName\": \"$_\".");
1133     }
1134 
1135     my $propertyChangeDelta;
1136     if ($propertyValueType eq "+" || $propertyValueType eq "Merged") {
1137         $propertyChangeDelta = 1;
1138     } elsif ($propertyValueType eq "-" || $propertyValueType eq "Reverse-merged") {
1139         $propertyChangeDelta = -1;
1140     } else {
1141         die("Not reached.");
1142     }
1143 
1144     # We perform a simple validation that an "Added" or "Deleted" property
1145     # change type corresponds with a "+" and "-" value type, respectively.
1146     my $expectedChangeDelta;
1147     if ($propertyChangeType eq "Added") {
1148         $expectedChangeDelta = 1;
1149     } elsif ($propertyChangeType eq "Deleted") {
1150         $expectedChangeDelta = -1;
1151     }
1152 
1153     if ($expectedChangeDelta && $propertyChangeDelta != $expectedChangeDelta) {
1154         die("The final property value type found \"$propertyValueType\" does not " .
1155             "correspond to the property change type found \"$propertyChangeType\".");
1156     }
1157 
1158     my %propertyHash;
1159     $propertyHash{name} = $propertyName;
1160     $propertyHash{propertyChangeDelta} = $propertyChangeDelta;
1161     $propertyHash{value} = $propertyValue;
1162     return (\%propertyHash, $_);
1163 }
1164 
1165 # Parse the value of an SVN property from the given file handle, and advance
1166 # the handle so the last line read is the first line after the property value.
1167 #
1168 # This subroutine dies if the first line is an invalid SVN property value line
1169 # (i.e. a line that does not begin with "   +" or "   -").
1170 #
1171 # Args:
1172 #   $fileHandle: advanced so the last line read from the handle is the first
1173 #                line of the property value to parse.  This should be a line
1174 #                beginning with "   +" or "   -".
1175 #   $line: the line last read from $fileHandle.
1176 #
1177 # Returns ($propertyValue, $lastReadLine):
1178 #   $propertyValue: the value of the property.
1179 #   $lastReadLine: the line last read from $fileHandle.
1180 sub parseSvnPropertyValue($$)
1181 {
1182     my ($fileHandle, $line) = @_;
1183 
1184     $_ = $line;
1185 
1186     my $propertyValue;
1187     my $eol;
1188     if (/$svnPropertyValueStartRegEx/) {
1189         $propertyValue = $2; # Does not include the end-of-line character(s).
1190         $eol = $POSTMATCH;
1191     } else {
1192         die("Failed to find property value beginning with '+', '-', 'Merged', or 'Reverse-merged': \"$_\".");
1193     }
1194 
1195     while (<$fileHandle>) {
1196         if (/^[\r\n]+$/ || /$svnPropertyValueStartRegEx/ || /$svnPropertyStartRegEx/) {
1197             # Note, we may encounter an empty line before the contents of a binary patch.
1198             # Also, we check for $svnPropertyValueStartRegEx because a '-' property may be
1199             # followed by a '+' property in the case of a "Modified" or "Name" property.
1200             # We check for $svnPropertyStartRegEx because it indicates the start of the
1201             # next property to parse.
1202             last;
1203         }
1204 
1205         # Temporarily strip off any end-of-line characters. We add the end-of-line characters
1206         # from the previously processed line to the start of this line so that the last line
1207         # of the property value does not end in end-of-line characters.
1208         s/([\n\r]+)$//;
1209         $propertyValue .= "$eol$_";
1210         $eol = $1;
1211     }
1212 
1213     return ($propertyValue, $_);
1214 }
1215 
1216 # Parse a patch file created by svn-create-patch.
1217 #
1218 # Args:
1219 #   $fileHandle: A file handle to the patch file that has not yet been
1220 #                read from.
1221 #   $optionsHashRef: a hash reference representing optional options to use
1222 #                    when processing a diff.
1223 #     shouldNotUseIndexPathEOL: whether to use the line endings in the diff instead
1224 #                               instead of the line endings in the target file; the
1225 #                               value of 1 if svnConvertedText should use the line
1226 #                               endings in the diff.
1227 #
1228 # Returns:
1229 #   @diffHashRefs: an array of diff hash references.
1230 #                  See the %diffHash documentation above.
1231 sub parsePatch($;$)
1232 {
1233     my ($fileHandle, $optionsHashRef) = @_;
1234 
1235     my $newDiffHashRefs;
1236     my @diffHashRefs; # return value
1237 
1238     my $line = <$fileHandle>;
1239 
1240     while (defined($line)) { # Otherwise, at EOF.
1241 
1242         ($newDiffHashRefs, $line) = parseDiff($fileHandle, $line, $optionsHashRef);
1243 
1244         push @diffHashRefs, @$newDiffHashRefs;
1245     }
1246 
1247     return @diffHashRefs;
1248 }
1249 
1250 # Prepare the results of parsePatch() for use in svn-apply and svn-unapply.
1251 #
1252 # Args:
1253 #   $shouldForce: Whether to continue processing if an unexpected
1254 #                 state occurs.
1255 #   @diffHashRefs: An array of references to %diffHashes.
1256 #                  See the %diffHash documentation above.
1257 #
1258 # Returns $preparedPatchHashRef:
1259 #   copyDiffHashRefs: A reference to an array of the $diffHashRefs in
1260 #                     @diffHashRefs that represent file copies. The original
1261 #                     ordering is preserved.
1262 #   nonCopyDiffHashRefs: A reference to an array of the $diffHashRefs in
1263 #                        @diffHashRefs that do not represent file copies.
1264 #                        The original ordering is preserved.
1265 #   sourceRevisionHash: A reference to a hash of source path to source
1266 #                       revision number.
1267 sub prepareParsedPatch($@)
1268 {
1269     my ($shouldForce, @diffHashRefs) = @_;
1270 
1271     my %copiedFiles;
1272 
1273     # Return values
1274     my @copyDiffHashRefs = ();
1275     my @nonCopyDiffHashRefs = ();
1276     my %sourceRevisionHash = ();
1277     for my $diffHashRef (@diffHashRefs) {
1278         my $copiedFromPath = $diffHashRef->{copiedFromPath};
1279         my $indexPath = $diffHashRef->{indexPath};
1280         my $sourceRevision = $diffHashRef->{sourceRevision};
1281         my $sourcePath;
1282 
1283         if (defined($copiedFromPath)) {
1284             # Then the diff is a copy operation.
1285             $sourcePath = $copiedFromPath;
1286 
1287             # FIXME: Consider printing a warning or exiting if
1288             #        exists($copiedFiles{$indexPath}) is true -- i.e. if
1289             #        $indexPath appears twice as a copy target.
1290             $copiedFiles{$indexPath} = $sourcePath;
1291 
1292             push @copyDiffHashRefs, $diffHashRef;
1293         } else {
1294             # Then the diff is not a copy operation.
1295             $sourcePath = $indexPath;
1296 
1297             push @nonCopyDiffHashRefs, $diffHashRef;
1298         }
1299 
1300         if (defined($sourceRevision)) {
1301             if (exists($sourceRevisionHash{$sourcePath}) &&
1302                 ($sourceRevisionHash{$sourcePath} != $sourceRevision)) {
1303                 if (!$shouldForce) {
1304                     die "Two revisions of the same file required as a source:\n".
1305                         "    $sourcePath:$sourceRevisionHash{$sourcePath}\n".
1306                         "    $sourcePath:$sourceRevision";
1307                 }
1308             }
1309             $sourceRevisionHash{$sourcePath} = $sourceRevision;
1310         }
1311     }
1312 
1313     my %preparedPatchHash;
1314 
1315     $preparedPatchHash{copyDiffHashRefs} = \@copyDiffHashRefs;
1316     $preparedPatchHash{nonCopyDiffHashRefs} = \@nonCopyDiffHashRefs;
1317     $preparedPatchHash{sourceRevisionHash} = \%sourceRevisionHash;
1318 
1319     return \%preparedPatchHash;
1320 }
1321 
1322 # Return localtime() for the project's time zone, given an integer time as
1323 # returned by Perl's time() function.
1324 sub localTimeInProjectTimeZone($)
1325 {
1326     my $epochTime = shift;
1327 
1328     # Change the time zone temporarily for the localtime() call.
1329     my $savedTimeZone = $ENV{'TZ'};
1330     $ENV{'TZ'} = $changeLogTimeZone;
1331     my @localTime = localtime($epochTime);
1332     if (defined $savedTimeZone) {
1333          $ENV{'TZ'} = $savedTimeZone;
1334     } else {
1335          delete $ENV{'TZ'};
1336     }
1337 
1338     return @localTime;
1339 }
1340 
1341 # Set the reviewer and date in a ChangeLog patch, and return the new patch.
1342 #
1343 # Args:
1344 #   $patch: a ChangeLog patch as a string.
1345 #   $reviewer: the name of the reviewer, or undef if the reviewer should not be set.
1346 #   $epochTime: an integer time as returned by Perl's time() function.
1347 sub setChangeLogDateAndReviewer($$$)
1348 {
1349     my ($patch, $reviewer, $epochTime) = @_;
1350 
1351     my @localTime = localTimeInProjectTimeZone($epochTime);
1352     my $newDate = strftime("%Y-%m-%d", @localTime);
1353 
1354     my $firstChangeLogLineRegEx = qr#(\n\+)\d{4}-[^-]{2}-[^-]{2}(  )#;
1355     $patch =~ s/$firstChangeLogLineRegEx/$1$newDate$2/;
1356 
1357     if (defined($reviewer)) {
1358         # We include a leading plus ("+") in the regular expression to make
1359         # the regular expression less likely to match text in the leading junk
1360         # for the patch, if the patch has leading junk.
1361         $patch =~ s/(\n\+.*)NOBODY \(OOPS!\)/$1$reviewer/;
1362     }
1363 
1364     return $patch;
1365 }
1366 
1367 # If possible, returns a ChangeLog patch equivalent to the given one,
1368 # but with the newest ChangeLog entry inserted at the top of the
1369 # file -- i.e. no leading context and all lines starting with "+".
1370 #
1371 # If given a patch string not representable as a patch with the above
1372 # properties, it returns the input back unchanged.
1373 #
1374 # WARNING: This subroutine can return an inequivalent patch string if
1375 # both the beginning of the new ChangeLog file matches the beginning
1376 # of the source ChangeLog, and the source beginning was modified.
1377 # Otherwise, it is guaranteed to return an equivalent patch string,
1378 # if it returns.
1379 #
1380 # Applying this subroutine to ChangeLog patches allows svn-apply to
1381 # insert new ChangeLog entries at the top of the ChangeLog file.
1382 # svn-apply uses patch with --fuzz=3 to do this. We need to apply
1383 # this subroutine because the diff(1) command is greedy when matching
1384 # lines. A new ChangeLog entry with the same date and author as the
1385 # previous will match and cause the diff to have lines of starting
1386 # context.
1387 #
1388 # This subroutine has unit tests in VCSUtils_unittest.pl.
1389 #
1390 # Returns $changeLogHashRef:
1391 #   $changeLogHashRef: a hash reference representing a change log patch.
1392 #     patch: a ChangeLog patch equivalent to the given one, but with the
1393 #            newest ChangeLog entry inserted at the top of the file, if possible.
1394 sub fixChangeLogPatch($)
1395 {
1396     my $patch = shift; # $patch will only contain patch fragments for ChangeLog.
1397 
1398     $patch =~ /(\r?\n)/;
1399     my $lineEnding = $1;
1400     my @lines = split(/$lineEnding/, $patch);
1401 
1402     my $i = 0; # We reuse the same index throughout.
1403 
1404     # Skip to beginning of first chunk.
1405     for (; $i < @lines; ++$i) {
1406         if (substr($lines[$i], 0, 1) eq "@") {
1407             last;
1408         }
1409     }
1410     my $chunkStartIndex = ++$i;
1411     my %changeLogHashRef;
1412 
1413     # Optimization: do not process if new lines already begin the chunk.
1414     if (substr($lines[$i], 0, 1) eq "+") {
1415         $changeLogHashRef{patch} = $patch;
1416         return \%changeLogHashRef;
1417     }
1418 
1419     # Skip to first line of newly added ChangeLog entry.
1420     # For example, +2009-06-03  Eric Seidel  <eric@webkit.org>
1421     my $dateStartRegEx = '^\+(\d{4}-\d{2}-\d{2})' # leading "+" and date
1422                          . '\s+(.+)\s+' # name
1423                          . '<([^<>]+)>$'; # e-mail address
1424 
1425     for (; $i < @lines; ++$i) {
1426         my $line = $lines[$i];
1427         my $firstChar = substr($line, 0, 1);
1428         if ($line =~ /$dateStartRegEx/) {
1429             last;
1430         } elsif ($firstChar eq " " or $firstChar eq "+") {
1431             next;
1432         }
1433         $changeLogHashRef{patch} = $patch; # Do not change if, for example, "-" or "@" found.
1434         return \%changeLogHashRef;
1435     }
1436     if ($i >= @lines) {
1437         $changeLogHashRef{patch} = $patch; # Do not change if date not found.
1438         return \%changeLogHashRef;
1439     }
1440     my $dateStartIndex = $i;
1441 
1442     # Rewrite overlapping lines to lead with " ".
1443     my @overlappingLines = (); # These will include a leading "+".
1444     for (; $i < @lines; ++$i) {
1445         my $line = $lines[$i];
1446         if (substr($line, 0, 1) ne "+") {
1447           last;
1448         }
1449         push(@overlappingLines, $line);
1450         $lines[$i] = " " . substr($line, 1);
1451     }
1452 
1453     # Remove excess ending context, if necessary.
1454     my $shouldTrimContext = 1;
1455     for (; $i < @lines; ++$i) {
1456         my $firstChar = substr($lines[$i], 0, 1);
1457         if ($firstChar eq " ") {
1458             next;
1459         } elsif ($firstChar eq "@") {
1460             last;
1461         }
1462         $shouldTrimContext = 0; # For example, if "+" or "-" encountered.
1463         last;
1464     }
1465     my $deletedLineCount = 0;
1466     if ($shouldTrimContext) { # Also occurs if end of file reached.
1467         splice(@lines, $i - @overlappingLines, @overlappingLines);
1468         $deletedLineCount = @overlappingLines;
1469     }
1470 
1471     # Work backwards, shifting overlapping lines towards front
1472     # while checking that patch stays equivalent.
1473     for ($i = $dateStartIndex - 1; @overlappingLines && $i >= $chunkStartIndex; --$i) {
1474         my $line = $lines[$i];
1475         if (substr($line, 0, 1) ne " ") {
1476             next;
1477         }
1478         my $text = substr($line, 1);
1479         my $newLine = pop(@overlappingLines);
1480         if ($text ne substr($newLine, 1)) {
1481             $changeLogHashRef{patch} = $patch; # Unexpected difference.
1482             return \%changeLogHashRef;
1483         }
1484         $lines[$i] = "+$text";
1485     }
1486 
1487     # If @overlappingLines > 0, this is where we make use of the
1488     # assumption that the beginning of the source file was not modified.
1489     splice(@lines, $chunkStartIndex, 0, @overlappingLines);
1490 
1491     # Update the date start index as it may have changed after shifting
1492     # the overlapping lines towards the front.
1493     for ($i = $chunkStartIndex; $i < $dateStartIndex; ++$i) {
1494         $dateStartIndex = $i if $lines[$i] =~ /$dateStartRegEx/;
1495     }
1496     splice(@lines, $chunkStartIndex, $dateStartIndex - $chunkStartIndex); # Remove context of later entry.
1497     $deletedLineCount += $dateStartIndex - $chunkStartIndex;
1498 
1499     # Update the initial chunk range.
1500     if ($lines[$chunkStartIndex - 1] !~ /$chunkRangeRegEx/) {
1501         # FIXME: Handle errors differently from ChangeLog files that
1502         # are okay but should not be altered. That way we can find out
1503         # if improvements to the script ever become necessary.
1504         $changeLogHashRef{patch} = $patch; # Error: unexpected patch string format.
1505         return \%changeLogHashRef;
1506     }
1507     my $oldSourceLineCount = $2;
1508     my $oldTargetLineCount = $3;
1509 
1510     my $sourceLineCount = $oldSourceLineCount + @overlappingLines - $deletedLineCount;
1511     my $targetLineCount = $oldTargetLineCount + @overlappingLines - $deletedLineCount;
1512     $lines[$chunkStartIndex - 1] = "@@ -1,$sourceLineCount +1,$targetLineCount @@";
1513 
1514     $changeLogHashRef{patch} = join($lineEnding, @lines) . "\n"; # patch(1) expects an extra trailing newline.
1515     return \%changeLogHashRef;
1516 }
1517 
1518 # This is a supporting method for runPatchCommand.
1519 #
1520 # Arg: the optional $args parameter passed to runPatchCommand (can be undefined).
1521 #
1522 # Returns ($patchCommand, $isForcing).
1523 #
1524 # This subroutine has unit tests in VCSUtils_unittest.pl.
1525 sub generatePatchCommand($)
1526 {
1527     my ($passedArgsHashRef) = @_;
1528 
1529     my $argsHashRef = { # Defaults
1530         ensureForce => 0,
1531         shouldReverse => 0,
1532         options => []
1533     };
1534 
1535     # Merges hash references. It's okay here if passed hash reference is undefined.
1536     @{$argsHashRef}{keys %{$passedArgsHashRef}} = values %{$passedArgsHashRef};
1537 
1538     my $ensureForce = $argsHashRef->{ensureForce};
1539     my $shouldReverse = $argsHashRef->{shouldReverse};
1540     my $options = $argsHashRef->{options};
1541 
1542     if (! $options) {
1543         $options = [];
1544     } else {
1545         $options = [@{$options}]; # Copy to avoid side effects.
1546     }
1547 
1548     my $isForcing = 0;
1549     if (grep /^--force$/, @{$options}) {
1550         $isForcing = 1;
1551     } elsif ($ensureForce) {
1552         push @{$options}, "--force";
1553         $isForcing = 1;
1554     }
1555 
1556     if ($shouldReverse) { # No check: --reverse should never be passed explicitly.
1557         push @{$options}, "--reverse";
1558     }
1559 
1560     @{$options} = sort(@{$options}); # For easier testing.
1561 
1562     my $patchCommand = join(" ", "patch -p0", @{$options});
1563 
1564     return ($patchCommand, $isForcing);
1565 }
1566 
1567 # Apply the given patch using the patch(1) command.
1568 #
1569 # On success, return the resulting exit status. Otherwise, exit with the
1570 # exit status. If "--force" is passed as an option, however, then never
1571 # exit and always return the exit status.
1572 #
1573 # Args:
1574 #   $patch: a patch string.
1575 #   $repositoryRootPath: an absolute path to the repository root.
1576 #   $pathRelativeToRoot: the path of the file to be patched, relative to the
1577 #                        repository root. This should normally be the path
1578 #                        found in the patch's "Index:" line. It is passed
1579 #                        explicitly rather than reparsed from the patch
1580 #                        string for optimization purposes.
1581 #                            This is used only for error reporting. The
1582 #                        patch command gleans the actual file to patch
1583 #                        from the patch string.
1584 #   $args: a reference to a hash of optional arguments. The possible
1585 #          keys are --
1586 #            ensureForce: whether to ensure --force is passed (defaults to 0).
1587 #            shouldReverse: whether to pass --reverse (defaults to 0).
1588 #            options: a reference to an array of options to pass to the
1589 #                     patch command. The subroutine passes the -p0 option
1590 #                     no matter what. This should not include --reverse.
1591 #
1592 # This subroutine has unit tests in VCSUtils_unittest.pl.
1593 sub runPatchCommand($$$;$)
1594 {
1595     my ($patch, $repositoryRootPath, $pathRelativeToRoot, $args) = @_;
1596 
1597     my ($patchCommand, $isForcing) = generatePatchCommand($args);
1598 
1599     # Temporarily change the working directory since the path found
1600     # in the patch's "Index:" line is relative to the repository root
1601     # (i.e. the same as $pathRelativeToRoot).
1602     my $cwd = Cwd::getcwd();
1603     chdir $repositoryRootPath;
1604 
1605     open PATCH, "| $patchCommand" or die "Could not call \"$patchCommand\" for file \"$pathRelativeToRoot\": $!";
1606     print PATCH $patch;
1607     close PATCH;
1608     my $exitStatus = exitStatus($?);
1609 
1610     chdir $cwd;
1611 
1612     if ($exitStatus && !$isForcing) {
1613         print "Calling \"$patchCommand\" for file \"$pathRelativeToRoot\" returned " .
1614               "status $exitStatus.  Pass --force to ignore patch failures.\n";
1615         exit $exitStatus;
1616     }
1617 
1618     return $exitStatus;
1619 }
1620 
1621 # Merge ChangeLog patches using a three-file approach.
1622 #
1623 # This is used by resolve-ChangeLogs when it's operated as a merge driver
1624 # and when it's used to merge conflicts after a patch is applied or after
1625 # an svn update.
1626 #
1627 # It's also used for traditional rejected patches.
1628 #
1629 # Args:
1630 #   $fileMine:  The merged version of the file.  Also known in git as the
1631 #               other branch's version (%B) or "ours".
1632 #               For traditional patch rejects, this is the *.rej file.
1633 #   $fileOlder: The base version of the file.  Also known in git as the
1634 #               ancestor version (%O) or "base".
1635 #               For traditional patch rejects, this is the *.orig file.
1636 #   $fileNewer: The current version of the file.  Also known in git as the
1637 #               current version (%A) or "theirs".
1638 #               For traditional patch rejects, this is the original-named
1639 #               file.
1640 #
1641 # Returns 1 if merge was successful, else 0.
1642 sub mergeChangeLogs($$$)
1643 {
1644     my ($fileMine, $fileOlder, $fileNewer) = @_;
1645 
1646     my $traditionalReject = $fileMine =~ /\.rej$/ ? 1 : 0;
1647 
1648     local $/ = undef;
1649 
1650     my $patch;
1651     if ($traditionalReject) {
1652         open(DIFF, "<", $fileMine) or die $!;
1653         $patch = <DIFF>;
1654         close(DIFF);
1655         rename($fileMine, "$fileMine.save");
1656         rename($fileOlder, "$fileOlder.save");
1657     } else {
1658         open(DIFF, "-|", qw(diff -u -a --binary), $fileOlder, $fileMine) or die $!;
1659         $patch = <DIFF>;
1660         close(DIFF);
1661     }
1662 
1663     unlink("${fileNewer}.orig");
1664     unlink("${fileNewer}.rej");
1665 
1666     open(PATCH, "| patch --force --fuzz=3 --binary $fileNewer > " . File::Spec->devnull()) or die $!;
1667     if ($traditionalReject) {
1668         print PATCH $patch;
1669     } else {
1670         my $changeLogHash = fixChangeLogPatch($patch);
1671         print PATCH $changeLogHash->{patch};
1672     }
1673     close(PATCH);
1674 
1675     my $result = !exitStatus($?);
1676 
1677     # Refuse to merge the patch if it did not apply cleanly
1678     if (-e "${fileNewer}.rej") {
1679         unlink("${fileNewer}.rej");
1680         if (-f "${fileNewer}.orig") {
1681             unlink($fileNewer);
1682             rename("${fileNewer}.orig", $fileNewer);
1683         }
1684     } else {
1685         unlink("${fileNewer}.orig");
1686     }
1687 
1688     if ($traditionalReject) {
1689         rename("$fileMine.save", $fileMine);
1690         rename("$fileOlder.save", $fileOlder);
1691     }
1692 
1693     return $result;
1694 }
1695 
1696 sub gitConfig($)
1697 {
1698     return unless $isGit;
1699 
1700     my ($config) = @_;
1701 
1702     my $result = `git config $config`;
1703     if (($? >> 8)) {
1704         $result = `git repo-config $config`;
1705     }
1706     chomp $result;
1707     return $result;
1708 }
1709 
1710 sub changeLogNameError($)
1711 {
1712     my ($message) = @_;
1713     print STDERR "$message\nEither:\n";
1714     print STDERR "  set CHANGE_LOG_NAME in your environment\n";
1715     print STDERR "  OR pass --name= on the command line\n";
1716     print STDERR "  OR set REAL_NAME in your environment";
1717     print STDERR "  OR git users can set 'git config user.name'\n";
1718     exit(1);
1719 }
1720 
1721 sub changeLogName()
1722 {
1723     my $name = $ENV{CHANGE_LOG_NAME} || $ENV{REAL_NAME} || gitConfig("user.name") || (split /\s*,\s*/, (getpwuid $<)[6])[0];
1724 
1725     changeLogNameError("Failed to determine ChangeLog name.") unless $name;
1726     # getpwuid seems to always succeed on windows, returning the username instead of the full name.  This check will catch that case.
1727     changeLogNameError("'$name' does not contain a space!  ChangeLogs should contain your full name.") unless ($name =~ /\w \w/);
1728 
1729     return $name;
1730 }
1731 
1732 sub changeLogEmailAddressError($)
1733 {
1734     my ($message) = @_;
1735     print STDERR "$message\nEither:\n";
1736     print STDERR "  set CHANGE_LOG_EMAIL_ADDRESS in your environment\n";
1737     print STDERR "  OR pass --email= on the command line\n";
1738     print STDERR "  OR set EMAIL_ADDRESS in your environment\n";
1739     print STDERR "  OR git users can set 'git config user.email'\n";
1740     exit(1);
1741 }
1742 
1743 sub changeLogEmailAddress()
1744 {
1745     my $emailAddress = $ENV{CHANGE_LOG_EMAIL_ADDRESS} || $ENV{EMAIL_ADDRESS} || gitConfig("user.email");
1746 
1747     changeLogEmailAddressError("Failed to determine email address for ChangeLog.") unless $emailAddress;
1748     changeLogEmailAddressError("Email address '$emailAddress' does not contain '\@' and is likely invalid.") unless ($emailAddress =~ /\@/);
1749 
1750     return $emailAddress;
1751 }
1752 
1753 # http://tools.ietf.org/html/rfc1924
1754 sub decodeBase85($)
1755 {
1756     my ($encoded) = @_;
1757     my %table;
1758     my @characters = ('0'..'9', 'A'..'Z', 'a'..'z', '!', '#', '$', '%', '&', '(', ')', '*', '+', '-', ';', '<', '=', '>', '?', '@', '^', '_', '`', '{', '|', '}', '~');
1759     for (my $i = 0; $i < 85; $i++) {
1760         $table{$characters[$i]} = $i;
1761     }
1762 
1763     my $decoded = '';
1764     my @encodedChars = $encoded =~ /./g;
1765 
1766     for (my $encodedIter = 0; defined($encodedChars[$encodedIter]);) {
1767         my $digit = 0;
1768         for (my $i = 0; $i < 5; $i++) {
1769             $digit *= 85;
1770             my $char = $encodedChars[$encodedIter];
1771             $digit += $table{$char};
1772             $encodedIter++;
1773         }
1774 
1775         for (my $i = 0; $i < 4; $i++) {
1776             $decoded .= chr(($digit >> (3 - $i) * 8) & 255);
1777         }
1778     }
1779 
1780     return $decoded;
1781 }
1782 
1783 sub decodeGitBinaryChunk($$)
1784 {
1785     my ($contents, $fullPath) = @_;
1786 
1787     # Load this module lazily in case the user don't have this module
1788     # and won't handle git binary patches.
1789     require Compress::Zlib;
1790 
1791     my $encoded = "";
1792     my $compressedSize = 0;
1793     while ($contents =~ /^([A-Za-z])(.*)$/gm) {
1794         my $line = $2;
1795         next if $line eq "";
1796         die "$fullPath: unexpected size of a line: $&" if length($2) % 5 != 0;
1797         my $actualSize = length($2) / 5 * 4;
1798         my $encodedExpectedSize = ord($1);
1799         my $expectedSize = $encodedExpectedSize <= ord("Z") ? $encodedExpectedSize - ord("A") + 1 : $encodedExpectedSize - ord("a") + 27;
1800 
1801         die "$fullPath: unexpected size of a line: $&" if int(($expectedSize + 3) / 4) * 4 != $actualSize;
1802         $compressedSize += $expectedSize;
1803         $encoded .= $line;
1804     }
1805 
1806     my $compressed = decodeBase85($encoded);
1807     $compressed = substr($compressed, 0, $compressedSize);
1808     return Compress::Zlib::uncompress($compressed);
1809 }
1810 
1811 sub decodeGitBinaryPatch($$)
1812 {
1813     my ($contents, $fullPath) = @_;
1814 
1815     # Git binary patch has two chunks. One is for the normal patching
1816     # and another is for the reverse patching.
1817     #
1818     # Each chunk a line which starts from either "literal" or "delta",
1819     # followed by a number which specifies decoded size of the chunk.
1820     #
1821     # Then, content of the chunk comes. To decode the content, we
1822     # need decode it with base85 first, and then zlib.
1823     my $gitPatchRegExp = '(literal|delta) ([0-9]+)\n([A-Za-z0-9!#$%&()*+-;<=>?@^_`{|}~\\n]*?)\n\n';
1824     if ($contents !~ m"\nGIT binary patch\n$gitPatchRegExp$gitPatchRegExp\Z") {
1825         die "$fullPath: unknown git binary patch format"
1826     }
1827 
1828     my $binaryChunkType = $1;
1829     my $binaryChunkExpectedSize = $2;
1830     my $encodedChunk = $3;
1831     my $reverseBinaryChunkType = $4;
1832     my $reverseBinaryChunkExpectedSize = $5;
1833     my $encodedReverseChunk = $6;
1834 
1835     my $binaryChunk = decodeGitBinaryChunk($encodedChunk, $fullPath);
1836     my $binaryChunkActualSize = length($binaryChunk);
1837     my $reverseBinaryChunk = decodeGitBinaryChunk($encodedReverseChunk, $fullPath);
1838     my $reverseBinaryChunkActualSize = length($reverseBinaryChunk);
1839 
1840     die "$fullPath: unexpected size of the first chunk (expected $binaryChunkExpectedSize but was $binaryChunkActualSize" if ($binaryChunkType eq "literal" and $binaryChunkExpectedSize != $binaryChunkActualSize);
1841     die "$fullPath: unexpected size of the second chunk (expected $reverseBinaryChunkExpectedSize but was $reverseBinaryChunkActualSize" if ($reverseBinaryChunkType eq "literal" and $reverseBinaryChunkExpectedSize != $reverseBinaryChunkActualSize);
1842 
1843     return ($binaryChunkType, $binaryChunk, $reverseBinaryChunkType, $reverseBinaryChunk);
1844 }
1845 
1846 sub readByte($$)
1847 {
1848     my ($data, $location) = @_;
1849 
1850     # Return the byte at $location in $data as a numeric value.
1851     return ord(substr($data, $location, 1));
1852 }
1853 
1854 # The git binary delta format is undocumented, except in code:
1855 # - https://github.com/git/git/blob/master/delta.h:get_delta_hdr_size is the source
1856 #   of the algorithm in decodeGitBinaryPatchDeltaSize.
1857 # - https://github.com/git/git/blob/master/patch-delta.c:patch_delta is the source
1858 #   of the algorithm in applyGitBinaryPatchDelta.
1859 sub decodeGitBinaryPatchDeltaSize($)
1860 {
1861     my ($binaryChunk) = @_;
1862 
1863     # Source and destination buffer sizes are stored in 7-bit chunks at the
1864     # start of the binary delta patch data.  The highest bit in each byte
1865     # except the last is set; the remaining 7 bits provide the next
1866     # chunk of the size.  The chunks are stored in ascending significance
1867     # order.
1868     my $cmd;
1869     my $size = 0;
1870     my $shift = 0;
1871     for (my $i = 0; $i < length($binaryChunk);) {
1872         $cmd = readByte($binaryChunk, $i++);
1873         $size |= ($cmd & 0x7f) << $shift;
1874         $shift += 7;
1875         if (!($cmd & 0x80)) {
1876             return ($size, $i);
1877         }
1878     }
1879 }
1880 
1881 sub applyGitBinaryPatchDelta($$)
1882 {
1883     my ($binaryChunk, $originalContents) = @_;
1884 
1885     # Git delta format consists of two headers indicating source buffer size
1886     # and result size, then a series of commands.  Each command is either
1887     # a copy-from-old-version (the 0x80 bit is set) or a copy-from-delta
1888     # command.  Commands are applied sequentially to generate the result.
1889     #
1890     # A copy-from-old-version command encodes an offset and size to copy
1891     # from in subsequent bits, while a copy-from-delta command consists only
1892     # of the number of bytes to copy from the delta.
1893 
1894     # We don't use these values, but we need to know how big they are so that
1895     # we can skip to the diff data.
1896     my ($size, $bytesUsed) = decodeGitBinaryPatchDeltaSize($binaryChunk);
1897     $binaryChunk = substr($binaryChunk, $bytesUsed);
1898     ($size, $bytesUsed) = decodeGitBinaryPatchDeltaSize($binaryChunk);
1899     $binaryChunk = substr($binaryChunk, $bytesUsed);
1900 
1901     my $out = "";
1902     for (my $i = 0; $i < length($binaryChunk); ) {
1903         my $cmd = ord(substr($binaryChunk, $i++, 1));
1904         if ($cmd & 0x80) {
1905             # Extract an offset and size from the delta data, then copy
1906             # $size bytes from $offset in the original data into the output.
1907             my $offset = 0;
1908             my $size = 0;
1909             if ($cmd & 0x01) { $offset = readByte($binaryChunk, $i++); }
1910             if ($cmd & 0x02) { $offset |= readByte($binaryChunk, $i++) << 8; }
1911             if ($cmd & 0x04) { $offset |= readByte($binaryChunk, $i++) << 16; }
1912             if ($cmd & 0x08) { $offset |= readByte($binaryChunk, $i++) << 24; }
1913             if ($cmd & 0x10) { $size = readByte($binaryChunk, $i++); }
1914             if ($cmd & 0x20) { $size |= readByte($binaryChunk, $i++) << 8; }
1915             if ($cmd & 0x40) { $size |= readByte($binaryChunk, $i++) << 16; }
1916             if ($size == 0) { $size = 0x10000; }
1917             $out .= substr($originalContents, $offset, $size);
1918         } elsif ($cmd) {
1919             # Copy $cmd bytes from the delta data into the output.
1920             $out .= substr($binaryChunk, $i, $cmd);
1921             $i += $cmd;
1922         } else {
1923             die "unexpected delta opcode 0";
1924         }
1925     }
1926 
1927     return $out;
1928 }
1929 
1930 1;
1931