#!/usr/bin/perl -w

# Copyright (C) 2005, 2006 Apple Computer, Inc. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
# 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of
#    its contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Extended "svn diff" script for WebKit Open Source Project, used to make patches.

# Differences from standard "svn diff":
#
#   Uses the real diff, not svn's built-in diff.
#   Always passes "-p" to diff so it will try to include function names.
#   Handles binary files (encoded as a base64 chunk of text).
#   Sorts the diffs alphabetically by text files, then binary files.
#   Handles copied and moved files.
#
# Missing features:
#
#   Handle copied and moved directories.

use strict;
use warnings;

use Config;
use File::Basename;
use File::Spec;
use File::stat;
use FindBin;
use Getopt::Long;
use lib $FindBin::Bin;
use MIME::Base64;
use POSIX qw(:errno_h);
use Time::gmtime;
use VCSUtils;

sub binarycmp($$);
sub canonicalizePath($);
sub findBaseUrl($);
sub findMimeType($;$);
sub findModificationType($);
sub findSourceFileAndRevision($);
sub fixChangeLogPatch($);
sub generateDiff($$);
sub generateFileList($\%);
sub isBinaryMimeType($);
sub manufacturePatchForAdditionWithHistory($);
sub numericcmp($$);
sub outputBinaryContent($);
sub patchpathcmp($$);
sub pathcmp($$);
sub processPaths(\@);
sub splitpath($);
sub testfilecmp($$);

# Force the "C" locale so that the output of the commands run below is not localized.
$ENV{'LC_ALL'} = 'C';

my $showHelp;
my $svnVersion = `svn --version --quiet`;
my $devNull = File::Spec->devnull();

my $result = GetOptions(
    "help" => \$showHelp,
);
if (!$result || $showHelp) {
    print STDERR basename($0) . " [-h|--help] [svndir1 [svndir2 ...]]\n";
    exit 1;
}

# Sort the diffs for easier reviewing.
my %paths = processPaths(@ARGV);

# Generate a list of files requiring diffs.
my %diffFiles;
for my $path (keys %paths) {
    generateFileList($path, %diffFiles);
}

my $svnRoot = determineSVNRoot();
my $prefix = chdirReturningRelativePath($svnRoot);

# Generate the diffs, in an order chosen for easy reviewing.
for my $path (sort patchpathcmp values %diffFiles) {
    generateDiff($path, $prefix);
}

exit 0;

# Overall sort, considering multiple criteria.
# Takes two file data hash references (as built by generateFileList) and returns
# a negative, zero or positive number, as a sort comparator must.
sub patchpathcmp($$)
{
    # Deliberately not named $a and $b, which are the package variables used by sort.
    my ($fileDataA, $fileDataB) = @_;

    # All binary files come after all non-binary files.
    my $result = binarycmp($fileDataA, $fileDataB);
    return $result if $result;

    # All test files come after all non-test files.
    $result = testfilecmp($fileDataA, $fileDataB);
    return $result if $result;

    # Final sort is a "smart" sort by directory and file name.
    return pathcmp($fileDataA, $fileDataB);
}

# Sort so text files appear before binary files.
# Compares the isBinary flags of two file data hash references.
sub binarycmp($$)
{
    my ($fileDataA, $fileDataB) = @_;
    return $fileDataA->{isBinary} <=> $fileDataB->{isBinary};
}

# Returns $file with extra slashes, '.' components and resolvable '..' components
# removed. A '..' that has no preceding component to cancel is kept. Returns "."
# when nothing is left.
sub canonicalizePath($)
{
    my ($file) = @_;

    # Remove extra slashes and '.' directories in path
    $file = File::Spec->canonpath($file);

    # Remove '..' directories in path
    my @dirs = ();
    foreach my $dir (File::Spec->splitdir($file)) {
        if ($dir eq '..' && @dirs && $dirs[-1] ne '..') {
            pop(@dirs);
        } else {
            push(@dirs, $dir);
        }
    }
    return @dirs ? File::Spec->catdir(@dirs) : ".";
}

# Returns the value of the "URL:" line printed by "svn info" for $infoPath,
# or undef when no such line is printed.
sub findBaseUrl($)
{
    my ($infoPath) = @_;
    my $baseUrl;
    my $info = openSvnPipe("info", $infoPath);
    while (my $line = <$info>) {
        $baseUrl = $1 if $line =~ /^URL: (.+)/;
    }
    close $info;
    return $baseUrl;
}

# Returns the svn:mime-type property of $file (at $revision, when one is given)
# with the trailing line ending removed, or undef when svn prints nothing.
sub findMimeType($;$)
{
    my ($file, $revision) = @_;
    my @arguments = ("propget", "svn:mime-type");
    push(@arguments, "--revision", $revision) if $revision;
    my $propget = openSvnPipe(@arguments, $file);
    my $mimeType = <$propget>;
    close $propget;
    # svn may output a different EOL sequence than $/, so avoid chomp.
    $mimeType =~ s/[\r\n]+$//g if $mimeType;
    return $mimeType;
}

# Maps the status columns of one "svn stat" line to "addition",
# "additionWithHistory", "modification" or "deletion". Returns undef for any
# other status.
sub findModificationType($)
{
    my ($stat) = @_;
    my $fileStat = substr($stat, 0, 1);
    my $propertyStat = substr($stat, 1, 1);
    if ($fileStat eq "A" || $fileStat eq "R") {
        # A "+" in the fourth column marks an item scheduled for addition with history.
        my $additionWithHistory = substr($stat, 3, 1);
        return $additionWithHistory eq "+" ? "additionWithHistory" : "addition";
    }
    return "modification" if ($fileStat eq "M" || $propertyStat eq "M");
    return "deletion" if ($fileStat eq "D");
    return;
}

# Returns the list (source file, source revision) taken from the "Copied From URL:"
# and "Copied From Rev:" lines of "svn info" for $file. The source file is made
# relative to the base URL of the current directory. Either element is undef when
# the corresponding line is not printed.
sub findSourceFileAndRevision($)
{
    my ($file) = @_;
    my $baseUrl = findBaseUrl(".");
    my $sourceFile;
    my $sourceRevision;
    my $info = openSvnPipe("info", $file);
    while (my $line = <$info>) {
        if ($line =~ /^Copied From URL: (.+)/) {
            $sourceFile = File::Spec->abs2rel($1, $baseUrl);
        } elsif ($line =~ /^Copied From Rev: ([0-9]+)/) {
            $sourceRevision = $1;
        }
    }
    close $info;
    return ($sourceFile, $sourceRevision);
}

# Takes the text of a ChangeLog patch and returns it, rewritten when necessary so
# that the added lines start at the top of the hunk instead of following leading
# context lines. The patch is returned unchanged when it does not consist of
# context, then additions, then exactly three lines of context in a hunk starting
# at line 1, or when that hunk has no more than three old lines.
sub fixChangeLogPatch($)
{
    my $patch = shift;
    my $contextLineCount = 3;

    # Nothing to fix when there is no patch text at all.
    return $patch unless defined $patch;

    return $patch if $patch !~ /\n@@ -1,(\d+) \+1,(\d+) @@\n( .*\n)+(\+.*\n)+( .*\n){$contextLineCount}$/m;
    my ($oldLineCount, $newLineCount) = ($1, $2);
    return $patch if $oldLineCount <= $contextLineCount;

    # The diff(1) command is greedy when matching lines, so a new ChangeLog entry will
    # have lines of context at the top of a patch when the existing entry has the same
    # date and author as the new entry.  This nifty loop alters a ChangeLog patch so
    # that the added lines ("+") in the patch always start at the beginning of the
    # patch and there are no initial lines of context.
    my $newPatch;
    my $lineCountInState = 0;
    # Number of leading context lines that have to be turned into added lines.
    my $oldContentLineCountReduction = $oldLineCount - $contextLineCount;
    my $newContentLineCountWithoutContext = $newLineCount - $oldLineCount - $oldContentLineCountReduction;
    my ($stateHeader, $statePreContext, $stateNewChanges, $statePostContext) = (1..4);
    my $state = $stateHeader;
    foreach my $line (split(/\n/, $patch)) {
        $lineCountInState++;
        if ($state == $stateHeader && $line =~ /^@@ -1,$oldLineCount \+1,$newLineCount @\@$/) {
            # Rewrite the hunk header to account for the context lines removed below.
            $line = "@@ -1,$contextLineCount +1," . ($newLineCount - $oldContentLineCountReduction) . " @@";
            $lineCountInState = 0;
            $state = $statePreContext;
        } elsif ($state == $statePreContext && substr($line, 0, 1) eq " ") {
            # Leading context lines become added lines.
            $line = "+" . substr($line, 1);
            if ($lineCountInState == $oldContentLineCountReduction) {
                $lineCountInState = 0;
                $state = $stateNewChanges;
            }
        } elsif ($state == $stateNewChanges && substr($line, 0, 1) eq "+") {
            # No changes to these lines
            if ($lineCountInState == $newContentLineCountWithoutContext) {
                $lineCountInState = 0;
                $state = $statePostContext;
            }
        } elsif ($state == $statePostContext) {
            if (substr($line, 0, 1) eq "+" && $lineCountInState <= $oldContentLineCountReduction) {
                # The last added lines become the context that follows the new entry.
                $line = " " . substr($line, 1);
            } elsif ($lineCountInState > $contextLineCount && substr($line, 0, 1) eq " ") {
                next; # Discard
            }
        }
        $newPatch .= $line . "\n";
    }

    return $newPatch;
}

# Prints the patch for one file to standard output.
# $fileData is a file data hash reference built by generateFileList and $prefix is
# prepended to its path. Additions with history get a manufactured header first,
# ChangeLog patches are passed through fixChangeLogPatch, and binary files are
# followed by their base64-encoded content.
sub generateDiff($$)
{
    my ($fileData, $prefix) = @_;
    my $file = File::Spec->catdir($prefix, $fileData->{path});
    if ($fileData->{modificationType} eq "additionWithHistory") {
        manufacturePatchForAdditionWithHistory($fileData);
    }
    my $diff = openSvnPipe("diff", "--diff-cmd", "diff", "-x", "-uaNp", $file);
    # join() yields an empty string rather than undef when svn prints nothing.
    my $patch = join("", <$diff>);
    close $diff;
    $patch = fixChangeLogPatch($patch) if basename($file) eq "ChangeLog";
    print $patch if $patch;
    if ($fileData->{isBinary}) {
        print "\n" if ($patch && $patch =~ m/\n\S+$/m);
        outputBinaryContent($file);
    }
}

# Runs "svn stat" on $statPath and records every added, modified or deleted file
# in the hash passed as second argument, keyed by path. Each entry holds path,
# modificationType, isBinary and isTestFile, plus sourceFile and sourceRevision
# for additions with history. Directories are skipped, and status lines that are
# not recognized are echoed to standard error.
sub generateFileList($\%)
{
    my ($statPath, $diffFiles) = @_;
    my %testDirectories = map { $_ => 1 } qw(LayoutTests);

    # The path starts one column later in the status output of svn 1.6 and newer.
    # Decide this once, from the leading "major.minor" of the version string.
    my $statusWidth = 7;
    if (defined $svnVersion && $svnVersion =~ /^(\d+)\.(\d+)/) {
        $statusWidth = 8 if $1 > 1 || ($1 == 1 && $2 >= 6);
    }

    my $status = openSvnPipe("stat", $statPath);
    while (my $line = <$status>) {
        # svn may output a different EOL sequence than $/, so avoid chomp.
        $line =~ s/[\r\n]+$//g;

        # A line too short to hold the status columns cannot describe a file.
        if (length($line) < $statusWidth) {
            print STDERR $line, "\n";
            next;
        }

        my $stat = substr($line, 0, $statusWidth);
        my $path = substr($line, $statusWidth);
        next if -d $path;
        my $modificationType = findModificationType($stat);
        if ($modificationType) {
            my $fileData = $diffFiles->{$path} ||= {};
            $fileData->{path} = $path;
            $fileData->{modificationType} = $modificationType;
            $fileData->{isBinary} = isBinaryMimeType($path);
            $fileData->{isTestFile} = exists $testDirectories{(File::Spec->splitdir($path))[0]} ? 1 : 0;
            if ($modificationType eq "additionWithHistory") {
                my ($sourceFile, $sourceRevision) = findSourceFileAndRevision($path);
                $fileData->{sourceFile} = $sourceFile;
                $fileData->{sourceRevision} = $sourceRevision;
            }
        } else {
            print STDERR $line, "\n";
        }
    }
    close $status;
}

# Returns 1 when $file has an svn:mime-type that does not start with "text/",
# and 0 when it has no mime type or a text one.
sub isBinaryMimeType($)
{
    my ($file) = @_;
    my $mimeType = findMimeType($file);
    return 0 if (!$mimeType || substr($mimeType, 0, 5) eq "text/");
    return 1;
}

# Prints a patch header for a file that was added with history, naming the file
# and revision it was copied from. For a binary file the header is followed by the
# mime type; otherwise it is followed by the whole content of the source file,
# shown as added lines.
sub manufacturePatchForAdditionWithHistory($)
{
    my ($fileData) = @_;
    my $file = $fileData->{path};
    print "Index: ${file}\n";
    print "=" x 67, "\n";
    my $sourceFile = $fileData->{sourceFile};
    my $sourceRevision = $fileData->{sourceRevision};
    print "--- ${file}\t(revision ${sourceRevision})\t(from ${sourceFile}:${sourceRevision})\n";
    print "+++ ${file}\t(working copy)\n";
    if ($fileData->{isBinary}) {
        print "\nCannot display: file marked as a binary type.\n";
        my $mimeType = findMimeType($file, $sourceRevision);
        print "svn:mime-type = ${mimeType}\n\n";
    } else {
        # This pipeline needs a shell, so quote the path: wrap it in single quotes
        # and write each embedded single quote as '\''.
        (my $quotedSourceFile = $sourceFile) =~ s/'/'\\''/g;
        print `svn cat '$quotedSourceFile' | diff -u $devNull - | tail -n +3`;
    }
}

# Starts "svn" with the given arguments and returns a filehandle for reading its
# standard output. The arguments are passed as a list, without going through a
# shell, so paths containing quotes or other shell metacharacters are passed
# unchanged. Dies when the command cannot be started.
sub openSvnPipe
{
    my (@arguments) = @_;
    open(my $handle, "-|", "svn", @arguments)
        or die "Failed to run \"svn @arguments\": $!";
    return $handle;
}

# Sort numeric parts of strings as numbers, other parts as strings.
# Makes 1.33 come after 1.3, which is cool.
sub numericcmp($$)
{
    # Takes two strings and returns -1, 0 or 1. Runs of digits are compared as
    # numbers and everything else as strings. The lexicals are deliberately not
    # named $a and $b, which are the package variables used by sort.
    my ($left, $right) = @_;

    # The capturing group makes split return the digit runs as well as the text
    # between them.
    my @leftChunks = split /(\d+)/, $left;
    my @rightChunks = split /(\d+)/, $right;

    # Compare one chunk at a time.
    # Each chunk is either all numeric digits, or all not numeric digits.
    while (@leftChunks && @rightChunks) {
        my $leftChunk = shift @leftChunks;
        my $rightChunk = shift @rightChunks;

        # Use numeric comparison if chunks are non-equal numbers.
        return $leftChunk <=> $rightChunk
            if $leftChunk =~ /^\d/ && $rightChunk =~ /^\d/ && $leftChunk != $rightChunk;

        # Use string comparison if chunks are any other kind of non-equal string.
        return $leftChunk cmp $rightChunk if $leftChunk ne $rightChunk;
    }

    # One of the two is now empty; compare lengths for result in this case.
    return @leftChunks <=> @rightChunks;
}

# Prints the content of the file at $path to standard output as base64 text,
# followed by an empty line. Prints nothing when the file does not exist, which
# is the case for a deletion. Dies when an existing file cannot be opened.
sub outputBinaryContent($)
{
    my ($path) = @_;
    # Deletion
    return if (! -e $path);
    # Addition or Modification
    # Three-argument open takes the name literally; the two-argument form would strip
    # surrounding whitespace and interpret mode or pipe characters in the name.
    open(my $binary, "<", $path) or die "Failed to open \"$path\" for reading: $!";
    # Read raw bytes, without any line-ending translation.
    binmode($binary);
    my $buffer;
    # Each 76-character line of base64 output encodes 57 bytes, so reading a multiple
    # of 57 bytes keeps the line breaks of consecutive chunks aligned.
    while (read($binary, $buffer, 60*57)) {
        print encode_base64($buffer);
    }
    close $binary;
    print "\n";
}

# Sort first by directory, then by file, so all paths in one directory are grouped
# rather than being interspersed with items from subdirectories.
# Use numericcmp to sort directory and filenames to make order logical.
# Also include a special case for ChangeLog, which comes first in any directory.
# Takes two hash references that each have a "path" entry.
sub pathcmp($$)
{
    my ($fileDataA, $fileDataB) = @_;

    my ($dira, $namea) = splitpath($fileDataA->{path});
    my ($dirb, $nameb) = splitpath($fileDataB->{path});

    return numericcmp($dira, $dirb) if $dira ne $dirb;
    return -1 if $namea eq "ChangeLog" && $nameb ne "ChangeLog";
    return +1 if $namea ne "ChangeLog" && $nameb eq "ChangeLog";
    return numericcmp($namea, $nameb);
}

# Validates and canonicalizes the paths given on the command line and returns them
# as the keys of a hash (as a flat key/value list). Returns ("." => 1) when no path
# is given or when "." is among the paths. A path is dropped when one of its parent
# directories is listed too. Dies on absolute paths, empty paths and paths that
# contain a single quote.
sub processPaths(\@)
{
    my ($paths) = @_;
    return ("." => 1) if (!@{$paths});

    my %result = ();

    for my $file (@{$paths}) {
        die "can't handle absolute paths like \"$file\"\n" if File::Spec->file_name_is_absolute($file);
        die "can't handle empty string path\n" if $file eq "";
        die "can't handle path with single quote in the name like \"$file\"\n" if $file =~ /'/; # ' (keep Xcode syntax highlighting happy)

        my $untouchedFile = $file;

        $file = canonicalizePath($file);

        # NOTE(review): this pattern only matches a ".." that has a slash on both sides,
        # so a canonicalized path that merely starts with "../" is not rejected here.
        # Confirm whether that is intended.
        die "can't handle paths with .. like \"$untouchedFile\"\n" if $file =~ m|/\.\./|;

        $result{$file} = 1;
    }

    return ("." => 1) if ($result{"."});

    # Remove any paths that also have a parent listed.
    for my $path (keys %result) {
        for (my $parent = dirname($path); $parent ne '.'; $parent = dirname($parent)) {
            if ($result{$parent}) {
                delete $result{$path};
                last;
            }
        }
    }

    return %result;
}

# Break up a path into the directory (with slash) and base name.
# The directory is the empty string for a path without a directory part.
sub splitpath($)
{
    my ($path) = @_;

    my $pathSeparator = "/";
    my $dirname = dirname($path) . $pathSeparator;
    $dirname = "" if $dirname eq "." . $pathSeparator;

    return ($dirname, basename($path));
}

# Sort so source code files appear before test files.
# Compares the isTestFile flags of two file data hash references.
sub testfilecmp($$)
{
    my ($fileDataA, $fileDataB) = @_;
    return $fileDataA->{isTestFile} <=> $fileDataB->{isTestFile};
}