#!/usr/bin/perl -w
# Copyright (c) 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

# Use: find_copyrights.pl <start-from> [exclude-dir ...]
#
# Walks <start-from> with find(1), skipping any directory named in
# [exclude-dir ...], and prints one line per non-empty source file:
#
#   <path>\t<copyright holders joined by " / ">
#   <path>\t*No copyright*
#   <path>\tGENERATED FILE

use strict;
use warnings;
use File::Basename;

sub check_is_generated_file($);
sub start_copyright_parsing();

my $progname = basename($0);

# First argument is the directory to scan; every remaining argument is a
# directory name that must be pruned from the walk.
my $root_dir = shift @ARGV;
my @find_args = ();
while (@ARGV) {
  my $path = shift @ARGV;
  push @find_args, qw'-not ( -path', "*/$path/*", qw'-prune )';
}
push @find_args, qw(-follow -type f -print);

# List-form pipe open: the arguments are passed to find without going
# through a shell.
open my $find_fh, '-|', 'find', $root_dir, @find_args
  or die "$progname: Couldn't exec find: $!\n";

# Only files whose extension matches this pattern are scanned.
my $check_regex = qr/\.(asm|c(c|pp|xx)?|h(h|pp|xx)?|p(l|m)|xs|sh|php|py(|x)
                     |rb|idl|java|el|sc(i|e)|cs|pas|inc|js|pac|html|dtd
                     |xsl|mod|mm?|tex|mli?)$/x;
my @files = ();
while (my $found = <$find_fh>) {
  chomp $found;
  # Test the name first so that non-source files are never stat()ed.
  next unless $found =~ $check_regex;
  next if -z $found;
  push @files, $found;
}
# The exit status of find is not checked here.
close $find_fh;

# Number of leading lines of each file inspected for "generated file" markers.
my $generated_file_scan_boundary = 25;
while (@files) {
  my $file = shift @files;
  my $file_header = '';
  # Maps the lower-cased copyright text to its original spelling, so that
  # statements differing only in case are reported once.
  my %copyrights;
  # Three-arg open: the file name is never interpreted as a mode or command.
  open(my $source_fh, '<', $file)
    or die "$progname: Unable to access $file: $!\n";
  my $parse_copyright = start_copyright_parsing();
  while (my $line = <$source_fh>) {
    $file_header .= $line unless $. > $generated_file_scan_boundary;
    my $copyright_match = $parse_copyright->($line, $.);
    if ($copyright_match) {
      $copyrights{lc($copyright_match)} = $copyright_match;
    }
  }
  close($source_fh);

  my $copyright = join(" / ", sort values %copyrights);
  my $status;
  if (check_is_generated_file($file_header)) {
    $status = "GENERATED FILE";
  } else {
    $status = $copyright || "*No copyright*";
  }
  print "$file\t$status\n";
}

# Tells whether a file header carries one of the usual "this file is
# generated" markers.
#
# Argument: the header text (the caller passes the first
#           $generated_file_scan_boundary lines of the file).
# Returns:  1 when a marker is found, 0 otherwise.
sub check_is_generated_file($) {
  my $license = uc($_[0]);
  # Remove Python multiline comments to avoid false positives.
  if (index($license, '"""') != -1) {
    $license =~ s/"""[^"]*(?:"""|$)//mg;
  }
  if (index($license, "'''") != -1) {
    $license =~ s/'''[^']*(?:'''|$)//mg;
  }
  # Quick checks using index.
  if (index($license, 'ALL CHANGES MADE IN THIS FILE WILL BE LOST') != -1) {
    return 1;
  }
  if (index($license, 'DO NOT EDIT') != -1 ||
      index($license, 'DO NOT DELETE') != -1 ||
      index($license, 'GENERATED') != -1) {
    # Each marker is its own alternative.  Under /x literal spaces have to
    # be escaped, hence the "\ " sequences.
    my $is_generated = $license =~ m{
        All\ changes\ made\ in\ this\ file\ will\ be\ lost
      | DO\ NOT\ (?:EDIT|delete\ this\ file)
      | Generated\ (?:at|automatically|data)
      | Automatically\ generated
      | \Wgenerated\s+(?:\w+\s+)*file\W
    }xi;
    return $is_generated ? 1 : 0;
  }
  return 0;
}

# Tells whether $later follows $earlier by at most $max_distance.
#
# Arguments: ($later, $earlier, $max_distance), all numbers.
# Returns:   true when 0 <= $later - $earlier <= $max_distance.
sub are_within_increasing_progression($$$) {
  my ($later, $earlier, $max_distance) = @_;
  my $delta = $later - $earlier;
  return $delta >= 0 && $delta <= $max_distance;
}

# Creates a per-file copyright line parser.
#
# Returns a closure taking ($line, $line_number).  The closure returns the
# copyright statement found on that line with the indicator itself removed,
# or '' when the line carries none.  It keeps state between calls (the
# lines where '(a)' and '(b)' were last seen in a C++ comment), so a fresh
# parser has to be created for every file.
sub start_copyright_parsing() {
  my $max_line_numbers_proximity = 3;
  # Set up the defaults the way that proximity checks will not succeed.
  my $last_a_item_line_number = -200;
  my $last_b_item_line_number = -100;

  # Compiled once per parser instead of once per line.
  my $copyright_indicator_regex =
    qr/(?:copyright|copr\.|\x{00a9}|\xc2\xa9|\(c\))/i;
  my $full_copyright_indicator_regex =
    qr/(?:\W|^)$copyright_indicator_regex(?::\s*|\s+)(\w.*)$/i;
  my $copyright_disindicator_regex =
    qr/^\s*\b(?:info(?:rmation)?|notice|and|or)\b/i;

  return sub {
    my ($line, $line_number) = @_;

    # Remove C / C++ strings to avoid false positives.
    if (index($line, '"') != -1) {
      $line =~ s/"[^"\\]*(?:\\.[^"\\]*)*"//g;
    }

    my $uc_line = uc($line);

    # Record '(a)' and '(b)' last occurrences in C++ comments.
    my $cpp_comment_idx = index($uc_line, '//');
    if ($cpp_comment_idx != -1) {
      if (index($uc_line, '(A)') > $cpp_comment_idx) {
        $last_a_item_line_number = $line_number;
      }
      if (index($uc_line, '(B)') > $cpp_comment_idx) {
        $last_b_item_line_number = $line_number;
      }
    }

    # Fast bailout, uses the same patterns as the regexp.  The copyright
    # sign has to be written in double quotes: in single quotes "\x.."
    # is a literal backslash sequence and would never be found.
    if (index($uc_line, 'COPYRIGHT') == -1 &&
        index($uc_line, 'COPR.') == -1 &&
        index($line, "\x{00a9}") == -1 &&
        index($line, "\xc2\xa9") == -1) {

      my $c_item_index = index($uc_line, '(C)');
      return '' if ($c_item_index == -1);
      # Filter out 'c' used as a list item inside C++ comments.
      # E.g. "// blah-blah (a) blah\n// blah-blah (b) and (c) blah"
      if ($c_item_index > $cpp_comment_idx &&
          are_within_increasing_progression(
              $line_number,
              $last_b_item_line_number,
              $max_line_numbers_proximity) &&
          are_within_increasing_progression(
              $last_b_item_line_number,
              $last_a_item_line_number,
              $max_line_numbers_proximity)) {
        return '';
      }
    }

    my $copyright = '';
    if ($line =~ $full_copyright_indicator_regex) {
      my $match = $1;
      # Skip phrases such as "copyright notice" or "copyright and ...".
      if ($match !~ $copyright_disindicator_regex) {
        $match =~ s/([,.])?\s*$//;                # Trailing punctuation.
        $match =~ s/$copyright_indicator_regex//g; # Repeated indicators.
        $match =~ s/^\s+//;
        $match =~ s/\s{2,}/ /g;
        $match =~ s/\\@/@/g;
        $copyright = $match;
      }
    }

    return $copyright;
  };
}