• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/perl -w
2# Copyright (c) 2013 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6# Use: find_copyrights.pl <start-from> [exclude-dir ...]
7
8use strict;
9use warnings;
10use File::Basename;
11
12sub check_is_generated_file($);
13sub start_copyright_parsing();
14
my $progname = basename($0);

# The first argument is the directory to scan. Without it there is nothing
# sensible to hand to find(1), so stop with the usage line instead of passing
# an undefined value on.
my $root_dir = shift @ARGV;
die "Usage: $progname <start-from> [exclude-dir ...]\n"
    unless defined $root_dir && length $root_dir;

# Every remaining argument names a directory to leave out of the scan; each
# one becomes a "-not ( -path */DIR/* -prune )" clause on the find(1) command
# line. splice() drains @ARGV, as the original shift loop did.
my @find_args = ();
for my $excluded_dir (splice @ARGV) {
    push @find_args, '-not', '(', '-path', "*/$excluded_dir/*", '-prune', ')';
}
push @find_args, qw(-follow -type f -print);

# List-form pipe open: the arguments reach find(1) without going through a
# shell, so paths containing shell metacharacters are passed verbatim.
open(my $find_fh, '-|', 'find', $root_dir, @find_args)
    or die "$progname: Couldn't exec find: $!\n";

# Extensions of the files whose contents are scanned. Compiled once here
# rather than re-interpolated for every path find(1) reports.
my $check_regex_source =
    '\.(asm|c(c|pp|xx)?|h(h|pp|xx)?|p(l|m)|xs|sh|php|py(|x)' .
    '|rb|idl|java|el|sc(i|e)|cs|pas|inc|js|pac|html|dtd|xsl|mod|mm?' .
    '|tex|mli?)$';
my $check_regex = qr/$check_regex_source/;

my @files = ();
while (my $path = <$find_fh>) {
    chomp $path;
    # Skip zero-length files and anything without a recognised extension.
    next if -z $path;
    next unless $path =~ $check_regex;
    push @files, $path;
}
close $find_fh;
36
# Only this many leading lines of a file are handed to
# check_is_generated_file().
my $generated_file_scan_boundary = 25;

# For every collected file print one line: "<path>\t<result>", where the
# result is "GENERATED FILE", the " / "-joined list of distinct copyright
# strings, or "*No copyright*".
while (defined(my $file = shift @files)) {
    my $file_header = '';
    my %copyrights;

    # Three-argument open with a lexical handle: the path is taken literally,
    # so names with leading/trailing whitespace or mode characters ('>', '|')
    # are opened as the files they are.
    open(my $fh, '<', $file)
        or die "$progname: Unable to access $file: $!\n";

    # A fresh parser per file, so its "(a)/(b)" list-item state does not leak
    # from one file into the next.
    my $parse_copyright = start_copyright_parsing();
    while (my $line = <$fh>) {
        $file_header .= $line if $. <= $generated_file_scan_boundary;
        my $copyright_match = $parse_copyright->($line, $.);
        # Key by the lower-cased text so the same notice differing only in
        # case is reported once.
        $copyrights{lc $copyright_match} = $copyright_match
            if $copyright_match;
    }
    close($fh);

    my $summary;
    if (check_is_generated_file($file_header)) {
        $summary = "GENERATED FILE";
    } else {
        $summary = join(" / ", sort values %copyrights) || "*No copyright*";
    }
    print "$file\t$summary\n";
}
61
# Decides whether a piece of file header text announces that the file is
# machine-generated.
#
# Argument: the text to inspect, as a single (possibly multi-line) string.
# Returns:  1 when a "generated file" marker is found, 0 otherwise.
sub check_is_generated_file($) {
    my $license = uc($_[0]);
    # Remove Python multiline comments to avoid false positives
    if (index($license, '"""') != -1) {
        $license =~ s/"""[^"]*(?:"""|$)//mg;
    }
    if (index($license, "'''") != -1) {
        $license =~ s/'''[^']*(?:'''|$)//mg;
    }
    # Quick checks using index.
    if (index($license, 'ALL CHANGES MADE IN THIS FILE WILL BE LOST') != -1) {
        return 1;
    }
    if (index($license, 'DO NOT EDIT') != -1 ||
        index($license, 'DO NOT DELETE') != -1 ||
        index($license, 'GENERATED') != -1) {
        # The alternatives are joined outside the regex literal. Writing the
        # pieces as quoted fragments inside /.../ would make the quotes, the
        # dot and the line break part of the pattern, so the "DO NOT ..." and
        # "Generated ..." alternatives could never match a real header.
        my $generated_marker_regex = join('|',
            'All changes made in this file will be lost',
            'DO NOT (?:EDIT|delete this file)',
            'Generated (?:at|automatically|data)',
            'Automatically generated',
            '\Wgenerated\s+(?:\w+\s+)*file\W');
        return ($license =~ /$generated_marker_regex/i) ? 1 : 0;
    }
    return 0;
}
84
# Tells whether the first value follows the second one closely enough.
#
# Arguments: ($later, $earlier, $max_gap).
# Returns a true value when $later - $earlier lies within [0, $max_gap],
# and a false value otherwise.
sub are_within_increasing_progression($$$) {
    my ($later, $earlier, $max_gap) = @_;
    my $gap = $later - $earlier;
    # A negative gap means the values are not in increasing order.
    return !1 unless $gap >= 0;
    return $gap <= $max_gap;
}
89
# Creates a stateful, line-by-line copyright parser.
#
# Returns a closure that takes ($line, $line_number) and returns the
# copyright text found on that line, or '' when the line carries none.
# The closure remembers on which lines '(a)' and '(b)' were last seen inside
# C++ comments, so that a '(c)' list item shortly after them is not mistaken
# for a copyright sign.
sub start_copyright_parsing() {
    my $max_line_numbers_proximity = 3;
    # Set up the defaults the way that proximity checks will not succeed.
    my $last_a_item_line_number = -200;
    my $last_b_item_line_number = -100;

    # The patterns are constant, so they are compiled once per parser instead
    # of being rebuilt for every line.
    my $copyright_indicator_regex =
        qr/(?:copyright|copr\.|\x{00a9}|\xc2\xa9|\(c\))/i;
    my $full_copyright_indicator_regex =
        qr/(?:\W|^)${copyright_indicator_regex}(?::\s*|\s+)(\w.*)$/i;
    # Words that, right after an indicator, show the line merely talks about
    # copyright ("copyright notice", "copyright and ...").
    my $copyright_disindicator_regex =
        qr/\b(?:info(?:rmation)?|notice|and|or)\b/i;

    return sub {
        my ($line, $line_number) = @_;

        # Remove C / C++ strings to avoid false positives.
        if (index($line, '"') != -1) {
            $line =~ s/"[^"\\]*(?:\\.[^"\\]*)*"//g;
        }

        my $uc_line = uc($line);

        # Record '(a)' and '(b)' last occurrences in C++ comments.
        my $cpp_comment_idx = index($uc_line, '//');
        if ($cpp_comment_idx != -1) {
            if (index($uc_line, '(A)') > $cpp_comment_idx) {
                $last_a_item_line_number = $line_number;
            }
            if (index($uc_line, '(B)') > $cpp_comment_idx) {
                $last_b_item_line_number = $line_number;
            }
        }

        # Fast bailout, uses the same patterns as the regexp.
        # The copyright sign must be written with a double-quoted escape:
        # in single quotes '\x{00a9}' is literal backslash text and would
        # never be found. The byte 0xA9 covers both the Latin-1 sign and the
        # last byte of its UTF-8 encoding (0xC2 0xA9).
        if (index($uc_line, 'COPYRIGHT') == -1 &&
            index($uc_line, 'COPR.') == -1 &&
            index($uc_line, "\xa9") == -1) {

            my $c_item_index = index($uc_line, '(C)');
            return '' if ($c_item_index == -1);
            # Filter out 'c' used as a list item inside C++ comments.
            # E.g. "// blah-blah (a) blah\n// blah-blah (b) and (c) blah"
            if ($c_item_index > $cpp_comment_idx &&
                are_within_increasing_progression(
                    $line_number,
                    $last_b_item_line_number,
                    $max_line_numbers_proximity) &&
                are_within_increasing_progression(
                    $last_b_item_line_number,
                    $last_a_item_line_number,
                    $max_line_numbers_proximity)) {
                return '';
            }
        }

        my $copyright = '';
        if ($line =~ $full_copyright_indicator_regex) {
            my $match = $1;
            if ($match !~ m%^\s*$copyright_disindicator_regex%i) {
                # Tidy the captured text: drop a trailing ',' or '.', any
                # further indicators, leading blanks, runs of whitespace,
                # and the backslash of an escaped '@'.
                $match =~ s/([,.])?\s*$//;
                $match =~ s/$copyright_indicator_regex//ig;
                $match =~ s/^\s+//;
                $match =~ s/\s{2,}/ /g;
                $match =~ s/\\@/@/g;
                $copyright = $match;
            }
        }

        return $copyright;
    };
}
164