• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python3
2# Run with directory arguments from any directory, with no special setup
3# required.
4
5import os
6from pathlib import Path
7import re
8import sys
9from typing import Sequence
10
# When true, warn_verbose() forwards its messages to warn(); when false they
# are silently dropped.
VERBOSE = False

# De-duplicated notice texts. extract_copyright_at() adds to this set and
# main() prints its contents in sorted order.
copyrights = set()
14
15
def warn(s):
    """Write *s* to standard error as a single ``warning: ...`` line."""
    message = "warning: %s\n" % s
    sys.stderr.write(message)
18
19
def warn_verbose(s):
    """Pass *s* on to warn(), but only when the module-level VERBOSE is set."""
    if not VERBOSE:
        return
    warn(s)
23
24
def is_interesting(path_str: str) -> bool:
    """Return whether the file named by *path_str* should be scanned.

    The path is lowercased before any comparison, so all checks are
    case-insensitive. A file is rejected when its extension or its file name
    is on one of the skip lists below, or when its name ends in ``~``.
    """
    lowered = Path(path_str.lower())
    skipped_suffixes = (
        ".bp",
        ".map",
        ".md",
        ".mk",
        ".py",
        ".pyc",
        ".swp",
        ".txt",
        ".xml",
    )
    skipped_names = ("notice", "readme", "pylintrc")
    return not (
        lowered.suffix in skipped_suffixes
        or lowered.name in skipped_names
        # Backup files for some editors.
        or lowered.match("*~")
    )
46
47
def is_copyright_end(line: str, first_line_was_hash: bool) -> bool:
    """Return whether *line* terminates a copyright header.

    When the header started with a ``#`` line, an empty line ends it.
    Otherwise (and additionally) the header ends at any line containing one of
    the marker substrings listed below.
    """
    if first_line_was_hash and not line:
        return True

    markers = (
        " $FreeBSD: ",
        "$Citrus$",
        "$FreeBSD$",
        "*/",
        "From: @(#)",
        # OpenBSD likes to say where stuff originally came from:
        "Original version ID:",
        "\t$Citrus: ",
        "\t$NetBSD: ",
        "\t$OpenBSD: ",
        "\t@(#)",
        "\tcitrus Id: ",
        "\tfrom: @(#)",
        "from OpenBSD:",
    )
    return any(marker in line for marker in markers)
73
74
def extract_copyright_at(lines: Sequence[str], i: int) -> int:
    """Extract the copyright header around ``lines[i]`` into ``copyrights``.

    The header is taken to start at ``lines[i]`` when that line begins with
    ``#``; otherwise the start is moved backwards until the preceding line
    contains ``/*`` (or the top of the file is reached). It runs up to, but
    not including, the first line at or after ``i`` for which
    is_copyright_end() is true. Comment decoration is stripped from each line
    and the resulting text is added to the module-level ``copyrights`` set.

    Args:
        lines: The file's content, one entry per line, without newlines.
        i: Index of a line that contains a copyright statement.

    Returns:
        The index at which the caller should resume scanning. This is always
        greater than the ``i`` passed in, so a caller looping on the return
        value is guaranteed to make progress.
    """
    first = i
    first_line_was_hash = lines[i].startswith("#")

    # Do we need to back up to find the start of the copyright header?
    start = i
    if not first_line_was_hash:
        while start > 0 and "/*" not in lines[start - 1]:
            start -= 1

    # Read comment lines until we hit something that terminates a
    # copyright header.
    while i < len(lines) and not is_copyright_end(lines[i],
                                                  first_line_was_hash):
        i += 1

    end = i

    # Trim trailing cruft.
    trailing_cruft = {
        " *", " * ===================================================="
    }
    while end > 0 and lines[end - 1] in trailing_cruft:
        end -= 1

    # Remove C/assembler comment formatting, pulling out just the text.
    clean_lines = []
    for line in lines[start:end]:
        line = line.replace("\t", "    ")
        line = line.replace("/* ", "")
        line = re.sub(r"^ \* ", "", line)
        line = line.replace("** ", "")
        line = line.replace("# ", "")
        if line.startswith("++Copyright++"):
            continue
        line = line.replace("--Copyright--", "")
        line = line.rstrip()
        # These come last and take care of "blank" comment lines.
        if line in {"#", " *", "**", "-"}:
            line = ""
        clean_lines.append(line)

    # Trim blank lines from head and tail. Bounds are checked so that a header
    # that is empty or entirely blank (for example when the copyright line
    # itself contains an end marker) cannot index past the list.
    head = 0
    tail = len(clean_lines)
    while head < tail and clean_lines[head] == "":
        head += 1
    while tail > head and clean_lines[tail - 1] == "":
        tail -= 1
    clean_lines = clean_lines[head:tail]

    # Nothing left after cleaning means there is no notice text to record.
    if clean_lines:
        copyrights.add("\n".join(clean_lines))

    # If lines[first] itself ended the header, i never moved. Step past it so
    # the caller does not re-examine the same line forever.
    return max(i, first + 1)
130
131
def do_file(path: str) -> None:
    """Scan the file at *path* and record every copyright header found in it.

    The file is decoded as UTF-8, falling back to ISO-8859-1 (with a warning)
    when that fails. Files of four lines or fewer, files without the word
    "Copyright", and files that begin with the AOSP ``//``-style SPDX header
    are skipped; the first two cases may emit a warning. For everything else,
    each line containing "Copyright" (but not "@(#) Copyright") is handed to
    extract_copyright_at().

    Args:
        path: Path of the file to read.
    """
    raw = Path(path).read_bytes()
    try:
        content = raw.decode("utf-8")
    except UnicodeDecodeError:
        warn("bad UTF-8 in %s" % path)
        content = raw.decode("iso-8859-1")

    lines = content.split("\n")

    if len(lines) <= 4:
        warn_verbose("ignoring short file %s" % path)
        return

    if "Copyright" not in content:
        if "public domain" in content.lower():
            warn_verbose("ignoring public domain file %s" % path)
            return
        warn('no copyright notice found in "%s" (%d lines)' %
             (path, len(lines)))
        return

    # Skip over our own files if they're SPDX licensed.
    # Because we use the // comment style, without this we'd copy the whole source file!
    # Raw strings keep \( and \d as regex escapes rather than (invalid) string
    # escapes; \n is still interpreted as a newline by the regex engine.
    spdx_header = re.compile(
        r'^// Copyright \(C\) 2\d\d\d The Android Open Source Project\n'
        r'// SPDX-License-Identifier: ')
    if spdx_header.match(content):
        return

    # Manually iterate because extract_copyright_at tells us how many lines to
    # skip.
    i = 0
    while i < len(lines):
        if "Copyright" in lines[i] and "@(#) Copyright" not in lines[i]:
            # max() guarantees forward progress even if the extractor hands
            # back the index it was given.
            i = max(extract_copyright_at(lines, i), i + 1)
        else:
            i += 1
168
169
def do_dir(arg: str) -> None:
    """Walk the directory tree rooted at *arg* and scan each interesting file.

    Directories named ``.git`` are not descended into. Sub-directories and
    files are visited in sorted order, so any warnings are emitted in a
    reproducible order.

    Args:
        arg: Path of the directory to walk.
    """
    for directory, sub_directories, filenames in os.walk(arg):
        # os.walk only honours changes made to this list in place, so use
        # slice assignment: rebinding the name would leave the traversal
        # order (and the set of directories visited) untouched.
        sub_directories[:] = sorted(
            name for name in sub_directories if name != ".git")

        for filename in sorted(filenames):
            path = os.path.join(directory, filename)
            if is_interesting(path):
                do_file(path)
180
181
def main() -> None:
    """Scan every command-line argument and print the collected notices.

    With no arguments the current directory is scanned. Directory arguments
    are walked with do_dir(); anything else is passed to do_file(). Each
    distinct notice is then printed in sorted order, followed by a blank line,
    a 67-character dashed rule, and another blank line.
    """
    targets = sys.argv[1:] or ["."]

    for target in targets:
        handler = do_dir if os.path.isdir(target) else do_file
        handler(target)

    rule = "-" * 67
    for notice in sorted(copyrights):
        print(notice)
        print()
        print(rule)
        print()
198
199
# Run main() only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()
202