• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python
2# Run with directory arguments from any directory, with no special setup required.
3
4import ftplib
5import hashlib
6import os
7import re
8import shutil
9import string
10import subprocess
11import sys
12import tarfile
13import tempfile
14
# When True, warn_verbose() messages are emitted; otherwise they are dropped.
VERBOSE = False


def warn(s):
    """Write `s` to stderr as a single line prefixed with "warning: "."""
    message = "warning: %s\n" % s
    sys.stderr.write(message)


def warn_verbose(s):
    """Forward `s` to warn(), but only when the VERBOSE flag is set."""
    if not VERBOSE:
        return
    warn(s)
23
def is_interesting(path):
    """Return False for paths that should not be scanned, True otherwise.

    The comparison is case-insensitive. A path is skipped when it ends in
    "/notice" or "/readme", or when its extension is one of the listed
    build/script/text extensions.
    """
    lowered = path.lower()

    # Files named NOTICE or README inside a directory are never scanned.
    if lowered.endswith(("/notice", "/readme")):
        return False

    extension = os.path.splitext(lowered)[1]
    skipped_extensions = (".bp", ".map", ".mk", ".py", ".pyc", ".swp", ".txt")
    return extension not in skipped_extensions
40
def is_auto_generated(content):
    """Return True if `content` contains one of the known generator banners."""
    generator_banners = (
        "Generated by gensyscalls.py",
        "generated by genserv.py",
        "This header was automatically generated from a Linux kernel header",
    )
    return any(banner in content for banner in generator_banners)
47
# Every distinct copyright notice found so far (cleaned-up text, one string
# per notice).
copyrights = set()

# Substrings whose presence on a line ends a copyright header.
_HEADER_TERMINATORS = (
    "*/",
    "\t@(#)",
    "\tfrom: @(#)",
    "From: @(#)",
    "from OpenBSD:",
    "\tcitrus Id: ",
    "\t$Citrus: ",
    "\t$OpenBSD: ",
    " $FreeBSD: ",
    "\t$NetBSD: ",
    "$FreeBSD$",
    "$Citrus$",
    # OpenBSD likes to say where stuff originally came from:
    "Original version ID:",
)

# Lines that are dropped when they appear at the very end of a header.
_TRAILING_CRUFT = (
    " *",
    " * ====================================================",
)


def _clean_comment_line(line):
    """Strip C/assembler/hash comment markup from one header line.

    Returns the bare text ("" for a blank comment line), or None when the
    line should be left out of the notice altogether.
    """
    line = line.replace("\t", "    ")
    line = line.replace("/* ", "")
    line = re.sub(r"^ \* ", "", line)
    line = line.replace("** ", "")
    line = line.replace("# ", "")
    if "SPDX-License-Identifier:" in line:
        return None
    if line.startswith("++Copyright++"):
        return None
    line = line.replace("--Copyright--", "")
    line = line.rstrip()
    # These come last and take care of "blank" comment lines.
    if line in ("#", " *", "**", "-"):
        return ""
    return line


def extract_copyright_at(lines, i):
    """Extract the copyright header around lines[i] into `copyrights`.

    Args:
        lines: the file's content as a list of lines (no trailing newlines).
        i: index of a line that mentions "Copyright".

    Returns:
        The index at which the caller should resume scanning. This is the
        line that terminated the header, and it is always greater than the
        `i` passed in, so a caller looping on the result cannot stall.

    If nothing is left after cleaning (for example a one-line
    "/* Copyright ... */" comment, whose terminator sits on the starting
    line), no notice is recorded.
    """
    first = i
    is_hash_comment = lines[i].startswith("#")

    # For C-style comments, back up to the line just after the one that
    # opens the comment (or to the top of the file if there is none).
    start = i
    if not is_hash_comment:
        while start > 0 and "/*" not in lines[start - 1]:
            start -= 1

    # Read comment lines until we hit something that terminates a
    # copyright header.
    while i < len(lines):
        line = lines[i]
        # An empty line ends a '#'-style header.
        if is_hash_comment and len(line) == 0:
            break
        if any(marker in line for marker in _HEADER_TERMINATORS):
            break
        i += 1

    # Trim trailing cruft.
    end = i
    while end > 0 and lines[end - 1] in _TRAILING_CRUFT:
        end -= 1

    # Remove comment formatting, pulling out just the text.
    clean_lines = []
    for line in lines[start:end]:
        cleaned = _clean_comment_line(line)
        if cleaned is not None:
            clean_lines.append(cleaned)

    # Trim blank lines from head and tail; the emptiness checks keep this
    # from indexing into an empty list when nothing survived cleaning.
    while clean_lines and clean_lines[0] == "":
        del clean_lines[0]
    while clean_lines and clean_lines[-1] == "":
        del clean_lines[-1]

    if clean_lines:
        copyrights.add("\n".join(clean_lines))

    # Guarantee forward progress even when the header ended on its first line.
    return max(i, first + 1)
118
119
def do_file(path):
    """Scan one file and record every copyright header found in it.

    Files of four lines or fewer, auto-generated files, and files that
    contain no "Copyright" text are skipped, with a warning where
    appropriate.
    """
    # Read the raw bytes once and decode explicitly. This closes the file
    # promptly and works on both Python 2 and Python 3.
    with open(path, "rb") as the_file:
        raw = the_file.read()
    try:
        content = raw.decode("utf-8")
    except UnicodeDecodeError:
        warn("bad UTF-8 in %s" % path)
        # iso-8859-1 maps every byte to a character, so this cannot fail.
        content = raw.decode("iso-8859-1")

    lines = content.split("\n")

    if len(lines) <= 4:
        warn_verbose("ignoring short file %s" % path)
        return

    if is_auto_generated(content):
        warn_verbose("ignoring auto-generated file %s" % path)
        return

    if "Copyright" not in content:
        if "public domain" in content.lower():
            warn("ignoring public domain file %s" % path)
            return
        warn('no copyright notice found in "%s" (%d lines)' % (path, len(lines)))
        return

    # Manually iterate because extract_copyright_at tells us where to resume.
    i = 0
    while i < len(lines):
        line = lines[i]
        if "Copyright" in line and "@(#) Copyright" not in line:
            next_i = extract_copyright_at(lines, i)
            if next_i <= i:
                # Never rescan the same line: that would loop forever.
                warn("could not advance past copyright notice at %s:%d" % (path, i + 1))
                next_i = i + 1
            i = next_i
        else:
            i += 1
152
153
def do_dir(path):
    """Recursively scan every interesting file beneath the directory `path`.

    ".git" directories are not entered. Sub-directories and files are
    visited in sorted order.
    """
    for directory, sub_directories, filenames in os.walk(path):
        # os.walk only honours changes made to this list in place, so prune
        # and sort the list object itself rather than rebinding the name.
        if ".git" in sub_directories:
            sub_directories.remove(".git")
        sub_directories.sort()

        for filename in sorted(filenames):
            file_path = os.path.join(directory, filename)
            if is_interesting(file_path):
                do_file(file_path)
164
165
# Paths to scan: directories are walked recursively, anything else is treated
# as a single file. Default to the current directory when none are given.
args = sys.argv[1:] or ["."]

for arg in args:
    if os.path.isdir(arg):
        do_dir(arg)
    else:
        do_file(arg)

# Write UTF-8 bytes straight to stdout so the output is identical on
# Python 2 (where sys.stdout accepts bytes) and Python 3 (where the byte
# stream is sys.stdout.buffer).
out = getattr(sys.stdout, "buffer", sys.stdout)
separator = b"-------------------------------------------------------------------"
for notice in sorted(copyrights):
    # Each notice is followed by a blank line, the separator, and another
    # blank line.
    out.write(notice.encode("utf-8") + b"\n\n" + separator + b"\n\n")

sys.exit(0)
183