• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/python
2# Run with directory arguments from any directory, with no special setup required.
3# Or:
4# for i in libc libdl libm linker libstdc++ libthread_db ; do ./libc/tools/generate-NOTICE.py $i > $i/NOTICE ; done
5
6import ftplib
7import hashlib
8import os
9import re
10import shutil
11import string
12import subprocess
13import sys
14import tarfile
15import tempfile
16
def IsUninteresting(path):
    """Return True if `path` is a file that should not be scanned.

    The path is lower-cased before matching, so the check is
    case-insensitive. A path is skipped when it ends with one of the listed
    file extensions or with one of the listed "/name" components.
    """
    lowered = path.lower()
    skipped_suffixes = (
        # Build files, scripts, text files and man pages.
        ".mk", ".py", ".pyc", ".txt", ".3",
        # Documentation-like files, matched as the final path component.
        "/notice", "/readme", "/caveats",
        # Time zone data and its generator.
        "/tzdata", "/zoneinfo/generate",
    )
    return lowered.endswith(skipped_suffixes)
26
def IsAutoGenerated(content):
    """Return True if `content` contains a known auto-generation marker.

    The match is a case-sensitive substring search over the whole text.
    """
    generator_markers = (
        "Generated by gensyscalls.py",
        "generated by genserv.py",
        "This header was automatically generated from a Linux kernel header",
    )
    for marker in generator_markers:
        if marker in content:
            return True
    return False
33
# Unique copyright notices found so far; each entry is the cleaned header
# text with its lines joined by "\n".
copyrights = set()

# A line containing any of these substrings ends the copyright header.
_HEADER_TERMINATORS = (
    "*/",
    "\t@(#)", "\tfrom: @(#)", "From: @(#)", "from OpenBSD:",
    "\tcitrus Id: ",
    "\t$Citrus: ", "\t$OpenBSD: ", " $FreeBSD: ", "\t$NetBSD: ",
    "$FreeBSD$", "$Citrus$",
    # OpenBSD likes to say where stuff originally came from:
    "Original version ID:",
)

# Comment-only lines that are dropped from the end of a header.
_TRAILING_CRUFT = (
    " *",
    " * ====================================================",
)


def ExtractCopyrightAt(lines, i):
    """Extract the copyright header around line `i` and record it.

    `lines` is the file content split into lines and `i` is the index of a
    line that mentions a copyright. If that line starts with "#", the header
    starts at `i` and also ends at the first empty line. Otherwise the
    header start is found by walking backwards to the line after one
    containing "/*" (or to the start of the file). In both cases the header
    ends at the first line containing one of the terminator markers.

    The comment decoration is stripped and the resulting text is added to
    the module-level `copyrights` set. Nothing is added when the header
    contains no text once cleaned.

    Returns the index of the line that terminated the header, or
    `len(lines)` if the end of the file was reached first.
    """
    is_hash_comment = lines[i].startswith("#")

    # Do we need to back up to find the start of the copyright header?
    start = i
    if not is_hash_comment:
        while start > 0 and "/*" not in lines[start - 1]:
            start -= 1

    # Read comment lines until we hit something that terminates a
    # copyright header.
    while i < len(lines):
        line = lines[i]
        if is_hash_comment and len(line) == 0:
            break
        if any(marker in line for marker in _HEADER_TERMINATORS):
            break
        i += 1

    # Trim trailing cruft, but never back past the start of the header.
    end = i
    while end > start and lines[end - 1] in _TRAILING_CRUFT:
        end -= 1

    # Remove C/assembler comment formatting, pulling out just the text.
    clean_lines = []
    for line in lines[start:end]:
        line = line.replace("\t", "    ")
        line = line.replace("/* ", "")
        line = re.sub(r"^ \* ", "", line)
        line = line.replace("** ", "")
        line = line.replace("# ", "")
        if line.startswith("++Copyright++"):
            continue
        line = line.replace("--Copyright--", "")
        line = line.rstrip()
        # These come last and take care of "blank" comment lines.
        if line in ("#", " *", "**", "-"):
            line = ""
        clean_lines.append(line)

    # Trim blank lines from head and tail. The bounds checks keep a header
    # that is entirely blank from indexing an empty list.
    first = 0
    last = len(clean_lines)
    while first < last and clean_lines[first] == "":
        first += 1
    while last > first and clean_lines[last - 1] == "":
        last -= 1
    clean_lines = clean_lines[first:last]

    if clean_lines:
        copyrights.add("\n".join(clean_lines))

    return i
102
def main():
    """Scan the given directories and print every unique copyright notice.

    Directories come from the command line; with no arguments the current
    directory is scanned. Progress messages and warnings go to stderr. The
    collected notices are written to stdout as UTF-8, sorted, each followed
    by a blank line, a dashed separator line and another blank line.
    """
    args = sys.argv[1:] or ["."]

    for arg in args:
        sys.stderr.write('Searching for source files in "%s"...\n' % arg)

        for directory, sub_directories, filenames in os.walk(arg):
            if ".git" in sub_directories:
                sub_directories.remove(".git")
            # Sort in place: os.walk only honors changes made to this list
            # object, so rebinding the name would not affect traversal order.
            sub_directories.sort()

            for filename in sorted(filenames):
                path = os.path.join(directory, filename)
                if IsUninteresting(path):
                    continue

                # Read the bytes once and decode them ourselves, so the same
                # code runs on Python 2 and 3 and the handle is closed.
                with open(path, 'rb') as source_file:
                    raw = source_file.read()
                try:
                    content = raw.decode('utf-8')
                except UnicodeDecodeError:
                    # TODO: update hash.h, md5.c, and md5.h; upstream is probably UTF-8 already.
                    sys.stderr.write('warning: bad UTF-8 in %s\n' % path)
                    content = raw.decode('iso-8859-1')

                lines = content.split("\n")

                # Files this short are skipped without a warning.
                if len(lines) <= 4:
                    continue

                if IsAutoGenerated(content):
                    continue

                if "Copyright" not in content:
                    # Public domain files need no notice; anything else is
                    # reported so it can be checked by hand.
                    if "public domain" not in content.lower():
                        sys.stderr.write('warning: no copyright notice found in "%s" (%d lines)\n' % (path, len(lines)))
                    continue

                i = 0
                while i < len(lines):
                    if "Copyright" in lines[i] and "@(#) Copyright" not in lines[i]:
                        # Resume after the line that ended the header.
                        i = ExtractCopyrightAt(lines, i)
                    i += 1

    # Write bytes directly: Python 3 exposes the byte stream as
    # sys.stdout.buffer, while Python 2's sys.stdout already accepts bytes.
    out = getattr(sys.stdout, 'buffer', sys.stdout)
    separator = b'-------------------------------------------------------------------'
    for notice in sorted(copyrights):
        out.write(notice.encode('utf-8') + b'\n\n' + separator + b'\n\n')


if __name__ == "__main__":
    main()
    sys.exit(0)
160