#!/usr/bin/env python
# Run with directory arguments from any directory, with no special setup required.
#
# Scans the given files and directories for copyright headers in C/assembler
# ("/* ... */") and "#"-style comments, de-duplicates them, and prints the
# sorted set to stdout with a dashed separator after each notice.

import ftplib
import hashlib
import os
import re
import shutil
import string
import subprocess
import sys
import tarfile
import tempfile

VERBOSE = False

# Lower-case extensions of files that are never scanned for copyright headers.
_UNINTERESTING_EXTENSIONS = frozenset([
    ".bp",
    ".map",
    ".mk",
    ".py",
    ".pyc",
    ".swp",
    ".txt",
])

# Substrings that mark the end of a copyright header (version-control ids and
# "where this came from" notes that follow the license text).
_HEADER_TERMINATORS = (
    "\t@(#)",
    "\tfrom: @(#)",
    "From: @(#)",
    "from OpenBSD:",
    "\tcitrus Id: ",
    "\t$Citrus: ",
    "\t$OpenBSD: ",
    " $FreeBSD: ",
    "\t$NetBSD: ",
    "$FreeBSD$",
    "$Citrus$",
    # OpenBSD likes to say where stuff originally came from:
    "Original version ID:",
)

# Comment lines that are dropped from the tail of an extracted header.
_TRAILING_CRUFT = (
    " *",
    " * ====================================================",
)

_SEPARATOR = "-------------------------------------------------------------------"


def warn(s):
    """Write the warning message `s` to stderr."""
    sys.stderr.write("warning: %s\n" % s)


def warn_verbose(s):
    """Emit the warning `s` only when the module-level VERBOSE flag is set."""
    if VERBOSE:
        warn(s)


def is_interesting(path):
    """Return True if `path` should be scanned for copyright headers.

    The check is case-insensitive. Files with one of the uninteresting
    extensions, and files named "notice" or "readme" inside a directory,
    are skipped.
    """
    path = path.lower()
    if os.path.splitext(path)[1] in _UNINTERESTING_EXTENSIONS:
        return False
    if path.endswith(("/notice", "/readme")):
        return False
    return True


def is_auto_generated(content):
    """Return True if `content` carries one of the known auto-generation markers."""
    markers = (
        "Generated by gensyscalls.py",
        "generated by genserv.py",
        "This header was automatically generated from a Linux kernel header",
    )
    return any(marker in content for marker in markers)


# Every distinct copyright notice found so far, as cleaned-up text.
copyrights = set()


def _ends_copyright_header(line, in_hash_comment):
    """Return True if `line` terminates the copyright header being read."""
    if "*/" in line:
        return True
    # In a "#" comment block an empty line ends the header.
    if in_hash_comment and len(line) == 0:
        return True
    return any(marker in line for marker in _HEADER_TERMINATORS)


def _clean_comment_line(line):
    """Strip comment decoration from `line`; return None if it must be dropped."""
    line = line.replace("\t", " ")
    line = line.replace("/* ", "")
    line = re.sub(r"^ \* ", "", line)
    line = line.replace("** ", "")
    line = line.replace("# ", "")
    if "SPDX-License-Identifier:" in line:
        return None
    if line.startswith("++Copyright++"):
        return None
    line = line.replace("--Copyright--", "")
    line = line.rstrip()
    # These come last and take care of "blank" comment lines.
    if line in ("#", " *", "**", "-"):
        return ""
    return line


def extract_copyright_at(lines, i):
    """Extract the copyright header around `lines[i]` into `copyrights`.

    Args:
        lines: the file's content as a list of lines (no trailing newlines).
        i: index of a line containing "Copyright".

    Returns:
        The index of the line that terminated the header (or len(lines) if
        none did). This can equal `i` when the terminator is on the
        "Copyright" line itself, so callers must ensure they make progress.
    """
    in_hash_comment = lines[i].startswith("#")

    # Do we need to back up to find the start of the copyright header?
    start = i
    if not in_hash_comment:
        while start > 0 and "/*" not in lines[start - 1]:
            start -= 1

    # Read comment lines until we hit something that terminates a
    # copyright header.
    while i < len(lines) and not _ends_copyright_header(lines[i], in_hash_comment):
        i += 1

    # Trim trailing cruft.
    end = i
    while end > 0 and lines[end - 1] in _TRAILING_CRUFT:
        end -= 1

    # Remove C/assembler comment formatting, pulling out just the text.
    clean_lines = []
    for line in lines[start:end]:
        cleaned = _clean_comment_line(line)
        if cleaned is not None:
            clean_lines.append(cleaned)

    # Trim blank lines from head and tail. The bounds checks matter: the
    # header can legitimately clean down to nothing.
    first = 0
    last = len(clean_lines)
    while first < last and clean_lines[first] == "":
        first += 1
    while last > first and clean_lines[last - 1] == "":
        last -= 1
    clean_lines = clean_lines[first:last]

    if clean_lines:
        copyrights.add("\n".join(clean_lines))

    return i


def do_file(path):
    """Scan the file at `path` and record every copyright header it contains.

    The file is decoded as UTF-8, falling back to ISO-8859-1 (with a warning)
    if that fails. Files with at most 4 lines, auto-generated files, and files
    without the word "Copyright" are skipped (the last with a warning).
    """
    # Read bytes and decode explicitly so this works on both Python 2 and 3,
    # and so the file is opened (and closed) exactly once.
    with open(path, "rb") as the_file:
        raw = the_file.read()
    try:
        content = raw.decode("utf-8")
    except UnicodeDecodeError:
        warn("bad UTF-8 in %s" % path)
        content = raw.decode("iso-8859-1")

    lines = content.split("\n")

    if len(lines) <= 4:
        warn_verbose("ignoring short file %s" % path)
        return

    if is_auto_generated(content):
        warn_verbose("ignoring auto-generated file %s" % path)
        return

    if "Copyright" not in content:
        if "public domain" in content.lower():
            warn("ignoring public domain file %s" % path)
            return
        warn('no copyright notice found in "%s" (%d lines)' % (path, len(lines)))
        return

    # Manually iterate because extract_copyright_at tells us how many lines to skip.
    i = 0
    while i < len(lines):
        if "Copyright" in lines[i] and "@(#) Copyright" not in lines[i]:
            # extract_copyright_at can return `i` unchanged (terminator on the
            # same line); always advance so the loop cannot spin forever.
            i = max(extract_copyright_at(lines, i), i + 1)
        else:
            i += 1


def do_dir(path):
    """Recursively scan every interesting file below the directory `path`.

    ".git" directories are not descended into; directories and files are
    visited in sorted order.
    """
    for directory, sub_directories, filenames in os.walk(path):
        # os.walk only honours in-place edits of the directory list, so
        # prune and sort the list object itself rather than rebinding it.
        if ".git" in sub_directories:
            sub_directories.remove(".git")
        sub_directories.sort()

        for filename in sorted(filenames):
            file_path = os.path.join(directory, filename)
            if is_interesting(file_path):
                do_file(file_path)


def main(args):
    """Scan `args` (files or directories; "." if empty) and print the notices.

    Returns the process exit status (always 0).
    """
    if not args:
        args = ["."]

    for arg in args:
        if os.path.isdir(arg):
            do_dir(arg)
        else:
            do_file(arg)

    # Write UTF-8 bytes directly: Python 3 exposes the byte stream as
    # sys.stdout.buffer, Python 2's sys.stdout already accepts bytes.
    out = getattr(sys.stdout, "buffer", sys.stdout)
    for notice in sorted(copyrights):
        out.write((notice + "\n\n" + _SEPARATOR + "\n\n").encode("utf-8"))

    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))