#!/usr/bin/python
# Run with directory arguments from any directory, with no special setup required.
# Or:
# for i in libc libdl libm linker libstdc++ libthread_db ; do ./libc/tools/generate-NOTICE.py $i > $i/NOTICE ; done

"""Collect the distinct copyright headers found in source trees.

Each directory argument (default: ".") is walked, every interesting file is
scanned for lines containing "Copyright", and the surrounding comment header
is extracted and cleaned.  The unique headers are written to stdout in sorted
order, separated by a dashed rule; progress and warnings go to stderr.
"""

import ftplib
import hashlib
import os
import re
import shutil
import string
import subprocess
import sys
import tarfile
import tempfile

# Lower-case path endings of files that are never scanned.
_UNINTERESTING_SUFFIXES = (
    ".mk", ".py", ".pyc", ".txt", ".3",
    "/notice", "/readme", "/caveats",
    "/tzdata", "/zoneinfo/generate",
)

# Substrings that mark a file as machine-generated.
_AUTO_GENERATED_MARKERS = (
    "Generated by gensyscalls.py",
    "generated by genserv.py",
    "This header was automatically generated from a Linux kernel header",
)

# Substrings that end a copyright header when they appear on a line.
_HEADER_TERMINATORS = (
    "*/",
    "\t@(#)", "\tfrom: @(#)", "From: @(#)", "from OpenBSD:",
    "\tcitrus Id: ",
    "\t$Citrus: ", "\t$OpenBSD: ", " $FreeBSD: ", "\t$NetBSD: ",
    "$FreeBSD$", "$Citrus$",
    # OpenBSD likes to say where stuff originally came from:
    "Original version ID:",
)

# Lines that are dropped from the tail of a header before cleaning.
_TRAILING_CRUFT = (
    " *",
    " * ====================================================",
)

# Comment lines that carry no text once the formatting has been removed.
_BLANK_COMMENT_LINES = ("#", " *", "**", "-")

# Leading " * " of a C block-comment continuation line.
_LEADING_STAR_RE = re.compile(r"^ \* ")

_SEPARATOR = '-------------------------------------------------------------------'


def IsUninteresting(path):
    """Return True if `path` (compared case-insensitively) should be skipped."""
    return path.lower().endswith(_UNINTERESTING_SUFFIXES)


def IsAutoGenerated(content):
    """Return True if `content` contains one of the known generator banners."""
    return any(marker in content for marker in _AUTO_GENERATED_MARKERS)


# Every distinct cleaned-up copyright header seen so far.
copyrights = set()


def _IsHeaderTerminator(line, in_hash_comment):
    """Return True if `line` ends the copyright header being read."""
    # A '#'-style header has no closing token, so an empty line ends it.
    if in_hash_comment and len(line) == 0:
        return True
    return any(marker in line for marker in _HEADER_TERMINATORS)


def _CleanCommentLine(line):
    """Strip C/assembler/shell comment decoration from one header line.

    Returns the remaining text ("" for a blank comment line), or None when
    the line is a "++Copyright++" marker that must be dropped entirely.
    The order of the substitutions is significant and mirrors the header
    formats this script was written against.
    """
    line = line.replace("\t", " ")
    line = line.replace("/* ", "")
    line = _LEADING_STAR_RE.sub("", line)
    line = line.replace("** ", "")
    line = line.replace("# ", "")
    if line.startswith("++Copyright++"):
        return None
    line = line.replace("--Copyright--", "")
    line = line.rstrip()
    # These come last and take care of "blank" comment lines.
    if line in _BLANK_COMMENT_LINES:
        return ""
    return line


def ExtractCopyrightAt(lines, i):
    """Extract the copyright header around lines[i] into `copyrights`.

    Args:
        lines: the file's content as a list of lines (no newlines).
        i: index of a line containing "Copyright".

    Returns:
        The index of the line that terminated the header (len(lines) if the
        end of the file was reached), so the caller can resume after it.

    If nothing is left after cleaning (for example a one-line
    "/* Copyright ... */" comment at the top of a file, where the terminator
    is on the copyright line itself), a warning is written to stderr and
    nothing is added, instead of failing with an IndexError.
    """
    found_at = i
    in_hash_comment = lines[i].startswith("#")

    # For block comments, back up to the line after the one opening the
    # comment (or to the top of the file if no "/*" is found).
    start = i
    if not in_hash_comment:
        while start > 0 and "/*" not in lines[start - 1]:
            start -= 1

    # Read comment lines until we hit something that terminates a
    # copyright header.
    while i < len(lines) and not _IsHeaderTerminator(lines[i], in_hash_comment):
        i += 1

    # Trim trailing cruft.
    end = i
    while end > 0 and lines[end - 1] in _TRAILING_CRUFT:
        end -= 1

    # Remove C/assembler comment formatting, pulling out just the text.
    clean_lines = []
    for raw_line in lines[start:end]:
        cleaned = _CleanCommentLine(raw_line)
        if cleaned is not None:
            clean_lines.append(cleaned)

    # Trim blank lines from head and tail; the emptiness checks keep an
    # all-blank (or empty) header from raising IndexError.
    while clean_lines and clean_lines[0] == "":
        del clean_lines[0]
    while clean_lines and clean_lines[-1] == "":
        clean_lines.pop()

    if clean_lines:
        copyrights.add("\n".join(clean_lines))
    else:
        sys.stderr.write('warning: empty copyright header at line %d\n' % (found_at + 1))

    return i


def _ReadText(path):
    """Return the decoded text of `path`, falling back to ISO-8859-1."""
    # Read bytes and decode explicitly so this works on Python 2 and 3, and
    # use `with` so the handle is closed promptly.
    with open(path, 'rb') as source_file:
        raw = source_file.read()
    try:
        return raw.decode('utf-8')
    except UnicodeDecodeError:
        # TODO: update hash.h, md5.c, and md5.h; upstream is probably UTF-8 already.
        sys.stderr.write('warning: bad UTF-8 in %s\n' % path)
        return raw.decode('iso-8859-1')


def _ScanFile(path):
    """Extract every copyright header in the file at `path`."""
    content = _ReadText(path)
    lines = content.split("\n")

    # Very short files are ignored.
    if len(lines) <= 4:
        return

    if IsAutoGenerated(content):
        return

    if "Copyright" not in content:
        # Public-domain files are expected to have no notice; anything else
        # is worth a warning.
        if "public domain" not in content.lower():
            sys.stderr.write('warning: no copyright notice found in "%s" (%d lines)\n' % (path, len(lines)))
        return

    i = 0
    while i < len(lines):
        if "Copyright" in lines[i] and "@(#) Copyright" not in lines[i]:
            i = ExtractCopyrightAt(lines, i)
        i += 1


def main(args):
    """Scan each directory in `args` (default ".") and print the notices."""
    if len(args) == 0:
        args = ["."]

    for arg in args:
        sys.stderr.write('Searching for source files in "%s"...\n' % arg)

        for directory, sub_directories, filenames in os.walk(arg):
            if ".git" in sub_directories:
                sub_directories.remove(".git")
            # Sort in place: os.walk only honors changes made to the list
            # object it handed out, not a rebound local name.
            sub_directories.sort()

            for filename in sorted(filenames):
                path = os.path.join(directory, filename)
                if IsUninteresting(path):
                    continue
                _ScanFile(path)

    # Emit UTF-8 bytes directly: Python 3 exposes the byte stream as
    # sys.stdout.buffer, while Python 2's sys.stdout accepts bytes itself.
    out = getattr(sys.stdout, "buffer", sys.stdout)
    for notice in sorted(copyrights):
        out.write((notice + "\n\n" + _SEPARATOR + "\n\n").encode('utf-8'))

    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))