#!/usr/bin/env python
#
# Copyright (C) 2012 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Usage: generate-notice-files [plain text output file] [html output file] [file title] [directory of notices]

Generate the Android notice files, including both text and html files.

-h to display this usage message and exit.
"""
from collections import defaultdict
import getopt
import hashlib
import itertools
import os
import os.path
import re
import sys

# Number of bytes read per iteration while hashing a file.
MD5_BLOCKSIZE = 1024 * 1024

# Characters that must be replaced by entities when emitted inside HTML.
HTML_ESCAPE_TABLE = {
    "&": "&amp;",
    '"': "&quot;",
    "'": "&apos;",
    ">": "&gt;",
    "<": "&lt;",
    }

HTML_OUTPUT_CSS = """
<style type="text/css">
body { padding: 0; font-family: sans-serif; }
.same-license { background-color: #eeeeee; border-top: 20px solid white; padding: 10px; }
.label { font-weight: bold; }
.file-list { margin-left: 1em; color: blue; }
</style>
"""


def hexify(s):
    """Return the bytes in S as a lowercase hex string, two digits per byte."""
    # bytearray() yields integers on both Python 2 (str) and Python 3 (bytes).
    return "".join("%02x" % byte for byte in bytearray(s))


def md5sum(filename):
    """Calculate an MD5 of the file given by FILENAME,
    and return hex digest as a string.
    Output should be compatible with md5sum command"""
    digest = hashlib.md5()
    with open(filename, "rb") as f:
        # Read in fixed-size blocks so large files are never held in memory.
        for block in iter(lambda: f.read(MD5_BLOCKSIZE), b""):
            digest.update(block)
    return digest.hexdigest()


def html_escape(text):
    """Produce entities within text."""
    return "".join(HTML_ESCAPE_TABLE.get(c, c) for c in text)


def _to_bytes(text):
    """Return TEXT as bytes; Python 2 native strings pass through untouched."""
    if isinstance(text, bytes):
        return text
    # surrogateescape restores any undecodable bytes smuggled in by
    # _read_text() or by the OS (file names, argv) on Python 3.
    return text.encode("utf-8", "surrogateescape")


def _read_text(filename):
    """Return the contents of FILENAME as a native string, losing no bytes."""
    with open(filename, "rb") as f:
        data = f.read()
    if isinstance(data, str):
        # Python 2: bytes is str, nothing to decode.
        return data
    return data.decode("utf-8", "surrogateescape")


def _println(output_file, text=""):
    """Write TEXT followed by a newline to the binary stream OUTPUT_FILE."""
    output_file.write(_to_bytes(text) + b"\n")


def _src_dir_strip_re(input_dir):
    """Return a regex that reduces INPUT_DIR/<path>.txt to /<path>."""
    # INPUT_DIR is a literal path, not a pattern, so it must be escaped.
    return re.compile(re.escape(input_dir) + r"(/.*)\.txt$")


def _sorted_groups(file_hash):
    """Return the non-empty file lists of FILE_HASH, each sorted, ordered by key.

    Sorting makes the generated files independent of dict and file system
    iteration order.
    """
    return [sorted(file_hash[key]) for key in sorted(file_hash) if file_hash[key]]


def combine_notice_files_html(file_hash, input_dir, output_filename):
    """Combine notice files in FILE_HASH and output a HTML version to OUTPUT_FILENAME.

    FILE_HASH maps a content hash to the list of notice files sharing that
    content. INPUT_DIR is the directory prefix stripped from the file names
    that are displayed.
    """
    strip_re = _src_dir_strip_re(input_dir)
    groups = _sorted_groups(file_hash)

    # Set up a filename to row id table (anchors inside tables don't work in
    # most browsers, but href's to table row ids do). Every file of a group
    # shares the id of the single row that shows the group's license text.
    id_table = {}
    for row_id, group in enumerate(groups):
        for filename in group:
            id_table[filename] = row_id

    with open(output_filename, "wb") as output_file:
        # Output the header pieces
        _println(output_file, "<html><head>")
        _println(output_file, HTML_OUTPUT_CSS)
        _println(output_file, '</head><body topmargin="0" leftmargin="0" rightmargin="0" bottommargin="0">')

        # Output our table of contents, one entry per notice file
        _println(output_file, '<div class="toc">')
        _println(output_file, "<ul>")
        for filename in sorted(itertools.chain.from_iterable(groups)):
            shown = html_escape(strip_re.sub(r"\1", filename))
            _println(output_file, '<li><a href="#id%d">%s</a></li>' % (id_table[filename], shown))
        _println(output_file, "</ul>")
        _println(output_file, "</div><!-- table of contents -->")

        # Output the individual notice file lists
        _println(output_file, '<table cellpadding="0" cellspacing="0" border="0">')
        for group in groups:
            _println(output_file, '<tr id="id%d"><td class="same-license">' % id_table[group[0]])
            _println(output_file, '<div class="label">Notices for file(s):</div>')
            _println(output_file, '<div class="file-list">')
            for filename in group:
                _println(output_file, "%s <br/>" % html_escape(strip_re.sub(r"\1", filename)))
            _println(output_file, "</div><!-- file-list -->")
            _println(output_file)
            _println(output_file, '<pre class="license-text">')
            # All files of a group have identical content; read the first.
            _println(output_file, html_escape(_read_text(group[0])))
            _println(output_file, "</pre><!-- license-text -->")
            _println(output_file, "</td></tr><!-- same-license -->")
            _println(output_file)
            _println(output_file)
            _println(output_file)

        # Finish off the file output
        _println(output_file, "</table>")
        _println(output_file, "</body></html>")


def combine_notice_files_text(file_hash, input_dir, output_filename, file_title):
    """Combine notice files in FILE_HASH and output a text version to OUTPUT_FILENAME.

    FILE_TITLE is written as the first line of the output. See
    combine_notice_files_html for FILE_HASH and INPUT_DIR.
    """
    strip_re = _src_dir_strip_re(input_dir)
    with open(output_filename, "wb") as output_file:
        _println(output_file, file_title)
        for group in _sorted_groups(file_hash):
            _println(output_file, "============================================================")
            _println(output_file, "Notices for file(s):")
            for filename in group:
                _println(output_file, strip_re.sub(r"\1", filename))
            _println(output_file, "------------------------------------------------------------")
            # All files of a group have identical content; read the first.
            _println(output_file, _read_text(group[0]))


def main(args):
    """Generate both notice files.

    ARGS is [text output file, html output file, file title, notice directory].
    """
    txt_output_file = args[0]
    html_output_file = args[1]
    file_title = args[2]

    # Find all the notice files and md5 them
    input_dir = os.path.normpath(args[3])
    files_with_same_hash = defaultdict(list)
    for root, _dirs, names in os.walk(input_dir):
        for name in names:
            if name.endswith(".txt"):
                filename = os.path.join(root, name)
                files_with_same_hash[md5sum(filename)].append(filename)

    print("Combining NOTICE files into HTML")
    combine_notice_files_html(files_with_same_hash, input_dir, html_output_file)
    print("Combining NOTICE files into text")
    combine_notice_files_text(files_with_same_hash, input_dir, txt_output_file, file_title)


def _parse_args(argv):
    """Return the four positional arguments in ARGV, or print usage and exit.

    ARGV excludes the program name. Exits with status 2 for -h or an unknown
    option and with status 1 when the number of arguments is wrong.
    """
    try:
        opts, args = getopt.getopt(argv, "h")
    except getopt.GetoptError as err:
        print(str(err))
        print(__doc__)
        sys.exit(2)

    for opt, _value in opts:
        if opt == "-h":
            print(__doc__)
            sys.exit(2)
        else:
            sys.stderr.write("unhandled option %s\n" % (opt,))

    if len(args) != 4:
        print("need exactly four arguments, the two output files, the file title\n"
              "and the directory containing notices, not %d" % (len(args),))
        print(__doc__)
        sys.exit(1)
    return args


if __name__ == "__main__":
    # Argument parsing lives behind the guard so importing this module has no
    # side effects.
    main(_parse_args(sys.argv[1:]))