#!/usr/bin/env python
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

'''This utility cleans up the html files as emitted by doxygen so
that they are suitable for publication on a Google documentation site.
'''

import optparse
import os
import re
import shutil
import string
import sys
try:
  from BeautifulSoup import BeautifulSoup, Tag
except (ImportError, NotImplementedError):
  print("This tool requires the BeautifulSoup package "
        "(see http://www.crummy.com/software/BeautifulSoup/).\n"
        "Make sure that the file BeautifulSoup.py is either in this directory "
        "or is available in your PYTHON_PATH")
  raise


class HTMLFixer(object):
  '''This class cleans up the html strings as produced by Doxygen.

  The html is parsed once in the constructor; the Fix*/Remove* methods then
  modify the parsed tree in place, and str() of the fixer returns the
  resulting html.
  '''

  def __init__(self, html):
    '''Parses |html| (a string of html markup) into self.soup.'''
    self.soup = BeautifulSoup(html)

  def FixTableHeadings(self):
    '''Fixes the doxygen table headings.

    This includes:
      - Using bare <h2> title row instead of row embedded in <tr><td> in table
      - Splitting up tables into multiple separate tables if a table
        heading appears in the middle of a table.

    Copying the anchor's "name" attribute into the "id" attribute of the
    heading is currently disabled (see the commented-out line below).

    For example, this html:
     <table>
      <tr><td colspan="2"><h2><a name="pub-attribs"></a>
      Data Fields List</h2></td></tr>
      ...
     </table>

    would be converted to this:
     <h2>Data Fields List</h2>
     <table>
      ...
     </table>
    '''

    table_headers = []
    for tag in self.soup.findAll('tr'):
      # Each attribute access is a descendant search, so walk the
      # td -> h2 -> a chain once; |anchor| is None if any step is missing.
      anchor = tag.td and tag.td.h2 and tag.td.h2.a
      # Use get() rather than indexing so that an anchor without a "name"
      # attribute is skipped instead of raising KeyError.
      if anchor and anchor.get('name'):
        #tag['id'] = anchor['name']
        # The node following the (empty) anchor is the heading text; make it
        # the row's content and turn the <tr> itself into the <h2>.
        tag.string = anchor.next
        tag.name = 'h2'
        table_headers.append(tag)

    # reverse the list so that earlier tags don't delete later tags
    table_headers.reverse()
    # Split up tables that have multiple table header (th) rows
    for tag in table_headers:
      print("Header tag: %s is %s" % (tag.name, tag.string.strip()))
      # Is this a heading in the middle of a table?
      if tag.findPreviousSibling('tr') and tag.parent.name == 'table':
        print("Splitting Table named %s" % tag.string.strip())
        table = tag.parent
        table_parent = table.parent
        table_index = table_parent.contents.index(table)
        new_table = Tag(self.soup, name='table', attrs=table.attrs)
        table_parent.insert(table_index + 1, new_table)
        tag_index = table.contents.index(tag)
        # Iterate over a slice (a copy) because insert() moves each row out
        # of the old table while we loop.
        for index, row in enumerate(table.contents[tag_index:]):
          new_table.insert(index, row)
      # Now move the <h2> tag to be in front of the <table> tag
      assert tag.parent.name == 'table'
      table = tag.parent
      table_parent = table.parent
      table_index = table_parent.contents.index(table)
      table_parent.insert(table_index, tag)

  def RemoveTopHeadings(self):
    '''Removes <div> sections with a header, tabs, or navpath class attribute'''
    header_tags = self.soup.findAll(
        name='div',
        attrs={'class' : re.compile('^(header|tabs[0-9]*|navpath)$')})
    # A plain loop: extract() is called only for its side effect of detaching
    # the tag from the tree.
    for tag in header_tags:
      tag.extract()

  def FixAll(self):
    '''Applies every cleanup step: table headings first, then top headings.'''
    self.FixTableHeadings()
    self.RemoveTopHeadings()

  def __str__(self):
    '''Returns the current state of the parsed document as a string.'''
    return str(self.soup)


def main():
  '''Main entry for the doxy_cleanup utility

  doxy_cleanup takes a list of html files and modifies them in place.

  With -m/--move, each file is, after being rewritten, moved into an
  "original_html" directory located beside the file's own directory (the
  directory is created if it does not exist).

  Returns:
    0 on success, or 1 (after printing usage) when no files were given.

  Raises:
    Any exception hit while processing a file is re-raised after the name of
    the offending file has been printed.
  '''

  parser = optparse.OptionParser(usage='Usage: %prog [options] files...')

  parser.add_option('-m', '--move', dest='move', action='store_true',
                    default=False, help='move html files to "original_html"')

  options, files = parser.parse_args()

  if not files:
    parser.print_usage()
    return 1

  for filename in files:
    try:
      with open(filename, 'r') as html_file:
        html = html_file.read()

      print("Processing %s" % filename)
      fixer = HTMLFixer(html)
      fixer.FixAll()
      with open(filename, 'w') as html_file:
        html_file.write(str(fixer))
      if options.move:
        new_directory = os.path.join(
            os.path.dirname(os.path.dirname(filename)), 'original_html')
        if not os.path.exists(new_directory):
          os.mkdir(new_directory)
        shutil.move(filename, new_directory)
    except Exception:
      # Name the file that failed, then let the error propagate.  Catching
      # Exception (not a bare except) keeps Ctrl-C / sys.exit from being
      # reported as processing errors.
      print("Error while processing %s" % filename)
      raise

  return 0


if __name__ == '__main__':
  sys.exit(main())