• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python
2# Copyright (c) 2012 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6'''This utility cleans up the html files as emitted by doxygen so
7that they are suitable for publication on a Google documentation site.
8'''
9
10import optparse
11import os
12import re
13import shutil
14import string
15import sys
try:
  from BeautifulSoup import BeautifulSoup, Tag
except (ImportError, NotImplementedError):
  # Tell the user how to obtain the missing dependency, then re-raise so the
  # original traceback is still reported and the script stops.
  print ("This tool requires the BeautifulSoup package "
         "(see http://www.crummy.com/software/BeautifulSoup/).\n"
         "Make sure that the file BeautifulSoup.py is either in this directory "
         "or is available in your PYTHONPATH")
  raise
24
25
class HTMLFixer(object):
  '''Cleans up the html strings as produced by Doxygen.

  The html is parsed once by the constructor. The fix-up methods modify the
  parsed tree (self.soup) in place, and str() on the fixer returns the
  resulting html.
  '''

  def __init__(self, html):
    '''Parses |html|, a string holding one html document, into self.soup.'''
    self.soup = BeautifulSoup(html)

  @staticmethod
  def _IndexOfChild(parent, child):
    '''Returns the position of |child| in parent.contents, by identity.

    list.index() matches with ==, so it may return the position of a
    different node that merely compares equal to |child|; the callers here
    need the position of this exact node.

    Raises:
      ValueError: |child| is not in parent.contents.
    '''
    for index, node in enumerate(parent.contents):
      if node is child:
        return index
    raise ValueError('node is not a child of the given parent')

  def _SplitTableAt(self, table, tag):
    '''Moves |tag| and everything after it in |table| into a new table.

    The new table gets the same attributes as |table| and is inserted
    immediately after it in the parent of |table|.
    '''
    table_parent = table.parent
    new_table = Tag(self.soup, name='table', attrs=table.attrs)
    table_parent.insert(self._IndexOfChild(table_parent, table) + 1, new_table)
    tag_index = self._IndexOfChild(table, tag)
    # Iterate over a copy (the slice) so that moving rows out of |table|
    # cannot disturb the iteration.
    for index, row in enumerate(table.contents[tag_index:]):
      new_table.insert(index, row)

  def FixTableHeadings(self):
    '''Fixes the doxygen table headings.

    This includes:
      - Using bare <h2> title row instead of row embedded in <tr><td> in table
        (the <a name="..."> anchor inside the heading is dropped).
      - Splitting up tables into multiple separate tables if a table
        heading appears in the middle of a table.

    Only rows whose first cell holds an <h2> containing an <a> with a
    non-empty "name" attribute are treated as headings.

    For example, this html:
     <table>
      <tr><td colspan="2"><h2><a name="pub-attribs"></a>
      Data Fields List</h2></td></tr>
      ...
     </table>

    would be converted to this:
     <h2>Data Fields List</h2>
     <table>
      ...
     </table>
    '''

    table_headers = []
    for tag in self.soup.findAll('tr'):
      cell = tag.td
      heading = cell.h2 if cell else None
      anchor = heading.a if heading else None
      # Use get() rather than ['name']: an anchor without a "name" attribute
      # (e.g. a plain link inside an <h2>) must be skipped, not raise KeyError.
      if anchor and anchor.get('name'):
        # Replace the whole row content by the node following the anchor
        # (the heading text in the doxygen output) and turn the row itself
        # into the <h2> element.
        tag.string = anchor.next
        tag.name = 'h2'
        table_headers.append(tag)

    # Walk the headings last-to-first so that earlier tags don't delete later
    # tags when a table is split.
    for tag in reversed(table_headers):
      # tag.string may be None when the heading is not a single text node;
      # the title is only used for the progress messages below.
      title = (tag.string or '').strip()
      print("Header tag: %s is %s" % (tag.name, title))
      # Is this a heading in the middle of a table?
      if tag.findPreviousSibling('tr') and tag.parent.name == 'table':
        print("Splitting Table named %s" % title)
        self._SplitTableAt(tag.parent, tag)
      # Now move the <h2> tag to be in front of the <table> tag
      assert tag.parent.name == 'table'
      table = tag.parent
      table_parent = table.parent
      table_parent.insert(self._IndexOfChild(table_parent, table), tag)

  def RemoveTopHeadings(self):
    '''Removes <div> sections with a header, tabs, or navpath class attribute'''
    header_tags = self.soup.findAll(
        name='div',
        attrs={'class' : re.compile(r'^(header|tabs[0-9]*|navpath)$')})
    for tag in header_tags:
      tag.extract()

  def FixAll(self):
    '''Applies every clean-up step to the parsed document.'''
    self.FixTableHeadings()
    self.RemoveTopHeadings()

  def __str__(self):
    '''Returns the (possibly modified) document serialized back to html.'''
    return str(self.soup)
100
101
def main():
  '''Main entry for the doxy_cleanup utility

  doxy_cleanup takes a list of html files and modifies them in place. With
  -m/--move, each file is moved after it has been rewritten into an
  "original_html" directory located in the parent of the file's directory
  (as computed from the path given on the command line); the directory is
  created if it does not exist yet.

  Returns:
    1 if no files were given (after printing the usage message), 0 otherwise.
    An error raised while processing a file is reported and then propagated.
  '''

  parser = optparse.OptionParser(usage='Usage: %prog [options] files...')

  parser.add_option('-m', '--move', dest='move', action='store_true',
                    default=False, help='move html files to "original_html"')

  options, files = parser.parse_args()

  if not files:
    parser.print_usage()
    return 1

  for filename in files:
    try:
      with open(filename, 'r') as infile:
        html = infile.read()

      print("Processing %s" % filename)
      fixer = HTMLFixer(html)
      fixer.FixAll()
      with open(filename, 'w') as outfile:
        outfile.write(str(fixer))
      if options.move:
        new_directory = os.path.join(
            os.path.dirname(os.path.dirname(filename)), 'original_html')
        if not os.path.exists(new_directory):
          os.mkdir(new_directory)
        shutil.move(filename, new_directory)
    except:
      # Deliberately catches everything: the error is only annotated with the
      # name of the offending file and then re-raised unchanged.
      print("Error while processing %s" % filename)
      raise

  return 0
139
140
if __name__ == '__main__':
  # Run the tool and hand main()'s return value to the shell as exit status.
  exit_status = main()
  sys.exit(exit_status)
143