• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
# Authors: John Dennis <jdennis@redhat.com>
#
# Copyright (C) 2007 Red Hat, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
19
20
# Names exported by ``from <this module> import *``.
__all__ = [
    # Entity escaping and HTML -> plain text conversion.
    'escape_html',
    'unescape_html',
    'html_to_text',

    # Document assembly.
    'html_document',
]
28
29import htmllib
30import formatter as Formatter
31import string
32from types import *
33try:
34    from io import StringIO
35except ImportError:
36    from StringIO import StringIO
37
38#------------------------------------------------------------------------------
39
40
class TextWriter(Formatter.DumbWriter):
    '''DumbWriter variant that prefixes every output line with an indent.

    The indent is ``indent_level * indent_width`` spaces; the level is
    updated through new_margin().
    '''

    def __init__(self, file=None, maxcol=80, indent_width=4):
        '''Create the writer.

        file and maxcol are handed to Formatter.DumbWriter unchanged;
        indent_width is the number of spaces per indent level.
        '''
        Formatter.DumbWriter.__init__(self, file, maxcol)
        self.indent_width = indent_width
        self.indent_level = 0
        self._set_indent()

    def _set_indent(self):
        '''Recompute the cached indent column and indent string.'''
        width = self.indent_level * self.indent_width
        self.indent_col = width
        self.indent = ' ' * width

    def new_margin(self, margin, level):
        '''Adopt `level` as the current indent level (`margin` is unused).'''
        self.indent_level = level
        self._set_indent()

    def send_label_data(self, data):
        '''Write a label followed by one space, right-aligned in the indent.

        A label wider than the indent column is written without padding.
        '''
        label = data + ' '
        # A negative difference means the label overflows the indent column;
        # in that case no padding is emitted.
        padding = ' ' * max(self.indent_col - len(label), 0)
        self.send_literal_data(padding + label)

    def send_flowing_data(self, data):
        '''Write `data` word by word, wrapping before maxcol is reached.

        Each line that is started (or wrapped) begins with the current
        indent string.  Updates self.col and self.atbreak.
        '''
        if not data:
            return

        out = self.file.write
        limit = self.maxcol
        margin = self.indent
        margin_col = self.indent_col

        # A separator is owed before the first word if the previous chunk
        # ended at a break or this chunk starts with whitespace.
        need_sep = self.atbreak or data[0] in string.whitespace

        pos = self.col
        if pos == 0:
            # At the start of a line: emit the indent first.
            out(margin)
            pos = margin_col

        for token in data.split():
            if need_sep:
                if pos + len(token) < limit:
                    out(' ')
                    pos += 1
                else:
                    # Word would reach the right margin: wrap and re-indent.
                    out('\n' + margin)
                    pos = margin_col
            out(token)
            pos += len(token)
            need_sep = True

        self.col = pos
        self.atbreak = data[-1] in string.whitespace
89
90
class HTMLParserAnchor(htmllib.HTMLParser):
    '''HTMLParser that emits an anchor's href as " (href) " text.

    anchor_bgn()/anchor_end() override the base-class anchor hooks
    (presumably invoked by htmllib for <a> start and end tags).
    '''

    def __init__(self, formatter, verbose=0):
        '''Pass formatter and verbose straight through to the base parser.'''
        htmllib.HTMLParser.__init__(self, formatter, verbose)

    def anchor_bgn(self, href, name, type):
        '''Remember the anchor's href; name and type are ignored.'''
        self.anchor = href

    def anchor_end(self):
        '''Emit the remembered href in parentheses, then forget it.'''
        target = self.anchor
        if not target:
            return
        self.handle_data(' (%s) ' % target)
        self.anchor = None
103
104#------------------------------------------------------------------------------
105
106
def escape_html(s):
    '''Return `s` with &, <, >, ' and " replaced by HTML entities.

    None is passed through unchanged.
    '''
    if s is None:
        return None
    # '&' has to be handled first, otherwise the ampersands introduced by
    # the other entities would themselves be escaped.
    substitutions = (
        ("&", "&amp;"),
        ("<", "&lt;"),
        (">", "&gt;"),
        ("'", "&apos;"),
        ('"', "&quot;"),
    )
    for char, entity in substitutions:
        s = s.replace(char, entity)
    return s
116
117
def unescape_html(s):
    '''Return `s` with the entities produced by escape_html() decoded.

    None, and strings containing no '&', are returned unchanged.
    '''
    if s is None or '&' not in s:
        return s
    # '&amp;' has to be handled last so that e.g. '&amp;lt;' decodes to the
    # literal text '&lt;' rather than to '<'.
    substitutions = (
        ("&lt;", "<"),
        ("&gt;", ">"),
        ("&apos;", "'"),
        ("&quot;", '"'),
        ("&amp;", "&"),
    )
    for entity, char in substitutions:
        s = s.replace(entity, char)
    return s
129
130
def html_to_text(html, maxcol=80):
    '''Render an HTML string as plain text.

    The markup is parsed with HTMLParserAnchor and written through a
    TextWriter whose maximum column is `maxcol`.

    Returns the rendered text, or None if the conversion raised an
    exception (the failure is logged, not propagated).
    '''
    # Imported locally: the module's import block does not provide a logger.
    import logging

    buffer = StringIO()
    try:
        formatter = Formatter.AbstractFormatter(TextWriter(buffer, maxcol))
        parser = HTMLParserAnchor(formatter)
        parser.feed(html)
        parser.close()
        return buffer.getvalue()
    except Exception as e:
        # Deliberate best-effort: callers get None instead of an exception.
        logging.getLogger(__name__).error('cannot convert html to text: %s', e)
        return None
    finally:
        buffer.close()
144
145
def html_document(*body_components):
    '''Wrap the body components in an HTML document with a valid header.

    Accepts a variable number of arguments, each of which can be:
    * a string
    * a sequence of strings (tuple or list)
    * a callable taking no parameters and returning a string or a
      sequence of strings (tuple or list)

    Returns the complete document as a single string.  A component that
    is not a string raises TypeError when the pieces are joined.
    '''
    head = '<html>\n  <head>\n    <meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>\n  </head>\n  <body>\n'
    tail = '\n  </body>\n</html>'

    parts = [head]
    for component in body_components:
        # Lists and tuples are checked first so that they are never
        # mistaken for callables; anything else callable is invoked to
        # obtain the real component.
        if not isinstance(component, (list, tuple)) and callable(component):
            component = component()

        if isinstance(component, (list, tuple)):
            parts.extend(component)
        else:
            parts.append(component)

    parts.append(tail)
    return ''.join(parts)
176