• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org)
2# Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php
3
4import cgi
5import six
6import re
7from six.moves import html_entities
8from six.moves.urllib.parse import quote, unquote
9
10
# Names exported by a star-import of this module.
__all__ = ['html_quote', 'html_unquote', 'url_quote', 'url_unquote',
           'strip_html']

# Encoding used by html_quote/html_unquote when no encoding is passed.
default_encoding = 'UTF-8'
15
def _html_escape(s):
    """
    Escape ``&``, ``<``, ``>`` and ``"`` in the native string *s*.

    The output matches what the legacy ``cgi.escape(s, 1)`` produced;
    in particular the single quote is left untouched.
    """
    # cgi.escape() was removed in Python 3.8 (and the cgi module itself
    # in 3.13); html.escape() does not exist on Python 2.
    try:
        from html import escape
    except ImportError:
        from cgi import escape
    # quote=False limits the library call to &, < and >; the double
    # quote is replaced by hand so that ' is not escaped as well.
    return escape(s, False).replace('"', '&quot;')


def html_quote(v, encoding=None):
    r"""
    Quote the value (turned to a string) as HTML.  This quotes ``&``,
    ``<``, ``>`` and ``"``.

    ``None`` is returned as ``''``.

    On Python 3, text is returned as text, any other non-bytes object
    is first converted with ``six.text_type(v)``, and bytes are decoded
    with *encoding*, quoted and encoded again so that bytes in gives
    bytes out.

    On Python 2 the result is always a byte string: byte strings are
    quoted as they are, everything else is converted to unicode and
    encoded with *encoding* before quoting.

    *encoding* defaults to the module-level ``default_encoding``.
    """
    encoding = encoding or default_encoding
    if v is None:
        return ''
    if isinstance(v, six.binary_type):
        if six.PY3:
            # The escape functions only accept text on Python 3.
            return _html_escape(v.decode(encoding)).encode(encoding)
        return _html_escape(v)
    if not isinstance(v, six.text_type):
        v = six.text_type(v)
    if six.PY3:
        return _html_escape(v)
    return _html_escape(v.encode(encoding))
36
# Entity names may contain digits after the first letter (``&frac12;``,
# ``&sup2;``, ``&there4;``), so digits have to be accepted as well.
_unquote_re = re.compile(r'&([a-zA-Z][a-zA-Z0-9]*);')
def _entity_subber(match, name2c=html_entities.name2codepoint):
    """
    Substitution callback for ``_unquote_re``.

    Looks the matched entity name up in *name2c* and returns the
    corresponding character; when the name is not in the table the
    matched text is returned unchanged.
    """
    code = name2c.get(match.group(1))
    if code is None:
        # Unknown name: leave the original ``&name;`` text in place.
        return match.group(0)
    return six.unichr(code)
44
def html_unquote(s, encoding=None):
    r"""
    Replace named HTML entities in *s* by the characters they stand for.

    Byte strings are decoded first, using *encoding* or, when that is
    not given, the module-level ``default_encoding``.  Entities whose
    name is unknown and numeric references such as ``&#38;`` are left
    as they are.
    """
    is_binary = isinstance(s, six.binary_type)
    if is_binary:
        charset = encoding or default_encoding
        s = s.decode(charset)
    unquoted = _unquote_re.sub(_entity_subber, s)
    return unquoted
53
def strip_html(s):
    """
    Remove everything that looks like a tag from *s*, then resolve the
    named entities in what is left with ``html_unquote``.
    """
    # NOTE: ``.`` does not match a newline here, so a ``<...>`` span that
    # contains a line break is not removed.
    without_tags = re.sub('<.*?>', '', s)
    return html_unquote(without_tags)
59
def no_quote(s):
    """
    Identity quoting function: hand *s* back exactly as it was given.
    """
    return s
65
66_comment_quote_re = re.compile(r'\-\s*\>')
67# Everything but \r, \n, \t:
68_bad_chars_re = re.compile('[\x00-\x08\x0b-\x0c\x0e-\x1f]')
69def comment_quote(s):
70    """
71    Quote that makes sure text can't escape a comment
72    """
73    comment = str(s)
74    #comment = _bad_chars_re.sub('', comment)
75    #print('in ', repr(str(s)))
76    #print('out', repr(comment))
77    comment = _comment_quote_re.sub('-&gt;', comment)
78    return comment
79
# URL quoting helpers: plain aliases for the quote/unquote functions
# imported from six.moves.urllib.parse at the top of this module.
url_quote = quote
url_unquote = unquote
82
# When executed as a script, run the doctests found in this module.
if __name__ == '__main__':
    import doctest
    doctest.testmod()
86