• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# -*- coding: utf-8 -*-
2"""
3markupsafe
4~~~~~~~~~~
5
6Implements an escape function and a Markup string to replace HTML
7special characters with safe representations.
8
9:copyright: 2010 Pallets
10:license: BSD-3-Clause
11"""
12import re
13import string
14
15from ._compat import int_types
16from ._compat import iteritems
17from ._compat import Mapping
18from ._compat import PY2
19from ._compat import string_types
20from ._compat import text_type
21from ._compat import unichr
22
# Version string of this module.
__version__ = "1.1.1"

# Public names exported by a star-import of this module.  This has to stay a
# mutable list: the end of the module appends to it.
__all__ = [
    "Markup",
    "soft_unicode",
    "escape",
    "escape_silent",
]
26
# Matches one HTML comment or one tag.  ``re.DOTALL`` lets the ``.`` in the
# comment alternative cross line breaks, so a comment that spans several
# lines (and may contain ``>``) is matched as a single unit instead of being
# cut at its first ``>`` by the tag alternative.
_striptags_re = re.compile(r"(<!--.*?-->|<[^>]*>)", re.DOTALL)
# Matches an entity reference such as ``&amp;`` or ``&#38;``; group 1 is the
# text between ``&`` and ``;``.
_entity_re = re.compile(r"&([^& ;]+);")
29
30
class Markup(text_type):
    """A string that is ready to be safely inserted into an HTML or XML
    document, either because it was escaped or because it was marked
    safe.

    Passing an object to the constructor converts it to text and wraps
    it to mark it safe without escaping. To escape the text, use the
    :meth:`escape` class method instead.

    >>> Markup('Hello, <em>World</em>!')
    Markup('Hello, <em>World</em>!')
    >>> Markup(42)
    Markup('42')
    >>> Markup.escape('Hello, <em>World</em>!')
    Markup('Hello, &lt;em&gt;World&lt;/em&gt;!')

    This implements the ``__html__()`` interface that some frameworks
    use. Passing an object that implements ``__html__()`` will wrap the
    output of that method, marking it safe.

    >>> class Foo:
    ...     def __html__(self):
    ...         return '<a href="/foo">foo</a>'
    ...
    >>> Markup(Foo())
    Markup('<a href="/foo">foo</a>')

    This is a subclass of the text type (``str`` in Python 3,
    ``unicode`` in Python 2). It has the same methods as that type, but
    all methods escape their arguments and return a ``Markup`` instance.

    >>> Markup('<em>%s</em>') % 'foo & bar'
    Markup('<em>foo &amp; bar</em>')
    >>> Markup('<em>Hello</em> ') + '<foo>'
    Markup('<em>Hello</em> &lt;foo&gt;')
    """

    __slots__ = ()

    def __new__(cls, base=u"", encoding=None, errors="strict"):
        """Wrap ``base`` as markup without escaping it.

        If ``base`` has an ``__html__`` method, the value returned by that
        method is wrapped instead. ``encoding`` and ``errors`` are only
        forwarded to the text type when ``encoding`` is given.
        """
        if hasattr(base, "__html__"):
            base = base.__html__()
        if encoding is None:
            return text_type.__new__(cls, base)
        return text_type.__new__(cls, base, encoding, errors)

    def __html__(self):
        """Return ``self``; the instance already is its HTML form."""
        return self

    def __add__(self, other):
        """Return ``self`` followed by the escaped ``other``.

        Only strings and objects with ``__html__`` are supported; for any
        other operand ``NotImplemented`` is returned.
        """
        if isinstance(other, string_types) or hasattr(other, "__html__"):
            return self.__class__(super(Markup, self).__add__(self.escape(other)))
        return NotImplemented

    def __radd__(self, other):
        """Return the escaped ``other`` followed by ``self``.

        Only strings and objects with ``__html__`` are supported; for any
        other operand ``NotImplemented`` is returned.
        """
        if hasattr(other, "__html__") or isinstance(other, string_types):
            return self.escape(other).__add__(self)
        return NotImplemented

    def __mul__(self, num):
        """Repeat the markup ``num`` times, keeping the result's class.

        Returns ``NotImplemented`` when ``num`` is not an integer type.
        """
        if isinstance(num, int_types):
            return self.__class__(text_type.__mul__(self, num))
        return NotImplemented

    __rmul__ = __mul__

    def __mod__(self, arg):
        """Apply ``%`` formatting with every argument escaped.

        Each argument is wrapped in ``_MarkupEscapeHelper`` so the value is
        escaped at the moment the format operation converts it.
        """
        if isinstance(arg, tuple):
            arg = tuple(_MarkupEscapeHelper(x, self.escape) for x in arg)
        else:
            arg = _MarkupEscapeHelper(arg, self.escape)
        return self.__class__(text_type.__mod__(self, arg))

    def __repr__(self):
        """Return ``ClassName(<repr of the underlying text>)``."""
        return "%s(%s)" % (self.__class__.__name__, text_type.__repr__(self))

    # The methods below reuse the text type's docstrings (assigned right
    # after each definition), so they are described with comments here.

    # Escape every item of ``seq`` before joining; the result keeps the class.
    def join(self, seq):
        return self.__class__(text_type.join(self, map(self.escape, seq)))

    join.__doc__ = text_type.join.__doc__

    # Split like the text type and wrap every piece in this class.
    def split(self, *args, **kwargs):
        return list(map(self.__class__, text_type.split(self, *args, **kwargs)))

    split.__doc__ = text_type.split.__doc__

    # Same as ``split`` but delegating to the text type's ``rsplit``.
    def rsplit(self, *args, **kwargs):
        return list(map(self.__class__, text_type.rsplit(self, *args, **kwargs)))

    rsplit.__doc__ = text_type.rsplit.__doc__

    # Split into lines and wrap every line in this class.
    def splitlines(self, *args, **kwargs):
        return list(map(self.__class__, text_type.splitlines(self, *args, **kwargs)))

    splitlines.__doc__ = text_type.splitlines.__doc__

    def unescape(self):
        """Convert escaped markup back into a text string. This replaces
        HTML entities with the characters they represent.

        Named entities are looked up in ``HTML_ENTITIES``; ``&#NNN;`` and
        ``&#xHHH;`` references are converted numerically. Any reference
        that cannot be converted is left in the output unchanged.

        >>> Markup('Main &raquo; <em>About</em>').unescape()
        'Main » <em>About</em>'
        """
        from ._constants import HTML_ENTITIES

        def handle_match(m):
            name = m.group(1)
            if name in HTML_ENTITIES:
                return unichr(HTML_ENTITIES[name])
            try:
                if name[:2] in ("#x", "#X"):
                    return unichr(int(name[2:], 16))
                elif name.startswith("#"):
                    return unichr(int(name[1:]))
            except (ValueError, OverflowError):
                # ValueError: the digits are not a valid number, or the
                # number is outside the code point range.
                # OverflowError: the number is too large for the character
                # conversion to accept at all (e.g. a 20-digit reference).
                pass
            # Don't modify unexpected input.
            return m.group()

        return _entity_re.sub(handle_match, text_type(self))

    def striptags(self):
        """:meth:`unescape` the markup, remove tags, and normalize
        whitespace to single spaces.

        >>> Markup('Main &raquo;\t<em>About</em>').striptags()
        'Main » About'
        """
        # Tags are removed and whitespace is collapsed first; entities are
        # resolved last, on the already stripped text.
        stripped = u" ".join(_striptags_re.sub("", self).split())
        return Markup(stripped).unescape()

    @classmethod
    def escape(cls, s):
        """Escape a string. Calls :func:`escape` and ensures that for
        subclasses the correct type is returned.
        """
        rv = escape(s)
        if rv.__class__ is not cls:
            return cls(rv)
        return rv

    # Class-body helper (deleted again at the end of the class body): builds
    # a method that escapes its string-like positional and keyword arguments,
    # calls the text type's method of the same name, and wraps the result in
    # the instance's class.
    def make_simple_escaping_wrapper(name):  # noqa: B902
        orig = getattr(text_type, name)

        def func(self, *args, **kwargs):
            # Positional arguments are copied into a list so they can be
            # replaced in place; kwargs is updated in place as well.
            args = _escape_argspec(list(args), enumerate(args), self.escape)
            _escape_argspec(kwargs, iteritems(kwargs), self.escape)
            return self.__class__(orig(self, *args, **kwargs))

        func.__name__ = orig.__name__
        func.__doc__ = orig.__doc__
        return func

    # Install one wrapper per listed method directly into the class
    # namespace that is being built.
    for method in (
        "__getitem__",
        "capitalize",
        "title",
        "lower",
        "upper",
        "replace",
        "ljust",
        "rjust",
        "lstrip",
        "rstrip",
        "center",
        "strip",
        "translate",
        "expandtabs",
        "swapcase",
        "zfill",
    ):
        locals()[method] = make_simple_escaping_wrapper(method)

    def partition(self, sep):
        """Partition around the escaped ``sep``; returns a tuple whose
        items are instances of this class.
        """
        return tuple(map(self.__class__, text_type.partition(self, self.escape(sep))))

    def rpartition(self, sep):
        """Like :meth:`partition`, delegating to the text type's
        ``rpartition``.
        """
        return tuple(map(self.__class__, text_type.rpartition(self, self.escape(sep))))

    def format(self, *args, **kwargs):
        """Format the markup with ``EscapeFormatter`` so that every
        substituted field is escaped.
        """
        formatter = EscapeFormatter(self.escape)
        # Positional and keyword arguments are combined in one mapping; see
        # _MagicFormatMapping for the standard library bug this works around.
        kwargs = _MagicFormatMapping(args, kwargs)
        return self.__class__(formatter.vformat(self, args, kwargs))

    def __html_format__(self, format_spec):
        """Return ``self`` when used as a format field.

        :raises ValueError: if a non-empty ``format_spec`` is given.
        """
        if format_spec:
            raise ValueError("Unsupported format specification for Markup.")
        return self

    # not in python 3
    if hasattr(text_type, "__getslice__"):
        __getslice__ = make_simple_escaping_wrapper("__getslice__")

    # Keep the class namespace free of the construction-time helpers.
    del method, make_simple_escaping_wrapper
225
226
class _MagicFormatMapping(Mapping):
    """Mapping that serves both positional and keyword format arguments.

    It exists to work around a string formatting bug in the Python
    standard library; see http://bugs.python.org/issue13598 for the
    background.

    Looking up the empty key ``""`` hands out the positional arguments one
    after another; every other key is read from the keyword arguments.
    Iteration and length only reflect the keyword arguments.
    """

    def __init__(self, args, kwargs):
        self._args = args
        self._kwargs = kwargs
        # Position handed out by the next lookup of the empty key.
        self._last_index = 0

    def __getitem__(self, key):
        if key != "":
            return self._kwargs[key]

        # Auto-numbered field: consume the next position, whether or not a
        # positional argument exists for it.
        position = self._last_index
        self._last_index = position + 1
        try:
            positional = self._args[position]
        except LookupError:
            # No such positional argument; retry below with the position
            # spelled as a keyword name.
            fallback_key = str(position)
        else:
            return positional
        return self._kwargs[fallback_key]

    def __iter__(self):
        return iter(self._kwargs)

    def __len__(self):
        return len(self._kwargs)
256
257
if hasattr(text_type, "format"):

    class EscapeFormatter(string.Formatter):
        """``string.Formatter`` that sends every formatted field through an
        escape callable.
        """

        def __init__(self, escape):
            # Callable applied to each rendered field in format_field().
            self.escape = escape

        def format_field(self, value, format_spec):
            """Render a single replacement field and escape the result.

            Values with ``__html_format__`` render themselves with the
            given spec. Values that only provide ``__html__`` are rendered
            through it and reject any format spec with ``ValueError``.
            Everything else is formatted by ``string.Formatter``.
            """
            if hasattr(value, "__html_format__"):
                rendered = value.__html_format__(format_spec)
            elif not hasattr(value, "__html__"):
                # We need to make sure the format spec is unicode here as
                # otherwise the wrong callback methods are invoked.  For
                # instance a byte string there would invoke __str__ and
                # not __unicode__.
                rendered = string.Formatter.format_field(
                    self, value, text_type(format_spec)
                )
            elif format_spec:
                template = (
                    "Format specifier {0} given, but {1} does not"
                    " define __html_format__. A class that defines"
                    " __html__ must define __html_format__ to work"
                    " with format specifiers."
                )
                raise ValueError(template.format(format_spec, type(value)))
            else:
                rendered = value.__html__()
            return text_type(self.escape(rendered))
283
284
def _escape_argspec(obj, iterable, escape):
    """Escape the string-like entries of an argument container in place.

    ``iterable`` yields ``(key, value)`` pairs. Each value that has
    ``__html__`` or is a string is passed to ``escape`` and the result is
    stored at ``obj[key]``; all other values are left alone. ``obj`` itself
    is returned.
    """
    for slot, candidate in iterable:
        if not hasattr(candidate, "__html__") and not isinstance(
            candidate, string_types
        ):
            continue
        obj[slot] = escape(candidate)
    return obj
291
292
class _MarkupEscapeHelper(object):
    """Wrapper used by ``Markup.__mod__`` around each format argument.

    It holds the original object together with an escape callable, so the
    object is only escaped when it is converted to text. Numeric
    conversions are passed through to the wrapped object unchanged.
    """

    def __init__(self, obj, escape):
        self.obj = obj
        self.escape = escape

    def __getitem__(self, item):
        # Looked-up members are wrapped again with the same escape callable.
        member = self.obj[item]
        return _MarkupEscapeHelper(member, self.escape)

    def __str__(self):
        escaped = self.escape(self.obj)
        return text_type(escaped)

    __unicode__ = __str__

    def __repr__(self):
        # The repr of the wrapped object is escaped, not the object itself.
        escaped_repr = self.escape(repr(self.obj))
        return str(escaped_repr)

    def __int__(self):
        return int(self.obj)

    def __float__(self):
        return float(self.obj)
316
317
# We have to import it down here because the speedups and native
# modules import the Markup type, which is defined above.
320try:
321    from ._speedups import escape, escape_silent, soft_unicode
322except ImportError:
323    from ._native import escape, escape_silent, soft_unicode
324
if not PY2:
    # Outside Python 2, also expose ``soft_unicode`` under the name
    # ``soft_str`` and add that name to the exported names.
    soft_str = soft_unicode
    __all__ += ["soft_str"]
328