# -*- coding: utf-8 -*-
"""
markupsafe
~~~~~~~~~~

Implements an escape function and a Markup string to replace HTML
special characters with safe representations.

:copyright: 2010 Pallets
:license: BSD-3-Clause
"""
import re
import string

from ._compat import int_types
from ._compat import iteritems
from ._compat import Mapping
from ._compat import PY2
from ._compat import string_types
from ._compat import text_type
from ._compat import unichr

__version__ = "1.1.1"

__all__ = ["Markup", "soft_unicode", "escape", "escape_silent"]

# ``re.DOTALL`` lets ``.`` cross newlines, so a multi-line comment that
# contains ``>`` is removed as a whole instead of being cut at the first
# ``>`` by the generic tag alternative.
_striptags_re = re.compile(r"(<!--.*?-->|<[^>]*>)", re.DOTALL)
_entity_re = re.compile(r"&([^& ;]+);")


class Markup(text_type):
    """A string that is ready to be safely inserted into an HTML or XML
    document, either because it was escaped or because it was marked
    safe.

    Passing an object to the constructor converts it to text and wraps
    it to mark it safe without escaping. To escape the text, use the
    :meth:`escape` class method instead.

    >>> Markup('Hello, <em>World</em>!')
    Markup('Hello, <em>World</em>!')
    >>> Markup(42)
    Markup('42')
    >>> Markup.escape('Hello, <em>World</em>!')
    Markup('Hello, &lt;em&gt;World&lt;/em&gt;!')

    This implements the ``__html__()`` interface that some frameworks
    use. Passing an object that implements ``__html__()`` will wrap the
    output of that method, marking it safe.

    >>> class Foo:
    ...     def __html__(self):
    ...         return '<a href="/foo">foo</a>'
    ...
    >>> Markup(Foo())
    Markup('<a href="/foo">foo</a>')

    This is a subclass of the text type (``str`` in Python 3,
    ``unicode`` in Python 2). It has the same methods as that type, but
    all methods escape their arguments and return a ``Markup`` instance.

    >>> Markup('<em>%s</em>') % 'foo & bar'
    Markup('<em>foo &amp; bar</em>')
    >>> Markup('<em>Hello</em> ') + '<foo>'
    Markup('<em>Hello</em> &lt;foo&gt;')
    """

    __slots__ = ()

    def __new__(cls, base=u"", encoding=None, errors="strict"):
        """Create the instance from ``base`` without escaping it.

        If ``base`` has an ``__html__`` method, the result of calling it
        is used as the text. ``encoding`` and ``errors`` are only
        forwarded to the text type when ``encoding`` is given.
        """
        if hasattr(base, "__html__"):
            base = base.__html__()
        if encoding is None:
            return text_type.__new__(cls, base)
        return text_type.__new__(cls, base, encoding, errors)

    def __html__(self):
        """Return ``self``; the value is already marked safe."""
        return self

    def __add__(self, other):
        """Concatenate with ``other`` after escaping it.

        Only strings and objects with ``__html__`` are accepted; anything
        else yields ``NotImplemented``.
        """
        if isinstance(other, string_types) or hasattr(other, "__html__"):
            return self.__class__(super(Markup, self).__add__(self.escape(other)))
        return NotImplemented

    def __radd__(self, other):
        """Reflected concatenation: escape ``other`` and append ``self``."""
        if hasattr(other, "__html__") or isinstance(other, string_types):
            return self.escape(other).__add__(self)
        return NotImplemented

    def __mul__(self, num):
        """Repeat the markup ``num`` times, keeping the result marked safe."""
        if isinstance(num, int_types):
            return self.__class__(text_type.__mul__(self, num))
        return NotImplemented

    __rmul__ = __mul__

    def __mod__(self, arg):
        """``%``-format the markup, escaping the interpolated values.

        Each value is wrapped in :class:`_MarkupEscapeHelper` so that it
        is escaped at the moment the text type converts it.
        """
        if isinstance(arg, tuple):
            arg = tuple(_MarkupEscapeHelper(x, self.escape) for x in arg)
        else:
            arg = _MarkupEscapeHelper(arg, self.escape)
        return self.__class__(text_type.__mod__(self, arg))

    def __repr__(self):
        return "%s(%s)" % (self.__class__.__name__, text_type.__repr__(self))

    # The wrappers below reuse the docstrings of the text type, so they
    # are documented with comments rather than their own docstrings.

    # Escape every item of ``seq`` before joining.
    def join(self, seq):
        return self.__class__(text_type.join(self, map(self.escape, seq)))

    join.__doc__ = text_type.join.__doc__

    # The pieces come from already-safe markup, so they are only re-wrapped.
    def split(self, *args, **kwargs):
        return list(map(self.__class__, text_type.split(self, *args, **kwargs)))

    split.__doc__ = text_type.split.__doc__

    def rsplit(self, *args, **kwargs):
        return list(map(self.__class__, text_type.rsplit(self, *args, **kwargs)))

    rsplit.__doc__ = text_type.rsplit.__doc__

    def splitlines(self, *args, **kwargs):
        return list(map(self.__class__, text_type.splitlines(self, *args, **kwargs)))

    splitlines.__doc__ = text_type.splitlines.__doc__

    def unescape(self):
        """Convert escaped markup back into a text string. This replaces
        HTML entities with the characters they represent.

        >>> Markup('Main &raquo; <em>About</em>').unescape()
        'Main » <em>About</em>'
        """
        from ._constants import HTML_ENTITIES

        def handle_match(m):
            name = m.group(1)
            if name in HTML_ENTITIES:
                return unichr(HTML_ENTITIES[name])
            try:
                if name[:2] in ("#x", "#X"):
                    return unichr(int(name[2:], 16))
                elif name.startswith("#"):
                    return unichr(int(name[1:]))
            except (ValueError, OverflowError):
                # ValueError: not a number, or outside the valid code
                # point range. OverflowError: the number is too large to
                # be converted at all. Either way the reference is bogus.
                pass
            # Don't modify unexpected input.
            return m.group()

        return _entity_re.sub(handle_match, text_type(self))

    def striptags(self):
        """:meth:`unescape` the markup, remove tags, and normalize
        whitespace to single spaces.

        >>> Markup('Main &raquo;\t<em>About</em>').striptags()
        'Main » About'
        """
        stripped = u" ".join(_striptags_re.sub("", self).split())
        return Markup(stripped).unescape()

    @classmethod
    def escape(cls, s):
        """Escape a string. Calls :func:`escape` and ensures that for
        subclasses the correct type is returned.
        """
        rv = escape(s)
        if rv.__class__ is not cls:
            return cls(rv)
        return rv

    def make_simple_escaping_wrapper(name):  # noqa: B902
        # Class-body helper (deleted again below): build a method that
        # calls the text type's method ``name`` with string and
        # ``__html__`` arguments escaped and wraps the result in the
        # instance's class.
        orig = getattr(text_type, name)

        def func(self, *args, **kwargs):
            args = _escape_argspec(list(args), enumerate(args), self.escape)
            _escape_argspec(kwargs, iteritems(kwargs), self.escape)
            return self.__class__(orig(self, *args, **kwargs))

        func.__name__ = orig.__name__
        func.__doc__ = orig.__doc__
        return func

    # Install the wrappers directly into the class namespace.
    for method in (
        "__getitem__",
        "capitalize",
        "title",
        "lower",
        "upper",
        "replace",
        "ljust",
        "rjust",
        "lstrip",
        "rstrip",
        "center",
        "strip",
        "translate",
        "expandtabs",
        "swapcase",
        "zfill",
    ):
        locals()[method] = make_simple_escaping_wrapper(method)

    def partition(self, sep):
        """Like the text type's ``partition``, with ``sep`` escaped and
        every part returned as an instance of this class.
        """
        return tuple(map(self.__class__, text_type.partition(self, self.escape(sep))))

    def rpartition(self, sep):
        """Like the text type's ``rpartition``, with ``sep`` escaped and
        every part returned as an instance of this class.
        """
        return tuple(map(self.__class__, text_type.rpartition(self, self.escape(sep))))

    def format(self, *args, **kwargs):
        """Format the markup with :class:`EscapeFormatter` so that every
        substituted field is escaped; the result is marked safe.
        """
        formatter = EscapeFormatter(self.escape)
        kwargs = _MagicFormatMapping(args, kwargs)
        return self.__class__(formatter.vformat(self, args, kwargs))

    def __html_format__(self, format_spec):
        """Return ``self`` when used as a format field.

        :raises ValueError: if a non-empty ``format_spec`` is given.
        """
        if format_spec:
            raise ValueError("Unsupported format specification for Markup.")
        return self

    # not in python 3
    if hasattr(text_type, "__getslice__"):
        __getslice__ = make_simple_escaping_wrapper("__getslice__")

    del method, make_simple_escaping_wrapper


class _MagicFormatMapping(Mapping):
    """This class implements a dummy wrapper to fix a bug in the Python
    standard library for string formatting.

    See http://bugs.python.org/issue13598 for information about why
    this is necessary.
    """

    def __init__(self, args, kwargs):
        self._args = args
        self._kwargs = kwargs
        # Next positional index to hand out for an empty field name.
        self._last_index = 0

    def __getitem__(self, key):
        # An empty key is answered with the next positional argument.
        # If there is none, the lookup falls through to the keyword
        # arguments using the index as the key.
        if key == "":
            idx = self._last_index
            self._last_index += 1
            try:
                return self._args[idx]
            except LookupError:
                pass
            key = str(idx)
        return self._kwargs[key]

    def __iter__(self):
        return iter(self._kwargs)

    def __len__(self):
        return len(self._kwargs)


if hasattr(text_type, "format"):

    class EscapeFormatter(string.Formatter):
        """Formatter that passes every formatted field through ``escape``."""

        def __init__(self, escape):
            self.escape = escape

        def format_field(self, value, format_spec):
            """Format ``value`` and return the escaped result as text.

            Values with ``__html_format__`` format themselves. Values
            with only ``__html__`` are used as-is and reject a format
            spec with ``ValueError``. Everything else is formatted by
            ``string.Formatter``.
            """
            if hasattr(value, "__html_format__"):
                rv = value.__html_format__(format_spec)
            elif hasattr(value, "__html__"):
                if format_spec:
                    raise ValueError(
                        "Format specifier {0} given, but {1} does not"
                        " define __html_format__. A class that defines"
                        " __html__ must define __html_format__ to work"
                        " with format specifiers.".format(format_spec, type(value))
                    )
                rv = value.__html__()
            else:
                # We need to make sure the format spec is unicode here as
                # otherwise the wrong callback methods are invoked.  For
                # instance a byte string there would invoke __str__ and
                # not __unicode__.
                rv = string.Formatter.format_field(self, value, text_type(format_spec))
            return text_type(self.escape(rv))


def _escape_argspec(obj, iterable, escape):
    """Helper for various string-wrapped functions.

    For each ``(key, value)`` pair in ``iterable``, replace ``obj[key]``
    with the escaped value when the value is a string or has
    ``__html__``. ``obj`` is modified in place and returned.
    """
    for key, value in iterable:
        if hasattr(value, "__html__") or isinstance(value, string_types):
            obj[key] = escape(value)
    return obj


class _MarkupEscapeHelper(object):
    """Helper for Markup.__mod__

    Wraps a value so that converting it to text (or taking its repr)
    yields the escaped form, while ``int()`` and ``float()`` act on the
    wrapped object directly.
    """

    def __init__(self, obj, escape):
        self.obj = obj
        self.escape = escape

    def __getitem__(self, item):
        # Item lookups (e.g. ``%(name)s`` on a mapping) stay wrapped.
        return _MarkupEscapeHelper(self.obj[item], self.escape)

    def __str__(self):
        return text_type(self.escape(self.obj))

    __unicode__ = __str__

    def __repr__(self):
        return str(self.escape(repr(self.obj)))

    def __int__(self):
        return int(self.obj)

    def __float__(self):
        return float(self.obj)


# We have to import these down here because the speedups and native
# modules import the Markup type, which is defined above.
try:
    from ._speedups import escape, escape_silent, soft_unicode
except ImportError:
    from ._native import escape, escape_silent, soft_unicode

if not PY2:
    soft_str = soft_unicode
    __all__.append("soft_str")