"""
Python Markdown

A Python implementation of John Gruber's Markdown.

Documentation: https://python-markdown.github.io/
GitHub: https://github.com/Python-Markdown/markdown/
PyPI: https://pypi.org/project/Markdown/

Started by Manfred Stienstra (http://www.dwerg.net/).
Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
Currently maintained by Waylan Limberg (https://github.com/waylan),
Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).

Copyright 2007-2018 The Python Markdown Project (v. 1.7 and later)
Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
Copyright 2004 Manfred Stienstra (the original version)

License: BSD (see LICENSE.md for details).
"""

import re
import sys
import warnings
from collections import namedtuple
from functools import wraps, lru_cache
from itertools import count


"""
Constants you might want to modify
-----------------------------------------------------------------------------
"""


BLOCK_LEVEL_ELEMENTS = [
    # Elements which are invalid to wrap in a `<p>` tag.
    # See https://w3c.github.io/html/grouping-content.html#the-p-element
    'address', 'article', 'aside', 'blockquote', 'details', 'div', 'dl',
    'fieldset', 'figcaption', 'figure', 'footer', 'form', 'h1', 'h2', 'h3',
    'h4', 'h5', 'h6', 'header', 'hgroup', 'hr', 'main', 'menu', 'nav', 'ol',
    'p', 'pre', 'section', 'table', 'ul',
    # Other elements which Markdown should not be mucking up the contents of.
    'canvas', 'colgroup', 'dd', 'body', 'dt', 'group', 'iframe', 'li', 'legend',
    'math', 'map', 'noscript', 'output', 'object', 'option', 'progress', 'script',
    'style', 'tbody', 'td', 'textarea', 'tfoot', 'th', 'thead', 'tr', 'video'
]

# Placeholders
STX = '\u0002'  # Use STX ("Start of text") for start-of-placeholder
ETX = '\u0003'  # Use ETX ("End of text") for end-of-placeholder
INLINE_PLACEHOLDER_PREFIX = STX+"klzzwxh:"
INLINE_PLACEHOLDER = INLINE_PLACEHOLDER_PREFIX + "%s" + ETX
INLINE_PLACEHOLDER_RE = re.compile(INLINE_PLACEHOLDER % r'([0-9]+)')
AMP_SUBSTITUTE = STX+"amp"+ETX
HTML_PLACEHOLDER = STX + "wzxhzdk:%s" + ETX
HTML_PLACEHOLDER_RE = re.compile(HTML_PLACEHOLDER % r'([0-9]+)')
TAG_PLACEHOLDER = STX + "hzzhzkh:%s" + ETX


"""
Constants you probably do not need to change
-----------------------------------------------------------------------------
"""

RTL_BIDI_RANGES = (
    ('\u0590', '\u07FF'),
    # Hebrew (0590-05FF), Arabic (0600-06FF),
    # Syriac (0700-074F), Arabic supplement (0750-077F),
    # Thaana (0780-07BF), Nko (07C0-07FF).
    ('\u2D30', '\u2D7F')  # Tifinagh
)


"""
AUXILIARY GLOBAL FUNCTIONS
=============================================================================
"""


@lru_cache(maxsize=None)
def get_installed_extensions():
    """
    Return the entry points registered in the `markdown.extensions` group.

    The result is memoized with `lru_cache`, so the entry points are only
    looked up on the first call.
    """
    if sys.version_info >= (3, 10):
        from importlib import metadata
    else:  # <PY310 use backport
        import importlib_metadata as metadata
    # Only load extension entry_points once.
    return metadata.entry_points(group='markdown.extensions')


def deprecated(message, stacklevel=2):
    """
    Raise a DeprecationWarning when wrapped function/method is called.

    Parameters:

    * `message`: Text appended to the generated "'<name>' is deprecated."
      warning message.
    * `stacklevel`: Passed through to `warnings.warn`.

    Usage:
        @deprecated("This method will be removed in version X; use Y instead.")
        def some_method():
            pass
    """
    def wrapper(func):
        @wraps(func)
        def deprecated_func(*args, **kwargs):
            warnings.warn(
                f"'{func.__name__}' is deprecated. {message}",
                category=DeprecationWarning,
                stacklevel=stacklevel
            )
            return func(*args, **kwargs)
        return deprecated_func
    return wrapper


def parseBoolValue(value, fail_on_errors=True, preserve_none=False):
    """Parses a string representing bool value. If parsing was successful,
       returns True or False. If preserve_none=True, returns True, False,
       or None. If parsing was not successful, raises ValueError, or, if
       fail_on_errors=False, returns None."""
    if not isinstance(value, str):
        if preserve_none and value is None:
            return value
        return bool(value)

    # Normalize once; every comparison below is case-insensitive.
    lowered = value.lower()
    if preserve_none and lowered == 'none':
        return None
    if lowered in ('true', 'yes', 'y', 'on', '1'):
        return True
    # Without `preserve_none`, the string 'none' is treated as False.
    if lowered in ('false', 'no', 'n', 'off', '0', 'none'):
        return False
    if fail_on_errors:
        raise ValueError('Cannot parse bool value: %r' % value)
    return None


def code_escape(text):
    """
    Escape code.

    Replaces the HTML-significant characters `&`, `<` and `>` in `text` with
    their character entity references and returns the resulting string.
    """
    # The ampersand must be handled first; otherwise the ampersands
    # introduced by the `<` and `>` replacements would be escaped again.
    if "&" in text:
        text = text.replace("&", "&amp;")
    if "<" in text:
        text = text.replace("<", "&lt;")
    if ">" in text:
        text = text.replace(">", "&gt;")
    return text


def _get_stack_depth(size=2):
    """
    Get current stack depth, performantly.

    Starts at the frame `size` levels up the call stack and follows the
    `f_back` links until they run out, counting upwards from `size`.
    """
    frame = sys._getframe(size)

    for depth in count(size):
        frame = frame.f_back
        if not frame:
            return depth


def nearing_recursion_limit():
    """Return true if current stack depth is within 100 of maximum limit."""
    return sys.getrecursionlimit() - _get_stack_depth() < 100


"""
MISC AUXILIARY CLASSES
=============================================================================
"""


class AtomicString(str):
    """A string which should not be further processed."""
    pass


class Processor:
    """
    Minimal base class which keeps a reference to `md`.

    Parameters:

    * `md`: Stored unchanged as `self.md` (presumably the owning Markdown
      instance -- confirm against callers). Defaults to `None`.
    """

    def __init__(self, md=None):
        self.md = md


class HtmlStash:
    """
    This class is used for stashing HTML objects that we extract
    in the beginning and replace with place-holders.
    """

    def __init__(self):
        """ Create a HtmlStash. """
        self.html_counter = 0  # for counting inline html segments
        self.rawHtmlBlocks = []
        self.tag_counter = 0
        self.tag_data = []  # list of dictionaries in the order tags appear

    def store(self, html):
        """
        Saves an HTML segment for later reinsertion.  Returns a
        placeholder string that needs to be inserted into the
        document.

        Keyword arguments:

        * html: an html segment

        Returns : a placeholder string

        """
        self.rawHtmlBlocks.append(html)
        placeholder = self.get_placeholder(self.html_counter)
        self.html_counter += 1
        return placeholder

    def reset(self):
        """
        Clear the stashed HTML segments and restart their counter.

        Note that `tag_counter` and `tag_data` are left untouched.
        """
        self.html_counter = 0
        self.rawHtmlBlocks = []

    def get_placeholder(self, key):
        """Return the HTML placeholder string for the given `key`."""
        return HTML_PLACEHOLDER % key

    def store_tag(self, tag, attrs, left_index, right_index):
        """Store tag data and return a placeholder."""
        self.tag_data.append({'tag': tag, 'attrs': attrs,
                              'left_index': left_index,
                              'right_index': right_index})
        placeholder = TAG_PLACEHOLDER % str(self.tag_counter)
        self.tag_counter += 1  # equal to the tag's index in self.tag_data
        return placeholder


# Used internally by `Registry` for each item in its sorted list.
# Provides an easier to read API when editing the code later.
# For example, `item.name` is more clear than `item[0]`.
_PriorityItem = namedtuple('PriorityItem', ['name', 'priority'])


class Registry:
    """
    A priority sorted registry.

    A `Registry` instance provides two public methods to alter the data of the
    registry: `register` and `deregister`. Use `register` to add items and
    `deregister` to remove items. See each method for specifics.

    When registering an item, a "name" and a "priority" must be provided. All
    items are automatically sorted by "priority" from highest to lowest. The
    "name" is used to remove ("deregister") and get items.

    A `Registry` instance is like a list (which maintains order) when reading
    data. You may iterate over the items, get an item and get a count (length)
    of all items. You may also check that the registry contains an item.

    When getting an item you may use either the index of the item or the
    string-based "name". For example:

        registry = Registry()
        registry.register(SomeItem(), 'itemname', 20)
        # Get the item by index
        item = registry[0]
        # Get the item by name
        item = registry['itemname']

    When checking that the registry contains an item, you may use either the
    string-based "name", or a reference to the actual item. For example:

        someitem = SomeItem()
        registry.register(someitem, 'itemname', 20)
        # Contains the name
        assert 'itemname' in registry
        # Contains the item instance
        assert someitem in registry

    The method `get_index_for_name` is also available to obtain the index of
    an item using that item's assigned "name".
    """

    def __init__(self):
        self._data = {}  # maps name -> registered item
        self._priority = []  # list of `_PriorityItem(name, priority)`
        self._is_sorted = False  # `_priority` is sorted lazily on read

    def __contains__(self, item):
        """Return True if `item` is a registered name or a registered item."""
        if isinstance(item, str):
            # Check if an item exists by this name.
            return item in self._data
        # Check if this instance exists.
        return item in self._data.values()

    def __iter__(self):
        """Iterate over the registered items from highest to lowest priority."""
        self._sort()
        # Iterate over a snapshot list rather than the live containers.
        return iter([self._data[k] for k, p in self._priority])

    def __getitem__(self, key):
        """
        Return an item by index or name, or a new `Registry` for a slice.
        """
        self._sort()
        if isinstance(key, slice):
            data = Registry()
            for k, p in self._priority[key]:
                data.register(self._data[k], k, p)
            return data
        if isinstance(key, int):
            return self._data[self._priority[key].name]
        return self._data[key]

    def __len__(self):
        """Return the number of registered items."""
        return len(self._priority)

    def __repr__(self):
        return '<{}({})>'.format(self.__class__.__name__, list(self))

    def get_index_for_name(self, name):
        """
        Return the index of the given name.

        Raises `ValueError` if no item is registered under `name`.
        """
        if name in self:
            self._sort()
            # Single pass over the sorted list; names are unique.
            for index, entry in enumerate(self._priority):
                if entry.name == name:
                    return index
        raise ValueError('No item named "{}" exists.'.format(name))

    def register(self, item, name, priority):
        """
        Add an item to the registry with the given name and priority.

        Parameters:

        * `item`: The item being registered.
        * `name`: A string used to reference the item.
        * `priority`: An integer or float used to sort against all items.

        If an item is registered with a "name" which already exists, the
        existing item is replaced with the new item. Tread carefully as the
        old item is lost with no way to recover it. The new item will be
        sorted according to its priority and will **not** retain the position
        of the old item.
        """
        if name in self:
            # Remove existing item of same name first
            self.deregister(name)
        self._is_sorted = False
        self._data[name] = item
        self._priority.append(_PriorityItem(name, priority))

    def deregister(self, name, strict=True):
        """
        Remove an item from the registry.

        Set `strict=False` to fail silently.
        """
        try:
            index = self.get_index_for_name(name)
            del self._priority[index]
            del self._data[name]
        except ValueError:
            if strict:
                raise

    def _sort(self):
        """
        Sort the registry by priority from highest to lowest.

        This method is called internally and should never be explicitly called.
        """
        if not self._is_sorted:
            self._priority.sort(key=lambda item: item.priority, reverse=True)
            self._is_sorted = True