• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""
2Python Markdown
3
4A Python implementation of John Gruber's Markdown.
5
6Documentation: https://python-markdown.github.io/
7GitHub: https://github.com/Python-Markdown/markdown/
8PyPI: https://pypi.org/project/Markdown/
9
10Started by Manfred Stienstra (http://www.dwerg.net/).
11Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
12Currently maintained by Waylan Limberg (https://github.com/waylan),
13Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).
14
15Copyright 2007-2018 The Python Markdown Project (v. 1.7 and later)
16Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
17Copyright 2004 Manfred Stienstra (the original version)
18
19License: BSD (see LICENSE.md for details).
20"""
21
22import re
23import sys
24import warnings
25from collections import namedtuple
26from functools import wraps, lru_cache
27from itertools import count
28
29
30"""
31Constants you might want to modify
32-----------------------------------------------------------------------------
33"""
34
35
BLOCK_LEVEL_ELEMENTS = [
    # Elements that may not be wrapped in a `<p>` tag.
    # Reference: https://w3c.github.io/html/grouping-content.html#the-p-element
    'address', 'article', 'aside', 'blockquote', 'details', 'div', 'dl',
    'fieldset', 'figcaption', 'figure', 'footer', 'form',
    'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
    'header', 'hgroup', 'hr', 'main', 'menu', 'nav', 'ol',
    'p', 'pre', 'section', 'table', 'ul',
    # Further elements whose contents Markdown should leave alone.
    'canvas', 'colgroup', 'dd', 'body', 'dt', 'group', 'iframe', 'li',
    'legend', 'math', 'map', 'noscript', 'output', 'object', 'option',
    'progress', 'script', 'style',
    'tbody', 'td', 'textarea', 'tfoot', 'th', 'thead', 'tr', 'video',
]
48
# Placeholders
# Every placeholder is delimited by the control characters STX ("Start of
# text") and ETX ("End of text"); the `%s` slot receives the placeholder key.
STX = '\u0002'
ETX = '\u0003'
INLINE_PLACEHOLDER_PREFIX = f"{STX}klzzwxh:"
INLINE_PLACEHOLDER = f"{INLINE_PLACEHOLDER_PREFIX}%s{ETX}"
# Matches an inline placeholder and captures its numeric key.
INLINE_PLACEHOLDER_RE = re.compile(INLINE_PLACEHOLDER % r'([0-9]+)')
AMP_SUBSTITUTE = f"{STX}amp{ETX}"
HTML_PLACEHOLDER = f"{STX}wzxhzdk:%s{ETX}"
# Matches an HTML placeholder and captures its numeric key.
HTML_PLACEHOLDER_RE = re.compile(HTML_PLACEHOLDER % r'([0-9]+)')
TAG_PLACEHOLDER = f"{STX}hzzhzkh:%s{ETX}"
59
60
61"""
62Constants you probably do not need to change
63-----------------------------------------------------------------------------
64"""
65
# Each entry is a (first, last) pair of code points.
RTL_BIDI_RANGES = (
    # Hebrew (0590-05FF), Arabic (0600-06FF),
    # Syriac (0700-074F), Arabic supplement (0750-077F),
    # Thaana (0780-07BF), Nko (07C0-07FF).
    ('\u0590', '\u07FF'),
    # Tifinagh
    ('\u2D30', '\u2D7F'),
)
73
74
75"""
76AUXILIARY GLOBAL FUNCTIONS
77=============================================================================
78"""
79
80
@lru_cache(maxsize=None)
def get_installed_extensions():
    """Return the entry points in the `markdown.extensions` group.

    The result is memoized, so the entry points are only loaded once.
    """
    if sys.version_info < (3, 10):
        # Before Python 3.10 the `importlib_metadata` backport is used.
        import importlib_metadata as metadata
    else:
        from importlib import metadata
    return metadata.entry_points(group='markdown.extensions')
89
90
def deprecated(message, stacklevel=2):
    """
    Decorator factory: issue a DeprecationWarning each time the wrapped
    function/method is called, then call it as usual.

    Parameters:

    * `message`: Text appended to the standard "'<name>' is deprecated." prefix.
    * `stacklevel`: Passed through to `warnings.warn`.

    Usage:
        @deprecated("This method will be removed in version X; use Y instead.")
        def some_method():
            pass
    """
    def decorator(target):
        @wraps(target)
        def warn_then_call(*args, **kwargs):
            notice = f"'{target.__name__}' is deprecated. {message}"
            warnings.warn(notice, category=DeprecationWarning, stacklevel=stacklevel)
            return target(*args, **kwargs)
        return warn_then_call
    return decorator
111
112
def parseBoolValue(value, fail_on_errors=True, preserve_none=False):
    """Interpret `value` as a boolean.

    Non-string values are converted with `bool()`, except that `None` is
    returned unchanged when `preserve_none=True`. Strings are compared
    case-insensitively against the known true/false words; with
    `preserve_none=True` the string 'none' yields None, otherwise it counts
    as False. An unrecognized string raises ValueError, or returns None when
    `fail_on_errors=False`."""
    if not isinstance(value, str):
        return None if (preserve_none and value is None) else bool(value)

    lowered = value.lower()
    if preserve_none and lowered == 'none':
        return None
    if lowered in ('true', 'yes', 'y', 'on', '1'):
        return True
    if lowered in ('false', 'no', 'n', 'off', '0', 'none'):
        return False
    if fail_on_errors:
        raise ValueError('Cannot parse bool value: %r' % value)
    return None
130
131
def code_escape(text):
    """Replace `&`, `<` and `>` in `text` with their HTML entities."""
    # `&` comes first so the ampersands introduced by the other two
    # substitutions are not escaped a second time.
    substitutions = (("&", "&amp;"), ("<", "&lt;"), (">", "&gt;"))
    for char, entity in substitutions:
        # Only replace when needed, so unaffected text is returned as-is.
        if char in text:
            text = text.replace(char, entity)
    return text
141
142
143def _get_stack_depth(size=2):
144    """Get current stack depth, performantly.
145    """
146    frame = sys._getframe(size)
147
148    for size in count(size):
149        frame = frame.f_back
150        if not frame:
151            return size
152
153
def nearing_recursion_limit():
    """Return true if current stack depth is within 100 of maximum limit."""
    # `_get_stack_depth` is called directly from this function so that its
    # default frame offset is unchanged.
    headroom = sys.getrecursionlimit() - _get_stack_depth()
    return headroom < 100
157
158
159"""
160MISC AUXILIARY CLASSES
161=============================================================================
162"""
163
164
class AtomicString(str):
    """A string which should not be further processed.

    Adds no behavior of its own to `str`; the subclass serves only as a
    marker type.
    """
168
169
class Processor:
    """Minimal base class that stores the `md` object it is given."""

    def __init__(self, md=None):
        # NOTE(review): `md` is presumably the owning Markdown instance - confirm.
        self.md = md
173
174
class HtmlStash:
    """
    This class is used for stashing HTML objects that we extract
    in the beginning and replace with place-holders.

    Two independent stores are kept: raw HTML segments (`store`) and tag
    data (`store_tag`). Each has its own counter, which always equals the
    index the next stored entry will receive.
    """

    def __init__(self):
        """ Create a HtmlStash. """
        self.html_counter = 0  # for counting inline html segments
        self.rawHtmlBlocks = []
        self.tag_counter = 0
        self.tag_data = []  # list of dictionaries in the order tags appear

    def store(self, html):
        """
        Saves an HTML segment for later reinsertion.  Returns a
        placeholder string that needs to be inserted into the
        document.

        Keyword arguments:

        * html: an html segment

        Returns : a placeholder string

        """
        self.rawHtmlBlocks.append(html)
        placeholder = self.get_placeholder(self.html_counter)
        self.html_counter += 1
        return placeholder

    def reset(self):
        """
        Clear the stash.

        Both the raw HTML store and the tag store are emptied and their
        counters return to zero, so tag data from earlier input does not
        accumulate across resets.
        """
        self.html_counter = 0
        self.rawHtmlBlocks = []
        self.tag_counter = 0
        self.tag_data = []

    def get_placeholder(self, key):
        """Return the HTML placeholder string for the given key."""
        return HTML_PLACEHOLDER % key

    def store_tag(self, tag, attrs, left_index, right_index):
        """
        Store tag data and return a placeholder.

        The four arguments are saved together as one dictionary appended to
        `self.tag_data`; the returned placeholder carries that entry's index.
        """
        self.tag_data.append({'tag': tag, 'attrs': attrs,
                              'left_index': left_index,
                              'right_index': right_index})
        placeholder = TAG_PLACEHOLDER % str(self.tag_counter)
        self.tag_counter += 1  # equal to the tag's index in self.tag_data
        return placeholder
221
222
# Used internally by `Registry` for each item in its sorted list.
# Provides an easier to read API when editing the code later.
# For example, `item.name` is more clear than `item[0]`.
_PriorityItem = namedtuple('PriorityItem', ['name', 'priority'])


class Registry:
    """
    A priority sorted registry.

    A `Registry` instance provides two public methods to alter the data of the
    registry: `register` and `deregister`. Use `register` to add items and
    `deregister` to remove items. See each method for specifics.

    When registering an item, a "name" and a "priority" must be provided. All
    items are automatically sorted by "priority" from highest to lowest. The
    "name" is used to remove ("deregister") and get items.

    A `Registry` instance is like a list (which maintains order) when reading
    data. You may iterate over the items, get an item and get a count (length)
    of all items. You may also check that the registry contains an item.

    When getting an item you may use either the index of the item or the
    string-based "name". For example:

        registry = Registry()
        registry.register(SomeItem(), 'itemname', 20)
        # Get the item by index
        item = registry[0]
        # Get the item by name
        item = registry['itemname']

    When checking that the registry contains an item, you may use either the
    string-based "name", or a reference to the actual item. For example:

        someitem = SomeItem()
        registry.register(someitem, 'itemname', 20)
        # Contains the name
        assert 'itemname' in registry
        # Contains the item instance
        assert someitem in registry

    The method `get_index_for_name` is also available to obtain the index of
    an item using that item's assigned "name".
    """

    def __init__(self):
        self._data = {}          # maps each name to its registered item
        self._priority = []      # `_PriorityItem` entries, ordered by `_sort`
        self._is_sorted = False  # set to False whenever an entry is appended

    def __contains__(self, item):
        if isinstance(item, str):
            # Check if an item exists by this name.
            return item in self._data
        # Check if this instance exists.
        return item in self._data.values()

    def __iter__(self):
        self._sort()
        # Iterate over a snapshot list built in priority order.
        return iter([self._data[entry.name] for entry in self._priority])

    def __getitem__(self, key):
        """
        Return an item by integer index or by name; a slice returns a new
        `Registry` holding the selected entries with their names and
        priorities.
        """
        self._sort()
        if isinstance(key, slice):
            data = Registry()
            for entry in self._priority[key]:
                data.register(self._data[entry.name], entry.name, entry.priority)
            return data
        if isinstance(key, int):
            return self._data[self._priority[key].name]
        return self._data[key]

    def __len__(self):
        return len(self._priority)

    def __repr__(self):
        return '<{}({})>'.format(self.__class__.__name__, list(self))

    def get_index_for_name(self, name):
        """
        Return the index of the given name.

        Raises `ValueError` when no item is registered under `name`. The
        lookup compares against registered names only, so passing an item
        instance (rather than its name) also raises `ValueError`.
        """
        self._sort()
        for index, entry in enumerate(self._priority):
            if entry.name == name:
                return index
        raise ValueError('No item named "{}" exists.'.format(name))

    def register(self, item, name, priority):
        """
        Add an item to the registry with the given name and priority.

        Parameters:

        * `item`: The item being registered.
        * `name`: A string used to reference the item.
        * `priority`: An integer or float used to sort against all items.

        If an item is registered with a "name" which already exists, the
        existing item is replaced with the new item. Treat carefully as the
        old item is lost with no way to recover it. The new item will be
        sorted according to its priority and will **not** retain the position
        of the old item.
        """
        # Test the names directly: `name in self` would test the registered
        # items instead whenever `name` is not a string.
        if name in self._data:
            # Remove existing item of same name first
            self.deregister(name)
        self._is_sorted = False
        self._data[name] = item
        self._priority.append(_PriorityItem(name, priority))

    def deregister(self, name, strict=True):
        """
        Remove an item from the registry.

        Set `strict=False` to fail silently. Otherwise a `ValueError` is
        raised when no item is registered under `name`.
        """
        try:
            index = self.get_index_for_name(name)
            del self._priority[index]
            del self._data[name]
        except ValueError:
            if strict:
                raise

    def _sort(self):
        """
        Sort the registry by priority from highest to lowest.

        This method is called internally and should never be explicitly called.
        """
        if not self._is_sorted:
            self._priority.sort(key=lambda item: item.priority, reverse=True)
            self._is_sorted = True
359