• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""
2Python Markdown
3
4A Python implementation of John Gruber's Markdown.
5
6Documentation: https://python-markdown.github.io/
7GitHub: https://github.com/Python-Markdown/markdown/
8PyPI: https://pypi.org/project/Markdown/
9
10Started by Manfred Stienstra (http://www.dwerg.net/).
11Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
12Currently maintained by Waylan Limberg (https://github.com/waylan),
13Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).
14
15Copyright 2007-2018 The Python Markdown Project (v. 1.7 and later)
16Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
17Copyright 2004 Manfred Stienstra (the original version)
18
19License: BSD (see LICENSE.md for details).
20
21INLINE PATTERNS
22=============================================================================
23
24Inline patterns such as *emphasis* are handled by means of auxiliary
25objects, one per pattern.  Pattern objects must be instances of classes
26that extend markdown.Pattern.  Each pattern object uses a single regular
expression and needs to support the following methods:
28
29    pattern.getCompiledRegExp() # returns a regular expression
30
31    pattern.handleMatch(m) # takes a match object and returns
32                           # an ElementTree element or just plain text
33
34All of python markdown's built-in patterns subclass from Pattern,
35but you can add additional patterns that don't.
36
37Also note that all the regular expressions used by inline must
38capture the whole block.  For this reason, they all start with
'^(.*?)' and end with '(.*)$'.  In the case of the built-in expressions,
Pattern takes care of adding the "^(.*?)" and "(.*)$".
41
42Finally, the order in which regular expressions are applied is very
43important - e.g. if we first replace http://.../ links with <a> tags
44and _then_ try to replace inline html, we would end up with a mess.
45So, we apply the expressions in the following order:
46
47* escape and backticks have to go before everything else, so
48  that we can preempt any markdown patterns by escaping them.
49
50* then we handle auto-links (must be done before inline html)
51
52* then we handle inline HTML.  At this point we will simply
53  replace all inline HTML strings with a placeholder and add
54  the actual HTML to a hash.
55
56* then inline images (must be done before links)
57
58* then bracketed links, first regular then reference-style
59
60* finally we apply strong and emphasis
61"""
62
63from . import util
64from collections import namedtuple
65import re
66import xml.etree.ElementTree as etree
67try:  # pragma: no cover
68    from html import entities
69except ImportError:  # pragma: no cover
70    import htmlentitydefs as entities
71
72
def build_inlinepatterns(md, **kwargs):
    """ Create the registry holding Markdown's default inline processors.

    Each processor is registered under a name with a numeric priority; the
    entries below are listed from the highest priority to the lowest.
    Extra keyword arguments are accepted but not used here.
    """
    registry = util.Registry()
    defaults = (
        (BacktickInlineProcessor(BACKTICK_RE), 'backtick', 190),
        (EscapeInlineProcessor(ESCAPE_RE, md), 'escape', 180),
        (ReferenceInlineProcessor(REFERENCE_RE, md), 'reference', 170),
        (LinkInlineProcessor(LINK_RE, md), 'link', 160),
        (ImageInlineProcessor(IMAGE_LINK_RE, md), 'image_link', 150),
        (ImageReferenceInlineProcessor(IMAGE_REFERENCE_RE, md), 'image_reference', 140),
        (ShortReferenceInlineProcessor(REFERENCE_RE, md), 'short_reference', 130),
        (ShortImageReferenceInlineProcessor(IMAGE_REFERENCE_RE, md), 'short_image_ref', 125),
        (AutolinkInlineProcessor(AUTOLINK_RE, md), 'autolink', 120),
        (AutomailInlineProcessor(AUTOMAIL_RE, md), 'automail', 110),
        (SubstituteTagInlineProcessor(LINE_BREAK_RE, 'br'), 'linebreak', 100),
        (HtmlInlineProcessor(HTML_RE, md), 'html', 90),
        (HtmlInlineProcessor(ENTITY_RE, md), 'entity', 80),
        (SimpleTextInlineProcessor(NOT_STRONG_RE), 'not_strong', 70),
        (AsteriskProcessor(r'\*'), 'em_strong', 60),
        (UnderscoreProcessor(r'_'), 'em_strong2', 50),
    )
    for processor, name, priority in defaults:
        registry.register(processor, name, priority)
    return registry
99
100
101"""
102The actual regular expressions for patterns
103-----------------------------------------------------------------------------
104"""
105
# Negative lookbehind: the position must not be preceded by `!`.
# Prepended to LINK_RE so a link never starts at the `[` of an image (`![`).
NOIMG = r'(?<!\!)'

# `e=f()` or ``e=f("`")``
# First alternative: an even run of backslashes directly before backticks
# (group 1).  Second alternative: a backtick run (group 2), the code text
# (group 3) and a closing run of the same length.
BACKTICK_RE = r'(?:(?<!\\)((?:\\{2})+)(?=`+)|(?<!\\)(`+)(.+?)(?<!`)\2(?!`))'

# \<
# A backslash followed by any single character (group 1).
ESCAPE_RE = r'\\(.)'

# *emphasis*
EMPHASIS_RE = r'(\*)([^\*]+)\1'

# **strong**
STRONG_RE = r'(\*{2})(.+?)\1'

# __smart__strong__
# The lookarounds require that the delimiters are not adjacent to word
# characters, so underscores inside a word are left alone.
SMART_STRONG_RE = r'(?<!\w)(_{2})(?!_)(.+?)(?<!_)\1(?!\w)'

# _smart_emphasis_
SMART_EMPHASIS_RE = r'(?<!\w)(_)(?!_)(.+?)(?<!_)\1(?!\w)'

# __strong _em___
# Opens with two underscores, then a single underscore starts the nested
# emphasis, and three underscores close both.
SMART_STRONG_EM_RE = r'(?<!\w)(\_)\1(?!\1)(.+?)(?<!\w)\1(?!\1)(.+?)\1{3}(?!\w)'

# ***strongem*** or ***em*strong**
EM_STRONG_RE = r'(\*)\1{2}(.+?)\1(.*?)\1{2}'

# ___strongem___ or ___em_strong__
EM_STRONG2_RE = r'(_)\1{2}(.+?)\1(.*?)\1{2}'

# ***strong**em*
STRONG_EM_RE = r'(\*)\1{2}(.+?)\1{2}(.*?)\1'

# ___strong__em_
STRONG_EM2_RE = r'(_)\1{2}(.+?)\1{2}(.*?)\1'

# **strong*em***
STRONG_EM3_RE = r'(\*)\1(?!\1)([^*]+?)\1(?!\1)(.+?)\1{3}'

# [text](url) or [text](<url>) or [text](url "title")
# Only the opening `[` is matched here; the link processors parse the
# remainder of the construct in code.
LINK_RE = NOIMG + r'\['

# ![alttxt](http://x.com/) or ![alttxt](<http://x.com/>)
# Only the opening `![` is matched here.
IMAGE_LINK_RE = r'\!\['

# [Google][3]
# Same opening token as an inline link.
REFERENCE_RE = LINK_RE

# ![alt text][2]
# Same opening token as an inline image.
IMAGE_REFERENCE_RE = IMAGE_LINK_RE

# stand-alone * or _
# A lone `*` or `_` with whitespace (or the start/end of the text) on both sides.
NOT_STRONG_RE = r'((^|\s)(\*|_)(\s|$))'

# <http://www.123.com>
# Accepts the ftp, ftps, http and https schemes, case-insensitively.
AUTOLINK_RE = r'<((?:[Ff]|[Hh][Tt])[Tt][Pp][Ss]?://[^<>]*)>'

# <me@example.com>
AUTOMAIL_RE = r'<([^<> !]+@[^@<> ]+)>'

# <...>
# Either an opening/closing tag or an HTML comment (`<!-- ... -->`).
HTML_RE = r'(<(\/?[a-zA-Z][^<>@ ]*( [^<>]*)?|!--(?:(?!<!--|-->).)*--)>)'

# "&#38;" (decimal) or "&#x26;" (hex) or "&amp;" (named)
ENTITY_RE = r'(&(?:\#[0-9]+|\#x[0-9a-fA-F]+|[a-zA-Z0-9]+);)'

# two spaces at end of line
LINE_BREAK_RE = r'  \n'
173
174
def dequote(string):
    """Remove one pair of matching quotes from around a string.

    Keyword arguments:

    * string: The text to strip.

    Returns the text between the quotes when `string` starts and ends with
    the same quote character (`"` or `'`); otherwise `string` is returned
    unchanged.  A string consisting of a single quote character is left
    untouched: one character cannot be both the opening and the closing
    quote, so there is nothing "around" to remove.
    """
    # Fewer than two characters can never form an opening/closing pair.
    if len(string) < 2:
        return string
    for quote in ('"', "'"):
        if string.startswith(quote) and string.endswith(quote):
            return string[1:-1]
    return string
182
183
class EmStrongItem(namedtuple('EmStrongItem', 'pattern builder tags')):
    """One emphasis/strong rule: a pattern, a builder name and the tag(s) to emit."""
186
187
188"""
189The pattern classes
190-----------------------------------------------------------------------------
191"""
192
193
class Pattern:  # pragma: no cover
    """Legacy base class for inline patterns.

    The supplied expression is wrapped so that a match also captures the
    text before it (first group) and after it (last group).
    """

    ANCESTOR_EXCLUDES = ()

    def __init__(self, pattern, md=None):
        """
        Create an instance of an inline pattern.

        Keyword arguments:

        * pattern: A regular expression that matches a pattern
        * md: Optional object kept as `self.md`; `unescape` reads
          `md.treeprocessors['inline'].stashed_nodes` from it.

        """
        self.md = md
        self.pattern = pattern

        wrapped = r"^(.*?)%s(.*)$" % pattern
        self.compiled_re = re.compile(wrapped, re.DOTALL | re.UNICODE)

    def getCompiledRegExp(self):
        """ Return the compiled (wrapped) regular expression. """
        return self.compiled_re

    def handleMatch(self, m):
        """Return a ElementTree element from the given match.

        The base implementation does nothing; subclasses should override
        this method.

        Keyword arguments:

        * m: A re match object containing a match of the pattern.

        """
        pass  # pragma: no cover

    def type(self):
        """ Return the class name, which identifies the pattern type. """
        cls = self.__class__
        return cls.__name__

    def unescape(self, text):
        """ Return `text` with inline placeholders replaced by their stashed text. """
        try:
            stash = self.md.treeprocessors['inline'].stashed_nodes
        except KeyError:  # pragma: no cover
            return text

        def get_stash(m):
            key = m.group(1)
            if key not in stash:
                # Unknown placeholder: no replacement text is supplied.
                return None
            value = stash.get(key)
            if isinstance(value, str):
                return value
            # An etree Element - return text content only
            return ''.join(value.itertext())

        return util.INLINE_PLACEHOLDER_RE.sub(get_stash, text)
251
252
class InlineProcessor(Pattern):
    """
    Base class for the newer style of inline processor.

    Unlike `Pattern`, the regular expression is compiled exactly as given
    (it is not wrapped to capture the surrounding text), and `handleMatch`
    additionally receives the buffer and reports the span it consumed.
    """

    def __init__(self, pattern, md=None):
        """
        Create an instance of an inline processor.

        Keyword arguments:

        * pattern: A regular expression that matches a pattern
        * md: Optional object stored on the instance as `self.md`

        """
        self.md = md
        # Api for Markdown to pass safe_mode into instance
        self.safe_mode = False

        self.pattern = pattern
        flags = re.DOTALL | re.UNICODE
        self.compiled_re = re.compile(pattern, flags)

    def handleMatch(self, m, data):
        """Return a ElementTree element from the given match together with
        the start and end index of the text it covers.

        If `start` and/or `end` are returned as `None`, it will be
        assumed that the processor did not find a valid region of text.

        The base implementation does nothing; subclasses should override
        this method.

        Keyword arguments:

        * m: A re match object containing a match of the pattern.
        * data: The buffer currently under analysis

        Returns:

        * el: The ElementTree element, text or None.
        * start: The start of the region that has been matched or None.
        * end: The end of the region that has been matched or None.

        """
        pass  # pragma: no cover
299
300
class SimpleTextPattern(Pattern):  # pragma: no cover
    """ Legacy pattern that yields the plain text captured by group(2). """
    def handleMatch(self, m):
        captured = m.group(2)
        return captured
305
306
class SimpleTextInlineProcessor(InlineProcessor):
    """ Yield the plain text captured by group(1), covering the whole match. """
    def handleMatch(self, m, data):
        start, end = m.span(0)
        return m.group(1), start, end
311
312
class EscapeInlineProcessor(InlineProcessor):
    """ Replace a backslash-escaped character with a placeholder. """

    def handleMatch(self, m, data):
        start, end = m.span(0)
        char = m.group(1)
        if char not in self.md.ESCAPED_CHARS:
            # Not an escapable character: report the span with no replacement.
            return None, start, end
        # Encode the character as its code point wrapped in STX/ETX markers.
        placeholder = '{}{}{}'.format(util.STX, ord(char), util.ETX)
        return placeholder, start, end
322
323
class SimpleTagPattern(Pattern):  # pragma: no cover
    """
    Legacy pattern producing a `tag` element whose text is group(3)
    of the match.

    """
    def __init__(self, pattern, tag):
        Pattern.__init__(self, pattern)
        # Name of the element created for every match.
        self.tag = tag

    def handleMatch(self, m):
        node = etree.Element(self.tag)
        node.text = m.group(3)
        return node
338
339
class SimpleTagInlineProcessor(InlineProcessor):
    """
    Processor producing a `tag` element whose text is group(2)
    of the match.

    """
    def __init__(self, pattern, tag):
        InlineProcessor.__init__(self, pattern)
        # Name of the element created for every match.
        self.tag = tag

    def handleMatch(self, m, data):  # pragma: no cover
        node = etree.Element(self.tag)
        node.text = m.group(2)
        start, end = m.span(0)
        return node, start, end
354
355
class SubstituteTagPattern(SimpleTagPattern):  # pragma: no cover
    """ Legacy pattern that replaces the match with an empty `tag` element. """
    def handleMatch(self, m):
        empty = etree.Element(self.tag)
        return empty
360
361
class SubstituteTagInlineProcessor(SimpleTagInlineProcessor):
    """ Replace the whole match with an empty `tag` element. """
    def handleMatch(self, m, data):
        empty = etree.Element(self.tag)
        return empty, m.start(0), m.end(0)
366
367
class BacktickInlineProcessor(InlineProcessor):
    """ Turn backtick-delimited text into a `<code>` element. """
    def __init__(self, pattern):
        InlineProcessor.__init__(self, pattern)
        self.tag = 'code'
        # Placeholder standing in for an escaped backslash.
        self.ESCAPED_BSLASH = '{}{}{}'.format(util.STX, ord('\\'), util.ETX)

    def handleMatch(self, m, data):
        start, end = m.span(0)
        code = m.group(3)
        if not code:
            # No code text was captured: the match is the run of escaped
            # backslashes in group(1), so swap each pair for the placeholder.
            return m.group(1).replace('\\\\', self.ESCAPED_BSLASH), start, end

        el = etree.Element(self.tag)
        el.text = util.AtomicString(util.code_escape(code.strip()))
        return el, start, end
382
383
class DoubleTagPattern(SimpleTagPattern):  # pragma: no cover
    """Legacy pattern building two nested elements.

    `self.tag` holds both names separated by a comma ("outer,inner").
    Useful for strong emphasis etc.

    """
    def handleMatch(self, m):
        outer_tag, inner_tag = self.tag.split(",")
        outer = etree.Element(outer_tag)
        inner = etree.SubElement(outer, inner_tag)
        inner.text = m.group(3)
        # With five groups, group(4) is text that follows the inner element.
        if len(m.groups()) == 5:
            inner.tail = m.group(4)
        return outer
398
399
class DoubleTagInlineProcessor(SimpleTagInlineProcessor):
    """Processor building two nested elements.

    `self.tag` holds both names separated by a comma ("outer,inner").
    Useful for strong emphasis etc.

    """
    def handleMatch(self, m, data):  # pragma: no cover
        outer_tag, inner_tag = self.tag.split(",")
        outer = etree.Element(outer_tag)
        inner = etree.SubElement(outer, inner_tag)
        inner.text = m.group(2)
        # With three groups, group(3) is text that follows the inner element.
        if len(m.groups()) == 3:
            inner.tail = m.group(3)
        start, end = m.span(0)
        return outer, start, end
414
415
class HtmlInlineProcessor(InlineProcessor):
    """ Stash raw inline html and hand back the placeholder for it. """
    def handleMatch(self, m, data):
        raw = self.unescape(m.group(1))
        return self.md.htmlStash.store(raw), m.start(0), m.end(0)

    def unescape(self, text):
        """ Return `text` with inline placeholders replaced by serialized stash entries. """
        try:
            stash = self.md.treeprocessors['inline'].stashed_nodes
        except KeyError:  # pragma: no cover
            return text

        def get_stash(m):
            value = stash.get(m.group(1))
            if value is None:
                # Nothing stashed under this id: no replacement text is supplied.
                return None
            try:
                return self.md.serializer(value)
            except Exception:
                # The value could not be serialized; emit it behind a backslash.
                return r'\%s' % value

        return util.INLINE_PLACEHOLDER_RE.sub(get_stash, text)
440
441
class AsteriskProcessor(InlineProcessor):
    """Emphasis processor for strong and em spans delimited by asterisks."""

    # Tried in order; the position in this list is the "pattern index"
    # passed around by the builder methods.
    PATTERNS = [
        EmStrongItem(re.compile(regex, re.DOTALL | re.UNICODE), builder, tags)
        for regex, builder, tags in (
            (EM_STRONG_RE, 'double', 'strong,em'),
            (STRONG_EM_RE, 'double', 'em,strong'),
            (STRONG_EM3_RE, 'double2', 'strong,em'),
            (STRONG_RE, 'single', 'strong'),
            (EMPHASIS_RE, 'single', 'em'),
        )
    ]

    def build_single(self, m, tag, idx):
        """Return a single `tag` element built from group(2)."""
        element = etree.Element(tag)
        self.parse_sub_patterns(m.group(2), element, None, idx)
        return element

    def build_double(self, m, tags, idx):
        """Return nested tags: group(2) fills the inner element, and an
        optional group(3) follows it inside the outer element."""

        outer_tag, inner_tag = tags.split(",")
        outer = etree.Element(outer_tag)
        inner = etree.Element(inner_tag)
        self.parse_sub_patterns(m.group(2), inner, None, idx)
        outer.append(inner)
        if len(m.groups()) == 3:
            self.parse_sub_patterns(m.group(3), outer, inner, idx)
        return outer

    def build_double2(self, m, tags, idx):
        """Return double tags (variant 2): `<strong>text <em>text</em></strong>`."""

        outer_tag, inner_tag = tags.split(",")
        outer = etree.Element(outer_tag)
        inner = etree.Element(inner_tag)
        # group(2) belongs to the outer element, ahead of the inner one.
        self.parse_sub_patterns(m.group(2), outer, None, idx)
        outer.append(inner)
        self.parse_sub_patterns(m.group(3), inner, None, idx)
        return outer

    def parse_sub_patterns(self, data, parent, last, idx):
        """
        Scan `data` for nested emphasis/strong and attach the result to `parent`.

        `data` (`str`):
            text to evaluate.

        `parent` (`etree.Element`):
            Parent to attach text and sub elements to.

        `last` (`etree.Element`):
            Last appended child to parent. Can also be None if parent has no children.

        `idx` (`int`):
            Current pattern index that was used to evaluate the parent.

        """

        consumed = 0  # Everything before this offset is already attached.
        cursor = 0    # Position currently being examined.
        limit = len(data)

        while cursor < limit:
            hit = False
            # Only look closer where a potential emphasis/strong token starts.
            if self.compiled_re.match(data, cursor):
                # NOTE(review): there is no `break` after a successful match,
                # so the remaining (later) patterns are still tried at the
                # updated position - confirm this is intended.
                for index, item in enumerate(self.PATTERNS):
                    # Only evaluate patterns that come after the one used on the parent.
                    if index <= idx:
                        continue
                    m = item.pattern.match(data, cursor)
                    if not m:
                        continue
                    # Pending plain text goes on the previous child's tail if
                    # there is one, otherwise it becomes the parent's text.
                    pending = data[consumed:m.start(0)]
                    if pending:
                        if last is None:
                            parent.text = pending
                        else:
                            last.tail = pending
                    last = self.build_element(m, item.builder, item.tags, index)
                    parent.append(last)
                    # Continue after the matched hunk.
                    consumed = cursor = m.end(0)
                    hit = True
            if not hit:
                # Nothing matched here; advance by one character.
                cursor += 1

        # Attach whatever text is left over.
        remainder = data[consumed:]
        if remainder:
            if last is None:
                parent.text = remainder
            else:
                last.tail = remainder

    def build_element(self, m, builder, tags, index):
        """Dispatch to the builder method named by `builder`."""

        builders = {
            'double2': self.build_double2,
            'double': self.build_double,
        }
        # Any other builder name falls back to the single-tag builder.
        return builders.get(builder, self.build_single)(m, tags, index)

    def handleMatch(self, m, data):
        """Try each pattern at the match start and build the first that fits."""

        origin = m.start(0)
        for index, item in enumerate(self.PATTERNS):
            m1 = item.pattern.match(data, origin)
            if m1:
                el = self.build_element(m1, item.builder, item.tags, index)
                return el, m1.start(0), m1.end(0)
        # No pattern applied at this position.
        return None, None, None
576
577
class UnderscoreProcessor(AsteriskProcessor):
    """Emphasis processor for strong and em spans delimited by underscores."""

    # Same layout as the parent's table, using the underscore expressions.
    PATTERNS = [
        EmStrongItem(re.compile(regex, re.DOTALL | re.UNICODE), builder, tags)
        for regex, builder, tags in (
            (EM_STRONG2_RE, 'double', 'strong,em'),
            (STRONG_EM2_RE, 'double', 'em,strong'),
            (SMART_STRONG_EM_RE, 'double2', 'strong,em'),
            (SMART_STRONG_RE, 'single', 'strong'),
            (SMART_EMPHASIS_RE, 'single', 'em'),
        )
    ]
588
589
class LinkInlineProcessor(InlineProcessor):
    """ Return a link element from the given match.

    The processor's pattern only locates the start of a link; the bracketed
    text and the parenthesised destination are parsed by `getText` and
    `getLink`.
    """
    # Matches the opening `(` and any whitespace, optionally followed by a
    # complete `<href>` (group 1), an optional quoted title (group 2) and `)`.
    RE_LINK = re.compile(r'''\(\s*(?:(<[^<>]*>)\s*(?:('[^']*'|"[^"]*")\s*)?\))?''', re.DOTALL | re.UNICODE)
    # Any single whitespace character; used to turn title whitespace into spaces.
    RE_TITLE_CLEAN = re.compile(r'\s')

    def handleMatch(self, m, data):
        """Build an `<a>` element for the link starting at match `m`.

        Keyword arguments:

        * m: A re match object whose end marks where the link text begins.
        * data: The buffer currently under analysis.

        Returns `(el, start, end)`, or `(None, None, None)` when either the
        text or the destination could not be parsed.
        """
        text, index, handled = self.getText(data, m.end(0))

        if not handled:
            return None, None, None

        href, title, index, handled = self.getLink(data, index)
        if not handled:
            return None, None, None

        el = etree.Element("a")
        el.text = text

        el.set("href", href)

        if title is not None:
            el.set("title", title)

        return el, m.start(0), index

    def getLink(self, data, index):
        """Parse data between `()` of `[Text]()` allowing recursive `()`.

        Keyword arguments:

        * data: The buffer currently under analysis.
        * index: Position in `data` where the opening `(` is expected.

        Returns `(href, title, index, handled)`: the link target, the title
        (or `None`), the position after the parsed link and whether a
        complete link was found.
        """

        href = ''
        title = None
        handled = False

        m = self.RE_LINK.match(data, pos=index)
        if m and m.group(1):
            # Matches [Text](<link> "title")
            # The [1:-1] slices drop the angle brackets / title quotes.
            href = m.group(1)[1:-1].strip()
            if m.group(2):
                title = m.group(2)[1:-1]
            index = m.end(0)
            handled = True
        elif m:
            # Only the opening `(` (plus whitespace) matched, so the rest is
            # scanned character by character.
            # Track bracket nesting and index in string
            bracket_count = 1
            backtrack_count = 1
            start_index = m.end()
            index = start_index
            last_bracket = -1

            # Primary (first found) quote tracking.
            quote = None
            start_quote = -1
            exit_quote = -1
            ignore_matches = False

            # Secondary (second found) quote tracking.
            alt_quote = None
            start_alt_quote = -1
            exit_alt_quote = -1

            # Track last character
            last = ''

            # `index` is advanced in step with `pos` (it is incremented after
            # each character is examined), so while a character is being
            # examined `index + 1` is the position just after it.
            for pos in range(index, len(data)):
                c = data[pos]
                if c == '(':
                    # Count nested (
                    # Don't increment the bracket count if we are sure we're in a title.
                    if not ignore_matches:
                        bracket_count += 1
                    elif backtrack_count > 0:
                        backtrack_count -= 1
                elif c == ')':
                    # Match nested ) to (
                    # Don't decrement if we are sure we are in a title that is unclosed.
                    if ((exit_quote != -1 and quote == last) or (exit_alt_quote != -1 and alt_quote == last)):
                        # A quote was closed and is the last non-space
                        # character seen, so this `)` ends the link.
                        bracket_count = 0
                    elif not ignore_matches:
                        bracket_count -= 1
                    elif backtrack_count > 0:
                        backtrack_count -= 1
                        # We've found our backup end location if the title doesn't resolve.
                        if backtrack_count == 0:
                            last_bracket = index + 1

                elif c in ("'", '"'):
                    # Quote has started
                    if not quote:
                        # We'll assume we are now in a title.
                        # Brackets are quoted, so no need to match them (except for the final one).
                        ignore_matches = True
                        backtrack_count = bracket_count
                        bracket_count = 1
                        start_quote = index + 1
                        quote = c
                    # Secondary quote (in case the first doesn't resolve): [text](link'"title")
                    elif c != quote and not alt_quote:
                        start_alt_quote = index + 1
                        alt_quote = c
                    # Update primary quote match
                    elif c == quote:
                        exit_quote = index + 1
                    # Update secondary quote match
                    elif alt_quote and c == alt_quote:
                        exit_alt_quote = index + 1

                index += 1

                # Link is closed, so let's break out of the loop
                if bracket_count == 0:
                    # Get the title if we closed a title string right before link closed
                    if exit_quote >= 0 and quote == last:
                        href = data[start_index:start_quote - 1]
                        title = ''.join(data[start_quote:exit_quote - 1])
                    elif exit_alt_quote >= 0 and alt_quote == last:
                        href = data[start_index:start_alt_quote - 1]
                        title = ''.join(data[start_alt_quote:exit_alt_quote - 1])
                    else:
                        href = data[start_index:index - 1]
                    break

                # Remember the most recent character that is not a space.
                if c != ' ':
                    last = c

            # We have a scenario: [test](link"notitle)
            # When we enter a string, we stop tracking bracket resolution in the main counter,
            # but we do keep a backup counter up until we discover where we might resolve all brackets
            # if the title string fails to resolve.
            if bracket_count != 0 and backtrack_count == 0:
                href = data[start_index:last_bracket - 1]
                index = last_bracket
                bracket_count = 0

            handled = bracket_count == 0

        if title is not None:
            # Strip, unescape and dequote the title, then replace every
            # whitespace character with a plain space.
            title = self.RE_TITLE_CLEAN.sub(' ', dequote(self.unescape(title.strip())))

        href = self.unescape(href).strip()

        return href, title, index, handled

    def getText(self, data, index):
        """Parse the content between `[]` of the start of an image or link
        resolving nested square brackets.

        Keyword arguments:

        * data: The buffer currently under analysis.
        * index: Position in `data` just after the opening `[`.

        Returns `(text, index, handled)`: the collected text, the position
        after the last character examined and whether the brackets balanced.
        """
        # The opening `[` has already been consumed by the caller's match.
        bracket_count = 1
        text = []
        for pos in range(index, len(data)):
            c = data[pos]
            if c == ']':
                bracket_count -= 1
            elif c == '[':
                bracket_count += 1
            index += 1
            # Stop before appending, so the final `]` is not part of the text.
            if bracket_count == 0:
                break
            text.append(c)
        return ''.join(text), index, bracket_count == 0
749
750
class ImageInlineProcessor(LinkInlineProcessor):
    """ Return an img element from the given match. """

    def handleMatch(self, m, data):
        """Build an `<img>` element, or return three `None`s when the
        alt text or the source could not be parsed."""
        alt, index, handled = self.getText(data, m.end(0))
        if handled:
            src, title, index, handled = self.getLink(data, index)
        if not handled:
            return None, None, None

        img = etree.Element("img")
        img.set("src", src)
        if title is not None:
            img.set("title", title)
        img.set('alt', self.unescape(alt))

        return img, m.start(0), index
772
773
class ReferenceInlineProcessor(LinkInlineProcessor):
    """ Resolve `[text][id]` against the stored references and return a link element. """
    # Collapses each run of whitespace in an id.
    NEWLINE_CLEANUP_RE = re.compile(r'\s+', re.MULTILINE)

    # An optional single whitespace character followed by `[id]`.
    RE_LINK = re.compile(r'\s?\[([^\]]*)\]', re.DOTALL | re.UNICODE)

    def handleMatch(self, m, data):
        """Return `(el, start, end)` for the reference starting at `m`.

        All three are `None` when the text or id cannot be parsed; `el`
        alone is `None` when the id is not a known reference.
        """
        text, index, handled = self.getText(data, m.end(0))
        if not handled:
            return None, None, None

        ref_id, end, handled = self.evalId(data, index, text)
        if not handled:
            return None, None, None

        # Clean up linebreaks in id
        ref_id = self.NEWLINE_CLEANUP_RE.sub(' ', ref_id)

        references = self.md.references
        if ref_id in references:
            href, title = references[ref_id]
            return self.makeTag(href, title, text), m.start(0), end

        # ignore undefined refs
        return None, m.start(0), end

    def evalId(self, data, index, text):
        """
        Evaluate the id portion of [ref][id].

        If [ref][] use [ref].

        Returns `(id, end, handled)`.
        """
        m = self.RE_LINK.match(data, pos=index)
        if m is None:
            return None, index, False

        # An empty id falls back to the (lower-cased) link text.
        ref_id = m.group(1).lower() or text.lower()
        return ref_id, m.end(0), True

    def makeTag(self, href, title, text):
        """Return an `<a>` element; the title is set only when non-empty."""
        anchor = etree.Element('a')
        anchor.set('href', href)
        if title:
            anchor.set('title', title)
        anchor.text = text
        return anchor
823
824
class ShortReferenceInlineProcessor(ReferenceInlineProcessor):
    """Short form of reference: [google]. """
    def evalId(self, data, index, text):
        """Use the lower-cased link text itself as the id of [ref]. """

        ref_id = text.lower()
        return ref_id, index, True
831
832
class ImageReferenceInlineProcessor(ReferenceInlineProcessor):
    """ Resolve a stored reference and return an img element. """
    def makeTag(self, href, title, text):
        """Return an `<img>` element; the title is set only when non-empty."""
        img = etree.Element("img")
        img.set("src", href)
        if title:
            img.set("title", title)
        img.set("alt", self.unescape(text))
        return img
842
843
class ShortImageReferenceInlineProcessor(ImageReferenceInlineProcessor):
    """ Short form of image reference: ![ref]. """
    def evalId(self, data, index, text):
        """Use the lower-cased alt text itself as the id of ![ref]. """

        ref_id = text.lower()
        return ref_id, index, True
850
851
class AutolinkInlineProcessor(InlineProcessor):
    """ Return a link Element given an autolink (`<http://example.com>`). """
    def handleMatch(self, m, data):
        url = m.group(1)
        link = etree.Element("a")
        link.set('href', self.unescape(url))
        # The visible text is the URL exactly as captured.
        link.text = util.AtomicString(url)
        return link, m.start(0), m.end(0)
859
860
class AutomailInlineProcessor(InlineProcessor):
    """
    Return a mailto link Element given an automail link (`<foo@example.com>`).
    """
    def handleMatch(self, m, data):
        anchor = etree.Element('a')

        address = self.unescape(m.group(1))
        # Drop a leading "mailto:" so it is not duplicated in the href below.
        if address.startswith("mailto:"):
            address = address[len("mailto:"):]

        def codepoint2name(code):
            """Return entity definition by code, or the code if not defined."""
            name = entities.codepoint2name.get(code)
            if not name:
                return "%s#%d;" % (util.AMP_SUBSTITUTE, code)
            return "{}{};".format(util.AMP_SUBSTITUTE, name)

        # Visible text: every character written as an entity reference.
        encoded_text = ''.join(codepoint2name(ord(ch)) for ch in address)
        anchor.text = util.AtomicString(encoded_text)

        # href: every character of "mailto:<address>" as a numeric reference.
        target = "mailto:" + address
        encoded_href = "".join(util.AMP_SUBSTITUTE + '#%d;' % ord(ch) for ch in target)
        anchor.set('href', encoded_href)

        return anchor, m.start(0), m.end(0)
887