• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""
2Attribute List Extension for Python-Markdown
3============================================
4
5Adds attribute list syntax. Inspired by
6[maruku](http://maruku.rubyforge.org/proposal.html#attribute_lists)'s
7feature of the same name.
8
9See <https://Python-Markdown.github.io/extensions/attr_list>
10for documentation.
11
12Original code Copyright 2011 [Waylan Limberg](http://achinghead.com/).
13
14All changes Copyright 2011-2014 The Python Markdown Project
15
16License: [BSD](https://opensource.org/licenses/bsd-license.php)
17
18"""
19
20from . import Extension
21from ..treeprocessors import Treeprocessor
22import re
23
24
def _handle_double_quote(s, t):
    """Scanner action for a ``key="value"`` token.

    ``s`` is the scanner instance (unused) and ``t`` is the matched token
    text. Returns a ``(key, value)`` tuple with the double quotes removed
    from both ends of the value.
    """
    name, quoted = t.split('=', 1)
    value = quoted.strip('"')
    return name, value
28
29
def _handle_single_quote(s, t):
    """Scanner action for a ``key='value'`` token.

    ``s`` is the scanner instance (unused) and ``t`` is the matched token
    text. Returns a ``(key, value)`` tuple with the single quotes removed
    from both ends of the value.
    """
    name, quoted = t.split('=', 1)
    value = quoted.strip("'")
    return name, value
33
34
def _handle_key_value(s, t):
    """Scanner action for an unquoted ``key=value`` token.

    ``s`` is the scanner instance (unused) and ``t`` is the matched token
    text. The token is split on the first ``=`` only.

    Returns a ``(key, value)`` tuple, matching the other scanner actions in
    this module (a bare ``str.split`` result would be a list instead).
    """
    return tuple(t.split('=', 1))
37
38
def _handle_word(s, t):
    """Scanner action for a bare word (a token containing no ``=``).

    ``s`` is the scanner instance (unused) and ``t`` is the matched token
    text. Returns:

    * ``('.', name)`` for ``.name`` (the ``'.'`` key marks a class entry),
    * ``('id', name)`` for ``#name``,
    * ``(t, t)`` for any other word.
    """
    prefix, remainder = t[:1], t[1:]
    if prefix == '.':
        return '.', remainder
    if prefix == '#':
        return 'id', remainder
    return t, t
45
46
# Tokenizer for the text inside an attribute list. Rules are listed from the
# most specific form to the least specific one, so the quoted forms are
# tried before the unquoted key=value form and the bare-word form.
_scanner = re.Scanner(
    [
        # key="value" (value may contain spaces; ends at the next '"')
        (r'[^ =]+=".*?"', _handle_double_quote),
        # key='value' (value may contain spaces; ends at the next "'")
        (r"[^ =]+='.*?'", _handle_single_quote),
        # key=value with no quotes and no spaces
        (r'[^ =]+=[^ =]+', _handle_key_value),
        # bare word: .class, #id, or a plain word
        (r'[^ =]+', _handle_word),
        # a single separating space; no action, so it yields no token
        (r' ', None),
    ]
)
54
55
def get_attrs(str):
    """Parse an attribute list string into a list of key/value pairs.

    The text is tokenized with the module-level ``_scanner``. Only the
    parsed tokens are returned; any trailing text the scanner could not
    match is discarded.

    Note: the parameter name shadows the ``str`` builtin; it is kept so
    that existing keyword callers continue to work.
    """
    tokens, _unmatched = _scanner.scan(str)
    return tokens
59
60
def isheader(elem):
    """Return True when ``elem.tag`` is one of the heading tags h1 to h6."""
    heading_tags = ('h1', 'h2', 'h3', 'h4', 'h5', 'h6')
    return elem.tag in heading_tags
63
64
class AttrListTreeprocessor(Treeprocessor):
    """Tree processor that turns ``{: ... }`` attribute lists into attributes.

    ``run`` walks every element of the document. When an attribute list is
    found in the expected position it is parsed with ``get_attrs``, applied
    to the element, and removed from the element's text.
    """

    # Core attribute-list pattern: "{" with an optional ":", optional spaces,
    # the attribute text (group 1; must not start with a space), optional
    # spaces and "}". Neither "}" nor a newline may appear inside.
    BASE_RE = r'\{\:?[ ]*([^\}\n ][^\}\n]*)[ ]*\}'
    # Attribute list at the very end of the text, preceded by spaces.
    HEADER_RE = re.compile(r'[ ]+{}[ ]*$'.format(BASE_RE))
    # Attribute list alone on the last line of the text.
    BLOCK_RE = re.compile(r'\n[ ]*{}[ ]*$'.format(BASE_RE))
    # Attribute list at the very start of the text.
    INLINE_RE = re.compile(r'^{}'.format(BASE_RE))
    # One or more characters that are not allowed in an attribute name.
    NAME_RE = re.compile(r'[^A-Z_a-z\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u02ff'
                         r'\u0370-\u037d\u037f-\u1fff\u200c-\u200d'
                         r'\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff'
                         r'\uf900-\ufdcf\ufdf0-\ufffd'
                         r'\:\-\.0-9\u00b7\u0300-\u036f\u203f-\u2040]+')

    def run(self, doc):
        """Apply attribute lists to every element under ``doc``.

        Inline elements take their attribute list from the start of their
        tail. Block-level elements take it from the end of their content:
        headers, ``dt``, ``td`` and ``th`` use ``HEADER_RE``, everything else
        uses ``BLOCK_RE``. The tree is modified in place.
        """
        for elem in doc.iter():
            if not self.md.is_block_level(elem.tag):
                # inline: check for attrs at start of tail
                tail = elem.tail
                if tail:
                    found = self.INLINE_RE.match(tail)
                    if found:
                        self.assign_attrs(elem, found.group(1))
                        elem.tail = tail[found.end():]
                continue

            # Block level: attrs sit on the last line of text, except for a
            # header, def-term or table cell where they sit at the end of
            # the element's text.
            is_heading = isheader(elem)
            if is_heading or elem.tag in ['dt', 'td', 'th']:
                pattern = self.HEADER_RE
            else:
                pattern = self.BLOCK_RE

            # Decide which node, and which of its text slots ('text' or
            # 'tail'), holds the text that may end with an attribute list.
            holder = None
            slot = None
            if len(elem) and elem.tag == 'li':
                # special case list items. children may include a ul or ol.
                nested = next(
                    (index for index, child in enumerate(elem)
                     if child.tag in ['ul', 'ol']),
                    None
                )
                if nested is None and elem[-1].tail:
                    # no ul or ol: use tail of last child
                    holder, slot = elem[-1], 'tail'
                elif nested is not None and nested > 0 and elem[nested - 1].tail:
                    # use tail of last child before the ul or ol
                    holder, slot = elem[nested - 1], 'tail'
                elif elem.text:
                    # fall back to the item's own leading text
                    holder, slot = elem, 'text'
            elif len(elem) and elem[-1].tail:
                # has children: use tail of last child
                holder, slot = elem[-1], 'tail'
            elif elem.text:
                # no usable child tail: use the element's own text
                holder, slot = elem, 'text'

            # Compare against None explicitly: an Element without children
            # is falsy, so a plain truth test would be wrong here.
            if holder is None:
                continue

            content = getattr(holder, slot)
            found = pattern.search(content)
            if not found:
                continue

            self.assign_attrs(elem, found.group(1))
            trimmed = content[:found.start()]
            if is_heading:
                # clean up trailing #s
                trimmed = trimmed.rstrip('#').rstrip()
            setattr(holder, slot, trimmed)

    def assign_attrs(self, elem, attrs):
        """ Parse the attribute text `attrs` and set each pair on `elem`. """
        for key, value in get_attrs(attrs):
            if key != '.':
                # ordinary attribute: set it under its sanitized name
                elem.set(self.sanitize_name(key), value)
                continue
            # '.' marks a class: append to any class already present
            existing = elem.get('class')
            combined = '{} {}'.format(existing, value) if existing else value
            elem.set('class', combined)

    def sanitize_name(self, name):
        """
        Sanitize name as 'an XML Name, minus the ":"'.
        See https://www.w3.org/TR/REC-xml-names/#NT-NCName

        Each run of characters matched by `NAME_RE` becomes a single '_'.
        """
        return re.sub(self.NAME_RE, '_', name)
157
158
class AttrListExtension(Extension):
    """ Extension that installs `AttrListTreeprocessor` on a Markdown instance. """

    def extendMarkdown(self, md):
        """
        Register an `AttrListTreeprocessor` on `md.treeprocessors` under the
        name 'attr_list' (third argument 8), then register this extension
        with `md`.
        """
        processor = AttrListTreeprocessor(md)
        md.treeprocessors.register(processor, 'attr_list', 8)
        md.registerExtension(self)
163
164
def makeExtension(**kwargs):  # pragma: no cover
    """ Create an `AttrListExtension`, forwarding all keyword arguments. """
    extension = AttrListExtension(**kwargs)
    return extension
167