"""
Attribute List Extension for Python-Markdown
============================================

Adds attribute list syntax. Inspired by
[maruku](http://maruku.rubyforge.org/proposal.html#attribute_lists)'s
feature of the same name.

See <https://Python-Markdown.github.io/extensions/attr_list>
for documentation.

Original code Copyright 2011 [Waylan Limberg](http://achinghead.com/).

All changes Copyright 2011-2014 The Python Markdown Project

License: [BSD](https://opensource.org/licenses/bsd-license.php)

"""

from . import Extension
from ..treeprocessors import Treeprocessor
import re


# Scanner callbacks. `re.Scanner` calls each one with the scanner instance
# (`s`, unused here) and the matched token text (`t`); each returns a
# (key, value) pair.

def _handle_double_quote(s, t):
    """ Split `key="value"` into key and value, dropping the double quotes. """
    k, v = t.split('=', 1)
    return k, v.strip('"')


def _handle_single_quote(s, t):
    """ Split `key='value'` into key and value, dropping the single quotes. """
    k, v = t.split('=', 1)
    return k, v.strip("'")


def _handle_key_value(s, t):
    """ Split an unquoted `key=value` token at the first `=`. """
    return t.split('=', 1)


def _handle_word(s, t):
    """
    Handle a bare word.

    `.name` yields the pseudo-key `'.'` (a class to be added), `#name` yields
    an `id`, and any other word is used as both key and value.
    """
    if t.startswith('.'):
        return '.', t[1:]
    if t.startswith('#'):
        return 'id', t[1:]
    return t, t


# Patterns are tried in order, so the quoted forms must come before the
# unquoted `key=value` form, and the bare word must come last.
_scanner = re.Scanner([
    (r'[^ =]+=".*?"', _handle_double_quote),
    (r"[^ =]+='.*?'", _handle_single_quote),
    (r'[^ =]+=[^ =]+', _handle_key_value),
    (r'[^ =]+', _handle_word),
    (r' ', None)
])


def get_attrs(str):
    """
    Parse attribute list and return a list of attribute pairs.

    Only the tokens recognised by the scanner are returned; any trailing
    text the scanner could not match is discarded.

    The parameter name shadows the `str` builtin; it is kept unchanged so
    that existing keyword callers continue to work.
    """
    return _scanner.scan(str)[0]


def isheader(elem):
    """ Return True if `elem` is an `h1` through `h6` element. """
    return elem.tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'h6')


class AttrListTreeprocessor(Treeprocessor):
    """ Find attribute lists in element text and apply them as attributes. """

    # An attribute list: `{...}` or `{: ...}` on a single line. Group 1 is
    # the content between the braces.
    BASE_RE = r'\{\:?[ ]*([^\}\n ][^\}\n]*)[ ]*\}'
    # Attribute list at the very end of the text, preceded by a space.
    HEADER_RE = re.compile(r'[ ]+{}[ ]*$'.format(BASE_RE))
    # Attribute list alone on the last line of the text.
    BLOCK_RE = re.compile(r'\n[ ]*{}[ ]*$'.format(BASE_RE))
    # Attribute list at the very start of the text.
    INLINE_RE = re.compile(r'^{}'.format(BASE_RE))
    # Matches runs of characters which are NOT in the allowed set for
    # attribute names (see `sanitize_name`).
    NAME_RE = re.compile(r'[^A-Z_a-z\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u02ff'
                         r'\u0370-\u037d\u037f-\u1fff\u200c-\u200d'
                         r'\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff'
                         r'\uf900-\ufdcf\ufdf0-\ufffd'
                         r'\:\-\.0-9\u00b7\u0300-\u036f\u203f-\u2040]+')

    def run(self, doc):
        """
        Walk every element of `doc`, move any attribute list found in its
        text into the element's attributes, and strip it from the text.

        Elements are modified in place; nothing is returned.
        """
        for elem in doc.iter():
            if self.md.is_block_level(elem.tag):
                # Block level: check for attrs on last line of text
                RE = self.BLOCK_RE
                if isheader(elem) or elem.tag in ['dt', 'td', 'th']:
                    # header, def-term, or table cell: check for attrs at end of element
                    RE = self.HEADER_RE
                if len(elem) and elem.tag == 'li':
                    # special case list items. children may include a ul or ol.
                    pos = None
                    # find the ul or ol position
                    for i, child in enumerate(elem):
                        if child.tag in ['ul', 'ol']:
                            pos = i
                            break
                    if pos is None and elem[-1].tail:
                        # use tail of last child. no ul or ol.
                        m = RE.search(elem[-1].tail)
                        if m:
                            self.assign_attrs(elem, m.group(1))
                            elem[-1].tail = elem[-1].tail[:m.start()]
                    elif pos is not None and pos > 0 and elem[pos-1].tail:
                        # use tail of last child before ul or ol
                        m = RE.search(elem[pos-1].tail)
                        if m:
                            self.assign_attrs(elem, m.group(1))
                            elem[pos-1].tail = elem[pos-1].tail[:m.start()]
                    elif elem.text:
                        # use text. ul is first child.
                        m = RE.search(elem.text)
                        if m:
                            self.assign_attrs(elem, m.group(1))
                            elem.text = elem.text[:m.start()]
                elif len(elem) and elem[-1].tail:
                    # has children. Get from tail of last child
                    m = RE.search(elem[-1].tail)
                    if m:
                        self.assign_attrs(elem, m.group(1))
                        elem[-1].tail = elem[-1].tail[:m.start()]
                        if isheader(elem):
                            # clean up trailing #s
                            elem[-1].tail = elem[-1].tail.rstrip('#').rstrip()
                elif elem.text:
                    # no children. Get from text.
                    m = RE.search(elem.text)
                    if m:
                        self.assign_attrs(elem, m.group(1))
                        elem.text = elem.text[:m.start()]
                        if isheader(elem):
                            # clean up trailing #s
                            elem.text = elem.text.rstrip('#').rstrip()
            else:
                # inline: check for attrs at start of tail
                if elem.tail:
                    m = self.INLINE_RE.match(elem.tail)
                    if m:
                        self.assign_attrs(elem, m.group(1))
                        elem.tail = elem.tail[m.end():]

    def assign_attrs(self, elem, attrs):
        """
        Assign attrs to element.

        `attrs` is the raw text of an attribute list (the content between
        the braces). Class names (`.name`) are appended to any existing
        `class` value; every other key is sanitized and set directly,
        replacing any existing value for that key.
        """
        for k, v in get_attrs(attrs):
            if k == '.':
                # add to class
                cls = elem.get('class')
                if cls:
                    elem.set('class', '{} {}'.format(cls, v))
                else:
                    elem.set('class', v)
            else:
                # assign attr k with v
                elem.set(self.sanitize_name(k), v)

    def sanitize_name(self, name):
        """
        Sanitize name as 'an XML Name, minus the ":"'.
        See https://www.w3.org/TR/REC-xml-names/#NT-NCName

        Each run of characters matched by `NAME_RE` is replaced with a
        single underscore.
        """
        return self.NAME_RE.sub('_', name)


class AttrListExtension(Extension):
    """ Attribute List extension for Python-Markdown. """

    def extendMarkdown(self, md):
        """ Register the `attr_list` treeprocessor (priority 8) with `md`. """
        md.treeprocessors.register(AttrListTreeprocessor(md), 'attr_list', 8)
        md.registerExtension(self)


def makeExtension(**kwargs):  # pragma: no cover
    """ Return an `AttrListExtension` built from the given keyword options. """
    return AttrListExtension(**kwargs)