import markdown
import re


def isString(s):
    """ Check whether `s` is a text string (`unicode` or `str`). """
    # NOTE: `unicode` is a Python 2 builtin; this module targets Python 2.
    return isinstance(s, (unicode, str))


class Processor:
    """ Base class that optionally keeps a reference to a Markdown instance. """

    def __init__(self, markdown_instance=None):
        # The attribute is only created when an instance is supplied.
        if markdown_instance:
            self.markdown = markdown_instance


class Treeprocessor(Processor):
    """
    Treeprocessors are run on the ElementTree object before serialization.

    Each Treeprocessor implements a "run" method that takes a pointer to an
    ElementTree, modifies it as necessary and returns an ElementTree
    object.

    Treeprocessors must extend markdown.Treeprocessor.

    """
    def run(self, root):
        """
        Subclasses of Treeprocessor should implement a `run` method, which
        takes a root ElementTree. This method can return another ElementTree
        object, and the existing root ElementTree will be replaced, or it can
        modify the current tree and return None.
        """
        pass


class InlineProcessor(Treeprocessor):
    """
    A Treeprocessor that traverses a tree, applying inline patterns.
    """

    def __init__(self, md):
        self.__placeholder_prefix = markdown.INLINE_PLACEHOLDER_PREFIX
        self.__placeholder_suffix = markdown.ETX
        # Prefix + four-digit id + suffix.
        self.__placeholder_length = (4 + len(self.__placeholder_prefix)
                                     + len(self.__placeholder_suffix))
        self.__placeholder_re = re.compile(
            markdown.INLINE_PLACEHOLDER % r'([0-9]{4})')
        self.markdown = md

    def __makePlaceholder(self, type):
        """
        Generate a placeholder for the next node to be stashed.

        The id is the current size of `self.stashed_nodes`, zero-padded to
        four digits. `type` is accepted but not used.

        Returns: tuple (placeholder string, id string).
        """
        node_id = "%04d" % len(self.stashed_nodes)
        placeholder = markdown.INLINE_PLACEHOLDER % node_id
        return placeholder, node_id

    def __findPlaceholder(self, data, index):
        """
        Extract id from data string, start from index

        Keyword arguments:

        * data: string
        * index: index, from which we start search

        Returns: placeholder id and string index, after the found placeholder.
        If no placeholder is found, returns (None, index + 1).
        """
        m = self.__placeholder_re.search(data, index)
        if m:
            return m.group(1), m.end()
        return None, index + 1

    def __stashNode(self, node, type):
        """ Add node to stash and return the placeholder that refers to it. """
        placeholder, node_id = self.__makePlaceholder(type)
        self.stashed_nodes[node_id] = node
        return placeholder

    def __handleInline(self, data, patternIndex=0):
        """
        Process string with inline patterns and replace it
        with placeholders

        Keyword arguments:

        * data: A line of Markdown text
        * patternIndex: The index of the inlinePattern to start with

        Returns: String with placeholders. AtomicString data is returned
        untouched.

        """
        if not isinstance(data, markdown.AtomicString):
            startIndex = 0
            patterns = self.markdown.inlinePatterns
            while patternIndex < len(patterns):
                data, matched, startIndex = self.__applyPattern(
                    patterns.value_for_index(patternIndex),
                    data, patternIndex, startIndex)
                # Retry the same pattern until it no longer matches.
                if not matched:
                    patternIndex += 1
        return data

    def __processElementText(self, node, subnode, isText=True):
        """
        Process placeholders in Element.text or Element.tail
        of Elements popped from self.stashed_nodes.

        Keywords arguments:

        * node: parent node
        * subnode: processing node
        * isText: bool variable, True - it's text, False - it's tail

        Returns: None

        """
        if isText:
            text = subnode.text
            subnode.text = None
        else:
            text = subnode.tail
            subnode.tail = None

        childResult = self.__processPlaceholders(text, subnode)

        if not isText and node is not subnode:
            pos = node.getchildren().index(subnode)
            node.remove(subnode)
        else:
            pos = 0

        # Inserting in reverse at a fixed position keeps the original order.
        childResult.reverse()
        for newChild in childResult:
            node.insert(pos, newChild)

    def __processPlaceholders(self, data, parent):
        """
        Process string with placeholders and generate ElementTree tree.

        Keyword arguments:

        * data: string with placeholders instead of ElementTree elements.
        * parent: Element, which contains processing inline data

        Returns: list with ElementTree elements with applied inline patterns.
        """
        def linkText(text):
            # Attach plain text to the tail of the last collected element,
            # or to the parent's text when nothing has been collected yet.
            if not text:
                return
            if result:
                last = result[-1]
                if last.tail:
                    last.tail += text
                else:
                    last.tail = text
            elif parent.text:
                parent.text += text
            else:
                parent.text = text

        result = []
        startIndex = 0
        while data:
            index = data.find(self.__placeholder_prefix, startIndex)
            if index == -1:
                # No more placeholders: the remainder is plain text.
                linkText(data[startIndex:])
                data = ""
                continue

            node_id, phEndIndex = self.__findPlaceholder(data, index)

            if node_id in self.stashed_nodes:
                node = self.stashed_nodes.get(node_id)

                if index > 0:
                    linkText(data[startIndex:index])

                if isString(node):
                    # It's just a string: merge it into surrounding text.
                    linkText(node)
                    startIndex = phEndIndex
                    continue

                # It's an Element: resolve placeholders nested in its own
                # text/tail and in those of its direct children.
                for child in [node] + node.getchildren():
                    if child.tail and child.tail.strip():
                        self.__processElementText(node, child, False)
                    if child.text and child.text.strip():
                        self.__processElementText(child, child)

                startIndex = phEndIndex
                result.append(node)
            else:
                # Wrong placeholder: keep the prefix as literal text and
                # resume scanning right after it. (Previously referenced an
                # undefined name `prefix`, raising NameError.)
                end = index + len(self.__placeholder_prefix)
                linkText(data[startIndex:end])
                startIndex = end

        return result

    def __applyPattern(self, pattern, data, patternIndex, startIndex=0):
        """
        Check if the line fits the pattern, create the necessary
        elements, add it to stashed_nodes.

        Keyword arguments:

        * data: the text to be processed
        * pattern: the pattern to be checked
        * patternIndex: index of current pattern
        * startIndex: string index, from which we starting search

        Returns: tuple (string with placeholders instead of ElementTree
        elements, whether the pattern matched, index to start the next
        search from).

        """
        match = pattern.getCompiledRegExp().match(data[startIndex:])
        leftData = data[:startIndex]

        if not match:
            return data, False, 0

        node = pattern.handleMatch(match)

        if node is None:
            # The pattern matched but produced nothing; continue searching
            # from the start of the last group, offset by the skipped prefix.
            lastGroup = len(match.groups())
            return data, True, len(leftData) + match.span(lastGroup)[0]

        if (not isString(node)
                and not isinstance(node.text, markdown.AtomicString)):
            # We need to process current node too
            for child in [node] + node.getchildren():
                if child.text:
                    child.text = self.__handleInline(child.text,
                                                     patternIndex + 1)
                if child.tail:
                    child.tail = self.__handleInline(child.tail,
                                                     patternIndex)

        placeholder = self.__stashNode(node, pattern.type())

        # First group is the text before the match, last group the text after.
        return "%s%s%s%s" % (leftData,
                             match.group(1),
                             placeholder, match.groups()[-1]), True, 0

    def run(self, tree):
        """Apply inline patterns to a parsed Markdown tree.

        Iterate over ElementTree, find elements with inline tag, apply inline
        patterns and append newly created Elements to tree.  If you don't
        want process your data with inline patterns, instead of normal string,
        use subclass AtomicString:

            node.text = markdown.AtomicString("data won't be processed with inline patterns")

        Arguments:

        * tree: ElementTree object, representing Markdown tree.

        Returns: ElementTree object with applied inline patterns.

        """
        self.stashed_nodes = {}

        stack = [tree]

        while stack:
            currElement = stack.pop()
            insertQueue = []
            for child in currElement.getchildren():
                if child.text and not isinstance(child.text,
                                                 markdown.AtomicString):
                    text = child.text
                    child.text = None
                    lst = self.__processPlaceholders(
                        self.__handleInline(text), child)
                    stack += lst
                    insertQueue.append((child, lst))

                if child.getchildren():
                    stack.append(child)

            handleAttributes = markdown.inlinepatterns.handleAttributes
            for element, lst in insertQueue:
                if element.text:
                    element.text = handleAttributes(element.text, element)
                for i, newChild in enumerate(lst):
                    # Processing attributes
                    if newChild.tail:
                        newChild.tail = handleAttributes(newChild.tail,
                                                         element)
                    if newChild.text:
                        newChild.text = handleAttributes(newChild.text,
                                                         newChild)
                    element.insert(i, newChild)
        return tree


class PrettifyTreeprocessor(Treeprocessor):
    """ Add linebreaks to the html document. """

    def _prettifyETree(self, elem):
        """ Recursively add linebreaks to ElementTree children. """

        i = "\n"
        if markdown.isBlockLevel(elem.tag) and elem.tag not in ['code', 'pre']:
            if ((not elem.text or not elem.text.strip())
                    and len(elem) and markdown.isBlockLevel(elem[0].tag)):
                elem.text = i
            for e in elem:
                if markdown.isBlockLevel(e.tag):
                    self._prettifyETree(e)
        # A blank (missing or whitespace-only) tail becomes a linebreak.
        if not elem.tail or not elem.tail.strip():
            elem.tail = i

    def run(self, root):
        """ Add linebreaks to ElementTree root object. """

        self._prettifyETree(root)
        # Do <br />'s separately as they are often in the middle of
        # inline content and missed by _prettifyETree.
        brs = root.getiterator('br')
        for br in brs:
            if not br.tail or not br.tail.strip():
                br.tail = '\n'
            else:
                br.tail = '\n%s' % br.tail