# markdown/serializers.py
#
# Add x/html serialization to ElementTree
# Taken from ElementTree 1.3 preview with slight modifications
#
# Copyright (c) 1999-2007 by Fredrik Lundh.  All rights reserved.
#
# fredrik@pythonware.com
# https://www.pythonware.com/
#
# --------------------------------------------------------------------
# The ElementTree toolkit is
#
# Copyright (c) 1999-2007 by Fredrik Lundh
#
# By obtaining, using, and/or copying this software and/or its
# associated documentation, you agree that you have read, understood,
# and will comply with the following terms and conditions:
#
# Permission to use, copy, modify, and distribute this software and
# its associated documentation for any purpose and without fee is
# hereby granted, provided that the above copyright notice appears in
# all copies, and that both that copyright notice and this permission
# notice appear in supporting documentation, and that the name of
# Secret Labs AB or the author not be used in advertising or publicity
# pertaining to distribution of the software without specific, written
# prior permission.
#
# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
# ABILITY AND FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
# OF THIS SOFTWARE.
# --------------------------------------------------------------------


from xml.etree.ElementTree import ProcessingInstruction
from xml.etree.ElementTree import Comment, ElementTree, QName
import re

__all__ = ['to_html_string', 'to_xhtml_string']

# Tags that never receive a closing tag: they are written as `<tag>` in
# "html" format and as `<tag />` in "xhtml" format.
HTML_EMPTY = {"area", "base", "basefont", "br", "col", "frame", "hr",
              "img", "input", "isindex", "link", "meta", "param"}

# Matches an ampersand that is NOT the start of an existing entity
# (`&name;`, `&#123;` or `&#x1f;`), so existing entities are not
# double-escaped.
RE_AMP = re.compile(r'&(?!(?:\#[0-9]+|\#x[0-9a-f]+|[0-9a-z]+);)', re.I)


def _raise_serialization_error(text):  # pragma: no cover
    """Raise a `TypeError` reporting that `text` cannot be serialized."""
    raise TypeError(
        "cannot serialize {!r} (type {})".format(text, type(text).__name__)
    )


def _escape_cdata(text):
    """Escape `&`, `<` and `>` in character data.

    Ampersands that already begin an entity are left untouched.

    Raises:
        TypeError: if `text` does not behave like a string.
    """
    try:
        # it's worth avoiding do-nothing calls for strings that are
        # shorter than 500 character, or so.  assume that's, by far,
        # the most common case in most applications.
        if "&" in text:
            # Only replace & when not part of an entity
            text = RE_AMP.sub('&amp;', text)
        if "<" in text:
            text = text.replace("<", "&lt;")
        if ">" in text:
            text = text.replace(">", "&gt;")
        return text
    except (TypeError, AttributeError):  # pragma: no cover
        _raise_serialization_error(text)


def _escape_attrib(text):
    """Escape an attribute value, including embedded newlines.

    Escapes `&` (unless it starts an entity), `<`, `>` and `"`, and
    rewrites newlines as the `&#10;` character reference.

    Raises:
        TypeError: if `text` does not behave like a string.
    """
    try:
        if "&" in text:
            # Only replace & when not part of an entity
            text = RE_AMP.sub('&amp;', text)
        if "<" in text:
            text = text.replace("<", "&lt;")
        if ">" in text:
            text = text.replace(">", "&gt;")
        if "\"" in text:
            text = text.replace("\"", "&quot;")
        if "\n" in text:
            text = text.replace("\n", "&#10;")
        return text
    except (TypeError, AttributeError):  # pragma: no cover
        _raise_serialization_error(text)


def _escape_attrib_html(text):
    """Escape an HTML attribute value.

    Escapes `&` (unless it starts an entity), `<`, `>` and `"`.  Unlike
    `_escape_attrib`, newlines are left as they are.

    Raises:
        TypeError: if `text` does not behave like a string.
    """
    try:
        if "&" in text:
            # Only replace & when not part of an entity
            text = RE_AMP.sub('&amp;', text)
        if "<" in text:
            text = text.replace("<", "&lt;")
        if ">" in text:
            text = text.replace(">", "&gt;")
        if "\"" in text:
            text = text.replace("\"", "&quot;")
        return text
    except (TypeError, AttributeError):  # pragma: no cover
        _raise_serialization_error(text)


def _serialize_html(write, elem, format):
    """Recursively serialize `elem` and its children through `write`.

    Args:
        write: Callable invoked with each string fragment of output.
        elem: The element to serialize.
        format: Either "html" or "xhtml".  It selects how empty elements
            (`<br>` vs `<br />`) and boolean attributes (`checked` vs
            `checked="checked"`) are written.

    Raises:
        ValueError: if the tag is a `QName` whose text does not start
            with a `{uri}` namespace part.
    """
    tag = elem.tag
    text = elem.text
    if tag is Comment:
        write("<!--%s-->" % _escape_cdata(text))
    elif tag is ProcessingInstruction:
        write("<?%s?>" % _escape_cdata(text))
    elif tag is None:
        # A tag-less element contributes only its text and children.
        if text:
            write(_escape_cdata(text))
        for e in elem:
            _serialize_html(write, e, format)
    else:
        namespace_uri = None
        if isinstance(tag, QName):
            # QNAME objects store their data as a string: `{uri}tag`
            if tag.text[:1] == "{":
                namespace_uri, tag = tag.text[1:].split("}", 1)
            else:
                raise ValueError('QName objects must define a tag.')
        write("<" + tag)
        items = elem.items()
        if items:
            items = sorted(items)  # lexical order
            for k, v in items:
                if isinstance(k, QName):
                    # Assume a text only QName
                    k = k.text
                if isinstance(v, QName):
                    # Assume a text only QName
                    v = v.text
                else:
                    v = _escape_attrib_html(v)
                if k == v and format == 'html':
                    # handle boolean attributes
                    write(" %s" % v)
                else:
                    write(' {}="{}"'.format(k, v))
        if namespace_uri:
            write(' xmlns="%s"' % (_escape_attrib(namespace_uri)))
        if format == "xhtml" and tag.lower() in HTML_EMPTY:
            write(" />")
        else:
            write(">")
            if text:
                if tag.lower() in ["script", "style"]:
                    # Script and style content is written without escaping.
                    write(text)
                else:
                    write(_escape_cdata(text))
            for e in elem:
                _serialize_html(write, e, format)
            if tag.lower() not in HTML_EMPTY:
                write("</" + tag + ">")
    if elem.tail:
        write(_escape_cdata(elem.tail))


def _write_html(root, format="html"):
    """Serialize the tree rooted at `root` and return it as one string.

    Args:
        root: The root element; must not be `None`.
        format: Either "html" (the default) or "xhtml".
    """
    assert root is not None
    data = []
    write = data.append
    _serialize_html(write, root, format)
    return "".join(data)


# --------------------------------------------------------------------
# public functions

def to_html_string(element):
    """Serialize `element` and its descendants to a string of HTML."""
    return _write_html(ElementTree(element).getroot(), format="html")


def to_xhtml_string(element):
    """Serialize `element` and its descendants to a string of XHTML."""
    return _write_html(ElementTree(element).getroot(), format="xhtml")