# markdown/html4.py
#
# Add html4 serialization to older versions of ElementTree
# Taken from ElementTree 1.3 preview with slight modifications
#
# Copyright (c) 1999-2007 by Fredrik Lundh. All rights reserved.
#
# fredrik@pythonware.com
# http://www.pythonware.com
#
# --------------------------------------------------------------------
# The ElementTree toolkit is
#
# Copyright (c) 1999-2007 by Fredrik Lundh
#
# By obtaining, using, and/or copying this software and/or its
# associated documentation, you agree that you have read, understood,
# and will comply with the following terms and conditions:
#
# Permission to use, copy, modify, and distribute this software and
# its associated documentation for any purpose and without fee is
# hereby granted, provided that the above copyright notice appears in
# all copies, and that both that copyright notice and this permission
# notice appear in supporting documentation, and that the name of
# Secret Labs AB or the author not be used in advertising or publicity
# pertaining to distribution of the software without specific, written
# prior permission.
#
# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
# OF THIS SOFTWARE.
# --------------------------------------------------------------------


import markdown
ElementTree = markdown.etree.ElementTree
QName = markdown.etree.QName
Comment = markdown.etree.Comment
PI = markdown.etree.PI
ProcessingInstruction = markdown.etree.ProcessingInstruction

# Tags for which _serialize_html() writes no closing tag.
# NOTE: "meta" and "param" are two separate entries; a missing comma
# here would silently concatenate them into "metaparam".
HTML_EMPTY = ("area", "base", "basefont", "br", "col", "frame", "hr",
              "img", "input", "isindex", "link", "meta", "param")

try:
    HTML_EMPTY = set(HTML_EMPTY)
except NameError:
    # no builtin set available; keep the tuple
    pass

_namespace_map = {
    # "well-known" namespace prefixes
    "http://www.w3.org/XML/1998/namespace": "xml",
    "http://www.w3.org/1999/xhtml": "html",
    "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
    "http://schemas.xmlsoap.org/wsdl/": "wsdl",
    # xml schema
    "http://www.w3.org/2001/XMLSchema": "xs",
    "http://www.w3.org/2001/XMLSchema-instance": "xsi",
    # Dublin Core
    "http://purl.org/dc/elements/1.1/": "dc",
}


def _raise_serialization_error(text):
    """Raise TypeError reporting that *text* cannot be serialized."""
    raise TypeError(
        "cannot serialize %r (type %s)" % (text, type(text).__name__)
    )


def _encode(text, encoding):
    """Encode *text* without escaping, using character references for
    characters the target *encoding* cannot represent.

    Raises TypeError (via _raise_serialization_error) if *text* cannot
    be encoded.
    """
    try:
        return text.encode(encoding, "xmlcharrefreplace")
    except (TypeError, AttributeError):
        _raise_serialization_error(text)


def _escape_cdata(text, encoding):
    """Escape ``&``, ``<`` and ``>`` in character data and encode it.

    Raises TypeError (via _raise_serialization_error) if *text* does
    not support the string operations used here.
    """
    try:
        # it's worth avoiding do-nothing calls for strings that are
        # shorter than 500 character, or so.  assume that's, by far,
        # the most common case in most applications.
        # "&" must be handled first so the entities added below are
        # not escaped a second time.
        if "&" in text:
            text = text.replace("&", "&amp;")
        if "<" in text:
            text = text.replace("<", "&lt;")
        if ">" in text:
            text = text.replace(">", "&gt;")
        return text.encode(encoding, "xmlcharrefreplace")
    except (TypeError, AttributeError):
        _raise_serialization_error(text)


def _escape_attrib(text, encoding):
    """Escape an attribute value (``&``, ``<``, ``>``, ``"`` and
    newline) and encode it.

    Used by _serialize_html() for the values of ``xmlns`` declarations.
    Raises TypeError (via _raise_serialization_error) on bad input.
    """
    try:
        if "&" in text:
            text = text.replace("&", "&amp;")
        if "<" in text:
            text = text.replace("<", "&lt;")
        if ">" in text:
            text = text.replace(">", "&gt;")
        if "\"" in text:
            text = text.replace("\"", "&quot;")
        if "\n" in text:
            text = text.replace("\n", "&#10;")
        return text.encode(encoding, "xmlcharrefreplace")
    except (TypeError, AttributeError):
        _raise_serialization_error(text)


def _escape_attrib_html(text, encoding):
    """Escape an HTML attribute value (``&``, ``>`` and ``"``) and
    encode it.

    Unlike _escape_attrib(), ``<`` and newlines are left untouched.
    Raises TypeError (via _raise_serialization_error) on bad input.
    """
    try:
        if "&" in text:
            text = text.replace("&", "&amp;")
        if ">" in text:
            text = text.replace(">", "&gt;")
        if "\"" in text:
            text = text.replace("\"", "&quot;")
        return text.encode(encoding, "xmlcharrefreplace")
    except (TypeError, AttributeError):
        _raise_serialization_error(text)


def _serialize_html(write, elem, encoding, qnames, namespaces):
    """Recursively write *elem* and its subtree as HTML.

    write      -- callable receiving each output fragment
    elem       -- element to serialize
    encoding   -- output encoding passed to the escape helpers
    qnames     -- mapping of tag/attribute names to their serialized
                  form, as built by _namespaces()
    namespaces -- mapping of namespace uri to prefix whose ``xmlns``
                  declarations are written on this element, or None;
                  children are always serialized with None
    """
    tag = elem.tag
    text = elem.text
    if tag is Comment:
        write("<!--%s-->" % _escape_cdata(text, encoding))
    elif tag is ProcessingInstruction:
        write("<?%s?>" % _escape_cdata(text, encoding))
    else:
        tag = qnames[tag]
        if tag is None:
            # no tag of its own: emit only the text and the children
            if text:
                write(_escape_cdata(text, encoding))
            for e in elem:
                _serialize_html(write, e, encoding, qnames, None)
        else:
            write("<" + tag)
            # sorted() gives the same lexical order as list.sort() but
            # does not require items() to return a list
            for k, v in sorted(elem.items()):
                if isinstance(k, QName):
                    k = k.text
                if isinstance(v, QName):
                    v = qnames[v.text]
                else:
                    v = _escape_attrib_html(v, encoding)
                # FIXME: handle boolean attributes
                write(" %s=\"%s\"" % (qnames[k], v))
            if namespaces:
                # sort on prefix
                declarations = sorted(namespaces.items(),
                                      key=lambda x: x[1])
                for v, k in declarations:
                    if k:
                        k = ":" + k
                    write(" xmlns%s=\"%s\"" % (
                        k.encode(encoding),
                        _escape_attrib(v, encoding)
                    ))
            write(">")
            tag = tag.lower()
            if text:
                if tag == "script" or tag == "style":
                    # script/style content is written unescaped
                    write(_encode(text, encoding))
                else:
                    write(_escape_cdata(text, encoding))
            for e in elem:
                _serialize_html(write, e, encoding, qnames, None)
            if tag not in HTML_EMPTY:
                write("</" + tag + ">")
    if elem.tail:
        write(_escape_cdata(elem.tail, encoding))


def write_html(root, f,
               # keyword arguments
               encoding="us-ascii",
               default_namespace=None):
    """Serialize the tree rooted at *root* as HTML to *f*.

    f                 -- an object with a ``write`` method, or a file
                         name; a file name is opened in "wb" mode and
                         closed again before returning
    encoding          -- output encoding; a false value means "us-ascii"
    default_namespace -- namespace uri to use as the default namespace
    """
    assert root is not None
    opened = None
    if not hasattr(f, "write"):
        f = opened = open(f, "wb")
    try:
        write = f.write
        if not encoding:
            encoding = "us-ascii"
        qnames, namespaces = _namespaces(
            root, encoding, default_namespace
        )
        _serialize_html(
            write, root, encoding, qnames, namespaces
        )
    finally:
        # only close what we opened ourselves; caller-supplied objects
        # stay open
        if opened is not None:
            opened.close()

# --------------------------------------------------------------------
# serialization support


def _namespaces(elem, encoding, default_namespace=None):
    """Identify the qualified names and namespaces used in a tree.

    Returns a ``(qnames, namespaces)`` tuple: *qnames* maps every tag,
    attribute name and QName value found in the tree to its encoded
    ``prefix:local`` form, and *namespaces* maps namespace uris to
    prefixes.

    Raises ValueError if *default_namespace* is given and the tree
    contains a non-qualified name, and TypeError (via
    _raise_serialization_error) for tags that cannot be serialized.
    """
    # maps qnames to *encoded* prefix:local names
    qnames = {None: None}

    # maps uri:s to prefixes
    namespaces = {}
    if default_namespace:
        namespaces[default_namespace] = ""

    def encode(text):
        return text.encode(encoding)

    def add_qname(qname):
        # calculate serialized qname representation
        try:
            if qname[:1] == "{":
                uri, tag = qname[1:].split("}", 1)
                prefix = namespaces.get(uri)
                if prefix is None:
                    prefix = _namespace_map.get(uri)
                    if prefix is None:
                        prefix = "ns%d" % len(namespaces)
                    if prefix != "xml":
                        namespaces[uri] = prefix
                if prefix:
                    qnames[qname] = encode("%s:%s" % (prefix, tag))
                else:
                    qnames[qname] = encode(tag)  # default element
            else:
                if default_namespace:
                    # FIXME: can this be handled in XML 1.0?
                    raise ValueError(
                        "cannot use non-qualified names with "
                        "default_namespace option"
                    )
                qnames[qname] = encode(qname)
        except TypeError:
            _raise_serialization_error(qname)

    # populate qname and namespaces table
    try:
        iterate = elem.iter
    except AttributeError:
        iterate = elem.getiterator  # cET compatibility
    for node in iterate():
        tag = node.tag
        if isinstance(tag, QName) and tag.text not in qnames:
            add_qname(tag.text)
        elif isinstance(tag, basestring):
            if tag not in qnames:
                add_qname(tag)
        elif tag is not None and tag is not Comment and tag is not PI:
            _raise_serialization_error(tag)
        for key, value in node.items():
            if isinstance(key, QName):
                key = key.text
            if key not in qnames:
                add_qname(key)
            if isinstance(value, QName) and value.text not in qnames:
                add_qname(value.text)
        text = node.text
        if isinstance(text, QName) and text.text not in qnames:
            add_qname(text.text)
    return qnames, namespaces


def to_html_string(element, encoding=None):
    """Return the HTML serialization of *element* as a single string.

    *encoding* is passed through to write_html(), which falls back to
    "us-ascii" when it is None.
    """
    class _Collector:
        # minimal stand-in for a file: write_html() only needs ``write``
        pass
    chunks = []
    sink = _Collector()
    sink.write = chunks.append
    write_html(ElementTree(element).getroot(), sink, encoding)
    return "".join(chunks)