1"""xmlWriter.py -- Simple XML authoring class""" 2 3from __future__ import print_function, division, absolute_import 4from fontTools.misc.py23 import * 5import sys 6import os 7import string 8 9INDENT = " " 10 11 12class XMLWriter(object): 13 14 def __init__(self, fileOrPath, indentwhite=INDENT, idlefunc=None, encoding="utf_8", 15 newlinestr=None): 16 if encoding.lower().replace('-','').replace('_','') != 'utf8': 17 raise Exception('Only UTF-8 encoding is supported.') 18 if fileOrPath == '-': 19 fileOrPath = sys.stdout 20 if not hasattr(fileOrPath, "write"): 21 self.filename = fileOrPath 22 self.file = open(fileOrPath, "wb") 23 self._closeStream = True 24 else: 25 self.filename = None 26 # assume writable file object 27 self.file = fileOrPath 28 self._closeStream = False 29 30 # Figure out if writer expects bytes or unicodes 31 try: 32 # The bytes check should be first. See: 33 # https://github.com/fonttools/fonttools/pull/233 34 self.file.write(b'') 35 self.totype = tobytes 36 except TypeError: 37 # This better not fail. 38 self.file.write(tounicode('')) 39 self.totype = tounicode 40 self.indentwhite = self.totype(indentwhite) 41 if newlinestr is None: 42 self.newlinestr = self.totype(os.linesep) 43 else: 44 self.newlinestr = self.totype(newlinestr) 45 self.indentlevel = 0 46 self.stack = [] 47 self.needindent = 1 48 self.idlefunc = idlefunc 49 self.idlecounter = 0 50 self._writeraw('<?xml version="1.0" encoding="UTF-8"?>') 51 self.newline() 52 53 def __enter__(self): 54 return self 55 56 def __exit__(self, exception_type, exception_value, traceback): 57 self.close() 58 59 def close(self): 60 if self._closeStream: 61 self.file.close() 62 63 def write(self, string, indent=True): 64 """Writes text.""" 65 self._writeraw(escape(string), indent=indent) 66 67 def writecdata(self, string): 68 """Writes text in a CDATA section.""" 69 self._writeraw("<![CDATA[" + string + "]]>") 70 71 def write8bit(self, data, strip=False): 72 """Writes a bytes() sequence into the XML, escaping 73 non-ASCII bytes. When this is read in xmlReader, 74 the original bytes can be recovered by encoding to 75 'latin-1'.""" 76 self._writeraw(escape8bit(data), strip=strip) 77 78 def write_noindent(self, string): 79 """Writes text without indentation.""" 80 self._writeraw(escape(string), indent=False) 81 82 def _writeraw(self, data, indent=True, strip=False): 83 """Writes bytes, possibly indented.""" 84 if indent and self.needindent: 85 self.file.write(self.indentlevel * self.indentwhite) 86 self.needindent = 0 87 s = self.totype(data, encoding="utf_8") 88 if (strip): 89 s = s.strip() 90 self.file.write(s) 91 92 def newline(self): 93 self.file.write(self.newlinestr) 94 self.needindent = 1 95 idlecounter = self.idlecounter 96 if not idlecounter % 100 and self.idlefunc is not None: 97 self.idlefunc() 98 self.idlecounter = idlecounter + 1 99 100 def comment(self, data): 101 data = escape(data) 102 lines = data.split("\n") 103 self._writeraw("<!-- " + lines[0]) 104 for line in lines[1:]: 105 self.newline() 106 self._writeraw(" " + line) 107 self._writeraw(" -->") 108 109 def simpletag(self, _TAG_, *args, **kwargs): 110 attrdata = self.stringifyattrs(*args, **kwargs) 111 data = "<%s%s/>" % (_TAG_, attrdata) 112 self._writeraw(data) 113 114 def begintag(self, _TAG_, *args, **kwargs): 115 attrdata = self.stringifyattrs(*args, **kwargs) 116 data = "<%s%s>" % (_TAG_, attrdata) 117 self._writeraw(data) 118 self.stack.append(_TAG_) 119 self.indent() 120 121 def endtag(self, _TAG_): 122 assert self.stack and self.stack[-1] == _TAG_, "nonmatching endtag" 123 del self.stack[-1] 124 self.dedent() 125 data = "</%s>" % _TAG_ 126 self._writeraw(data) 127 128 def dumphex(self, data): 129 linelength = 16 130 hexlinelength = linelength * 2 131 chunksize = 8 132 for i in range(0, len(data), linelength): 133 hexline = hexStr(data[i:i+linelength]) 134 line = "" 135 white = "" 136 for j in range(0, hexlinelength, chunksize): 137 line = line + white + hexline[j:j+chunksize] 138 white = " " 139 self._writeraw(line) 140 self.newline() 141 142 def indent(self): 143 self.indentlevel = self.indentlevel + 1 144 145 def dedent(self): 146 assert self.indentlevel > 0 147 self.indentlevel = self.indentlevel - 1 148 149 def stringifyattrs(self, *args, **kwargs): 150 if kwargs: 151 assert not args 152 attributes = sorted(kwargs.items()) 153 elif args: 154 assert len(args) == 1 155 attributes = args[0] 156 else: 157 return "" 158 data = "" 159 for attr, value in attributes: 160 if not isinstance(value, (bytes, unicode)): 161 value = str(value) 162 data = data + ' %s="%s"' % (attr, escapeattr(value)) 163 return data 164 165 166def escape(data): 167 data = tostr(data, 'utf_8') 168 data = data.replace("&", "&") 169 data = data.replace("<", "<") 170 data = data.replace(">", ">") 171 data = data.replace("\r", " ") 172 return data 173 174def escapeattr(data): 175 data = escape(data) 176 data = data.replace('"', """) 177 return data 178 179def escape8bit(data): 180 """Input is Unicode string.""" 181 def escapechar(c): 182 n = ord(c) 183 if 32 <= n <= 127 and c not in "<&>": 184 return c 185 else: 186 return "&#" + repr(n) + ";" 187 return strjoin(map(escapechar, data.decode('latin-1'))) 188 189def hexStr(s): 190 h = string.hexdigits 191 r = '' 192 for c in s: 193 i = byteord(c) 194 r = r + h[(i >> 4) & 0xF] + h[i & 0xF] 195 return r 196