"""xmlWriter.py -- Simple XML authoring class"""

from fontTools.misc.textTools import byteord, strjoin, tobytes, tostr
import sys
import os
import string

INDENT = " "


class XMLWriter(object):
    """Incrementally write an XML document to a file path or file object.

    The writer keeps track of the current indentation level and of the
    stack of open tags, and emits the XML declaration as soon as it is
    constructed. It can be used as a context manager, in which case
    ``close()`` is called on exit.
    """

    def __init__(self, fileOrPath, indentwhite=INDENT, idlefunc=None, encoding="utf_8",
            newlinestr="\n"):
        """Open the output and write the XML declaration.

        Args:
            fileOrPath: ``'-'`` for ``sys.stdout``, an object with a
                ``write`` method, or a path that is opened in ``"wb"`` mode.
                Only a file opened here is closed by ``close()``.
            indentwhite: whitespace emitted once per indentation level.
            idlefunc: optional callable invoked from ``newline()`` on the
                first call and on every 100th call after that.
            encoding: must spell UTF-8 (case, ``-`` and ``_`` are ignored).
            newlinestr: line terminator; ``None`` selects ``os.linesep``.

        Raises:
            Exception: if ``encoding`` is anything other than UTF-8.
        """
        if encoding.lower().replace('-', '').replace('_', '') != 'utf8':
            raise Exception('Only UTF-8 encoding is supported.')
        if fileOrPath == '-':
            fileOrPath = sys.stdout
        if not hasattr(fileOrPath, "write"):
            self.filename = fileOrPath
            self.file = open(fileOrPath, "wb")
            self._closeStream = True
        else:
            self.filename = None
            # assume writable file object
            self.file = fileOrPath
            self._closeStream = False

        # Figure out if writer expects bytes or unicodes
        try:
            # The bytes check should be first.  See:
            # https://github.com/fonttools/fonttools/pull/233
            self.file.write(b'')
            self.totype = tobytes
        except TypeError:
            # This better not fail.
            self.file.write('')
            self.totype = tostr
        self.indentwhite = self.totype(indentwhite)
        if newlinestr is None:
            self.newlinestr = self.totype(os.linesep)
        else:
            self.newlinestr = self.totype(newlinestr)
        self.indentlevel = 0
        self.stack = []
        self.needindent = 1
        self.idlefunc = idlefunc
        self.idlecounter = 0
        self._writeraw('<?xml version="1.0" encoding="UTF-8"?>')
        self.newline()

    def __enter__(self):
        return self

    def __exit__(self, exception_type, exception_value, traceback):
        self.close()

    def close(self):
        """Close the underlying file, but only if this writer opened it."""
        if self._closeStream:
            self.file.close()

    def write(self, string, indent=True):
        """Writes text."""
        self._writeraw(escape(string), indent=indent)

    def writecdata(self, string):
        """Writes text in a CDATA section."""
        self._writeraw("<![CDATA[" + string + "]]>")

    def write8bit(self, data, strip=False):
        """Writes a bytes() sequence into the XML, escaping
        non-ASCII bytes.  When this is read in xmlReader,
        the original bytes can be recovered by encoding to
        'latin-1'."""
        self._writeraw(escape8bit(data), strip=strip)

    def write_noindent(self, string):
        """Writes text without indentation."""
        self._writeraw(escape(string), indent=False)

    def _writeraw(self, data, indent=True, strip=False):
        """Writes bytes, possibly indented."""
        if indent and self.needindent:
            # Indentation is written at most once per line; newline()
            # re-arms the flag.
            self.file.write(self.indentlevel * self.indentwhite)
            self.needindent = 0
        s = self.totype(data, encoding="utf_8")
        if strip:
            s = s.strip()
        self.file.write(s)

    def newline(self):
        """Write the line terminator and request indentation for the next write."""
        self.file.write(self.newlinestr)
        self.needindent = 1
        idlecounter = self.idlecounter
        # Call the idle hook on the first newline and every 100th after it.
        if not idlecounter % 100 and self.idlefunc is not None:
            self.idlefunc()
        self.idlecounter = idlecounter + 1

    def comment(self, data):
        """Write an XML comment; each "\\n" in ``data`` starts a new output line."""
        data = escape(data)
        lines = data.split("\n")
        self._writeraw("<!-- " + lines[0])
        for line in lines[1:]:
            self.newline()
            self._writeraw(" " + line)
        self._writeraw(" -->")

    def simpletag(self, _TAG_, *args, **kwargs):
        """Write a self-closing tag; attributes are given as for stringifyattrs()."""
        attrdata = self.stringifyattrs(*args, **kwargs)
        data = "<%s%s/>" % (_TAG_, attrdata)
        self._writeraw(data)

    def begintag(self, _TAG_, *args, **kwargs):
        """Write an opening tag, push it on the tag stack and indent one level."""
        attrdata = self.stringifyattrs(*args, **kwargs)
        data = "<%s%s>" % (_TAG_, attrdata)
        self._writeraw(data)
        self.stack.append(_TAG_)
        self.indent()

    def endtag(self, _TAG_):
        """Dedent and write the closing tag for the innermost open tag.

        Asserts that ``_TAG_`` matches the tag most recently opened with
        ``begintag()``.
        """
        assert self.stack and self.stack[-1] == _TAG_, "nonmatching endtag"
        del self.stack[-1]
        self.dedent()
        data = "</%s>" % _TAG_
        self._writeraw(data)

    def dumphex(self, data):
        """Write ``data`` as hex digits: 16 bytes per line, in groups of 8 digits."""
        linelength = 16
        hexlinelength = linelength * 2
        chunksize = 8
        for i in range(0, len(data), linelength):
            hexline = hexStr(data[i:i+linelength])
            # Always join the full number of groups, so that a short final
            # line keeps the same trailing separators as before.
            line = " ".join(
                hexline[j:j+chunksize]
                for j in range(0, hexlinelength, chunksize)
            )
            self._writeraw(line)
            self.newline()

    def indent(self):
        """Increase the indentation level by one."""
        self.indentlevel = self.indentlevel + 1

    def dedent(self):
        """Decrease the indentation level by one; asserts it is above zero."""
        assert self.indentlevel > 0
        self.indentlevel = self.indentlevel - 1

    def stringifyattrs(self, *args, **kwargs):
        """Format attributes as a string of `` name="value"`` pairs.

        Attributes are passed either as keyword arguments (written in sorted
        name order) or as one positional iterable of ``(name, value)`` pairs
        (written in the given order), never both. Values that are not
        ``bytes`` or ``str`` are converted with ``str()``; all values are
        escaped with ``escapeattr()``. Returns ``""`` when no attributes are
        given.
        """
        if kwargs:
            assert not args
            attributes = sorted(kwargs.items())
        elif args:
            assert len(args) == 1
            attributes = args[0]
        else:
            return ""
        parts = []
        for attr, value in attributes:
            if not isinstance(value, (bytes, str)):
                value = str(value)
            parts.append(' %s="%s"' % (attr, escapeattr(value)))
        return "".join(parts)


def escape(data):
    """Return ``data`` as text with XML markup characters escaped.

    ``&``, ``<`` and ``>`` become entity references, and a carriage return
    becomes the character reference ``&#13;``.
    """
    data = tostr(data, 'utf_8')
    # "&" must be handled first so that the ampersands introduced by the
    # other replacements are not escaped a second time.
    data = data.replace("&", "&amp;")
    data = data.replace("<", "&lt;")
    data = data.replace(">", "&gt;")
    data = data.replace("\r", "&#13;")
    return data


def escapeattr(data):
    """Escape ``data`` for use inside a double-quoted attribute value."""
    data = escape(data)
    data = data.replace('"', "&quot;")
    return data


def escape8bit(data):
    """Escape a bytes sequence for embedding in XML text.

    Input is a bytes object; it is decoded as 'latin-1'. Characters in the
    range 32..127 other than ``<``, ``&`` and ``>`` are kept as they are,
    and every other character is written as a decimal character reference.
    """
    def escapechar(c):
        n = ord(c)
        if 32 <= n <= 127 and c not in "<&>":
            return c
        return "&#%d;" % n
    return strjoin(map(escapechar, data.decode('latin-1')))


def hexStr(s):
    """Return the items of ``s`` as a string of two lowercase hex digits each."""
    digits = string.hexdigits
    return "".join(
        digits[(i >> 4) & 0xF] + digits[i & 0xF]
        for i in map(byteord, s)
    )