1from __future__ import print_function, division, absolute_import 2from fontTools.misc.py23 import * 3from fontTools.misc import eexec 4from .psOperators import * 5import re 6try: 7 from collections.abc import Callable 8except ImportError: # python < 3.3 9 from collections import Callable 10from string import whitespace 11import logging 12 13 14log = logging.getLogger(__name__) 15 16ps_special = b'()<>[]{}%' # / is one too, but we take care of that one differently 17 18skipwhiteRE = re.compile(bytesjoin([b"[", whitespace, b"]*"])) 19endofthingPat = bytesjoin([b"[^][(){}<>/%", whitespace, b"]*"]) 20endofthingRE = re.compile(endofthingPat) 21commentRE = re.compile(b"%[^\n\r]*") 22 23# XXX This not entirely correct as it doesn't allow *nested* embedded parens: 24stringPat = br""" 25 \( 26 ( 27 ( 28 [^()]* \ [()] 29 ) 30 | 31 ( 32 [^()]* \( [^()]* \) 33 ) 34 )* 35 [^()]* 36 \) 37""" 38stringPat = b"".join(stringPat.split()) 39stringRE = re.compile(stringPat) 40 41hexstringRE = re.compile(bytesjoin([b"<[", whitespace, b"0-9A-Fa-f]*>"])) 42 43class PSTokenError(Exception): pass 44class PSError(Exception): pass 45 46 47class PSTokenizer(object): 48 49 def __init__(self, buf=b'', encoding="ascii"): 50 # Force self.buf to be a byte string 51 buf = tobytes(buf) 52 self.buf = buf 53 self.len = len(buf) 54 self.pos = 0 55 self.closed = False 56 self.encoding = encoding 57 58 def read(self, n=-1): 59 """Read at most 'n' bytes from the buffer, or less if the read 60 hits EOF before obtaining 'n' bytes. 61 If 'n' is negative or omitted, read all data until EOF is reached. 62 """ 63 if self.closed: 64 raise ValueError("I/O operation on closed file") 65 if n is None or n < 0: 66 newpos = self.len 67 else: 68 newpos = min(self.pos+n, self.len) 69 r = self.buf[self.pos:newpos] 70 self.pos = newpos 71 return r 72 73 def close(self): 74 if not self.closed: 75 self.closed = True 76 del self.buf, self.pos 77 78 def getnexttoken(self, 79 # localize some stuff, for performance 80 len=len, 81 ps_special=ps_special, 82 stringmatch=stringRE.match, 83 hexstringmatch=hexstringRE.match, 84 commentmatch=commentRE.match, 85 endmatch=endofthingRE.match): 86 87 self.skipwhite() 88 if self.pos >= self.len: 89 return None, None 90 pos = self.pos 91 buf = self.buf 92 char = bytechr(byteord(buf[pos])) 93 if char in ps_special: 94 if char in b'{}[]': 95 tokentype = 'do_special' 96 token = char 97 elif char == b'%': 98 tokentype = 'do_comment' 99 _, nextpos = commentmatch(buf, pos).span() 100 token = buf[pos:nextpos] 101 elif char == b'(': 102 tokentype = 'do_string' 103 m = stringmatch(buf, pos) 104 if m is None: 105 raise PSTokenError('bad string at character %d' % pos) 106 _, nextpos = m.span() 107 token = buf[pos:nextpos] 108 elif char == b'<': 109 tokentype = 'do_hexstring' 110 m = hexstringmatch(buf, pos) 111 if m is None: 112 raise PSTokenError('bad hexstring at character %d' % pos) 113 _, nextpos = m.span() 114 token = buf[pos:nextpos] 115 else: 116 raise PSTokenError('bad token at character %d' % pos) 117 else: 118 if char == b'/': 119 tokentype = 'do_literal' 120 m = endmatch(buf, pos+1) 121 else: 122 tokentype = '' 123 m = endmatch(buf, pos) 124 if m is None: 125 raise PSTokenError('bad token at character %d' % pos) 126 _, nextpos = m.span() 127 token = buf[pos:nextpos] 128 self.pos = pos + len(token) 129 token = tostr(token, encoding=self.encoding) 130 return tokentype, token 131 132 def skipwhite(self, whitematch=skipwhiteRE.match): 133 _, nextpos = whitematch(self.buf, self.pos).span() 134 self.pos = nextpos 135 136 def starteexec(self): 137 self.pos = self.pos + 1 138 self.dirtybuf = self.buf[self.pos:] 139 self.buf, R = eexec.decrypt(self.dirtybuf, 55665) 140 self.len = len(self.buf) 141 self.pos = 4 142 143 def stopeexec(self): 144 if not hasattr(self, 'dirtybuf'): 145 return 146 self.buf = self.dirtybuf 147 del self.dirtybuf 148 149 150class PSInterpreter(PSOperators): 151 152 def __init__(self, encoding="ascii"): 153 systemdict = {} 154 userdict = {} 155 self.encoding = encoding 156 self.dictstack = [systemdict, userdict] 157 self.stack = [] 158 self.proclevel = 0 159 self.procmark = ps_procmark() 160 self.fillsystemdict() 161 162 def fillsystemdict(self): 163 systemdict = self.dictstack[0] 164 systemdict['['] = systemdict['mark'] = self.mark = ps_mark() 165 systemdict[']'] = ps_operator(']', self.do_makearray) 166 systemdict['true'] = ps_boolean(1) 167 systemdict['false'] = ps_boolean(0) 168 systemdict['StandardEncoding'] = ps_array(ps_StandardEncoding) 169 systemdict['FontDirectory'] = ps_dict({}) 170 self.suckoperators(systemdict, self.__class__) 171 172 def suckoperators(self, systemdict, klass): 173 for name in dir(klass): 174 attr = getattr(self, name) 175 if isinstance(attr, Callable) and name[:3] == 'ps_': 176 name = name[3:] 177 systemdict[name] = ps_operator(name, attr) 178 for baseclass in klass.__bases__: 179 self.suckoperators(systemdict, baseclass) 180 181 def interpret(self, data, getattr=getattr): 182 tokenizer = self.tokenizer = PSTokenizer(data, self.encoding) 183 getnexttoken = tokenizer.getnexttoken 184 do_token = self.do_token 185 handle_object = self.handle_object 186 try: 187 while 1: 188 tokentype, token = getnexttoken() 189 if not token: 190 break 191 if tokentype: 192 handler = getattr(self, tokentype) 193 object = handler(token) 194 else: 195 object = do_token(token) 196 if object is not None: 197 handle_object(object) 198 tokenizer.close() 199 self.tokenizer = None 200 except: 201 if self.tokenizer is not None: 202 log.debug( 203 'ps error:\n' 204 '- - - - - - -\n' 205 '%s\n' 206 '>>>\n' 207 '%s\n' 208 '- - - - - - -', 209 self.tokenizer.buf[self.tokenizer.pos-50:self.tokenizer.pos], 210 self.tokenizer.buf[self.tokenizer.pos:self.tokenizer.pos+50]) 211 raise 212 213 def handle_object(self, object): 214 if not (self.proclevel or object.literal or object.type == 'proceduretype'): 215 if object.type != 'operatortype': 216 object = self.resolve_name(object.value) 217 if object.literal: 218 self.push(object) 219 else: 220 if object.type == 'proceduretype': 221 self.call_procedure(object) 222 else: 223 object.function() 224 else: 225 self.push(object) 226 227 def call_procedure(self, proc): 228 handle_object = self.handle_object 229 for item in proc.value: 230 handle_object(item) 231 232 def resolve_name(self, name): 233 dictstack = self.dictstack 234 for i in range(len(dictstack)-1, -1, -1): 235 if name in dictstack[i]: 236 return dictstack[i][name] 237 raise PSError('name error: ' + str(name)) 238 239 def do_token(self, token, 240 int=int, 241 float=float, 242 ps_name=ps_name, 243 ps_integer=ps_integer, 244 ps_real=ps_real): 245 try: 246 num = int(token) 247 except (ValueError, OverflowError): 248 try: 249 num = float(token) 250 except (ValueError, OverflowError): 251 if '#' in token: 252 hashpos = token.find('#') 253 try: 254 base = int(token[:hashpos]) 255 num = int(token[hashpos+1:], base) 256 except (ValueError, OverflowError): 257 return ps_name(token) 258 else: 259 return ps_integer(num) 260 else: 261 return ps_name(token) 262 else: 263 return ps_real(num) 264 else: 265 return ps_integer(num) 266 267 def do_comment(self, token): 268 pass 269 270 def do_literal(self, token): 271 return ps_literal(token[1:]) 272 273 def do_string(self, token): 274 return ps_string(token[1:-1]) 275 276 def do_hexstring(self, token): 277 hexStr = "".join(token[1:-1].split()) 278 if len(hexStr) % 2: 279 hexStr = hexStr + '0' 280 cleanstr = [] 281 for i in range(0, len(hexStr), 2): 282 cleanstr.append(chr(int(hexStr[i:i+2], 16))) 283 cleanstr = "".join(cleanstr) 284 return ps_string(cleanstr) 285 286 def do_special(self, token): 287 if token == '{': 288 self.proclevel = self.proclevel + 1 289 return self.procmark 290 elif token == '}': 291 proc = [] 292 while 1: 293 topobject = self.pop() 294 if topobject == self.procmark: 295 break 296 proc.append(topobject) 297 self.proclevel = self.proclevel - 1 298 proc.reverse() 299 return ps_procedure(proc) 300 elif token == '[': 301 return self.mark 302 elif token == ']': 303 return ps_name(']') 304 else: 305 raise PSTokenError('huh?') 306 307 def push(self, object): 308 self.stack.append(object) 309 310 def pop(self, *types): 311 stack = self.stack 312 if not stack: 313 raise PSError('stack underflow') 314 object = stack[-1] 315 if types: 316 if object.type not in types: 317 raise PSError('typecheck, expected %s, found %s' % (repr(types), object.type)) 318 del stack[-1] 319 return object 320 321 def do_makearray(self): 322 array = [] 323 while 1: 324 topobject = self.pop() 325 if topobject == self.mark: 326 break 327 array.append(topobject) 328 array.reverse() 329 self.push(ps_array(array)) 330 331 def close(self): 332 """Remove circular references.""" 333 del self.stack 334 del self.dictstack 335 336 337def unpack_item(item): 338 tp = type(item.value) 339 if tp == dict: 340 newitem = {} 341 for key, value in item.value.items(): 342 newitem[key] = unpack_item(value) 343 elif tp == list: 344 newitem = [None] * len(item.value) 345 for i in range(len(item.value)): 346 newitem[i] = unpack_item(item.value[i]) 347 if item.type == 'proceduretype': 348 newitem = tuple(newitem) 349 else: 350 newitem = item.value 351 return newitem 352 353def suckfont(data, encoding="ascii"): 354 m = re.search(br"/FontName\s+/([^ \t\n\r]+)\s+def", data) 355 if m: 356 fontName = m.group(1) 357 else: 358 fontName = None 359 interpreter = PSInterpreter(encoding=encoding) 360 interpreter.interpret(b"/Helvetica 4 dict dup /Encoding StandardEncoding put definefont pop") 361 interpreter.interpret(data) 362 fontdir = interpreter.dictstack[0]['FontDirectory'].value 363 if fontName in fontdir: 364 rawfont = fontdir[fontName] 365 else: 366 # fall back, in case fontName wasn't found 367 fontNames = list(fontdir.keys()) 368 if len(fontNames) > 1: 369 fontNames.remove("Helvetica") 370 fontNames.sort() 371 rawfont = fontdir[fontNames[0]] 372 interpreter.close() 373 return unpack_item(rawfont) 374