from __future__ import print_function, division, absolute_import from fontTools.misc.py23 import * from fontTools.misc import eexec from .psOperators import * import re import collections from string import whitespace ps_special = '()<>[]{}%' # / is one too, but we take care of that one differently skipwhiteRE = re.compile("[%s]*" % whitespace) endofthingPat = "[^][(){}<>/%%%s]*" % whitespace endofthingRE = re.compile(endofthingPat) commentRE = re.compile("%[^\n\r]*") # XXX This not entirely correct as it doesn't allow *nested* embedded parens: stringPat = r""" \( ( ( [^()]* \ [()] ) | ( [^()]* \( [^()]* \) ) )* [^()]* \) """ stringPat = "".join(stringPat.split()) stringRE = re.compile(stringPat) hexstringRE = re.compile("<[%s0-9A-Fa-f]*>" % whitespace) class PSTokenError(Exception): pass class PSError(Exception): pass class PSTokenizer(StringIO): def getnexttoken(self, # localize some stuff, for performance len=len, ps_special=ps_special, stringmatch=stringRE.match, hexstringmatch=hexstringRE.match, commentmatch=commentRE.match, endmatch=endofthingRE.match, whitematch=skipwhiteRE.match): _, nextpos = whitematch(self.buf, self.pos).span() self.pos = nextpos if self.pos >= self.len: return None, None pos = self.pos buf = self.buf char = buf[pos] if char in ps_special: if char in '{}[]': tokentype = 'do_special' token = char elif char == '%': tokentype = 'do_comment' _, nextpos = commentmatch(buf, pos).span() token = buf[pos:nextpos] elif char == '(': tokentype = 'do_string' m = stringmatch(buf, pos) if m is None: raise PSTokenError('bad string at character %d' % pos) _, nextpos = m.span() token = buf[pos:nextpos] elif char == '<': tokentype = 'do_hexstring' m = hexstringmatch(buf, pos) if m is None: raise PSTokenError('bad hexstring at character %d' % pos) _, nextpos = m.span() token = buf[pos:nextpos] else: raise PSTokenError('bad token at character %d' % pos) else: if char == '/': tokentype = 'do_literal' m = endmatch(buf, pos+1) else: tokentype = '' m = endmatch(buf, pos) if m is None: raise PSTokenError('bad token at character %d' % pos) _, nextpos = m.span() token = buf[pos:nextpos] self.pos = pos + len(token) return tokentype, token def skipwhite(self, whitematch=skipwhiteRE.match): _, nextpos = whitematch(self.buf, self.pos).span() self.pos = nextpos def starteexec(self): self.pos = self.pos + 1 #self.skipwhite() self.dirtybuf = self.buf[self.pos:] self.buf, R = eexec.decrypt(self.dirtybuf, 55665) self.len = len(self.buf) self.pos = 4 def stopeexec(self): if not hasattr(self, 'dirtybuf'): return self.buf = self.dirtybuf del self.dirtybuf def flush(self): if self.buflist: self.buf = self.buf + "".join(self.buflist) self.buflist = [] class PSInterpreter(PSOperators): def __init__(self): systemdict = {} userdict = {} self.dictstack = [systemdict, userdict] self.stack = [] self.proclevel = 0 self.procmark = ps_procmark() self.fillsystemdict() def fillsystemdict(self): systemdict = self.dictstack[0] systemdict['['] = systemdict['mark'] = self.mark = ps_mark() systemdict[']'] = ps_operator(']', self.do_makearray) systemdict['true'] = ps_boolean(1) systemdict['false'] = ps_boolean(0) systemdict['StandardEncoding'] = ps_array(ps_StandardEncoding) systemdict['FontDirectory'] = ps_dict({}) self.suckoperators(systemdict, self.__class__) def suckoperators(self, systemdict, klass): for name in dir(klass): attr = getattr(self, name) if isinstance(attr, collections.Callable) and name[:3] == 'ps_': name = name[3:] systemdict[name] = ps_operator(name, attr) for baseclass in klass.__bases__: self.suckoperators(systemdict, baseclass) def interpret(self, data, getattr = getattr): tokenizer = self.tokenizer = PSTokenizer(data) getnexttoken = tokenizer.getnexttoken do_token = self.do_token handle_object = self.handle_object try: while 1: tokentype, token = getnexttoken() #print token if not token: break if tokentype: handler = getattr(self, tokentype) object = handler(token) else: object = do_token(token) if object is not None: handle_object(object) tokenizer.close() self.tokenizer = None finally: if self.tokenizer is not None: if 0: print('ps error:\n- - - - - - -') print(self.tokenizer.buf[self.tokenizer.pos-50:self.tokenizer.pos]) print('>>>') print(self.tokenizer.buf[self.tokenizer.pos:self.tokenizer.pos+50]) print('- - - - - - -') def handle_object(self, object): if not (self.proclevel or object.literal or object.type == 'proceduretype'): if object.type != 'operatortype': object = self.resolve_name(object.value) if object.literal: self.push(object) else: if object.type == 'proceduretype': self.call_procedure(object) else: object.function() else: self.push(object) def call_procedure(self, proc): handle_object = self.handle_object for item in proc.value: handle_object(item) def resolve_name(self, name): dictstack = self.dictstack for i in range(len(dictstack)-1, -1, -1): if name in dictstack[i]: return dictstack[i][name] raise PSError('name error: ' + str(name)) def do_token(self, token, int=int, float=float, ps_name=ps_name, ps_integer=ps_integer, ps_real=ps_real): try: num = int(token) except (ValueError, OverflowError): try: num = float(token) except (ValueError, OverflowError): if '#' in token: hashpos = token.find('#') try: base = int(token[:hashpos]) num = int(token[hashpos+1:], base) except (ValueError, OverflowError): return ps_name(token) else: return ps_integer(num) else: return ps_name(token) else: return ps_real(num) else: return ps_integer(num) def do_comment(self, token): pass def do_literal(self, token): return ps_literal(token[1:]) def do_string(self, token): return ps_string(token[1:-1]) def do_hexstring(self, token): hexStr = "".join(token[1:-1].split()) if len(hexStr) % 2: hexStr = hexStr + '0' cleanstr = [] for i in range(0, len(hexStr), 2): cleanstr.append(chr(int(hexStr[i:i+2], 16))) cleanstr = "".join(cleanstr) return ps_string(cleanstr) def do_special(self, token): if token == '{': self.proclevel = self.proclevel + 1 return self.procmark elif token == '}': proc = [] while 1: topobject = self.pop() if topobject == self.procmark: break proc.append(topobject) self.proclevel = self.proclevel - 1 proc.reverse() return ps_procedure(proc) elif token == '[': return self.mark elif token == ']': return ps_name(']') else: raise PSTokenError('huh?') def push(self, object): self.stack.append(object) def pop(self, *types): stack = self.stack if not stack: raise PSError('stack underflow') object = stack[-1] if types: if object.type not in types: raise PSError('typecheck, expected %s, found %s' % (repr(types), object.type)) del stack[-1] return object def do_makearray(self): array = [] while 1: topobject = self.pop() if topobject == self.mark: break array.append(topobject) array.reverse() self.push(ps_array(array)) def close(self): """Remove circular references.""" del self.stack del self.dictstack def unpack_item(item): tp = type(item.value) if tp == dict: newitem = {} for key, value in item.value.items(): newitem[key] = unpack_item(value) elif tp == list: newitem = [None] * len(item.value) for i in range(len(item.value)): newitem[i] = unpack_item(item.value[i]) if item.type == 'proceduretype': newitem = tuple(newitem) else: newitem = item.value return newitem def suckfont(data): import re m = re.search(br"/FontName\s+/([^ \t\n\r]+)\s+def", data) if m: fontName = m.group(1) else: fontName = None interpreter = PSInterpreter() interpreter.interpret(b"/Helvetica 4 dict dup /Encoding StandardEncoding put definefont pop") interpreter.interpret(data) fontdir = interpreter.dictstack[0]['FontDirectory'].value if fontName in fontdir: rawfont = fontdir[fontName] else: # fall back, in case fontName wasn't found fontNames = list(fontdir.keys()) if len(fontNames) > 1: fontNames.remove("Helvetica") fontNames.sort() rawfont = fontdir[fontNames[0]] interpreter.close() return unpack_item(rawfont) if __name__ == "__main__": import EasyDialogs path = EasyDialogs.AskFileForOpen() if path: from fontTools import t1Lib data, kind = t1Lib.read(path) font = suckfont(data)