"""Extend the Python codecs module with a few encodings that are used in OpenType (name table) but missing from Python. See https://github.com/fonttools/fonttools/issues/236 for details.""" import codecs import encodings class ExtendCodec(codecs.Codec): def __init__(self, name, base_encoding, mapping): self.name = name self.base_encoding = base_encoding self.mapping = mapping self.reverse = {v:k for k,v in mapping.items()} self.max_len = max(len(v) for v in mapping.values()) self.info = codecs.CodecInfo(name=self.name, encode=self.encode, decode=self.decode) codecs.register_error(name, self.error) def _map(self, mapper, output_type, exc_type, input, errors): base_error_handler = codecs.lookup_error(errors) length = len(input) out = output_type() while input: # first try to use self.error as the error handler try: part = mapper(input, self.base_encoding, errors=self.name) out += part break # All converted except exc_type as e: # else convert the correct part, handle error as requested and continue out += mapper(input[:e.start], self.base_encoding, self.name) replacement, pos = base_error_handler(e) out += replacement input = input[pos:] return out, length def encode(self, input, errors='strict'): return self._map(codecs.encode, bytes, UnicodeEncodeError, input, errors) def decode(self, input, errors='strict'): return self._map(codecs.decode, str, UnicodeDecodeError, input, errors) def error(self, e): if isinstance(e, UnicodeDecodeError): for end in range(e.start + 1, e.end + 1): s = e.object[e.start:end] if s in self.mapping: return self.mapping[s], end elif isinstance(e, UnicodeEncodeError): for end in range(e.start + 1, e.start + self.max_len + 1): s = e.object[e.start:end] if s in self.reverse: return self.reverse[s], end e.encoding = self.name raise e _extended_encodings = { "x_mac_japanese_ttx": ("shift_jis", { b"\xFC": chr(0x007C), b"\x7E": chr(0x007E), b"\x80": chr(0x005C), b"\xA0": chr(0x00A0), b"\xFD": chr(0x00A9), b"\xFE": chr(0x2122), b"\xFF": chr(0x2026), }), "x_mac_trad_chinese_ttx": ("big5", { b"\x80": chr(0x005C), b"\xA0": chr(0x00A0), b"\xFD": chr(0x00A9), b"\xFE": chr(0x2122), b"\xFF": chr(0x2026), }), "x_mac_korean_ttx": ("euc_kr", { b"\x80": chr(0x00A0), b"\x81": chr(0x20A9), b"\x82": chr(0x2014), b"\x83": chr(0x00A9), b"\xFE": chr(0x2122), b"\xFF": chr(0x2026), }), "x_mac_simp_chinese_ttx": ("gb2312", { b"\x80": chr(0x00FC), b"\xA0": chr(0x00A0), b"\xFD": chr(0x00A9), b"\xFE": chr(0x2122), b"\xFF": chr(0x2026), }), } _cache = {} def search_function(name): name = encodings.normalize_encoding(name) # Rather undocumented... if name in _extended_encodings: if name not in _cache: base_encoding, mapping = _extended_encodings[name] assert(name[-4:] == "_ttx") # Python 2 didn't have any of the encodings that we are implementing # in this file. Python 3 added aliases for the East Asian ones, mapping # them "temporarily" to the same base encoding as us, with a comment # suggesting that full implementation will appear some time later. # As such, try the Python version of the x_mac_... first, if that is found, # use *that* as our base encoding. This would make our encoding upgrade # to the full encoding when and if Python finally implements that. # http://bugs.python.org/issue24041 base_encodings = [name[:-4], base_encoding] for base_encoding in base_encodings: try: codecs.lookup(base_encoding) except LookupError: continue _cache[name] = ExtendCodec(name, base_encoding, mapping) break return _cache[name].info return None codecs.register(search_function)