1"""Extend the Python codecs module with a few encodings that are used in OpenType (name table) 2but missing from Python. See https://github.com/fonttools/fonttools/issues/236 for details.""" 3 4import codecs 5import encodings 6 7class ExtendCodec(codecs.Codec): 8 9 def __init__(self, name, base_encoding, mapping): 10 self.name = name 11 self.base_encoding = base_encoding 12 self.mapping = mapping 13 self.reverse = {v:k for k,v in mapping.items()} 14 self.max_len = max(len(v) for v in mapping.values()) 15 self.info = codecs.CodecInfo(name=self.name, encode=self.encode, decode=self.decode) 16 codecs.register_error(name, self.error) 17 18 def _map(self, mapper, output_type, exc_type, input, errors): 19 base_error_handler = codecs.lookup_error(errors) 20 length = len(input) 21 out = output_type() 22 while input: 23 # first try to use self.error as the error handler 24 try: 25 part = mapper(input, self.base_encoding, errors=self.name) 26 out += part 27 break # All converted 28 except exc_type as e: 29 # else convert the correct part, handle error as requested and continue 30 out += mapper(input[:e.start], self.base_encoding, self.name) 31 replacement, pos = base_error_handler(e) 32 out += replacement 33 input = input[pos:] 34 return out, length 35 36 def encode(self, input, errors='strict'): 37 return self._map(codecs.encode, bytes, UnicodeEncodeError, input, errors) 38 39 def decode(self, input, errors='strict'): 40 return self._map(codecs.decode, str, UnicodeDecodeError, input, errors) 41 42 def error(self, e): 43 if isinstance(e, UnicodeDecodeError): 44 for end in range(e.start + 1, e.end + 1): 45 s = e.object[e.start:end] 46 if s in self.mapping: 47 return self.mapping[s], end 48 elif isinstance(e, UnicodeEncodeError): 49 for end in range(e.start + 1, e.start + self.max_len + 1): 50 s = e.object[e.start:end] 51 if s in self.reverse: 52 return self.reverse[s], end 53 e.encoding = self.name 54 raise e 55 56 57_extended_encodings = { 58 "x_mac_japanese_ttx": ("shift_jis", { 59 b"\xFC": chr(0x007C), 60 b"\x7E": chr(0x007E), 61 b"\x80": chr(0x005C), 62 b"\xA0": chr(0x00A0), 63 b"\xFD": chr(0x00A9), 64 b"\xFE": chr(0x2122), 65 b"\xFF": chr(0x2026), 66 }), 67 "x_mac_trad_chinese_ttx": ("big5", { 68 b"\x80": chr(0x005C), 69 b"\xA0": chr(0x00A0), 70 b"\xFD": chr(0x00A9), 71 b"\xFE": chr(0x2122), 72 b"\xFF": chr(0x2026), 73 }), 74 "x_mac_korean_ttx": ("euc_kr", { 75 b"\x80": chr(0x00A0), 76 b"\x81": chr(0x20A9), 77 b"\x82": chr(0x2014), 78 b"\x83": chr(0x00A9), 79 b"\xFE": chr(0x2122), 80 b"\xFF": chr(0x2026), 81 }), 82 "x_mac_simp_chinese_ttx": ("gb2312", { 83 b"\x80": chr(0x00FC), 84 b"\xA0": chr(0x00A0), 85 b"\xFD": chr(0x00A9), 86 b"\xFE": chr(0x2122), 87 b"\xFF": chr(0x2026), 88 }), 89} 90 91_cache = {} 92 93def search_function(name): 94 name = encodings.normalize_encoding(name) # Rather undocumented... 95 if name in _extended_encodings: 96 if name not in _cache: 97 base_encoding, mapping = _extended_encodings[name] 98 assert(name[-4:] == "_ttx") 99 # Python 2 didn't have any of the encodings that we are implementing 100 # in this file. Python 3 added aliases for the East Asian ones, mapping 101 # them "temporarily" to the same base encoding as us, with a comment 102 # suggesting that full implementation will appear some time later. 103 # As such, try the Python version of the x_mac_... first, if that is found, 104 # use *that* as our base encoding. This would make our encoding upgrade 105 # to the full encoding when and if Python finally implements that. 


_extended_encodings = {
    "x_mac_japanese_ttx": ("shift_jis", {
        b"\xFC": chr(0x007C),
        b"\x7E": chr(0x007E),
        b"\x80": chr(0x005C),
        b"\xA0": chr(0x00A0),
        b"\xFD": chr(0x00A9),
        b"\xFE": chr(0x2122),
        b"\xFF": chr(0x2026),
    }),
    "x_mac_trad_chinese_ttx": ("big5", {
        b"\x80": chr(0x005C),
        b"\xA0": chr(0x00A0),
        b"\xFD": chr(0x00A9),
        b"\xFE": chr(0x2122),
        b"\xFF": chr(0x2026),
    }),
    "x_mac_korean_ttx": ("euc_kr", {
        b"\x80": chr(0x00A0),
        b"\x81": chr(0x20A9),
        b"\x82": chr(0x2014),
        b"\x83": chr(0x00A9),
        b"\xFE": chr(0x2122),
        b"\xFF": chr(0x2026),
    }),
    "x_mac_simp_chinese_ttx": ("gb2312", {
        b"\x80": chr(0x00FC),
        b"\xA0": chr(0x00A0),
        b"\xFD": chr(0x00A9),
        b"\xFE": chr(0x2122),
        b"\xFF": chr(0x2026),
    }),
}

_cache = {}


def search_function(name):
    name = encodings.normalize_encoding(name)  # Rather undocumented...
    if name in _extended_encodings:
        if name not in _cache:
            base_encoding, mapping = _extended_encodings[name]
            assert name[-4:] == "_ttx"
            # Python 2 didn't have any of the encodings that we are implementing
            # in this file. Python 3 added aliases for the East Asian ones, mapping
            # them "temporarily" to the same base encoding as us, with a comment
            # suggesting that full implementation will appear some time later.
            # As such, try the Python version of the x_mac_... first, if that is found,
            # use *that* as our base encoding. This would make our encoding upgrade
            # to the full encoding when and if Python finally implements that.
            # http://bugs.python.org/issue24041
            base_encodings = [name[:-4], base_encoding]
            for base_encoding in base_encodings:
                try:
                    codecs.lookup(base_encoding)
                except LookupError:
                    continue
                _cache[name] = ExtendCodec(name, base_encoding, mapping)
                break
        return _cache[name].info

    return None


codecs.register(search_function)
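
# Usage sketch: importing this module registers the search function above, after
# which the extended encodings behave like any other Python codec. The sample
# bytes below come from the x_mac_japanese_ttx table: 0x83 0x4A is ordinary
# shift_jis (katakana KA), while 0xFE and 0xFF decode only through the extension
# mapping.
#
#     b"\x83\x4A\xFE\xFF".decode("x_mac_japanese_ttx")  # -> "\u30ab\u2122\u2026"
#     chr(0x2122).encode("x_mac_japanese_ttx")          # -> b"\xfe"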