• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""Extend the Python codecs module with a few encodings that are used in OpenType (name table)
2but missing from Python.  See https://github.com/fonttools/fonttools/issues/236 for details."""
3
4import codecs
5import encodings
6
class ExtendCodec(codecs.Codec):
	"""Codec that layers a few extra byte <-> character pairs on a base codec.

	Conversion is delegated to ``base_encoding``.  Whenever the base codec
	cannot convert something, the error handler registered by this object
	(under the codec's own ``name``) consults ``mapping`` / ``reverse``; only
	if that also fails is the caller's requested ``errors`` policy applied.

	Args:
		name: name of the extended codec; also used as the name under which
			``self.error`` is registered with ``codecs.register_error``.
		base_encoding: name of the codec that does the bulk of the work.
		mapping: dict from ``bytes`` to the ``str`` they decode to.
	"""

	def __init__(self, name, base_encoding, mapping):
		self.name = name
		self.base_encoding = base_encoding
		self.mapping = mapping
		self.reverse = {v: k for k, v in mapping.items()}
		# Longest mapped string, bounding the lookahead in error() when
		# encoding.  default=0 keeps an empty mapping from raising ValueError.
		self.max_len = max((len(v) for v in mapping.values()), default=0)
		self.info = codecs.CodecInfo(name=self.name, encode=self.encode, decode=self.decode)
		# Side effect: installs (or replaces) a global error handler named `name`.
		codecs.register_error(name, self.error)

	def _map(self, mapper, output_type, exc_type, input, errors):
		"""Convert ``input`` with ``mapper``, falling back to ``errors``.

		Args:
			mapper: ``codecs.encode`` or ``codecs.decode``.
			output_type: ``bytes`` when encoding, ``str`` when decoding.
			exc_type: the UnicodeError subclass ``mapper`` raises on failure.
			input: the data to convert.
			errors: name of the error handler requested by the caller.

		Returns:
			``(converted, length)`` where ``length`` is ``len(input)``.
		"""
		base_error_handler = codecs.lookup_error(errors)
		length = len(input)
		out = output_type()
		while input:
			# first try to use self.error as the error handler
			try:
				out += mapper(input, self.base_encoding, errors=self.name)
				break  # All converted
			except exc_type as e:
				# else convert the correct part, handle error as requested and continue
				out += mapper(input[:e.start], self.base_encoding, self.name)
				replacement, pos = base_error_handler(e)
				if not isinstance(replacement, output_type):
					# Encode-side handlers such as 'replace' or 'ignore' hand
					# back str; it must be turned into bytes before it can be
					# appended to the output.
					replacement = mapper(replacement, self.base_encoding, self.name)
				out += replacement
				input = input[pos:]
		return out, length

	def encode(self, input, errors='strict'):
		"""Encode the str ``input``; returns ``(bytes, len(input))``."""
		return self._map(codecs.encode, bytes, UnicodeEncodeError, input, errors)

	def decode(self, input, errors='strict'):
		"""Decode the bytes ``input``; returns ``(str, len(input))``."""
		return self._map(codecs.decode, str, UnicodeDecodeError, input, errors)

	def error(self, e):
		"""Error handler that resolves failures of the base codec via the mapping.

		Returns ``(replacement, resume_position)`` when a prefix of the
		offending data is found in ``mapping`` (decoding) or ``reverse``
		(encoding).  Otherwise re-raises ``e`` with its ``encoding`` attribute
		set to this codec's name.
		"""
		if isinstance(e, UnicodeDecodeError):
			# Try successively longer prefixes of the undecodable byte range.
			for end in range(e.start + 1, e.end + 1):
				s = e.object[e.start:end]
				if s in self.mapping:
					return self.mapping[s], end
		elif isinstance(e, UnicodeEncodeError):
			# Try prefixes up to the longest string present in the mapping.
			for end in range(e.start + 1, e.start + self.max_len + 1):
				s = e.object[e.start:end]
				if s in self.reverse:
					return self.reverse[s], end
		e.encoding = self.name
		raise e
55
56
# Codec name -> (base encoding, overrides).  Each override maps one byte to
# the character it stands for in the extended codec.
_extended_encodings = {
	"x_mac_japanese_ttx": (
		"shift_jis",
		{
			b"\xFC": "\u007C",  # VERTICAL LINE
			b"\x7E": "\u007E",  # TILDE
			b"\x80": "\u005C",  # REVERSE SOLIDUS
			b"\xA0": "\u00A0",  # NO-BREAK SPACE
			b"\xFD": "\u00A9",  # COPYRIGHT SIGN
			b"\xFE": "\u2122",  # TRADE MARK SIGN
			b"\xFF": "\u2026",  # HORIZONTAL ELLIPSIS
		},
	),
	"x_mac_trad_chinese_ttx": (
		"big5",
		{
			b"\x80": "\u005C",  # REVERSE SOLIDUS
			b"\xA0": "\u00A0",  # NO-BREAK SPACE
			b"\xFD": "\u00A9",  # COPYRIGHT SIGN
			b"\xFE": "\u2122",  # TRADE MARK SIGN
			b"\xFF": "\u2026",  # HORIZONTAL ELLIPSIS
		},
	),
	"x_mac_korean_ttx": (
		"euc_kr",
		{
			b"\x80": "\u00A0",  # NO-BREAK SPACE
			b"\x81": "\u20A9",  # WON SIGN
			b"\x82": "\u2014",  # EM DASH
			b"\x83": "\u00A9",  # COPYRIGHT SIGN
			b"\xFE": "\u2122",  # TRADE MARK SIGN
			b"\xFF": "\u2026",  # HORIZONTAL ELLIPSIS
		},
	),
	"x_mac_simp_chinese_ttx": (
		"gb2312",
		{
			b"\x80": "\u00FC",  # LATIN SMALL LETTER U WITH DIAERESIS
			b"\xA0": "\u00A0",  # NO-BREAK SPACE
			b"\xFD": "\u00A9",  # COPYRIGHT SIGN
			b"\xFE": "\u2122",  # TRADE MARK SIGN
			b"\xFF": "\u2026",  # HORIZONTAL ELLIPSIS
		},
	),
}

# Codec objects already built, keyed by normalized codec name.
_cache = {}
92
def search_function(name):
	"""Codec search function for the extended ``*_ttx`` encodings.

	Args:
		name: encoding name as passed in by the codec registry.

	Returns:
		The ``codecs.CodecInfo`` of the matching extended codec, or ``None``
		when ``name`` is not one of the extended encodings or when none of
		its candidate base encodings is available in this interpreter.
	"""
	name = encodings.normalize_encoding(name)  # Rather undocumented...
	if name not in _extended_encodings:
		return None

	if name not in _cache:
		base_encoding, mapping = _extended_encodings[name]
		assert name.endswith("_ttx")
		# Python 2 had none of the encodings implemented in this file.
		# Python 3 added aliases for the East Asian ones, "temporarily"
		# pointing them at the same base encoding we use, with a comment
		# suggesting a full implementation may follow.  So prefer Python's
		# own x_mac_... codec as the base when it exists; that way this
		# codec upgrades automatically if Python ever implements it fully.
		# http://bugs.python.org/issue24041
		for candidate in (name[:-4], base_encoding):
			try:
				codecs.lookup(candidate)
			except LookupError:
				continue
			_cache[name] = ExtendCodec(name, candidate, mapping)
			break
		else:
			# No usable base codec: report "not found" rather than letting
			# a KeyError from the cache lookup escape.
			return None

	return _cache[name].info


codecs.register(search_function)
120