# -*- coding: utf-8 -*-
from fontTools.misc import sstruct
from fontTools.misc.textTools import bytechr, byteord, bytesjoin, strjoin, tobytes, tostr, safeEval
from fontTools.misc.encodingTools import getEncoding
from fontTools.ttLib import newTable
from . import DefaultTable
import struct
import logging


log = logging.getLogger(__name__)

nameRecordFormat = """
		>	# big endian
		platformID:	H
		platEncID:	H
		langID:		H
		nameID:		H
		length:		H
		offset:		H
"""

nameRecordSize = sstruct.calcsize(nameRecordFormat)

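# A quick sanity sketch (illustrative, not used by the table code): each packed
# name record is six unsigned 16-bit fields, so nameRecordSize works out to 12
# bytes, and the string storage of a table with n records starts at offset
# 6 + n * 12 -- exactly the expectedStringOffset computed in decompile() below.
#
#     assert sstruct.calcsize(nameRecordFormat) == 12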

class table__n_a_m_e(DefaultTable.DefaultTable):
	dependencies = ["ltag"]

	def decompile(self, data, ttFont):
		format, n, stringOffset = struct.unpack(b">HHH", data[:6])
		expectedStringOffset = 6 + n * nameRecordSize
		if stringOffset != expectedStringOffset:
			log.error(
				"'name' table stringOffset incorrect. Expected: %s; Actual: %s",
				expectedStringOffset, stringOffset)
		stringData = data[stringOffset:]
		data = data[6:]
		self.names = []
		for i in range(n):
			if len(data) < 12:
				log.error('skipping malformed name record #%d', i)
				continue
			name, data = sstruct.unpack2(nameRecordFormat, data, NameRecord())
			name.string = stringData[name.offset:name.offset+name.length]
			if name.offset + name.length > len(stringData):
				log.error('skipping malformed name record #%d', i)
				continue
			assert len(name.string) == name.length
			#if (name.platEncID, name.platformID) in ((0, 0), (1, 3)):
			#	if len(name.string) % 2:
			#		print "2-byte string doesn't have even length!"
			#		print name.__dict__
			del name.offset, name.length
			self.names.append(name)

	def compile(self, ttFont):
		if not hasattr(self, "names"):
			# only happens when there are NO name table entries read
			# from the TTX file
			self.names = []
		names = self.names
		names.sort() # sort according to the spec; see NameRecord.__lt__()
		stringData = b""
		format = 0
		n = len(names)
		stringOffset = 6 + n * sstruct.calcsize(nameRecordFormat)
		data = struct.pack(b">HHH", format, n, stringOffset)
		lastoffset = 0
		done = {}  # remember the data so we can reuse the "pointers"
		for name in names:
			string = name.toBytes()
			if string in done:
				name.offset, name.length = done[string]
			else:
				name.offset, name.length = done[string] = len(stringData), len(string)
				stringData = bytesjoin([stringData, string])
			data = data + sstruct.pack(nameRecordFormat, name)
		return data + stringData

	def toXML(self, writer, ttFont):
		for name in self.names:
			name.toXML(writer, ttFont)

	def fromXML(self, name, attrs, content, ttFont):
		if name != "namerecord":
			return # ignore unknown tags
		if not hasattr(self, "names"):
			self.names = []
		name = NameRecord()
		self.names.append(name)
		name.fromXML(name, attrs, content, ttFont)

	def getName(self, nameID, platformID, platEncID, langID=None):
		for namerecord in self.names:
			if (	namerecord.nameID == nameID and
					namerecord.platformID == platformID and
					namerecord.platEncID == platEncID):
				if langID is None or namerecord.langID == langID:
					return namerecord
		return None # not found

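	# Usage sketch (illustrative; "font" is an assumed TTFont loaded elsewhere):
	# look up the Windows English family name record and decode it.
	#
	#     rec = font["name"].getName(nameID=1, platformID=3, platEncID=1, langID=0x409)
	#     family = rec.toUnicode() if rec is not None else None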
	def getDebugName(self, nameID):
		englishName = someName = None
		for name in self.names:
			if name.nameID != nameID:
				continue
			try:
				unistr = name.toUnicode()
			except UnicodeDecodeError:
				continue

			someName = unistr
			if (name.platformID, name.langID) in ((1, 0), (3, 0x409)):
				englishName = unistr
				break
		if englishName:
			return englishName
		elif someName:
			return someName
		else:
			return None

	def getFirstDebugName(self, nameIDs):
		for nameID in nameIDs:
			name = self.getDebugName(nameID)
			if name is not None:
				return name
		return None

	def getBestFamilyName(self):
		# 21 = WWS Family Name
		# 16 = Typographic Family Name
		# 1 = Family Name
		return self.getFirstDebugName((21, 16, 1))

	def getBestSubFamilyName(self):
		# 22 = WWS SubFamily Name
		# 17 = Typographic SubFamily Name
		# 2 = SubFamily Name
		return self.getFirstDebugName((22, 17, 2))

	def getBestFullName(self):
		# 4 = Full Name
		# 6 = PostScript Name
		for nameIDs in ((21, 22), (16, 17), (1, 2), (4, ), (6, )):
			if len(nameIDs) == 2:
				name_fam = self.getDebugName(nameIDs[0])
				name_subfam = self.getDebugName(nameIDs[1])
				if None in [name_fam, name_subfam]:
					continue  # if any is None, skip
				name = f"{name_fam} {name_subfam}"
				if name_subfam.lower() == 'regular':
					name = f"{name_fam}"
				return name
			else:
				name = self.getDebugName(nameIDs[0])
				if name is not None:
					return name
		return None

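	# Usage sketch (illustrative; "font" is an assumed TTFont): pick the most
	# specific human-readable names available, falling back as documented above.
	#
	#     family = font["name"].getBestFamilyName()
	#     subfamily = font["name"].getBestSubFamilyName()
	#     full = font["name"].getBestFullName()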
	def setName(self, string, nameID, platformID, platEncID, langID):
		""" Set the 'string' for the name record identified by 'nameID', 'platformID',
		'platEncID' and 'langID'. If a record with those IDs doesn't exist, create it
		and append it to the name table.

		'string' can be of type `str` (`unicode` in PY2) or `bytes`. In the latter case,
		it is assumed to be already encoded with the correct platform-specific encoding
		identified by the (platformID, platEncID, langID) triplet. A warning is issued
		to prevent unexpected results.
		"""
		if not hasattr(self, 'names'):
			self.names = []
		if not isinstance(string, str):
			if isinstance(string, bytes):
				log.warning(
					"name string is bytes, ensure it's correctly encoded: %r", string)
			else:
				raise TypeError(
					"expected unicode or bytes, found %s: %r" % (
						type(string).__name__, string))
		namerecord = self.getName(nameID, platformID, platEncID, langID)
		if namerecord:
			namerecord.string = string
		else:
			self.names.append(makeName(string, nameID, platformID, platEncID, langID))

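	# Usage sketch (illustrative; "font" is an assumed TTFont): set the Windows
	# English family name, creating the record if it does not exist yet.
	#
	#     font["name"].setName("My Family", 1, 3, 1, 0x409)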
	def removeNames(self, nameID=None, platformID=None, platEncID=None, langID=None):
		"""Remove any name records identified by the given combination of 'nameID',
		'platformID', 'platEncID' and 'langID'.
		"""
		args = {
			argName: argValue
			for argName, argValue in (
				("nameID", nameID),
				("platformID", platformID),
				("platEncID", platEncID),
				("langID", langID),
			)
			if argValue is not None
		}
		if not args:
			# no arguments, nothing to do
			return
		self.names = [
			rec for rec in self.names
			if any(
				argValue != getattr(rec, argName)
				for argName, argValue in args.items()
			)
		]

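	# Usage sketch (illustrative; "font" is an assumed TTFont): drop every
	# license description record (nameID 13) regardless of platform, or only
	# the Macintosh copies of the family name.
	#
	#     font["name"].removeNames(nameID=13)
	#     font["name"].removeNames(nameID=1, platformID=1)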
	def _findUnusedNameID(self, minNameID=256):
		"""Finds an unused name id.

		The nameID is assigned in the range between 'minNameID' and 32767 (inclusive),
		following the last nameID in the name table.
		"""
		names = getattr(self, 'names', [])
		nameID = 1 + max([n.nameID for n in names] + [minNameID - 1])
		if nameID > 32767:
			raise ValueError("nameID must be less than 32768")
		return nameID

	def findMultilingualName(self, names, windows=True, mac=True, minNameID=0):
		"""Return the name ID of an existing multilingual name that
		matches the 'names' dictionary, or None if not found.

		'names' is a dictionary with the name in multiple languages,
		such as {'en': 'Pale', 'de': 'Blaß', 'de-CH': 'Blass'}.
		The keys can be arbitrary IETF BCP 47 language codes;
		the values are Unicode strings.

		If 'windows' is True, the returned name ID is guaranteed to
		exist for all requested languages for platformID=3 and
		platEncID=1.
		If 'mac' is True, the returned name ID is guaranteed to exist
		for all requested languages for platformID=1 and platEncID=0.

		The returned name ID will not be less than the 'minNameID'
		argument.
		"""
		# Gather the set of requested
		#   (string, platformID, platEncID, langID)
		# tuples
		reqNameSet = set()
		for lang, name in sorted(names.items()):
			if windows:
				windowsName = _makeWindowsName(name, None, lang)
				if windowsName is not None:
					reqNameSet.add((windowsName.string,
					                windowsName.platformID,
					                windowsName.platEncID,
					                windowsName.langID))
			if mac:
				macName = _makeMacName(name, None, lang)
				if macName is not None:
					reqNameSet.add((macName.string,
					                macName.platformID,
					                macName.platEncID,
					                macName.langID))

		# Collect matching name IDs
		matchingNames = dict()
		for name in self.names:
			try:
				key = (name.toUnicode(), name.platformID,
				       name.platEncID, name.langID)
			except UnicodeDecodeError:
				continue
			if key in reqNameSet and name.nameID >= minNameID:
				nameSet = matchingNames.setdefault(name.nameID, set())
				nameSet.add(key)

		# Return the first name ID that defines all requested strings
		for nameID, nameSet in sorted(matchingNames.items()):
			if nameSet == reqNameSet:
				return nameID

		return None  # not found

	def addMultilingualName(self, names, ttFont=None, nameID=None,
	                        windows=True, mac=True, minNameID=0):
		"""Add a multilingual name, returning its name ID

		'names' is a dictionary with the name in multiple languages,
		such as {'en': 'Pale', 'de': 'Blaß', 'de-CH': 'Blass'}.
		The keys can be arbitrary IETF BCP 47 language codes;
		the values are Unicode strings.

		'ttFont' is the TTFont to which the names are added, or None.
		If present, the font's 'ltag' table can get populated
		to store exotic language codes, which allows encoding
		names that otherwise cannot get encoded at all.

		'nameID' is the name ID to be used, or None to let the library
		find an existing set of name records that match, or pick an
		unused name ID.

		If 'windows' is True, a platformID=3 name record will be added.
		If 'mac' is True, a platformID=1 name record will be added.

		If the 'nameID' argument is None, the created nameID will not
		be less than the 'minNameID' argument.
		"""
		if not hasattr(self, 'names'):
			self.names = []
		if nameID is None:
			# Reuse nameID if possible
			nameID = self.findMultilingualName(
				names, windows=windows, mac=mac, minNameID=minNameID)
			if nameID is not None:
				return nameID
			nameID = self._findUnusedNameID()
		# TODO: Should minimize BCP 47 language codes.
		# https://github.com/fonttools/fonttools/issues/930
		for lang, name in sorted(names.items()):
			if windows:
				windowsName = _makeWindowsName(name, nameID, lang)
				if windowsName is not None:
					self.names.append(windowsName)
				else:
					# We cannot make a Windows name: make sure we add a
					# Mac name as a fallback. This can happen for exotic
					# BCP47 language tags that have no Windows language code.
					mac = True
			if mac:
				macName = _makeMacName(name, nameID, lang, ttFont)
				if macName is not None:
					self.names.append(macName)
		return nameID

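	# Usage sketch (illustrative; "font" is an assumed TTFont): register a
	# localized style name and get back the name ID shared by all the new
	# records. Passing ttFont=font lets exotic languages fall back to 'ltag'.
	#
	#     nameID = font["name"].addMultilingualName(
	#         {"en": "Pale", "de": "Blaß", "de-CH": "Blass"}, ttFont=font)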
	def addName(self, string, platforms=((1, 0, 0), (3, 1, 0x409)), minNameID=255):
		""" Add a new name record containing 'string' for each (platformID, platEncID,
		langID) tuple specified in the 'platforms' list.

		The nameID is assigned in the range between 'minNameID'+1 and 32767 (inclusive),
		following the last nameID in the name table.
		If no 'platforms' are specified, two English name records are added, one for the
		Macintosh (platformID=1) and one for the Windows platform (platformID=3).

		The 'string' must be a Unicode string, so it can be encoded with different,
		platform-specific encodings.

		Return the new nameID.
		"""
		assert len(platforms) > 0, \
			"'platforms' must contain at least one (platformID, platEncID, langID) tuple"
		if not hasattr(self, 'names'):
			self.names = []
		if not isinstance(string, str):
			raise TypeError(
				"expected str, found %s: %r" % (type(string).__name__, string))
		nameID = self._findUnusedNameID(minNameID + 1)
		for platformID, platEncID, langID in platforms:
			self.names.append(makeName(string, nameID, platformID, platEncID, langID))
		return nameID

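	# Usage sketch (illustrative; "font" is an assumed TTFont): add the same
	# string for the default Mac and Windows platforms and keep the new ID,
	# e.g. to reference it from an fvar or STAT entry.
	#
	#     featureNameID = font["name"].addName("Optical Size")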

def makeName(string, nameID, platformID, platEncID, langID):
	name = NameRecord()
	name.string, name.nameID, name.platformID, name.platEncID, name.langID = (
		string, nameID, platformID, platEncID, langID)
	return name

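# Usage sketch (illustrative): build a standalone record, for example a Windows
# English copyright notice (nameID 0), and append it to an existing table.
#
#     record = makeName("© Example Foundry", 0, 3, 1, 0x409)
#     font["name"].names.append(record)   # "font" is an assumed TTFont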

def _makeWindowsName(name, nameID, language):
	"""Create a NameRecord for the Microsoft Windows platform

	'language' is an arbitrary IETF BCP 47 language identifier such
	as 'en', 'de-CH', 'de-AT-1901', or 'fa-Latn'. If Microsoft Windows
	does not support the desired language, the result will be None.
	Future versions of fonttools might return a NameRecord for the
	OpenType 'name' table format 1, but this is not implemented yet.
	"""
	langID = _WINDOWS_LANGUAGE_CODES.get(language.lower())
	if langID is not None:
		return makeName(name, nameID, 3, 1, langID)
	else:
		log.warning("cannot add Windows name in language %s "
		            "because fonttools does not yet support "
		            "name table format 1" % language)
		return None


def _makeMacName(name, nameID, language, font=None):
	"""Create a NameRecord for Apple platforms

	'language' is an arbitrary IETF BCP 47 language identifier such
	as 'en', 'de-CH', 'de-AT-1901', or 'fa-Latn'. When possible, we
	create a Macintosh NameRecord that is understood by old applications
	(platform ID 1 and an old-style Macintosh language enum). If this
	is not possible, we create a Unicode NameRecord (platform ID 0)
	whose language points to the font’s 'ltag' table. The latter
	can encode any string in any language, but legacy applications
	might not recognize the format (in which case they will ignore
	those names).

	'font' should be the TTFont for which you want to create a name.
	If 'font' is None, we only return NameRecords for legacy Macintosh;
	in that case, the result will be None for names that need to
	be encoded with an 'ltag' table.

	See the section “The language identifier” in Apple’s specification:
	https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6name.html
	"""
	macLang = _MAC_LANGUAGE_CODES.get(language.lower())
	macScript = _MAC_LANGUAGE_TO_SCRIPT.get(macLang)
	if macLang is not None and macScript is not None:
		encoding = getEncoding(1, macScript, macLang, default="ascii")
		# Check if we can actually encode this name. If we can't,
		# for example because we have no support for the legacy
		# encoding, or because the name string contains Unicode
		# characters that the legacy encoding cannot represent,
		# we fall back to encoding the name in Unicode and put
		# the language tag into the ltag table.
		try:
			_ = tobytes(name, encoding, errors="strict")
			return makeName(name, nameID, 1, macScript, macLang)
		except UnicodeEncodeError:
			pass
	if font is not None:
		ltag = font.tables.get("ltag")
		if ltag is None:
			ltag = font["ltag"] = newTable("ltag")
		# 0 = Unicode; 4 = “Unicode 2.0 or later semantics (non-BMP characters allowed)”
		# “The preferred platform-specific code for Unicode would be 3 or 4.”
		# https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6name.html
		return makeName(name, nameID, 0, 4, ltag.addTag(language))
	else:
		log.warning("cannot store language %s into 'ltag' table "
		            "without having access to the TTFont object" %
		            language)
		return None


class NameRecord(object):

	def getEncoding(self, default='ascii'):
		"""Returns the Python encoding name for this name entry based on its platformID,
		platEncID, and langID.  If the encoding for these values is not known, by default
		'ascii' is returned.  That can be overridden by passing a value to the default
		argument.
		"""
		return getEncoding(self.platformID, self.platEncID, self.langID, default)

	def encodingIsUnicodeCompatible(self):
		return self.getEncoding(None) in ['utf_16_be', 'ucs2be', 'ascii', 'latin1']

	def __str__(self):
		return self.toStr(errors='backslashreplace')

	def isUnicode(self):
		return (self.platformID == 0 or
			(self.platformID == 3 and self.platEncID in [0, 1, 10]))

	def toUnicode(self, errors='strict'):
		"""
		If self.string is a Unicode string, return it; otherwise try decoding the
		bytes in self.string to a Unicode string using the encoding of this
		entry as returned by self.getEncoding(). Note that self.getEncoding()
		returns 'ascii' if the encoding is unknown to the library.

		Certain heuristics are performed to recover data from bytes that are
		ill-formed in the chosen encoding, or that otherwise look misencoded
		(mostly around bad UTF-16BE encoded bytes, or bytes that look like UTF-16BE
		but are marked otherwise).  If the bytes are ill-formed and the heuristics fail,
		the error is handled according to the errors parameter to this function, which is
		passed to the underlying decode() function; by default it throws a
		UnicodeDecodeError exception.

		Note: The mentioned heuristics mean that roundtripping a font to XML and back
		to binary might recover some misencoded data whereas just loading the font
		and saving it back will not change them.
		"""
		def isascii(b):
			return (b >= 0x20 and b <= 0x7E) or b in [0x09, 0x0A, 0x0D]
		encoding = self.getEncoding()
		string = self.string

		if isinstance(string, bytes) and encoding == 'utf_16_be' and len(string) % 2 == 1:
			# Recover badly encoded UTF-16 strings that have an odd number of bytes:
			# - If the last byte is zero, drop it.  Otherwise,
			# - If all the odd bytes are zero and all the even bytes are ASCII,
			#   prepend one zero byte.  Otherwise,
			# - If first byte is zero and all other bytes are ASCII, insert zero
			#   bytes between consecutive ASCII bytes.
			#
			# (Yes, I've seen all of these in the wild... sigh)
			if byteord(string[-1]) == 0:
				string = string[:-1]
			elif all(byteord(b) == 0 if i % 2 else isascii(byteord(b)) for i,b in enumerate(string)):
				string = b'\0' + string
			elif byteord(string[0]) == 0 and all(isascii(byteord(b)) for b in string[1:]):
				string = bytesjoin(b'\0'+bytechr(byteord(b)) for b in string[1:])

		string = tostr(string, encoding=encoding, errors=errors)

		# If the decoded string still looks like UTF-16BE, it suggests double-encoding.
		# Fix it up.
		if all(ord(c) == 0 if i % 2 == 0 else isascii(ord(c)) for i,c in enumerate(string)):
			# If the string claims to be Mac encoding, but looks like UTF-16BE with
			# ASCII text, narrow it down.
			string = ''.join(c for c in string[1::2])

		return string

	def toBytes(self, errors='strict'):
		""" If self.string is a bytes object, return it; otherwise try encoding
		the Unicode string in self.string to bytes using the encoding of this
		entry as returned by self.getEncoding(). Note that self.getEncoding()
		returns 'ascii' if the encoding is unknown to the library.

		If the Unicode string cannot be encoded to bytes in the chosen encoding,
		the error is handled according to the errors parameter to this function,
		which is passed to the underlying encode() function; by default it throws a
		UnicodeEncodeError exception.
		"""
		return tobytes(self.string, encoding=self.getEncoding(), errors=errors)

	toStr = toUnicode

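	# Round-trip sketch (illustrative): toBytes() encodes with the record's
	# platform encoding and toUnicode() decodes it back, so for a well-formed
	# record the two are inverses.
	#
	#     rec = makeName("Example", 4, 3, 1, 0x409)
	#     assert rec.toUnicode() == "Example"
	#     assert rec.toBytes() == "Example".encode("utf_16_be")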
	def toXML(self, writer, ttFont):
		try:
			unistr = self.toUnicode()
		except UnicodeDecodeError:
			unistr = None
		attrs = [
				("nameID", self.nameID),
				("platformID", self.platformID),
				("platEncID", self.platEncID),
				("langID", hex(self.langID)),
			]

		if unistr is None or not self.encodingIsUnicodeCompatible():
			attrs.append(("unicode", unistr is not None))

		writer.begintag("namerecord", attrs)
		writer.newline()
		if unistr is not None:
			writer.write(unistr)
		else:
			writer.write8bit(self.string)
		writer.newline()
		writer.endtag("namerecord")
		writer.newline()

	def fromXML(self, name, attrs, content, ttFont):
		self.nameID = safeEval(attrs["nameID"])
		self.platformID = safeEval(attrs["platformID"])
		self.platEncID = safeEval(attrs["platEncID"])
		self.langID = safeEval(attrs["langID"])
		s = strjoin(content).strip()
		encoding = self.getEncoding()
		if self.encodingIsUnicodeCompatible() or safeEval(attrs.get("unicode", "False")):
			self.string = s.encode(encoding)
		else:
			# This is the inverse of write8bit...
			self.string = s.encode("latin1")

	def __lt__(self, other):
		if type(self) != type(other):
			return NotImplemented

		try:
			# implemented so that list.sort() sorts according to the spec.
			selfTuple = (
				self.platformID,
				self.platEncID,
				self.langID,
				self.nameID,
				self.toBytes(),
			)
			otherTuple = (
				other.platformID,
				other.platEncID,
				other.langID,
				other.nameID,
				other.toBytes(),
			)
			return selfTuple < otherTuple
		except (UnicodeEncodeError, AttributeError):
			# This can only happen for
			# 1) an object that is not a NameRecord, or
			# 2) an unlikely incomplete NameRecord object which has not been
			#    fully populated, or
			# 3) when all IDs are identical but the strings can't be encoded
			#    for their platform encoding.
			# In all cases it is best to return NotImplemented.
			return NotImplemented

	def __repr__(self):
		return "<NameRecord NameID=%d; PlatformID=%d; LanguageID=%d>" % (
				self.nameID, self.platformID, self.langID)


# Windows language ID → IETF BCP-47 language tag
#
# While Microsoft indicates a region/country for all its language
# IDs, we follow Unicode practice by omitting “most likely subtags”
# as per Unicode CLDR. For example, English is simply “en” and not
# “en-Latn” because according to Unicode, the default script
# for English is Latin.
#
# http://www.unicode.org/cldr/charts/latest/supplemental/likely_subtags.html
# http://www.iana.org/assignments/language-subtag-registry/language-subtag-registry
_WINDOWS_LANGUAGES = {
    0x0436: 'af',
    0x041C: 'sq',
    0x0484: 'gsw',
    0x045E: 'am',
    0x1401: 'ar-DZ',
    0x3C01: 'ar-BH',
    0x0C01: 'ar',
    0x0801: 'ar-IQ',
    0x2C01: 'ar-JO',
    0x3401: 'ar-KW',
    0x3001: 'ar-LB',
    0x1001: 'ar-LY',
    0x1801: 'ary',
    0x2001: 'ar-OM',
    0x4001: 'ar-QA',
    0x0401: 'ar-SA',
    0x2801: 'ar-SY',
    0x1C01: 'aeb',
    0x3801: 'ar-AE',
    0x2401: 'ar-YE',
    0x042B: 'hy',
    0x044D: 'as',
    0x082C: 'az-Cyrl',
    0x042C: 'az',
    0x046D: 'ba',
    0x042D: 'eu',
    0x0423: 'be',
    0x0845: 'bn',
    0x0445: 'bn-IN',
    0x201A: 'bs-Cyrl',
    0x141A: 'bs',
    0x047E: 'br',
    0x0402: 'bg',
    0x0403: 'ca',
    0x0C04: 'zh-HK',
    0x1404: 'zh-MO',
    0x0804: 'zh',
    0x1004: 'zh-SG',
    0x0404: 'zh-TW',
    0x0483: 'co',
    0x041A: 'hr',
    0x101A: 'hr-BA',
    0x0405: 'cs',
    0x0406: 'da',
    0x048C: 'prs',
    0x0465: 'dv',
    0x0813: 'nl-BE',
    0x0413: 'nl',
    0x0C09: 'en-AU',
    0x2809: 'en-BZ',
    0x1009: 'en-CA',
    0x2409: 'en-029',
    0x4009: 'en-IN',
    0x1809: 'en-IE',
    0x2009: 'en-JM',
    0x4409: 'en-MY',
    0x1409: 'en-NZ',
    0x3409: 'en-PH',
    0x4809: 'en-SG',
    0x1C09: 'en-ZA',
    0x2C09: 'en-TT',
    0x0809: 'en-GB',
    0x0409: 'en',
    0x3009: 'en-ZW',
    0x0425: 'et',
    0x0438: 'fo',
    0x0464: 'fil',
    0x040B: 'fi',
    0x080C: 'fr-BE',
    0x0C0C: 'fr-CA',
    0x040C: 'fr',
    0x140C: 'fr-LU',
    0x180C: 'fr-MC',
    0x100C: 'fr-CH',
    0x0462: 'fy',
    0x0456: 'gl',
    0x0437: 'ka',
    0x0C07: 'de-AT',
    0x0407: 'de',
    0x1407: 'de-LI',
    0x1007: 'de-LU',
    0x0807: 'de-CH',
    0x0408: 'el',
    0x046F: 'kl',
    0x0447: 'gu',
    0x0468: 'ha',
    0x040D: 'he',
    0x0439: 'hi',
    0x040E: 'hu',
    0x040F: 'is',
    0x0470: 'ig',
    0x0421: 'id',
    0x045D: 'iu',
    0x085D: 'iu-Latn',
    0x083C: 'ga',
    0x0434: 'xh',
    0x0435: 'zu',
    0x0410: 'it',
    0x0810: 'it-CH',
    0x0411: 'ja',
    0x044B: 'kn',
    0x043F: 'kk',
    0x0453: 'km',
    0x0486: 'quc',
    0x0487: 'rw',
    0x0441: 'sw',
    0x0457: 'kok',
    0x0412: 'ko',
    0x0440: 'ky',
    0x0454: 'lo',
    0x0426: 'lv',
    0x0427: 'lt',
    0x082E: 'dsb',
    0x046E: 'lb',
    0x042F: 'mk',
    0x083E: 'ms-BN',
    0x043E: 'ms',
    0x044C: 'ml',
    0x043A: 'mt',
    0x0481: 'mi',
    0x047A: 'arn',
    0x044E: 'mr',
    0x047C: 'moh',
    0x0450: 'mn',
    0x0850: 'mn-CN',
    0x0461: 'ne',
    0x0414: 'nb',
    0x0814: 'nn',
    0x0482: 'oc',
    0x0448: 'or',
    0x0463: 'ps',
    0x0415: 'pl',
    0x0416: 'pt',
    0x0816: 'pt-PT',
    0x0446: 'pa',
    0x046B: 'qu-BO',
    0x086B: 'qu-EC',
    0x0C6B: 'qu',
    0x0418: 'ro',
    0x0417: 'rm',
    0x0419: 'ru',
    0x243B: 'smn',
    0x103B: 'smj-NO',
    0x143B: 'smj',
    0x0C3B: 'se-FI',
    0x043B: 'se',
    0x083B: 'se-SE',
    0x203B: 'sms',
    0x183B: 'sma-NO',
    0x1C3B: 'sma',
    0x044F: 'sa',
    0x1C1A: 'sr-Cyrl-BA',
    0x0C1A: 'sr',
    0x181A: 'sr-Latn-BA',
    0x081A: 'sr-Latn',
    0x046C: 'nso',
    0x0432: 'tn',
    0x045B: 'si',
    0x041B: 'sk',
    0x0424: 'sl',
    0x2C0A: 'es-AR',
    0x400A: 'es-BO',
    0x340A: 'es-CL',
    0x240A: 'es-CO',
    0x140A: 'es-CR',
    0x1C0A: 'es-DO',
    0x300A: 'es-EC',
    0x440A: 'es-SV',
    0x100A: 'es-GT',
    0x480A: 'es-HN',
    0x080A: 'es-MX',
    0x4C0A: 'es-NI',
    0x180A: 'es-PA',
    0x3C0A: 'es-PY',
    0x280A: 'es-PE',
    0x500A: 'es-PR',

    # Microsoft has defined two different language codes for
    # “Spanish with modern sorting” and “Spanish with traditional
    # sorting”. This makes sense for collation APIs, and it would be
    # possible to express this in BCP 47 language tags via Unicode
    # extensions (e.g., “es-u-co-trad” is “Spanish with traditional
    # sorting”). However, for storing names in fonts, this distinction
    # does not make sense, so we use “es” in both cases.
    0x0C0A: 'es',
    0x040A: 'es',

    0x540A: 'es-US',
    0x380A: 'es-UY',
    0x200A: 'es-VE',
    0x081D: 'sv-FI',
    0x041D: 'sv',
    0x045A: 'syr',
    0x0428: 'tg',
    0x085F: 'tzm',
    0x0449: 'ta',
    0x0444: 'tt',
    0x044A: 'te',
    0x041E: 'th',
    0x0451: 'bo',
    0x041F: 'tr',
    0x0442: 'tk',
    0x0480: 'ug',
    0x0422: 'uk',
    0x042E: 'hsb',
    0x0420: 'ur',
    0x0843: 'uz-Cyrl',
    0x0443: 'uz',
    0x042A: 'vi',
    0x0452: 'cy',
    0x0488: 'wo',
    0x0485: 'sah',
    0x0478: 'ii',
    0x046A: 'yo',
}


_MAC_LANGUAGES = {
    0: 'en',
    1: 'fr',
    2: 'de',
    3: 'it',
    4: 'nl',
    5: 'sv',
    6: 'es',
    7: 'da',
    8: 'pt',
    9: 'no',
    10: 'he',
    11: 'ja',
    12: 'ar',
    13: 'fi',
    14: 'el',
    15: 'is',
    16: 'mt',
    17: 'tr',
    18: 'hr',
    19: 'zh-Hant',
    20: 'ur',
    21: 'hi',
    22: 'th',
    23: 'ko',
    24: 'lt',
    25: 'pl',
    26: 'hu',
    27: 'et',
    28: 'lv',
    29: 'se',
    30: 'fo',
    31: 'fa',
    32: 'ru',
    33: 'zh',
    34: 'nl-BE',
    35: 'ga',
    36: 'sq',
    37: 'ro',
    38: 'cs',
    39: 'sk',
    40: 'sl',
    41: 'yi',
    42: 'sr',
    43: 'mk',
    44: 'bg',
    45: 'uk',
    46: 'be',
    47: 'uz',
    48: 'kk',
    49: 'az-Cyrl',
    50: 'az-Arab',
    51: 'hy',
    52: 'ka',
    53: 'mo',
    54: 'ky',
    55: 'tg',
    56: 'tk',
    57: 'mn-CN',
    58: 'mn',
    59: 'ps',
    60: 'ks',
    61: 'ku',
    62: 'sd',
    63: 'bo',
    64: 'ne',
    65: 'sa',
    66: 'mr',
    67: 'bn',
    68: 'as',
    69: 'gu',
    70: 'pa',
    71: 'or',
    72: 'ml',
    73: 'kn',
    74: 'ta',
    75: 'te',
    76: 'si',
    77: 'my',
    78: 'km',
    79: 'lo',
    80: 'vi',
    81: 'id',
    82: 'tl',
    83: 'ms',
    84: 'ms-Arab',
    85: 'am',
    86: 'ti',
    87: 'om',
    88: 'so',
    89: 'sw',
    90: 'rw',
    91: 'rn',
    92: 'ny',
    93: 'mg',
    94: 'eo',
    128: 'cy',
    129: 'eu',
    130: 'ca',
    131: 'la',
    132: 'qu',
    133: 'gn',
    134: 'ay',
    135: 'tt',
    136: 'ug',
    137: 'dz',
    138: 'jv',
    139: 'su',
    140: 'gl',
    141: 'af',
    142: 'br',
    143: 'iu',
    144: 'gd',
    145: 'gv',
    146: 'ga',
    147: 'to',
    148: 'el-polyton',
    149: 'kl',
    150: 'az',
    151: 'nn',
}


_WINDOWS_LANGUAGE_CODES = {lang.lower(): code for code, lang in _WINDOWS_LANGUAGES.items()}
_MAC_LANGUAGE_CODES = {lang.lower(): code for code, lang in _MAC_LANGUAGES.items()}


# MacOS language ID → MacOS script ID
#
# Note that the script ID is not sufficient to determine what encoding
# to use in TrueType files. For some languages, MacOS used a modification
# of a mainstream script. For example, an Icelandic name would be stored
# with smRoman in the TrueType naming table, but the actual encoding
# is a special Icelandic version of the normal Macintosh Roman encoding.
# As another example, Inuktitut uses an 8-bit encoding for Canadian Aboriginal
# Syllables but MacOS had run out of available script codes, so this was
# done as a (pretty radical) “modification” of Ethiopic.
#
# http://unicode.org/Public/MAPPINGS/VENDORS/APPLE/Readme.txt
_MAC_LANGUAGE_TO_SCRIPT = {
    0: 0,  # langEnglish → smRoman
    1: 0,  # langFrench → smRoman
    2: 0,  # langGerman → smRoman
    3: 0,  # langItalian → smRoman
    4: 0,  # langDutch → smRoman
    5: 0,  # langSwedish → smRoman
    6: 0,  # langSpanish → smRoman
    7: 0,  # langDanish → smRoman
    8: 0,  # langPortuguese → smRoman
    9: 0,  # langNorwegian → smRoman
    10: 5,  # langHebrew → smHebrew
    11: 1,  # langJapanese → smJapanese
    12: 4,  # langArabic → smArabic
    13: 0,  # langFinnish → smRoman
    14: 6,  # langGreek → smGreek
    15: 0,  # langIcelandic → smRoman (modified)
    16: 0,  # langMaltese → smRoman
    17: 0,  # langTurkish → smRoman (modified)
    18: 0,  # langCroatian → smRoman (modified)
    19: 2,  # langTradChinese → smTradChinese
    20: 4,  # langUrdu → smArabic
    21: 9,  # langHindi → smDevanagari
    22: 21,  # langThai → smThai
    23: 3,  # langKorean → smKorean
    24: 29,  # langLithuanian → smCentralEuroRoman
    25: 29,  # langPolish → smCentralEuroRoman
    26: 29,  # langHungarian → smCentralEuroRoman
    27: 29,  # langEstonian → smCentralEuroRoman
    28: 29,  # langLatvian → smCentralEuroRoman
    29: 0,  # langSami → smRoman
    30: 0,  # langFaroese → smRoman (modified)
    31: 4,  # langFarsi → smArabic (modified)
    32: 7,  # langRussian → smCyrillic
    33: 25,  # langSimpChinese → smSimpChinese
    34: 0,  # langFlemish → smRoman
    35: 0,  # langIrishGaelic → smRoman (modified)
    36: 0,  # langAlbanian → smRoman
    37: 0,  # langRomanian → smRoman (modified)
    38: 29,  # langCzech → smCentralEuroRoman
    39: 29,  # langSlovak → smCentralEuroRoman
    40: 0,  # langSlovenian → smRoman (modified)
    41: 5,  # langYiddish → smHebrew
    42: 7,  # langSerbian → smCyrillic
    43: 7,  # langMacedonian → smCyrillic
    44: 7,  # langBulgarian → smCyrillic
    45: 7,  # langUkrainian → smCyrillic (modified)
    46: 7,  # langByelorussian → smCyrillic
    47: 7,  # langUzbek → smCyrillic
    48: 7,  # langKazakh → smCyrillic
    49: 7,  # langAzerbaijani → smCyrillic
    50: 4,  # langAzerbaijanAr → smArabic
    51: 24,  # langArmenian → smArmenian
    52: 23,  # langGeorgian → smGeorgian
    53: 7,  # langMoldavian → smCyrillic
    54: 7,  # langKirghiz → smCyrillic
    55: 7,  # langTajiki → smCyrillic
    56: 7,  # langTurkmen → smCyrillic
    57: 27,  # langMongolian → smMongolian
    58: 7,  # langMongolianCyr → smCyrillic
    59: 4,  # langPashto → smArabic
    60: 4,  # langKurdish → smArabic
    61: 4,  # langKashmiri → smArabic
    62: 4,  # langSindhi → smArabic
    63: 26,  # langTibetan → smTibetan
    64: 9,  # langNepali → smDevanagari
    65: 9,  # langSanskrit → smDevanagari
    66: 9,  # langMarathi → smDevanagari
    67: 13,  # langBengali → smBengali
    68: 13,  # langAssamese → smBengali
    69: 11,  # langGujarati → smGujarati
    70: 10,  # langPunjabi → smGurmukhi
    71: 12,  # langOriya → smOriya
    72: 17,  # langMalayalam → smMalayalam
    73: 16,  # langKannada → smKannada
    74: 14,  # langTamil → smTamil
    75: 15,  # langTelugu → smTelugu
    76: 18,  # langSinhalese → smSinhalese
    77: 19,  # langBurmese → smBurmese
    78: 20,  # langKhmer → smKhmer
    79: 22,  # langLao → smLao
    80: 30,  # langVietnamese → smVietnamese
    81: 0,  # langIndonesian → smRoman
    82: 0,  # langTagalog → smRoman
    83: 0,  # langMalayRoman → smRoman
    84: 4,  # langMalayArabic → smArabic
    85: 28,  # langAmharic → smEthiopic
    86: 28,  # langTigrinya → smEthiopic
    87: 28,  # langOromo → smEthiopic
    88: 0,  # langSomali → smRoman
    89: 0,  # langSwahili → smRoman
    90: 0,  # langKinyarwanda → smRoman
    91: 0,  # langRundi → smRoman
    92: 0,  # langNyanja → smRoman
    93: 0,  # langMalagasy → smRoman
    94: 0,  # langEsperanto → smRoman
    128: 0,  # langWelsh → smRoman (modified)
    129: 0,  # langBasque → smRoman
    130: 0,  # langCatalan → smRoman
    131: 0,  # langLatin → smRoman
    132: 0,  # langQuechua → smRoman
    133: 0,  # langGuarani → smRoman
    134: 0,  # langAymara → smRoman
    135: 7,  # langTatar → smCyrillic
    136: 4,  # langUighur → smArabic
    137: 26,  # langDzongkha → smTibetan
    138: 0,  # langJavaneseRom → smRoman
    139: 0,  # langSundaneseRom → smRoman
    140: 0,  # langGalician → smRoman
    141: 0,  # langAfrikaans → smRoman
    142: 0,  # langBreton → smRoman (modified)
    143: 28,  # langInuktitut → smEthiopic (modified)
    144: 0,  # langScottishGaelic → smRoman (modified)
    145: 0,  # langManxGaelic → smRoman (modified)
    146: 0,  # langIrishGaelicScript → smRoman (modified)
    147: 0,  # langTongan → smRoman
    148: 6,  # langGreekAncient → smGreek
    149: 0,  # langGreenlandic → smRoman
    150: 0,  # langAzerbaijanRoman → smRoman
    151: 0,  # langNynorsk → smRoman
}