# -*- coding: utf-8 -*-
from __future__ import print_function, division, absolute_import
from __future__ import unicode_literals
from fontTools.misc.py23 import *
from fontTools.misc import sstruct
from fontTools.misc.textTools import safeEval
from fontTools.misc.encodingTools import getEncoding
from fontTools.ttLib import newTable
from . import DefaultTable
import struct
import logging


log = logging.getLogger(__name__)

nameRecordFormat = """
		>	# big endian
		platformID:	H
		platEncID:	H
		langID:		H
		nameID:		H
		length:		H
		offset:		H
"""

nameRecordSize = sstruct.calcsize(nameRecordFormat)

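# Layout sketch of a format-0 'name' table, for orientation only: a 6-byte
# header (format, n = record count, stringOffset; three uint16 values) is
# followed by n records of nameRecordSize (12) bytes each, and then by the
# string storage that each record's offset/length point into.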

class table__n_a_m_e(DefaultTable.DefaultTable):
	dependencies = ["ltag"]

	def decompile(self, data, ttFont):
		format, n, stringOffset = struct.unpack(b">HHH", data[:6])
		expectedStringOffset = 6 + n * nameRecordSize
		if stringOffset != expectedStringOffset:
			log.error(
				"'name' table stringOffset incorrect. Expected: %s; Actual: %s",
				expectedStringOffset, stringOffset)
		stringData = data[stringOffset:]
		data = data[6:]
		self.names = []
		for i in range(n):
			if len(data) < 12:
				log.error('skipping malformed name record #%d', i)
				continue
			name, data = sstruct.unpack2(nameRecordFormat, data, NameRecord())
			name.string = stringData[name.offset:name.offset+name.length]
			if name.offset + name.length > len(stringData):
				log.error('skipping malformed name record #%d', i)
				continue
			assert len(name.string) == name.length
			#if (name.platEncID, name.platformID) in ((0, 0), (1, 3)):
			#	if len(name.string) % 2:
			#		print "2-byte string doesn't have even length!"
			#		print name.__dict__
			del name.offset, name.length
			self.names.append(name)

	def compile(self, ttFont):
		if not hasattr(self, "names"):
			# only happens when there are NO name table entries read
			# from the TTX file
			self.names = []
		names = self.names
		names.sort() # sort according to the spec; see NameRecord.__lt__()
		stringData = b""
		format = 0
		n = len(names)
		stringOffset = 6 + n * sstruct.calcsize(nameRecordFormat)
		data = struct.pack(b">HHH", format, n, stringOffset)
		lastoffset = 0
		done = {}  # remember the data so we can reuse the "pointers"
		for name in names:
			string = name.toBytes()
			if string in done:
				name.offset, name.length = done[string]
			else:
				name.offset, name.length = done[string] = len(stringData), len(string)
				stringData = bytesjoin([stringData, string])
			data = data + sstruct.pack(nameRecordFormat, name)
		return data + stringData

	def toXML(self, writer, ttFont):
		for name in self.names:
			name.toXML(writer, ttFont)

	def fromXML(self, name, attrs, content, ttFont):
		if name != "namerecord":
			return # ignore unknown tags
		if not hasattr(self, "names"):
			self.names = []
		name = NameRecord()
		self.names.append(name)
		name.fromXML(name, attrs, content, ttFont)

	def getName(self, nameID, platformID, platEncID, langID=None):
		for namerecord in self.names:
			if (	namerecord.nameID == nameID and
					namerecord.platformID == platformID and
					namerecord.platEncID == platEncID):
				if langID is None or namerecord.langID == langID:
					return namerecord
		return None # not found

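	# Hedged usage sketch (`font` is an assumed TTFont instance with a 'name'
	# table); getName returns a matching NameRecord or None:
	#
	#   rec = font["name"].getName(nameID=1, platformID=3, platEncID=1, langID=0x409)
	#   family = rec.toUnicode() if rec is not None else None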
	def getDebugName(self, nameID):
		englishName = someName = None
		for name in self.names:
			if name.nameID != nameID:
				continue
			try:
				unistr = name.toUnicode()
			except UnicodeDecodeError:
				continue

			someName = unistr
			if (name.platformID, name.langID) in ((1, 0), (3, 0x409)):
				englishName = unistr
				break
		if englishName:
			return englishName
		elif someName:
			return someName
		else:
			return None

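	# Hedged usage sketch: getDebugName prefers an English record (Mac langID 0
	# or Windows langID 0x409) and otherwise falls back to any decodable one.
	# With `font` again an assumed TTFont instance:
	#
	#   full_name = font["name"].getDebugName(4)  # nameID 4 = full font name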
	def setName(self, string, nameID, platformID, platEncID, langID):
		""" Set the 'string' for the name record identified by 'nameID', 'platformID',
		'platEncID' and 'langID'. If a record with that nameID doesn't exist, create it
		and append to the name table.

		'string' can be of type `str` (`unicode` in PY2) or `bytes`. In the latter case,
		it is assumed to be already encoded with the correct platform-specific encoding
		identified by the (platformID, platEncID, langID) triplet. A warning is issued
		in that case, to help catch unexpected results.
		"""
		if not hasattr(self, 'names'):
			self.names = []
		if not isinstance(string, unicode):
			if isinstance(string, bytes):
				log.warning(
					"name string is bytes, ensure it's correctly encoded: %r", string)
			else:
				raise TypeError(
					"expected unicode or bytes, found %s: %r" % (
						type(string).__name__, string))
		namerecord = self.getName(nameID, platformID, platEncID, langID)
		if namerecord:
			namerecord.string = string
		else:
			self.names.append(makeName(string, nameID, platformID, platEncID, langID))

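	# Hedged usage sketch (assuming `font` is a TTFont with a 'name' table):
	# set the family name (nameID 1) for the common Windows and Mac records.
	#
	#   name_table = font["name"]
	#   name_table.setName(u"My Family", 1, 3, 1, 0x409)  # Windows, Unicode BMP, en-US
	#   name_table.setName(u"My Family", 1, 1, 0, 0)      # Macintosh, Roman, English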
	def _findUnusedNameID(self, minNameID=256):
		"""Finds an unused name id.

		The nameID is assigned in the range between 'minNameID' and 32767 (inclusive),
		following the last nameID in the name table.
		"""
		names = getattr(self, 'names', [])
		nameID = 1 + max([n.nameID for n in names] + [minNameID - 1])
		if nameID > 32767:
			raise ValueError("nameID must be less than 32768")
		return nameID

	def addMultilingualName(self, names, ttFont=None, nameID=None,
	                        windows=True, mac=True):
		"""Add a multilingual name, returning its name ID

		'names' is a dictionary with the name in multiple languages,
		such as {'en': 'Pale', 'de': 'Blaß', 'de-CH': 'Blass'}.
		The keys can be arbitrary IETF BCP 47 language codes;
		the values are Unicode strings.

		'ttFont' is the TTFont to which the names are added, or None.
		If present, the font's 'ltag' table can get populated
		to store exotic language codes, which allows encoding
		names that otherwise cannot get encoded at all.

		'nameID' is the name ID to be used, or None to let the library
		pick an unused name ID.

		If 'windows' is True, a platformID=3 name record will be added.
		If 'mac' is True, a platformID=1 name record will be added.
		"""
		if not hasattr(self, 'names'):
			self.names = []
		if nameID is None:
			nameID = self._findUnusedNameID()
		# TODO: Should minimize BCP 47 language codes.
		# https://github.com/fonttools/fonttools/issues/930
		for lang, name in sorted(names.items()):
			if windows:
				windowsName = _makeWindowsName(name, nameID, lang)
				if windowsName is not None:
					self.names.append(windowsName)
				else:
					# We cannot make a Windows name: make sure we add a
					# Mac name as a fallback. This can happen for exotic
					# BCP47 language tags that have no Windows language code.
					mac = True
			if mac:
				macName = _makeMacName(name, nameID, lang, ttFont)
				if macName is not None:
					self.names.append(macName)
		return nameID

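	# Hedged usage sketch (with `font` an assumed TTFont): store a localized
	# string under a freshly allocated name ID and reference that ID elsewhere.
	#
	#   nameID = font["name"].addMultilingualName(
	#       {'en': 'Pale', 'de': 'Blaß', 'de-CH': 'Blass'}, ttFont=font)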
	def addName(self, string, platforms=((1, 0, 0), (3, 1, 0x409)), minNameID=255):
		""" Add a new name record containing 'string' for each (platformID, platEncID,
		langID) tuple specified in the 'platforms' list.

		The nameID is assigned in the range between 'minNameID'+1 and 32767 (inclusive),
		following the last nameID in the name table.
		If no 'platforms' are specified, two English name records are added, one for the
		Macintosh (platformID=1), and one for the Windows platform (platformID=3).

		The 'string' must be a Unicode string, so it can be encoded with different,
		platform-specific encodings.

		Return the new nameID.
		"""
		assert len(platforms) > 0, \
			"'platforms' must contain at least one (platformID, platEncID, langID) tuple"
		if not hasattr(self, 'names'):
			self.names = []
		if not isinstance(string, unicode):
			raise TypeError(
				"expected %s, found %s: %r" % (
					unicode.__name__, type(string).__name__, string))
		nameID = self._findUnusedNameID(minNameID + 1)
		for platformID, platEncID, langID in platforms:
			self.names.append(makeName(string, nameID, platformID, platEncID, langID))
		return nameID

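	# Hedged usage sketch: with the default 'platforms', this adds the same
	# Unicode string as both a Macintosh and a Windows English record and
	# returns the nameID it picked (256 or higher with the default minNameID).
	#
	#   nameID = font["name"].addName(u"Weight axis")  # `font` is an assumed TTFont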

def makeName(string, nameID, platformID, platEncID, langID):
	name = NameRecord()
	name.string, name.nameID, name.platformID, name.platEncID, name.langID = (
		string, nameID, platformID, platEncID, langID)
	return name


def _makeWindowsName(name, nameID, language):
	"""Create a NameRecord for the Microsoft Windows platform

	'language' is an arbitrary IETF BCP 47 language identifier such
	as 'en', 'de-CH', 'de-AT-1901', or 'fa-Latn'. If Microsoft Windows
	does not support the desired language, the result will be None.
	Future versions of fonttools might return a NameRecord for the
	OpenType 'name' table format 1, but this is not implemented yet.
	"""
	langID = _WINDOWS_LANGUAGE_CODES.get(language.lower())
	if langID is not None:
		return makeName(name, nameID, 3, 1, langID)
	else:
		log.warning("cannot add Windows name in language %s "
		            "because fonttools does not yet support "
		            "name table format 1" % language)
		return None

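# Hedged illustration (the nameID 5 below is just an example value): because
# 'de-CH' has a Windows language code in _WINDOWS_LANGUAGES, a call like
#
#   rec = _makeWindowsName('Blass', 5, 'de-CH')
#
# would return a record with platformID 3, platEncID 1 and langID 0x0807,
# while an exotic tag with no Windows code would log a warning and return None.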

def _makeMacName(name, nameID, language, font=None):
	"""Create a NameRecord for Apple platforms

	'language' is an arbitrary IETF BCP 47 language identifier such
	as 'en', 'de-CH', 'de-AT-1901', or 'fa-Latn'. When possible, we
	create a Macintosh NameRecord that is understood by old applications
	(platform ID 1 and an old-style Macintosh language enum). If this
	is not possible, we create a Unicode NameRecord (platform ID 0)
	whose language points to the font’s 'ltag' table. The latter
	can encode any string in any language, but legacy applications
	might not recognize the format (in which case they will ignore
	those names).

	'font' should be the TTFont for which you want to create a name.
	If 'font' is None, we only return NameRecords for legacy Macintosh;
	in that case, the result will be None for names that need to
	be encoded with an 'ltag' table.

	See the section “The language identifier” in Apple’s specification:
	https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6name.html
	"""
	macLang = _MAC_LANGUAGE_CODES.get(language.lower())
	macScript = _MAC_LANGUAGE_TO_SCRIPT.get(macLang)
	if macLang is not None and macScript is not None:
		encoding = getEncoding(1, macScript, macLang, default="ascii")
		# Check if we can actually encode this name. If we can't,
		# for example because we have no support for the legacy
		# encoding, or because the name string contains Unicode
		# characters that the legacy encoding cannot represent,
		# we fall back to encoding the name in Unicode and put
		# the language tag into the ltag table.
		try:
			_ = tobytes(name, encoding, errors="strict")
			return makeName(name, nameID, 1, macScript, macLang)
		except UnicodeEncodeError:
			pass
	if font is not None:
		ltag = font.tables.get("ltag")
		if ltag is None:
			ltag = font["ltag"] = newTable("ltag")
		# 0 = Unicode; 4 = “Unicode 2.0 or later semantics (non-BMP characters allowed)”
		# “The preferred platform-specific code for Unicode would be 3 or 4.”
		# https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6name.html
		return makeName(name, nameID, 0, 4, ltag.addTag(language))
	else:
		log.warning("cannot store language %s into 'ltag' table "
		            "without having access to the TTFont object" %
		            language)
		return None

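# Hedged illustration: for 'de' the lookups above give macLang 2 and macScript 0
# (smRoman), and a string such as 'Bloß' fits the mac_roman encoding, so
# _makeMacName('Bloß', 5, 'de') would return a legacy platformID 1 record;
# a name that cannot be encoded that way needs the TTFont/'ltag' fallback.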

class NameRecord(object):

	def getEncoding(self, default='ascii'):
		"""Returns the Python encoding name for this name entry based on its platformID,
		platEncID, and langID.  If the encoding for these values is not known, by default
		'ascii' is returned.  That can be overridden by passing a value to the default
		argument.
		"""
		return getEncoding(self.platformID, self.platEncID, self.langID, default)

	def encodingIsUnicodeCompatible(self):
		return self.getEncoding(None) in ['utf_16_be', 'ucs2be', 'ascii', 'latin1']

	def __str__(self):
		return self.toStr(errors='backslashreplace')

	def isUnicode(self):
		return (self.platformID == 0 or
			(self.platformID == 3 and self.platEncID in [0, 1, 10]))

	def toUnicode(self, errors='strict'):
		"""
		If self.string is a Unicode string, return it; otherwise try decoding the
		bytes in self.string to a Unicode string using the encoding of this
		entry as returned by self.getEncoding(); Note that self.getEncoding()
		returns 'ascii' if the encoding is unknown to the library.

		Certain heuristics are performed to recover data from bytes that are
		ill-formed in the chosen encoding, or that otherwise look misencoded
		(mostly around bad UTF-16BE encoded bytes, or bytes that look like UTF-16BE
		but marked otherwise).  If the bytes are ill-formed and the heuristics fail,
		the error is handled according to the errors parameter to this function, which is
		passed to the underlying decode() function; by default it throws a
		UnicodeDecodeError exception.

		Note: The mentioned heuristics mean that roundtripping a font to XML and back
		to binary might recover some misencoded data whereas just loading the font
		and saving it back will not change them.
		"""
		def isascii(b):
			return (b >= 0x20 and b <= 0x7E) or b in [0x09, 0x0A, 0x0D]
		encoding = self.getEncoding()
		string = self.string

		if encoding == 'utf_16_be' and len(string) % 2 == 1:
			# Recover badly encoded UTF-16 strings that have an odd number of bytes:
			# - If the last byte is zero, drop it.  Otherwise,
			# - If all the odd bytes are zero and all the even bytes are ASCII,
			#   prepend one zero byte.  Otherwise,
			# - If first byte is zero and all other bytes are ASCII, insert zero
			#   bytes between consecutive ASCII bytes.
			#
			# (Yes, I've seen all of these in the wild... sigh)
			if byteord(string[-1]) == 0:
				string = string[:-1]
			elif all(byteord(b) == 0 if i % 2 else isascii(byteord(b)) for i,b in enumerate(string)):
				string = b'\0' + string
			elif byteord(string[0]) == 0 and all(isascii(byteord(b)) for b in string[1:]):
				string = bytesjoin(b'\0'+bytechr(byteord(b)) for b in string[1:])

		string = tounicode(string, encoding=encoding, errors=errors)

		# If the decoded string still looks like UTF-16BE, it suggests a double-encoding.
		# Fix it up.
		if all(ord(c) == 0 if i % 2 == 0 else isascii(ord(c)) for i,c in enumerate(string)):
			# If the string claims to be Mac encoding, but looks like UTF-16BE with
			# ASCII text, narrow it down.
			string = ''.join(c for c in string[1::2])

		return string

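	# Hedged illustration of the normal (non-heuristic) path: a Mac Roman
	# record holding the bytes b'Blass' decodes to the text u'Blass'.
	#
	#   rec = makeName(b'Blass', 1, 1, 0, 0)  # bytes assumed already Mac Roman
	#   text = rec.toUnicode()                # -> u'Blass'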
	def toBytes(self, errors='strict'):
		""" If self.string is a bytes object, return it; otherwise try encoding
		the Unicode string in self.string to bytes using the encoding of this
		entry as returned by self.getEncoding(); Note that self.getEncoding()
		returns 'ascii' if the encoding is unknown to the library.

		If the Unicode string cannot be encoded to bytes in the chosen encoding,
		the error is handled according to the errors parameter to this function,
		which is passed to the underlying encode() function; by default it throws a
		UnicodeEncodeError exception.
		"""
		return tobytes(self.string, encoding=self.getEncoding(), errors=errors)

	def toStr(self, errors='strict'):
		if str == bytes:
			# python 2
			return self.toBytes(errors)
		else:
			# python 3
			return self.toUnicode(errors)

	def toXML(self, writer, ttFont):
		try:
			unistr = self.toUnicode()
		except UnicodeDecodeError:
			unistr = None
		attrs = [
				("nameID", self.nameID),
				("platformID", self.platformID),
				("platEncID", self.platEncID),
				("langID", hex(self.langID)),
			]

		if unistr is None or not self.encodingIsUnicodeCompatible():
			attrs.append(("unicode", unistr is not None))

		writer.begintag("namerecord", attrs)
		writer.newline()
		if unistr is not None:
			writer.write(unistr)
		else:
			writer.write8bit(self.string)
		writer.newline()
		writer.endtag("namerecord")
		writer.newline()

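	# Hedged sketch of the TTX output produced by toXML for a decodable record
	# (attribute values are examples only; exact whitespace depends on the writer):
	#
	#   <namerecord nameID="1" platformID="3" platEncID="1" langID="0x409">
	#     My Family
	#   </namerecord>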
	def fromXML(self, name, attrs, content, ttFont):
		self.nameID = safeEval(attrs["nameID"])
		self.platformID = safeEval(attrs["platformID"])
		self.platEncID = safeEval(attrs["platEncID"])
		self.langID = safeEval(attrs["langID"])
		s = strjoin(content).strip()
		encoding = self.getEncoding()
		if self.encodingIsUnicodeCompatible() or safeEval(attrs.get("unicode", "False")):
			self.string = s.encode(encoding)
		else:
			# This is the inverse of write8bit...
			self.string = s.encode("latin1")

	def __lt__(self, other):
		if type(self) != type(other):
			return NotImplemented

		# implemented so that list.sort() sorts according to the spec.
		selfTuple = (
			getattr(self, "platformID", None),
			getattr(self, "platEncID", None),
			getattr(self, "langID", None),
			getattr(self, "nameID", None),
			getattr(self, "string", None),
		)
		otherTuple = (
			getattr(other, "platformID", None),
			getattr(other, "platEncID", None),
			getattr(other, "langID", None),
			getattr(other, "nameID", None),
			getattr(other, "string", None),
		)
		return selfTuple < otherTuple

	def __repr__(self):
		return "<NameRecord NameID=%d; PlatformID=%d; LanguageID=%d>" % (
				self.nameID, self.platformID, self.langID)


# Windows language ID → IETF BCP-47 language tag
#
# While Microsoft indicates a region/country for all its language
# IDs, we follow Unicode practice by omitting “most likely subtags”
# as per Unicode CLDR. For example, English is simply “en” and not
# “en-Latn” because according to Unicode, the default script
# for English is Latin.
#
# http://www.unicode.org/cldr/charts/latest/supplemental/likely_subtags.html
# http://www.iana.org/assignments/language-subtag-registry/language-subtag-registry
_WINDOWS_LANGUAGES = {
    0x0436: 'af',
    0x041C: 'sq',
    0x0484: 'gsw',
    0x045E: 'am',
    0x1401: 'ar-DZ',
    0x3C01: 'ar-BH',
    0x0C01: 'ar',
    0x0801: 'ar-IQ',
    0x2C01: 'ar-JO',
    0x3401: 'ar-KW',
    0x3001: 'ar-LB',
    0x1001: 'ar-LY',
    0x1801: 'ary',
    0x2001: 'ar-OM',
    0x4001: 'ar-QA',
    0x0401: 'ar-SA',
    0x2801: 'ar-SY',
    0x1C01: 'aeb',
    0x3801: 'ar-AE',
    0x2401: 'ar-YE',
    0x042B: 'hy',
    0x044D: 'as',
    0x082C: 'az-Cyrl',
    0x042C: 'az',
    0x046D: 'ba',
    0x042D: 'eu',
    0x0423: 'be',
    0x0845: 'bn',
    0x0445: 'bn-IN',
    0x201A: 'bs-Cyrl',
    0x141A: 'bs',
    0x047E: 'br',
    0x0402: 'bg',
    0x0403: 'ca',
    0x0C04: 'zh-HK',
    0x1404: 'zh-MO',
    0x0804: 'zh',
    0x1004: 'zh-SG',
    0x0404: 'zh-TW',
    0x0483: 'co',
    0x041A: 'hr',
    0x101A: 'hr-BA',
    0x0405: 'cs',
    0x0406: 'da',
    0x048C: 'prs',
    0x0465: 'dv',
    0x0813: 'nl-BE',
    0x0413: 'nl',
    0x0C09: 'en-AU',
    0x2809: 'en-BZ',
    0x1009: 'en-CA',
    0x2409: 'en-029',
    0x4009: 'en-IN',
    0x1809: 'en-IE',
    0x2009: 'en-JM',
    0x4409: 'en-MY',
    0x1409: 'en-NZ',
    0x3409: 'en-PH',
    0x4809: 'en-SG',
    0x1C09: 'en-ZA',
    0x2C09: 'en-TT',
    0x0809: 'en-GB',
    0x0409: 'en',
    0x3009: 'en-ZW',
    0x0425: 'et',
    0x0438: 'fo',
    0x0464: 'fil',
    0x040B: 'fi',
    0x080C: 'fr-BE',
    0x0C0C: 'fr-CA',
    0x040C: 'fr',
    0x140C: 'fr-LU',
    0x180C: 'fr-MC',
    0x100C: 'fr-CH',
    0x0462: 'fy',
    0x0456: 'gl',
    0x0437: 'ka',
    0x0C07: 'de-AT',
    0x0407: 'de',
    0x1407: 'de-LI',
    0x1007: 'de-LU',
    0x0807: 'de-CH',
    0x0408: 'el',
    0x046F: 'kl',
    0x0447: 'gu',
    0x0468: 'ha',
    0x040D: 'he',
    0x0439: 'hi',
    0x040E: 'hu',
    0x040F: 'is',
    0x0470: 'ig',
    0x0421: 'id',
    0x045D: 'iu',
    0x085D: 'iu-Latn',
    0x083C: 'ga',
    0x0434: 'xh',
    0x0435: 'zu',
    0x0410: 'it',
    0x0810: 'it-CH',
    0x0411: 'ja',
    0x044B: 'kn',
    0x043F: 'kk',
    0x0453: 'km',
    0x0486: 'quc',
    0x0487: 'rw',
    0x0441: 'sw',
    0x0457: 'kok',
    0x0412: 'ko',
    0x0440: 'ky',
    0x0454: 'lo',
    0x0426: 'lv',
    0x0427: 'lt',
    0x082E: 'dsb',
    0x046E: 'lb',
    0x042F: 'mk',
    0x083E: 'ms-BN',
    0x043E: 'ms',
    0x044C: 'ml',
    0x043A: 'mt',
    0x0481: 'mi',
    0x047A: 'arn',
    0x044E: 'mr',
    0x047C: 'moh',
    0x0450: 'mn',
    0x0850: 'mn-CN',
    0x0461: 'ne',
    0x0414: 'nb',
    0x0814: 'nn',
    0x0482: 'oc',
    0x0448: 'or',
    0x0463: 'ps',
    0x0415: 'pl',
    0x0416: 'pt',
    0x0816: 'pt-PT',
    0x0446: 'pa',
    0x046B: 'qu-BO',
    0x086B: 'qu-EC',
    0x0C6B: 'qu',
    0x0418: 'ro',
    0x0417: 'rm',
    0x0419: 'ru',
    0x243B: 'smn',
    0x103B: 'smj-NO',
    0x143B: 'smj',
    0x0C3B: 'se-FI',
    0x043B: 'se',
    0x083B: 'se-SE',
    0x203B: 'sms',
    0x183B: 'sma-NO',
    0x1C3B: 'sms',
    0x044F: 'sa',
    0x1C1A: 'sr-Cyrl-BA',
    0x0C1A: 'sr',
    0x181A: 'sr-Latn-BA',
    0x081A: 'sr-Latn',
    0x046C: 'nso',
    0x0432: 'tn',
    0x045B: 'si',
    0x041B: 'sk',
    0x0424: 'sl',
    0x2C0A: 'es-AR',
    0x400A: 'es-BO',
    0x340A: 'es-CL',
    0x240A: 'es-CO',
    0x140A: 'es-CR',
    0x1C0A: 'es-DO',
    0x300A: 'es-EC',
    0x440A: 'es-SV',
    0x100A: 'es-GT',
    0x480A: 'es-HN',
    0x080A: 'es-MX',
    0x4C0A: 'es-NI',
    0x180A: 'es-PA',
    0x3C0A: 'es-PY',
    0x280A: 'es-PE',
    0x500A: 'es-PR',

    # Microsoft has defined two different language codes for
    # “Spanish with modern sorting” and “Spanish with traditional
    # sorting”. This makes sense for collation APIs, and it would be
    # possible to express this in BCP 47 language tags via Unicode
    # extensions (eg., “es-u-co-trad” is “Spanish with traditional
    # sorting”). However, for storing names in fonts, this distinction
    # does not make sense, so we use “es” in both cases.
    0x0C0A: 'es',
    0x040A: 'es',

    0x540A: 'es-US',
    0x380A: 'es-UY',
    0x200A: 'es-VE',
    0x081D: 'sv-FI',
    0x041D: 'sv',
    0x045A: 'syr',
    0x0428: 'tg',
    0x085F: 'tzm',
    0x0449: 'ta',
    0x0444: 'tt',
    0x044A: 'te',
    0x041E: 'th',
    0x0451: 'bo',
    0x041F: 'tr',
    0x0442: 'tk',
    0x0480: 'ug',
    0x0422: 'uk',
    0x042E: 'hsb',
    0x0420: 'ur',
    0x0843: 'uz-Cyrl',
    0x0443: 'uz',
    0x042A: 'vi',
    0x0452: 'cy',
    0x0488: 'wo',
    0x0485: 'sah',
    0x0478: 'ii',
    0x046A: 'yo',
}


_MAC_LANGUAGES = {
    0: 'en',
    1: 'fr',
    2: 'de',
    3: 'it',
    4: 'nl',
    5: 'sv',
    6: 'es',
    7: 'da',
    8: 'pt',
    9: 'no',
    10: 'he',
    11: 'ja',
    12: 'ar',
    13: 'fi',
    14: 'el',
    15: 'is',
    16: 'mt',
    17: 'tr',
    18: 'hr',
    19: 'zh-Hant',
    20: 'ur',
    21: 'hi',
    22: 'th',
    23: 'ko',
    24: 'lt',
    25: 'pl',
    26: 'hu',
    27: 'et',
    28: 'lv',
    29: 'se',
    30: 'fo',
    31: 'fa',
    32: 'ru',
    33: 'zh',
    34: 'nl-BE',
    35: 'ga',
    36: 'sq',
    37: 'ro',
    38: 'cs',
    39: 'sk',
    40: 'sl',
    41: 'yi',
    42: 'sr',
    43: 'mk',
    44: 'bg',
    45: 'uk',
    46: 'be',
    47: 'uz',
    48: 'kk',
    49: 'az-Cyrl',
    50: 'az-Arab',
    51: 'hy',
    52: 'ka',
    53: 'mo',
    54: 'ky',
    55: 'tg',
    56: 'tk',
    57: 'mn-CN',
    58: 'mn',
    59: 'ps',
    60: 'ks',
    61: 'ku',
    62: 'sd',
    63: 'bo',
    64: 'ne',
    65: 'sa',
    66: 'mr',
    67: 'bn',
    68: 'as',
    69: 'gu',
    70: 'pa',
    71: 'or',
    72: 'ml',
    73: 'kn',
    74: 'ta',
    75: 'te',
    76: 'si',
    77: 'my',
    78: 'km',
    79: 'lo',
    80: 'vi',
    81: 'id',
    82: 'tl',
    83: 'ms',
    84: 'ms-Arab',
    85: 'am',
    86: 'ti',
    87: 'om',
    88: 'so',
    89: 'sw',
    90: 'rw',
    91: 'rn',
    92: 'ny',
    93: 'mg',
    94: 'eo',
    128: 'cy',
    129: 'eu',
    130: 'ca',
    131: 'la',
    132: 'qu',
    133: 'gn',
    134: 'ay',
    135: 'tt',
    136: 'ug',
    137: 'dz',
    138: 'jv',
    139: 'su',
    140: 'gl',
    141: 'af',
    142: 'br',
    143: 'iu',
    144: 'gd',
    145: 'gv',
    146: 'ga',
    147: 'to',
    148: 'el-polyton',
    149: 'kl',
    150: 'az',
    151: 'nn',
}


_WINDOWS_LANGUAGE_CODES = {lang.lower(): code for code, lang in _WINDOWS_LANGUAGES.items()}
_MAC_LANGUAGE_CODES = {lang.lower(): code for code, lang in _MAC_LANGUAGES.items()}

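# Hedged illustration of the reverse maps built above: for example,
# _WINDOWS_LANGUAGE_CODES.get('en') is 0x0409 and _MAC_LANGUAGE_CODES.get('de')
# is 2, which is how _makeWindowsName and _makeMacName turn BCP 47 tags back
# into platform-specific language IDs.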

# MacOS language ID → MacOS script ID
#
# Note that the script ID is not sufficient to determine what encoding
# to use in TrueType files. For some languages, MacOS used a modification
# of a mainstream script. For example, an Icelandic name would be stored
# with smRoman in the TrueType naming table, but the actual encoding
# is a special Icelandic version of the normal Macintosh Roman encoding.
# As another example, Inuktitut uses an 8-bit encoding for Canadian Aboriginal
# Syllables but MacOS had run out of available script codes, so this was
# done as a (pretty radical) “modification” of Ethiopic.
#
# http://unicode.org/Public/MAPPINGS/VENDORS/APPLE/Readme.txt
_MAC_LANGUAGE_TO_SCRIPT = {
    0: 0,  # langEnglish → smRoman
    1: 0,  # langFrench → smRoman
    2: 0,  # langGerman → smRoman
    3: 0,  # langItalian → smRoman
    4: 0,  # langDutch → smRoman
    5: 0,  # langSwedish → smRoman
    6: 0,  # langSpanish → smRoman
    7: 0,  # langDanish → smRoman
    8: 0,  # langPortuguese → smRoman
    9: 0,  # langNorwegian → smRoman
    10: 5,  # langHebrew → smHebrew
    11: 1,  # langJapanese → smJapanese
    12: 4,  # langArabic → smArabic
    13: 0,  # langFinnish → smRoman
    14: 6,  # langGreek → smGreek
    15: 0,  # langIcelandic → smRoman (modified)
    16: 0,  # langMaltese → smRoman
    17: 0,  # langTurkish → smRoman (modified)
    18: 0,  # langCroatian → smRoman (modified)
    19: 2,  # langTradChinese → smTradChinese
    20: 4,  # langUrdu → smArabic
    21: 9,  # langHindi → smDevanagari
    22: 21,  # langThai → smThai
    23: 3,  # langKorean → smKorean
    24: 29,  # langLithuanian → smCentralEuroRoman
    25: 29,  # langPolish → smCentralEuroRoman
    26: 29,  # langHungarian → smCentralEuroRoman
    27: 29,  # langEstonian → smCentralEuroRoman
    28: 29,  # langLatvian → smCentralEuroRoman
    29: 0,  # langSami → smRoman
    30: 0,  # langFaroese → smRoman (modified)
    31: 4,  # langFarsi → smArabic (modified)
    32: 7,  # langRussian → smCyrillic
    33: 25,  # langSimpChinese → smSimpChinese
    34: 0,  # langFlemish → smRoman
    35: 0,  # langIrishGaelic → smRoman (modified)
    36: 0,  # langAlbanian → smRoman
    37: 0,  # langRomanian → smRoman (modified)
    38: 29,  # langCzech → smCentralEuroRoman
    39: 29,  # langSlovak → smCentralEuroRoman
    40: 0,  # langSlovenian → smRoman (modified)
    41: 5,  # langYiddish → smHebrew
    42: 7,  # langSerbian → smCyrillic
    43: 7,  # langMacedonian → smCyrillic
    44: 7,  # langBulgarian → smCyrillic
    45: 7,  # langUkrainian → smCyrillic (modified)
    46: 7,  # langByelorussian → smCyrillic
    47: 7,  # langUzbek → smCyrillic
    48: 7,  # langKazakh → smCyrillic
    49: 7,  # langAzerbaijani → smCyrillic
    50: 4,  # langAzerbaijanAr → smArabic
    51: 24,  # langArmenian → smArmenian
    52: 23,  # langGeorgian → smGeorgian
    53: 7,  # langMoldavian → smCyrillic
    54: 7,  # langKirghiz → smCyrillic
    55: 7,  # langTajiki → smCyrillic
    56: 7,  # langTurkmen → smCyrillic
    57: 27,  # langMongolian → smMongolian
    58: 7,  # langMongolianCyr → smCyrillic
    59: 4,  # langPashto → smArabic
    60: 4,  # langKurdish → smArabic
    61: 4,  # langKashmiri → smArabic
    62: 4,  # langSindhi → smArabic
    63: 26,  # langTibetan → smTibetan
    64: 9,  # langNepali → smDevanagari
    65: 9,  # langSanskrit → smDevanagari
    66: 9,  # langMarathi → smDevanagari
    67: 13,  # langBengali → smBengali
    68: 13,  # langAssamese → smBengali
    69: 11,  # langGujarati → smGujarati
    70: 10,  # langPunjabi → smGurmukhi
    71: 12,  # langOriya → smOriya
    72: 17,  # langMalayalam → smMalayalam
    73: 16,  # langKannada → smKannada
    74: 14,  # langTamil → smTamil
    75: 15,  # langTelugu → smTelugu
    76: 18,  # langSinhalese → smSinhalese
    77: 19,  # langBurmese → smBurmese
    78: 20,  # langKhmer → smKhmer
    79: 22,  # langLao → smLao
    80: 30,  # langVietnamese → smVietnamese
    81: 0,  # langIndonesian → smRoman
    82: 0,  # langTagalog → smRoman
    83: 0,  # langMalayRoman → smRoman
    84: 4,  # langMalayArabic → smArabic
    85: 28,  # langAmharic → smEthiopic
    86: 28,  # langTigrinya → smEthiopic
    87: 28,  # langOromo → smEthiopic
    88: 0,  # langSomali → smRoman
    89: 0,  # langSwahili → smRoman
    90: 0,  # langKinyarwanda → smRoman
    91: 0,  # langRundi → smRoman
    92: 0,  # langNyanja → smRoman
    93: 0,  # langMalagasy → smRoman
    94: 0,  # langEsperanto → smRoman
    128: 0,  # langWelsh → smRoman (modified)
    129: 0,  # langBasque → smRoman
    130: 0,  # langCatalan → smRoman
    131: 0,  # langLatin → smRoman
    132: 0,  # langQuechua → smRoman
    133: 0,  # langGuarani → smRoman
    134: 0,  # langAymara → smRoman
    135: 7,  # langTatar → smCyrillic
    136: 4,  # langUighur → smArabic
    137: 26,  # langDzongkha → smTibetan
    138: 0,  # langJavaneseRom → smRoman
    139: 0,  # langSundaneseRom → smRoman
    140: 0,  # langGalician → smRoman
    141: 0,  # langAfrikaans → smRoman
    142: 0,  # langBreton → smRoman (modified)
    143: 28,  # langInuktitut → smEthiopic (modified)
    144: 0,  # langScottishGaelic → smRoman (modified)
    145: 0,  # langManxGaelic → smRoman (modified)
    146: 0,  # langIrishGaelicScript → smRoman (modified)
    147: 0,  # langTongan → smRoman
    148: 6,  # langGreekAncient → smGreek
    149: 0,  # langGreenlandic → smRoman
    150: 0,  # langAzerbaijanRoman → smRoman
    151: 0,  # langNynorsk → smRoman
}