# -*- coding: utf-8 -*-
from __future__ import print_function, division, absolute_import
from __future__ import unicode_literals
from fontTools.misc.py23 import *
from fontTools.misc import sstruct
from fontTools.misc.textTools import safeEval
from fontTools.misc.encodingTools import getEncoding
from fontTools.ttLib import newTable
from . import DefaultTable
import struct
import logging


log = logging.getLogger(__name__)

nameRecordFormat = """
		>	# big endian
		platformID:	H
		platEncID:	H
		langID:		H
		nameID:		H
		length:		H
		offset:		H
"""

nameRecordSize = sstruct.calcsize(nameRecordFormat)


class table__n_a_m_e(DefaultTable.DefaultTable):
	dependencies = ["ltag"]

	def decompile(self, data, ttFont):
		format, n, stringOffset = struct.unpack(b">HHH", data[:6])
		expectedStringOffset = 6 + n * nameRecordSize
		if stringOffset != expectedStringOffset:
			log.error(
				"'name' table stringOffset incorrect. Expected: %s; Actual: %s",
				expectedStringOffset, stringOffset)
		stringData = data[stringOffset:]
		data = data[6:]
		self.names = []
		for i in range(n):
			if len(data) < 12:
				log.error('skipping malformed name record #%d', i)
				continue
			name, data = sstruct.unpack2(nameRecordFormat, data, NameRecord())
			name.string = stringData[name.offset:name.offset+name.length]
			if name.offset + name.length > len(stringData):
				log.error('skipping malformed name record #%d', i)
				continue
			assert len(name.string) == name.length
			#if (name.platEncID, name.platformID) in ((0, 0), (1, 3)):
			#	if len(name.string) % 2:
			#		print "2-byte string doesn't have even length!"
			#		print name.__dict__
			del name.offset, name.length
			self.names.append(name)

	def compile(self, ttFont):
		if not hasattr(self, "names"):
			# only happens when there are NO name table entries read
			# from the TTX file
			self.names = []
		names = self.names
		names.sort()  # sort according to the spec; see NameRecord.__lt__()
		stringData = b""
		format = 0
		n = len(names)
		stringOffset = 6 + n * sstruct.calcsize(nameRecordFormat)
		data = struct.pack(b">HHH", format, n, stringOffset)
		lastoffset = 0
		done = {}  # remember the data so we can reuse the "pointers"
		for name in names:
			string = name.toBytes()
			if string in done:
				name.offset, name.length = done[string]
			else:
				name.offset, name.length = done[string] = len(stringData), len(string)
				stringData = bytesjoin([stringData, string])
			data = data + sstruct.pack(nameRecordFormat, name)
		return data + stringData

	def toXML(self, writer, ttFont):
		for name in self.names:
			name.toXML(writer, ttFont)

	def fromXML(self, name, attrs, content, ttFont):
		if name != "namerecord":
			return  # ignore unknown tags
		if not hasattr(self, "names"):
			self.names = []
		name = NameRecord()
		self.names.append(name)
		name.fromXML(name, attrs, content, ttFont)

	def getName(self, nameID, platformID, platEncID, langID=None):
		for namerecord in self.names:
			if (namerecord.nameID == nameID and
					namerecord.platformID == platformID and
					namerecord.platEncID == platEncID):
				if langID is None or namerecord.langID == langID:
					return namerecord
		return None  # not found

	def getDebugName(self, nameID):
		englishName = someName = None
		for name in self.names:
			if name.nameID != nameID:
				continue
			try:
				unistr = name.toUnicode()
			except UnicodeDecodeError:
				continue

			someName = unistr
			if (name.platformID, name.langID) in ((1, 0), (3, 0x409)):
				englishName = unistr
				break
		if englishName:
			return englishName
		elif someName:
			return someName
		else:
			return None
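
	# A hedged usage sketch (illustrative only, not part of the table API):
	# reading names back, assuming a hypothetical font file "MyFont.ttf".
	#
	#   from fontTools.ttLib import TTFont
	#   font = TTFont("MyFont.ttf")           # hypothetical path
	#   name = font["name"]
	#   family = name.getDebugName(1)         # best-effort Unicode family name, or None
	#   rec = name.getName(6, 3, 1, 0x409)    # PostScript name: Windows, Unicode BMP, US English
	#   if rec is not None:
	#       print(rec.toUnicode())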

	def setName(self, string, nameID, platformID, platEncID, langID):
		""" Set the 'string' for the name record identified by 'nameID', 'platformID',
		'platEncID' and 'langID'. If a record with that nameID doesn't exist, create it
		and append to the name table.

		'string' can be of type `str` (`unicode` in PY2) or `bytes`. In the latter case,
		it is assumed to be already encoded with the correct platform-specific encoding
		identified by the (platformID, platEncID, langID) triplet. A warning is issued
		to prevent unexpected results.
		"""
		if not hasattr(self, 'names'):
			self.names = []
		if not isinstance(string, unicode):
			if isinstance(string, bytes):
				log.warning(
					"name string is bytes, ensure it's correctly encoded: %r", string)
			else:
				raise TypeError(
					"expected unicode or bytes, found %s: %r" % (
						type(string).__name__, string))
		namerecord = self.getName(nameID, platformID, platEncID, langID)
		if namerecord:
			namerecord.string = string
		else:
			self.names.append(makeName(string, nameID, platformID, platEncID, langID))

	def _findUnusedNameID(self, minNameID=256):
		"""Finds an unused name id.

		The nameID is assigned in the range between 'minNameID' and 32767 (inclusive),
		following the last nameID in the name table.
		"""
		names = getattr(self, 'names', [])
		nameID = 1 + max([n.nameID for n in names] + [minNameID - 1])
		if nameID > 32767:
			raise ValueError("nameID must be less than 32768")
		return nameID

	def addMultilingualName(self, names, ttFont=None, nameID=None,
				windows=True, mac=True):
		"""Add a multilingual name, returning its name ID

		'names' is a dictionary with the name in multiple languages,
		such as {'en': 'Pale', 'de': 'Blaß', 'de-CH': 'Blass'}.
		The keys can be arbitrary IETF BCP 47 language codes;
		the values are Unicode strings.

		'ttFont' is the TTFont to which the names are added, or None.
		If present, the font's 'ltag' table can get populated
		to store exotic language codes, which allows encoding
		names that otherwise cannot get encoded at all.

		'nameID' is the name ID to be used, or None to let the library
		pick an unused name ID.

		If 'windows' is True, a platformID=3 name record will be added.
		If 'mac' is True, a platformID=1 name record will be added.
		"""
		if not hasattr(self, 'names'):
			self.names = []
		if nameID is None:
			nameID = self._findUnusedNameID()
		# TODO: Should minimize BCP 47 language codes.
		# https://github.com/fonttools/fonttools/issues/930
		for lang, name in sorted(names.items()):
			if windows:
				windowsName = _makeWindowsName(name, nameID, lang)
				if windowsName is not None:
					self.names.append(windowsName)
				else:
					# We cannot make a Windows name: make sure we add a
					# Mac name as a fallback. This can happen for exotic
					# BCP47 language tags that have no Windows language code.
					mac = True
			if mac:
				macName = _makeMacName(name, nameID, lang, ttFont)
				if macName is not None:
					self.names.append(macName)
		return nameID
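
	# A hedged usage sketch: writing names, assuming 'font' is an already-opened
	# TTFont and the IDs shown are only examples.
	#
	#   name = font["name"]
	#   name.setName("My Family", 1, 3, 1, 0x409)        # overwrite or add nameID 1
	#   paleID = name.addMultilingualName(
	#       {"en": "Pale", "de": "Blaß"}, ttFont=font)   # returns the assigned nameID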

	def addName(self, string, platforms=((1, 0, 0), (3, 1, 0x409)), minNameID=255):
		""" Add a new name record containing 'string' for each (platformID, platEncID,
		langID) tuple specified in the 'platforms' list.

		The nameID is assigned in the range between 'minNameID'+1 and 32767 (inclusive),
		following the last nameID in the name table.
		If no 'platforms' are specified, two English name records are added, one for the
		Macintosh (platformID=1), and one for the Windows platform (3).

		The 'string' must be a Unicode string, so it can be encoded with different,
		platform-specific encodings.

		Return the new nameID.
		"""
		assert len(platforms) > 0, \
			"'platforms' must contain at least one (platformID, platEncID, langID) tuple"
		if not hasattr(self, 'names'):
			self.names = []
		if not isinstance(string, unicode):
			raise TypeError(
				"expected %s, found %s: %r" % (
					unicode.__name__, type(string).__name__, string))
		nameID = self._findUnusedNameID(minNameID + 1)
		for platformID, platEncID, langID in platforms:
			self.names.append(makeName(string, nameID, platformID, platEncID, langID))
		return nameID
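
	# A hedged usage sketch: adding one string on both default platforms, assuming
	# 'font' is an already-opened TTFont.
	#
	#   name = font["name"]
	#   newID = name.addName("Light Condensed")   # Mac (1, 0, 0) and Windows (3, 1, 0x409)
	#   # newID is >= 256 by default and can now be referenced from other tables.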


def makeName(string, nameID, platformID, platEncID, langID):
	name = NameRecord()
	name.string, name.nameID, name.platformID, name.platEncID, name.langID = (
		string, nameID, platformID, platEncID, langID)
	return name


def _makeWindowsName(name, nameID, language):
	"""Create a NameRecord for the Microsoft Windows platform

	'language' is an arbitrary IETF BCP 47 language identifier such
	as 'en', 'de-CH', 'de-AT-1901', or 'fa-Latn'. If Microsoft Windows
	does not support the desired language, the result will be None.
	Future versions of fonttools might return a NameRecord for the
	OpenType 'name' table format 1, but this is not implemented yet.
	"""
	langID = _WINDOWS_LANGUAGE_CODES.get(language.lower())
	if langID is not None:
		return makeName(name, nameID, 3, 1, langID)
	else:
		log.warning("cannot add Windows name in language %s "
			"because fonttools does not yet support "
			"name table format 1" % language)
		return None


def _makeMacName(name, nameID, language, font=None):
	"""Create a NameRecord for Apple platforms

	'language' is an arbitrary IETF BCP 47 language identifier such
	as 'en', 'de-CH', 'de-AT-1901', or 'fa-Latn'. When possible, we
	create a Macintosh NameRecord that is understood by old applications
	(platform ID 1 and an old-style Macintosh language enum). If this
	is not possible, we create a Unicode NameRecord (platform ID 0)
	whose language points to the font’s 'ltag' table. The latter
	can encode any string in any language, but legacy applications
	might not recognize the format (in which case they will ignore
	those names).

	'font' should be the TTFont for which you want to create a name.
	If 'font' is None, we only return NameRecords for legacy Macintosh;
	in that case, the result will be None for names that need to
	be encoded with an 'ltag' table.

	See the section “The language identifier” in Apple’s specification:
	https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6name.html
	"""
	macLang = _MAC_LANGUAGE_CODES.get(language.lower())
	macScript = _MAC_LANGUAGE_TO_SCRIPT.get(macLang)
	if macLang is not None and macScript is not None:
		encoding = getEncoding(1, macScript, macLang, default="ascii")
		# Check if we can actually encode this name. If we can't,
		# for example because we have no support for the legacy
		# encoding, or because the name string contains Unicode
		# characters that the legacy encoding cannot represent,
		# we fall back to encoding the name in Unicode and put
		# the language tag into the ltag table.
		try:
			_ = tobytes(name, encoding, errors="strict")
			return makeName(name, nameID, 1, macScript, macLang)
		except UnicodeEncodeError:
			pass
	if font is not None:
		ltag = font.tables.get("ltag")
		if ltag is None:
			ltag = font["ltag"] = newTable("ltag")
		# 0 = Unicode; 4 = “Unicode 2.0 or later semantics (non-BMP characters allowed)”
		# “The preferred platform-specific code for Unicode would be 3 or 4.”
		# https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6name.html
		return makeName(name, nameID, 0, 4, ltag.addTag(language))
	else:
		log.warning("cannot store language %s into 'ltag' table "
			"without having access to the TTFont object" %
			language)
		return None


class NameRecord(object):

	def getEncoding(self, default='ascii'):
		"""Returns the Python encoding name for this name entry based on its platformID,
		platEncID, and langID. If encoding for these values is not known, by default
		'ascii' is returned. That can be overridden by passing a value to the default
		argument.
		"""
		return getEncoding(self.platformID, self.platEncID, self.langID, default)

	def encodingIsUnicodeCompatible(self):
		return self.getEncoding(None) in ['utf_16_be', 'ucs2be', 'ascii', 'latin1']

	def __str__(self):
		return self.toStr(errors='backslashreplace')

	def isUnicode(self):
		return (self.platformID == 0 or
			(self.platformID == 3 and self.platEncID in [0, 1, 10]))
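
	# A hedged sketch of how the encoding lookup behaves; the exact codec name is
	# an assumption about fontTools.misc.encodingTools.getEncoding, not a guarantee.
	#
	#   rec = makeName("Foo", 4, 1, 0, 0)   # Macintosh platform, Roman script, English
	#   rec.getEncoding()                   # expected to be "mac_roman"
	#   rec.isUnicode()                     # False; platform 1 is never Unicode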

	def toUnicode(self, errors='strict'):
		"""
		If self.string is a Unicode string, return it; otherwise try decoding the
		bytes in self.string to a Unicode string using the encoding of this
		entry as returned by self.getEncoding(); Note that self.getEncoding()
		returns 'ascii' if the encoding is unknown to the library.

		Certain heuristics are performed to recover data from bytes that are
		ill-formed in the chosen encoding, or that otherwise look misencoded
		(mostly around bad UTF-16BE encoded bytes, or bytes that look like UTF-16BE
		but marked otherwise). If the bytes are ill-formed and the heuristics fail,
		the error is handled according to the errors parameter to this function, which is
		passed to the underlying decode() function; by default it throws a
		UnicodeDecodeError exception.

		Note: The mentioned heuristics mean that roundtripping a font to XML and back
		to binary might recover some misencoded data whereas just loading the font
		and saving it back will not change them.
		"""
		def isascii(b):
			return (b >= 0x20 and b <= 0x7E) or b in [0x09, 0x0A, 0x0D]
		encoding = self.getEncoding()
		string = self.string

		if encoding == 'utf_16_be' and len(string) % 2 == 1:
			# Recover badly encoded UTF-16 strings that have an odd number of bytes:
			# - If the last byte is zero, drop it. Otherwise,
			# - If all the odd bytes are zero and all the even bytes are ASCII,
			#   prepend one zero byte. Otherwise,
			# - If first byte is zero and all other bytes are ASCII, insert zero
			#   bytes between consecutive ASCII bytes.
			#
			# (Yes, I've seen all of these in the wild... sigh)
			if byteord(string[-1]) == 0:
				string = string[:-1]
			elif all(byteord(b) == 0 if i % 2 else isascii(byteord(b)) for i, b in enumerate(string)):
				string = b'\0' + string
			elif byteord(string[0]) == 0 and all(isascii(byteord(b)) for b in string[1:]):
				string = bytesjoin(b'\0' + bytechr(byteord(b)) for b in string[1:])

		string = tounicode(string, encoding=encoding, errors=errors)

		# If the decoded string still looks like UTF-16BE, it suggests a double-encoding.
		# Fix it up.
		if all(ord(c) == 0 if i % 2 == 0 else isascii(ord(c)) for i, c in enumerate(string)):
			# If string claims to be Mac encoding, but looks like UTF-16BE with ASCII text,
			# narrow it down.
			string = ''.join(c for c in string[1::2])

		return string

	def toBytes(self, errors='strict'):
		""" If self.string is a bytes object, return it; otherwise try encoding
		the Unicode string in self.string to bytes using the encoding of this
		entry as returned by self.getEncoding(); Note that self.getEncoding()
		returns 'ascii' if the encoding is unknown to the library.

		If the Unicode string cannot be encoded to bytes in the chosen encoding,
		the error is handled according to the errors parameter to this function,
		which is passed to the underlying encode() function; by default it throws a
		UnicodeEncodeError exception.
		"""
		return tobytes(self.string, encoding=self.getEncoding(), errors=errors)

	def toStr(self, errors='strict'):
		if str == bytes:
			# python 2
			return self.toBytes(errors)
		else:
			# python 3
			return self.toUnicode(errors)

	def toXML(self, writer, ttFont):
		try:
			unistr = self.toUnicode()
		except UnicodeDecodeError:
			unistr = None
		attrs = [
			("nameID", self.nameID),
			("platformID", self.platformID),
			("platEncID", self.platEncID),
			("langID", hex(self.langID)),
		]

		if unistr is None or not self.encodingIsUnicodeCompatible():
			attrs.append(("unicode", unistr is not None))

		writer.begintag("namerecord", attrs)
		writer.newline()
		if unistr is not None:
			writer.write(unistr)
		else:
			writer.write8bit(self.string)
		writer.newline()
		writer.endtag("namerecord")
		writer.newline()

	def fromXML(self, name, attrs, content, ttFont):
		self.nameID = safeEval(attrs["nameID"])
		self.platformID = safeEval(attrs["platformID"])
		self.platEncID = safeEval(attrs["platEncID"])
		self.langID = safeEval(attrs["langID"])
		s = strjoin(content).strip()
		encoding = self.getEncoding()
		if self.encodingIsUnicodeCompatible() or safeEval(attrs.get("unicode", "False")):
			self.string = s.encode(encoding)
		else:
			# This is the inverse of write8bit...
			self.string = s.encode("latin1")
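
	# A hedged sketch of the recovery heuristics above, using made-up bytes:
	#
	#   rec = makeName(b"\x00F\x00o\x00o\x00", 4, 3, 1, 0x409)   # odd-length UTF-16BE
	#   rec.toUnicode()   # the stray trailing zero byte is dropped -> "Foo"
	#   rec.toBytes()     # self.string is already bytes, so it is returned unchanged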

	def __lt__(self, other):
		if type(self) != type(other):
			return NotImplemented

		# implemented so that list.sort() sorts according to the spec.
		selfTuple = (
			getattr(self, "platformID", None),
			getattr(self, "platEncID", None),
			getattr(self, "langID", None),
			getattr(self, "nameID", None),
			getattr(self, "string", None),
		)
		otherTuple = (
			getattr(other, "platformID", None),
			getattr(other, "platEncID", None),
			getattr(other, "langID", None),
			getattr(other, "nameID", None),
			getattr(other, "string", None),
		)
		return selfTuple < otherTuple

	def __repr__(self):
		return "<NameRecord NameID=%d; PlatformID=%d; LanguageID=%d>" % (
			self.nameID, self.platformID, self.langID)


# Windows language ID → IETF BCP-47 language tag
#
# While Microsoft indicates a region/country for all its language
# IDs, we follow Unicode practice by omitting “most likely subtags”
# as per Unicode CLDR. For example, English is simply “en” and not
# “en-Latn” because according to Unicode, the default script
# for English is Latin.
#
# http://www.unicode.org/cldr/charts/latest/supplemental/likely_subtags.html
# http://www.iana.org/assignments/language-subtag-registry/language-subtag-registry
_WINDOWS_LANGUAGES = {
	0x0436: 'af',
	0x041C: 'sq',
	0x0484: 'gsw',
	0x045E: 'am',
	0x1401: 'ar-DZ',
	0x3C01: 'ar-BH',
	0x0C01: 'ar',
	0x0801: 'ar-IQ',
	0x2C01: 'ar-JO',
	0x3401: 'ar-KW',
	0x3001: 'ar-LB',
	0x1001: 'ar-LY',
	0x1801: 'ary',
	0x2001: 'ar-OM',
	0x4001: 'ar-QA',
	0x0401: 'ar-SA',
	0x2801: 'ar-SY',
	0x1C01: 'aeb',
	0x3801: 'ar-AE',
	0x2401: 'ar-YE',
	0x042B: 'hy',
	0x044D: 'as',
	0x082C: 'az-Cyrl',
	0x042C: 'az',
	0x046D: 'ba',
	0x042D: 'eu',
	0x0423: 'be',
	0x0845: 'bn',
	0x0445: 'bn-IN',
	0x201A: 'bs-Cyrl',
	0x141A: 'bs',
	0x047E: 'br',
	0x0402: 'bg',
	0x0403: 'ca',
	0x0C04: 'zh-HK',
	0x1404: 'zh-MO',
	0x0804: 'zh',
	0x1004: 'zh-SG',
	0x0404: 'zh-TW',
	0x0483: 'co',
	0x041A: 'hr',
	0x101A: 'hr-BA',
	0x0405: 'cs',
	0x0406: 'da',
	0x048C: 'prs',
	0x0465: 'dv',
	0x0813: 'nl-BE',
	0x0413: 'nl',
	0x0C09: 'en-AU',
	0x2809: 'en-BZ',
	0x1009: 'en-CA',
	0x2409: 'en-029',
	0x4009: 'en-IN',
	0x1809: 'en-IE',
	0x2009: 'en-JM',
	0x4409: 'en-MY',
	0x1409: 'en-NZ',
	0x3409: 'en-PH',
	0x4809: 'en-SG',
	0x1C09: 'en-ZA',
	0x2C09: 'en-TT',
	0x0809: 'en-GB',
	0x0409: 'en',
	0x3009: 'en-ZW',
	0x0425: 'et',
	0x0438: 'fo',
	0x0464: 'fil',
	0x040B: 'fi',
	0x080C: 'fr-BE',
	0x0C0C: 'fr-CA',
	0x040C: 'fr',
	0x140C: 'fr-LU',
	0x180C: 'fr-MC',
	0x100C: 'fr-CH',
	0x0462: 'fy',
	0x0456: 'gl',
	0x0437: 'ka',
	0x0C07: 'de-AT',
	0x0407: 'de',
	0x1407: 'de-LI',
	0x1007: 'de-LU',
	0x0807: 'de-CH',
	0x0408: 'el',
	0x046F: 'kl',
	0x0447: 'gu',
	0x0468: 'ha',
	0x040D: 'he',
	0x0439: 'hi',
	0x040E: 'hu',
	0x040F: 'is',
	0x0470: 'ig',
	0x0421: 'id',
	0x045D: 'iu',
	0x085D: 'iu-Latn',
	0x083C: 'ga',
	0x0434: 'xh',
	0x0435: 'zu',
	0x0410: 'it',
	0x0810: 'it-CH',
	0x0411: 'ja',
	0x044B: 'kn',
	0x043F: 'kk',
	0x0453: 'km',
	0x0486: 'quc',
	0x0487: 'rw',
	0x0441: 'sw',
	0x0457: 'kok',
	0x0412: 'ko',
	0x0440: 'ky',
	0x0454: 'lo',
	0x0426: 'lv',
	0x0427: 'lt',
	0x082E: 'dsb',
	0x046E: 'lb',
	0x042F: 'mk',
	0x083E: 'ms-BN',
	0x043E: 'ms',
	0x044C: 'ml',
	0x043A: 'mt',
	0x0481: 'mi',
	0x047A: 'arn',
	0x044E: 'mr',
	0x047C: 'moh',
	0x0450: 'mn',
	0x0850: 'mn-CN',
	0x0461: 'ne',
	0x0414: 'nb',
	0x0814: 'nn',
	0x0482: 'oc',
	0x0448: 'or',
	0x0463: 'ps',
	0x0415: 'pl',
	0x0416: 'pt',
	0x0816: 'pt-PT',
	0x0446: 'pa',
	0x046B: 'qu-BO',
	0x086B: 'qu-EC',
	0x0C6B: 'qu',
	0x0418: 'ro',
	0x0417: 'rm',
	0x0419: 'ru',
	0x243B: 'smn',
	0x103B: 'smj-NO',
	0x143B: 'smj',
	0x0C3B: 'se-FI',
	0x043B: 'se',
	0x083B: 'se-SE',
	0x203B: 'sms',
	0x183B: 'sma-NO',
	0x1C3B: 'sma',
	0x044F: 'sa',
	0x1C1A: 'sr-Cyrl-BA',
	0x0C1A: 'sr',
	0x181A: 'sr-Latn-BA',
	0x081A: 'sr-Latn',
	0x046C: 'nso',
	0x0432: 'tn',
	0x045B: 'si',
	0x041B: 'sk',
	0x0424: 'sl',
	0x2C0A: 'es-AR',
	0x400A: 'es-BO',
	0x340A: 'es-CL',
	0x240A: 'es-CO',
	0x140A: 'es-CR',
	0x1C0A: 'es-DO',
	0x300A: 'es-EC',
	0x440A: 'es-SV',
	0x100A: 'es-GT',
	0x480A: 'es-HN',
	0x080A: 'es-MX',
	0x4C0A: 'es-NI',
	0x180A: 'es-PA',
	0x3C0A: 'es-PY',
	0x280A: 'es-PE',
	0x500A: 'es-PR',

	# Microsoft has defined two different language codes for
	# “Spanish with modern sorting” and “Spanish with traditional
	# sorting”. This makes sense for collation APIs, and it would be
	# possible to express this in BCP 47 language tags via Unicode
	# extensions (e.g., “es-u-co-trad” is “Spanish with traditional
	# sorting”). However, for storing names in fonts, this distinction
	# does not make sense, so we use “es” in both cases.
	0x0C0A: 'es',
	0x040A: 'es',

	0x540A: 'es-US',
	0x380A: 'es-UY',
	0x200A: 'es-VE',
	0x081D: 'sv-FI',
	0x041D: 'sv',
	0x045A: 'syr',
	0x0428: 'tg',
	0x085F: 'tzm',
	0x0449: 'ta',
	0x0444: 'tt',
	0x044A: 'te',
	0x041E: 'th',
	0x0451: 'bo',
	0x041F: 'tr',
	0x0442: 'tk',
	0x0480: 'ug',
	0x0422: 'uk',
	0x042E: 'hsb',
	0x0420: 'ur',
	0x0843: 'uz-Cyrl',
	0x0443: 'uz',
	0x042A: 'vi',
	0x0452: 'cy',
	0x0488: 'wo',
	0x0485: 'sah',
	0x0478: 'ii',
	0x046A: 'yo',
}


_MAC_LANGUAGES = {
	0: 'en',
	1: 'fr',
	2: 'de',
	3: 'it',
	4: 'nl',
	5: 'sv',
	6: 'es',
	7: 'da',
	8: 'pt',
	9: 'no',
	10: 'he',
	11: 'ja',
	12: 'ar',
	13: 'fi',
	14: 'el',
	15: 'is',
	16: 'mt',
	17: 'tr',
	18: 'hr',
	19: 'zh-Hant',
	20: 'ur',
	21: 'hi',
	22: 'th',
	23: 'ko',
	24: 'lt',
	25: 'pl',
	26: 'hu',
	27: 'et',
	28: 'lv',
	29: 'se',
	30: 'fo',
	31: 'fa',
	32: 'ru',
	33: 'zh',
	34: 'nl-BE',
	35: 'ga',
	36: 'sq',
	37: 'ro',
	38: 'cs',
	39: 'sk',
	40: 'sl',
	41: 'yi',
	42: 'sr',
	43: 'mk',
	44: 'bg',
	45: 'uk',
	46: 'be',
	47: 'uz',
	48: 'kk',
	49: 'az-Cyrl',
	50: 'az-Arab',
	51: 'hy',
	52: 'ka',
	53: 'mo',
	54: 'ky',
	55: 'tg',
	56: 'tk',
	57: 'mn-CN',
	58: 'mn',
	59: 'ps',
	60: 'ks',
	61: 'ku',
	62: 'sd',
	63: 'bo',
	64: 'ne',
	65: 'sa',
	66: 'mr',
	67: 'bn',
	68: 'as',
	69: 'gu',
	70: 'pa',
	71: 'or',
	72: 'ml',
	73: 'kn',
	74: 'ta',
	75: 'te',
	76: 'si',
	77: 'my',
	78: 'km',
	79: 'lo',
	80: 'vi',
	81: 'id',
	82: 'tl',
	83: 'ms',
	84: 'ms-Arab',
	85: 'am',
	86: 'ti',
	87: 'om',
	88: 'so',
	89: 'sw',
	90: 'rw',
	91: 'rn',
	92: 'ny',
	93: 'mg',
	94: 'eo',
	128: 'cy',
	129: 'eu',
	130: 'ca',
	131: 'la',
	132: 'qu',
	133: 'gn',
	134: 'ay',
	135: 'tt',
	136: 'ug',
	137: 'dz',
	138: 'jv',
	139: 'su',
	140: 'gl',
	141: 'af',
	142: 'br',
	143: 'iu',
	144: 'gd',
	145: 'gv',
	146: 'ga',
	147: 'to',
	148: 'el-polyton',
	149: 'kl',
	150: 'az',
	151: 'nn',
}


_WINDOWS_LANGUAGE_CODES = {lang.lower(): code for code, lang in _WINDOWS_LANGUAGES.items()}
_MAC_LANGUAGE_CODES = {lang.lower(): code for code, lang in _MAC_LANGUAGES.items()}


# MacOS language ID → MacOS script ID
#
# Note that the script ID is not sufficient to determine what encoding
# to use in TrueType files. For some languages, MacOS used a modification
# of a mainstream script. For example, an Icelandic name would be stored
# with smRoman in the TrueType naming table, but the actual encoding
# is a special Icelandic version of the normal Macintosh Roman encoding.
# As another example, Inuktitut uses an 8-bit encoding for Canadian Aboriginal
# Syllabics but MacOS had run out of available script codes, so this was
# done as a (pretty radical) “modification” of Ethiopic.
#
# http://unicode.org/Public/MAPPINGS/VENDORS/APPLE/Readme.txt
_MAC_LANGUAGE_TO_SCRIPT = {
	0: 0,	# langEnglish → smRoman
	1: 0,	# langFrench → smRoman
	2: 0,	# langGerman → smRoman
	3: 0,	# langItalian → smRoman
	4: 0,	# langDutch → smRoman
	5: 0,	# langSwedish → smRoman
	6: 0,	# langSpanish → smRoman
	7: 0,	# langDanish → smRoman
	8: 0,	# langPortuguese → smRoman
	9: 0,	# langNorwegian → smRoman
	10: 5,	# langHebrew → smHebrew
	11: 1,	# langJapanese → smJapanese
	12: 4,	# langArabic → smArabic
	13: 0,	# langFinnish → smRoman
	14: 6,	# langGreek → smGreek
	15: 0,	# langIcelandic → smRoman (modified)
	16: 0,	# langMaltese → smRoman
	17: 0,	# langTurkish → smRoman (modified)
	18: 0,	# langCroatian → smRoman (modified)
	19: 2,	# langTradChinese → smTradChinese
	20: 4,	# langUrdu → smArabic
	21: 9,	# langHindi → smDevanagari
	22: 21,	# langThai → smThai
	23: 3,	# langKorean → smKorean
	24: 29,	# langLithuanian → smCentralEuroRoman
	25: 29,	# langPolish → smCentralEuroRoman
	26: 29,	# langHungarian → smCentralEuroRoman
	27: 29,	# langEstonian → smCentralEuroRoman
	28: 29,	# langLatvian → smCentralEuroRoman
	29: 0,	# langSami → smRoman
	30: 0,	# langFaroese → smRoman (modified)
	31: 4,	# langFarsi → smArabic (modified)
	32: 7,	# langRussian → smCyrillic
	33: 25,	# langSimpChinese → smSimpChinese
	34: 0,	# langFlemish → smRoman
	35: 0,	# langIrishGaelic → smRoman (modified)
	36: 0,	# langAlbanian → smRoman
	37: 0,	# langRomanian → smRoman (modified)
	38: 29,	# langCzech → smCentralEuroRoman
	39: 29,	# langSlovak → smCentralEuroRoman
	40: 0,	# langSlovenian → smRoman (modified)
	41: 5,	# langYiddish → smHebrew
	42: 7,	# langSerbian → smCyrillic
	43: 7,	# langMacedonian → smCyrillic
	44: 7,	# langBulgarian → smCyrillic
	45: 7,	# langUkrainian → smCyrillic (modified)
	46: 7,	# langByelorussian → smCyrillic
	47: 7,	# langUzbek → smCyrillic
	48: 7,	# langKazakh → smCyrillic
	49: 7,	# langAzerbaijani → smCyrillic
	50: 4,	# langAzerbaijanAr → smArabic
	51: 24,	# langArmenian → smArmenian
	52: 23,	# langGeorgian → smGeorgian
	53: 7,	# langMoldavian → smCyrillic
	54: 7,	# langKirghiz → smCyrillic
	55: 7,	# langTajiki → smCyrillic
	56: 7,	# langTurkmen → smCyrillic
	57: 27,	# langMongolian → smMongolian
	58: 7,	# langMongolianCyr → smCyrillic
	59: 4,	# langPashto → smArabic
	60: 4,	# langKurdish → smArabic
	61: 4,	# langKashmiri → smArabic
	62: 4,	# langSindhi → smArabic
	63: 26,	# langTibetan → smTibetan
	64: 9,	# langNepali → smDevanagari
	65: 9,	# langSanskrit → smDevanagari
	66: 9,	# langMarathi → smDevanagari
	67: 13,	# langBengali → smBengali
	68: 13,	# langAssamese → smBengali
	69: 11,	# langGujarati → smGujarati
	70: 10,	# langPunjabi → smGurmukhi
	71: 12,	# langOriya → smOriya
	72: 17,	# langMalayalam → smMalayalam
	73: 16,	# langKannada → smKannada
	74: 14,	# langTamil → smTamil
	75: 15,	# langTelugu → smTelugu
	76: 18,	# langSinhalese → smSinhalese
	77: 19,	# langBurmese → smBurmese
	78: 20,	# langKhmer → smKhmer
	79: 22,	# langLao → smLao
	80: 30,	# langVietnamese → smVietnamese
	81: 0,	# langIndonesian → smRoman
	82: 0,	# langTagalog → smRoman
	83: 0,	# langMalayRoman → smRoman
	84: 4,	# langMalayArabic → smArabic
	85: 28,	# langAmharic → smEthiopic
	86: 28,	# langTigrinya → smEthiopic
	87: 28,	# langOromo → smEthiopic
	88: 0,	# langSomali → smRoman
	89: 0,	# langSwahili → smRoman
	90: 0,	# langKinyarwanda → smRoman
	91: 0,	# langRundi → smRoman
	92: 0,	# langNyanja → smRoman
	93: 0,	# langMalagasy → smRoman
	94: 0,	# langEsperanto → smRoman
	128: 0,	# langWelsh → smRoman (modified)
	129: 0,	# langBasque → smRoman
	130: 0,	# langCatalan → smRoman
	131: 0,	# langLatin → smRoman
	132: 0,	# langQuechua → smRoman
	133: 0,	# langGuarani → smRoman
	134: 0,	# langAymara → smRoman
	135: 7,	# langTatar → smCyrillic
	136: 4,	# langUighur → smArabic
	137: 26,	# langDzongkha → smTibetan
	138: 0,	# langJavaneseRom → smRoman
	139: 0,	# langSundaneseRom → smRoman
	140: 0,	# langGalician → smRoman
	141: 0,	# langAfrikaans → smRoman
	142: 0,	# langBreton → smRoman (modified)
	143: 28,	# langInuktitut → smEthiopic (modified)
	144: 0,	# langScottishGaelic → smRoman (modified)
	145: 0,	# langManxGaelic → smRoman (modified)
	146: 0,	# langIrishGaelicScript → smRoman (modified)
	147: 0,	# langTongan → smRoman
	148: 6,	# langGreekAncient → smGreek
	149: 0,	# langGreenlandic → smRoman
	150: 0,	# langAzerbaijanRoman → smRoman
	151: 0,	# langNynorsk → smRoman
}