1from fontTools.misc.textTools import bytesjoin, safeEval, readHex 2from fontTools.misc.encodingTools import getEncoding 3from fontTools.ttLib import getSearchRange 4from fontTools.unicode import Unicode 5from . import DefaultTable 6import sys 7import struct 8import array 9import logging 10 11 12log = logging.getLogger(__name__) 13 14 15def _make_map(font, chars, gids): 16 assert len(chars) == len(gids) 17 glyphNames = font.getGlyphNameMany(gids) 18 cmap = {} 19 for char,gid,name in zip(chars,gids,glyphNames): 20 if gid == 0: 21 continue 22 cmap[char] = name 23 return cmap 24 25class table__c_m_a_p(DefaultTable.DefaultTable): 26 """Character to Glyph Index Mapping Table 27 28 This class represents the `cmap <https://docs.microsoft.com/en-us/typography/opentype/spec/cmap>`_ 29 table, which maps between input characters (in Unicode or other system encodings) 30 and glyphs within the font. The ``cmap`` table contains one or more subtables 31 which determine the mapping of of characters to glyphs across different platforms 32 and encoding systems. 33 34 ``table__c_m_a_p`` objects expose an accessor ``.tables`` which provides access 35 to the subtables, although it is normally easier to retrieve individual subtables 36 through the utility methods described below. To add new subtables to a font, 37 first determine the subtable format (if in doubt use format 4 for glyphs within 38 the BMP, format 12 for glyphs outside the BMP, and format 14 for Unicode Variation 39 Sequences) construct subtable objects with ``CmapSubtable.newSubtable(format)``, 40 and append them to the ``.tables`` list. 41 42 Within a subtable, the mapping of characters to glyphs is provided by the ``.cmap`` 43 attribute. 44 45 Example:: 46 47 cmap4_0_3 = CmapSubtable.newSubtable(4) 48 cmap4_0_3.platformID = 0 49 cmap4_0_3.platEncID = 3 50 cmap4_0_3.language = 0 51 cmap4_0_3.cmap = { 0xC1: "Aacute" } 52 53 cmap = newTable("cmap") 54 cmap.tableVersion = 0 55 cmap.tables = [cmap4_0_3] 56 """ 57 58 def getcmap(self, platformID, platEncID): 59 """Returns the first subtable which matches the given platform and encoding. 60 61 Args: 62 platformID (int): The platform ID. Use 0 for Unicode, 1 for Macintosh 63 (deprecated for new fonts), 2 for ISO (deprecated) and 3 for Windows. 64 encodingID (int): Encoding ID. Interpretation depends on the platform ID. 65 See the OpenType specification for details. 66 67 Returns: 68 An object which is a subclass of :py:class:`CmapSubtable` if a matching 69 subtable is found within the font, or ``None`` otherwise. 70 """ 71 72 for subtable in self.tables: 73 if (subtable.platformID == platformID and 74 subtable.platEncID == platEncID): 75 return subtable 76 return None # not found 77 78 def getBestCmap(self, cmapPreferences=((3, 10), (0, 6), (0, 4), (3, 1), (0, 3), (0, 2), (0, 1), (0, 0))): 79 """Returns the 'best' Unicode cmap dictionary available in the font 80 or ``None``, if no Unicode cmap subtable is available. 81 82 By default it will search for the following (platformID, platEncID) 83 pairs in order:: 84 85 (3, 10), # Windows Unicode full repertoire 86 (0, 6), # Unicode full repertoire (format 13 subtable) 87 (0, 4), # Unicode 2.0 full repertoire 88 (3, 1), # Windows Unicode BMP 89 (0, 3), # Unicode 2.0 BMP 90 (0, 2), # Unicode ISO/IEC 10646 91 (0, 1), # Unicode 1.1 92 (0, 0) # Unicode 1.0 93 94 This particular order matches what HarfBuzz uses to choose what 95 subtable to use by default. This order prefers the largest-repertoire 96 subtable, and among those, prefers the Windows-platform over the 97 Unicode-platform as the former has wider support. 98 99 This order can be customized via the ``cmapPreferences`` argument. 100 """ 101 for platformID, platEncID in cmapPreferences: 102 cmapSubtable = self.getcmap(platformID, platEncID) 103 if cmapSubtable is not None: 104 return cmapSubtable.cmap 105 return None # None of the requested cmap subtables were found 106 107 def buildReversed(self): 108 """Builds a reverse mapping dictionary 109 110 Iterates over all Unicode cmap tables and returns a dictionary mapping 111 glyphs to sets of codepoints, such as:: 112 113 { 114 'one': {0x31} 115 'A': {0x41,0x391} 116 } 117 118 The values are sets of Unicode codepoints because 119 some fonts map different codepoints to the same glyph. 120 For example, ``U+0041 LATIN CAPITAL LETTER A`` and ``U+0391 121 GREEK CAPITAL LETTER ALPHA`` are sometimes the same glyph. 122 """ 123 result = {} 124 for subtable in self.tables: 125 if subtable.isUnicode(): 126 for codepoint, name in subtable.cmap.items(): 127 result.setdefault(name, set()).add(codepoint) 128 return result 129 130 def decompile(self, data, ttFont): 131 tableVersion, numSubTables = struct.unpack(">HH", data[:4]) 132 self.tableVersion = int(tableVersion) 133 self.tables = tables = [] 134 seenOffsets = {} 135 for i in range(numSubTables): 136 platformID, platEncID, offset = struct.unpack( 137 ">HHl", data[4+i*8:4+(i+1)*8]) 138 platformID, platEncID = int(platformID), int(platEncID) 139 format, length = struct.unpack(">HH", data[offset:offset+4]) 140 if format in [8,10,12,13]: 141 format, reserved, length = struct.unpack(">HHL", data[offset:offset+8]) 142 elif format in [14]: 143 format, length = struct.unpack(">HL", data[offset:offset+6]) 144 145 if not length: 146 log.error( 147 "cmap subtable is reported as having zero length: platformID %s, " 148 "platEncID %s, format %s offset %s. Skipping table.", 149 platformID, platEncID, format, offset) 150 continue 151 table = CmapSubtable.newSubtable(format) 152 table.platformID = platformID 153 table.platEncID = platEncID 154 # Note that by default we decompile only the subtable header info; 155 # any other data gets decompiled only when an attribute of the 156 # subtable is referenced. 157 table.decompileHeader(data[offset:offset+int(length)], ttFont) 158 if offset in seenOffsets: 159 table.data = None # Mark as decompiled 160 table.cmap = tables[seenOffsets[offset]].cmap 161 else: 162 seenOffsets[offset] = i 163 tables.append(table) 164 if ttFont.lazy is False: # Be lazy for None and True 165 self.ensureDecompiled() 166 167 def ensureDecompiled(self, recurse=False): 168 # The recurse argument is unused, but part of the signature of 169 # ensureDecompiled across the library. 170 for st in self.tables: 171 st.ensureDecompiled() 172 173 def compile(self, ttFont): 174 self.tables.sort() # sort according to the spec; see CmapSubtable.__lt__() 175 numSubTables = len(self.tables) 176 totalOffset = 4 + 8 * numSubTables 177 data = struct.pack(">HH", self.tableVersion, numSubTables) 178 tableData = b"" 179 seen = {} # Some tables are the same object reference. Don't compile them twice. 180 done = {} # Some tables are different objects, but compile to the same data chunk 181 for table in self.tables: 182 offset = seen.get(id(table.cmap)) 183 if offset is None: 184 chunk = table.compile(ttFont) 185 offset = done.get(chunk) 186 if offset is None: 187 offset = seen[id(table.cmap)] = done[chunk] = totalOffset + len(tableData) 188 tableData = tableData + chunk 189 data = data + struct.pack(">HHl", table.platformID, table.platEncID, offset) 190 return data + tableData 191 192 def toXML(self, writer, ttFont): 193 writer.simpletag("tableVersion", version=self.tableVersion) 194 writer.newline() 195 for table in self.tables: 196 table.toXML(writer, ttFont) 197 198 def fromXML(self, name, attrs, content, ttFont): 199 if name == "tableVersion": 200 self.tableVersion = safeEval(attrs["version"]) 201 return 202 if name[:12] != "cmap_format_": 203 return 204 if not hasattr(self, "tables"): 205 self.tables = [] 206 format = safeEval(name[12:]) 207 table = CmapSubtable.newSubtable(format) 208 table.platformID = safeEval(attrs["platformID"]) 209 table.platEncID = safeEval(attrs["platEncID"]) 210 table.fromXML(name, attrs, content, ttFont) 211 self.tables.append(table) 212 213 214class CmapSubtable(object): 215 """Base class for all cmap subtable formats. 216 217 Subclasses which handle the individual subtable formats are named 218 ``cmap_format_0``, ``cmap_format_2`` etc. Use :py:meth:`getSubtableClass` 219 to retrieve the concrete subclass, or :py:meth:`newSubtable` to get a 220 new subtable object for a given format. 221 222 The object exposes a ``.cmap`` attribute, which contains a dictionary mapping 223 character codepoints to glyph names. 224 """ 225 226 @staticmethod 227 def getSubtableClass(format): 228 """Return the subtable class for a format.""" 229 return cmap_classes.get(format, cmap_format_unknown) 230 231 @staticmethod 232 def newSubtable(format): 233 """Return a new instance of a subtable for the given format 234 .""" 235 subtableClass = CmapSubtable.getSubtableClass(format) 236 return subtableClass(format) 237 238 def __init__(self, format): 239 self.format = format 240 self.data = None 241 self.ttFont = None 242 self.platformID = None #: The platform ID of this subtable 243 self.platEncID = None #: The encoding ID of this subtable (interpretation depends on ``platformID``) 244 self.language = None #: The language ID of this subtable (Macintosh platform only) 245 246 def ensureDecompiled(self, recurse=False): 247 # The recurse argument is unused, but part of the signature of 248 # ensureDecompiled across the library. 249 if self.data is None: 250 return 251 self.decompile(None, None) # use saved data. 252 self.data = None # Once this table has been decompiled, make sure we don't 253 # just return the original data. Also avoids recursion when 254 # called with an attribute that the cmap subtable doesn't have. 255 256 def __getattr__(self, attr): 257 # allow lazy decompilation of subtables. 258 if attr[:2] == '__': # don't handle requests for member functions like '__lt__' 259 raise AttributeError(attr) 260 if self.data is None: 261 raise AttributeError(attr) 262 self.ensureDecompiled() 263 return getattr(self, attr) 264 265 def decompileHeader(self, data, ttFont): 266 format, length, language = struct.unpack(">HHH", data[:6]) 267 assert len(data) == length, "corrupt cmap table format %d (data length: %d, header length: %d)" % (format, len(data), length) 268 self.format = int(format) 269 self.length = int(length) 270 self.language = int(language) 271 self.data = data[6:] 272 self.ttFont = ttFont 273 274 def toXML(self, writer, ttFont): 275 writer.begintag(self.__class__.__name__, [ 276 ("platformID", self.platformID), 277 ("platEncID", self.platEncID), 278 ("language", self.language), 279 ]) 280 writer.newline() 281 codes = sorted(self.cmap.items()) 282 self._writeCodes(codes, writer) 283 writer.endtag(self.__class__.__name__) 284 writer.newline() 285 286 def getEncoding(self, default=None): 287 """Returns the Python encoding name for this cmap subtable based on its platformID, 288 platEncID, and language. If encoding for these values is not known, by default 289 ``None`` is returned. That can be overridden by passing a value to the ``default`` 290 argument. 291 292 Note that if you want to choose a "preferred" cmap subtable, most of the time 293 ``self.isUnicode()`` is what you want as that one only returns true for the modern, 294 commonly used, Unicode-compatible triplets, not the legacy ones. 295 """ 296 return getEncoding(self.platformID, self.platEncID, self.language, default) 297 298 def isUnicode(self): 299 """Returns true if the characters are interpreted as Unicode codepoints.""" 300 return (self.platformID == 0 or 301 (self.platformID == 3 and self.platEncID in [0, 1, 10])) 302 303 def isSymbol(self): 304 """Returns true if the subtable is for the Symbol encoding (3,0)""" 305 return self.platformID == 3 and self.platEncID == 0 306 307 def _writeCodes(self, codes, writer): 308 isUnicode = self.isUnicode() 309 for code, name in codes: 310 writer.simpletag("map", code=hex(code), name=name) 311 if isUnicode: 312 writer.comment(Unicode[code]) 313 writer.newline() 314 315 def __lt__(self, other): 316 if not isinstance(other, CmapSubtable): 317 return NotImplemented 318 319 # implemented so that list.sort() sorts according to the spec. 320 selfTuple = ( 321 getattr(self, "platformID", None), 322 getattr(self, "platEncID", None), 323 getattr(self, "language", None), 324 self.__dict__) 325 otherTuple = ( 326 getattr(other, "platformID", None), 327 getattr(other, "platEncID", None), 328 getattr(other, "language", None), 329 other.__dict__) 330 return selfTuple < otherTuple 331 332 333class cmap_format_0(CmapSubtable): 334 335 def decompile(self, data, ttFont): 336 # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None. 337 # If not, someone is calling the subtable decompile() directly, and must provide both args. 338 if data is not None and ttFont is not None: 339 self.decompileHeader(data, ttFont) 340 else: 341 assert (data is None and ttFont is None), "Need both data and ttFont arguments" 342 data = self.data # decompileHeader assigns the data after the header to self.data 343 assert 262 == self.length, "Format 0 cmap subtable not 262 bytes" 344 gids = array.array("B") 345 gids.frombytes(self.data) 346 charCodes = list(range(len(gids))) 347 self.cmap = _make_map(self.ttFont, charCodes, gids) 348 349 def compile(self, ttFont): 350 if self.data: 351 return struct.pack(">HHH", 0, 262, self.language) + self.data 352 353 cmap = self.cmap 354 assert set(cmap.keys()).issubset(range(256)) 355 getGlyphID = ttFont.getGlyphID 356 valueList = [getGlyphID(cmap[i]) if i in cmap else 0 for i in range(256)] 357 358 gids = array.array("B", valueList) 359 data = struct.pack(">HHH", 0, 262, self.language) + gids.tobytes() 360 assert len(data) == 262 361 return data 362 363 def fromXML(self, name, attrs, content, ttFont): 364 self.language = safeEval(attrs["language"]) 365 if not hasattr(self, "cmap"): 366 self.cmap = {} 367 cmap = self.cmap 368 for element in content: 369 if not isinstance(element, tuple): 370 continue 371 name, attrs, content = element 372 if name != "map": 373 continue 374 cmap[safeEval(attrs["code"])] = attrs["name"] 375 376 377subHeaderFormat = ">HHhH" 378class SubHeader(object): 379 def __init__(self): 380 self.firstCode = None 381 self.entryCount = None 382 self.idDelta = None 383 self.idRangeOffset = None 384 self.glyphIndexArray = [] 385 386class cmap_format_2(CmapSubtable): 387 388 def setIDDelta(self, subHeader): 389 subHeader.idDelta = 0 390 # find the minGI which is not zero. 391 minGI = subHeader.glyphIndexArray[0] 392 for gid in subHeader.glyphIndexArray: 393 if (gid != 0) and (gid < minGI): 394 minGI = gid 395 # The lowest gid in glyphIndexArray, after subtracting idDelta, must be 1. 396 # idDelta is a short, and must be between -32K and 32K. minGI can be between 1 and 64K. 397 # We would like to pick an idDelta such that the first glyphArray GID is 1, 398 # so that we are more likely to be able to combine glypharray GID subranges. 399 # This means that we have a problem when minGI is > 32K 400 # Since the final gi is reconstructed from the glyphArray GID by: 401 # (short)finalGID = (gid + idDelta) % 0x10000), 402 # we can get from a glypharray GID of 1 to a final GID of 65K by subtracting 2, and casting the 403 # negative number to an unsigned short. 404 405 if (minGI > 1): 406 if minGI > 0x7FFF: 407 subHeader.idDelta = -(0x10000 - minGI) -1 408 else: 409 subHeader.idDelta = minGI -1 410 idDelta = subHeader.idDelta 411 for i in range(subHeader.entryCount): 412 gid = subHeader.glyphIndexArray[i] 413 if gid > 0: 414 subHeader.glyphIndexArray[i] = gid - idDelta 415 416 def decompile(self, data, ttFont): 417 # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None. 418 # If not, someone is calling the subtable decompile() directly, and must provide both args. 419 if data is not None and ttFont is not None: 420 self.decompileHeader(data, ttFont) 421 else: 422 assert (data is None and ttFont is None), "Need both data and ttFont arguments" 423 424 data = self.data # decompileHeader assigns the data after the header to self.data 425 subHeaderKeys = [] 426 maxSubHeaderindex = 0 427 # get the key array, and determine the number of subHeaders. 428 allKeys = array.array("H") 429 allKeys.frombytes(data[:512]) 430 data = data[512:] 431 if sys.byteorder != "big": allKeys.byteswap() 432 subHeaderKeys = [ key//8 for key in allKeys] 433 maxSubHeaderindex = max(subHeaderKeys) 434 435 #Load subHeaders 436 subHeaderList = [] 437 pos = 0 438 for i in range(maxSubHeaderindex + 1): 439 subHeader = SubHeader() 440 (subHeader.firstCode, subHeader.entryCount, subHeader.idDelta, \ 441 subHeader.idRangeOffset) = struct.unpack(subHeaderFormat, data[pos:pos + 8]) 442 pos += 8 443 giDataPos = pos + subHeader.idRangeOffset-2 444 giList = array.array("H") 445 giList.frombytes(data[giDataPos:giDataPos + subHeader.entryCount*2]) 446 if sys.byteorder != "big": giList.byteswap() 447 subHeader.glyphIndexArray = giList 448 subHeaderList.append(subHeader) 449 # How this gets processed. 450 # Charcodes may be one or two bytes. 451 # The first byte of a charcode is mapped through the subHeaderKeys, to select 452 # a subHeader. For any subheader but 0, the next byte is then mapped through the 453 # selected subheader. If subheader Index 0 is selected, then the byte itself is 454 # mapped through the subheader, and there is no second byte. 455 # Then assume that the subsequent byte is the first byte of the next charcode,and repeat. 456 # 457 # Each subheader references a range in the glyphIndexArray whose length is entryCount. 458 # The range in glyphIndexArray referenced by a sunheader may overlap with the range in glyphIndexArray 459 # referenced by another subheader. 460 # The only subheader that will be referenced by more than one first-byte value is the subheader 461 # that maps the entire range of glyphID values to glyphIndex 0, e.g notdef: 462 # {firstChar 0, EntryCount 0,idDelta 0,idRangeOffset xx} 463 # A byte being mapped though a subheader is treated as in index into a mapping of array index to font glyphIndex. 464 # A subheader specifies a subrange within (0...256) by the 465 # firstChar and EntryCount values. If the byte value is outside the subrange, then the glyphIndex is zero 466 # (e.g. glyph not in font). 467 # If the byte index is in the subrange, then an offset index is calculated as (byteIndex - firstChar). 468 # The index to glyphIndex mapping is a subrange of the glyphIndexArray. You find the start of the subrange by 469 # counting idRangeOffset bytes from the idRangeOffset word. The first value in this subrange is the 470 # glyphIndex for the index firstChar. The offset index should then be used in this array to get the glyphIndex. 471 # Example for Logocut-Medium 472 # first byte of charcode = 129; selects subheader 1. 473 # subheader 1 = {firstChar 64, EntryCount 108,idDelta 42,idRangeOffset 0252} 474 # second byte of charCode = 66 475 # the index offset = 66-64 = 2. 476 # The subrange of the glyphIndexArray starting at 0x0252 bytes from the idRangeOffset word is: 477 # [glyphIndexArray index], [subrange array index] = glyphIndex 478 # [256], [0]=1 from charcode [129, 64] 479 # [257], [1]=2 from charcode [129, 65] 480 # [258], [2]=3 from charcode [129, 66] 481 # [259], [3]=4 from charcode [129, 67] 482 # So, the glyphIndex = 3 from the array. Then if idDelta is not zero and the glyph ID is not zero, 483 # add it to the glyphID to get the final glyphIndex 484 # value. In this case the final glyph index = 3+ 42 -> 45 for the final glyphIndex. Whew! 485 486 self.data = b"" 487 cmap = {} 488 notdefGI = 0 489 for firstByte in range(256): 490 subHeadindex = subHeaderKeys[firstByte] 491 subHeader = subHeaderList[subHeadindex] 492 if subHeadindex == 0: 493 if (firstByte < subHeader.firstCode) or (firstByte >= subHeader.firstCode + subHeader.entryCount): 494 continue # gi is notdef. 495 else: 496 charCode = firstByte 497 offsetIndex = firstByte - subHeader.firstCode 498 gi = subHeader.glyphIndexArray[offsetIndex] 499 if gi != 0: 500 gi = (gi + subHeader.idDelta) % 0x10000 501 else: 502 continue # gi is notdef. 503 cmap[charCode] = gi 504 else: 505 if subHeader.entryCount: 506 charCodeOffset = firstByte * 256 + subHeader.firstCode 507 for offsetIndex in range(subHeader.entryCount): 508 charCode = charCodeOffset + offsetIndex 509 gi = subHeader.glyphIndexArray[offsetIndex] 510 if gi != 0: 511 gi = (gi + subHeader.idDelta) % 0x10000 512 else: 513 continue 514 cmap[charCode] = gi 515 # If not subHeader.entryCount, then all char codes with this first byte are 516 # mapped to .notdef. We can skip this subtable, and leave the glyphs un-encoded, which is the 517 # same as mapping it to .notdef. 518 519 gids = list(cmap.values()) 520 charCodes = list(cmap.keys()) 521 self.cmap = _make_map(self.ttFont, charCodes, gids) 522 523 def compile(self, ttFont): 524 if self.data: 525 return struct.pack(">HHH", self.format, self.length, self.language) + self.data 526 kEmptyTwoCharCodeRange = -1 527 notdefGI = 0 528 529 items = sorted(self.cmap.items()) 530 charCodes = [item[0] for item in items] 531 names = [item[1] for item in items] 532 nameMap = ttFont.getReverseGlyphMap() 533 try: 534 gids = [nameMap[name] for name in names] 535 except KeyError: 536 nameMap = ttFont.getReverseGlyphMap(rebuild=True) 537 try: 538 gids = [nameMap[name] for name in names] 539 except KeyError: 540 # allow virtual GIDs in format 2 tables 541 gids = [] 542 for name in names: 543 try: 544 gid = nameMap[name] 545 except KeyError: 546 try: 547 if (name[:3] == 'gid'): 548 gid = int(name[3:]) 549 else: 550 gid = ttFont.getGlyphID(name) 551 except: 552 raise KeyError(name) 553 554 gids.append(gid) 555 556 # Process the (char code to gid) item list in char code order. 557 # By definition, all one byte char codes map to subheader 0. 558 # For all the two byte char codes, we assume that the first byte maps maps to the empty subhead (with an entry count of 0, 559 # which defines all char codes in its range to map to notdef) unless proven otherwise. 560 # Note that since the char code items are processed in char code order, all the char codes with the 561 # same first byte are in sequential order. 562 563 subHeaderKeys = [kEmptyTwoCharCodeRange for x in range(256)] # list of indices into subHeaderList. 564 subHeaderList = [] 565 566 # We force this subheader entry 0 to exist in the subHeaderList in the case where some one comes up 567 # with a cmap where all the one byte char codes map to notdef, 568 # with the result that the subhead 0 would not get created just by processing the item list. 569 charCode = charCodes[0] 570 if charCode > 255: 571 subHeader = SubHeader() 572 subHeader.firstCode = 0 573 subHeader.entryCount = 0 574 subHeader.idDelta = 0 575 subHeader.idRangeOffset = 0 576 subHeaderList.append(subHeader) 577 578 lastFirstByte = -1 579 items = zip(charCodes, gids) 580 for charCode, gid in items: 581 if gid == 0: 582 continue 583 firstbyte = charCode >> 8 584 secondByte = charCode & 0x00FF 585 586 if firstbyte != lastFirstByte: # Need to update the current subhead, and start a new one. 587 if lastFirstByte > -1: 588 # fix GI's and iDelta of current subheader. 589 self.setIDDelta(subHeader) 590 591 # If it was sunheader 0 for one-byte charCodes, then we need to set the subHeaderKeys value to zero 592 # for the indices matching the char codes. 593 if lastFirstByte == 0: 594 for index in range(subHeader.entryCount): 595 charCode = subHeader.firstCode + index 596 subHeaderKeys[charCode] = 0 597 598 assert (subHeader.entryCount == len(subHeader.glyphIndexArray)), "Error - subhead entry count does not match len of glyphID subrange." 599 # init new subheader 600 subHeader = SubHeader() 601 subHeader.firstCode = secondByte 602 subHeader.entryCount = 1 603 subHeader.glyphIndexArray.append(gid) 604 subHeaderList.append(subHeader) 605 subHeaderKeys[firstbyte] = len(subHeaderList) -1 606 lastFirstByte = firstbyte 607 else: 608 # need to fill in with notdefs all the code points between the last charCode and the current charCode. 609 codeDiff = secondByte - (subHeader.firstCode + subHeader.entryCount) 610 for i in range(codeDiff): 611 subHeader.glyphIndexArray.append(notdefGI) 612 subHeader.glyphIndexArray.append(gid) 613 subHeader.entryCount = subHeader.entryCount + codeDiff + 1 614 615 # fix GI's and iDelta of last subheader that we we added to the subheader array. 616 self.setIDDelta(subHeader) 617 618 # Now we add a final subheader for the subHeaderKeys which maps to empty two byte charcode ranges. 619 subHeader = SubHeader() 620 subHeader.firstCode = 0 621 subHeader.entryCount = 0 622 subHeader.idDelta = 0 623 subHeader.idRangeOffset = 2 624 subHeaderList.append(subHeader) 625 emptySubheadIndex = len(subHeaderList) - 1 626 for index in range(256): 627 if subHeaderKeys[index] == kEmptyTwoCharCodeRange: 628 subHeaderKeys[index] = emptySubheadIndex 629 # Since this is the last subheader, the GlyphIndex Array starts two bytes after the start of the 630 # idRangeOffset word of this subHeader. We can safely point to the first entry in the GlyphIndexArray, 631 # since the first subrange of the GlyphIndexArray is for subHeader 0, which always starts with 632 # charcode 0 and GID 0. 633 634 idRangeOffset = (len(subHeaderList)-1)*8 + 2 # offset to beginning of glyphIDArray from first subheader idRangeOffset. 635 subheadRangeLen = len(subHeaderList) -1 # skip last special empty-set subheader; we've already hardocodes its idRangeOffset to 2. 636 for index in range(subheadRangeLen): 637 subHeader = subHeaderList[index] 638 subHeader.idRangeOffset = 0 639 for j in range(index): 640 prevSubhead = subHeaderList[j] 641 if prevSubhead.glyphIndexArray == subHeader.glyphIndexArray: # use the glyphIndexArray subarray 642 subHeader.idRangeOffset = prevSubhead.idRangeOffset - (index-j)*8 643 subHeader.glyphIndexArray = [] 644 break 645 if subHeader.idRangeOffset == 0: # didn't find one. 646 subHeader.idRangeOffset = idRangeOffset 647 idRangeOffset = (idRangeOffset - 8) + subHeader.entryCount*2 # one less subheader, one more subArray. 648 else: 649 idRangeOffset = idRangeOffset - 8 # one less subheader 650 651 # Now we can write out the data! 652 length = 6 + 512 + 8*len(subHeaderList) # header, 256 subHeaderKeys, and subheader array. 653 for subhead in subHeaderList[:-1]: 654 length = length + len(subhead.glyphIndexArray)*2 # We can't use subhead.entryCount, as some of the subhead may share subArrays. 655 dataList = [struct.pack(">HHH", 2, length, self.language)] 656 for index in subHeaderKeys: 657 dataList.append(struct.pack(">H", index*8)) 658 for subhead in subHeaderList: 659 dataList.append(struct.pack(subHeaderFormat, subhead.firstCode, subhead.entryCount, subhead.idDelta, subhead.idRangeOffset)) 660 for subhead in subHeaderList[:-1]: 661 for gi in subhead.glyphIndexArray: 662 dataList.append(struct.pack(">H", gi)) 663 data = bytesjoin(dataList) 664 assert (len(data) == length), "Error: cmap format 2 is not same length as calculated! actual: " + str(len(data))+ " calc : " + str(length) 665 return data 666 667 def fromXML(self, name, attrs, content, ttFont): 668 self.language = safeEval(attrs["language"]) 669 if not hasattr(self, "cmap"): 670 self.cmap = {} 671 cmap = self.cmap 672 673 for element in content: 674 if not isinstance(element, tuple): 675 continue 676 name, attrs, content = element 677 if name != "map": 678 continue 679 cmap[safeEval(attrs["code"])] = attrs["name"] 680 681 682cmap_format_4_format = ">7H" 683 684#uint16 endCode[segCount] # Ending character code for each segment, last = 0xFFFF. 685#uint16 reservedPad # This value should be zero 686#uint16 startCode[segCount] # Starting character code for each segment 687#uint16 idDelta[segCount] # Delta for all character codes in segment 688#uint16 idRangeOffset[segCount] # Offset in bytes to glyph indexArray, or 0 689#uint16 glyphIndexArray[variable] # Glyph index array 690 691def splitRange(startCode, endCode, cmap): 692 # Try to split a range of character codes into subranges with consecutive 693 # glyph IDs in such a way that the cmap4 subtable can be stored "most" 694 # efficiently. I can't prove I've got the optimal solution, but it seems 695 # to do well with the fonts I tested: none became bigger, many became smaller. 696 if startCode == endCode: 697 return [], [endCode] 698 699 lastID = cmap[startCode] 700 lastCode = startCode 701 inOrder = None 702 orderedBegin = None 703 subRanges = [] 704 705 # Gather subranges in which the glyph IDs are consecutive. 706 for code in range(startCode + 1, endCode + 1): 707 glyphID = cmap[code] 708 709 if glyphID - 1 == lastID: 710 if inOrder is None or not inOrder: 711 inOrder = 1 712 orderedBegin = lastCode 713 else: 714 if inOrder: 715 inOrder = 0 716 subRanges.append((orderedBegin, lastCode)) 717 orderedBegin = None 718 719 lastID = glyphID 720 lastCode = code 721 722 if inOrder: 723 subRanges.append((orderedBegin, lastCode)) 724 assert lastCode == endCode 725 726 # Now filter out those new subranges that would only make the data bigger. 727 # A new segment cost 8 bytes, not using a new segment costs 2 bytes per 728 # character. 729 newRanges = [] 730 for b, e in subRanges: 731 if b == startCode and e == endCode: 732 break # the whole range, we're fine 733 if b == startCode or e == endCode: 734 threshold = 4 # split costs one more segment 735 else: 736 threshold = 8 # split costs two more segments 737 if (e - b + 1) > threshold: 738 newRanges.append((b, e)) 739 subRanges = newRanges 740 741 if not subRanges: 742 return [], [endCode] 743 744 if subRanges[0][0] != startCode: 745 subRanges.insert(0, (startCode, subRanges[0][0] - 1)) 746 if subRanges[-1][1] != endCode: 747 subRanges.append((subRanges[-1][1] + 1, endCode)) 748 749 # Fill the "holes" in the segments list -- those are the segments in which 750 # the glyph IDs are _not_ consecutive. 751 i = 1 752 while i < len(subRanges): 753 if subRanges[i-1][1] + 1 != subRanges[i][0]: 754 subRanges.insert(i, (subRanges[i-1][1] + 1, subRanges[i][0] - 1)) 755 i = i + 1 756 i = i + 1 757 758 # Transform the ranges into startCode/endCode lists. 759 start = [] 760 end = [] 761 for b, e in subRanges: 762 start.append(b) 763 end.append(e) 764 start.pop(0) 765 766 assert len(start) + 1 == len(end) 767 return start, end 768 769 770class cmap_format_4(CmapSubtable): 771 772 def decompile(self, data, ttFont): 773 # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None. 774 # If not, someone is calling the subtable decompile() directly, and must provide both args. 775 if data is not None and ttFont is not None: 776 self.decompileHeader(data, ttFont) 777 else: 778 assert (data is None and ttFont is None), "Need both data and ttFont arguments" 779 780 data = self.data # decompileHeader assigns the data after the header to self.data 781 (segCountX2, searchRange, entrySelector, rangeShift) = \ 782 struct.unpack(">4H", data[:8]) 783 data = data[8:] 784 segCount = segCountX2 // 2 785 786 allCodes = array.array("H") 787 allCodes.frombytes(data) 788 self.data = data = None 789 790 if sys.byteorder != "big": allCodes.byteswap() 791 792 # divide the data 793 endCode = allCodes[:segCount] 794 allCodes = allCodes[segCount+1:] # the +1 is skipping the reservedPad field 795 startCode = allCodes[:segCount] 796 allCodes = allCodes[segCount:] 797 idDelta = allCodes[:segCount] 798 allCodes = allCodes[segCount:] 799 idRangeOffset = allCodes[:segCount] 800 glyphIndexArray = allCodes[segCount:] 801 lenGIArray = len(glyphIndexArray) 802 803 # build 2-byte character mapping 804 charCodes = [] 805 gids = [] 806 for i in range(len(startCode) - 1): # don't do 0xffff! 807 start = startCode[i] 808 delta = idDelta[i] 809 rangeOffset = idRangeOffset[i] 810 partial = rangeOffset // 2 - start + i - len(idRangeOffset) 811 812 rangeCharCodes = list(range(startCode[i], endCode[i] + 1)) 813 charCodes.extend(rangeCharCodes) 814 if rangeOffset == 0: 815 gids.extend([(charCode + delta) & 0xFFFF for charCode in rangeCharCodes]) 816 else: 817 for charCode in rangeCharCodes: 818 index = charCode + partial 819 assert (index < lenGIArray), "In format 4 cmap, range (%d), the calculated index (%d) into the glyph index array is not less than the length of the array (%d) !" % (i, index, lenGIArray) 820 if glyphIndexArray[index] != 0: # if not missing glyph 821 glyphID = glyphIndexArray[index] + delta 822 else: 823 glyphID = 0 # missing glyph 824 gids.append(glyphID & 0xFFFF) 825 826 self.cmap = _make_map(self.ttFont, charCodes, gids) 827 828 def compile(self, ttFont): 829 if self.data: 830 return struct.pack(">HHH", self.format, self.length, self.language) + self.data 831 832 charCodes = list(self.cmap.keys()) 833 if not charCodes: 834 startCode = [0xffff] 835 endCode = [0xffff] 836 else: 837 charCodes.sort() 838 names = [self.cmap[code] for code in charCodes] 839 nameMap = ttFont.getReverseGlyphMap() 840 try: 841 gids = [nameMap[name] for name in names] 842 except KeyError: 843 nameMap = ttFont.getReverseGlyphMap(rebuild=True) 844 try: 845 gids = [nameMap[name] for name in names] 846 except KeyError: 847 # allow virtual GIDs in format 4 tables 848 gids = [] 849 for name in names: 850 try: 851 gid = nameMap[name] 852 except KeyError: 853 try: 854 if (name[:3] == 'gid'): 855 gid = int(name[3:]) 856 else: 857 gid = ttFont.getGlyphID(name) 858 except: 859 raise KeyError(name) 860 861 gids.append(gid) 862 cmap = {} # code:glyphID mapping 863 for code, gid in zip(charCodes, gids): 864 cmap[code] = gid 865 866 # Build startCode and endCode lists. 867 # Split the char codes in ranges of consecutive char codes, then split 868 # each range in more ranges of consecutive/not consecutive glyph IDs. 869 # See splitRange(). 870 lastCode = charCodes[0] 871 endCode = [] 872 startCode = [lastCode] 873 for charCode in charCodes[1:]: # skip the first code, it's the first start code 874 if charCode == lastCode + 1: 875 lastCode = charCode 876 continue 877 start, end = splitRange(startCode[-1], lastCode, cmap) 878 startCode.extend(start) 879 endCode.extend(end) 880 startCode.append(charCode) 881 lastCode = charCode 882 start, end = splitRange(startCode[-1], lastCode, cmap) 883 startCode.extend(start) 884 endCode.extend(end) 885 startCode.append(0xffff) 886 endCode.append(0xffff) 887 888 # build up rest of cruft 889 idDelta = [] 890 idRangeOffset = [] 891 glyphIndexArray = [] 892 for i in range(len(endCode)-1): # skip the closing codes (0xffff) 893 indices = [] 894 for charCode in range(startCode[i], endCode[i] + 1): 895 indices.append(cmap[charCode]) 896 if (indices == list(range(indices[0], indices[0] + len(indices)))): 897 idDelta.append((indices[0] - startCode[i]) % 0x10000) 898 idRangeOffset.append(0) 899 else: 900 idDelta.append(0) 901 idRangeOffset.append(2 * (len(endCode) + len(glyphIndexArray) - i)) 902 glyphIndexArray.extend(indices) 903 idDelta.append(1) # 0xffff + 1 == (tadaa!) 0. So this end code maps to .notdef 904 idRangeOffset.append(0) 905 906 # Insane. 907 segCount = len(endCode) 908 segCountX2 = segCount * 2 909 searchRange, entrySelector, rangeShift = getSearchRange(segCount, 2) 910 911 charCodeArray = array.array("H", endCode + [0] + startCode) 912 idDeltaArray = array.array("H", idDelta) 913 restArray = array.array("H", idRangeOffset + glyphIndexArray) 914 if sys.byteorder != "big": charCodeArray.byteswap() 915 if sys.byteorder != "big": idDeltaArray.byteswap() 916 if sys.byteorder != "big": restArray.byteswap() 917 data = charCodeArray.tobytes() + idDeltaArray.tobytes() + restArray.tobytes() 918 919 length = struct.calcsize(cmap_format_4_format) + len(data) 920 header = struct.pack(cmap_format_4_format, self.format, length, self.language, 921 segCountX2, searchRange, entrySelector, rangeShift) 922 return header + data 923 924 def fromXML(self, name, attrs, content, ttFont): 925 self.language = safeEval(attrs["language"]) 926 if not hasattr(self, "cmap"): 927 self.cmap = {} 928 cmap = self.cmap 929 930 for element in content: 931 if not isinstance(element, tuple): 932 continue 933 nameMap, attrsMap, dummyContent = element 934 if nameMap != "map": 935 assert 0, "Unrecognized keyword in cmap subtable" 936 cmap[safeEval(attrsMap["code"])] = attrsMap["name"] 937 938 939class cmap_format_6(CmapSubtable): 940 941 def decompile(self, data, ttFont): 942 # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None. 943 # If not, someone is calling the subtable decompile() directly, and must provide both args. 944 if data is not None and ttFont is not None: 945 self.decompileHeader(data, ttFont) 946 else: 947 assert (data is None and ttFont is None), "Need both data and ttFont arguments" 948 949 data = self.data # decompileHeader assigns the data after the header to self.data 950 firstCode, entryCount = struct.unpack(">HH", data[:4]) 951 firstCode = int(firstCode) 952 data = data[4:] 953 #assert len(data) == 2 * entryCount # XXX not true in Apple's Helvetica!!! 954 gids = array.array("H") 955 gids.frombytes(data[:2 * int(entryCount)]) 956 if sys.byteorder != "big": gids.byteswap() 957 self.data = data = None 958 959 charCodes = list(range(firstCode, firstCode + len(gids))) 960 self.cmap = _make_map(self.ttFont, charCodes, gids) 961 962 def compile(self, ttFont): 963 if self.data: 964 return struct.pack(">HHH", self.format, self.length, self.language) + self.data 965 cmap = self.cmap 966 codes = sorted(cmap.keys()) 967 if codes: # yes, there are empty cmap tables. 968 codes = list(range(codes[0], codes[-1] + 1)) 969 firstCode = codes[0] 970 valueList = [ 971 ttFont.getGlyphID(cmap[code]) if code in cmap else 0 972 for code in codes 973 ] 974 gids = array.array("H", valueList) 975 if sys.byteorder != "big": gids.byteswap() 976 data = gids.tobytes() 977 else: 978 data = b"" 979 firstCode = 0 980 header = struct.pack(">HHHHH", 981 6, len(data) + 10, self.language, firstCode, len(codes)) 982 return header + data 983 984 def fromXML(self, name, attrs, content, ttFont): 985 self.language = safeEval(attrs["language"]) 986 if not hasattr(self, "cmap"): 987 self.cmap = {} 988 cmap = self.cmap 989 990 for element in content: 991 if not isinstance(element, tuple): 992 continue 993 name, attrs, content = element 994 if name != "map": 995 continue 996 cmap[safeEval(attrs["code"])] = attrs["name"] 997 998 999class cmap_format_12_or_13(CmapSubtable): 1000 1001 def __init__(self, format): 1002 self.format = format 1003 self.reserved = 0 1004 self.data = None 1005 self.ttFont = None 1006 1007 def decompileHeader(self, data, ttFont): 1008 format, reserved, length, language, nGroups = struct.unpack(">HHLLL", data[:16]) 1009 assert len(data) == (16 + nGroups*12) == (length), "corrupt cmap table format %d (data length: %d, header length: %d)" % (self.format, len(data), length) 1010 self.format = format 1011 self.reserved = reserved 1012 self.length = length 1013 self.language = language 1014 self.nGroups = nGroups 1015 self.data = data[16:] 1016 self.ttFont = ttFont 1017 1018 def decompile(self, data, ttFont): 1019 # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None. 1020 # If not, someone is calling the subtable decompile() directly, and must provide both args. 1021 if data is not None and ttFont is not None: 1022 self.decompileHeader(data, ttFont) 1023 else: 1024 assert (data is None and ttFont is None), "Need both data and ttFont arguments" 1025 1026 data = self.data # decompileHeader assigns the data after the header to self.data 1027 charCodes = [] 1028 gids = [] 1029 pos = 0 1030 for i in range(self.nGroups): 1031 startCharCode, endCharCode, glyphID = struct.unpack(">LLL",data[pos:pos+12] ) 1032 pos += 12 1033 lenGroup = 1 + endCharCode - startCharCode 1034 charCodes.extend(list(range(startCharCode, endCharCode +1))) 1035 gids.extend(self._computeGIDs(glyphID, lenGroup)) 1036 self.data = data = None 1037 self.cmap = _make_map(self.ttFont, charCodes, gids) 1038 1039 def compile(self, ttFont): 1040 if self.data: 1041 return struct.pack(">HHLLL", self.format, self.reserved, self.length, self.language, self.nGroups) + self.data 1042 charCodes = list(self.cmap.keys()) 1043 names = list(self.cmap.values()) 1044 nameMap = ttFont.getReverseGlyphMap() 1045 try: 1046 gids = [nameMap[name] for name in names] 1047 except KeyError: 1048 nameMap = ttFont.getReverseGlyphMap(rebuild=True) 1049 try: 1050 gids = [nameMap[name] for name in names] 1051 except KeyError: 1052 # allow virtual GIDs in format 12 tables 1053 gids = [] 1054 for name in names: 1055 try: 1056 gid = nameMap[name] 1057 except KeyError: 1058 try: 1059 if (name[:3] == 'gid'): 1060 gid = int(name[3:]) 1061 else: 1062 gid = ttFont.getGlyphID(name) 1063 except: 1064 raise KeyError(name) 1065 1066 gids.append(gid) 1067 1068 cmap = {} # code:glyphID mapping 1069 for code, gid in zip(charCodes, gids): 1070 cmap[code] = gid 1071 1072 charCodes.sort() 1073 index = 0 1074 startCharCode = charCodes[0] 1075 startGlyphID = cmap[startCharCode] 1076 lastGlyphID = startGlyphID - self._format_step 1077 lastCharCode = startCharCode - 1 1078 nGroups = 0 1079 dataList = [] 1080 maxIndex = len(charCodes) 1081 for index in range(maxIndex): 1082 charCode = charCodes[index] 1083 glyphID = cmap[charCode] 1084 if not self._IsInSameRun(glyphID, lastGlyphID, charCode, lastCharCode): 1085 dataList.append(struct.pack(">LLL", startCharCode, lastCharCode, startGlyphID)) 1086 startCharCode = charCode 1087 startGlyphID = glyphID 1088 nGroups = nGroups + 1 1089 lastGlyphID = glyphID 1090 lastCharCode = charCode 1091 dataList.append(struct.pack(">LLL", startCharCode, lastCharCode, startGlyphID)) 1092 nGroups = nGroups + 1 1093 data = bytesjoin(dataList) 1094 lengthSubtable = len(data) +16 1095 assert len(data) == (nGroups*12) == (lengthSubtable-16) 1096 return struct.pack(">HHLLL", self.format, self.reserved, lengthSubtable, self.language, nGroups) + data 1097 1098 def toXML(self, writer, ttFont): 1099 writer.begintag(self.__class__.__name__, [ 1100 ("platformID", self.platformID), 1101 ("platEncID", self.platEncID), 1102 ("format", self.format), 1103 ("reserved", self.reserved), 1104 ("length", self.length), 1105 ("language", self.language), 1106 ("nGroups", self.nGroups), 1107 ]) 1108 writer.newline() 1109 codes = sorted(self.cmap.items()) 1110 self._writeCodes(codes, writer) 1111 writer.endtag(self.__class__.__name__) 1112 writer.newline() 1113 1114 def fromXML(self, name, attrs, content, ttFont): 1115 self.format = safeEval(attrs["format"]) 1116 self.reserved = safeEval(attrs["reserved"]) 1117 self.length = safeEval(attrs["length"]) 1118 self.language = safeEval(attrs["language"]) 1119 self.nGroups = safeEval(attrs["nGroups"]) 1120 if not hasattr(self, "cmap"): 1121 self.cmap = {} 1122 cmap = self.cmap 1123 1124 for element in content: 1125 if not isinstance(element, tuple): 1126 continue 1127 name, attrs, content = element 1128 if name != "map": 1129 continue 1130 cmap[safeEval(attrs["code"])] = attrs["name"] 1131 1132 1133class cmap_format_12(cmap_format_12_or_13): 1134 1135 _format_step = 1 1136 1137 def __init__(self, format=12): 1138 cmap_format_12_or_13.__init__(self, format) 1139 1140 def _computeGIDs(self, startingGlyph, numberOfGlyphs): 1141 return list(range(startingGlyph, startingGlyph + numberOfGlyphs)) 1142 1143 def _IsInSameRun(self, glyphID, lastGlyphID, charCode, lastCharCode): 1144 return (glyphID == 1 + lastGlyphID) and (charCode == 1 + lastCharCode) 1145 1146 1147class cmap_format_13(cmap_format_12_or_13): 1148 1149 _format_step = 0 1150 1151 def __init__(self, format=13): 1152 cmap_format_12_or_13.__init__(self, format) 1153 1154 def _computeGIDs(self, startingGlyph, numberOfGlyphs): 1155 return [startingGlyph] * numberOfGlyphs 1156 1157 def _IsInSameRun(self, glyphID, lastGlyphID, charCode, lastCharCode): 1158 return (glyphID == lastGlyphID) and (charCode == 1 + lastCharCode) 1159 1160 1161def cvtToUVS(threeByteString): 1162 data = b"\0" + threeByteString 1163 val, = struct.unpack(">L", data) 1164 return val 1165 1166def cvtFromUVS(val): 1167 assert 0 <= val < 0x1000000 1168 fourByteString = struct.pack(">L", val) 1169 return fourByteString[1:] 1170 1171 1172class cmap_format_14(CmapSubtable): 1173 1174 def decompileHeader(self, data, ttFont): 1175 format, length, numVarSelectorRecords = struct.unpack(">HLL", data[:10]) 1176 self.data = data[10:] 1177 self.length = length 1178 self.numVarSelectorRecords = numVarSelectorRecords 1179 self.ttFont = ttFont 1180 self.language = 0xFF # has no language. 1181 1182 def decompile(self, data, ttFont): 1183 if data is not None and ttFont is not None: 1184 self.decompileHeader(data, ttFont) 1185 else: 1186 assert (data is None and ttFont is None), "Need both data and ttFont arguments" 1187 data = self.data 1188 1189 self.cmap = {} # so that clients that expect this to exist in a cmap table won't fail. 1190 uvsDict = {} 1191 recOffset = 0 1192 for n in range(self.numVarSelectorRecords): 1193 uvs, defOVSOffset, nonDefUVSOffset = struct.unpack(">3sLL", data[recOffset:recOffset +11]) 1194 recOffset += 11 1195 varUVS = cvtToUVS(uvs) 1196 if defOVSOffset: 1197 startOffset = defOVSOffset - 10 1198 numValues, = struct.unpack(">L", data[startOffset:startOffset+4]) 1199 startOffset +=4 1200 for r in range(numValues): 1201 uv, addtlCnt = struct.unpack(">3sB", data[startOffset:startOffset+4]) 1202 startOffset += 4 1203 firstBaseUV = cvtToUVS(uv) 1204 cnt = addtlCnt+1 1205 baseUVList = list(range(firstBaseUV, firstBaseUV+cnt)) 1206 glyphList = [None]*cnt 1207 localUVList = zip(baseUVList, glyphList) 1208 try: 1209 uvsDict[varUVS].extend(localUVList) 1210 except KeyError: 1211 uvsDict[varUVS] = list(localUVList) 1212 1213 if nonDefUVSOffset: 1214 startOffset = nonDefUVSOffset - 10 1215 numRecs, = struct.unpack(">L", data[startOffset:startOffset+4]) 1216 startOffset +=4 1217 localUVList = [] 1218 for r in range(numRecs): 1219 uv, gid = struct.unpack(">3sH", data[startOffset:startOffset+5]) 1220 startOffset += 5 1221 uv = cvtToUVS(uv) 1222 glyphName = self.ttFont.getGlyphName(gid) 1223 localUVList.append((uv, glyphName)) 1224 try: 1225 uvsDict[varUVS].extend(localUVList) 1226 except KeyError: 1227 uvsDict[varUVS] = localUVList 1228 1229 self.uvsDict = uvsDict 1230 1231 def toXML(self, writer, ttFont): 1232 writer.begintag(self.__class__.__name__, [ 1233 ("platformID", self.platformID), 1234 ("platEncID", self.platEncID), 1235 ]) 1236 writer.newline() 1237 uvsDict = self.uvsDict 1238 uvsList = sorted(uvsDict.keys()) 1239 for uvs in uvsList: 1240 uvList = uvsDict[uvs] 1241 uvList.sort(key=lambda item: (item[1] is not None, item[0], item[1])) 1242 for uv, gname in uvList: 1243 attrs = [("uv", hex(uv)), ("uvs", hex(uvs))] 1244 if gname is not None: 1245 attrs.append(("name", gname)) 1246 writer.simpletag("map", attrs) 1247 writer.newline() 1248 writer.endtag(self.__class__.__name__) 1249 writer.newline() 1250 1251 def fromXML(self, name, attrs, content, ttFont): 1252 self.language = 0xFF # provide a value so that CmapSubtable.__lt__() won't fail 1253 if not hasattr(self, "cmap"): 1254 self.cmap = {} # so that clients that expect this to exist in a cmap table won't fail. 1255 if not hasattr(self, "uvsDict"): 1256 self.uvsDict = {} 1257 uvsDict = self.uvsDict 1258 1259 # For backwards compatibility reasons we accept "None" as an indicator 1260 # for "default mapping", unless the font actually has a glyph named 1261 # "None". 1262 _hasGlyphNamedNone = None 1263 1264 for element in content: 1265 if not isinstance(element, tuple): 1266 continue 1267 name, attrs, content = element 1268 if name != "map": 1269 continue 1270 uvs = safeEval(attrs["uvs"]) 1271 uv = safeEval(attrs["uv"]) 1272 gname = attrs.get("name") 1273 if gname == "None": 1274 if _hasGlyphNamedNone is None: 1275 _hasGlyphNamedNone = "None" in ttFont.getGlyphOrder() 1276 if not _hasGlyphNamedNone: 1277 gname = None 1278 try: 1279 uvsDict[uvs].append((uv, gname)) 1280 except KeyError: 1281 uvsDict[uvs] = [(uv, gname)] 1282 1283 def compile(self, ttFont): 1284 if self.data: 1285 return struct.pack(">HLL", self.format, self.length, self.numVarSelectorRecords) + self.data 1286 1287 uvsDict = self.uvsDict 1288 uvsList = sorted(uvsDict.keys()) 1289 self.numVarSelectorRecords = len(uvsList) 1290 offset = 10 + self.numVarSelectorRecords*11 # current value is end of VarSelectorRecords block. 1291 data = [] 1292 varSelectorRecords =[] 1293 for uvs in uvsList: 1294 entryList = uvsDict[uvs] 1295 1296 defList = [entry for entry in entryList if entry[1] is None] 1297 if defList: 1298 defList = [entry[0] for entry in defList] 1299 defOVSOffset = offset 1300 defList.sort() 1301 1302 lastUV = defList[0] 1303 cnt = -1 1304 defRecs = [] 1305 for defEntry in defList: 1306 cnt +=1 1307 if (lastUV+cnt) != defEntry: 1308 rec = struct.pack(">3sB", cvtFromUVS(lastUV), cnt-1) 1309 lastUV = defEntry 1310 defRecs.append(rec) 1311 cnt = 0 1312 1313 rec = struct.pack(">3sB", cvtFromUVS(lastUV), cnt) 1314 defRecs.append(rec) 1315 1316 numDefRecs = len(defRecs) 1317 data.append(struct.pack(">L", numDefRecs)) 1318 data.extend(defRecs) 1319 offset += 4 + numDefRecs*4 1320 else: 1321 defOVSOffset = 0 1322 1323 ndefList = [entry for entry in entryList if entry[1] is not None] 1324 if ndefList: 1325 nonDefUVSOffset = offset 1326 ndefList.sort() 1327 numNonDefRecs = len(ndefList) 1328 data.append(struct.pack(">L", numNonDefRecs)) 1329 offset += 4 + numNonDefRecs*5 1330 1331 for uv, gname in ndefList: 1332 gid = ttFont.getGlyphID(gname) 1333 ndrec = struct.pack(">3sH", cvtFromUVS(uv), gid) 1334 data.append(ndrec) 1335 else: 1336 nonDefUVSOffset = 0 1337 1338 vrec = struct.pack(">3sLL", cvtFromUVS(uvs), defOVSOffset, nonDefUVSOffset) 1339 varSelectorRecords.append(vrec) 1340 1341 data = bytesjoin(varSelectorRecords) + bytesjoin(data) 1342 self.length = 10 + len(data) 1343 headerdata = struct.pack(">HLL", self.format, self.length, self.numVarSelectorRecords) 1344 1345 return headerdata + data 1346 1347 1348class cmap_format_unknown(CmapSubtable): 1349 1350 def toXML(self, writer, ttFont): 1351 cmapName = self.__class__.__name__[:12] + str(self.format) 1352 writer.begintag(cmapName, [ 1353 ("platformID", self.platformID), 1354 ("platEncID", self.platEncID), 1355 ]) 1356 writer.newline() 1357 writer.dumphex(self.data) 1358 writer.endtag(cmapName) 1359 writer.newline() 1360 1361 def fromXML(self, name, attrs, content, ttFont): 1362 self.data = readHex(content) 1363 self.cmap = {} 1364 1365 def decompileHeader(self, data, ttFont): 1366 self.language = 0 # dummy value 1367 self.data = data 1368 1369 def decompile(self, data, ttFont): 1370 # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None. 1371 # If not, someone is calling the subtable decompile() directly, and must provide both args. 1372 if data is not None and ttFont is not None: 1373 self.decompileHeader(data, ttFont) 1374 else: 1375 assert (data is None and ttFont is None), "Need both data and ttFont arguments" 1376 1377 def compile(self, ttFont): 1378 if self.data: 1379 return self.data 1380 else: 1381 return None 1382 1383cmap_classes = { 1384 0: cmap_format_0, 1385 2: cmap_format_2, 1386 4: cmap_format_4, 1387 6: cmap_format_6, 1388 12: cmap_format_12, 1389 13: cmap_format_13, 1390 14: cmap_format_14, 1391} 1392