1from fontTools.misc.textTools import bytesjoin, safeEval, readHex 2from fontTools.misc.encodingTools import getEncoding 3from fontTools.ttLib import getSearchRange 4from fontTools.unicode import Unicode 5from . import DefaultTable 6import sys 7import struct 8import array 9import logging 10 11 12log = logging.getLogger(__name__) 13 14 15def _make_map(font, chars, gids): 16 assert len(chars) == len(gids) 17 glyphNames = font.getGlyphNameMany(gids) 18 cmap = {} 19 for char,gid,name in zip(chars,gids,glyphNames): 20 if gid == 0: 21 continue 22 cmap[char] = name 23 return cmap 24 25class table__c_m_a_p(DefaultTable.DefaultTable): 26 """Character to Glyph Index Mapping Table 27 28 This class represents the `cmap <https://docs.microsoft.com/en-us/typography/opentype/spec/cmap>`_ 29 table, which maps between input characters (in Unicode or other system encodings) 30 and glyphs within the font. The ``cmap`` table contains one or more subtables 31 which determine the mapping of of characters to glyphs across different platforms 32 and encoding systems. 33 34 ``table__c_m_a_p`` objects expose an accessor ``.tables`` which provides access 35 to the subtables, although it is normally easier to retrieve individual subtables 36 through the utility methods described below. To add new subtables to a font, 37 first determine the subtable format (if in doubt use format 4 for glyphs within 38 the BMP, format 12 for glyphs outside the BMP, and format 14 for Unicode Variation 39 Sequences) construct subtable objects with ``CmapSubtable.newSubtable(format)``, 40 and append them to the ``.tables`` list. 41 42 Within a subtable, the mapping of characters to glyphs is provided by the ``.cmap`` 43 attribute. 
44 45 Example:: 46 47 cmap4_0_3 = CmapSubtable.newSubtable(4) 48 cmap4_0_3.platformID = 0 49 cmap4_0_3.platEncID = 3 50 cmap4_0_3.language = 0 51 cmap4_0_3.cmap = { 0xC1: "Aacute" } 52 53 cmap = newTable("cmap") 54 cmap.tableVersion = 0 55 cmap.tables = [cmap4_0_3] 56 """ 57 58 def getcmap(self, platformID, platEncID): 59 """Returns the first subtable which matches the given platform and encoding. 60 61 Args: 62 platformID (int): The platform ID. Use 0 for Unicode, 1 for Macintosh 63 (deprecated for new fonts), 2 for ISO (deprecated) and 3 for Windows. 64 encodingID (int): Encoding ID. Interpretation depends on the platform ID. 65 See the OpenType specification for details. 66 67 Returns: 68 An object which is a subclass of :py:class:`CmapSubtable` if a matching 69 subtable is found within the font, or ``None`` otherwise. 70 """ 71 72 for subtable in self.tables: 73 if (subtable.platformID == platformID and 74 subtable.platEncID == platEncID): 75 return subtable 76 return None # not found 77 78 def getBestCmap(self, cmapPreferences=((3, 10), (0, 6), (0, 4), (3, 1), (0, 3), (0, 2), (0, 1), (0, 0))): 79 """Returns the 'best' Unicode cmap dictionary available in the font 80 or ``None``, if no Unicode cmap subtable is available. 81 82 By default it will search for the following (platformID, platEncID) 83 pairs in order:: 84 85 (3, 10), # Windows Unicode full repertoire 86 (0, 6), # Unicode full repertoire (format 13 subtable) 87 (0, 4), # Unicode 2.0 full repertoire 88 (3, 1), # Windows Unicode BMP 89 (0, 3), # Unicode 2.0 BMP 90 (0, 2), # Unicode ISO/IEC 10646 91 (0, 1), # Unicode 1.1 92 (0, 0) # Unicode 1.0 93 94 This order can be customized via the ``cmapPreferences`` argument. 
95 """ 96 for platformID, platEncID in cmapPreferences: 97 cmapSubtable = self.getcmap(platformID, platEncID) 98 if cmapSubtable is not None: 99 return cmapSubtable.cmap 100 return None # None of the requested cmap subtables were found 101 102 def buildReversed(self): 103 """Builds a reverse mapping dictionary 104 105 Iterates over all Unicode cmap tables and returns a dictionary mapping 106 glyphs to sets of codepoints, such as:: 107 108 { 109 'one': {0x31} 110 'A': {0x41,0x391} 111 } 112 113 The values are sets of Unicode codepoints because 114 some fonts map different codepoints to the same glyph. 115 For example, ``U+0041 LATIN CAPITAL LETTER A`` and ``U+0391 116 GREEK CAPITAL LETTER ALPHA`` are sometimes the same glyph. 117 """ 118 result = {} 119 for subtable in self.tables: 120 if subtable.isUnicode(): 121 for codepoint, name in subtable.cmap.items(): 122 result.setdefault(name, set()).add(codepoint) 123 return result 124 125 def decompile(self, data, ttFont): 126 tableVersion, numSubTables = struct.unpack(">HH", data[:4]) 127 self.tableVersion = int(tableVersion) 128 self.tables = tables = [] 129 seenOffsets = {} 130 for i in range(numSubTables): 131 platformID, platEncID, offset = struct.unpack( 132 ">HHl", data[4+i*8:4+(i+1)*8]) 133 platformID, platEncID = int(platformID), int(platEncID) 134 format, length = struct.unpack(">HH", data[offset:offset+4]) 135 if format in [8,10,12,13]: 136 format, reserved, length = struct.unpack(">HHL", data[offset:offset+8]) 137 elif format in [14]: 138 format, length = struct.unpack(">HL", data[offset:offset+6]) 139 140 if not length: 141 log.error( 142 "cmap subtable is reported as having zero length: platformID %s, " 143 "platEncID %s, format %s offset %s. 
Skipping table.", 144 platformID, platEncID, format, offset) 145 continue 146 table = CmapSubtable.newSubtable(format) 147 table.platformID = platformID 148 table.platEncID = platEncID 149 # Note that by default we decompile only the subtable header info; 150 # any other data gets decompiled only when an attribute of the 151 # subtable is referenced. 152 table.decompileHeader(data[offset:offset+int(length)], ttFont) 153 if offset in seenOffsets: 154 table.data = None # Mark as decompiled 155 table.cmap = tables[seenOffsets[offset]].cmap 156 else: 157 seenOffsets[offset] = i 158 tables.append(table) 159 if ttFont.lazy is False: # Be lazy for None and True 160 self.ensureDecompiled() 161 162 def ensureDecompiled(self): 163 for st in self.tables: 164 st.ensureDecompiled() 165 166 def compile(self, ttFont): 167 self.tables.sort() # sort according to the spec; see CmapSubtable.__lt__() 168 numSubTables = len(self.tables) 169 totalOffset = 4 + 8 * numSubTables 170 data = struct.pack(">HH", self.tableVersion, numSubTables) 171 tableData = b"" 172 seen = {} # Some tables are the same object reference. Don't compile them twice. 
173 done = {} # Some tables are different objects, but compile to the same data chunk 174 for table in self.tables: 175 try: 176 offset = seen[id(table.cmap)] 177 except KeyError: 178 chunk = table.compile(ttFont) 179 if chunk in done: 180 offset = done[chunk] 181 else: 182 offset = seen[id(table.cmap)] = done[chunk] = totalOffset + len(tableData) 183 tableData = tableData + chunk 184 data = data + struct.pack(">HHl", table.platformID, table.platEncID, offset) 185 return data + tableData 186 187 def toXML(self, writer, ttFont): 188 writer.simpletag("tableVersion", version=self.tableVersion) 189 writer.newline() 190 for table in self.tables: 191 table.toXML(writer, ttFont) 192 193 def fromXML(self, name, attrs, content, ttFont): 194 if name == "tableVersion": 195 self.tableVersion = safeEval(attrs["version"]) 196 return 197 if name[:12] != "cmap_format_": 198 return 199 if not hasattr(self, "tables"): 200 self.tables = [] 201 format = safeEval(name[12:]) 202 table = CmapSubtable.newSubtable(format) 203 table.platformID = safeEval(attrs["platformID"]) 204 table.platEncID = safeEval(attrs["platEncID"]) 205 table.fromXML(name, attrs, content, ttFont) 206 self.tables.append(table) 207 208 209class CmapSubtable(object): 210 """Base class for all cmap subtable formats. 211 212 Subclasses which handle the individual subtable formats are named 213 ``cmap_format_0``, ``cmap_format_2`` etc. Use :py:meth:`getSubtableClass` 214 to retrieve the concrete subclass, or :py:meth:`newSubtable` to get a 215 new subtable object for a given format. 216 217 The object exposes a ``.cmap`` attribute, which contains a dictionary mapping 218 character codepoints to glyph names. 
219 """ 220 221 @staticmethod 222 def getSubtableClass(format): 223 """Return the subtable class for a format.""" 224 return cmap_classes.get(format, cmap_format_unknown) 225 226 @staticmethod 227 def newSubtable(format): 228 """Return a new instance of a subtable for the given format 229 .""" 230 subtableClass = CmapSubtable.getSubtableClass(format) 231 return subtableClass(format) 232 233 def __init__(self, format): 234 self.format = format 235 self.data = None 236 self.ttFont = None 237 self.platformID = None #: The platform ID of this subtable 238 self.platEncID = None #: The encoding ID of this subtable (interpretation depends on ``platformID``) 239 self.language = None #: The language ID of this subtable (Macintosh platform only) 240 241 def ensureDecompiled(self): 242 if self.data is None: 243 return 244 self.decompile(None, None) # use saved data. 245 self.data = None # Once this table has been decompiled, make sure we don't 246 # just return the original data. Also avoids recursion when 247 # called with an attribute that the cmap subtable doesn't have. 248 249 def __getattr__(self, attr): 250 # allow lazy decompilation of subtables. 
251 if attr[:2] == '__': # don't handle requests for member functions like '__lt__' 252 raise AttributeError(attr) 253 if self.data is None: 254 raise AttributeError(attr) 255 self.ensureDecompiled() 256 return getattr(self, attr) 257 258 def decompileHeader(self, data, ttFont): 259 format, length, language = struct.unpack(">HHH", data[:6]) 260 assert len(data) == length, "corrupt cmap table format %d (data length: %d, header length: %d)" % (format, len(data), length) 261 self.format = int(format) 262 self.length = int(length) 263 self.language = int(language) 264 self.data = data[6:] 265 self.ttFont = ttFont 266 267 def toXML(self, writer, ttFont): 268 writer.begintag(self.__class__.__name__, [ 269 ("platformID", self.platformID), 270 ("platEncID", self.platEncID), 271 ("language", self.language), 272 ]) 273 writer.newline() 274 codes = sorted(self.cmap.items()) 275 self._writeCodes(codes, writer) 276 writer.endtag(self.__class__.__name__) 277 writer.newline() 278 279 def getEncoding(self, default=None): 280 """Returns the Python encoding name for this cmap subtable based on its platformID, 281 platEncID, and language. If encoding for these values is not known, by default 282 ``None`` is returned. That can be overridden by passing a value to the ``default`` 283 argument. 284 285 Note that if you want to choose a "preferred" cmap subtable, most of the time 286 ``self.isUnicode()`` is what you want as that one only returns true for the modern, 287 commonly used, Unicode-compatible triplets, not the legacy ones. 
288 """ 289 return getEncoding(self.platformID, self.platEncID, self.language, default) 290 291 def isUnicode(self): 292 """Returns true if the characters are interpreted as Unicode codepoints.""" 293 return (self.platformID == 0 or 294 (self.platformID == 3 and self.platEncID in [0, 1, 10])) 295 296 def isSymbol(self): 297 """Returns true if the subtable is for the Symbol encoding (3,0)""" 298 return self.platformID == 3 and self.platEncID == 0 299 300 def _writeCodes(self, codes, writer): 301 isUnicode = self.isUnicode() 302 for code, name in codes: 303 writer.simpletag("map", code=hex(code), name=name) 304 if isUnicode: 305 writer.comment(Unicode[code]) 306 writer.newline() 307 308 def __lt__(self, other): 309 if not isinstance(other, CmapSubtable): 310 return NotImplemented 311 312 # implemented so that list.sort() sorts according to the spec. 313 selfTuple = ( 314 getattr(self, "platformID", None), 315 getattr(self, "platEncID", None), 316 getattr(self, "language", None), 317 self.__dict__) 318 otherTuple = ( 319 getattr(other, "platformID", None), 320 getattr(other, "platEncID", None), 321 getattr(other, "language", None), 322 other.__dict__) 323 return selfTuple < otherTuple 324 325 326class cmap_format_0(CmapSubtable): 327 328 def decompile(self, data, ttFont): 329 # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None. 330 # If not, someone is calling the subtable decompile() directly, and must provide both args. 
331 if data is not None and ttFont is not None: 332 self.decompileHeader(data, ttFont) 333 else: 334 assert (data is None and ttFont is None), "Need both data and ttFont arguments" 335 data = self.data # decompileHeader assigns the data after the header to self.data 336 assert 262 == self.length, "Format 0 cmap subtable not 262 bytes" 337 gids = array.array("B") 338 gids.frombytes(self.data) 339 charCodes = list(range(len(gids))) 340 self.cmap = _make_map(self.ttFont, charCodes, gids) 341 342 def compile(self, ttFont): 343 if self.data: 344 return struct.pack(">HHH", 0, 262, self.language) + self.data 345 346 cmap = self.cmap 347 assert set(cmap.keys()).issubset(range(256)) 348 getGlyphID = ttFont.getGlyphID 349 valueList = [getGlyphID(cmap[i]) if i in cmap else 0 for i in range(256)] 350 351 gids = array.array("B", valueList) 352 data = struct.pack(">HHH", 0, 262, self.language) + gids.tobytes() 353 assert len(data) == 262 354 return data 355 356 def fromXML(self, name, attrs, content, ttFont): 357 self.language = safeEval(attrs["language"]) 358 if not hasattr(self, "cmap"): 359 self.cmap = {} 360 cmap = self.cmap 361 for element in content: 362 if not isinstance(element, tuple): 363 continue 364 name, attrs, content = element 365 if name != "map": 366 continue 367 cmap[safeEval(attrs["code"])] = attrs["name"] 368 369 370subHeaderFormat = ">HHhH" 371class SubHeader(object): 372 def __init__(self): 373 self.firstCode = None 374 self.entryCount = None 375 self.idDelta = None 376 self.idRangeOffset = None 377 self.glyphIndexArray = [] 378 379class cmap_format_2(CmapSubtable): 380 381 def setIDDelta(self, subHeader): 382 subHeader.idDelta = 0 383 # find the minGI which is not zero. 384 minGI = subHeader.glyphIndexArray[0] 385 for gid in subHeader.glyphIndexArray: 386 if (gid != 0) and (gid < minGI): 387 minGI = gid 388 # The lowest gid in glyphIndexArray, after subtracting idDelta, must be 1. 389 # idDelta is a short, and must be between -32K and 32K. 
minGI can be between 1 and 64K. 390 # We would like to pick an idDelta such that the first glyphArray GID is 1, 391 # so that we are more likely to be able to combine glypharray GID subranges. 392 # This means that we have a problem when minGI is > 32K 393 # Since the final gi is reconstructed from the glyphArray GID by: 394 # (short)finalGID = (gid + idDelta) % 0x10000), 395 # we can get from a glypharray GID of 1 to a final GID of 65K by subtracting 2, and casting the 396 # negative number to an unsigned short. 397 398 if (minGI > 1): 399 if minGI > 0x7FFF: 400 subHeader.idDelta = -(0x10000 - minGI) -1 401 else: 402 subHeader.idDelta = minGI -1 403 idDelta = subHeader.idDelta 404 for i in range(subHeader.entryCount): 405 gid = subHeader.glyphIndexArray[i] 406 if gid > 0: 407 subHeader.glyphIndexArray[i] = gid - idDelta 408 409 def decompile(self, data, ttFont): 410 # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None. 411 # If not, someone is calling the subtable decompile() directly, and must provide both args. 412 if data is not None and ttFont is not None: 413 self.decompileHeader(data, ttFont) 414 else: 415 assert (data is None and ttFont is None), "Need both data and ttFont arguments" 416 417 data = self.data # decompileHeader assigns the data after the header to self.data 418 subHeaderKeys = [] 419 maxSubHeaderindex = 0 420 # get the key array, and determine the number of subHeaders. 
421 allKeys = array.array("H") 422 allKeys.frombytes(data[:512]) 423 data = data[512:] 424 if sys.byteorder != "big": allKeys.byteswap() 425 subHeaderKeys = [ key//8 for key in allKeys] 426 maxSubHeaderindex = max(subHeaderKeys) 427 428 #Load subHeaders 429 subHeaderList = [] 430 pos = 0 431 for i in range(maxSubHeaderindex + 1): 432 subHeader = SubHeader() 433 (subHeader.firstCode, subHeader.entryCount, subHeader.idDelta, \ 434 subHeader.idRangeOffset) = struct.unpack(subHeaderFormat, data[pos:pos + 8]) 435 pos += 8 436 giDataPos = pos + subHeader.idRangeOffset-2 437 giList = array.array("H") 438 giList.frombytes(data[giDataPos:giDataPos + subHeader.entryCount*2]) 439 if sys.byteorder != "big": giList.byteswap() 440 subHeader.glyphIndexArray = giList 441 subHeaderList.append(subHeader) 442 # How this gets processed. 443 # Charcodes may be one or two bytes. 444 # The first byte of a charcode is mapped through the subHeaderKeys, to select 445 # a subHeader. For any subheader but 0, the next byte is then mapped through the 446 # selected subheader. If subheader Index 0 is selected, then the byte itself is 447 # mapped through the subheader, and there is no second byte. 448 # Then assume that the subsequent byte is the first byte of the next charcode,and repeat. 449 # 450 # Each subheader references a range in the glyphIndexArray whose length is entryCount. 451 # The range in glyphIndexArray referenced by a sunheader may overlap with the range in glyphIndexArray 452 # referenced by another subheader. 453 # The only subheader that will be referenced by more than one first-byte value is the subheader 454 # that maps the entire range of glyphID values to glyphIndex 0, e.g notdef: 455 # {firstChar 0, EntryCount 0,idDelta 0,idRangeOffset xx} 456 # A byte being mapped though a subheader is treated as in index into a mapping of array index to font glyphIndex. 457 # A subheader specifies a subrange within (0...256) by the 458 # firstChar and EntryCount values. 
If the byte value is outside the subrange, then the glyphIndex is zero 459 # (e.g. glyph not in font). 460 # If the byte index is in the subrange, then an offset index is calculated as (byteIndex - firstChar). 461 # The index to glyphIndex mapping is a subrange of the glyphIndexArray. You find the start of the subrange by 462 # counting idRangeOffset bytes from the idRangeOffset word. The first value in this subrange is the 463 # glyphIndex for the index firstChar. The offset index should then be used in this array to get the glyphIndex. 464 # Example for Logocut-Medium 465 # first byte of charcode = 129; selects subheader 1. 466 # subheader 1 = {firstChar 64, EntryCount 108,idDelta 42,idRangeOffset 0252} 467 # second byte of charCode = 66 468 # the index offset = 66-64 = 2. 469 # The subrange of the glyphIndexArray starting at 0x0252 bytes from the idRangeOffset word is: 470 # [glyphIndexArray index], [subrange array index] = glyphIndex 471 # [256], [0]=1 from charcode [129, 64] 472 # [257], [1]=2 from charcode [129, 65] 473 # [258], [2]=3 from charcode [129, 66] 474 # [259], [3]=4 from charcode [129, 67] 475 # So, the glyphIndex = 3 from the array. Then if idDelta is not zero and the glyph ID is not zero, 476 # add it to the glyphID to get the final glyphIndex 477 # value. In this case the final glyph index = 3+ 42 -> 45 for the final glyphIndex. Whew! 478 479 self.data = b"" 480 cmap = {} 481 notdefGI = 0 482 for firstByte in range(256): 483 subHeadindex = subHeaderKeys[firstByte] 484 subHeader = subHeaderList[subHeadindex] 485 if subHeadindex == 0: 486 if (firstByte < subHeader.firstCode) or (firstByte >= subHeader.firstCode + subHeader.entryCount): 487 continue # gi is notdef. 488 else: 489 charCode = firstByte 490 offsetIndex = firstByte - subHeader.firstCode 491 gi = subHeader.glyphIndexArray[offsetIndex] 492 if gi != 0: 493 gi = (gi + subHeader.idDelta) % 0x10000 494 else: 495 continue # gi is notdef. 
496 cmap[charCode] = gi 497 else: 498 if subHeader.entryCount: 499 charCodeOffset = firstByte * 256 + subHeader.firstCode 500 for offsetIndex in range(subHeader.entryCount): 501 charCode = charCodeOffset + offsetIndex 502 gi = subHeader.glyphIndexArray[offsetIndex] 503 if gi != 0: 504 gi = (gi + subHeader.idDelta) % 0x10000 505 else: 506 continue 507 cmap[charCode] = gi 508 # If not subHeader.entryCount, then all char codes with this first byte are 509 # mapped to .notdef. We can skip this subtable, and leave the glyphs un-encoded, which is the 510 # same as mapping it to .notdef. 511 512 gids = list(cmap.values()) 513 charCodes = list(cmap.keys()) 514 self.cmap = _make_map(self.ttFont, charCodes, gids) 515 516 def compile(self, ttFont): 517 if self.data: 518 return struct.pack(">HHH", self.format, self.length, self.language) + self.data 519 kEmptyTwoCharCodeRange = -1 520 notdefGI = 0 521 522 items = sorted(self.cmap.items()) 523 charCodes = [item[0] for item in items] 524 names = [item[1] for item in items] 525 nameMap = ttFont.getReverseGlyphMap() 526 try: 527 gids = [nameMap[name] for name in names] 528 except KeyError: 529 nameMap = ttFont.getReverseGlyphMap(rebuild=True) 530 try: 531 gids = [nameMap[name] for name in names] 532 except KeyError: 533 # allow virtual GIDs in format 2 tables 534 gids = [] 535 for name in names: 536 try: 537 gid = nameMap[name] 538 except KeyError: 539 try: 540 if (name[:3] == 'gid'): 541 gid = int(name[3:]) 542 else: 543 gid = ttFont.getGlyphID(name) 544 except: 545 raise KeyError(name) 546 547 gids.append(gid) 548 549 # Process the (char code to gid) item list in char code order. 550 # By definition, all one byte char codes map to subheader 0. 551 # For all the two byte char codes, we assume that the first byte maps maps to the empty subhead (with an entry count of 0, 552 # which defines all char codes in its range to map to notdef) unless proven otherwise. 
553 # Note that since the char code items are processed in char code order, all the char codes with the 554 # same first byte are in sequential order. 555 556 subHeaderKeys = [kEmptyTwoCharCodeRange for x in range(256)] # list of indices into subHeaderList. 557 subHeaderList = [] 558 559 # We force this subheader entry 0 to exist in the subHeaderList in the case where some one comes up 560 # with a cmap where all the one byte char codes map to notdef, 561 # with the result that the subhead 0 would not get created just by processing the item list. 562 charCode = charCodes[0] 563 if charCode > 255: 564 subHeader = SubHeader() 565 subHeader.firstCode = 0 566 subHeader.entryCount = 0 567 subHeader.idDelta = 0 568 subHeader.idRangeOffset = 0 569 subHeaderList.append(subHeader) 570 571 lastFirstByte = -1 572 items = zip(charCodes, gids) 573 for charCode, gid in items: 574 if gid == 0: 575 continue 576 firstbyte = charCode >> 8 577 secondByte = charCode & 0x00FF 578 579 if firstbyte != lastFirstByte: # Need to update the current subhead, and start a new one. 580 if lastFirstByte > -1: 581 # fix GI's and iDelta of current subheader. 582 self.setIDDelta(subHeader) 583 584 # If it was sunheader 0 for one-byte charCodes, then we need to set the subHeaderKeys value to zero 585 # for the indices matching the char codes. 586 if lastFirstByte == 0: 587 for index in range(subHeader.entryCount): 588 charCode = subHeader.firstCode + index 589 subHeaderKeys[charCode] = 0 590 591 assert (subHeader.entryCount == len(subHeader.glyphIndexArray)), "Error - subhead entry count does not match len of glyphID subrange." 
592 # init new subheader 593 subHeader = SubHeader() 594 subHeader.firstCode = secondByte 595 subHeader.entryCount = 1 596 subHeader.glyphIndexArray.append(gid) 597 subHeaderList.append(subHeader) 598 subHeaderKeys[firstbyte] = len(subHeaderList) -1 599 lastFirstByte = firstbyte 600 else: 601 # need to fill in with notdefs all the code points between the last charCode and the current charCode. 602 codeDiff = secondByte - (subHeader.firstCode + subHeader.entryCount) 603 for i in range(codeDiff): 604 subHeader.glyphIndexArray.append(notdefGI) 605 subHeader.glyphIndexArray.append(gid) 606 subHeader.entryCount = subHeader.entryCount + codeDiff + 1 607 608 # fix GI's and iDelta of last subheader that we we added to the subheader array. 609 self.setIDDelta(subHeader) 610 611 # Now we add a final subheader for the subHeaderKeys which maps to empty two byte charcode ranges. 612 subHeader = SubHeader() 613 subHeader.firstCode = 0 614 subHeader.entryCount = 0 615 subHeader.idDelta = 0 616 subHeader.idRangeOffset = 2 617 subHeaderList.append(subHeader) 618 emptySubheadIndex = len(subHeaderList) - 1 619 for index in range(256): 620 if subHeaderKeys[index] == kEmptyTwoCharCodeRange: 621 subHeaderKeys[index] = emptySubheadIndex 622 # Since this is the last subheader, the GlyphIndex Array starts two bytes after the start of the 623 # idRangeOffset word of this subHeader. We can safely point to the first entry in the GlyphIndexArray, 624 # since the first subrange of the GlyphIndexArray is for subHeader 0, which always starts with 625 # charcode 0 and GID 0. 626 627 idRangeOffset = (len(subHeaderList)-1)*8 + 2 # offset to beginning of glyphIDArray from first subheader idRangeOffset. 628 subheadRangeLen = len(subHeaderList) -1 # skip last special empty-set subheader; we've already hardocodes its idRangeOffset to 2. 
629 for index in range(subheadRangeLen): 630 subHeader = subHeaderList[index] 631 subHeader.idRangeOffset = 0 632 for j in range(index): 633 prevSubhead = subHeaderList[j] 634 if prevSubhead.glyphIndexArray == subHeader.glyphIndexArray: # use the glyphIndexArray subarray 635 subHeader.idRangeOffset = prevSubhead.idRangeOffset - (index-j)*8 636 subHeader.glyphIndexArray = [] 637 break 638 if subHeader.idRangeOffset == 0: # didn't find one. 639 subHeader.idRangeOffset = idRangeOffset 640 idRangeOffset = (idRangeOffset - 8) + subHeader.entryCount*2 # one less subheader, one more subArray. 641 else: 642 idRangeOffset = idRangeOffset - 8 # one less subheader 643 644 # Now we can write out the data! 645 length = 6 + 512 + 8*len(subHeaderList) # header, 256 subHeaderKeys, and subheader array. 646 for subhead in subHeaderList[:-1]: 647 length = length + len(subhead.glyphIndexArray)*2 # We can't use subhead.entryCount, as some of the subhead may share subArrays. 648 dataList = [struct.pack(">HHH", 2, length, self.language)] 649 for index in subHeaderKeys: 650 dataList.append(struct.pack(">H", index*8)) 651 for subhead in subHeaderList: 652 dataList.append(struct.pack(subHeaderFormat, subhead.firstCode, subhead.entryCount, subhead.idDelta, subhead.idRangeOffset)) 653 for subhead in subHeaderList[:-1]: 654 for gi in subhead.glyphIndexArray: 655 dataList.append(struct.pack(">H", gi)) 656 data = bytesjoin(dataList) 657 assert (len(data) == length), "Error: cmap format 2 is not same length as calculated! 
actual: " + str(len(data))+ " calc : " + str(length) 658 return data 659 660 def fromXML(self, name, attrs, content, ttFont): 661 self.language = safeEval(attrs["language"]) 662 if not hasattr(self, "cmap"): 663 self.cmap = {} 664 cmap = self.cmap 665 666 for element in content: 667 if not isinstance(element, tuple): 668 continue 669 name, attrs, content = element 670 if name != "map": 671 continue 672 cmap[safeEval(attrs["code"])] = attrs["name"] 673 674 675cmap_format_4_format = ">7H" 676 677#uint16 endCode[segCount] # Ending character code for each segment, last = 0xFFFF. 678#uint16 reservedPad # This value should be zero 679#uint16 startCode[segCount] # Starting character code for each segment 680#uint16 idDelta[segCount] # Delta for all character codes in segment 681#uint16 idRangeOffset[segCount] # Offset in bytes to glyph indexArray, or 0 682#uint16 glyphIndexArray[variable] # Glyph index array 683 684def splitRange(startCode, endCode, cmap): 685 # Try to split a range of character codes into subranges with consecutive 686 # glyph IDs in such a way that the cmap4 subtable can be stored "most" 687 # efficiently. I can't prove I've got the optimal solution, but it seems 688 # to do well with the fonts I tested: none became bigger, many became smaller. 689 if startCode == endCode: 690 return [], [endCode] 691 692 lastID = cmap[startCode] 693 lastCode = startCode 694 inOrder = None 695 orderedBegin = None 696 subRanges = [] 697 698 # Gather subranges in which the glyph IDs are consecutive. 
699 for code in range(startCode + 1, endCode + 1): 700 glyphID = cmap[code] 701 702 if glyphID - 1 == lastID: 703 if inOrder is None or not inOrder: 704 inOrder = 1 705 orderedBegin = lastCode 706 else: 707 if inOrder: 708 inOrder = 0 709 subRanges.append((orderedBegin, lastCode)) 710 orderedBegin = None 711 712 lastID = glyphID 713 lastCode = code 714 715 if inOrder: 716 subRanges.append((orderedBegin, lastCode)) 717 assert lastCode == endCode 718 719 # Now filter out those new subranges that would only make the data bigger. 720 # A new segment cost 8 bytes, not using a new segment costs 2 bytes per 721 # character. 722 newRanges = [] 723 for b, e in subRanges: 724 if b == startCode and e == endCode: 725 break # the whole range, we're fine 726 if b == startCode or e == endCode: 727 threshold = 4 # split costs one more segment 728 else: 729 threshold = 8 # split costs two more segments 730 if (e - b + 1) > threshold: 731 newRanges.append((b, e)) 732 subRanges = newRanges 733 734 if not subRanges: 735 return [], [endCode] 736 737 if subRanges[0][0] != startCode: 738 subRanges.insert(0, (startCode, subRanges[0][0] - 1)) 739 if subRanges[-1][1] != endCode: 740 subRanges.append((subRanges[-1][1] + 1, endCode)) 741 742 # Fill the "holes" in the segments list -- those are the segments in which 743 # the glyph IDs are _not_ consecutive. 744 i = 1 745 while i < len(subRanges): 746 if subRanges[i-1][1] + 1 != subRanges[i][0]: 747 subRanges.insert(i, (subRanges[i-1][1] + 1, subRanges[i][0] - 1)) 748 i = i + 1 749 i = i + 1 750 751 # Transform the ranges into startCode/endCode lists. 752 start = [] 753 end = [] 754 for b, e in subRanges: 755 start.append(b) 756 end.append(e) 757 start.pop(0) 758 759 assert len(start) + 1 == len(end) 760 return start, end 761 762 763class cmap_format_4(CmapSubtable): 764 765 def decompile(self, data, ttFont): 766 # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None. 
767 # If not, someone is calling the subtable decompile() directly, and must provide both args. 768 if data is not None and ttFont is not None: 769 self.decompileHeader(data, ttFont) 770 else: 771 assert (data is None and ttFont is None), "Need both data and ttFont arguments" 772 773 data = self.data # decompileHeader assigns the data after the header to self.data 774 (segCountX2, searchRange, entrySelector, rangeShift) = \ 775 struct.unpack(">4H", data[:8]) 776 data = data[8:] 777 segCount = segCountX2 // 2 778 779 allCodes = array.array("H") 780 allCodes.frombytes(data) 781 self.data = data = None 782 783 if sys.byteorder != "big": allCodes.byteswap() 784 785 # divide the data 786 endCode = allCodes[:segCount] 787 allCodes = allCodes[segCount+1:] # the +1 is skipping the reservedPad field 788 startCode = allCodes[:segCount] 789 allCodes = allCodes[segCount:] 790 idDelta = allCodes[:segCount] 791 allCodes = allCodes[segCount:] 792 idRangeOffset = allCodes[:segCount] 793 glyphIndexArray = allCodes[segCount:] 794 lenGIArray = len(glyphIndexArray) 795 796 # build 2-byte character mapping 797 charCodes = [] 798 gids = [] 799 for i in range(len(startCode) - 1): # don't do 0xffff! 800 start = startCode[i] 801 delta = idDelta[i] 802 rangeOffset = idRangeOffset[i] 803 # *someone* needs to get killed. 804 partial = rangeOffset // 2 - start + i - len(idRangeOffset) 805 806 rangeCharCodes = list(range(startCode[i], endCode[i] + 1)) 807 charCodes.extend(rangeCharCodes) 808 if rangeOffset == 0: 809 gids.extend([(charCode + delta) & 0xFFFF for charCode in rangeCharCodes]) 810 else: 811 for charCode in rangeCharCodes: 812 index = charCode + partial 813 assert (index < lenGIArray), "In format 4 cmap, range (%d), the calculated index (%d) into the glyph index array is not less than the length of the array (%d) !" 
def compile(self, ttFont):
    """Serialize this format 4 subtable to bytes.

    When the raw subtable data is still attached (``self.data`` is truthy)
    it is re-emitted behind a freshly packed format/length/language header.
    Otherwise the segment arrays are rebuilt from ``self.cmap``.

    Args:
        ttFont: Font object used to turn glyph names into glyph IDs through
            ``getReverseGlyphMap()`` and, as a fallback, ``getGlyphID()``.

    Returns:
        bytes: The header followed by the segment data.

    Raises:
        KeyError: If a glyph name is neither known to the font nor a usable
            virtual ``gid<number>`` name.
    """
    if self.data:
        return struct.pack(">HHH", self.format, self.length, self.language) + self.data

    charCodes = list(self.cmap.keys())
    if not charCodes:
        # Empty mapping: only the closing 0xffff segment is written.
        startCode = [0xffff]
        endCode = [0xffff]
    else:
        charCodes.sort()
        names = [self.cmap[code] for code in charCodes]
        nameMap = ttFont.getReverseGlyphMap()
        try:
            gids = [nameMap[name] for name in names]
        except KeyError:
            # Retry once with a rebuilt reverse map before falling back to
            # name-by-name resolution.
            nameMap = ttFont.getReverseGlyphMap(rebuild=True)
            try:
                gids = [nameMap[name] for name in names]
            except KeyError:
                # allow virtual GIDs in format 4 tables
                gids = []
                for name in names:
                    try:
                        gid = nameMap[name]
                    except KeyError:
                        try:
                            if name[:3] == 'gid':
                                gid = int(name[3:])
                            else:
                                gid = ttFont.getGlyphID(name)
                        except Exception as err:
                            # Only genuine lookup/parse failures become
                            # KeyError; KeyboardInterrupt and SystemExit
                            # propagate untouched.
                            raise KeyError(name) from err
                    gids.append(gid)
        cmap = dict(zip(charCodes, gids))  # code:glyphID mapping

        # Build startCode and endCode lists.
        # Split the char codes in ranges of consecutive char codes, then split
        # each range in more ranges of consecutive/not consecutive glyph IDs.
        # See splitRange().
        lastCode = charCodes[0]
        endCode = []
        startCode = [lastCode]
        for charCode in charCodes[1:]:  # skip the first code, it's the first start code
            if charCode == lastCode + 1:
                lastCode = charCode
                continue
            start, end = splitRange(startCode[-1], lastCode, cmap)
            startCode.extend(start)
            endCode.extend(end)
            startCode.append(charCode)
            lastCode = charCode
        start, end = splitRange(startCode[-1], lastCode, cmap)
        startCode.extend(start)
        endCode.extend(end)
        startCode.append(0xffff)
        endCode.append(0xffff)

    # Build idDelta / idRangeOffset / glyphIndexArray for every real segment.
    idDelta = []
    idRangeOffset = []
    glyphIndexArray = []
    for i in range(len(endCode) - 1):  # skip the closing codes (0xffff)
        indices = [cmap[charCode] for charCode in range(startCode[i], endCode[i] + 1)]
        if indices == list(range(indices[0], indices[0] + len(indices))):
            # Consecutive glyph IDs: a delta (modulo 65536) is enough.
            idDelta.append((indices[0] - startCode[i]) % 0x10000)
            idRangeOffset.append(0)
        else:
            # Otherwise the glyph IDs go into glyphIndexArray. The offset is
            # in bytes, counted from this segment's own idRangeOffset slot:
            # the remaining slots plus the glyph indices already emitted.
            idDelta.append(0)
            idRangeOffset.append(2 * (len(endCode) + len(glyphIndexArray) - i))
            glyphIndexArray.extend(indices)
    idDelta.append(1)  # 0xffff + 1 == 0, so the closing end code maps to .notdef
    idRangeOffset.append(0)

    segCount = len(endCode)
    segCountX2 = segCount * 2
    searchRange, entrySelector, rangeShift = getSearchRange(segCount, 2)

    # endCode, one zero pad word, then startCode.
    charCodeArray = array.array("H", endCode + [0] + startCode)
    idDeltaArray = array.array("H", idDelta)
    restArray = array.array("H", idRangeOffset + glyphIndexArray)
    if sys.byteorder != "big":
        # The arrays hold native-endian words; the output is big-endian.
        charCodeArray.byteswap()
        idDeltaArray.byteswap()
        restArray.byteswap()
    data = charCodeArray.tobytes() + idDeltaArray.tobytes() + restArray.tobytes()

    length = struct.calcsize(cmap_format_4_format) + len(data)
    header = struct.pack(cmap_format_4_format, self.format, length, self.language,
                         segCountX2, searchRange, entrySelector, rangeShift)
    return header + data
def compile(self, ttFont):
    """Serialize this format 6 subtable to bytes.

    If raw data is still attached (``self.data`` is truthy) it is returned
    behind a freshly packed format/length/language header. Otherwise the
    table is rebuilt from ``self.cmap`` as one contiguous run from the lowest
    to the highest mapped code; codes missing from that run get glyph ID 0.

    Args:
        ttFont: Font object whose ``getGlyphID()`` resolves glyph names.

    Returns:
        bytes: The header followed by the big-endian glyph ID array.
    """
    if self.data:
        rawHeader = struct.pack(">HHH", self.format, self.length, self.language)
        return rawHeader + self.data

    mapping = self.cmap
    codes = sorted(mapping.keys())
    firstCode = 0
    payload = b""
    if codes:  # yes, there are empty cmap tables.
        firstCode = codes[0]
        codes = list(range(firstCode, codes[-1] + 1))
        glyphIDs = []
        for code in codes:
            if code in mapping:
                glyphIDs.append(ttFont.getGlyphID(mapping[code]))
            else:
                glyphIDs.append(0)
        packed = array.array("H", glyphIDs)
        if sys.byteorder != "big":
            # array stores native-endian words; the table is big-endian.
            packed.byteswap()
        payload = packed.tobytes()

    # 10 is the size of the five-word header packed below.
    header = struct.pack(
        ">HHHHH", 6, len(payload) + 10, self.language, firstCode, len(codes)
    )
    return header + payload
def compile(self, ttFont):
    """Serialize this format 12/13 subtable to bytes.

    When raw data is still attached (``self.data`` is truthy) it is re-emitted
    behind a freshly packed header. Otherwise ``self.cmap`` is converted into
    (startCharCode, endCharCode, startGlyphID) groups, where the subclass
    hook ``_IsInSameRun`` decides whether a code extends the current group.
    An empty ``self.cmap`` produces a valid subtable with zero groups.

    Args:
        ttFont: Font object used to turn glyph names into glyph IDs through
            ``getReverseGlyphMap()`` and, as a fallback, ``getGlyphID()``.

    Returns:
        bytes: The 16-byte header followed by 12 bytes per group.

    Raises:
        KeyError: If a glyph name is neither known to the font nor a usable
            virtual ``gid<number>`` name.
    """
    if self.data:
        return struct.pack(">HHLLL", self.format, self.reserved, self.length, self.language, self.nGroups) + self.data

    charCodes = list(self.cmap.keys())
    names = list(self.cmap.values())
    nameMap = ttFont.getReverseGlyphMap()
    try:
        gids = [nameMap[name] for name in names]
    except KeyError:
        # Retry once with a rebuilt reverse map before falling back to
        # name-by-name resolution.
        nameMap = ttFont.getReverseGlyphMap(rebuild=True)
        try:
            gids = [nameMap[name] for name in names]
        except KeyError:
            # allow virtual GIDs in format 12 tables
            gids = []
            for name in names:
                try:
                    gid = nameMap[name]
                except KeyError:
                    try:
                        if name[:3] == 'gid':
                            gid = int(name[3:])
                        else:
                            gid = ttFont.getGlyphID(name)
                    except Exception as err:
                        # Only genuine lookup/parse failures become KeyError;
                        # KeyboardInterrupt and SystemExit propagate.
                        raise KeyError(name) from err
                gids.append(gid)

    cmap = dict(zip(charCodes, gids))  # code:glyphID mapping
    charCodes.sort()

    dataList = []
    if charCodes:  # an empty mapping yields zero groups instead of IndexError
        startCharCode = charCodes[0]
        startGlyphID = cmap[startCharCode]
        # Prime the "previous" values one step behind the first entry so the
        # first code is tested against a virtual predecessor.
        lastGlyphID = startGlyphID - self._format_step
        lastCharCode = startCharCode - 1
        for charCode in charCodes:
            glyphID = cmap[charCode]
            if not self._IsInSameRun(glyphID, lastGlyphID, charCode, lastCharCode):
                # Close the current group and open a new one at this code.
                dataList.append(struct.pack(">LLL", startCharCode, lastCharCode, startGlyphID))
                startCharCode = charCode
                startGlyphID = glyphID
            lastGlyphID = glyphID
            lastCharCode = charCode
        dataList.append(struct.pack(">LLL", startCharCode, lastCharCode, startGlyphID))

    nGroups = len(dataList)
    # Every item is bytes from struct.pack, so a plain join is sufficient.
    data = b"".join(dataList)
    lengthSubtable = len(data) + 16
    return struct.pack(">HHLLL", self.format, self.reserved, lengthSubtable, self.language, nGroups) + data
def cvtToUVS(threeByteString):
    """Decode a 3-byte big-endian byte string into an integer.

    The input is widened with a leading zero byte so that it can be unpacked
    as an unsigned 32-bit big-endian value.
    """
    (value,) = struct.unpack(">L", b"\0" + threeByteString)
    return value


def cvtFromUVS(val):
    """Encode an integer in ``range(0x1000000)`` as 3 big-endian bytes.

    The value is packed as an unsigned 32-bit big-endian integer and the
    leading (always zero) byte is dropped.
    """
    assert 0 <= val and val < 0x1000000
    return struct.pack(">L", val)[1:]
1185 uvsDict = {} 1186 recOffset = 0 1187 for n in range(self.numVarSelectorRecords): 1188 uvs, defOVSOffset, nonDefUVSOffset = struct.unpack(">3sLL", data[recOffset:recOffset +11]) 1189 recOffset += 11 1190 varUVS = cvtToUVS(uvs) 1191 if defOVSOffset: 1192 startOffset = defOVSOffset - 10 1193 numValues, = struct.unpack(">L", data[startOffset:startOffset+4]) 1194 startOffset +=4 1195 for r in range(numValues): 1196 uv, addtlCnt = struct.unpack(">3sB", data[startOffset:startOffset+4]) 1197 startOffset += 4 1198 firstBaseUV = cvtToUVS(uv) 1199 cnt = addtlCnt+1 1200 baseUVList = list(range(firstBaseUV, firstBaseUV+cnt)) 1201 glyphList = [None]*cnt 1202 localUVList = zip(baseUVList, glyphList) 1203 try: 1204 uvsDict[varUVS].extend(localUVList) 1205 except KeyError: 1206 uvsDict[varUVS] = list(localUVList) 1207 1208 if nonDefUVSOffset: 1209 startOffset = nonDefUVSOffset - 10 1210 numRecs, = struct.unpack(">L", data[startOffset:startOffset+4]) 1211 startOffset +=4 1212 localUVList = [] 1213 for r in range(numRecs): 1214 uv, gid = struct.unpack(">3sH", data[startOffset:startOffset+5]) 1215 startOffset += 5 1216 uv = cvtToUVS(uv) 1217 glyphName = self.ttFont.getGlyphName(gid) 1218 localUVList.append((uv, glyphName)) 1219 try: 1220 uvsDict[varUVS].extend(localUVList) 1221 except KeyError: 1222 uvsDict[varUVS] = localUVList 1223 1224 self.uvsDict = uvsDict 1225 1226 def toXML(self, writer, ttFont): 1227 writer.begintag(self.__class__.__name__, [ 1228 ("platformID", self.platformID), 1229 ("platEncID", self.platEncID), 1230 ]) 1231 writer.newline() 1232 uvsDict = self.uvsDict 1233 uvsList = sorted(uvsDict.keys()) 1234 for uvs in uvsList: 1235 uvList = uvsDict[uvs] 1236 uvList.sort(key=lambda item: (item[1] is not None, item[0], item[1])) 1237 for uv, gname in uvList: 1238 attrs = [("uv", hex(uv)), ("uvs", hex(uvs))] 1239 if gname is not None: 1240 attrs.append(("name", gname)) 1241 writer.simpletag("map", attrs) 1242 writer.newline() 1243 
def compile(self, ttFont):
    """Serialize this format 14 (Unicode Variation Sequences) subtable.

    When raw data is still attached (``self.data`` is truthy) it is re-emitted
    behind a freshly packed header. Otherwise the subtable is rebuilt from
    ``self.uvsDict``, which maps each variation selector to a list of
    ``(unicodeValue, glyphName)`` pairs; a ``glyphName`` of ``None`` marks a
    default mapping. ``self.numVarSelectorRecords`` and ``self.length`` are
    updated as a side effect.

    Default mappings are written as (start, additionalCount) ranges. Because
    ``additionalCount`` is packed into a single byte, a run of consecutive
    code points is split so that no range covers more than 256 values.

    Args:
        ttFont: Font object whose ``getGlyphID()`` resolves the glyph names
            of non-default mappings.

    Returns:
        bytes: The 10-byte header, the variation selector records, and the
        default / non-default UVS tables they point to.
    """
    if self.data:
        return struct.pack(">HLL", self.format, self.length, self.numVarSelectorRecords) + self.data

    uvsDict = self.uvsDict
    uvsList = sorted(uvsDict.keys())
    self.numVarSelectorRecords = len(uvsList)
    # Offsets are relative to the subtable start: 10 header bytes plus one
    # 11-byte record per selector gives the end of the record block.
    offset = 10 + self.numVarSelectorRecords * 11
    data = []
    varSelectorRecords = []
    for uvs in uvsList:
        entryList = uvsDict[uvs]

        defList = sorted(entry[0] for entry in entryList if entry[1] is None)
        if defList:
            defOVSOffset = offset
            defRecs = []
            start = prev = defList[0]
            for uv in defList[1:]:
                # Extend the run only while the code points stay consecutive
                # and additionalCount (uv - start) still fits in one byte.
                if uv == prev + 1 and prev - start < 255:
                    prev = uv
                    continue
                defRecs.append(struct.pack(">3sB", cvtFromUVS(start), prev - start))
                start = prev = uv
            defRecs.append(struct.pack(">3sB", cvtFromUVS(start), prev - start))

            numDefRecs = len(defRecs)
            data.append(struct.pack(">L", numDefRecs))
            data.extend(defRecs)
            offset += 4 + numDefRecs * 4  # count field + 4 bytes per range
        else:
            defOVSOffset = 0  # zero offset: no default table for this selector

        ndefList = [entry for entry in entryList if entry[1] is not None]
        if ndefList:
            nonDefUVSOffset = offset
            ndefList.sort()
            numNonDefRecs = len(ndefList)
            data.append(struct.pack(">L", numNonDefRecs))
            offset += 4 + numNonDefRecs * 5  # count field + 5 bytes per mapping

            for uv, gname in ndefList:
                gid = ttFont.getGlyphID(gname)
                data.append(struct.pack(">3sH", cvtFromUVS(uv), gid))
        else:
            nonDefUVSOffset = 0  # zero offset: no non-default table

        vrec = struct.pack(">3sLL", cvtFromUVS(uvs), defOVSOffset, nonDefUVSOffset)
        varSelectorRecords.append(vrec)

    data = bytesjoin(varSelectorRecords) + bytesjoin(data)
    self.length = 10 + len(data)
    headerdata = struct.pack(">HLL", self.format, self.length, self.numVarSelectorRecords)

    return headerdata + data
def compile(self, ttFont):
    """Return the stored raw subtable bytes unchanged.

    Any falsy ``self.data`` (including an empty byte string) is reported as
    ``None``. ``ttFont`` is not used.
    """
    return self.data if self.data else None