1from fontTools.misc.py23 import bytesjoin 2from fontTools.misc.textTools import safeEval, readHex 3from fontTools.misc.encodingTools import getEncoding 4from fontTools.ttLib import getSearchRange 5from fontTools.unicode import Unicode 6from . import DefaultTable 7import sys 8import struct 9import array 10import logging 11 12 13log = logging.getLogger(__name__) 14 15 16def _make_map(font, chars, gids): 17 assert len(chars) == len(gids) 18 cmap = {} 19 glyphOrder = font.getGlyphOrder() 20 for char,gid in zip(chars,gids): 21 if gid == 0: 22 continue 23 try: 24 name = glyphOrder[gid] 25 except IndexError: 26 name = font.getGlyphName(gid) 27 cmap[char] = name 28 return cmap 29 30class table__c_m_a_p(DefaultTable.DefaultTable): 31 32 def getcmap(self, platformID, platEncID): 33 for subtable in self.tables: 34 if (subtable.platformID == platformID and 35 subtable.platEncID == platEncID): 36 return subtable 37 return None # not found 38 39 def getBestCmap(self, cmapPreferences=((3, 10), (0, 6), (0, 4), (3, 1), (0, 3), (0, 2), (0, 1), (0, 0))): 40 """Return the 'best' unicode cmap dictionary available in the font, 41 or None, if no unicode cmap subtable is available. 42 43 By default it will search for the following (platformID, platEncID) 44 pairs: 45 (3, 10), (0, 6), (0, 4), (3, 1), (0, 3), (0, 2), (0, 1), (0, 0) 46 This can be customized via the cmapPreferences argument. 47 """ 48 for platformID, platEncID in cmapPreferences: 49 cmapSubtable = self.getcmap(platformID, platEncID) 50 if cmapSubtable is not None: 51 return cmapSubtable.cmap 52 return None # None of the requested cmap subtables were found 53 54 def buildReversed(self): 55 """Returns a reverse cmap such as {'one':{0x31}, 'A':{0x41,0x391}}. 56 57 The values are sets of Unicode codepoints because 58 some fonts map different codepoints to the same glyph. 59 For example, U+0041 LATIN CAPITAL LETTER A and U+0391 60 GREEK CAPITAL LETTER ALPHA are sometimes the same glyph. 61 """ 62 result = {} 63 for subtable in self.tables: 64 if subtable.isUnicode(): 65 for codepoint, name in subtable.cmap.items(): 66 result.setdefault(name, set()).add(codepoint) 67 return result 68 69 def decompile(self, data, ttFont): 70 tableVersion, numSubTables = struct.unpack(">HH", data[:4]) 71 self.tableVersion = int(tableVersion) 72 self.tables = tables = [] 73 seenOffsets = {} 74 for i in range(numSubTables): 75 platformID, platEncID, offset = struct.unpack( 76 ">HHl", data[4+i*8:4+(i+1)*8]) 77 platformID, platEncID = int(platformID), int(platEncID) 78 format, length = struct.unpack(">HH", data[offset:offset+4]) 79 if format in [8,10,12,13]: 80 format, reserved, length = struct.unpack(">HHL", data[offset:offset+8]) 81 elif format in [14]: 82 format, length = struct.unpack(">HL", data[offset:offset+6]) 83 84 if not length: 85 log.error( 86 "cmap subtable is reported as having zero length: platformID %s, " 87 "platEncID %s, format %s offset %s. Skipping table.", 88 platformID, platEncID, format, offset) 89 continue 90 table = CmapSubtable.newSubtable(format) 91 table.platformID = platformID 92 table.platEncID = platEncID 93 # Note that by default we decompile only the subtable header info; 94 # any other data gets decompiled only when an attribute of the 95 # subtable is referenced. 96 table.decompileHeader(data[offset:offset+int(length)], ttFont) 97 if offset in seenOffsets: 98 table.data = None # Mark as decompiled 99 table.cmap = tables[seenOffsets[offset]].cmap 100 else: 101 seenOffsets[offset] = i 102 tables.append(table) 103 104 def compile(self, ttFont): 105 self.tables.sort() # sort according to the spec; see CmapSubtable.__lt__() 106 numSubTables = len(self.tables) 107 totalOffset = 4 + 8 * numSubTables 108 data = struct.pack(">HH", self.tableVersion, numSubTables) 109 tableData = b"" 110 seen = {} # Some tables are the same object reference. Don't compile them twice. 111 done = {} # Some tables are different objects, but compile to the same data chunk 112 for table in self.tables: 113 try: 114 offset = seen[id(table.cmap)] 115 except KeyError: 116 chunk = table.compile(ttFont) 117 if chunk in done: 118 offset = done[chunk] 119 else: 120 offset = seen[id(table.cmap)] = done[chunk] = totalOffset + len(tableData) 121 tableData = tableData + chunk 122 data = data + struct.pack(">HHl", table.platformID, table.platEncID, offset) 123 return data + tableData 124 125 def toXML(self, writer, ttFont): 126 writer.simpletag("tableVersion", version=self.tableVersion) 127 writer.newline() 128 for table in self.tables: 129 table.toXML(writer, ttFont) 130 131 def fromXML(self, name, attrs, content, ttFont): 132 if name == "tableVersion": 133 self.tableVersion = safeEval(attrs["version"]) 134 return 135 if name[:12] != "cmap_format_": 136 return 137 if not hasattr(self, "tables"): 138 self.tables = [] 139 format = safeEval(name[12:]) 140 table = CmapSubtable.newSubtable(format) 141 table.platformID = safeEval(attrs["platformID"]) 142 table.platEncID = safeEval(attrs["platEncID"]) 143 table.fromXML(name, attrs, content, ttFont) 144 self.tables.append(table) 145 146 147class CmapSubtable(object): 148 149 @staticmethod 150 def getSubtableClass(format): 151 """Return the subtable class for a format.""" 152 return cmap_classes.get(format, cmap_format_unknown) 153 154 @staticmethod 155 def newSubtable(format): 156 """Return a new instance of a subtable for format.""" 157 subtableClass = CmapSubtable.getSubtableClass(format) 158 return subtableClass(format) 159 160 def __init__(self, format): 161 self.format = format 162 self.data = None 163 self.ttFont = None 164 165 def __getattr__(self, attr): 166 # allow lazy decompilation of subtables. 167 if attr[:2] == '__': # don't handle requests for member functions like '__lt__' 168 raise AttributeError(attr) 169 if self.data is None: 170 raise AttributeError(attr) 171 self.decompile(None, None) # use saved data. 172 self.data = None # Once this table has been decompiled, make sure we don't 173 # just return the original data. Also avoids recursion when 174 # called with an attribute that the cmap subtable doesn't have. 175 return getattr(self, attr) 176 177 def decompileHeader(self, data, ttFont): 178 format, length, language = struct.unpack(">HHH", data[:6]) 179 assert len(data) == length, "corrupt cmap table format %d (data length: %d, header length: %d)" % (format, len(data), length) 180 self.format = int(format) 181 self.length = int(length) 182 self.language = int(language) 183 self.data = data[6:] 184 self.ttFont = ttFont 185 186 def toXML(self, writer, ttFont): 187 writer.begintag(self.__class__.__name__, [ 188 ("platformID", self.platformID), 189 ("platEncID", self.platEncID), 190 ("language", self.language), 191 ]) 192 writer.newline() 193 codes = sorted(self.cmap.items()) 194 self._writeCodes(codes, writer) 195 writer.endtag(self.__class__.__name__) 196 writer.newline() 197 198 def getEncoding(self, default=None): 199 """Returns the Python encoding name for this cmap subtable based on its platformID, 200 platEncID, and language. If encoding for these values is not known, by default 201 None is returned. That can be overriden by passing a value to the default 202 argument. 203 204 Note that if you want to choose a "preferred" cmap subtable, most of the time 205 self.isUnicode() is what you want as that one only returns true for the modern, 206 commonly used, Unicode-compatible triplets, not the legacy ones. 207 """ 208 return getEncoding(self.platformID, self.platEncID, self.language, default) 209 210 def isUnicode(self): 211 return (self.platformID == 0 or 212 (self.platformID == 3 and self.platEncID in [0, 1, 10])) 213 214 def isSymbol(self): 215 return self.platformID == 3 and self.platEncID == 0 216 217 def _writeCodes(self, codes, writer): 218 isUnicode = self.isUnicode() 219 for code, name in codes: 220 writer.simpletag("map", code=hex(code), name=name) 221 if isUnicode: 222 writer.comment(Unicode[code]) 223 writer.newline() 224 225 def __lt__(self, other): 226 if not isinstance(other, CmapSubtable): 227 return NotImplemented 228 229 # implemented so that list.sort() sorts according to the spec. 230 selfTuple = ( 231 getattr(self, "platformID", None), 232 getattr(self, "platEncID", None), 233 getattr(self, "language", None), 234 self.__dict__) 235 otherTuple = ( 236 getattr(other, "platformID", None), 237 getattr(other, "platEncID", None), 238 getattr(other, "language", None), 239 other.__dict__) 240 return selfTuple < otherTuple 241 242 243class cmap_format_0(CmapSubtable): 244 245 def decompile(self, data, ttFont): 246 # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None. 247 # If not, someone is calling the subtable decompile() directly, and must provide both args. 248 if data is not None and ttFont is not None: 249 self.decompileHeader(data, ttFont) 250 else: 251 assert (data is None and ttFont is None), "Need both data and ttFont arguments" 252 data = self.data # decompileHeader assigns the data after the header to self.data 253 assert 262 == self.length, "Format 0 cmap subtable not 262 bytes" 254 gids = array.array("B") 255 gids.frombytes(self.data) 256 charCodes = list(range(len(gids))) 257 self.cmap = _make_map(self.ttFont, charCodes, gids) 258 259 def compile(self, ttFont): 260 if self.data: 261 return struct.pack(">HHH", 0, 262, self.language) + self.data 262 263 cmap = self.cmap 264 assert set(cmap.keys()).issubset(range(256)) 265 getGlyphID = ttFont.getGlyphID 266 valueList = [getGlyphID(cmap[i]) if i in cmap else 0 for i in range(256)] 267 268 gids = array.array("B", valueList) 269 data = struct.pack(">HHH", 0, 262, self.language) + gids.tobytes() 270 assert len(data) == 262 271 return data 272 273 def fromXML(self, name, attrs, content, ttFont): 274 self.language = safeEval(attrs["language"]) 275 if not hasattr(self, "cmap"): 276 self.cmap = {} 277 cmap = self.cmap 278 for element in content: 279 if not isinstance(element, tuple): 280 continue 281 name, attrs, content = element 282 if name != "map": 283 continue 284 cmap[safeEval(attrs["code"])] = attrs["name"] 285 286 287subHeaderFormat = ">HHhH" 288class SubHeader(object): 289 def __init__(self): 290 self.firstCode = None 291 self.entryCount = None 292 self.idDelta = None 293 self.idRangeOffset = None 294 self.glyphIndexArray = [] 295 296class cmap_format_2(CmapSubtable): 297 298 def setIDDelta(self, subHeader): 299 subHeader.idDelta = 0 300 # find the minGI which is not zero. 301 minGI = subHeader.glyphIndexArray[0] 302 for gid in subHeader.glyphIndexArray: 303 if (gid != 0) and (gid < minGI): 304 minGI = gid 305 # The lowest gid in glyphIndexArray, after subtracting idDelta, must be 1. 306 # idDelta is a short, and must be between -32K and 32K. minGI can be between 1 and 64K. 307 # We would like to pick an idDelta such that the first glyphArray GID is 1, 308 # so that we are more likely to be able to combine glypharray GID subranges. 309 # This means that we have a problem when minGI is > 32K 310 # Since the final gi is reconstructed from the glyphArray GID by: 311 # (short)finalGID = (gid + idDelta) % 0x10000), 312 # we can get from a glypharray GID of 1 to a final GID of 65K by subtracting 2, and casting the 313 # negative number to an unsigned short. 314 315 if (minGI > 1): 316 if minGI > 0x7FFF: 317 subHeader.idDelta = -(0x10000 - minGI) -1 318 else: 319 subHeader.idDelta = minGI -1 320 idDelta = subHeader.idDelta 321 for i in range(subHeader.entryCount): 322 gid = subHeader.glyphIndexArray[i] 323 if gid > 0: 324 subHeader.glyphIndexArray[i] = gid - idDelta 325 326 def decompile(self, data, ttFont): 327 # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None. 328 # If not, someone is calling the subtable decompile() directly, and must provide both args. 329 if data is not None and ttFont is not None: 330 self.decompileHeader(data, ttFont) 331 else: 332 assert (data is None and ttFont is None), "Need both data and ttFont arguments" 333 334 data = self.data # decompileHeader assigns the data after the header to self.data 335 subHeaderKeys = [] 336 maxSubHeaderindex = 0 337 # get the key array, and determine the number of subHeaders. 338 allKeys = array.array("H") 339 allKeys.frombytes(data[:512]) 340 data = data[512:] 341 if sys.byteorder != "big": allKeys.byteswap() 342 subHeaderKeys = [ key//8 for key in allKeys] 343 maxSubHeaderindex = max(subHeaderKeys) 344 345 #Load subHeaders 346 subHeaderList = [] 347 pos = 0 348 for i in range(maxSubHeaderindex + 1): 349 subHeader = SubHeader() 350 (subHeader.firstCode, subHeader.entryCount, subHeader.idDelta, \ 351 subHeader.idRangeOffset) = struct.unpack(subHeaderFormat, data[pos:pos + 8]) 352 pos += 8 353 giDataPos = pos + subHeader.idRangeOffset-2 354 giList = array.array("H") 355 giList.frombytes(data[giDataPos:giDataPos + subHeader.entryCount*2]) 356 if sys.byteorder != "big": giList.byteswap() 357 subHeader.glyphIndexArray = giList 358 subHeaderList.append(subHeader) 359 # How this gets processed. 360 # Charcodes may be one or two bytes. 361 # The first byte of a charcode is mapped through the subHeaderKeys, to select 362 # a subHeader. For any subheader but 0, the next byte is then mapped through the 363 # selected subheader. If subheader Index 0 is selected, then the byte itself is 364 # mapped through the subheader, and there is no second byte. 365 # Then assume that the subsequent byte is the first byte of the next charcode,and repeat. 366 # 367 # Each subheader references a range in the glyphIndexArray whose length is entryCount. 368 # The range in glyphIndexArray referenced by a sunheader may overlap with the range in glyphIndexArray 369 # referenced by another subheader. 370 # The only subheader that will be referenced by more than one first-byte value is the subheader 371 # that maps the entire range of glyphID values to glyphIndex 0, e.g notdef: 372 # {firstChar 0, EntryCount 0,idDelta 0,idRangeOffset xx} 373 # A byte being mapped though a subheader is treated as in index into a mapping of array index to font glyphIndex. 374 # A subheader specifies a subrange within (0...256) by the 375 # firstChar and EntryCount values. If the byte value is outside the subrange, then the glyphIndex is zero 376 # (e.g. glyph not in font). 377 # If the byte index is in the subrange, then an offset index is calculated as (byteIndex - firstChar). 378 # The index to glyphIndex mapping is a subrange of the glyphIndexArray. You find the start of the subrange by 379 # counting idRangeOffset bytes from the idRangeOffset word. The first value in this subrange is the 380 # glyphIndex for the index firstChar. The offset index should then be used in this array to get the glyphIndex. 381 # Example for Logocut-Medium 382 # first byte of charcode = 129; selects subheader 1. 383 # subheader 1 = {firstChar 64, EntryCount 108,idDelta 42,idRangeOffset 0252} 384 # second byte of charCode = 66 385 # the index offset = 66-64 = 2. 386 # The subrange of the glyphIndexArray starting at 0x0252 bytes from the idRangeOffset word is: 387 # [glyphIndexArray index], [subrange array index] = glyphIndex 388 # [256], [0]=1 from charcode [129, 64] 389 # [257], [1]=2 from charcode [129, 65] 390 # [258], [2]=3 from charcode [129, 66] 391 # [259], [3]=4 from charcode [129, 67] 392 # So, the glyphIndex = 3 from the array. Then if idDelta is not zero and the glyph ID is not zero, 393 # add it to the glyphID to get the final glyphIndex 394 # value. In this case the final glyph index = 3+ 42 -> 45 for the final glyphIndex. Whew! 395 396 self.data = b"" 397 cmap = {} 398 notdefGI = 0 399 for firstByte in range(256): 400 subHeadindex = subHeaderKeys[firstByte] 401 subHeader = subHeaderList[subHeadindex] 402 if subHeadindex == 0: 403 if (firstByte < subHeader.firstCode) or (firstByte >= subHeader.firstCode + subHeader.entryCount): 404 continue # gi is notdef. 405 else: 406 charCode = firstByte 407 offsetIndex = firstByte - subHeader.firstCode 408 gi = subHeader.glyphIndexArray[offsetIndex] 409 if gi != 0: 410 gi = (gi + subHeader.idDelta) % 0x10000 411 else: 412 continue # gi is notdef. 413 cmap[charCode] = gi 414 else: 415 if subHeader.entryCount: 416 charCodeOffset = firstByte * 256 + subHeader.firstCode 417 for offsetIndex in range(subHeader.entryCount): 418 charCode = charCodeOffset + offsetIndex 419 gi = subHeader.glyphIndexArray[offsetIndex] 420 if gi != 0: 421 gi = (gi + subHeader.idDelta) % 0x10000 422 else: 423 continue 424 cmap[charCode] = gi 425 # If not subHeader.entryCount, then all char codes with this first byte are 426 # mapped to .notdef. We can skip this subtable, and leave the glyphs un-encoded, which is the 427 # same as mapping it to .notdef. 428 429 gids = list(cmap.values()) 430 charCodes = list(cmap.keys()) 431 self.cmap = _make_map(self.ttFont, charCodes, gids) 432 433 def compile(self, ttFont): 434 if self.data: 435 return struct.pack(">HHH", self.format, self.length, self.language) + self.data 436 kEmptyTwoCharCodeRange = -1 437 notdefGI = 0 438 439 items = sorted(self.cmap.items()) 440 charCodes = [item[0] for item in items] 441 names = [item[1] for item in items] 442 nameMap = ttFont.getReverseGlyphMap() 443 try: 444 gids = [nameMap[name] for name in names] 445 except KeyError: 446 nameMap = ttFont.getReverseGlyphMap(rebuild=True) 447 try: 448 gids = [nameMap[name] for name in names] 449 except KeyError: 450 # allow virtual GIDs in format 2 tables 451 gids = [] 452 for name in names: 453 try: 454 gid = nameMap[name] 455 except KeyError: 456 try: 457 if (name[:3] == 'gid'): 458 gid = int(name[3:]) 459 else: 460 gid = ttFont.getGlyphID(name) 461 except: 462 raise KeyError(name) 463 464 gids.append(gid) 465 466 # Process the (char code to gid) item list in char code order. 467 # By definition, all one byte char codes map to subheader 0. 468 # For all the two byte char codes, we assume that the first byte maps maps to the empty subhead (with an entry count of 0, 469 # which defines all char codes in its range to map to notdef) unless proven otherwise. 470 # Note that since the char code items are processed in char code order, all the char codes with the 471 # same first byte are in sequential order. 472 473 subHeaderKeys = [kEmptyTwoCharCodeRange for x in range(256)] # list of indices into subHeaderList. 474 subHeaderList = [] 475 476 # We force this subheader entry 0 to exist in the subHeaderList in the case where some one comes up 477 # with a cmap where all the one byte char codes map to notdef, 478 # with the result that the subhead 0 would not get created just by processing the item list. 479 charCode = charCodes[0] 480 if charCode > 255: 481 subHeader = SubHeader() 482 subHeader.firstCode = 0 483 subHeader.entryCount = 0 484 subHeader.idDelta = 0 485 subHeader.idRangeOffset = 0 486 subHeaderList.append(subHeader) 487 488 lastFirstByte = -1 489 items = zip(charCodes, gids) 490 for charCode, gid in items: 491 if gid == 0: 492 continue 493 firstbyte = charCode >> 8 494 secondByte = charCode & 0x00FF 495 496 if firstbyte != lastFirstByte: # Need to update the current subhead, and start a new one. 497 if lastFirstByte > -1: 498 # fix GI's and iDelta of current subheader. 499 self.setIDDelta(subHeader) 500 501 # If it was sunheader 0 for one-byte charCodes, then we need to set the subHeaderKeys value to zero 502 # for the indices matching the char codes. 503 if lastFirstByte == 0: 504 for index in range(subHeader.entryCount): 505 charCode = subHeader.firstCode + index 506 subHeaderKeys[charCode] = 0 507 508 assert (subHeader.entryCount == len(subHeader.glyphIndexArray)), "Error - subhead entry count does not match len of glyphID subrange." 509 # init new subheader 510 subHeader = SubHeader() 511 subHeader.firstCode = secondByte 512 subHeader.entryCount = 1 513 subHeader.glyphIndexArray.append(gid) 514 subHeaderList.append(subHeader) 515 subHeaderKeys[firstbyte] = len(subHeaderList) -1 516 lastFirstByte = firstbyte 517 else: 518 # need to fill in with notdefs all the code points between the last charCode and the current charCode. 519 codeDiff = secondByte - (subHeader.firstCode + subHeader.entryCount) 520 for i in range(codeDiff): 521 subHeader.glyphIndexArray.append(notdefGI) 522 subHeader.glyphIndexArray.append(gid) 523 subHeader.entryCount = subHeader.entryCount + codeDiff + 1 524 525 # fix GI's and iDelta of last subheader that we we added to the subheader array. 526 self.setIDDelta(subHeader) 527 528 # Now we add a final subheader for the subHeaderKeys which maps to empty two byte charcode ranges. 529 subHeader = SubHeader() 530 subHeader.firstCode = 0 531 subHeader.entryCount = 0 532 subHeader.idDelta = 0 533 subHeader.idRangeOffset = 2 534 subHeaderList.append(subHeader) 535 emptySubheadIndex = len(subHeaderList) - 1 536 for index in range(256): 537 if subHeaderKeys[index] == kEmptyTwoCharCodeRange: 538 subHeaderKeys[index] = emptySubheadIndex 539 # Since this is the last subheader, the GlyphIndex Array starts two bytes after the start of the 540 # idRangeOffset word of this subHeader. We can safely point to the first entry in the GlyphIndexArray, 541 # since the first subrange of the GlyphIndexArray is for subHeader 0, which always starts with 542 # charcode 0 and GID 0. 543 544 idRangeOffset = (len(subHeaderList)-1)*8 + 2 # offset to beginning of glyphIDArray from first subheader idRangeOffset. 545 subheadRangeLen = len(subHeaderList) -1 # skip last special empty-set subheader; we've already hardocodes its idRangeOffset to 2. 546 for index in range(subheadRangeLen): 547 subHeader = subHeaderList[index] 548 subHeader.idRangeOffset = 0 549 for j in range(index): 550 prevSubhead = subHeaderList[j] 551 if prevSubhead.glyphIndexArray == subHeader.glyphIndexArray: # use the glyphIndexArray subarray 552 subHeader.idRangeOffset = prevSubhead.idRangeOffset - (index-j)*8 553 subHeader.glyphIndexArray = [] 554 break 555 if subHeader.idRangeOffset == 0: # didn't find one. 556 subHeader.idRangeOffset = idRangeOffset 557 idRangeOffset = (idRangeOffset - 8) + subHeader.entryCount*2 # one less subheader, one more subArray. 558 else: 559 idRangeOffset = idRangeOffset - 8 # one less subheader 560 561 # Now we can write out the data! 562 length = 6 + 512 + 8*len(subHeaderList) # header, 256 subHeaderKeys, and subheader array. 563 for subhead in subHeaderList[:-1]: 564 length = length + len(subhead.glyphIndexArray)*2 # We can't use subhead.entryCount, as some of the subhead may share subArrays. 565 dataList = [struct.pack(">HHH", 2, length, self.language)] 566 for index in subHeaderKeys: 567 dataList.append(struct.pack(">H", index*8)) 568 for subhead in subHeaderList: 569 dataList.append(struct.pack(subHeaderFormat, subhead.firstCode, subhead.entryCount, subhead.idDelta, subhead.idRangeOffset)) 570 for subhead in subHeaderList[:-1]: 571 for gi in subhead.glyphIndexArray: 572 dataList.append(struct.pack(">H", gi)) 573 data = bytesjoin(dataList) 574 assert (len(data) == length), "Error: cmap format 2 is not same length as calculated! actual: " + str(len(data))+ " calc : " + str(length) 575 return data 576 577 def fromXML(self, name, attrs, content, ttFont): 578 self.language = safeEval(attrs["language"]) 579 if not hasattr(self, "cmap"): 580 self.cmap = {} 581 cmap = self.cmap 582 583 for element in content: 584 if not isinstance(element, tuple): 585 continue 586 name, attrs, content = element 587 if name != "map": 588 continue 589 cmap[safeEval(attrs["code"])] = attrs["name"] 590 591 592cmap_format_4_format = ">7H" 593 594#uint16 endCode[segCount] # Ending character code for each segment, last = 0xFFFF. 595#uint16 reservedPad # This value should be zero 596#uint16 startCode[segCount] # Starting character code for each segment 597#uint16 idDelta[segCount] # Delta for all character codes in segment 598#uint16 idRangeOffset[segCount] # Offset in bytes to glyph indexArray, or 0 599#uint16 glyphIndexArray[variable] # Glyph index array 600 601def splitRange(startCode, endCode, cmap): 602 # Try to split a range of character codes into subranges with consecutive 603 # glyph IDs in such a way that the cmap4 subtable can be stored "most" 604 # efficiently. I can't prove I've got the optimal solution, but it seems 605 # to do well with the fonts I tested: none became bigger, many became smaller. 606 if startCode == endCode: 607 return [], [endCode] 608 609 lastID = cmap[startCode] 610 lastCode = startCode 611 inOrder = None 612 orderedBegin = None 613 subRanges = [] 614 615 # Gather subranges in which the glyph IDs are consecutive. 616 for code in range(startCode + 1, endCode + 1): 617 glyphID = cmap[code] 618 619 if glyphID - 1 == lastID: 620 if inOrder is None or not inOrder: 621 inOrder = 1 622 orderedBegin = lastCode 623 else: 624 if inOrder: 625 inOrder = 0 626 subRanges.append((orderedBegin, lastCode)) 627 orderedBegin = None 628 629 lastID = glyphID 630 lastCode = code 631 632 if inOrder: 633 subRanges.append((orderedBegin, lastCode)) 634 assert lastCode == endCode 635 636 # Now filter out those new subranges that would only make the data bigger. 637 # A new segment cost 8 bytes, not using a new segment costs 2 bytes per 638 # character. 639 newRanges = [] 640 for b, e in subRanges: 641 if b == startCode and e == endCode: 642 break # the whole range, we're fine 643 if b == startCode or e == endCode: 644 threshold = 4 # split costs one more segment 645 else: 646 threshold = 8 # split costs two more segments 647 if (e - b + 1) > threshold: 648 newRanges.append((b, e)) 649 subRanges = newRanges 650 651 if not subRanges: 652 return [], [endCode] 653 654 if subRanges[0][0] != startCode: 655 subRanges.insert(0, (startCode, subRanges[0][0] - 1)) 656 if subRanges[-1][1] != endCode: 657 subRanges.append((subRanges[-1][1] + 1, endCode)) 658 659 # Fill the "holes" in the segments list -- those are the segments in which 660 # the glyph IDs are _not_ consecutive. 661 i = 1 662 while i < len(subRanges): 663 if subRanges[i-1][1] + 1 != subRanges[i][0]: 664 subRanges.insert(i, (subRanges[i-1][1] + 1, subRanges[i][0] - 1)) 665 i = i + 1 666 i = i + 1 667 668 # Transform the ranges into startCode/endCode lists. 669 start = [] 670 end = [] 671 for b, e in subRanges: 672 start.append(b) 673 end.append(e) 674 start.pop(0) 675 676 assert len(start) + 1 == len(end) 677 return start, end 678 679 680class cmap_format_4(CmapSubtable): 681 682 def decompile(self, data, ttFont): 683 # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None. 684 # If not, someone is calling the subtable decompile() directly, and must provide both args. 685 if data is not None and ttFont is not None: 686 self.decompileHeader(data, ttFont) 687 else: 688 assert (data is None and ttFont is None), "Need both data and ttFont arguments" 689 690 data = self.data # decompileHeader assigns the data after the header to self.data 691 (segCountX2, searchRange, entrySelector, rangeShift) = \ 692 struct.unpack(">4H", data[:8]) 693 data = data[8:] 694 segCount = segCountX2 // 2 695 696 allCodes = array.array("H") 697 allCodes.frombytes(data) 698 self.data = data = None 699 700 if sys.byteorder != "big": allCodes.byteswap() 701 702 # divide the data 703 endCode = allCodes[:segCount] 704 allCodes = allCodes[segCount+1:] # the +1 is skipping the reservedPad field 705 startCode = allCodes[:segCount] 706 allCodes = allCodes[segCount:] 707 idDelta = allCodes[:segCount] 708 allCodes = allCodes[segCount:] 709 idRangeOffset = allCodes[:segCount] 710 glyphIndexArray = allCodes[segCount:] 711 lenGIArray = len(glyphIndexArray) 712 713 # build 2-byte character mapping 714 charCodes = [] 715 gids = [] 716 for i in range(len(startCode) - 1): # don't do 0xffff! 717 start = startCode[i] 718 delta = idDelta[i] 719 rangeOffset = idRangeOffset[i] 720 # *someone* needs to get killed. 721 partial = rangeOffset // 2 - start + i - len(idRangeOffset) 722 723 rangeCharCodes = list(range(startCode[i], endCode[i] + 1)) 724 charCodes.extend(rangeCharCodes) 725 if rangeOffset == 0: 726 gids.extend([(charCode + delta) & 0xFFFF for charCode in rangeCharCodes]) 727 else: 728 for charCode in rangeCharCodes: 729 index = charCode + partial 730 assert (index < lenGIArray), "In format 4 cmap, range (%d), the calculated index (%d) into the glyph index array is not less than the length of the array (%d) !" % (i, index, lenGIArray) 731 if glyphIndexArray[index] != 0: # if not missing glyph 732 glyphID = glyphIndexArray[index] + delta 733 else: 734 glyphID = 0 # missing glyph 735 gids.append(glyphID & 0xFFFF) 736 737 self.cmap = _make_map(self.ttFont, charCodes, gids) 738 739 def compile(self, ttFont): 740 if self.data: 741 return struct.pack(">HHH", self.format, self.length, self.language) + self.data 742 743 charCodes = list(self.cmap.keys()) 744 if not charCodes: 745 startCode = [0xffff] 746 endCode = [0xffff] 747 else: 748 charCodes.sort() 749 names = [self.cmap[code] for code in charCodes] 750 nameMap = ttFont.getReverseGlyphMap() 751 try: 752 gids = [nameMap[name] for name in names] 753 except KeyError: 754 nameMap = ttFont.getReverseGlyphMap(rebuild=True) 755 try: 756 gids = [nameMap[name] for name in names] 757 except KeyError: 758 # allow virtual GIDs in format 4 tables 759 gids = [] 760 for name in names: 761 try: 762 gid = nameMap[name] 763 except KeyError: 764 try: 765 if (name[:3] == 'gid'): 766 gid = int(name[3:]) 767 else: 768 gid = ttFont.getGlyphID(name) 769 except: 770 raise KeyError(name) 771 772 gids.append(gid) 773 cmap = {} # code:glyphID mapping 774 for code, gid in zip(charCodes, gids): 775 cmap[code] = gid 776 777 # Build startCode and endCode lists. 778 # Split the char codes in ranges of consecutive char codes, then split 779 # each range in more ranges of consecutive/not consecutive glyph IDs. 780 # See splitRange(). 781 lastCode = charCodes[0] 782 endCode = [] 783 startCode = [lastCode] 784 for charCode in charCodes[1:]: # skip the first code, it's the first start code 785 if charCode == lastCode + 1: 786 lastCode = charCode 787 continue 788 start, end = splitRange(startCode[-1], lastCode, cmap) 789 startCode.extend(start) 790 endCode.extend(end) 791 startCode.append(charCode) 792 lastCode = charCode 793 start, end = splitRange(startCode[-1], lastCode, cmap) 794 startCode.extend(start) 795 endCode.extend(end) 796 startCode.append(0xffff) 797 endCode.append(0xffff) 798 799 # build up rest of cruft 800 idDelta = [] 801 idRangeOffset = [] 802 glyphIndexArray = [] 803 for i in range(len(endCode)-1): # skip the closing codes (0xffff) 804 indices = [] 805 for charCode in range(startCode[i], endCode[i] + 1): 806 indices.append(cmap[charCode]) 807 if (indices == list(range(indices[0], indices[0] + len(indices)))): 808 idDelta.append((indices[0] - startCode[i]) % 0x10000) 809 idRangeOffset.append(0) 810 else: 811 # someone *definitely* needs to get killed. 812 idDelta.append(0) 813 idRangeOffset.append(2 * (len(endCode) + len(glyphIndexArray) - i)) 814 glyphIndexArray.extend(indices) 815 idDelta.append(1) # 0xffff + 1 == (tadaa!) 0. So this end code maps to .notdef 816 idRangeOffset.append(0) 817 818 # Insane. 819 segCount = len(endCode) 820 segCountX2 = segCount * 2 821 searchRange, entrySelector, rangeShift = getSearchRange(segCount, 2) 822 823 charCodeArray = array.array("H", endCode + [0] + startCode) 824 idDeltaArray = array.array("H", idDelta) 825 restArray = array.array("H", idRangeOffset + glyphIndexArray) 826 if sys.byteorder != "big": charCodeArray.byteswap() 827 if sys.byteorder != "big": idDeltaArray.byteswap() 828 if sys.byteorder != "big": restArray.byteswap() 829 data = charCodeArray.tobytes() + idDeltaArray.tobytes() + restArray.tobytes() 830 831 length = struct.calcsize(cmap_format_4_format) + len(data) 832 header = struct.pack(cmap_format_4_format, self.format, length, self.language, 833 segCountX2, searchRange, entrySelector, rangeShift) 834 return header + data 835 836 def fromXML(self, name, attrs, content, ttFont): 837 self.language = safeEval(attrs["language"]) 838 if not hasattr(self, "cmap"): 839 self.cmap = {} 840 cmap = self.cmap 841 842 for element in content: 843 if not isinstance(element, tuple): 844 continue 845 nameMap, attrsMap, dummyContent = element 846 if nameMap != "map": 847 assert 0, "Unrecognized keyword in cmap subtable" 848 cmap[safeEval(attrsMap["code"])] = attrsMap["name"] 849 850 851class cmap_format_6(CmapSubtable): 852 853 def decompile(self, data, ttFont): 854 # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None. 855 # If not, someone is calling the subtable decompile() directly, and must provide both args. 856 if data is not None and ttFont is not None: 857 self.decompileHeader(data, ttFont) 858 else: 859 assert (data is None and ttFont is None), "Need both data and ttFont arguments" 860 861 data = self.data # decompileHeader assigns the data after the header to self.data 862 firstCode, entryCount = struct.unpack(">HH", data[:4]) 863 firstCode = int(firstCode) 864 data = data[4:] 865 #assert len(data) == 2 * entryCount # XXX not true in Apple's Helvetica!!! 866 gids = array.array("H") 867 gids.frombytes(data[:2 * int(entryCount)]) 868 if sys.byteorder != "big": gids.byteswap() 869 self.data = data = None 870 871 charCodes = list(range(firstCode, firstCode + len(gids))) 872 self.cmap = _make_map(self.ttFont, charCodes, gids) 873 874 def compile(self, ttFont): 875 if self.data: 876 return struct.pack(">HHH", self.format, self.length, self.language) + self.data 877 cmap = self.cmap 878 codes = sorted(cmap.keys()) 879 if codes: # yes, there are empty cmap tables. 880 codes = list(range(codes[0], codes[-1] + 1)) 881 firstCode = codes[0] 882 valueList = [ 883 ttFont.getGlyphID(cmap[code]) if code in cmap else 0 884 for code in codes 885 ] 886 gids = array.array("H", valueList) 887 if sys.byteorder != "big": gids.byteswap() 888 data = gids.tobytes() 889 else: 890 data = b"" 891 firstCode = 0 892 header = struct.pack(">HHHHH", 893 6, len(data) + 10, self.language, firstCode, len(codes)) 894 return header + data 895 896 def fromXML(self, name, attrs, content, ttFont): 897 self.language = safeEval(attrs["language"]) 898 if not hasattr(self, "cmap"): 899 self.cmap = {} 900 cmap = self.cmap 901 902 for element in content: 903 if not isinstance(element, tuple): 904 continue 905 name, attrs, content = element 906 if name != "map": 907 continue 908 cmap[safeEval(attrs["code"])] = attrs["name"] 909 910 911class cmap_format_12_or_13(CmapSubtable): 912 913 def __init__(self, format): 914 self.format = format 915 self.reserved = 0 916 self.data = None 917 self.ttFont = None 918 919 def decompileHeader(self, data, ttFont): 920 format, reserved, length, language, nGroups = struct.unpack(">HHLLL", data[:16]) 921 assert len(data) == (16 + nGroups*12) == (length), "corrupt cmap table format %d (data length: %d, header length: %d)" % (self.format, len(data), length) 922 self.format = format 923 self.reserved = reserved 924 self.length = length 925 self.language = language 926 self.nGroups = nGroups 927 self.data = data[16:] 928 self.ttFont = ttFont 929 930 def decompile(self, data, ttFont): 931 # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None. 932 # If not, someone is calling the subtable decompile() directly, and must provide both args. 933 if data is not None and ttFont is not None: 934 self.decompileHeader(data, ttFont) 935 else: 936 assert (data is None and ttFont is None), "Need both data and ttFont arguments" 937 938 data = self.data # decompileHeader assigns the data after the header to self.data 939 charCodes = [] 940 gids = [] 941 pos = 0 942 for i in range(self.nGroups): 943 startCharCode, endCharCode, glyphID = struct.unpack(">LLL",data[pos:pos+12] ) 944 pos += 12 945 lenGroup = 1 + endCharCode - startCharCode 946 charCodes.extend(list(range(startCharCode, endCharCode +1))) 947 gids.extend(self._computeGIDs(glyphID, lenGroup)) 948 self.data = data = None 949 self.cmap = _make_map(self.ttFont, charCodes, gids) 950 951 def compile(self, ttFont): 952 if self.data: 953 return struct.pack(">HHLLL", self.format, self.reserved, self.length, self.language, self.nGroups) + self.data 954 charCodes = list(self.cmap.keys()) 955 names = list(self.cmap.values()) 956 nameMap = ttFont.getReverseGlyphMap() 957 try: 958 gids = [nameMap[name] for name in names] 959 except KeyError: 960 nameMap = ttFont.getReverseGlyphMap(rebuild=True) 961 try: 962 gids = [nameMap[name] for name in names] 963 except KeyError: 964 # allow virtual GIDs in format 12 tables 965 gids = [] 966 for name in names: 967 try: 968 gid = nameMap[name] 969 except KeyError: 970 try: 971 if (name[:3] == 'gid'): 972 gid = int(name[3:]) 973 else: 974 gid = ttFont.getGlyphID(name) 975 except: 976 raise KeyError(name) 977 978 gids.append(gid) 979 980 cmap = {} # code:glyphID mapping 981 for code, gid in zip(charCodes, gids): 982 cmap[code] = gid 983 984 charCodes.sort() 985 index = 0 986 startCharCode = charCodes[0] 987 startGlyphID = cmap[startCharCode] 988 lastGlyphID = startGlyphID - self._format_step 989 lastCharCode = startCharCode - 1 990 nGroups = 0 991 dataList = [] 992 maxIndex = len(charCodes) 993 for index in range(maxIndex): 994 charCode = charCodes[index] 995 glyphID = cmap[charCode] 996 if not self._IsInSameRun(glyphID, lastGlyphID, charCode, lastCharCode): 997 dataList.append(struct.pack(">LLL", startCharCode, lastCharCode, startGlyphID)) 998 startCharCode = charCode 999 startGlyphID = glyphID 1000 nGroups = nGroups + 1 1001 lastGlyphID = glyphID 1002 lastCharCode = charCode 1003 dataList.append(struct.pack(">LLL", startCharCode, lastCharCode, startGlyphID)) 1004 nGroups = nGroups + 1 1005 data = bytesjoin(dataList) 1006 lengthSubtable = len(data) +16 1007 assert len(data) == (nGroups*12) == (lengthSubtable-16) 1008 return struct.pack(">HHLLL", self.format, self.reserved, lengthSubtable, self.language, nGroups) + data 1009 1010 def toXML(self, writer, ttFont): 1011 writer.begintag(self.__class__.__name__, [ 1012 ("platformID", self.platformID), 1013 ("platEncID", self.platEncID), 1014 ("format", self.format), 1015 ("reserved", self.reserved), 1016 ("length", self.length), 1017 ("language", self.language), 1018 ("nGroups", self.nGroups), 1019 ]) 1020 writer.newline() 1021 codes = sorted(self.cmap.items()) 1022 self._writeCodes(codes, writer) 1023 writer.endtag(self.__class__.__name__) 1024 writer.newline() 1025 1026 def fromXML(self, name, attrs, content, ttFont): 1027 self.format = safeEval(attrs["format"]) 1028 self.reserved = safeEval(attrs["reserved"]) 1029 self.length = safeEval(attrs["length"]) 1030 self.language = safeEval(attrs["language"]) 1031 self.nGroups = safeEval(attrs["nGroups"]) 1032 if not hasattr(self, "cmap"): 1033 self.cmap = {} 1034 cmap = self.cmap 1035 1036 for element in content: 1037 if not isinstance(element, tuple): 1038 continue 1039 name, attrs, content = element 1040 if name != "map": 1041 continue 1042 cmap[safeEval(attrs["code"])] = attrs["name"] 1043 1044 1045class cmap_format_12(cmap_format_12_or_13): 1046 1047 _format_step = 1 1048 1049 def __init__(self, format=12): 1050 cmap_format_12_or_13.__init__(self, format) 1051 1052 def _computeGIDs(self, startingGlyph, numberOfGlyphs): 1053 return list(range(startingGlyph, startingGlyph + numberOfGlyphs)) 1054 1055 def _IsInSameRun(self, glyphID, lastGlyphID, charCode, lastCharCode): 1056 return (glyphID == 1 + lastGlyphID) and (charCode == 1 + lastCharCode) 1057 1058 1059class cmap_format_13(cmap_format_12_or_13): 1060 1061 _format_step = 0 1062 1063 def __init__(self, format=13): 1064 cmap_format_12_or_13.__init__(self, format) 1065 1066 def _computeGIDs(self, startingGlyph, numberOfGlyphs): 1067 return [startingGlyph] * numberOfGlyphs 1068 1069 def _IsInSameRun(self, glyphID, lastGlyphID, charCode, lastCharCode): 1070 return (glyphID == lastGlyphID) and (charCode == 1 + lastCharCode) 1071 1072 1073def cvtToUVS(threeByteString): 1074 data = b"\0" + threeByteString 1075 val, = struct.unpack(">L", data) 1076 return val 1077 1078def cvtFromUVS(val): 1079 assert 0 <= val < 0x1000000 1080 fourByteString = struct.pack(">L", val) 1081 return fourByteString[1:] 1082 1083 1084class cmap_format_14(CmapSubtable): 1085 1086 def decompileHeader(self, data, ttFont): 1087 format, length, numVarSelectorRecords = struct.unpack(">HLL", data[:10]) 1088 self.data = data[10:] 1089 self.length = length 1090 self.numVarSelectorRecords = numVarSelectorRecords 1091 self.ttFont = ttFont 1092 self.language = 0xFF # has no language. 1093 1094 def decompile(self, data, ttFont): 1095 if data is not None and ttFont is not None: 1096 self.decompileHeader(data, ttFont) 1097 else: 1098 assert (data is None and ttFont is None), "Need both data and ttFont arguments" 1099 data = self.data 1100 1101 self.cmap = {} # so that clients that expect this to exist in a cmap table won't fail. 1102 uvsDict = {} 1103 recOffset = 0 1104 for n in range(self.numVarSelectorRecords): 1105 uvs, defOVSOffset, nonDefUVSOffset = struct.unpack(">3sLL", data[recOffset:recOffset +11]) 1106 recOffset += 11 1107 varUVS = cvtToUVS(uvs) 1108 if defOVSOffset: 1109 startOffset = defOVSOffset - 10 1110 numValues, = struct.unpack(">L", data[startOffset:startOffset+4]) 1111 startOffset +=4 1112 for r in range(numValues): 1113 uv, addtlCnt = struct.unpack(">3sB", data[startOffset:startOffset+4]) 1114 startOffset += 4 1115 firstBaseUV = cvtToUVS(uv) 1116 cnt = addtlCnt+1 1117 baseUVList = list(range(firstBaseUV, firstBaseUV+cnt)) 1118 glyphList = [None]*cnt 1119 localUVList = zip(baseUVList, glyphList) 1120 try: 1121 uvsDict[varUVS].extend(localUVList) 1122 except KeyError: 1123 uvsDict[varUVS] = list(localUVList) 1124 1125 if nonDefUVSOffset: 1126 startOffset = nonDefUVSOffset - 10 1127 numRecs, = struct.unpack(">L", data[startOffset:startOffset+4]) 1128 startOffset +=4 1129 localUVList = [] 1130 for r in range(numRecs): 1131 uv, gid = struct.unpack(">3sH", data[startOffset:startOffset+5]) 1132 startOffset += 5 1133 uv = cvtToUVS(uv) 1134 glyphName = self.ttFont.getGlyphName(gid) 1135 localUVList.append((uv, glyphName)) 1136 try: 1137 uvsDict[varUVS].extend(localUVList) 1138 except KeyError: 1139 uvsDict[varUVS] = localUVList 1140 1141 self.uvsDict = uvsDict 1142 1143 def toXML(self, writer, ttFont): 1144 writer.begintag(self.__class__.__name__, [ 1145 ("platformID", self.platformID), 1146 ("platEncID", self.platEncID), 1147 ]) 1148 writer.newline() 1149 uvsDict = self.uvsDict 1150 uvsList = sorted(uvsDict.keys()) 1151 for uvs in uvsList: 1152 uvList = uvsDict[uvs] 1153 uvList.sort(key=lambda item: (item[1] is not None, item[0], item[1])) 1154 for uv, gname in uvList: 1155 attrs = [("uv", hex(uv)), ("uvs", hex(uvs))] 1156 if gname is not None: 1157 attrs.append(("name", gname)) 1158 writer.simpletag("map", attrs) 1159 writer.newline() 1160 writer.endtag(self.__class__.__name__) 1161 writer.newline() 1162 1163 def fromXML(self, name, attrs, content, ttFont): 1164 self.language = 0xFF # provide a value so that CmapSubtable.__lt__() won't fail 1165 if not hasattr(self, "cmap"): 1166 self.cmap = {} # so that clients that expect this to exist in a cmap table won't fail. 1167 if not hasattr(self, "uvsDict"): 1168 self.uvsDict = {} 1169 uvsDict = self.uvsDict 1170 1171 # For backwards compatibility reasons we accept "None" as an indicator 1172 # for "default mapping", unless the font actually has a glyph named 1173 # "None". 1174 _hasGlyphNamedNone = None 1175 1176 for element in content: 1177 if not isinstance(element, tuple): 1178 continue 1179 name, attrs, content = element 1180 if name != "map": 1181 continue 1182 uvs = safeEval(attrs["uvs"]) 1183 uv = safeEval(attrs["uv"]) 1184 gname = attrs.get("name") 1185 if gname == "None": 1186 if _hasGlyphNamedNone is None: 1187 _hasGlyphNamedNone = "None" in ttFont.getGlyphOrder() 1188 if not _hasGlyphNamedNone: 1189 gname = None 1190 try: 1191 uvsDict[uvs].append((uv, gname)) 1192 except KeyError: 1193 uvsDict[uvs] = [(uv, gname)] 1194 1195 def compile(self, ttFont): 1196 if self.data: 1197 return struct.pack(">HLL", self.format, self.length, self.numVarSelectorRecords) + self.data 1198 1199 uvsDict = self.uvsDict 1200 uvsList = sorted(uvsDict.keys()) 1201 self.numVarSelectorRecords = len(uvsList) 1202 offset = 10 + self.numVarSelectorRecords*11 # current value is end of VarSelectorRecords block. 1203 data = [] 1204 varSelectorRecords =[] 1205 for uvs in uvsList: 1206 entryList = uvsDict[uvs] 1207 1208 defList = [entry for entry in entryList if entry[1] is None] 1209 if defList: 1210 defList = [entry[0] for entry in defList] 1211 defOVSOffset = offset 1212 defList.sort() 1213 1214 lastUV = defList[0] 1215 cnt = -1 1216 defRecs = [] 1217 for defEntry in defList: 1218 cnt +=1 1219 if (lastUV+cnt) != defEntry: 1220 rec = struct.pack(">3sB", cvtFromUVS(lastUV), cnt-1) 1221 lastUV = defEntry 1222 defRecs.append(rec) 1223 cnt = 0 1224 1225 rec = struct.pack(">3sB", cvtFromUVS(lastUV), cnt) 1226 defRecs.append(rec) 1227 1228 numDefRecs = len(defRecs) 1229 data.append(struct.pack(">L", numDefRecs)) 1230 data.extend(defRecs) 1231 offset += 4 + numDefRecs*4 1232 else: 1233 defOVSOffset = 0 1234 1235 ndefList = [entry for entry in entryList if entry[1] is not None] 1236 if ndefList: 1237 nonDefUVSOffset = offset 1238 ndefList.sort() 1239 numNonDefRecs = len(ndefList) 1240 data.append(struct.pack(">L", numNonDefRecs)) 1241 offset += 4 + numNonDefRecs*5 1242 1243 for uv, gname in ndefList: 1244 gid = ttFont.getGlyphID(gname) 1245 ndrec = struct.pack(">3sH", cvtFromUVS(uv), gid) 1246 data.append(ndrec) 1247 else: 1248 nonDefUVSOffset = 0 1249 1250 vrec = struct.pack(">3sLL", cvtFromUVS(uvs), defOVSOffset, nonDefUVSOffset) 1251 varSelectorRecords.append(vrec) 1252 1253 data = bytesjoin(varSelectorRecords) + bytesjoin(data) 1254 self.length = 10 + len(data) 1255 headerdata = struct.pack(">HLL", self.format, self.length, self.numVarSelectorRecords) 1256 1257 return headerdata + data 1258 1259 1260class cmap_format_unknown(CmapSubtable): 1261 1262 def toXML(self, writer, ttFont): 1263 cmapName = self.__class__.__name__[:12] + str(self.format) 1264 writer.begintag(cmapName, [ 1265 ("platformID", self.platformID), 1266 ("platEncID", self.platEncID), 1267 ]) 1268 writer.newline() 1269 writer.dumphex(self.data) 1270 writer.endtag(cmapName) 1271 writer.newline() 1272 1273 def fromXML(self, name, attrs, content, ttFont): 1274 self.data = readHex(content) 1275 self.cmap = {} 1276 1277 def decompileHeader(self, data, ttFont): 1278 self.language = 0 # dummy value 1279 self.data = data 1280 1281 def decompile(self, data, ttFont): 1282 # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None. 1283 # If not, someone is calling the subtable decompile() directly, and must provide both args. 1284 if data is not None and ttFont is not None: 1285 self.decompileHeader(data, ttFont) 1286 else: 1287 assert (data is None and ttFont is None), "Need both data and ttFont arguments" 1288 1289 def compile(self, ttFont): 1290 if self.data: 1291 return self.data 1292 else: 1293 return None 1294 1295cmap_classes = { 1296 0: cmap_format_0, 1297 2: cmap_format_2, 1298 4: cmap_format_4, 1299 6: cmap_format_6, 1300 12: cmap_format_12, 1301 13: cmap_format_13, 1302 14: cmap_format_14, 1303} 1304