1from __future__ import print_function, division, absolute_import 2from fontTools.misc.py23 import * 3from fontTools.misc.textTools import safeEval, readHex 4from fontTools.misc.encodingTools import getEncoding 5from fontTools.ttLib import getSearchRange 6from fontTools.unicode import Unicode 7from . import DefaultTable 8import sys 9import struct 10import array 11import logging 12 13 14log = logging.getLogger(__name__) 15 16 17def _make_map(font, chars, gids): 18 assert len(chars) == len(gids) 19 cmap = {} 20 glyphOrder = font.getGlyphOrder() 21 for char,gid in zip(chars,gids): 22 if gid is 0: 23 continue 24 try: 25 name = glyphOrder[gid] 26 except IndexError: 27 name = font.getGlyphName(gid) 28 cmap[char] = name 29 return cmap 30 31class table__c_m_a_p(DefaultTable.DefaultTable): 32 33 def getcmap(self, platformID, platEncID): 34 for subtable in self.tables: 35 if (subtable.platformID == platformID and 36 subtable.platEncID == platEncID): 37 return subtable 38 return None # not found 39 40 def getBestCmap(self, cmapPreferences=((3, 10), (0, 6), (0, 4), (3, 1), (0, 3), (0, 2), (0, 1), (0, 0))): 41 """Return the 'best' unicode cmap dictionary available in the font, 42 or None, if no unicode cmap subtable is available. 43 44 By default it will search for the following (platformID, platEncID) 45 pairs: 46 (3, 10), (0, 6), (0, 4), (3, 1), (0, 3), (0, 2), (0, 1), (0, 0) 47 This can be customized via the cmapPreferences argument. 48 """ 49 for platformID, platEncID in cmapPreferences: 50 cmapSubtable = self.getcmap(platformID, platEncID) 51 if cmapSubtable is not None: 52 return cmapSubtable.cmap 53 return None # None of the requested cmap subtables were found 54 55 def buildReversed(self): 56 """Returns a reverse cmap such as {'one':{0x31}, 'A':{0x41,0x391}}. 57 58 The values are sets of Unicode codepoints because 59 some fonts map different codepoints to the same glyph. 
def decompile(self, data, ttFont):
    """Parse the cmap table header and create one subtable per encoding record.

    Reads the table version and the number of encoding records, then for
    each record reads (platformID, platEncID, offset) and peeks at the
    subtable's format and length.  Formats 8, 10, 12 and 13 are re-read
    with a reserved field and a 32-bit length; format 14 is re-read with a
    32-bit length.  Records whose subtable reports a zero length are logged
    and skipped.

    Only the subtable header is decompiled here; the rest of each subtable
    is decompiled when one of its attributes is referenced.  Records that
    point at an offset already seen share the ``cmap`` dict of the first
    subtable created for that offset.
    """
    tableVersion, numSubTables = struct.unpack(">HH", data[:4])
    self.tableVersion = int(tableVersion)
    self.tables = tables = []
    seenOffsets = {}  # subtable offset -> index into `tables` of its first user
    for i in range(numSubTables):
        platformID, platEncID, offset = struct.unpack(
            ">HHl", data[4+i*8:4+(i+1)*8])
        platformID, platEncID = int(platformID), int(platEncID)
        format, length = struct.unpack(">HH", data[offset:offset+4])
        if format in [8, 10, 12, 13]:
            format, reserved, length = struct.unpack(">HHL", data[offset:offset+8])
        elif format in [14]:
            format, length = struct.unpack(">HL", data[offset:offset+6])

        if not length:
            log.error(
                "cmap subtable is reported as having zero length: platformID %s, "
                "platEncID %s, format %s offset %s. Skipping table.",
                platformID, platEncID, format, offset)
            continue
        table = CmapSubtable.newSubtable(format)
        table.platformID = platformID
        table.platEncID = platEncID
        # Note that by default we decompile only the subtable header info;
        # any other data gets decompiled only when an attribute of the
        # subtable is referenced.
        table.decompileHeader(data[offset:offset+int(length)], ttFont)
        if offset in seenOffsets:
            table.data = None  # Mark as decompiled
            table.cmap = tables[seenOffsets[offset]].cmap
        else:
            # Record the position this table is about to take in `tables`.
            # The record index `i` would be wrong as soon as an earlier
            # zero-length subtable has been skipped.
            seenOffsets[offset] = len(tables)
        tables.append(table)
cmap_format_unknown) 154 155 @staticmethod 156 def newSubtable(format): 157 """Return a new instance of a subtable for format.""" 158 subtableClass = CmapSubtable.getSubtableClass(format) 159 return subtableClass(format) 160 161 def __init__(self, format): 162 self.format = format 163 self.data = None 164 self.ttFont = None 165 166 def __getattr__(self, attr): 167 # allow lazy decompilation of subtables. 168 if attr[:2] == '__': # don't handle requests for member functions like '__lt__' 169 raise AttributeError(attr) 170 if self.data is None: 171 raise AttributeError(attr) 172 self.decompile(None, None) # use saved data. 173 self.data = None # Once this table has been decompiled, make sure we don't 174 # just return the original data. Also avoids recursion when 175 # called with an attribute that the cmap subtable doesn't have. 176 return getattr(self, attr) 177 178 def decompileHeader(self, data, ttFont): 179 format, length, language = struct.unpack(">HHH", data[:6]) 180 assert len(data) == length, "corrupt cmap table format %d (data length: %d, header length: %d)" % (format, len(data), length) 181 self.format = int(format) 182 self.length = int(length) 183 self.language = int(language) 184 self.data = data[6:] 185 self.ttFont = ttFont 186 187 def toXML(self, writer, ttFont): 188 writer.begintag(self.__class__.__name__, [ 189 ("platformID", self.platformID), 190 ("platEncID", self.platEncID), 191 ("language", self.language), 192 ]) 193 writer.newline() 194 codes = sorted(self.cmap.items()) 195 self._writeCodes(codes, writer) 196 writer.endtag(self.__class__.__name__) 197 writer.newline() 198 199 def getEncoding(self, default=None): 200 """Returns the Python encoding name for this cmap subtable based on its platformID, 201 platEncID, and language. If encoding for these values is not known, by default 202 None is returned. That can be overriden by passing a value to the default 203 argument. 
204 205 Note that if you want to choose a "preferred" cmap subtable, most of the time 206 self.isUnicode() is what you want as that one only returns true for the modern, 207 commonly used, Unicode-compatible triplets, not the legacy ones. 208 """ 209 return getEncoding(self.platformID, self.platEncID, self.language, default) 210 211 def isUnicode(self): 212 return (self.platformID == 0 or 213 (self.platformID == 3 and self.platEncID in [0, 1, 10])) 214 215 def isSymbol(self): 216 return self.platformID == 3 and self.platEncID == 0 217 218 def _writeCodes(self, codes, writer): 219 isUnicode = self.isUnicode() 220 for code, name in codes: 221 writer.simpletag("map", code=hex(code), name=name) 222 if isUnicode: 223 writer.comment(Unicode[code]) 224 writer.newline() 225 226 def __lt__(self, other): 227 if not isinstance(other, CmapSubtable): 228 return NotImplemented 229 230 # implemented so that list.sort() sorts according to the spec. 231 selfTuple = ( 232 getattr(self, "platformID", None), 233 getattr(self, "platEncID", None), 234 getattr(self, "language", None), 235 self.__dict__) 236 otherTuple = ( 237 getattr(other, "platformID", None), 238 getattr(other, "platEncID", None), 239 getattr(other, "language", None), 240 other.__dict__) 241 return selfTuple < otherTuple 242 243 244class cmap_format_0(CmapSubtable): 245 246 def decompile(self, data, ttFont): 247 # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None. 248 # If not, someone is calling the subtable decompile() directly, and must provide both args. 
def compile(self, ttFont):
    """Return the binary form of this format 0 subtable (always 262 bytes).

    If undecompiled data is still held in ``self.data`` it is written back
    behind a freshly packed header.  Otherwise the 256-entry glyph ID array
    is built from ``self.cmap``: character codes missing from the mapping
    get glyph ID 0, the others are resolved with ``ttFont.getGlyphID``.
    All character codes in ``self.cmap`` must be in range(256).
    """
    if self.data:
        return struct.pack(">HHH", 0, 262, self.language) + self.data

    cmap = self.cmap
    assert set(cmap.keys()).issubset(range(256))
    getGlyphID = ttFont.getGlyphID
    valueList = [getGlyphID(cmap[i]) if i in cmap else 0 for i in range(256)]

    gids = array.array("B", valueList)
    # array.tostring() was removed in Python 3.9; tobytes() does not exist
    # on Python 2, so fall back to the old name only when needed.
    serialize = getattr(gids, "tobytes", None) or gids.tostring
    data = struct.pack(">HHH", 0, 262, self.language) + serialize()
    assert len(data) == 262
    return data
def decompile(self, data, ttFont):
    """Decode a format 2 subtable into ``self.cmap`` ({char code: glyph name}).

    We usually get here indirectly from the subtable __getattr__ function,
    in which case both args must be None and the bytes saved by
    decompileHeader() are used.  If not, someone is calling the subtable
    decompile() directly, and must provide both args.
    """
    if data is not None and ttFont is not None:
        self.decompileHeader(data, ttFont)
    else:
        assert (data is None and ttFont is None), "Need both data and ttFont arguments"

    def readUShorts(raw):
        # Decode big-endian unsigned 16-bit values into an array("H").
        values = array.array("H")
        # array.fromstring() was removed in Python 3.9; frombytes() does not
        # exist on Python 2, so fall back to the old name only when needed.
        load = getattr(values, "frombytes", None) or values.fromstring
        load(raw)
        if sys.byteorder != "big":
            values.byteswap()
        return values

    data = self.data  # decompileHeader assigns the data after the header to self.data
    # get the key array, and determine the number of subHeaders.
    allKeys = readUShorts(data[:512])
    data = data[512:]
    subHeaderKeys = [key // 8 for key in allKeys]
    maxSubHeaderindex = max(subHeaderKeys)

    # Load subHeaders
    subHeaderList = []
    pos = 0
    for i in range(maxSubHeaderindex + 1):
        subHeader = SubHeader()
        (subHeader.firstCode, subHeader.entryCount, subHeader.idDelta,
            subHeader.idRangeOffset) = struct.unpack(subHeaderFormat, data[pos:pos + 8])
        pos += 8
        # idRangeOffset is counted from the idRangeOffset word itself, which
        # is the last 2 bytes of the 8-byte subheader just consumed.
        giDataPos = pos + subHeader.idRangeOffset - 2
        subHeader.glyphIndexArray = readUShorts(
            data[giDataPos:giDataPos + subHeader.entryCount * 2])
        subHeaderList.append(subHeader)
    # How this gets processed.
    # Charcodes may be one or two bytes.
    # The first byte of a charcode is mapped through the subHeaderKeys, to select
    # a subHeader. For any subheader but 0, the next byte is then mapped through the
    # selected subheader. If subheader Index 0 is selected, then the byte itself is
    # mapped through the subheader, and there is no second byte.
    # Then assume that the subsequent byte is the first byte of the next charcode, and repeat.
    #
    # Each subheader references a range in the glyphIndexArray whose length is entryCount.
    # The range in glyphIndexArray referenced by a subheader may overlap with the range in glyphIndexArray
    # referenced by another subheader.
    # The only subheader that will be referenced by more than one first-byte value is the subheader
    # that maps the entire range of glyphID values to glyphIndex 0, e.g notdef:
    #	 {firstChar 0, EntryCount 0,idDelta 0,idRangeOffset xx}
    # A byte being mapped though a subheader is treated as an index into a mapping of array index to font glyphIndex.
    # A subheader specifies a subrange within (0...256) by the
    # firstChar and EntryCount values. If the byte value is outside the subrange, then the glyphIndex is zero
    # (e.g. glyph not in font).
    # If the byte index is in the subrange, then an offset index is calculated as (byteIndex - firstChar).
    # The index to glyphIndex mapping is a subrange of the glyphIndexArray. You find the start of the subrange by
    # counting idRangeOffset bytes from the idRangeOffset word. The first value in this subrange is the
    # glyphIndex for the index firstChar. The offset index should then be used in this array to get the glyphIndex.
    # Example for Logocut-Medium
    # first byte of charcode = 129; selects subheader 1.
    # subheader 1 = {firstChar 64, EntryCount 108,idDelta 42,idRangeOffset 0252}
    # second byte of charCode = 66
    # the index offset = 66-64 = 2.
    # The subrange of the glyphIndexArray starting at 0x0252 bytes from the idRangeOffset word is:
    # [glyphIndexArray index], [subrange array index] = glyphIndex
    # [256], [0]=1 	from charcode [129, 64]
    # [257], [1]=2  	from charcode [129, 65]
    # [258], [2]=3  	from charcode [129, 66]
    # [259], [3]=4  	from charcode [129, 67]
    # So, the glyphIndex = 3 from the array. Then if idDelta is not zero and the glyph ID is not zero,
    # add it to the glyphID to get the final glyphIndex
    # value. In this case the final glyph index = 3+ 42 -> 45 for the final glyphIndex. Whew!

    self.data = b""
    cmap = {}
    for firstByte in range(256):
        subHeadindex = subHeaderKeys[firstByte]
        subHeader = subHeaderList[subHeadindex]
        if subHeadindex == 0:
            # One-byte char code: the byte itself is looked up in subheader 0.
            if (firstByte < subHeader.firstCode) or (firstByte >= subHeader.firstCode + subHeader.entryCount):
                continue  # gi is notdef.
            gi = subHeader.glyphIndexArray[firstByte - subHeader.firstCode]
            if gi == 0:
                continue  # gi is notdef.
            cmap[firstByte] = (gi + subHeader.idDelta) % 0x10000
        elif subHeader.entryCount:
            # Two-byte char codes sharing this first byte.
            charCodeOffset = firstByte * 256 + subHeader.firstCode
            for offsetIndex in range(subHeader.entryCount):
                gi = subHeader.glyphIndexArray[offsetIndex]
                if gi == 0:
                    continue
                cmap[charCodeOffset + offsetIndex] = (gi + subHeader.idDelta) % 0x10000
        # If not subHeader.entryCount, then all char codes with this first byte are
        # mapped to .notdef. We can skip this subtable, and leave the glyphs un-encoded, which is the
        # same as mapping it to .notdef.

    gids = list(cmap.values())
    charCodes = list(cmap.keys())
    self.cmap = _make_map(self.ttFont, charCodes, gids)
471 # Note that since the char code items are processed in char code order, all the char codes with the 472 # same first byte are in sequential order. 473 474 subHeaderKeys = [kEmptyTwoCharCodeRange for x in range(256)] # list of indices into subHeaderList. 475 subHeaderList = [] 476 477 # We force this subheader entry 0 to exist in the subHeaderList in the case where some one comes up 478 # with a cmap where all the one byte char codes map to notdef, 479 # with the result that the subhead 0 would not get created just by processing the item list. 480 charCode = charCodes[0] 481 if charCode > 255: 482 subHeader = SubHeader() 483 subHeader.firstCode = 0 484 subHeader.entryCount = 0 485 subHeader.idDelta = 0 486 subHeader.idRangeOffset = 0 487 subHeaderList.append(subHeader) 488 489 lastFirstByte = -1 490 items = zip(charCodes, gids) 491 for charCode, gid in items: 492 if gid == 0: 493 continue 494 firstbyte = charCode >> 8 495 secondByte = charCode & 0x00FF 496 497 if firstbyte != lastFirstByte: # Need to update the current subhead, and start a new one. 498 if lastFirstByte > -1: 499 # fix GI's and iDelta of current subheader. 500 self.setIDDelta(subHeader) 501 502 # If it was sunheader 0 for one-byte charCodes, then we need to set the subHeaderKeys value to zero 503 # for the indices matching the char codes. 504 if lastFirstByte == 0: 505 for index in range(subHeader.entryCount): 506 charCode = subHeader.firstCode + index 507 subHeaderKeys[charCode] = 0 508 509 assert (subHeader.entryCount == len(subHeader.glyphIndexArray)), "Error - subhead entry count does not match len of glyphID subrange." 
510 # init new subheader 511 subHeader = SubHeader() 512 subHeader.firstCode = secondByte 513 subHeader.entryCount = 1 514 subHeader.glyphIndexArray.append(gid) 515 subHeaderList.append(subHeader) 516 subHeaderKeys[firstbyte] = len(subHeaderList) -1 517 lastFirstByte = firstbyte 518 else: 519 # need to fill in with notdefs all the code points between the last charCode and the current charCode. 520 codeDiff = secondByte - (subHeader.firstCode + subHeader.entryCount) 521 for i in range(codeDiff): 522 subHeader.glyphIndexArray.append(notdefGI) 523 subHeader.glyphIndexArray.append(gid) 524 subHeader.entryCount = subHeader.entryCount + codeDiff + 1 525 526 # fix GI's and iDelta of last subheader that we we added to the subheader array. 527 self.setIDDelta(subHeader) 528 529 # Now we add a final subheader for the subHeaderKeys which maps to empty two byte charcode ranges. 530 subHeader = SubHeader() 531 subHeader.firstCode = 0 532 subHeader.entryCount = 0 533 subHeader.idDelta = 0 534 subHeader.idRangeOffset = 2 535 subHeaderList.append(subHeader) 536 emptySubheadIndex = len(subHeaderList) - 1 537 for index in range(256): 538 if subHeaderKeys[index] == kEmptyTwoCharCodeRange: 539 subHeaderKeys[index] = emptySubheadIndex 540 # Since this is the last subheader, the GlyphIndex Array starts two bytes after the start of the 541 # idRangeOffset word of this subHeader. We can safely point to the first entry in the GlyphIndexArray, 542 # since the first subrange of the GlyphIndexArray is for subHeader 0, which always starts with 543 # charcode 0 and GID 0. 544 545 idRangeOffset = (len(subHeaderList)-1)*8 + 2 # offset to beginning of glyphIDArray from first subheader idRangeOffset. 546 subheadRangeLen = len(subHeaderList) -1 # skip last special empty-set subheader; we've already hardocodes its idRangeOffset to 2. 
547 for index in range(subheadRangeLen): 548 subHeader = subHeaderList[index] 549 subHeader.idRangeOffset = 0 550 for j in range(index): 551 prevSubhead = subHeaderList[j] 552 if prevSubhead.glyphIndexArray == subHeader.glyphIndexArray: # use the glyphIndexArray subarray 553 subHeader.idRangeOffset = prevSubhead.idRangeOffset - (index-j)*8 554 subHeader.glyphIndexArray = [] 555 break 556 if subHeader.idRangeOffset == 0: # didn't find one. 557 subHeader.idRangeOffset = idRangeOffset 558 idRangeOffset = (idRangeOffset - 8) + subHeader.entryCount*2 # one less subheader, one more subArray. 559 else: 560 idRangeOffset = idRangeOffset - 8 # one less subheader 561 562 # Now we can write out the data! 563 length = 6 + 512 + 8*len(subHeaderList) # header, 256 subHeaderKeys, and subheader array. 564 for subhead in subHeaderList[:-1]: 565 length = length + len(subhead.glyphIndexArray)*2 # We can't use subhead.entryCount, as some of the subhead may share subArrays. 566 dataList = [struct.pack(">HHH", 2, length, self.language)] 567 for index in subHeaderKeys: 568 dataList.append(struct.pack(">H", index*8)) 569 for subhead in subHeaderList: 570 dataList.append(struct.pack(subHeaderFormat, subhead.firstCode, subhead.entryCount, subhead.idDelta, subhead.idRangeOffset)) 571 for subhead in subHeaderList[:-1]: 572 for gi in subhead.glyphIndexArray: 573 dataList.append(struct.pack(">H", gi)) 574 data = bytesjoin(dataList) 575 assert (len(data) == length), "Error: cmap format 2 is not same length as calculated! 
def splitRange(startCode, endCode, cmap):
    """Split [startCode, endCode] into cmap4 segments and return (start, end).

    ``cmap`` maps every character code in the range to a glyph ID.  The
    range is cut into subranges with consecutive glyph IDs where doing so
    is expected to make the format 4 subtable smaller; this is a heuristic,
    not a proven optimum.

    Returns two lists: the start codes of every segment except the first
    (whose start is ``startCode``), and the end codes of all segments, so
    ``len(start) + 1 == len(end)``.
    """
    if startCode == endCode:
        return [], [endCode]

    # Pass 1: gather the runs of codes whose glyph IDs are consecutive.
    runs = []
    runStart = None  # first code of the run currently open, if any
    prevCode = startCode
    prevGlyph = cmap[startCode]
    for code in range(startCode + 1, endCode + 1):
        glyph = cmap[code]
        if glyph - 1 == prevGlyph:
            if runStart is None:
                runStart = prevCode
        elif runStart is not None:
            runs.append((runStart, prevCode))
            runStart = None
        prevCode, prevGlyph = code, glyph
    if runStart is not None:
        runs.append((runStart, prevCode))
    assert prevCode == endCode

    # Pass 2: drop the runs that would only make the data bigger.
    # A new segment costs 8 bytes, not using a new segment costs 2 bytes
    # per character.
    kept = []
    for first, last in runs:
        if first == startCode and last == endCode:
            break  # the whole range, we're fine
        touchesEdge = first == startCode or last == endCode
        # a run at an edge costs one extra segment, an inner run costs two
        threshold = 4 if touchesEdge else 8
        if last - first + 1 > threshold:
            kept.append((first, last))

    if not kept:
        return [], [endCode]

    # Pass 3: lay out the kept runs and fill every gap before, between and
    # after them with a segment whose glyph IDs are _not_ consecutive.
    segments = []
    nextCode = startCode
    for first, last in kept:
        if first != nextCode:
            segments.append((nextCode, first - 1))
        segments.append((first, last))
        nextCode = last + 1
    if nextCode != endCode + 1:
        segments.append((nextCode, endCode))

    # Transform the segments into startCode/endCode lists; the first start
    # code is implied by the caller and therefore left out.
    start = [first for first, _ in segments[1:]]
    end = [last for _, last in segments]

    assert len(start) + 1 == len(end)
    return start, end
685 # If not, someone is calling the subtable decompile() directly, and must provide both args. 686 if data is not None and ttFont is not None: 687 self.decompileHeader(data, ttFont) 688 else: 689 assert (data is None and ttFont is None), "Need both data and ttFont arguments" 690 691 data = self.data # decompileHeader assigns the data after the header to self.data 692 (segCountX2, searchRange, entrySelector, rangeShift) = \ 693 struct.unpack(">4H", data[:8]) 694 data = data[8:] 695 segCount = segCountX2 // 2 696 697 allCodes = array.array("H") 698 allCodes.fromstring(data) 699 self.data = data = None 700 701 if sys.byteorder != "big": allCodes.byteswap() 702 703 # divide the data 704 endCode = allCodes[:segCount] 705 allCodes = allCodes[segCount+1:] # the +1 is skipping the reservedPad field 706 startCode = allCodes[:segCount] 707 allCodes = allCodes[segCount:] 708 idDelta = allCodes[:segCount] 709 allCodes = allCodes[segCount:] 710 idRangeOffset = allCodes[:segCount] 711 glyphIndexArray = allCodes[segCount:] 712 lenGIArray = len(glyphIndexArray) 713 714 # build 2-byte character mapping 715 charCodes = [] 716 gids = [] 717 for i in range(len(startCode) - 1): # don't do 0xffff! 718 start = startCode[i] 719 delta = idDelta[i] 720 rangeOffset = idRangeOffset[i] 721 # *someone* needs to get killed. 722 partial = rangeOffset // 2 - start + i - len(idRangeOffset) 723 724 rangeCharCodes = list(range(startCode[i], endCode[i] + 1)) 725 charCodes.extend(rangeCharCodes) 726 if rangeOffset == 0: 727 gids.extend([(charCode + delta) & 0xFFFF for charCode in rangeCharCodes]) 728 else: 729 for charCode in rangeCharCodes: 730 index = charCode + partial 731 assert (index < lenGIArray), "In format 4 cmap, range (%d), the calculated index (%d) into the glyph index array is not less than the length of the array (%d) !" 
def compile(self, ttFont):
    """Return the binary form of this format 4 subtable.

    If undecompiled data is still held in ``self.data`` it is written back
    behind a freshly packed header.  Otherwise ``self.cmap`` is converted
    to glyph IDs, cut into segments with splitRange(), and serialized as
    endCode / reservedPad / startCode / idDelta / idRangeOffset /
    glyphIndexArray, all as big-endian unsigned 16-bit values.

    Raises KeyError(name) when a glyph name cannot be resolved.
    """
    if self.data:
        return struct.pack(">HHH", self.format, self.length, self.language) + self.data

    def packUShorts(values):
        # Serialize a list of ints as big-endian unsigned 16-bit values.
        arr = array.array("H", values)
        if sys.byteorder != "big":
            arr.byteswap()
        # array.tostring() was removed in Python 3.9; tobytes() does not
        # exist on Python 2, so fall back to the old name only when needed.
        dump = getattr(arr, "tobytes", None) or arr.tostring
        return dump()

    charCodes = list(self.cmap.keys())
    if not charCodes:
        startCode = [0xffff]
        endCode = [0xffff]
    else:
        charCodes.sort()
        names = [self.cmap[code] for code in charCodes]
        nameMap = ttFont.getReverseGlyphMap()
        try:
            gids = [nameMap[name] for name in names]
        except KeyError:
            # The reverse map may be stale; rebuild it and try once more.
            nameMap = ttFont.getReverseGlyphMap(rebuild=True)
            try:
                gids = [nameMap[name] for name in names]
            except KeyError:
                # allow virtual GIDs in format 4 tables
                gids = []
                for name in names:
                    try:
                        gid = nameMap[name]
                    except KeyError:
                        try:
                            if (name[:3] == 'gid'):
                                gid = int(name[3:])
                            else:
                                gid = ttFont.getGlyphID(name)
                        except Exception:
                            raise KeyError(name)

                    gids.append(gid)
        cmap = {}  # code:glyphID mapping
        for code, gid in zip(charCodes, gids):
            cmap[code] = gid

        # Build startCode and endCode lists.
        # Split the char codes in ranges of consecutive char codes, then split
        # each range in more ranges of consecutive/not consecutive glyph IDs.
        # See splitRange().
        lastCode = charCodes[0]
        endCode = []
        startCode = [lastCode]
        for charCode in charCodes[1:]:  # skip the first code, it's the first start code
            if charCode == lastCode + 1:
                lastCode = charCode
                continue
            start, end = splitRange(startCode[-1], lastCode, cmap)
            startCode.extend(start)
            endCode.extend(end)
            startCode.append(charCode)
            lastCode = charCode
        start, end = splitRange(startCode[-1], lastCode, cmap)
        startCode.extend(start)
        endCode.extend(end)
        startCode.append(0xffff)
        endCode.append(0xffff)

    # Build idDelta, idRangeOffset and glyphIndexArray for every segment
    # except the closing 0xffff one.
    idDelta = []
    idRangeOffset = []
    glyphIndexArray = []
    for i in range(len(endCode)-1):  # skip the closing codes (0xffff)
        indices = []
        for charCode in range(startCode[i], endCode[i] + 1):
            indices.append(cmap[charCode])
        if (indices == list(range(indices[0], indices[0] + len(indices)))):
            # Consecutive glyph IDs: a delta alone describes the segment.
            idDelta.append((indices[0] - startCode[i]) % 0x10000)
            idRangeOffset.append(0)
        else:
            # Otherwise store the glyph IDs in glyphIndexArray.  The offset
            # is in bytes, counted from this segment's own idRangeOffset
            # slot to where its glyph IDs start in glyphIndexArray.
            idDelta.append(0)
            idRangeOffset.append(2 * (len(endCode) + len(glyphIndexArray) - i))
            glyphIndexArray.extend(indices)
    idDelta.append(1)  # 0xffff + 1 == (tadaa!) 0. So this end code maps to .notdef
    idRangeOffset.append(0)

    segCount = len(endCode)
    segCountX2 = segCount * 2
    searchRange, entrySelector, rangeShift = getSearchRange(segCount, 2)

    # The single 0 between endCode and startCode is the reservedPad field.
    data = (packUShorts(endCode + [0] + startCode)
            + packUShorts(idDelta)
            + packUShorts(idRangeOffset + glyphIndexArray))

    length = struct.calcsize(cmap_format_4_format) + len(data)
    header = struct.pack(cmap_format_4_format, self.format, length, self.language,
            segCountX2, searchRange, entrySelector, rangeShift)
    return header + data
class cmap_format_6(CmapSubtable):
    """cmap subtable format 6: glyph IDs for one contiguous code range.

    The binary layout handled here is a first character code, an entry
    count, and then one unsigned 16-bit glyph ID per code.
    """

    def decompile(self, data, ttFont):
        """Build ``self.cmap`` from the binary subtable data.

        Either both ``data`` and ``ttFont`` are given (direct call), or
        both are None, in which case the data previously stored by
        ``decompileHeader`` is used.
        """
        # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
        # If not, someone is calling the subtable decompile() directly, and must provide both args.
        if data is not None and ttFont is not None:
            self.decompileHeader(data, ttFont)
        else:
            assert (data is None and ttFont is None), "Need both data and ttFont arguments"

        data = self.data  # decompileHeader assigns the data after the header to self.data
        firstCode, entryCount = struct.unpack(">HH", data[:4])
        firstCode = int(firstCode)
        data = data[4:]
        # assert len(data) == 2 * entryCount  # XXX not true in Apple's Helvetica!!!
        gids = array.array("H")
        # array.fromstring() was removed in Python 3.9.  frombytes() is its
        # replacement but does not exist on Python 2, so use whichever
        # this interpreter provides.
        loadBytes = getattr(gids, "frombytes", None) or gids.fromstring
        loadBytes(data[:2 * int(entryCount)])
        if sys.byteorder != "big":
            gids.byteswap()
        # Drop the raw bytes so that compile() rebuilds from self.cmap.
        self.data = data = None

        charCodes = list(range(firstCode, firstCode + len(gids)))
        self.cmap = _make_map(self.ttFont, charCodes, gids)

    def compile(self, ttFont):
        """Return the binary form of this subtable.

        If raw data is still attached it is written back unchanged behind
        a fresh header.  Otherwise the glyph ID array is rebuilt from
        ``self.cmap``, covering every code from the lowest to the highest
        mapped one; codes without a mapping inside that range get glyph
        ID 0.
        """
        if self.data:
            return struct.pack(">HHH", self.format, self.length, self.language) + self.data
        cmap = self.cmap
        codes = sorted(cmap.keys())
        if codes:  # yes, there are empty cmap tables.
            codes = list(range(codes[0], codes[-1] + 1))
            firstCode = codes[0]
            valueList = [
                ttFont.getGlyphID(cmap[code]) if code in cmap else 0
                for code in codes
            ]
            gids = array.array("H", valueList)
            if sys.byteorder != "big":
                gids.byteswap()
            # array.tostring() was removed in Python 3.9; tobytes() is
            # missing on Python 2.  Use whichever exists.
            dumpBytes = getattr(gids, "tobytes", None) or gids.tostring
            data = dumpBytes()
        else:
            data = b""
            firstCode = 0
        # 10 is the size of the five 16-bit header fields packed below.
        header = struct.pack(">HHHHH",
                             6, len(data) + 10, self.language, firstCode, len(codes))
        return header + data

    def fromXML(self, name, attrs, content, ttFont):
        """Fill this subtable from a parsed XML element.

        Sets ``self.language`` from ``attrs`` and adds one
        ``code -> glyph name`` entry to ``self.cmap`` per ``map`` child;
        other children are ignored.
        """
        self.language = safeEval(attrs["language"])
        if not hasattr(self, "cmap"):
            self.cmap = {}
        cmap = self.cmap

        for element in content:
            if not isinstance(element, tuple):
                continue
            # Distinct local names: do not rebind the method's own
            # name/attrs/content arguments while iterating over content.
            childName, childAttrs, dummyContent = element
            if childName != "map":
                continue
            cmap[safeEval(childAttrs["code"])] = childAttrs["name"]
class cmap_format_12_or_13(CmapSubtable):
    """Shared implementation of the group-based cmap formats 12 and 13.

    The data is a list of (startCharCode, endCharCode, glyphID) groups of
    three unsigned 32-bit values each.  Subclasses provide
    ``_format_step``, ``_computeGIDs`` and ``_IsInSameRun``, which define
    how glyph IDs progress inside one group.
    """

    def __init__(self, format):
        self.format = format
        self.reserved = 0
        self.data = None
        self.ttFont = None

    def decompileHeader(self, data, ttFont):
        """Parse the 16-byte header and keep the group data for later."""
        format, reserved, length, language, nGroups = struct.unpack(">HHLLL", data[:16])
        assert len(data) == (16 + nGroups*12) == (length), "corrupt cmap table format %d (data length: %d, header length: %d)" % (self.format, len(data), length)
        self.format = format
        self.reserved = reserved
        self.length = length
        self.language = language
        self.nGroups = nGroups
        self.data = data[16:]
        self.ttFont = ttFont

    def decompile(self, data, ttFont):
        """Expand all groups into ``self.cmap``.

        Either both ``data`` and ``ttFont`` are given (direct call), or
        both are None, in which case the data previously stored by
        ``decompileHeader`` is used.
        """
        # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
        # If not, someone is calling the subtable decompile() directly, and must provide both args.
        if data is not None and ttFont is not None:
            self.decompileHeader(data, ttFont)
        else:
            assert (data is None and ttFont is None), "Need both data and ttFont arguments"

        data = self.data  # decompileHeader assigns the data after the header to self.data
        charCodes = []
        gids = []
        pos = 0
        for i in range(self.nGroups):
            startCharCode, endCharCode, glyphID = struct.unpack(">LLL", data[pos:pos+12])
            pos += 12
            lenGroup = 1 + endCharCode - startCharCode
            charCodes.extend(range(startCharCode, endCharCode + 1))
            gids.extend(self._computeGIDs(glyphID, lenGroup))
        # Drop the raw bytes so that compile() rebuilds from self.cmap.
        self.data = data = None
        self.cmap = _make_map(self.ttFont, charCodes, gids)

    def compile(self, ttFont):
        """Return the binary form of this subtable.

        If raw data is still attached it is written back unchanged behind
        its header.  Otherwise the groups are rebuilt from ``self.cmap``.
        An empty cmap yields a valid subtable with zero groups.

        Raises:
            KeyError: if a glyph name can be resolved neither through the
                font's glyph map nor as a virtual ``gidNNN`` name.
        """
        if self.data:
            return struct.pack(">HHLLL", self.format, self.reserved, self.length, self.language, self.nGroups) + self.data
        charCodes = list(self.cmap.keys())
        names = list(self.cmap.values())
        nameMap = ttFont.getReverseGlyphMap()
        try:
            gids = [nameMap[name] for name in names]
        except KeyError:
            # The cached reverse map may be out of date: rebuild and retry.
            nameMap = ttFont.getReverseGlyphMap(rebuild=True)
            try:
                gids = [nameMap[name] for name in names]
            except KeyError:
                # allow virtual GIDs in format 12 tables
                gids = []
                for name in names:
                    try:
                        gid = nameMap[name]
                    except KeyError:
                        try:
                            if (name[:3] == 'gid'):
                                gid = int(name[3:])
                            else:
                                gid = ttFont.getGlyphID(name)
                        except Exception:
                            # Not a bare ``except``: KeyboardInterrupt and
                            # SystemExit must not be turned into KeyError.
                            raise KeyError(name)

                    gids.append(gid)

        cmap = {}  # code:glyphID mapping
        for code, gid in zip(charCodes, gids):
            cmap[code] = gid

        charCodes.sort()
        if not charCodes:
            # Empty cmap: emit a header-only subtable instead of failing
            # with IndexError on charCodes[0] below.
            return struct.pack(">HHLLL", self.format, self.reserved, 16, self.language, 0)

        startCharCode = charCodes[0]
        startGlyphID = cmap[startCharCode]
        # Prime the "previous" values so the first code always counts as
        # the continuation of a run and opens no extra group.
        lastGlyphID = startGlyphID - self._format_step
        lastCharCode = startCharCode - 1
        nGroups = 0
        dataList = []
        for charCode in charCodes:
            glyphID = cmap[charCode]
            if not self._IsInSameRun(glyphID, lastGlyphID, charCode, lastCharCode):
                dataList.append(struct.pack(">LLL", startCharCode, lastCharCode, startGlyphID))
                startCharCode = charCode
                startGlyphID = glyphID
                nGroups = nGroups + 1
            lastGlyphID = glyphID
            lastCharCode = charCode
        # Flush the group that was still open when the loop ended.
        dataList.append(struct.pack(">LLL", startCharCode, lastCharCode, startGlyphID))
        nGroups = nGroups + 1
        data = bytesjoin(dataList)
        lengthSubtable = len(data) + 16
        assert len(data) == (nGroups*12) == (lengthSubtable-16)
        return struct.pack(">HHLLL", self.format, self.reserved, lengthSubtable, self.language, nGroups) + data

    def toXML(self, writer, ttFont):
        """Write the header fields as attributes, then the sorted mappings."""
        writer.begintag(self.__class__.__name__, [
            ("platformID", self.platformID),
            ("platEncID", self.platEncID),
            ("format", self.format),
            ("reserved", self.reserved),
            ("length", self.length),
            ("language", self.language),
            ("nGroups", self.nGroups),
        ])
        writer.newline()
        codes = sorted(self.cmap.items())
        self._writeCodes(codes, writer)
        writer.endtag(self.__class__.__name__)
        writer.newline()

    def fromXML(self, name, attrs, content, ttFont):
        """Fill this subtable from a parsed XML element.

        Restores the header fields from ``attrs`` and adds one
        ``code -> glyph name`` entry to ``self.cmap`` per ``map`` child;
        other children are ignored.
        """
        self.format = safeEval(attrs["format"])
        self.reserved = safeEval(attrs["reserved"])
        self.length = safeEval(attrs["length"])
        self.language = safeEval(attrs["language"])
        self.nGroups = safeEval(attrs["nGroups"])
        if not hasattr(self, "cmap"):
            self.cmap = {}
        cmap = self.cmap

        for element in content:
            if not isinstance(element, tuple):
                continue
            # Distinct local names: do not rebind the method's own
            # name/attrs/content arguments while iterating over content.
            childName, childAttrs, dummyContent = element
            if childName != "map":
                continue
            cmap[safeEval(childAttrs["code"])] = childAttrs["name"]


class cmap_format_12(cmap_format_12_or_13):
    """Format 12: glyph IDs increase by one for each code in a group."""

    _format_step = 1

    def __init__(self, format=12):
        cmap_format_12_or_13.__init__(self, format)

    def _computeGIDs(self, startingGlyph, numberOfGlyphs):
        return list(range(startingGlyph, startingGlyph + numberOfGlyphs))

    def _IsInSameRun(self, glyphID, lastGlyphID, charCode, lastCharCode):
        return (glyphID == 1 + lastGlyphID) and (charCode == 1 + lastCharCode)


class cmap_format_13(cmap_format_12_or_13):
    """Format 13: every code in a group maps to the same glyph ID."""

    _format_step = 0

    def __init__(self, format=13):
        cmap_format_12_or_13.__init__(self, format)

    def _computeGIDs(self, startingGlyph, numberOfGlyphs):
        return [startingGlyph] * numberOfGlyphs

    def _IsInSameRun(self, glyphID, lastGlyphID, charCode, lastCharCode):
        return (glyphID == lastGlyphID) and (charCode == 1 + lastCharCode)
def cvtToUVS(threeByteString):
    """Return the integer stored in a 3-byte big-endian byte string."""
    # struct has no 24-bit code: widen to 32 bits with a leading zero byte.
    return struct.unpack(">L", b"\0" + threeByteString)[0]


def cvtFromUVS(val):
    """Return ``val`` (0 <= val < 0x1000000) as 3 big-endian bytes."""
    assert 0 <= val < 0x1000000
    # Pack as 32 bits, then drop the most significant byte, which is
    # always zero for values in the asserted range.
    packed = struct.pack(">L", val)
    return packed[1:]
class cmap_format_14(CmapSubtable):
    """cmap subtable format 14: Unicode variation sequences.

    ``self.uvsDict`` maps a variation selector to a list of
    ``(unicodeValue, glyphName)`` tuples.  A glyph name of None marks a
    "default" entry, i.e. one that carries no glyph of its own.
    ``self.cmap`` is always an empty dict and exists only for clients
    that expect every subtable to have one.
    """

    def decompileHeader(self, data, ttFont):
        """Parse the 10-byte header and keep the remaining data."""
        format, length, numVarSelectorRecords = struct.unpack(">HLL", data[:10])
        self.data = data[10:]
        self.length = length
        self.numVarSelectorRecords = numVarSelectorRecords
        self.ttFont = ttFont
        self.language = 0xFF  # has no language.

    def decompile(self, data, ttFont):
        """Build ``self.uvsDict`` from the binary subtable data.

        Either both ``data`` and ``ttFont`` are given (direct call), or
        both are None, in which case the data previously stored by
        ``decompileHeader`` is used.
        """
        if data is not None and ttFont is not None:
            self.decompileHeader(data, ttFont)
        else:
            assert (data is None and ttFont is None), "Need both data and ttFont arguments"
        data = self.data

        self.cmap = {}  # so that clients that expect this to exist in a cmap table won't fail.
        uvsDict = {}
        recOffset = 0
        for n in range(self.numVarSelectorRecords):
            uvs, defOVSOffset, nonDefUVSOffset = struct.unpack(">3sLL", data[recOffset:recOffset + 11])
            recOffset += 11
            varUVS = cvtToUVS(uvs)
            if defOVSOffset:
                # Stored offsets count from the subtable start, but ``data``
                # begins after the 10-byte header.
                startOffset = defOVSOffset - 10
                numValues, = struct.unpack(">L", data[startOffset:startOffset+4])
                startOffset += 4
                for r in range(numValues):
                    uv, addtlCnt = struct.unpack(">3sB", data[startOffset:startOffset+4])
                    startOffset += 4
                    firstBaseUV = cvtToUVS(uv)
                    cnt = addtlCnt + 1
                    # Default entries carry no glyph: pair each value with None.
                    localUVList = [(baseUV, None) for baseUV in range(firstBaseUV, firstBaseUV + cnt)]
                    uvsDict.setdefault(varUVS, []).extend(localUVList)

            if nonDefUVSOffset:
                startOffset = nonDefUVSOffset - 10
                numRecs, = struct.unpack(">L", data[startOffset:startOffset+4])
                startOffset += 4
                localUVList = []
                for r in range(numRecs):
                    uv, gid = struct.unpack(">3sH", data[startOffset:startOffset+5])
                    startOffset += 5
                    uv = cvtToUVS(uv)
                    glyphName = self.ttFont.getGlyphName(gid)
                    localUVList.append((uv, glyphName))
                uvsDict.setdefault(varUVS, []).extend(localUVList)

        self.uvsDict = uvsDict
        # Drop the raw bytes, as the other subtable formats in this file
        # do.  Otherwise compile() after a direct decompile(data, ttFont)
        # would return the stale bytes and ignore edits to uvsDict.
        self.data = data = None

    def toXML(self, writer, ttFont):
        """Write one ``map`` element per (selector, value) entry.

        Entries of each selector are sorted in place, default entries
        (glyph name None) first; those are written without a ``name``
        attribute.
        """
        writer.begintag(self.__class__.__name__, [
            ("platformID", self.platformID),
            ("platEncID", self.platEncID),
        ])
        writer.newline()
        uvsDict = self.uvsDict
        uvsList = sorted(uvsDict.keys())
        for uvs in uvsList:
            uvList = uvsDict[uvs]
            uvList.sort(key=lambda item: (item[1] is not None, item[0], item[1]))
            for uv, gname in uvList:
                attrs = [("uv", hex(uv)), ("uvs", hex(uvs))]
                if gname is not None:
                    attrs.append(("name", gname))
                writer.simpletag("map", attrs)
                writer.newline()
        writer.endtag(self.__class__.__name__)
        writer.newline()

    def fromXML(self, name, attrs, content, ttFont):
        """Fill ``self.uvsDict`` from a parsed XML element.

        Each ``map`` child adds one ``(uv, glyph name)`` entry under its
        ``uvs`` selector; other children are ignored.  A missing ``name``
        attribute gives a default entry (glyph name None).
        """
        self.language = 0xFF  # provide a value so that CmapSubtable.__lt__() won't fail
        if not hasattr(self, "cmap"):
            self.cmap = {}  # so that clients that expect this to exist in a cmap table won't fail.
        if not hasattr(self, "uvsDict"):
            self.uvsDict = {}
        uvsDict = self.uvsDict

        # For backwards compatibility reasons we accept "None" as an indicator
        # for "default mapping", unless the font actually has a glyph named
        # "None".
        _hasGlyphNamedNone = None

        for element in content:
            if not isinstance(element, tuple):
                continue
            # Distinct local names: do not rebind the method's own
            # name/attrs/content arguments while iterating over content.
            childName, childAttrs, dummyContent = element
            if childName != "map":
                continue
            uvs = safeEval(childAttrs["uvs"])
            uv = safeEval(childAttrs["uv"])
            gname = childAttrs.get("name")
            if gname == "None":
                # Look the glyph order up at most once, and only if needed.
                if _hasGlyphNamedNone is None:
                    _hasGlyphNamedNone = "None" in ttFont.getGlyphOrder()
                if not _hasGlyphNamedNone:
                    gname = None
            uvsDict.setdefault(uvs, []).append((uv, gname))

    def compile(self, ttFont):
        """Return the binary form of this subtable.

        If raw data is still attached it is written back unchanged behind
        its header.  Otherwise the subtable is rebuilt from
        ``self.uvsDict``, and ``self.numVarSelectorRecords`` and
        ``self.length`` are updated to match.
        """
        if self.data:
            return struct.pack(">HLL", self.format, self.length, self.numVarSelectorRecords) + self.data

        uvsDict = self.uvsDict
        uvsList = sorted(uvsDict.keys())
        self.numVarSelectorRecords = len(uvsList)
        offset = 10 + self.numVarSelectorRecords*11  # current value is end of VarSelectorRecords block.
        data = []
        varSelectorRecords = []
        for uvs in uvsList:
            entryList = uvsDict[uvs]

            # Default entries, de-duplicated so that a repeated value
            # cannot produce duplicate range records.
            defList = sorted(set(entry[0] for entry in entryList if entry[1] is None))
            if defList:
                defOVSOffset = offset

                # Fold consecutive values into (start, additionalCount)
                # ranges.  additionalCount is packed as one unsigned byte,
                # so a run is cut after 256 values; an uncapped count
                # made struct.pack fail on longer runs.
                defRecs = []
                rangeStart = defList[0]
                additionalCount = 0
                for uv in defList[1:]:
                    if uv == rangeStart + additionalCount + 1 and additionalCount < 255:
                        additionalCount += 1
                        continue
                    defRecs.append(struct.pack(">3sB", cvtFromUVS(rangeStart), additionalCount))
                    rangeStart = uv
                    additionalCount = 0
                defRecs.append(struct.pack(">3sB", cvtFromUVS(rangeStart), additionalCount))

                numDefRecs = len(defRecs)
                data.append(struct.pack(">L", numDefRecs))
                data.extend(defRecs)
                offset += 4 + numDefRecs*4
            else:
                defOVSOffset = 0

            ndefList = sorted(entry for entry in entryList if entry[1] is not None)
            if ndefList:
                nonDefUVSOffset = offset
                numNonDefRecs = len(ndefList)
                data.append(struct.pack(">L", numNonDefRecs))
                offset += 4 + numNonDefRecs*5

                for uv, gname in ndefList:
                    gid = ttFont.getGlyphID(gname)
                    ndrec = struct.pack(">3sH", cvtFromUVS(uv), gid)
                    data.append(ndrec)
            else:
                nonDefUVSOffset = 0

            vrec = struct.pack(">3sLL", cvtFromUVS(uvs), defOVSOffset, nonDefUVSOffset)
            varSelectorRecords.append(vrec)

        data = bytesjoin(varSelectorRecords) + bytesjoin(data)
        self.length = 10 + len(data)
        headerdata = struct.pack(">HLL", self.format, self.length, self.numVarSelectorRecords)

        return headerdata + data
class cmap_format_unknown(CmapSubtable):
    """Subtable that keeps its raw bytes and round-trips them as hex.

    NOTE(review): presumably used for formats that have no entry in
    ``cmap_classes`` -- confirm against ``CmapSubtable.newSubtable``.
    """

    def toXML(self, writer, ttFont):
        """Write the raw subtable data as a hex dump."""
        # First 12 characters of the class name ("cmap_format_" for this
        # class) followed by the actual format number.
        tagName = self.__class__.__name__[:12] + str(self.format)
        tagAttrs = [
            ("platformID", self.platformID),
            ("platEncID", self.platEncID),
        ]
        writer.begintag(tagName, tagAttrs)
        writer.newline()
        writer.dumphex(self.data)
        writer.endtag(tagName)
        writer.newline()

    def fromXML(self, name, attrs, content, ttFont):
        """Restore the raw data from a hex dump; the cmap stays empty."""
        self.data = readHex(content)
        self.cmap = {}

    def decompileHeader(self, data, ttFont):
        """Store the complete data unparsed."""
        self.language = 0  # dummy value
        self.data = data

    def decompile(self, data, ttFont):
        """Store ``data`` when called directly; otherwise do nothing."""
        # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
        # If not, someone is calling the subtable decompile() directly, and must provide both args.
        if data is None or ttFont is None:
            assert (data is None and ttFont is None), "Need both data and ttFont arguments"
            return
        self.decompileHeader(data, ttFont)

    def compile(self, ttFont):
        """Return the stored raw data, or None if there is none."""
        return self.data if self.data else None


# Subtable format number -> class implementing that format.
cmap_classes = {
    0: cmap_format_0,
    2: cmap_format_2,
    4: cmap_format_4,
    6: cmap_format_6,
    12: cmap_format_12,
    13: cmap_format_13,
    14: cmap_format_14,
}