• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1from fontTools.misc.py23 import bytesjoin
2from fontTools.misc.textTools import safeEval, readHex
3from fontTools.misc.encodingTools import getEncoding
4from fontTools.ttLib import getSearchRange
5from fontTools.unicode import Unicode
6from . import DefaultTable
7import sys
8import struct
9import array
10import logging
11
12
13log = logging.getLogger(__name__)
14
15
def _make_map(font, chars, gids):
	"""Build a ``{charCode: glyphName}`` dict from two parallel sequences.

	``chars`` and ``gids`` must have the same length. Entries whose glyph ID
	is 0 are left out of the result. A glyph ID is first looked up in the
	list returned by ``font.getGlyphOrder()``; when that raises IndexError,
	``font.getGlyphName(gid)`` supplies the name instead.
	"""
	assert len(chars) == len(gids)
	order = font.getGlyphOrder()
	mapping = {}
	for code, glyphID in zip(chars, gids):
		if glyphID != 0:
			try:
				glyphName = order[glyphID]
			except IndexError:
				glyphName = font.getGlyphName(glyphID)
			mapping[code] = glyphName
	return mapping
29
class table__c_m_a_p(DefaultTable.DefaultTable):
	"""Character-to-glyph mapping ('cmap') table.

	After decompile() or fromXML(), ``self.tableVersion`` holds the version
	from the table header and ``self.tables`` the list of CmapSubtable
	instances, one per encoding record.
	"""

	def getcmap(self, platformID, platEncID):
		"""Return the first subtable with the given platformID and platEncID, or None."""
		for subtable in self.tables:
			if (subtable.platformID == platformID and
					subtable.platEncID == platEncID):
				return subtable
		return None # not found

	def getBestCmap(self, cmapPreferences=((3, 10), (0, 6), (0, 4), (3, 1), (0, 3), (0, 2), (0, 1), (0, 0))):
		"""Return the 'best' unicode cmap dictionary available in the font,
		or None, if no unicode cmap subtable is available.

		By default it will search for the following (platformID, platEncID)
		pairs:
			(3, 10), (0, 6), (0, 4), (3, 1), (0, 3), (0, 2), (0, 1), (0, 0)
		This can be customized via the cmapPreferences argument.
		"""
		for platformID, platEncID in cmapPreferences:
			cmapSubtable = self.getcmap(platformID, platEncID)
			if cmapSubtable is not None:
				return cmapSubtable.cmap
		return None  # None of the requested cmap subtables were found

	def buildReversed(self):
		"""Returns a reverse cmap such as {'one':{0x31}, 'A':{0x41,0x391}}.

		The values are sets of Unicode codepoints because
		some fonts map different codepoints to the same glyph.
		For example, U+0041 LATIN CAPITAL LETTER A and U+0391
		GREEK CAPITAL LETTER ALPHA are sometimes the same glyph.
		"""
		result = {}
		for subtable in self.tables:
			if subtable.isUnicode():
				for codepoint, name in subtable.cmap.items():
					result.setdefault(name, set()).add(codepoint)
		return result

	def decompile(self, data, ttFont):
		"""Parse the binary table ``data`` into ``self.tableVersion`` and ``self.tables``.

		Only each subtable's header is decoded here; the rest of a subtable is
		decompiled when one of its attributes is first referenced. Encoding
		records that point at the same offset share a single ``cmap`` dict.
		Subtables whose header reports a zero length are logged and skipped.
		"""
		tableVersion, numSubTables = struct.unpack(">HH", data[:4])
		self.tableVersion = int(tableVersion)
		self.tables = tables = []
		# Maps a subtable offset to the index in `tables` of the first
		# subtable that was read from that offset.
		seenOffsets = {}
		for i in range(numSubTables):
			platformID, platEncID, offset = struct.unpack(
					">HHl", data[4+i*8:4+(i+1)*8])
			platformID, platEncID = int(platformID), int(platEncID)
			format, length = struct.unpack(">HH", data[offset:offset+4])
			# Formats 8/10/12/13 and 14 store a 32-bit length at a different
			# position, so re-read the header with the matching layout.
			if format in [8,10,12,13]:
				format, reserved, length = struct.unpack(">HHL", data[offset:offset+8])
			elif format in [14]:
				format, length = struct.unpack(">HL", data[offset:offset+6])

			if not length:
				log.error(
					"cmap subtable is reported as having zero length: platformID %s, "
					"platEncID %s, format %s offset %s. Skipping table.",
					platformID, platEncID, format, offset)
				continue
			table = CmapSubtable.newSubtable(format)
			table.platformID = platformID
			table.platEncID = platEncID
			# Note that by default we decompile only the subtable header info;
			# any other data gets decompiled only when an attribute of the
			# subtable is referenced.
			table.decompileHeader(data[offset:offset+int(length)], ttFont)
			if offset in seenOffsets:
				table.data = None # Mark as decompiled
				table.cmap = tables[seenOffsets[offset]].cmap
			else:
				# Store the position in `tables`, not the encoding-record
				# index `i`: the two differ as soon as a zero-length subtable
				# has been skipped above.
				seenOffsets[offset] = len(tables)
			tables.append(table)

	def compile(self, ttFont):
		"""Return the binary table: header, encoding records, then subtable data.

		``self.tables`` is sorted in place first. A subtable whose ``cmap`` is
		the same object as an earlier one, or which compiles to the same bytes
		as an earlier one, is written once and referenced by offset.
		"""
		self.tables.sort()  # sort according to the spec; see CmapSubtable.__lt__()
		numSubTables = len(self.tables)
		totalOffset = 4 + 8 * numSubTables
		data = struct.pack(">HH", self.tableVersion, numSubTables)
		tableData = b""
		seen = {}  # Some tables are the same object reference. Don't compile them twice.
		done = {}  # Some tables are different objects, but compile to the same data chunk
		for table in self.tables:
			try:
				offset = seen[id(table.cmap)]
			except KeyError:
				chunk = table.compile(ttFont)
				if chunk in done:
					offset = done[chunk]
				else:
					offset = seen[id(table.cmap)] = done[chunk] = totalOffset + len(tableData)
					tableData = tableData + chunk
			data = data + struct.pack(">HHl", table.platformID, table.platEncID, offset)
		return data + tableData

	def toXML(self, writer, ttFont):
		"""Write the table version followed by every subtable to ``writer``."""
		writer.simpletag("tableVersion", version=self.tableVersion)
		writer.newline()
		for table in self.tables:
			table.toXML(writer, ttFont)

	def fromXML(self, name, attrs, content, ttFont):
		"""Consume one child element of the table's XML dump.

		``tableVersion`` sets ``self.tableVersion``; an element named
		``cmap_format_<N>`` creates a format-N subtable, lets it read its own
		content and appends it to ``self.tables``. Other elements are ignored.
		"""
		if name == "tableVersion":
			self.tableVersion = safeEval(attrs["version"])
			return
		if name[:12] != "cmap_format_":
			return
		if not hasattr(self, "tables"):
			self.tables = []
		format = safeEval(name[12:])
		table = CmapSubtable.newSubtable(format)
		table.platformID = safeEval(attrs["platformID"])
		table.platEncID = safeEval(attrs["platEncID"])
		table.fromXML(name, attrs, content, ttFont)
		self.tables.append(table)
145
146
class CmapSubtable(object):
	"""Base class shared by the cmap subtable formats.

	An instance always has ``format``, ``data`` and ``ttFont`` attributes.
	After decompileHeader() the undecoded subtable body is kept in
	``self.data``; it is decompiled the first time an attribute that does
	not exist yet (typically ``cmap``) is requested.
	"""

	@staticmethod
	def getSubtableClass(format):
		"""Return the subtable class for a format."""
		return cmap_classes.get(format, cmap_format_unknown)

	@staticmethod
	def newSubtable(format):
		"""Return a new instance of a subtable for format."""
		subtableClass = CmapSubtable.getSubtableClass(format)
		return subtableClass(format)

	def __init__(self, format):
		self.format = format
		self.data = None
		self.ttFont = None

	def __getattr__(self, attr):
		"""Decompile the saved subtable data on demand, then retry the lookup.

		Raises AttributeError for dunder names and when there is no pending
		data left to decompile.
		"""
		# allow lazy decompilation of subtables.
		if attr[:2] == '__': # don't handle requests for member functions like '__lt__'
			raise AttributeError(attr)
		if self.data is None:
			raise AttributeError(attr)
		self.decompile(None, None) # use saved data.
		# Once this table has been decompiled, make sure we don't just return
		# the original data. Clearing it also avoids recursion when called
		# with an attribute that the cmap subtable doesn't have.
		self.data = None
		return getattr(self, attr)

	def decompileHeader(self, data, ttFont):
		"""Read format, length and language from the first 6 bytes of ``data``.

		The remaining bytes are stored in ``self.data`` and ``ttFont`` in
		``self.ttFont`` for the later, lazy decompilation. The length field
		must equal ``len(data)``.
		"""
		format, length, language = struct.unpack(">HHH", data[:6])
		assert len(data) == length, "corrupt cmap table format %d (data length: %d, header length: %d)" % (format, len(data), length)
		self.format = int(format)
		self.length = int(length)
		self.language = int(language)
		self.data = data[6:]
		self.ttFont = ttFont

	def toXML(self, writer, ttFont):
		"""Write this subtable as an element named after its class, with one
		``map`` child per character code, in ascending code order."""
		writer.begintag(self.__class__.__name__, [
				("platformID", self.platformID),
				("platEncID", self.platEncID),
				("language", self.language),
				])
		writer.newline()
		codes = sorted(self.cmap.items())
		self._writeCodes(codes, writer)
		writer.endtag(self.__class__.__name__)
		writer.newline()

	def getEncoding(self, default=None):
		"""Returns the Python encoding name for this cmap subtable based on its platformID,
		platEncID, and language.  If encoding for these values is not known, by default
		None is returned.  That can be overridden by passing a value to the default
		argument.

		Note that if you want to choose a "preferred" cmap subtable, most of the time
		self.isUnicode() is what you want as that one only returns true for the modern,
		commonly used, Unicode-compatible triplets, not the legacy ones.
		"""
		return getEncoding(self.platformID, self.platEncID, self.language, default)

	def isUnicode(self):
		"""Return True for platform 0, or platform 3 with encoding 0, 1 or 10."""
		return (self.platformID == 0 or
			(self.platformID == 3 and self.platEncID in [0, 1, 10]))

	def isSymbol(self):
		"""Return True for platform 3, encoding 0."""
		return self.platformID == 3 and self.platEncID == 0

	def _writeCodes(self, codes, writer):
		"""Write one ``map`` tag per (code, name) pair; Unicode subtables also
		get a comment taken from the ``Unicode`` lookup for that code."""
		isUnicode = self.isUnicode()
		for code, name in codes:
			writer.simpletag("map", code=hex(code), name=name)
			if isUnicode:
				writer.comment(Unicode[code])
			writer.newline()

	def __lt__(self, other):
		"""Order subtables by (platformID, platEncID, language).

		Implemented so that list.sort() sorts according to the spec. Two
		subtables with an identical key are reported as "not less than" each
		other, so a stable sort keeps them in their existing order.
		"""
		if not isinstance(other, CmapSubtable):
			return NotImplemented

		selfKey = (
			getattr(self, "platformID", None),
			getattr(self, "platEncID", None),
			getattr(self, "language", None))
		otherKey = (
			getattr(other, "platformID", None),
			getattr(other, "platEncID", None),
			getattr(other, "language", None))
		if selfKey != otherKey:
			return selfKey < otherKey
		# Equal keys: there is nothing further to order by. Comparing the
		# instance __dict__s, as was done before, raises TypeError on
		# Python 3 because dicts do not support "<".
		return False
241
242
class cmap_format_0(CmapSubtable):
	"""Format 0 subtable: one glyph-ID byte for each of the character codes 0..255."""

	def decompile(self, data, ttFont):
		"""Fill ``self.cmap`` from the 256 glyph-ID bytes held in ``self.data``.

		Usually reached through CmapSubtable.__getattr__, in which case both
		arguments are None and the previously saved data is used. A direct
		caller must pass both ``data`` and ``ttFont``; the header is then
		parsed first.
		"""
		if data is None or ttFont is None:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"
		else:
			self.decompileHeader(data, ttFont)
		assert 262 == self.length, "Format 0 cmap subtable not 262 bytes"
		# decompileHeader left everything after the 6-byte header in self.data.
		glyphIDs = array.array("B")
		glyphIDs.frombytes(self.data)
		codes = list(range(len(glyphIDs)))
		self.cmap = _make_map(self.ttFont, codes, glyphIDs)

	def compile(self, ttFont):
		"""Return the 262-byte binary subtable.

		Raw data that is still pending is written back unchanged behind a
		fresh header; otherwise the byte array is built from ``self.cmap``,
		using glyph ID 0 for every code that has no entry.
		"""
		if self.data:
			return struct.pack(">HHH", 0, 262, self.language) + self.data

		mapping = self.cmap
		assert set(mapping.keys()).issubset(range(256))
		toGlyphID = ttFont.getGlyphID
		glyphIDs = []
		for code in range(256):
			if code in mapping:
				glyphIDs.append(toGlyphID(mapping[code]))
			else:
				glyphIDs.append(0)

		body = array.array("B", glyphIDs)
		compiled = struct.pack(">HHH", 0, 262, self.language) + body.tobytes()
		assert len(compiled) == 262
		return compiled

	def fromXML(self, name, attrs, content, ttFont):
		"""Read ``language`` from ``attrs`` and add every ``map`` child element
		in ``content`` to ``self.cmap`` (created if it does not exist yet)."""
		self.language = safeEval(attrs["language"])
		if not hasattr(self, "cmap"):
			self.cmap = {}
		mapping = self.cmap
		for child in content:
			# Non-tuple items are skipped, as are elements other than "map".
			if isinstance(child, tuple):
				tag, childAttrs, _ = child
				if tag == "map":
					mapping[safeEval(childAttrs["code"])] = childAttrs["name"]
285
286
# struct layout of one format 2 subheader record, packed and unpacked in the
# order firstCode, entryCount, idDelta (the only signed field), idRangeOffset.
subHeaderFormat = ">HHhH"
class SubHeader(object):
	"""Plain record for one format 2 subheader plus its glyph index sub-array."""

	def __init__(self):
		# The four header fields start out unset; each instance gets its own
		# empty glyph index list.
		self.firstCode = self.entryCount = self.idDelta = self.idRangeOffset = None
		self.glyphIndexArray = list()
295
class cmap_format_2(CmapSubtable):
	"""Format 2 subtable: character codes of one or two bytes, where the first
	byte selects a subheader that maps the remaining byte to a glyph ID."""

	def setIDDelta(self, subHeader):
		"""Rebase ``subHeader.glyphIndexArray`` so its smallest non-zero entry
		becomes 1, and store the amount removed in ``subHeader.idDelta``.

		Zero entries (.notdef) are left untouched. Nothing is changed when the
		smallest entry is already 0 or 1. The array must not be empty.
		"""
		subHeader.idDelta = 0
		# find the minGI which is not zero.
		minGI = subHeader.glyphIndexArray[0]
		for gid in subHeader.glyphIndexArray:
			if (gid != 0) and (gid < minGI):
				minGI = gid
		# The lowest gid in glyphIndexArray, after subtracting idDelta, must be 1.
		# idDelta is a short, and must be between -32K and 32K. minGI can be between 1 and 64K.
		# We would like to pick an idDelta such that the first glyphArray GID is 1,
		# so that we are more likely to be able to combine glypharray GID subranges.
		# This means that we have a problem when the amount to remove is > 32K.
		# Since the final gi is reconstructed from the glyphArray GID by:
		#    (short)finalGID = (gid + idDelta) % 0x10000),
		# we can get from a glypharray GID of 1 to a final GID of 65K by subtracting 2, and casting the
		# negative number to an unsigned short.

		if (minGI > 1):
			# Amount removed from every non-zero glyph ID; the stored IDs
			# therefore always stay within 1..0xFFFF for valid input.
			shift = minGI - 1
			if shift > 0x7FFF:
				# Does not fit a signed short: store the negative value that is
				# congruent to it modulo 0x10000.
				subHeader.idDelta = shift - 0x10000
			else:
				subHeader.idDelta = shift
			for i in range(subHeader.entryCount):
				gid = subHeader.glyphIndexArray[i]
				if gid > 0:
					subHeader.glyphIndexArray[i] = gid - shift

	def decompile(self, data, ttFont):
		"""Fill ``self.cmap`` from the subtable body held in ``self.data``.

		Usually reached through CmapSubtable.__getattr__, in which case both
		arguments are None and the previously saved data is used. A direct
		caller must pass both ``data`` and ``ttFont``; the header is then
		parsed first.
		"""
		# we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
		# If not, someone is calling the subtable decompile() directly, and must provide both args.
		if data is not None and ttFont is not None:
			self.decompileHeader(data, ttFont)
		else:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"

		data = self.data # decompileHeader assigns the data after the header to self.data
		# get the key array, and determine the number of subHeaders.
		allKeys = array.array("H")
		allKeys.frombytes(data[:512])
		data = data[512:]
		if sys.byteorder != "big": allKeys.byteswap()
		# Keys are stored as byte offsets into the subheader array (8 bytes per subheader).
		subHeaderKeys = [ key//8 for key in allKeys]
		maxSubHeaderindex = max(subHeaderKeys)

		#Load subHeaders
		subHeaderList = []
		pos = 0
		for i in range(maxSubHeaderindex + 1):
			subHeader = SubHeader()
			(subHeader.firstCode, subHeader.entryCount, subHeader.idDelta, \
				subHeader.idRangeOffset) = struct.unpack(subHeaderFormat, data[pos:pos + 8])
			pos += 8
			# idRangeOffset is counted from the idRangeOffset word itself, which
			# is the last 2 bytes of the record just read.
			giDataPos = pos + subHeader.idRangeOffset-2
			giList = array.array("H")
			giList.frombytes(data[giDataPos:giDataPos + subHeader.entryCount*2])
			if sys.byteorder != "big": giList.byteswap()
			subHeader.glyphIndexArray = giList
			subHeaderList.append(subHeader)
		# How this gets processed.
		# Charcodes may be one or two bytes.
		# The first byte of a charcode is mapped through the subHeaderKeys, to select
		# a subHeader. For any subheader but 0, the next byte is then mapped through the
		# selected subheader. If subheader Index 0 is selected, then the byte itself is
		# mapped through the subheader, and there is no second byte.
		# Then assume that the subsequent byte is the first byte of the next charcode, and repeat.
		#
		# Each subheader references a range in the glyphIndexArray whose length is entryCount.
		# The range in glyphIndexArray referenced by a subheader may overlap with the range in glyphIndexArray
		# referenced by another subheader.
		# The only subheader that will be referenced by more than one first-byte value is the subheader
		# that maps the entire range of glyphID values to glyphIndex 0, e.g. notdef:
		#	 {firstChar 0, EntryCount 0,idDelta 0,idRangeOffset xx}
		# A byte being mapped through a subheader is treated as an index into a mapping of array index to font glyphIndex.
		# A subheader specifies a subrange within (0...256) by the
		# firstChar and EntryCount values. If the byte value is outside the subrange, then the glyphIndex is zero
		# (e.g. glyph not in font).
		# If the byte index is in the subrange, then an offset index is calculated as (byteIndex - firstChar).
		# The index to glyphIndex mapping is a subrange of the glyphIndexArray. You find the start of the subrange by
		# counting idRangeOffset bytes from the idRangeOffset word. The first value in this subrange is the
		# glyphIndex for the index firstChar. The offset index should then be used in this array to get the glyphIndex.
		# Example for Logocut-Medium
		# first byte of charcode = 129; selects subheader 1.
		# subheader 1 = {firstChar 64, EntryCount 108,idDelta 42,idRangeOffset 0252}
		# second byte of charCode = 66
		# the index offset = 66-64 = 2.
		# The subrange of the glyphIndexArray starting at 0x0252 bytes from the idRangeOffset word is:
		# [glyphIndexArray index], [subrange array index] = glyphIndex
		# [256], [0]=1 	from charcode [129, 64]
		# [257], [1]=2  	from charcode [129, 65]
		# [258], [2]=3  	from charcode [129, 66]
		# [259], [3]=4  	from charcode [129, 67]
		# So, the glyphIndex = 3 from the array. Then if idDelta is not zero and the glyph ID is not zero,
		# add it to the glyphID to get the final glyphIndex
		# value. In this case the final glyph index = 3+ 42 -> 45 for the final glyphIndex. Whew!

		# Falsy, so that compile() rebuilds the subtable from self.cmap
		# instead of writing the raw bytes back.
		self.data = b""
		cmap = {}
		for firstByte in range(256):
			subHeadindex = subHeaderKeys[firstByte]
			subHeader = subHeaderList[subHeadindex]
			if subHeadindex == 0:
				if (firstByte < subHeader.firstCode) or (firstByte >= subHeader.firstCode + subHeader.entryCount):
					continue # gi is notdef.
				else:
					charCode = firstByte
					offsetIndex = firstByte - subHeader.firstCode
					gi = subHeader.glyphIndexArray[offsetIndex]
					if gi != 0:
						gi = (gi + subHeader.idDelta) % 0x10000
					else:
						continue # gi is notdef.
				cmap[charCode] = gi
			else:
				if subHeader.entryCount:
					charCodeOffset = firstByte * 256 + subHeader.firstCode
					for offsetIndex in range(subHeader.entryCount):
						charCode = charCodeOffset + offsetIndex
						gi = subHeader.glyphIndexArray[offsetIndex]
						if gi != 0:
							gi = (gi + subHeader.idDelta) % 0x10000
						else:
							continue
						cmap[charCode] = gi
				# If not subHeader.entryCount, then all char codes with this first byte are
				# mapped to .notdef. We can skip this subtable, and leave the glyphs un-encoded, which is the
				# same as mapping it to .notdef.

		gids = list(cmap.values())
		charCodes = list(cmap.keys())
		self.cmap = _make_map(self.ttFont, charCodes, gids)

	def compile(self, ttFont):
		"""Return the binary subtable.

		Raw data that is still pending is written back unchanged behind a
		fresh header; otherwise the subtable is built from ``self.cmap``,
		which must not be empty.

		Raises KeyError(name) for a glyph name that cannot be turned into a
		glyph ID. Names of the form ``gid<N>`` that the font does not know
		are accepted as the literal glyph ID N.
		"""
		if self.data:
			return struct.pack(">HHH", self.format, self.length, self.language) + self.data
		kEmptyTwoCharCodeRange = -1
		notdefGI = 0

		items = sorted(self.cmap.items())
		charCodes = [item[0] for item in items]
		names = [item[1] for item in items]
		nameMap = ttFont.getReverseGlyphMap()
		try:
			gids = [nameMap[name] for name in names]
		except KeyError:
			nameMap = ttFont.getReverseGlyphMap(rebuild=True)
			try:
				gids = [nameMap[name] for name in names]
			except KeyError:
				# allow virtual GIDs in format 2 tables
				gids = []
				for name in names:
					try:
						gid = nameMap[name]
					except KeyError:
						try:
							if (name[:3] == 'gid'):
								gid = int(name[3:])
							else:
								gid = ttFont.getGlyphID(name)
						except Exception:
							# Any lookup/parse failure is reported as an unknown
							# glyph name; KeyboardInterrupt and SystemExit are
							# deliberately not intercepted.
							raise KeyError(name)

					gids.append(gid)

		# Process the (char code to gid) item list in char code order.
		# By definition, all one byte char codes map to subheader 0.
		# For all the two byte char codes, we assume that the first byte maps to the empty subhead (with an entry count of 0,
		# which defines all char codes in its range to map to notdef) unless proven otherwise.
		# Note that since the char code items are processed in char code order, all the char codes with the
		# same first byte are in sequential order.

		subHeaderKeys = [kEmptyTwoCharCodeRange for x in range(256)] # list of indices into subHeaderList.
		subHeaderList = []

		# We force this subheader entry 0 to exist in the subHeaderList in the case where someone comes up
		# with a cmap where all the one byte char codes map to notdef,
		# with the result that the subhead 0 would not get created just by processing the item list.
		charCode = charCodes[0]
		if charCode > 255:
			subHeader = SubHeader()
			subHeader.firstCode = 0
			subHeader.entryCount = 0
			subHeader.idDelta = 0
			subHeader.idRangeOffset = 0
			subHeaderList.append(subHeader)

		lastFirstByte = -1
		items = zip(charCodes, gids)
		for charCode, gid in items:
			if gid == 0:
				continue
			firstbyte = charCode >> 8
			secondByte = charCode & 0x00FF

			if firstbyte != lastFirstByte: # Need to update the current subhead, and start a new one.
				if lastFirstByte > -1:
					# fix GI's and iDelta of current subheader.
					self.setIDDelta(subHeader)

					# If it was subheader 0 for one-byte charCodes, then we need to set the subHeaderKeys value to zero
					# for the indices matching the char codes.
					if lastFirstByte == 0:
						for index in range(subHeader.entryCount):
							charCode = subHeader.firstCode + index
							subHeaderKeys[charCode] = 0

					assert (subHeader.entryCount == len(subHeader.glyphIndexArray)), "Error - subhead entry count does not match len of glyphID subrange."
				# init new subheader
				subHeader = SubHeader()
				subHeader.firstCode = secondByte
				subHeader.entryCount = 1
				subHeader.glyphIndexArray.append(gid)
				subHeaderList.append(subHeader)
				subHeaderKeys[firstbyte] = len(subHeaderList) -1
				lastFirstByte = firstbyte
			else:
				# need to fill in with notdefs all the code points between the last charCode and the current charCode.
				codeDiff = secondByte - (subHeader.firstCode + subHeader.entryCount)
				for i in range(codeDiff):
					subHeader.glyphIndexArray.append(notdefGI)
				subHeader.glyphIndexArray.append(gid)
				subHeader.entryCount = subHeader.entryCount + codeDiff + 1

		# fix GI's and iDelta of last subheader that we added to the subheader array.
		self.setIDDelta(subHeader)

		# Now we add a final subheader for the subHeaderKeys which maps to empty two byte charcode ranges.
		subHeader = SubHeader()
		subHeader.firstCode = 0
		subHeader.entryCount = 0
		subHeader.idDelta = 0
		subHeader.idRangeOffset = 2
		subHeaderList.append(subHeader)
		emptySubheadIndex = len(subHeaderList) - 1
		for index in range(256):
			if subHeaderKeys[index] == kEmptyTwoCharCodeRange:
				subHeaderKeys[index] = emptySubheadIndex
		# Since this is the last subheader, the GlyphIndex Array starts two bytes after the start of the
		# idRangeOffset word of this subHeader. We can safely point to the first entry in the GlyphIndexArray,
		# since the first subrange of the GlyphIndexArray is for subHeader 0, which always starts with
		# charcode 0 and GID 0.

		idRangeOffset = (len(subHeaderList)-1)*8 + 2 # offset to beginning of glyphIDArray from first subheader idRangeOffset.
		subheadRangeLen = len(subHeaderList) -1 # skip last special empty-set subheader; we've already hardcoded its idRangeOffset to 2.
		for index in range(subheadRangeLen):
			subHeader = subHeaderList[index]
			subHeader.idRangeOffset = 0
			for j in range(index):
				prevSubhead = subHeaderList[j]
				if prevSubhead.glyphIndexArray == subHeader.glyphIndexArray: # use the glyphIndexArray subarray
					subHeader.idRangeOffset = prevSubhead.idRangeOffset - (index-j)*8
					subHeader.glyphIndexArray = []
					break
			if subHeader.idRangeOffset == 0: # didn't find one.
				subHeader.idRangeOffset = idRangeOffset
				idRangeOffset = (idRangeOffset - 8) + subHeader.entryCount*2 # one less subheader, one more subArray.
			else:
				idRangeOffset = idRangeOffset - 8  # one less subheader

		# Now we can write out the data!
		length = 6 + 512 + 8*len(subHeaderList) # header, 256 subHeaderKeys, and subheader array.
		for subhead in subHeaderList[:-1]:
			length = length + len(subhead.glyphIndexArray)*2  # We can't use subhead.entryCount, as some of the subhead may share subArrays.
		dataList = [struct.pack(">HHH", 2, length, self.language)]
		for index in subHeaderKeys:
			dataList.append(struct.pack(">H", index*8))
		for subhead in subHeaderList:
			dataList.append(struct.pack(subHeaderFormat, subhead.firstCode, subhead.entryCount, subhead.idDelta, subhead.idRangeOffset))
		for subhead in subHeaderList[:-1]:
			for gi in subhead.glyphIndexArray:
				dataList.append(struct.pack(">H", gi))
		data = bytesjoin(dataList)
		assert (len(data) == length), "Error: cmap format 2 is not same length as calculated! actual: " + str(len(data))+ " calc : " + str(length)
		return data

	def fromXML(self, name, attrs, content, ttFont):
		"""Read ``language`` from ``attrs`` and add every ``map`` child element
		in ``content`` to ``self.cmap`` (created if it does not exist yet)."""
		self.language = safeEval(attrs["language"])
		if not hasattr(self, "cmap"):
			self.cmap = {}
		cmap = self.cmap

		for element in content:
			if not isinstance(element, tuple):
				continue
			name, attrs, content = element
			if name != "map":
				continue
			cmap[safeEval(attrs["code"])] = attrs["name"]
590
591
# struct layout of the fixed-size start of a format 4 subtable. compile() packs
# seven uint16 values with it, in this order: format, length, language,
# segCountX2, searchRange, entrySelector, rangeShift.
cmap_format_4_format = ">7H"

# The variable-length arrays that follow the fixed part, in file order:
#uint16  endCode[segCount]          # Ending character code for each segment, last = 0xFFFF.
#uint16  reservedPad                # This value should be zero
#uint16  startCode[segCount]        # Starting character code for each segment
#uint16  idDelta[segCount]          # Delta for all character codes in segment
#uint16  idRangeOffset[segCount]    # Offset in bytes to glyph indexArray, or 0
#uint16  glyphIndexArray[variable]  # Glyph index array
600
def splitRange(startCode, endCode, cmap):
	"""Split the code range startCode..endCode into cheaper format 4 segments.

	``cmap`` maps every character code in the range to its glyph ID. Runs of
	codes whose glyph IDs increase by exactly one are candidates for a
	segment of their own; a run is only split off when it is long enough to
	pay for the extra segment(s). No optimality is claimed for the result.

	Returns ``(start, end)``: ``end`` lists the end code of every resulting
	segment and ``start`` the start codes of all segments but the first
	(which begins at ``startCode``), so ``len(start) + 1 == len(end)``.
	"""
	if startCode == endCode:
		return [], [endCode]

	# Step 1: collect the maximal runs with consecutive glyph IDs.
	# runStart is None exactly while we are outside such a run.
	runs = []
	runStart = None
	prevCode = startCode
	prevGlyphID = cmap[startCode]
	for code in range(startCode + 1, endCode + 1):
		glyphID = cmap[code]
		if glyphID - 1 == prevGlyphID:
			if runStart is None:
				runStart = prevCode
		elif runStart is not None:
			runs.append((runStart, prevCode))
			runStart = None
		prevGlyphID = glyphID
		prevCode = code
	if runStart is not None:
		runs.append((runStart, prevCode))
	assert prevCode == endCode

	# Step 2: drop the runs that would only make the data bigger.
	# A new segment costs 8 bytes, not using a new segment costs 2 bytes per
	# character.
	kept = []
	for first, last in runs:
		if first == startCode and last == endCode:
			break  # the run is the whole range, nothing to split
		# A run touching either edge adds one segment, any other run adds two.
		touchesEdge = first == startCode or last == endCode
		threshold = 4 if touchesEdge else 8
		if (last - first + 1) > threshold:
			kept.append((first, last))

	if not kept:
		return [], [endCode]

	# Step 3: walk the kept runs in order and insert a filler segment for
	# every stretch of codes they leave uncovered -- those are the segments
	# in which the glyph IDs are _not_ consecutive.
	segments = []
	nextCode = startCode
	for first, last in kept:
		if first != nextCode:
			segments.append((nextCode, first - 1))
		segments.append((first, last))
		nextCode = last + 1
	if nextCode != endCode + 1:
		segments.append((nextCode, endCode))

	# Step 4: convert to the start/end lists; the first start code is left
	# out of the result.
	start = [first for first, _ in segments[1:]]
	end = [last for _, last in segments]

	assert len(start) + 1 == len(end)
	return start, end
678
679
class cmap_format_4(CmapSubtable):
	"""Format 4 subtable: 16-bit character codes stored as segments of
	contiguous codes ("segment mapping to delta values")."""

	def decompile(self, data, ttFont):
		"""Fill ``self.cmap`` from the subtable body held in ``self.data``.

		Usually reached through CmapSubtable.__getattr__, in which case both
		arguments are None and the previously saved data is used. A direct
		caller must pass both ``data`` and ``ttFont``; the header is then
		parsed first.
		"""
		# we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
		# If not, someone is calling the subtable decompile() directly, and must provide both args.
		if data is not None and ttFont is not None:
			self.decompileHeader(data, ttFont)
		else:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"

		data = self.data # decompileHeader assigns the data after the header to self.data
		(segCountX2, searchRange, entrySelector, rangeShift) = \
					struct.unpack(">4H", data[:8])
		data = data[8:]
		segCount = segCountX2 // 2

		allCodes = array.array("H")
		allCodes.frombytes(data)
		self.data = data = None

		if sys.byteorder != "big": allCodes.byteswap()

		# divide the data
		endCode = allCodes[:segCount]
		allCodes = allCodes[segCount+1:]  # the +1 is skipping the reservedPad field
		startCode = allCodes[:segCount]
		allCodes = allCodes[segCount:]
		idDelta = allCodes[:segCount]
		allCodes = allCodes[segCount:]
		idRangeOffset = allCodes[:segCount]
		glyphIndexArray = allCodes[segCount:]
		lenGIArray = len(glyphIndexArray)

		# build 2-byte character mapping
		charCodes = []
		gids = []
		for i in range(len(startCode) - 1):	# don't do 0xffff!
			start = startCode[i]
			delta = idDelta[i]
			rangeOffset = idRangeOffset[i]
			# rangeOffset is a byte offset counted from the position of
			# idRangeOffset[i] itself. Converted to 16-bit words, the
			# (len(idRangeOffset) - i) words left in the idRangeOffset array
			# are skipped to reach glyphIndexArray; subtracting `start` here
			# means adding a char code yields that code's array index.
			partial = rangeOffset // 2 - start + i - len(idRangeOffset)

			rangeCharCodes = list(range(startCode[i], endCode[i] + 1))
			charCodes.extend(rangeCharCodes)
			if rangeOffset == 0:
				gids.extend([(charCode + delta) & 0xFFFF for charCode in rangeCharCodes])
			else:
				for charCode in rangeCharCodes:
					index = charCode + partial
					assert (index < lenGIArray), "In format 4 cmap, range (%d), the calculated index (%d) into the glyph index array is not less than the length of the array (%d) !" % (i, index, lenGIArray)
					if glyphIndexArray[index] != 0:  # if not missing glyph
						glyphID = glyphIndexArray[index] + delta
					else:
						glyphID = 0  # missing glyph
					gids.append(glyphID & 0xFFFF)

		self.cmap = _make_map(self.ttFont, charCodes, gids)

	def compile(self, ttFont):
		"""Return the binary subtable.

		Raw data that is still pending is written back unchanged behind a
		fresh header; otherwise the segments are built from ``self.cmap``.
		An empty cmap produces only the closing 0xFFFF segment.

		Raises KeyError(name) for a glyph name that cannot be turned into a
		glyph ID. Names of the form ``gid<N>`` that the font does not know
		are accepted as the literal glyph ID N.
		"""
		if self.data:
			return struct.pack(">HHH", self.format, self.length, self.language) + self.data

		charCodes = list(self.cmap.keys())
		if not charCodes:
			startCode = [0xffff]
			endCode = [0xffff]
		else:
			charCodes.sort()
			names = [self.cmap[code] for code in charCodes]
			nameMap = ttFont.getReverseGlyphMap()
			try:
				gids = [nameMap[name] for name in names]
			except KeyError:
				nameMap = ttFont.getReverseGlyphMap(rebuild=True)
				try:
					gids = [nameMap[name] for name in names]
				except KeyError:
					# allow virtual GIDs in format 4 tables
					gids = []
					for name in names:
						try:
							gid = nameMap[name]
						except KeyError:
							try:
								if (name[:3] == 'gid'):
									gid = int(name[3:])
								else:
									gid = ttFont.getGlyphID(name)
							except Exception:
								# Any lookup/parse failure is reported as an
								# unknown glyph name; KeyboardInterrupt and
								# SystemExit are deliberately not intercepted.
								raise KeyError(name)

						gids.append(gid)
			cmap = {}  # code:glyphID mapping
			for code, gid in zip(charCodes, gids):
				cmap[code] = gid

			# Build startCode and endCode lists.
			# Split the char codes in ranges of consecutive char codes, then split
			# each range in more ranges of consecutive/not consecutive glyph IDs.
			# See splitRange().
			lastCode = charCodes[0]
			endCode = []
			startCode = [lastCode]
			for charCode in charCodes[1:]:  # skip the first code, it's the first start code
				if charCode == lastCode + 1:
					lastCode = charCode
					continue
				start, end = splitRange(startCode[-1], lastCode, cmap)
				startCode.extend(start)
				endCode.extend(end)
				startCode.append(charCode)
				lastCode = charCode
			start, end = splitRange(startCode[-1], lastCode, cmap)
			startCode.extend(start)
			endCode.extend(end)
			startCode.append(0xffff)
			endCode.append(0xffff)

		# Build idDelta, idRangeOffset and glyphIndexArray for every segment.
		# (With an empty cmap the loop below does not run, so the local `cmap`
		# that only exists in the non-empty branch is never touched.)
		idDelta = []
		idRangeOffset = []
		glyphIndexArray = []
		for i in range(len(endCode)-1):  # skip the closing codes (0xffff)
			indices = []
			for charCode in range(startCode[i], endCode[i] + 1):
				indices.append(cmap[charCode])
			if (indices == list(range(indices[0], indices[0] + len(indices)))):
				# Consecutive glyph IDs: a single delta describes the segment.
				idDelta.append((indices[0] - startCode[i]) % 0x10000)
				idRangeOffset.append(0)
			else:
				# Otherwise the glyph IDs go into glyphIndexArray. The offset
				# is in bytes, counted from this segment's idRangeOffset
				# entry: the remaining (len(endCode) - i) entries of that
				# array plus whatever glyphIndexArray already holds.
				idDelta.append(0)
				idRangeOffset.append(2 * (len(endCode) + len(glyphIndexArray) - i))
				glyphIndexArray.extend(indices)
		idDelta.append(1)  # 0xffff + 1 == (tadaa!) 0. So this end code maps to .notdef
		idRangeOffset.append(0)

		# Header fields derived from the segment count.
		segCount = len(endCode)
		segCountX2 = segCount * 2
		searchRange, entrySelector, rangeShift = getSearchRange(segCount, 2)

		# The [0] between endCode and startCode is the reservedPad field.
		charCodeArray = array.array("H", endCode + [0] + startCode)
		idDeltaArray = array.array("H", idDelta)
		restArray = array.array("H", idRangeOffset + glyphIndexArray)
		if sys.byteorder != "big": charCodeArray.byteswap()
		if sys.byteorder != "big": idDeltaArray.byteswap()
		if sys.byteorder != "big": restArray.byteswap()
		data = charCodeArray.tobytes() + idDeltaArray.tobytes() + restArray.tobytes()

		length = struct.calcsize(cmap_format_4_format) + len(data)
		header = struct.pack(cmap_format_4_format, self.format, length, self.language,
				segCountX2, searchRange, entrySelector, rangeShift)
		return header + data

	def fromXML(self, name, attrs, content, ttFont):
		"""Read ``language`` from ``attrs`` and add every ``map`` child element
		in ``content`` to ``self.cmap`` (created if it does not exist yet).

		Unlike the format 0 and 2 readers, a child element with any other
		name trips an assertion instead of being skipped.
		"""
		self.language = safeEval(attrs["language"])
		if not hasattr(self, "cmap"):
			self.cmap = {}
		cmap = self.cmap

		for element in content:
			if not isinstance(element, tuple):
				continue
			nameMap, attrsMap, dummyContent = element
			if nameMap != "map":
				assert 0, "Unrecognized keyword in cmap subtable"
			cmap[safeEval(attrsMap["code"])] = attrsMap["name"]
849
850
class cmap_format_6(CmapSubtable):
	"""cmap subtable format 6: one contiguous run of character codes.

	After the common header the binary payload holds the first character
	code, an entry count, and one 16-bit glyph ID per character code.
	"""

	def decompile(self, data, ttFont):
		"""Fill ``self.cmap`` from the binary subtable.

		Usually reached indirectly through the subtable ``__getattr__``, in
		which case both arguments are None and the data saved by
		``decompileHeader()`` is parsed.  A direct caller must supply both
		``data`` and ``ttFont``.
		"""
		if data is None or ttFont is None:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"
		else:
			self.decompileHeader(data, ttFont)

		# decompileHeader leaves everything after the header in self.data
		payload = self.data
		firstCode, entryCount = struct.unpack(">HH", payload[:4])
		# The payload may be longer than 2 * entryCount bytes (seen in Apple's
		# Helvetica), so only the declared number of entries is read.
		glyphIDs = array.array("H")
		glyphIDs.frombytes(payload[4:4 + 2 * entryCount])
		if sys.byteorder != "big":
			glyphIDs.byteswap()
		self.data = None

		codes = list(range(firstCode, firstCode + len(glyphIDs)))
		self.cmap = _make_map(self.ttFont, codes, glyphIDs)

	def compile(self, ttFont):
		"""Return the binary form of this subtable.

		While raw data is still held in ``self.data`` it is re-emitted behind
		a rebuilt 6-byte header.  Otherwise the range from the lowest to the
		highest mapped code is written out, with glyph ID 0 for every code in
		that range that has no mapping.
		"""
		if self.data:
			rawHeader = struct.pack(">HHH", self.format, self.length, self.language)
			return rawHeader + self.data

		mapping = self.cmap
		mappedCodes = sorted(mapping.keys())
		if not mappedCodes:
			# yes, there are empty cmap tables.
			firstCode = 0
			entryCount = 0
			payload = b""
		else:
			firstCode = mappedCodes[0]
			glyphIDs = []
			for code in range(firstCode, mappedCodes[-1] + 1):
				if code in mapping:
					glyphIDs.append(ttFont.getGlyphID(mapping[code]))
				else:
					glyphIDs.append(0)
			entryCount = len(glyphIDs)
			packed = array.array("H", glyphIDs)
			if sys.byteorder != "big":
				packed.byteswap()
			payload = packed.tobytes()

		# 10 == size of the five 16-bit header fields packed below
		return struct.pack(">HHHHH",
				6, len(payload) + 10, self.language, firstCode, entryCount) + payload

	def fromXML(self, name, attrs, content, ttFont):
		"""Populate this subtable from its parsed XML element.

		Reads the ``language`` attribute and stores each ``<map>`` child as
		``self.cmap[code] = name``; anything else in ``content`` is ignored.
		"""
		self.language = safeEval(attrs["language"])
		if not hasattr(self, "cmap"):
			self.cmap = {}
		mapping = self.cmap

		for child in content:
			if isinstance(child, tuple):
				tag, tagAttrs, _ = child
				if tag == "map":
					mapping[safeEval(tagAttrs["code"])] = tagAttrs["name"]
909
910
class cmap_format_12_or_13(CmapSubtable):
	"""Shared implementation of cmap subtable formats 12 and 13.

	Both formats consist of a 16-byte header followed by ``nGroups`` 12-byte
	groups of (startCharCode, endCharCode, glyphID).  Subclasses provide
	``_format_step``, ``_computeGIDs()`` and ``_IsInSameRun()``, which define
	how glyph IDs progress inside one group.
	"""

	def __init__(self, format):
		self.format = format
		self.reserved = 0
		self.data = None
		self.ttFont = None

	def decompileHeader(self, data, ttFont):
		"""Parse the 16-byte header and keep the group records in ``self.data``.

		Asserts that ``len(data)``, the declared length and the size implied
		by ``nGroups`` all agree.
		"""
		format, reserved, length, language, nGroups = struct.unpack(">HHLLL", data[:16])
		assert len(data) == (16 + nGroups*12) == (length), "corrupt cmap table format %d (data length: %d, header length: %d)" % (self.format, len(data), length)
		self.format = format
		self.reserved = reserved
		self.length = length
		self.language = language
		self.nGroups = nGroups
		self.data = data[16:]
		self.ttFont = ttFont

	def decompile(self, data, ttFont):
		"""Fill ``self.cmap`` from the binary group records.

		We usually get here indirectly from the subtable ``__getattr__``
		function, in which case both args must be None.  If not, someone is
		calling decompile() directly, and must provide both args.
		"""
		if data is not None and ttFont is not None:
			self.decompileHeader(data, ttFont)
		else:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"

		data = self.data # decompileHeader assigns the data after the header to self.data
		charCodes = []
		gids = []
		pos = 0
		for i in range(self.nGroups):
			startCharCode, endCharCode, glyphID = struct.unpack(">LLL", data[pos:pos+12])
			pos += 12
			lenGroup = 1 + endCharCode - startCharCode
			charCodes.extend(range(startCharCode, endCharCode + 1))
			# the subclass decides how glyph IDs progress within the group
			gids.extend(self._computeGIDs(glyphID, lenGroup))
		self.data = data = None
		self.cmap = _make_map(self.ttFont, charCodes, gids)

	def compile(self, ttFont):
		"""Return the binary form of this subtable.

		While raw data is still held in ``self.data`` it is re-emitted behind
		a rebuilt header.  Otherwise ``self.cmap`` is converted to glyph IDs
		and packed into groups, a new group being started whenever the
		subclass' ``_IsInSameRun()`` reports a break.  An empty ``self.cmap``
		yields a header with zero groups.

		Raises KeyError(name) for a glyph name that cannot be resolved to a
		glyph ID.
		"""
		if self.data:
			return struct.pack(">HHLLL", self.format, self.reserved, self.length, self.language, self.nGroups) + self.data
		charCodes = list(self.cmap.keys())
		if not charCodes:
			# Nothing to map: emit a bare 16-byte header with zero groups
			# rather than failing on charCodes[0] further down.
			return struct.pack(">HHLLL", self.format, self.reserved, 16, self.language, 0)
		names = list(self.cmap.values())
		nameMap = ttFont.getReverseGlyphMap()
		try:
			gids = [nameMap[name] for name in names]
		except KeyError:
			nameMap = ttFont.getReverseGlyphMap(rebuild=True)
			try:
				gids = [nameMap[name] for name in names]
			except KeyError:
				# allow virtual GIDs in format 12 tables
				gids = []
				for name in names:
					try:
						gid = nameMap[name]
					except KeyError:
						try:
							if (name[:3] == 'gid'):
								gid = int(name[3:])
							else:
								gid = ttFont.getGlyphID(name)
						except Exception:
							# report every resolution failure as a missing glyph name
							raise KeyError(name)

					gids.append(gid)

		cmap = dict(zip(charCodes, gids))  # code:glyphID mapping

		charCodes.sort()
		startCharCode = charCodes[0]
		startGlyphID = cmap[startCharCode]
		# Prime the "previous" values so that the first code continues the
		# first run instead of closing an empty one.
		lastGlyphID = startGlyphID - self._format_step
		lastCharCode = startCharCode - 1
		nGroups = 0
		dataList = []
		for charCode in charCodes:
			glyphID = cmap[charCode]
			if not self._IsInSameRun(glyphID, lastGlyphID, charCode, lastCharCode):
				dataList.append(struct.pack(">LLL", startCharCode, lastCharCode, startGlyphID))
				startCharCode = charCode
				startGlyphID = glyphID
				nGroups = nGroups + 1
			lastGlyphID = glyphID
			lastCharCode = charCode
		# close the run that was still open when the loop ended
		dataList.append(struct.pack(">LLL", startCharCode, lastCharCode, startGlyphID))
		nGroups = nGroups + 1
		data = bytesjoin(dataList)
		lengthSubtable = len(data) + 16
		assert len(data) == (nGroups*12) == (lengthSubtable-16)
		return struct.pack(">HHLLL", self.format, self.reserved, lengthSubtable, self.language, nGroups) + data

	def toXML(self, writer, ttFont):
		"""Write the header fields as attributes, then the mappings sorted by code."""
		writer.begintag(self.__class__.__name__, [
				("platformID", self.platformID),
				("platEncID", self.platEncID),
				("format", self.format),
				("reserved", self.reserved),
				("length", self.length),
				("language", self.language),
				("nGroups", self.nGroups),
				])
		writer.newline()
		codes = sorted(self.cmap.items())
		self._writeCodes(codes, writer)
		writer.endtag(self.__class__.__name__)
		writer.newline()

	def fromXML(self, name, attrs, content, ttFont):
		"""Populate this subtable from its parsed XML element.

		Reads the header attributes and stores each ``<map>`` child as
		``self.cmap[code] = name``; anything else in ``content`` is ignored.
		"""
		self.format = safeEval(attrs["format"])
		self.reserved = safeEval(attrs["reserved"])
		self.length = safeEval(attrs["length"])
		self.language = safeEval(attrs["language"])
		self.nGroups = safeEval(attrs["nGroups"])
		if not hasattr(self, "cmap"):
			self.cmap = {}
		cmap = self.cmap

		for element in content:
			if not isinstance(element, tuple):
				continue
			# distinct names: do not clobber this method's own parameters
			childName, childAttrs, _ = element
			if childName != "map":
				continue
			cmap[safeEval(childAttrs["code"])] = childAttrs["name"]
1043
1044
class cmap_format_12(cmap_format_12_or_13):
	"""Format 12 flavour of the shared 12/13 code.

	Inside one group the glyph ID goes up by one for each successive
	character code.
	"""

	# glyph ID increment between neighbouring character codes of a group
	_format_step = 1

	def __init__(self, format=12):
		cmap_format_12_or_13.__init__(self, format)

	def _computeGIDs(self, startingGlyph, numberOfGlyphs):
		"""Return ``numberOfGlyphs`` consecutive glyph IDs beginning at ``startingGlyph``."""
		return [startingGlyph + offset for offset in range(numberOfGlyphs)]

	def _IsInSameRun(self, glyphID, lastGlyphID, charCode, lastCharCode):
		"""Tell whether both the character code and the glyph ID are one past the previous pair."""
		if charCode != lastCharCode + 1:
			return False
		return glyphID == lastGlyphID + 1
1057
1058
class cmap_format_13(cmap_format_12_or_13):
	"""Format 13 flavour of the shared 12/13 code.

	Every character code of a group maps to the same glyph ID.
	"""

	# glyph ID increment between neighbouring character codes of a group
	_format_step = 0

	def __init__(self, format=13):
		cmap_format_12_or_13.__init__(self, format)

	def _computeGIDs(self, startingGlyph, numberOfGlyphs):
		"""Return ``startingGlyph`` repeated ``numberOfGlyphs`` times."""
		return [startingGlyph for _ in range(numberOfGlyphs)]

	def _IsInSameRun(self, glyphID, lastGlyphID, charCode, lastCharCode):
		"""Tell whether the character code is one past the previous one with an unchanged glyph ID."""
		if charCode != lastCharCode + 1:
			return False
		return glyphID == lastGlyphID
1071
1072
def cvtToUVS(threeByteString):
	"""Decode a 3-byte big-endian value into an int.

	The bytes are left-padded with one zero byte so that they can be
	unpacked as an unsigned 32-bit integer.
	"""
	(value,) = struct.unpack(">L", b"\0" + threeByteString)
	return value
1077
def cvtFromUVS(val):
	"""Encode ``val`` as three big-endian bytes.

	Asserts that ``val`` fits in 24 bits; the value is packed as an unsigned
	32-bit integer and the leading byte is dropped.
	"""
	assert 0 <= val < 0x1000000
	return struct.pack(">L", val)[1:]
1082
1083
class cmap_format_14(CmapSubtable):
	"""cmap subtable format 14: Unicode variation sequences.

	The parsed content lives in ``self.uvsDict``, which maps each variation
	selector to a list of ``(baseCodepoint, glyphName)`` tuples; a glyph name
	of None marks a default mapping.  ``self.cmap`` is always an empty dict
	and exists only so that clients expecting that attribute keep working.
	"""

	def decompileHeader(self, data, ttFont):
		"""Parse the 10-byte header and keep the remaining bytes in ``self.data``."""
		format, length, numVarSelectorRecords = struct.unpack(">HLL", data[:10])
		self.data = data[10:]
		self.length = length
		self.numVarSelectorRecords = numVarSelectorRecords
		self.ttFont = ttFont
		self.language = 0xFF # has no language.

	def decompile(self, data, ttFont):
		"""Build ``self.uvsDict`` from the binary subtable.

		Pass both ``data`` and ``ttFont`` to parse a fresh subtable, or None
		for both to parse the data saved by an earlier ``decompileHeader()``.
		"""
		if data is not None and ttFont is not None:
			self.decompileHeader(data, ttFont)
		else:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"
		data = self.data

		self.cmap = {} # so that clients that expect this to exist in a cmap table won't fail.
		uvsDict = {}
		recOffset = 0
		for n in range(self.numVarSelectorRecords):
			uvs, defOVSOffset, nonDefUVSOffset = struct.unpack(">3sLL", data[recOffset:recOffset +11])
			recOffset += 11
			varUVS = cvtToUVS(uvs)
			if defOVSOffset:
				# stored offsets include the 10-byte header that decompileHeader stripped
				startOffset = defOVSOffset - 10
				numValues, = struct.unpack(">L", data[startOffset:startOffset+4])
				startOffset +=4
				for r in range(numValues):
					uv, addtlCnt = struct.unpack(">3sB", data[startOffset:startOffset+4])
					startOffset += 4
					firstBaseUV = cvtToUVS(uv)
					cnt = addtlCnt+1
					# default mappings carry no glyph name
					uvsDict.setdefault(varUVS, []).extend(
						(baseUV, None) for baseUV in range(firstBaseUV, firstBaseUV+cnt))

			if nonDefUVSOffset:
				startOffset = nonDefUVSOffset - 10
				numRecs, = struct.unpack(">L", data[startOffset:startOffset+4])
				startOffset +=4
				localUVList = []
				for r in range(numRecs):
					uv, gid = struct.unpack(">3sH", data[startOffset:startOffset+5])
					startOffset += 5
					uv = cvtToUVS(uv)
					glyphName = self.ttFont.getGlyphName(gid)
					localUVList.append((uv, glyphName))
				uvsDict.setdefault(varUVS, []).extend(localUVList)

		self.uvsDict = uvsDict

	def toXML(self, writer, ttFont):
		"""Write one ``<map>`` element per (base code point, variation selector) entry.

		Entries are grouped by variation selector; within a selector the
		default mappings (no ``name`` attribute) come first.  The lists stored
		in ``self.uvsDict`` are sorted in place.
		"""
		writer.begintag(self.__class__.__name__, [
				("platformID", self.platformID),
				("platEncID", self.platEncID),
				])
		writer.newline()
		uvsDict = self.uvsDict
		uvsList = sorted(uvsDict.keys())
		for uvs in uvsList:
			uvList = uvsDict[uvs]
			uvList.sort(key=lambda item: (item[1] is not None, item[0], item[1]))
			for uv, gname in uvList:
				attrs = [("uv", hex(uv)), ("uvs", hex(uvs))]
				if gname is not None:
					attrs.append(("name", gname))
				writer.simpletag("map", attrs)
				writer.newline()
		writer.endtag(self.__class__.__name__)
		writer.newline()

	def fromXML(self, name, attrs, content, ttFont):
		"""Populate ``self.uvsDict`` from the ``<map>`` children of the XML element.

		Entries are appended to an existing ``self.uvsDict`` when there is
		one.  A ``<map>`` without a ``name`` attribute is a default mapping
		and is stored with glyph name None.
		"""
		self.language = 0xFF # provide a value so that CmapSubtable.__lt__() won't fail
		if not hasattr(self, "cmap"):
			self.cmap = {} # so that clients that expect this to exist in a cmap table won't fail.
		if not hasattr(self, "uvsDict"):
			self.uvsDict = {}
		# Bind the local unconditionally: it must also exist when uvsDict was
		# already present on the instance.
		uvsDict = self.uvsDict

		# For backwards compatibility reasons we accept "None" as an indicator
		# for "default mapping", unless the font actually has a glyph named
		# "None".
		_hasGlyphNamedNone = None

		for element in content:
			if not isinstance(element, tuple):
				continue
			# distinct names: do not clobber this method's own parameters
			childName, childAttrs, _ = element
			if childName != "map":
				continue
			uvs = safeEval(childAttrs["uvs"])
			uv = safeEval(childAttrs["uv"])
			gname = childAttrs.get("name")
			if gname == "None":
				if _hasGlyphNamedNone is None:
					_hasGlyphNamedNone = "None" in ttFont.getGlyphOrder()
				if not _hasGlyphNamedNone:
					gname = None
			uvsDict.setdefault(uvs, []).append((uv, gname))

	def compile(self, ttFont):
		"""Return the binary form of this subtable.

		While raw data is still held in ``self.data`` it is re-emitted behind
		a rebuilt header.  Otherwise the variation selector records and their
		default / non-default tables are rebuilt from ``self.uvsDict``, and
		``self.length`` and ``self.numVarSelectorRecords`` are updated.
		"""
		if self.data:
			return struct.pack(">HLL", self.format, self.length, self.numVarSelectorRecords) + self.data

		uvsDict = self.uvsDict
		uvsList = sorted(uvsDict.keys())
		self.numVarSelectorRecords = len(uvsList)
		offset = 10 + self.numVarSelectorRecords*11 # current value is end of VarSelectorRecords block.
		data = []
		varSelectorRecords =[]
		for uvs in uvsList:
			entryList = uvsDict[uvs]

			defList = sorted(entry[0] for entry in entryList if entry[1] is None)
			if defList:
				defOVSOffset = offset

				# Collapse consecutive code points into (start, additionalCount)
				# ranges.  additionalCount is packed as one unsigned byte, so a
				# range is closed once it spans 256 code points.
				defRecs = []
				rangeStart = rangeEnd = defList[0]
				for uv in defList[1:]:
					if uv == rangeEnd + 1 and uv - rangeStart <= 0xFF:
						rangeEnd = uv
						continue
					defRecs.append(struct.pack(">3sB", cvtFromUVS(rangeStart), rangeEnd - rangeStart))
					rangeStart = rangeEnd = uv
				defRecs.append(struct.pack(">3sB", cvtFromUVS(rangeStart), rangeEnd - rangeStart))

				numDefRecs = len(defRecs)
				data.append(struct.pack(">L", numDefRecs))
				data.extend(defRecs)
				offset += 4 + numDefRecs*4
			else:
				defOVSOffset = 0

			ndefList = [entry for entry in entryList if entry[1] is not None]
			if ndefList:
				nonDefUVSOffset = offset
				ndefList.sort()
				numNonDefRecs = len(ndefList)
				data.append(struct.pack(">L", numNonDefRecs))
				offset += 4 + numNonDefRecs*5

				for uv, gname in ndefList:
					gid = ttFont.getGlyphID(gname)
					ndrec = struct.pack(">3sH", cvtFromUVS(uv), gid)
					data.append(ndrec)
			else:
				nonDefUVSOffset = 0

			vrec = struct.pack(">3sLL", cvtFromUVS(uvs), defOVSOffset, nonDefUVSOffset)
			varSelectorRecords.append(vrec)

		data = bytesjoin(varSelectorRecords) + bytesjoin(data)
		self.length = 10 + len(data)
		headerdata = struct.pack(">HLL", self.format, self.length, self.numVarSelectorRecords)

		return headerdata + data
1258
1259
class cmap_format_unknown(CmapSubtable):
	"""Subtable whose contents are kept as uninterpreted bytes.

	The raw data is stored unchanged in ``self.data``, dumped as hex by
	``toXML()`` and handed back as-is by ``compile()``.
	"""

	def toXML(self, writer, ttFont):
		"""Write the raw data as a hex dump inside a ``cmap_format_<format>`` element."""
		tagName = self.__class__.__name__[:12] + str(self.format)
		identity = [
				("platformID", self.platformID),
				("platEncID", self.platEncID),
				]
		writer.begintag(tagName, identity)
		writer.newline()
		writer.dumphex(self.data)
		writer.endtag(tagName)
		writer.newline()

	def fromXML(self, name, attrs, content, ttFont):
		"""Read the hex dump back into ``self.data`` and set an empty ``self.cmap``."""
		self.data = readHex(content)
		self.cmap = {}

	def decompileHeader(self, data, ttFont):
		"""Store ``data`` untouched; ``language`` only gets a placeholder value."""
		self.language = 0  # dummy value
		self.data = data

	def decompile(self, data, ttFont):
		"""Store the raw data when called directly; nothing is parsed.

		Usually reached indirectly through the subtable ``__getattr__``, in
		which case both arguments are None.  A direct caller must supply both
		``data`` and ``ttFont``.
		"""
		if data is None or ttFont is None:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"
		else:
			self.decompileHeader(data, ttFont)

	def compile(self, ttFont):
		"""Return the stored raw data, or None when there is none."""
		if not self.data:
			return None
		return self.data
1294
# Maps a cmap subtable format number to the class that handles it.
cmap_classes = {
	0: cmap_format_0, 2: cmap_format_2, 4: cmap_format_4, 6: cmap_format_6,
	12: cmap_format_12, 13: cmap_format_13, 14: cmap_format_14,
}
1304