• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1from __future__ import print_function, division, absolute_import
2from fontTools.misc.py23 import *
3from fontTools.misc.textTools import safeEval, readHex
4from fontTools.misc.encodingTools import getEncoding
5from fontTools.ttLib import getSearchRange
6from fontTools.unicode import Unicode
7from . import DefaultTable
8import sys
9import struct
10import array
11import logging
12
13
# Module-level logger named after this module; table__c_m_a_p.decompile()
# uses it to report subtables whose header declares a zero length.
log = logging.getLogger(__name__)
15
16
17def _make_map(font, chars, gids):
18	assert len(chars) == len(gids)
19	cmap = {}
20	glyphOrder = font.getGlyphOrder()
21	for char,gid in zip(chars,gids):
22		if gid is 0:
23			continue
24		try:
25			name = glyphOrder[gid]
26		except IndexError:
27			name = font.getGlyphName(gid)
28		cmap[char] = name
29	return cmap
30
class table__c_m_a_p(DefaultTable.DefaultTable):
	"""The 'cmap' table: a version number plus a list of CmapSubtable objects.

	The subtables are kept in self.tables; the table version is kept in
	self.tableVersion.
	"""

	def getcmap(self, platformID, platEncID):
		"""Return the first subtable matching platformID and platEncID, or
		None if there is no such subtable."""
		for subtable in self.tables:
			if (subtable.platformID == platformID and
					subtable.platEncID == platEncID):
				return subtable
		return None # not found

	def getBestCmap(self, cmapPreferences=((3, 10), (0, 6), (0, 4), (3, 1), (0, 3), (0, 2), (0, 1), (0, 0))):
		"""Return the 'best' unicode cmap dictionary available in the font,
		or None, if no unicode cmap subtable is available.

		By default it will search for the following (platformID, platEncID)
		pairs:
			(3, 10), (0, 6), (0, 4), (3, 1), (0, 3), (0, 2), (0, 1), (0, 0)
		This can be customized via the cmapPreferences argument.
		"""
		for platformID, platEncID in cmapPreferences:
			cmapSubtable = self.getcmap(platformID, platEncID)
			if cmapSubtable is not None:
				return cmapSubtable.cmap
		return None  # None of the requested cmap subtables were found

	def buildReversed(self):
		"""Returns a reverse cmap such as {'one':{0x31}, 'A':{0x41,0x391}}.

		The values are sets of Unicode codepoints because
		some fonts map different codepoints to the same glyph.
		For example, U+0041 LATIN CAPITAL LETTER A and U+0391
		GREEK CAPITAL LETTER ALPHA are sometimes the same glyph.
		"""
		result = {}
		for subtable in self.tables:
			if subtable.isUnicode():
				for codepoint, name in subtable.cmap.items():
					result.setdefault(name, set()).add(codepoint)
		return result

	def decompile(self, data, ttFont):
		"""Parse the binary cmap table in data into self.tableVersion and
		self.tables.

		Only each subtable's header is decoded here; the mapping itself is
		decompiled lazily by the subtable.  Encoding records that point at
		an offset already seen share the cmap of the first subtable found at
		that offset.  Subtables whose declared length is zero are logged and
		skipped.
		"""
		tableVersion, numSubTables = struct.unpack(">HH", data[:4])
		self.tableVersion = int(tableVersion)
		self.tables = tables = []
		seenOffsets = {}  # subtable offset -> index of its subtable in `tables`
		for i in range(numSubTables):
			platformID, platEncID, offset = struct.unpack(
					">HHl", data[4+i*8:4+(i+1)*8])
			platformID, platEncID = int(platformID), int(platEncID)
			format, length = struct.unpack(">HH", data[offset:offset+4])
			# These formats store the length as a 32-bit value at a
			# different position in the header, so re-read it accordingly.
			if format in [8,10,12,13]:
				format, reserved, length = struct.unpack(">HHL", data[offset:offset+8])
			elif format in [14]:
				format, length = struct.unpack(">HL", data[offset:offset+6])

			if not length:
				log.error(
					"cmap subtable is reported as having zero length: platformID %s, "
					"platEncID %s, format %s offset %s. Skipping table.",
					platformID, platEncID, format, offset)
				continue
			table = CmapSubtable.newSubtable(format)
			table.platformID = platformID
			table.platEncID = platEncID
			# Note that by default we decompile only the subtable header info;
			# any other data gets decompiled only when an attribute of the
			# subtable is referenced.
			table.decompileHeader(data[offset:offset+int(length)], ttFont)
			if offset in seenOffsets:
				table.data = None # Mark as decompiled
				table.cmap = tables[seenOffsets[offset]].cmap
			else:
				# Record the position in `tables`, not the loop index: the two
				# differ as soon as a zero-length subtable has been skipped.
				seenOffsets[offset] = len(tables)
			tables.append(table)

	def compile(self, ttFont):
		"""Return the binary cmap table built from self.tables.

		self.tables is sorted in place first.  Subtables that share the same
		cmap object, or that compile to identical bytes, are written once and
		referenced by several encoding records.
		"""
		self.tables.sort()  # sort according to the spec; see CmapSubtable.__lt__()
		numSubTables = len(self.tables)
		totalOffset = 4 + 8 * numSubTables
		data = struct.pack(">HH", self.tableVersion, numSubTables)
		tableData = b""
		seen = {}  # Some tables are the same object reference. Don't compile them twice.
		done = {}  # Some tables are different objects, but compile to the same data chunk
		for table in self.tables:
			try:
				offset = seen[id(table.cmap)]
			except KeyError:
				chunk = table.compile(ttFont)
				if chunk in done:
					offset = done[chunk]
				else:
					offset = seen[id(table.cmap)] = done[chunk] = totalOffset + len(tableData)
					tableData = tableData + chunk
			data = data + struct.pack(">HHl", table.platformID, table.platEncID, offset)
		return data + tableData

	def toXML(self, writer, ttFont):
		"""Write the table version followed by every subtable to writer."""
		writer.simpletag("tableVersion", version=self.tableVersion)
		writer.newline()
		for table in self.tables:
			table.toXML(writer, ttFont)

	def fromXML(self, name, attrs, content, ttFont):
		"""Consume one XML element of the table.

		A "tableVersion" element sets self.tableVersion.  An element named
		"cmap_format_<N>" creates a subtable of format N, lets it read its
		own content and appends it to self.tables.  Any other element is
		ignored.
		"""
		if name == "tableVersion":
			self.tableVersion = safeEval(attrs["version"])
			return
		if name[:12] != "cmap_format_":
			return
		if not hasattr(self, "tables"):
			self.tables = []
		format = safeEval(name[12:])
		table = CmapSubtable.newSubtable(format)
		table.platformID = safeEval(attrs["platformID"])
		table.platEncID = safeEval(attrs["platEncID"])
		table.fromXML(name, attrs, content, ttFont)
		self.tables.append(table)
146
147
class CmapSubtable(object):
	"""Base class of the cmap subtable formats.

	Holds the subtable format, the not-yet-decompiled data (self.data, or
	None) and the font it was read from.  Format-specific subclasses provide
	decompile(), which __getattr__ calls on demand.
	"""

	@staticmethod
	def getSubtableClass(format):
		"""Return the subtable class for a format."""
		return cmap_classes.get(format, cmap_format_unknown)

	@staticmethod
	def newSubtable(format):
		"""Return a new instance of a subtable for format."""
		subtableClass = CmapSubtable.getSubtableClass(format)
		return subtableClass(format)

	def __init__(self, format):
		self.format = format
		self.data = None
		self.ttFont = None

	def __getattr__(self, attr):
		"""Decompile the saved data on first access to a missing attribute.

		Raises AttributeError for dunder names and when there is no saved
		data left to decompile.
		"""
		# allow lazy decompilation of subtables.
		if attr[:2] == '__': # don't handle requests for member functions like '__lt__'
			raise AttributeError(attr)
		if self.data is None:
			raise AttributeError(attr)
		self.decompile(None, None) # use saved data.
		self.data = None	# Once this table has been decompiled, make sure we don't
							# just return the original data. Also avoids recursion when
							# called with an attribute that the cmap subtable doesn't have.
		return getattr(self, attr)

	def decompileHeader(self, data, ttFont):
		"""Read format, length and language from the first six bytes of data.

		The remaining bytes are stored in self.data for later decompilation
		and ttFont is remembered in self.ttFont.  Asserts that the length in
		the header equals len(data).
		"""
		format, length, language = struct.unpack(">HHH", data[:6])
		assert len(data) == length, "corrupt cmap table format %d (data length: %d, header length: %d)" % (format, len(data), length)
		self.format = int(format)
		self.length = int(length)
		self.language = int(language)
		self.data = data[6:]
		self.ttFont = ttFont

	def toXML(self, writer, ttFont):
		"""Write this subtable as an element named after its class, with one
		"map" entry per character code in ascending order."""
		writer.begintag(self.__class__.__name__, [
				("platformID", self.platformID),
				("platEncID", self.platEncID),
				("language", self.language),
				])
		writer.newline()
		codes = sorted(self.cmap.items())
		self._writeCodes(codes, writer)
		writer.endtag(self.__class__.__name__)
		writer.newline()

	def getEncoding(self, default=None):
		"""Returns the Python encoding name for this cmap subtable based on its platformID,
		platEncID, and language.  If encoding for these values is not known, by default
		None is returned.  That can be overriden by passing a value to the default
		argument.

		Note that if you want to choose a "preferred" cmap subtable, most of the time
		self.isUnicode() is what you want as that one only returns true for the modern,
		commonly used, Unicode-compatible triplets, not the legacy ones.
		"""
		return getEncoding(self.platformID, self.platEncID, self.language, default)

	def isUnicode(self):
		"""Return True for platformID 0, or platformID 3 with platEncID 0, 1
		or 10."""
		return (self.platformID == 0 or
			(self.platformID == 3 and self.platEncID in [0, 1, 10]))

	def isSymbol(self):
		"""Return True for platformID 3 with platEncID 0."""
		return self.platformID == 3 and self.platEncID == 0

	def _writeCodes(self, codes, writer):
		"""Write one "map" tag per (code, name) pair; Unicode subtables also
		get a comment looked up in the Unicode table for each code."""
		isUnicode = self.isUnicode()
		for code, name in codes:
			writer.simpletag("map", code=hex(code), name=name)
			if isUnicode:
				writer.comment(Unicode[code])
			writer.newline()

	def __lt__(self, other):
		"""Order subtables by (platformID, platEncID, language).

		Returns NotImplemented when other is not a CmapSubtable.  Missing
		attributes count as None.
		"""
		if not isinstance(other, CmapSubtable):
			return NotImplemented

		# implemented so that list.sort() sorts according to the spec.
		selfKey = (
			getattr(self, "platformID", None),
			getattr(self, "platEncID", None),
			getattr(self, "language", None))
		otherKey = (
			getattr(other, "platformID", None),
			getattr(other, "platEncID", None),
			getattr(other, "language", None))
		if selfKey != otherKey:
			return selfKey < otherKey
		# Tie on the spec-defined key: fall back to comparing the instance
		# dicts where the interpreter can order them.  Python 3 cannot, so
		# report "not less than" there, which leaves the two subtables in
		# their existing relative order instead of aborting the sort.
		try:
			return self.__dict__ < other.__dict__
		except TypeError:
			return False
242
243
class cmap_format_0(CmapSubtable):
	"""cmap subtable format 0: 256 one-byte glyph IDs, one per character
	code 0..255, after the six-byte header (262 bytes in total)."""

	def decompile(self, data, ttFont):
		"""Build self.cmap from the byte array of glyph IDs.

		Either both data and ttFont are given (direct call) or both are None
		(lazy call from __getattr__, which uses the data saved by
		decompileHeader()).
		"""
		# we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
		# If not, someone is calling the subtable decompile() directly, and must provide both args.
		if data is not None and ttFont is not None:
			self.decompileHeader(data, ttFont)
		else:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"
		assert 262 == self.length, "Format 0 cmap subtable not 262 bytes"
		# decompileHeader assigns the data after the header to self.data.
		# Passing the bytes to the array constructor works on Python 2 and 3;
		# array.fromstring() no longer exists on Python 3.9+.
		gids = array.array("B", self.data)
		charCodes = list(range(len(gids)))
		self.cmap = _make_map(self.ttFont, charCodes, gids)

	def compile(self, ttFont):
		"""Return the 262-byte binary subtable.

		If undecompiled data is still present it is written back unchanged;
		otherwise the glyph ID of every mapped code is looked up through
		ttFont and unmapped codes get glyph ID 0.
		"""
		if self.data:
			return struct.pack(">HHH", 0, 262, self.language) + self.data

		cmap = self.cmap
		assert set(cmap.keys()).issubset(range(256))
		getGlyphID = ttFont.getGlyphID
		valueList = [getGlyphID(cmap[i]) if i in cmap else 0 for i in range(256)]

		gids = array.array("B", valueList)
		# array.tostring() was removed in Python 3.9; tobytes() is missing on
		# Python 2, so pick whichever this interpreter offers.
		gidBytes = gids.tobytes() if hasattr(gids, "tobytes") else gids.tostring()
		data = struct.pack(">HHH", 0, 262, self.language) + gidBytes
		assert len(data) == 262
		return data

	def fromXML(self, name, attrs, content, ttFont):
		"""Read the language attribute and add every "map" child element to
		self.cmap (created if it does not exist yet)."""
		self.language = safeEval(attrs["language"])
		if not hasattr(self, "cmap"):
			self.cmap = {}
		cmap = self.cmap
		for element in content:
			if not isinstance(element, tuple):
				continue
			name, attrs, content = element
			if name != "map":
				continue
			cmap[safeEval(attrs["code"])] = attrs["name"]
286
287
# struct layout of one cmap format 2 subheader, big-endian: firstCode and
# entryCount (unsigned 16-bit), idDelta (signed 16-bit) and idRangeOffset
# (unsigned 16-bit).
subHeaderFormat = ">HHhH"
class SubHeader(object):
	"""Plain record for one cmap format 2 subheader and its glyph index
	sub-array."""

	def __init__(self):
		# The four header fields stay None until the caller fills them in.
		self.firstCode = self.entryCount = None
		self.idDelta = self.idRangeOffset = None
		# Every instance gets its own, initially empty, list.
		self.glyphIndexArray = []
296
class cmap_format_2(CmapSubtable):
	"""cmap subtable format 2: mixed one-byte / two-byte character codes,
	mapped through 256 subheader keys, a list of subheaders and a shared
	glyph index array."""

	def setIDDelta(self, subHeader):
		"""Choose subHeader.idDelta and rebase its glyphIndexArray in place.

		The delta is picked so that the smallest non-zero stored glyph ID
		becomes 1; zero entries are left untouched.
		"""
		subHeader.idDelta = 0
		# find the minGI which is not zero.
		minGI = subHeader.glyphIndexArray[0]
		for gid in subHeader.glyphIndexArray:
			if (gid != 0) and (gid < minGI):
				minGI = gid
		# The lowest gid in glyphIndexArray, after subtracting idDelta, must be 1.
		# idDelta is a short, and must be between -32K and 32K. minGI can be between 1 and 64K.
		# We would like to pick an idDelta such that the first glyphArray GID is 1,
		# so that we are more likely to be able to combine glypharray GID subranges.
		# This means that we have a problem when minGI is > 32K
		# Since the final gi is reconstructed from the glyphArray GID by:
		#    (short)finalGID = (gid + idDelta) % 0x10000),
		# we can get from a glypharray GID of 1 to a final GID of 65K by subtracting 2, and casting the
		# negative number to an unsigned short.

		if (minGI > 1):
			if minGI > 0x7FFF:
				subHeader.idDelta = -(0x10000 - minGI) -1
			else:
				subHeader.idDelta = minGI -1
			idDelta = subHeader.idDelta
			for i in range(subHeader.entryCount):
				gid = subHeader.glyphIndexArray[i]
				if gid > 0:
					subHeader.glyphIndexArray[i] = gid - idDelta

	def decompile(self, data, ttFont):
		"""Build self.cmap from the subheader keys, subheaders and glyph
		index array.

		Either both data and ttFont are given (direct call) or both are None
		(lazy call from __getattr__, which uses the data saved by
		decompileHeader()).
		"""
		# we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
		# If not, someone is calling the subtable decompile() directly, and must provide both args.
		if data is not None and ttFont is not None:
			self.decompileHeader(data, ttFont)
		else:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"

		data = self.data # decompileHeader assigns the data after the header to self.data
		# get the key array, and determine the number of subHeaders.
		# Passing the bytes to the array constructor works on Python 2 and 3;
		# array.fromstring() no longer exists on Python 3.9+.
		allKeys = array.array("H", data[:512])
		data = data[512:]
		if sys.byteorder != "big": allKeys.byteswap()
		subHeaderKeys = [ key//8 for key in allKeys]
		maxSubHeaderindex = max(subHeaderKeys)

		#Load subHeaders
		subHeaderList = []
		pos = 0
		for i in range(maxSubHeaderindex + 1):
			subHeader = SubHeader()
			(subHeader.firstCode, subHeader.entryCount, subHeader.idDelta, \
				subHeader.idRangeOffset) = struct.unpack(subHeaderFormat, data[pos:pos + 8])
			pos += 8
			giDataPos = pos + subHeader.idRangeOffset-2
			giList = array.array("H", data[giDataPos:giDataPos + subHeader.entryCount*2])
			if sys.byteorder != "big": giList.byteswap()
			subHeader.glyphIndexArray = giList
			subHeaderList.append(subHeader)
		# How this gets processed.
		# Charcodes may be one or two bytes.
		# The first byte of a charcode is mapped through the subHeaderKeys, to select
		# a subHeader. For any subheader but 0, the next byte is then mapped through the
		# selected subheader. If subheader Index 0 is selected, then the byte itself is
		# mapped through the subheader, and there is no second byte.
		# Then assume that the subsequent byte is the first byte of the next charcode,and repeat.
		#
		# Each subheader references a range in the glyphIndexArray whose length is entryCount.
		# The range in glyphIndexArray referenced by a subheader may overlap with the range in glyphIndexArray
		# referenced by another subheader.
		# The only subheader that will be referenced by more than one first-byte value is the subheader
		# that maps the entire range of glyphID values to glyphIndex 0, e.g notdef:
		#	 {firstChar 0, EntryCount 0,idDelta 0,idRangeOffset xx}
		# A byte being mapped though a subheader is treated as in index into a mapping of array index to font glyphIndex.
		# A subheader specifies a subrange within (0...256) by the
		# firstChar and EntryCount values. If the byte value is outside the subrange, then the glyphIndex is zero
		# (e.g. glyph not in font).
		# If the byte index is in the subrange, then an offset index is calculated as (byteIndex - firstChar).
		# The index to glyphIndex mapping is a subrange of the glyphIndexArray. You find the start of the subrange by
		# counting idRangeOffset bytes from the idRangeOffset word. The first value in this subrange is the
		# glyphIndex for the index firstChar. The offset index should then be used in this array to get the glyphIndex.
		# Example for Logocut-Medium
		# first byte of charcode = 129; selects subheader 1.
		# subheader 1 = {firstChar 64, EntryCount 108,idDelta 42,idRangeOffset 0252}
		# second byte of charCode = 66
		# the index offset = 66-64 = 2.
		# The subrange of the glyphIndexArray starting at 0x0252 bytes from the idRangeOffset word is:
		# [glyphIndexArray index], [subrange array index] = glyphIndex
		# [256], [0]=1 	from charcode [129, 64]
		# [257], [1]=2  	from charcode [129, 65]
		# [258], [2]=3  	from charcode [129, 66]
		# [259], [3]=4  	from charcode [129, 67]
		# So, the glyphIndex = 3 from the array. Then if idDelta is not zero and the glyph ID is not zero,
		# add it to the glyphID to get the final glyphIndex
		# value. In this case the final glyph index = 3+ 42 -> 45 for the final glyphIndex. Whew!

		self.data = b""
		cmap = {}
		for firstByte in range(256):
			subHeadindex = subHeaderKeys[firstByte]
			subHeader = subHeaderList[subHeadindex]
			if subHeadindex == 0:
				if (firstByte < subHeader.firstCode) or (firstByte >= subHeader.firstCode + subHeader.entryCount):
					continue # gi is notdef.
				else:
					charCode = firstByte
					offsetIndex = firstByte - subHeader.firstCode
					gi = subHeader.glyphIndexArray[offsetIndex]
					if gi != 0:
						gi = (gi + subHeader.idDelta) % 0x10000
					else:
						continue # gi is notdef.
				cmap[charCode] = gi
			else:
				if subHeader.entryCount:
					charCodeOffset = firstByte * 256 + subHeader.firstCode
					for offsetIndex in range(subHeader.entryCount):
						charCode = charCodeOffset + offsetIndex
						gi = subHeader.glyphIndexArray[offsetIndex]
						if gi != 0:
							gi = (gi + subHeader.idDelta) % 0x10000
						else:
							continue
						cmap[charCode] = gi
				# If not subHeader.entryCount, then all char codes with this first byte are
				# mapped to .notdef. We can skip this subtable, and leave the glyphs un-encoded, which is the
				# same as mapping it to .notdef.

		gids = list(cmap.values())
		charCodes = list(cmap.keys())
		self.cmap = _make_map(self.ttFont, charCodes, gids)

	def compile(self, ttFont):
		"""Return the binary subtable.

		If undecompiled data is still present it is written back unchanged.
		Otherwise glyph names are resolved to glyph IDs through ttFont (names
		of the form "gid<N>" are accepted as glyph ID N when the font does
		not know them) and the subheader structure is rebuilt from self.cmap.
		Raises KeyError(name) for a glyph name that cannot be resolved.
		"""
		if self.data:
			return struct.pack(">HHH", self.format, self.length, self.language) + self.data
		kEmptyTwoCharCodeRange = -1
		notdefGI = 0

		items = sorted(self.cmap.items())
		charCodes = [item[0] for item in items]
		names = [item[1] for item in items]
		nameMap = ttFont.getReverseGlyphMap()
		try:
			gids = [nameMap[name] for name in names]
		except KeyError:
			nameMap = ttFont.getReverseGlyphMap(rebuild=True)
			try:
				gids = [nameMap[name] for name in names]
			except KeyError:
				# allow virtual GIDs in format 2 tables
				gids = []
				for name in names:
					try:
						gid = nameMap[name]
					except KeyError:
						try:
							if (name[:3] == 'gid'):
								gid = int(name[3:])
							else:
								gid = ttFont.getGlyphID(name)
						except Exception:
							# Not a bare "except": KeyboardInterrupt and
							# SystemExit must not be turned into KeyError.
							raise KeyError(name)

					gids.append(gid)

		# Process the (char code to gid) item list in char code order.
		# By definition, all one byte char codes map to subheader 0.
		# For all the two byte char codes, we assume that the first byte maps to the empty subhead (with an entry count of 0,
		# which defines all char codes in its range to map to notdef) unless proven otherwise.
		# Note that since the char code items are processed in char code order, all the char codes with the
		# same first byte are in sequential order.

		subHeaderKeys = [kEmptyTwoCharCodeRange for x in range(256)] # list of indices into subHeaderList.
		subHeaderList = []

		# We force this subheader entry 0 to exist in the subHeaderList in the case where some one comes up
		# with a cmap where all the one byte char codes map to notdef,
		# with the result that the subhead 0 would not get created just by processing the item list.
		charCode = charCodes[0]
		if charCode > 255:
			subHeader = SubHeader()
			subHeader.firstCode = 0
			subHeader.entryCount = 0
			subHeader.idDelta = 0
			subHeader.idRangeOffset = 0
			subHeaderList.append(subHeader)

		lastFirstByte = -1
		items = zip(charCodes, gids)
		for charCode, gid in items:
			if gid == 0:
				continue
			firstbyte = charCode >> 8
			secondByte = charCode & 0x00FF

			if firstbyte != lastFirstByte: # Need to update the current subhead, and start a new one.
				if lastFirstByte > -1:
					# fix GI's and iDelta of current subheader.
					self.setIDDelta(subHeader)

					# If it was subheader 0 for one-byte charCodes, then we need to set the subHeaderKeys value to zero
					# for the indices matching the char codes.
					if lastFirstByte == 0:
						for index in range(subHeader.entryCount):
							charCode = subHeader.firstCode + index
							subHeaderKeys[charCode] = 0

					assert (subHeader.entryCount == len(subHeader.glyphIndexArray)), "Error - subhead entry count does not match len of glyphID subrange."
				# init new subheader
				subHeader = SubHeader()
				subHeader.firstCode = secondByte
				subHeader.entryCount = 1
				subHeader.glyphIndexArray.append(gid)
				subHeaderList.append(subHeader)
				subHeaderKeys[firstbyte] = len(subHeaderList) -1
				lastFirstByte = firstbyte
			else:
				# need to fill in with notdefs all the code points between the last charCode and the current charCode.
				codeDiff = secondByte - (subHeader.firstCode + subHeader.entryCount)
				for i in range(codeDiff):
					subHeader.glyphIndexArray.append(notdefGI)
				subHeader.glyphIndexArray.append(gid)
				subHeader.entryCount = subHeader.entryCount + codeDiff + 1

		# fix GI's and iDelta of last subheader that we added to the subheader array.
		self.setIDDelta(subHeader)

		# Now we add a final subheader for the subHeaderKeys which maps to empty two byte charcode ranges.
		subHeader = SubHeader()
		subHeader.firstCode = 0
		subHeader.entryCount = 0
		subHeader.idDelta = 0
		subHeader.idRangeOffset = 2
		subHeaderList.append(subHeader)
		emptySubheadIndex = len(subHeaderList) - 1
		for index in range(256):
			if subHeaderKeys[index] == kEmptyTwoCharCodeRange:
				subHeaderKeys[index] = emptySubheadIndex
		# Since this is the last subheader, the GlyphIndex Array starts two bytes after the start of the
		# idRangeOffset word of this subHeader. We can safely point to the first entry in the GlyphIndexArray,
		# since the first subrange of the GlyphIndexArray is for subHeader 0, which always starts with
		# charcode 0 and GID 0.

		idRangeOffset = (len(subHeaderList)-1)*8 + 2 # offset to beginning of glyphIDArray from first subheader idRangeOffset.
		subheadRangeLen = len(subHeaderList) -1 # skip last special empty-set subheader; we've already hardcoded its idRangeOffset to 2.
		for index in range(subheadRangeLen):
			subHeader = subHeaderList[index]
			subHeader.idRangeOffset = 0
			for j in range(index):
				prevSubhead = subHeaderList[j]
				if prevSubhead.glyphIndexArray == subHeader.glyphIndexArray: # use the glyphIndexArray subarray
					subHeader.idRangeOffset = prevSubhead.idRangeOffset - (index-j)*8
					subHeader.glyphIndexArray = []
					break
			if subHeader.idRangeOffset == 0: # didn't find one.
				subHeader.idRangeOffset = idRangeOffset
				idRangeOffset = (idRangeOffset - 8) + subHeader.entryCount*2 # one less subheader, one more subArray.
			else:
				idRangeOffset = idRangeOffset - 8  # one less subheader

		# Now we can write out the data!
		length = 6 + 512 + 8*len(subHeaderList) # header, 256 subHeaderKeys, and subheader array.
		for subhead in 	subHeaderList[:-1]:
			length = length + len(subhead.glyphIndexArray)*2  # We can't use subhead.entryCount, as some of the subhead may share subArrays.
		dataList = [struct.pack(">HHH", 2, length, self.language)]
		for index in subHeaderKeys:
			dataList.append(struct.pack(">H", index*8))
		for subhead in 	subHeaderList:
			dataList.append(struct.pack(subHeaderFormat, subhead.firstCode, subhead.entryCount, subhead.idDelta, subhead.idRangeOffset))
		for subhead in 	subHeaderList[:-1]:
			for gi in subhead.glyphIndexArray:
				dataList.append(struct.pack(">H", gi))
		data = bytesjoin(dataList)
		assert (len(data) == length), "Error: cmap format 2 is not same length as calculated! actual: " + str(len(data))+ " calc : " + str(length)
		return data

	def fromXML(self, name, attrs, content, ttFont):
		"""Read the language attribute and add every "map" child element to
		self.cmap (created if it does not exist yet)."""
		self.language = safeEval(attrs["language"])
		if not hasattr(self, "cmap"):
			self.cmap = {}
		cmap = self.cmap

		for element in content:
			if not isinstance(element, tuple):
				continue
			name, attrs, content = element
			if name != "map":
				continue
			cmap[safeEval(attrs["code"])] = attrs["name"]
591
592
# struct format of the fixed-size start of a format 4 subtable: seven
# big-endian uint16 values. cmap_format_4.compile() packs them in the order
# format, length, language, segCountX2, searchRange, entrySelector,
# rangeShift.
cmap_format_4_format = ">7H"

# Fields that follow that fixed-size part:
#uint16  endCode[segCount]          # Ending character code for each segment, last = 0xFFFF.
#uint16  reservedPad                # This value should be zero
#uint16  startCode[segCount]        # Starting character code for each segment
#uint16  idDelta[segCount]          # Delta for all character codes in segment
#uint16  idRangeOffset[segCount]    # Offset in bytes to glyph indexArray, or 0
#uint16  glyphIndexArray[variable]  # Glyph index array
601
def splitRange(startCode, endCode, cmap):
	"""Split the code range startCode..endCode into cmap4 segments.

	cmap maps every character code in the range to a glyph ID.  The range is
	cut around runs of consecutive glyph IDs whenever that is expected to
	make the format 4 subtable smaller.  This is a heuristic, not a proven
	optimum.

	Returns (start, end): the end codes of all resulting segments, and the
	start codes of all segments except the first (whose start is startCode).
	"""
	if startCode == endCode:
		return [], [endCode]

	# Step 1: collect the maximal runs of codes whose glyph IDs go up by one.
	runs = []
	runStart = None  # first code of the run in progress, None outside a run
	prevCode = startCode
	prevGID = cmap[startCode]
	for code in range(startCode + 1, endCode + 1):
		gid = cmap[code]
		if gid == prevGID + 1:
			if runStart is None:
				runStart = prevCode
		elif runStart is not None:
			runs.append((runStart, prevCode))
			runStart = None
		prevCode, prevGID = code, gid
	if runStart is not None:
		runs.append((runStart, prevCode))
	assert prevCode == endCode

	# Step 2: keep only the runs worth a segment of their own.  A segment
	# costs 8 bytes, a code stored in the glyph index array costs 2 bytes.
	kept = []
	for first, last in runs:
		if first == startCode and last == endCode:
			break  # the run covers the whole range: nothing to split
		# A run touching either edge adds one segment, an inner run adds two.
		touchesEdge = first == startCode or last == endCode
		minLength = 4 if touchesEdge else 8
		if last - first + 1 > minLength:
			kept.append((first, last))

	if not kept:
		return [], [endCode]

	# Step 3: lay out the kept runs in order and cover every gap before,
	# between and after them with a segment of non-consecutive glyph IDs.
	segments = []
	nextCode = startCode
	for first, last in kept:
		if first != nextCode:
			segments.append((nextCode, first - 1))
		segments.append((first, last))
		nextCode = last + 1
	if nextCode != endCode + 1:
		segments.append((nextCode, endCode))

	# Step 4: turn the segments into start/end lists; the first start code
	# is already known to the caller and is left out.
	start = [first for first, last in segments][1:]
	end = [last for first, last in segments]

	assert len(start) + 1 == len(end)
	return start, end
679
680
class cmap_format_4(CmapSubtable):
	"""cmap subtable format 4: 16-bit character codes described by segments
	(endCode, startCode, idDelta, idRangeOffset) plus a glyph index array."""

	def decompile(self, data, ttFont):
		"""Build self.cmap from the segment arrays.

		Either both data and ttFont are given (direct call) or both are None
		(lazy call from __getattr__, which uses the data saved by
		decompileHeader()).  The final segment is not decoded.
		"""
		# we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
		# If not, someone is calling the subtable decompile() directly, and must provide both args.
		if data is not None and ttFont is not None:
			self.decompileHeader(data, ttFont)
		else:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"

		data = self.data # decompileHeader assigns the data after the header to self.data
		(segCountX2, searchRange, entrySelector, rangeShift) = \
					struct.unpack(">4H", data[:8])
		data = data[8:]
		segCount = segCountX2 // 2

		# Passing the bytes to the array constructor works on Python 2 and 3;
		# array.fromstring() no longer exists on Python 3.9+.
		allCodes = array.array("H", data)
		self.data = data = None

		if sys.byteorder != "big": allCodes.byteswap()

		# divide the data
		endCode = allCodes[:segCount]
		allCodes = allCodes[segCount+1:]  # the +1 is skipping the reservedPad field
		startCode = allCodes[:segCount]
		allCodes = allCodes[segCount:]
		idDelta = allCodes[:segCount]
		allCodes = allCodes[segCount:]
		idRangeOffset = allCodes[:segCount]
		glyphIndexArray = allCodes[segCount:]
		lenGIArray = len(glyphIndexArray)

		# build 2-byte character mapping
		charCodes = []
		gids = []
		for i in range(len(startCode) - 1):	# don't do 0xffff!
			start = startCode[i]
			delta = idDelta[i]
			rangeOffset = idRangeOffset[i]
			# rangeOffset is a byte offset counted from the idRangeOffset[i]
			# word itself.  glyphIndexArray starts (segCount - i) words after
			# that word, so for a character code the index into
			# glyphIndexArray is rangeOffset/2 - (segCount - i) + (code - start).
			# "partial" is the part of that sum that does not depend on code.
			partial = rangeOffset // 2 - start + i - len(idRangeOffset)

			rangeCharCodes = list(range(startCode[i], endCode[i] + 1))
			charCodes.extend(rangeCharCodes)
			if rangeOffset == 0:
				gids.extend([(charCode + delta) & 0xFFFF for charCode in rangeCharCodes])
			else:
				for charCode in rangeCharCodes:
					index = charCode + partial
					assert (index < lenGIArray), "In format 4 cmap, range (%d), the calculated index (%d) into the glyph index array is not less than the length of the array (%d) !" % (i, index, lenGIArray)
					if glyphIndexArray[index] != 0:  # if not missing glyph
						glyphID = glyphIndexArray[index] + delta
					else:
						glyphID = 0  # missing glyph
					gids.append(glyphID & 0xFFFF)

		self.cmap = _make_map(self.ttFont, charCodes, gids)

	def compile(self, ttFont):
		"""Return the binary subtable.

		If undecompiled data is still present it is written back unchanged.
		Otherwise glyph names are resolved to glyph IDs through ttFont (names
		of the form "gid<N>" are accepted as glyph ID N when the font does
		not know them), the codes are split into segments with splitRange()
		and the segment arrays are written out.  Raises KeyError(name) for a
		glyph name that cannot be resolved.
		"""
		if self.data:
			return struct.pack(">HHH", self.format, self.length, self.language) + self.data

		charCodes = list(self.cmap.keys())
		if not charCodes:
			startCode = [0xffff]
			endCode = [0xffff]
		else:
			charCodes.sort()
			names = [self.cmap[code] for code in charCodes]
			nameMap = ttFont.getReverseGlyphMap()
			try:
				gids = [nameMap[name] for name in names]
			except KeyError:
				nameMap = ttFont.getReverseGlyphMap(rebuild=True)
				try:
					gids = [nameMap[name] for name in names]
				except KeyError:
					# allow virtual GIDs in format 4 tables
					gids = []
					for name in names:
						try:
							gid = nameMap[name]
						except KeyError:
							try:
								if (name[:3] == 'gid'):
									gid = int(name[3:])
								else:
									gid = ttFont.getGlyphID(name)
							except Exception:
								# Not a bare "except": KeyboardInterrupt and
								# SystemExit must not be turned into KeyError.
								raise KeyError(name)

						gids.append(gid)
			cmap = {}  # code:glyphID mapping
			for code, gid in zip(charCodes, gids):
				cmap[code] = gid

			# Build startCode and endCode lists.
			# Split the char codes in ranges of consecutive char codes, then split
			# each range in more ranges of consecutive/not consecutive glyph IDs.
			# See splitRange().
			lastCode = charCodes[0]
			endCode = []
			startCode = [lastCode]
			for charCode in charCodes[1:]:  # skip the first code, it's the first start code
				if charCode == lastCode + 1:
					lastCode = charCode
					continue
				start, end = splitRange(startCode[-1], lastCode, cmap)
				startCode.extend(start)
				endCode.extend(end)
				startCode.append(charCode)
				lastCode = charCode
			start, end = splitRange(startCode[-1], lastCode, cmap)
			startCode.extend(start)
			endCode.extend(end)
			startCode.append(0xffff)
			endCode.append(0xffff)

		# Build idDelta, idRangeOffset and glyphIndexArray for every segment
		# except the closing 0xffff one.
		idDelta = []
		idRangeOffset = []
		glyphIndexArray = []
		for i in range(len(endCode)-1):  # skip the closing codes (0xffff)
			indices = []
			for charCode in range(startCode[i], endCode[i] + 1):
				indices.append(cmap[charCode])
			if (indices == list(range(indices[0], indices[0] + len(indices)))):
				# Consecutive glyph IDs: a delta alone describes the segment.
				idDelta.append((indices[0] - startCode[i]) % 0x10000)
				idRangeOffset.append(0)
			else:
				# Otherwise store the glyph IDs in glyphIndexArray.  The
				# offset is in bytes and counted from the idRangeOffset[i]
				# word itself: (segCount - i) words up to the start of
				# glyphIndexArray plus the entries already stored there.
				idDelta.append(0)
				idRangeOffset.append(2 * (len(endCode) + len(glyphIndexArray) - i))
				glyphIndexArray.extend(indices)
		idDelta.append(1)  # 0xffff + 1 == (tadaa!) 0. So this end code maps to .notdef
		idRangeOffset.append(0)

		# Header search fields derived from the segment count.
		segCount = len(endCode)
		segCountX2 = segCount * 2
		searchRange, entrySelector, rangeShift = getSearchRange(segCount, 2)

		arrays = [
			array.array("H", endCode + [0] + startCode),
			array.array("H", idDelta),
			array.array("H", idRangeOffset + glyphIndexArray),
		]
		chunks = []
		for arr in arrays:
			if sys.byteorder != "big": arr.byteswap()
			# array.tostring() was removed in Python 3.9; tobytes() is
			# missing on Python 2, so pick whichever this interpreter offers.
			chunks.append(arr.tobytes() if hasattr(arr, "tobytes") else arr.tostring())
		data = b"".join(chunks)

		length = struct.calcsize(cmap_format_4_format) + len(data)
		header = struct.pack(cmap_format_4_format, self.format, length, self.language,
				segCountX2, searchRange, entrySelector, rangeShift)
		return header + data

	def fromXML(self, name, attrs, content, ttFont):
		"""Read the language attribute and add every "map" child element to
		self.cmap (created if it does not exist yet).

		Asserts that every child element is a "map" element.
		"""
		self.language = safeEval(attrs["language"])
		if not hasattr(self, "cmap"):
			self.cmap = {}
		cmap = self.cmap

		for element in content:
			if not isinstance(element, tuple):
				continue
			nameMap, attrsMap, dummyContent = element
			if nameMap != "map":
				assert 0, "Unrecognized keyword in cmap subtable"
			cmap[safeEval(attrsMap["code"])] = attrsMap["name"]
850
851
class cmap_format_6(CmapSubtable):
	"""cmap subtable format 6.

	The body is a ``firstCode`` / ``entryCount`` pair followed by an array of
	16-bit glyph IDs, one for each consecutive character code starting at
	``firstCode``.
	"""

	def decompile(self, data, ttFont):
		"""Decode the subtable body into ``self.cmap`` (char code -> glyph name).

		``data`` and ``ttFont`` must either both be given (direct call, the
		header is parsed first) or both be None (the data previously stored
		by decompileHeader() is used).
		"""
		# we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
		# If not, someone is calling the subtable decompile() directly, and must provide both args.
		if data is not None and ttFont is not None:
			self.decompileHeader(data, ttFont)
		else:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"

		data = self.data # decompileHeader assigns the data after the header to self.data
		firstCode, entryCount = struct.unpack(">HH", data[:4])
		firstCode = int(firstCode)
		data = data[4:]
		#assert len(data) == 2 * entryCount  # XXX not true in Apple's Helvetica!!!
		gids = array.array("H")
		rawGids = data[:2 * int(entryCount)]
		try:
			# array.fromstring() was removed in Python 3.9.
			gids.frombytes(rawGids)
		except AttributeError:
			# Python 2 arrays only provide fromstring().
			gids.fromstring(rawGids)
		if sys.byteorder != "big": gids.byteswap()
		self.data = data = None

		charCodes = list(range(firstCode, firstCode + len(gids)))
		self.cmap = _make_map(self.ttFont, charCodes, gids)

	def compile(self, ttFont):
		"""Return the binary form of this subtable.

		When the raw data is still present it is re-emitted behind a freshly
		packed header.  Otherwise the table is built from ``self.cmap``: the
		code range from the lowest to the highest mapped code is written out
		densely, with glyph ID 0 for codes that have no mapping.
		"""
		if self.data:
			return struct.pack(">HHH", self.format, self.length, self.language) + self.data
		cmap = self.cmap
		codes = sorted(cmap.keys())
		if codes: # yes, there are empty cmap tables.
			codes = list(range(codes[0], codes[-1] + 1))
			firstCode = codes[0]
			valueList = [
				ttFont.getGlyphID(cmap[code]) if code in cmap else 0
				for code in codes
			]
			gids = array.array("H", valueList)
			if sys.byteorder != "big": gids.byteswap()
			try:
				# array.tostring() was removed in Python 3.9.
				data = gids.tobytes()
			except AttributeError:
				# Python 2 arrays only provide tostring().
				data = gids.tostring()
		else:
			data = b""
			firstCode = 0
		# 10 == size of the header packed below (five 16-bit fields).
		header = struct.pack(">HHHHH",
				6, len(data) + 10, self.language, firstCode, len(codes))
		return header + data

	def fromXML(self, name, attrs, content, ttFont):
		"""Fill this subtable from a parsed XML element.

		Reads the "language" attribute and stores every <map> child in
		``self.cmap``; non-tuple content and other child tags are ignored.
		"""
		self.language = safeEval(attrs["language"])
		if not hasattr(self, "cmap"):
			self.cmap = {}
		cmap = self.cmap

		for element in content:
			if not isinstance(element, tuple):
				continue
			childName, childAttrs, _ = element
			if childName != "map":
				continue
			cmap[safeEval(childAttrs["code"])] = childAttrs["name"]
910
911
class cmap_format_12_or_13(CmapSubtable):
	"""Shared implementation of cmap subtable formats 12 and 13.

	Both formats consist of a 16-byte header followed by 12-byte groups of
	(startCharCode, endCharCode, glyphID).  Subclasses provide
	``_format_step``, ``_computeGIDs()`` and ``_IsInSameRun()``, which define
	how glyph IDs progress inside a group.
	"""

	def __init__(self, format):
		self.format = format
		self.reserved = 0
		self.data = None
		self.ttFont = None

	def decompileHeader(self, data, ttFont):
		"""Parse the header and keep the group records in ``self.data``."""
		format, reserved, length, language, nGroups = struct.unpack(">HHLLL", data[:16])
		assert len(data) == (16 + nGroups*12) == (length), "corrupt cmap table format %d (data length: %d, header length: %d)" % (self.format, len(data), length)
		self.format = format
		self.reserved = reserved
		self.length = length
		self.language = language
		self.nGroups = nGroups
		self.data = data[16:]
		self.ttFont = ttFont

	def decompile(self, data, ttFont):
		"""Expand the group records into ``self.cmap`` (char code -> glyph name).

		``data`` and ``ttFont`` must either both be given (direct call, the
		header is parsed first) or both be None (the data previously stored
		by decompileHeader() is used).
		"""
		# we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
		# If not, someone is calling the subtable decompile() directly, and must provide both args.
		if data is not None and ttFont is not None:
			self.decompileHeader(data, ttFont)
		else:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"

		data = self.data # decompileHeader assigns the data after the header to self.data
		charCodes = []
		gids = []
		pos = 0
		for i in range(self.nGroups):
			startCharCode, endCharCode, glyphID = struct.unpack(">LLL", data[pos:pos+12])
			pos += 12
			lenGroup = 1 + endCharCode - startCharCode
			charCodes.extend(range(startCharCode, endCharCode + 1))
			gids.extend(self._computeGIDs(glyphID, lenGroup))
		self.data = data = None
		self.cmap = _make_map(self.ttFont, charCodes, gids)

	def compile(self, ttFont):
		"""Return the binary form of this subtable.

		When the raw data is still present it is re-emitted behind a freshly
		packed header.  Otherwise ``self.cmap`` is converted to glyph IDs and
		packed into groups of consecutive char codes, as decided by
		``_IsInSameRun()``.  An empty cmap compiles to a header with zero
		groups.

		Raises KeyError (carrying the glyph name) when a name cannot be
		resolved to a glyph ID.
		"""
		if self.data:
			return struct.pack(">HHLLL", self.format, self.reserved, self.length, self.language, self.nGroups) + self.data
		charCodes = list(self.cmap.keys())
		names = list(self.cmap.values())
		nameMap = ttFont.getReverseGlyphMap()
		try:
			gids = [nameMap[name] for name in names]
		except KeyError:
			nameMap = ttFont.getReverseGlyphMap(rebuild=True)
			try:
				gids = [nameMap[name] for name in names]
			except KeyError:
				# allow virtual GIDs in format 12 tables
				gids = []
				for name in names:
					try:
						gid = nameMap[name]
					except KeyError:
						try:
							if (name[:3] == 'gid'):
								gid = int(name[3:])
							else:
								gid = ttFont.getGlyphID(name)
						except Exception:
							# Report any lookup failure as a missing glyph
							# name, but let KeyboardInterrupt / SystemExit
							# propagate untouched.
							raise KeyError(name)

					gids.append(gid)

		cmap = dict(zip(charCodes, gids))  # code:glyphID mapping

		dataList = []
		if charCodes:  # an empty cmap has no groups; charCodes[0] would fail
			charCodes.sort()
			startCharCode = charCodes[0]
			startGlyphID = cmap[startCharCode]
			# Seed the "previous" values so that the first char code is
			# always seen as the continuation of the (still empty) first run.
			lastGlyphID = startGlyphID - self._format_step
			lastCharCode = startCharCode - 1
			for charCode in charCodes:
				glyphID = cmap[charCode]
				if not self._IsInSameRun(glyphID, lastGlyphID, charCode, lastCharCode):
					# Close the current group and start a new one here.
					dataList.append(struct.pack(">LLL", startCharCode, lastCharCode, startGlyphID))
					startCharCode = charCode
					startGlyphID = glyphID
				lastGlyphID = glyphID
				lastCharCode = charCode
			dataList.append(struct.pack(">LLL", startCharCode, lastCharCode, startGlyphID))
		nGroups = len(dataList)
		data = bytesjoin(dataList)
		lengthSubtable = len(data) + 16
		assert len(data) == (nGroups*12) == (lengthSubtable-16)
		return struct.pack(">HHLLL", self.format, self.reserved, lengthSubtable, self.language, nGroups) + data

	def toXML(self, writer, ttFont):
		"""Write the header fields as attributes and the mappings, sorted by
		char code, via ``_writeCodes()``."""
		writer.begintag(self.__class__.__name__, [
				("platformID", self.platformID),
				("platEncID", self.platEncID),
				("format", self.format),
				("reserved", self.reserved),
				("length", self.length),
				("language", self.language),
				("nGroups", self.nGroups),
				])
		writer.newline()
		codes = sorted(self.cmap.items())
		self._writeCodes(codes, writer)
		writer.endtag(self.__class__.__name__)
		writer.newline()

	def fromXML(self, name, attrs, content, ttFont):
		"""Fill this subtable from a parsed XML element.

		Reads the header attributes and stores every <map> child in
		``self.cmap``; non-tuple content and other child tags are ignored.
		"""
		self.format = safeEval(attrs["format"])
		self.reserved = safeEval(attrs["reserved"])
		self.length = safeEval(attrs["length"])
		self.language = safeEval(attrs["language"])
		self.nGroups = safeEval(attrs["nGroups"])
		if not hasattr(self, "cmap"):
			self.cmap = {}
		cmap = self.cmap

		for element in content:
			if not isinstance(element, tuple):
				continue
			childName, childAttrs, _ = element
			if childName != "map":
				continue
			cmap[safeEval(childAttrs["code"])] = childAttrs["name"]
1044
1045
class cmap_format_12(cmap_format_12_or_13):
	"""Format 12 flavour: within a group the glyph ID increases by one for
	every successive character code."""

	# Glyph ID increment between neighbouring char codes of one group.
	_format_step = 1

	def __init__(self, format=12):
		cmap_format_12_or_13.__init__(self, format)

	def _computeGIDs(self, startingGlyph, numberOfGlyphs):
		"""Return the consecutive glyph IDs covered by one group."""
		return [startingGlyph + offset for offset in range(numberOfGlyphs)]

	def _IsInSameRun(self, glyphID, lastGlyphID, charCode, lastCharCode):
		"""True when both the char code and the glyph ID directly follow the
		previous pair."""
		followsGlyph = glyphID == lastGlyphID + 1
		followsCode = charCode == lastCharCode + 1
		return followsGlyph and followsCode
1058
1059
class cmap_format_13(cmap_format_12_or_13):
	"""Format 13 flavour: every character code of a group maps to the same
	glyph ID."""

	# Glyph ID increment between neighbouring char codes of one group.
	_format_step = 0

	def __init__(self, format=13):
		cmap_format_12_or_13.__init__(self, format)

	def _computeGIDs(self, startingGlyph, numberOfGlyphs):
		"""Return the group's single glyph ID repeated once per char code."""
		return [startingGlyph for _ in range(numberOfGlyphs)]

	def _IsInSameRun(self, glyphID, lastGlyphID, charCode, lastCharCode):
		"""True when the char code directly follows the previous one and the
		glyph ID is unchanged."""
		sameGlyph = glyphID == lastGlyphID
		followsCode = charCode == lastCharCode + 1
		return sameGlyph and followsCode
1072
1073
def cvtToUVS(threeByteString):
	"""Decode a 3-byte big-endian value into an int.

	The bytes are zero-padded on the left to four bytes so they can be
	unpacked as an unsigned 32-bit big-endian integer.
	"""
	(value,) = struct.unpack(">L", b"\0" + threeByteString)
	return value
1078
def cvtFromUVS(val):
	"""Encode an int in the range 0..0xFFFFFF as 3 big-endian bytes.

	The value is packed as an unsigned 32-bit big-endian integer and the
	leading (always zero) byte is dropped.
	"""
	assert 0 <= val < 0x1000000
	return struct.pack(">L", val)[1:]
1083
1084
class cmap_format_14(CmapSubtable):
	"""cmap subtable format 14 (variation selector records).

	The decoded mappings live in ``self.uvsDict``::

		{variationSelector: [(baseCodepoint, glyphName or None), ...]}

	A glyph name of None marks a "default" mapping, for which no glyph is
	stored in the subtable.  ``self.cmap`` is always an empty dict; it only
	exists for clients that expect every subtable to have one.
	"""

	def decompileHeader(self, data, ttFont):
		"""Parse the 10-byte header and keep the remaining bytes in
		``self.data``."""
		format, length, numVarSelectorRecords = struct.unpack(">HLL", data[:10])
		self.data = data[10:]
		self.length = length
		self.numVarSelectorRecords = numVarSelectorRecords
		self.ttFont = ttFont
		self.language = 0xFF # has no language.

	def decompile(self, data, ttFont):
		"""Decode the variation selector records into ``self.uvsDict``.

		``data`` and ``ttFont`` must either both be given (direct call, the
		header is parsed first) or both be None (the data previously stored
		by decompileHeader() is used).
		"""
		if data is not None and ttFont is not None:
			self.decompileHeader(data, ttFont)
		else:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"
		data = self.data

		self.cmap = {} # so that clients that expect this to exist in a cmap table won't fail.
		uvsDict = {}
		recOffset = 0
		for n in range(self.numVarSelectorRecords):
			uvs, defUVSOffset, nonDefUVSOffset = struct.unpack(">3sLL", data[recOffset:recOffset + 11])
			recOffset += 11
			varUVS = cvtToUVS(uvs)
			if defUVSOffset:
				# Offsets count from the start of the subtable, but self.data
				# begins after the 10-byte header.
				startOffset = defUVSOffset - 10
				numValues, = struct.unpack(">L", data[startOffset:startOffset+4])
				startOffset += 4
				for r in range(numValues):
					uv, addtlCnt = struct.unpack(">3sB", data[startOffset:startOffset+4])
					startOffset += 4
					firstBaseUV = cvtToUVS(uv)
					cnt = addtlCnt + 1
					# Default mappings carry no glyph: record None as the name.
					uvsDict.setdefault(varUVS, []).extend(
						(baseUV, None) for baseUV in range(firstBaseUV, firstBaseUV + cnt))

			if nonDefUVSOffset:
				startOffset = nonDefUVSOffset - 10
				numRecs, = struct.unpack(">L", data[startOffset:startOffset+4])
				startOffset += 4
				localUVList = []
				for r in range(numRecs):
					uv, gid = struct.unpack(">3sH", data[startOffset:startOffset+5])
					startOffset += 5
					uv = cvtToUVS(uv)
					glyphName = self.ttFont.getGlyphName(gid)
					localUVList.append((uv, glyphName))
				uvsDict.setdefault(varUVS, []).extend(localUVList)

		self.uvsDict = uvsDict

	def toXML(self, writer, ttFont):
		"""Write one <map uv=... uvs=... [name=...]/> element per mapping.

		Selectors are written in ascending order; within a selector the
		default mappings (no name) come first.  Each list in
		``self.uvsDict`` is sorted in place as a side effect.
		"""
		writer.begintag(self.__class__.__name__, [
				("platformID", self.platformID),
				("platEncID", self.platEncID),
				])
		writer.newline()
		uvsDict = self.uvsDict
		uvsList = sorted(uvsDict.keys())
		for uvs in uvsList:
			uvList = uvsDict[uvs]
			uvList.sort(key=lambda item: (item[1] is not None, item[0], item[1]))
			for uv, gname in uvList:
				attrs = [("uv", hex(uv)), ("uvs", hex(uvs))]
				if gname is not None:
					attrs.append(("name", gname))
				writer.simpletag("map", attrs)
				writer.newline()
		writer.endtag(self.__class__.__name__)
		writer.newline()

	def fromXML(self, name, attrs, content, ttFont):
		"""Fill ``self.uvsDict`` from the <map> children of a parsed XML
		element; non-tuple content and other child tags are ignored."""
		self.language = 0xFF # provide a value so that CmapSubtable.__lt__() won't fail
		if not hasattr(self, "cmap"):
			self.cmap = {} # so that clients that expect this to exist in a cmap table won't fail.
		if not hasattr(self, "uvsDict"):
			self.uvsDict = {}
		# Bind the local outside the "if": when self.uvsDict already existed
		# the name used to stay unbound and the loop below raised
		# UnboundLocalError.
		uvsDict = self.uvsDict

		# For backwards compatibility reasons we accept "None" as an indicator
		# for "default mapping", unless the font actually has a glyph named
		# "None".
		_hasGlyphNamedNone = None

		for element in content:
			if not isinstance(element, tuple):
				continue
			childName, childAttrs, _ = element
			if childName != "map":
				continue
			uvs = safeEval(childAttrs["uvs"])
			uv = safeEval(childAttrs["uv"])
			gname = childAttrs.get("name")
			if gname == "None":
				if _hasGlyphNamedNone is None:
					# Looked up lazily, and only once per call.
					_hasGlyphNamedNone = "None" in ttFont.getGlyphOrder()
				if not _hasGlyphNamedNone:
					gname = None
			uvsDict.setdefault(uvs, []).append((uv, gname))

	def compile(self, ttFont):
		"""Return the binary form of this subtable.

		When the raw data is still present it is re-emitted behind a freshly
		packed header.  Otherwise ``self.uvsDict`` is serialised: the
		variation selector records come first, followed by the default and
		non-default tables they point to.  ``self.numVarSelectorRecords`` and
		``self.length`` are updated as a side effect.
		"""
		if self.data:
			return struct.pack(">HLL", self.format, self.length, self.numVarSelectorRecords) + self.data

		uvsDict = self.uvsDict
		uvsList = sorted(uvsDict.keys())
		self.numVarSelectorRecords = len(uvsList)
		offset = 10 + self.numVarSelectorRecords*11 # current value is end of VarSelectorRecords block.
		data = []
		varSelectorRecords = []
		for uvs in uvsList:
			entryList = uvsDict[uvs]

			defList = sorted(entry[0] for entry in entryList if entry[1] is None)
			if defList:
				defUVSOffset = offset

				# Collapse consecutive code points into (start, additionalCount)
				# records.
				lastUV = defList[0]
				cnt = -1
				defRecs = []
				for defEntry in defList:
					cnt += 1
					# The additional count is packed into a single byte, so a
					# run must also be closed once it covers 256 code points;
					# otherwise struct.pack() fails on longer runs.
					if (lastUV + cnt) != defEntry or cnt > 0xFF:
						rec = struct.pack(">3sB", cvtFromUVS(lastUV), cnt - 1)
						lastUV = defEntry
						defRecs.append(rec)
						cnt = 0

				rec = struct.pack(">3sB", cvtFromUVS(lastUV), cnt)
				defRecs.append(rec)

				numDefRecs = len(defRecs)
				data.append(struct.pack(">L", numDefRecs))
				data.extend(defRecs)
				offset += 4 + numDefRecs*4
			else:
				defUVSOffset = 0

			ndefList = [entry for entry in entryList if entry[1] is not None]
			if ndefList:
				nonDefUVSOffset = offset
				ndefList.sort()
				numNonDefRecs = len(ndefList)
				data.append(struct.pack(">L", numNonDefRecs))
				offset += 4 + numNonDefRecs*5

				for uv, gname in ndefList:
					gid = ttFont.getGlyphID(gname)
					ndrec = struct.pack(">3sH", cvtFromUVS(uv), gid)
					data.append(ndrec)
			else:
				nonDefUVSOffset = 0

			vrec = struct.pack(">3sLL", cvtFromUVS(uvs), defUVSOffset, nonDefUVSOffset)
			varSelectorRecords.append(vrec)

		data = bytesjoin(varSelectorRecords) + bytesjoin(data)
		self.length = 10 + len(data)
		headerdata = struct.pack(">HLL", self.format, self.length, self.numVarSelectorRecords)

		return headerdata + data
1259
1260
class cmap_format_unknown(CmapSubtable):
	"""Subtable that keeps its binary data undecoded.

	The bytes are dumped as hex to XML and written back unchanged on
	compile; ``self.cmap`` is left empty when loading from XML.
	"""

	def toXML(self, writer, ttFont):
		"""Write the raw data as a hex dump inside a tag named after the
		first 12 characters of the class name plus the format number."""
		tagName = "%s%s" % (self.__class__.__name__[:12], self.format)
		tagAttrs = [
			("platformID", self.platformID),
			("platEncID", self.platEncID),
		]
		writer.begintag(tagName, tagAttrs)
		writer.newline()
		writer.dumphex(self.data)
		writer.endtag(tagName)
		writer.newline()

	def fromXML(self, name, attrs, content, ttFont):
		"""Restore the raw data from its hex dump; no mappings are decoded."""
		self.data = readHex(content)
		self.cmap = {}

	def decompileHeader(self, data, ttFont):
		"""Store the data as-is; there is no header to interpret."""
		self.language = 0  # dummy value
		self.data = data

	def decompile(self, data, ttFont):
		"""Store the raw data when called directly with both arguments;
		otherwise only check that both arguments are None."""
		# we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
		# If not, someone is calling the subtable decompile() directly, and must provide both args.
		calledDirectly = data is not None and ttFont is not None
		if not calledDirectly:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"
			return
		self.decompileHeader(data, ttFont)

	def compile(self, ttFont):
		"""Return the stored raw data, or None when there is none."""
		return self.data if self.data else None
1295
# Lookup table from cmap subtable format number to the class that
# implements that format.
cmap_classes = {
	0: cmap_format_0, 2: cmap_format_2, 4: cmap_format_4, 6: cmap_format_6,
	12: cmap_format_12, 13: cmap_format_13, 14: cmap_format_14,
}
1305