• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1from fontTools.misc.textTools import bytesjoin, safeEval, readHex
2from fontTools.misc.encodingTools import getEncoding
3from fontTools.ttLib import getSearchRange
4from fontTools.unicode import Unicode
5from . import DefaultTable
6import sys
7import struct
8import array
9import logging
10
11
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
13
14
def _make_map(font, chars, gids):
	"""Build a ``{character code: glyph name}`` dict from two parallel sequences.

	``chars`` and ``gids`` must have the same length. The glyph names are
	fetched in one batch through ``font.getGlyphNameMany(gids)``; entries
	whose glyph ID is 0 are left out of the result.
	"""
	assert len(chars) == len(gids)
	names = font.getGlyphNameMany(gids)
	return {
		code: glyphName
		for code, glyphID, glyphName in zip(chars, gids, names)
		if glyphID != 0
	}
24
25class table__c_m_a_p(DefaultTable.DefaultTable):
26	"""Character to Glyph Index Mapping Table
27
28	This class represents the `cmap <https://docs.microsoft.com/en-us/typography/opentype/spec/cmap>`_
29	table, which maps between input characters (in Unicode or other system encodings)
30	and glyphs within the font. The ``cmap`` table contains one or more subtables
31	which determine the mapping of characters to glyphs across different platforms
32	and encoding systems.
33
34	``table__c_m_a_p`` objects expose an accessor ``.tables`` which provides access
35	to the subtables, although it is normally easier to retrieve individual subtables
36	through the utility methods described below. To add new subtables to a font,
37	first determine the subtable format (if in doubt use format 4 for glyphs within
38	the BMP, format 12 for glyphs outside the BMP, and format 14 for Unicode Variation
39	Sequences) construct subtable objects with ``CmapSubtable.newSubtable(format)``,
40	and append them to the ``.tables`` list.
41
42	Within a subtable, the mapping of characters to glyphs is provided by the ``.cmap``
43	attribute.
44
45	Example::
46
47		cmap4_0_3 = CmapSubtable.newSubtable(4)
48		cmap4_0_3.platformID = 0
49		cmap4_0_3.platEncID = 3
50		cmap4_0_3.language = 0
51		cmap4_0_3.cmap = { 0xC1: "Aacute" }
52
53		cmap = newTable("cmap")
54		cmap.tableVersion = 0
55		cmap.tables = [cmap4_0_3]
56	"""
57
58	def getcmap(self, platformID, platEncID):
59		"""Returns the first subtable which matches the given platform and encoding.
60
61		Args:
62			platformID (int): The platform ID. Use 0 for Unicode, 1 for Macintosh
63				(deprecated for new fonts), 2 for ISO (deprecated) and 3 for Windows.
64			encodingID (int): Encoding ID. Interpretation depends on the platform ID.
65				See the OpenType specification for details.
66
67		Returns:
68			An object which is a subclass of :py:class:`CmapSubtable` if a matching
69			subtable is found within the font, or ``None`` otherwise.
70		"""
71
72		for subtable in self.tables:
73			if (subtable.platformID == platformID and
74					subtable.platEncID == platEncID):
75				return subtable
76		return None # not found
77
78	def getBestCmap(self, cmapPreferences=((3, 10), (0, 6), (0, 4), (3, 1), (0, 3), (0, 2), (0, 1), (0, 0))):
79		"""Returns the 'best' Unicode cmap dictionary available in the font
80		or ``None``, if no Unicode cmap subtable is available.
81
82		By default it will search for the following (platformID, platEncID)
83		pairs in order::
84
85				(3, 10), # Windows Unicode full repertoire
86				(0, 6),  # Unicode full repertoire (format 13 subtable)
87				(0, 4),  # Unicode 2.0 full repertoire
88				(3, 1),  # Windows Unicode BMP
89				(0, 3),  # Unicode 2.0 BMP
90				(0, 2),  # Unicode ISO/IEC 10646
91				(0, 1),  # Unicode 1.1
92				(0, 0)   # Unicode 1.0
93
94		This order can be customized via the ``cmapPreferences`` argument.
95		"""
96		for platformID, platEncID in cmapPreferences:
97			cmapSubtable = self.getcmap(platformID, platEncID)
98			if cmapSubtable is not None:
99				return cmapSubtable.cmap
100		return None  # None of the requested cmap subtables were found
101
102	def buildReversed(self):
103		"""Builds a reverse mapping dictionary
104
105		Iterates over all Unicode cmap tables and returns a dictionary mapping
106		glyphs to sets of codepoints, such as::
107
108			{
109				'one': {0x31}
110				'A': {0x41,0x391}
111			}
112
113		The values are sets of Unicode codepoints because
114		some fonts map different codepoints to the same glyph.
115		For example, ``U+0041 LATIN CAPITAL LETTER A`` and ``U+0391
116		GREEK CAPITAL LETTER ALPHA`` are sometimes the same glyph.
117		"""
118		result = {}
119		for subtable in self.tables:
120			if subtable.isUnicode():
121				for codepoint, name in subtable.cmap.items():
122					result.setdefault(name, set()).add(codepoint)
123		return result
124
125	def decompile(self, data, ttFont):
126		tableVersion, numSubTables = struct.unpack(">HH", data[:4])
127		self.tableVersion = int(tableVersion)
128		self.tables = tables = []
129		seenOffsets = {}
130		for i in range(numSubTables):
131			platformID, platEncID, offset = struct.unpack(
132					">HHl", data[4+i*8:4+(i+1)*8])
133			platformID, platEncID = int(platformID), int(platEncID)
134			format, length = struct.unpack(">HH", data[offset:offset+4])
135			if format in [8,10,12,13]:
136				format, reserved, length = struct.unpack(">HHL", data[offset:offset+8])
137			elif format in [14]:
138				format, length = struct.unpack(">HL", data[offset:offset+6])
139
140			if not length:
141				log.error(
142					"cmap subtable is reported as having zero length: platformID %s, "
143					"platEncID %s, format %s offset %s. Skipping table.",
144					platformID, platEncID, format, offset)
145				continue
146			table = CmapSubtable.newSubtable(format)
147			table.platformID = platformID
148			table.platEncID = platEncID
149			# Note that by default we decompile only the subtable header info;
150			# any other data gets decompiled only when an attribute of the
151			# subtable is referenced.
152			table.decompileHeader(data[offset:offset+int(length)], ttFont)
153			if offset in seenOffsets:
154				table.data = None # Mark as decompiled
155				table.cmap = tables[seenOffsets[offset]].cmap
156			else:
157				seenOffsets[offset] = i
158			tables.append(table)
159		if ttFont.lazy is False:  # Be lazy for None and True
160			self.ensureDecompiled()
161
162	def ensureDecompiled(self):
163		for st in self.tables:
164			st.ensureDecompiled()
165
166	def compile(self, ttFont):
167		self.tables.sort()  # sort according to the spec; see CmapSubtable.__lt__()
168		numSubTables = len(self.tables)
169		totalOffset = 4 + 8 * numSubTables
170		data = struct.pack(">HH", self.tableVersion, numSubTables)
171		tableData = b""
172		seen = {}  # Some tables are the same object reference. Don't compile them twice.
173		done = {}  # Some tables are different objects, but compile to the same data chunk
174		for table in self.tables:
175			try:
176				offset = seen[id(table.cmap)]
177			except KeyError:
178				chunk = table.compile(ttFont)
179				if chunk in done:
180					offset = done[chunk]
181				else:
182					offset = seen[id(table.cmap)] = done[chunk] = totalOffset + len(tableData)
183					tableData = tableData + chunk
184			data = data + struct.pack(">HHl", table.platformID, table.platEncID, offset)
185		return data + tableData
186
187	def toXML(self, writer, ttFont):
188		writer.simpletag("tableVersion", version=self.tableVersion)
189		writer.newline()
190		for table in self.tables:
191			table.toXML(writer, ttFont)
192
193	def fromXML(self, name, attrs, content, ttFont):
194		if name == "tableVersion":
195			self.tableVersion = safeEval(attrs["version"])
196			return
197		if name[:12] != "cmap_format_":
198			return
199		if not hasattr(self, "tables"):
200			self.tables = []
201		format = safeEval(name[12:])
202		table = CmapSubtable.newSubtable(format)
203		table.platformID = safeEval(attrs["platformID"])
204		table.platEncID = safeEval(attrs["platEncID"])
205		table.fromXML(name, attrs, content, ttFont)
206		self.tables.append(table)
207
208
209class CmapSubtable(object):
210	"""Base class for all cmap subtable formats.
211
212	Subclasses which handle the individual subtable formats are named
213	``cmap_format_0``, ``cmap_format_2`` etc. Use :py:meth:`getSubtableClass`
214	to retrieve the concrete subclass, or :py:meth:`newSubtable` to get a
215	new subtable object for a given format.
216
217	The object exposes a ``.cmap`` attribute, which contains a dictionary mapping
218	character codepoints to glyph names.
219	"""
220
221	@staticmethod
222	def getSubtableClass(format):
223		"""Return the subtable class for a format."""
224		return cmap_classes.get(format, cmap_format_unknown)
225
226	@staticmethod
227	def newSubtable(format):
228		"""Return a new instance of a subtable for the given format
229		."""
230		subtableClass = CmapSubtable.getSubtableClass(format)
231		return subtableClass(format)
232
233	def __init__(self, format):
234		self.format = format
235		self.data = None
236		self.ttFont = None
237		self.platformID = None  #: The platform ID of this subtable
238		self.platEncID = None   #: The encoding ID of this subtable (interpretation depends on ``platformID``)
239		self.language = None    #: The language ID of this subtable (Macintosh platform only)
240
241	def ensureDecompiled(self):
242		if self.data is None:
243			return
244		self.decompile(None, None) # use saved data.
245		self.data = None	# Once this table has been decompiled, make sure we don't
246							# just return the original data. Also avoids recursion when
247							# called with an attribute that the cmap subtable doesn't have.
248
249	def __getattr__(self, attr):
250		# allow lazy decompilation of subtables.
251		if attr[:2] == '__': # don't handle requests for member functions like '__lt__'
252			raise AttributeError(attr)
253		if self.data is None:
254			raise AttributeError(attr)
255		self.ensureDecompiled()
256		return getattr(self, attr)
257
258	def decompileHeader(self, data, ttFont):
259		format, length, language = struct.unpack(">HHH", data[:6])
260		assert len(data) == length, "corrupt cmap table format %d (data length: %d, header length: %d)" % (format, len(data), length)
261		self.format = int(format)
262		self.length = int(length)
263		self.language = int(language)
264		self.data = data[6:]
265		self.ttFont = ttFont
266
267	def toXML(self, writer, ttFont):
268		writer.begintag(self.__class__.__name__, [
269				("platformID", self.platformID),
270				("platEncID", self.platEncID),
271				("language", self.language),
272				])
273		writer.newline()
274		codes = sorted(self.cmap.items())
275		self._writeCodes(codes, writer)
276		writer.endtag(self.__class__.__name__)
277		writer.newline()
278
279	def getEncoding(self, default=None):
280		"""Returns the Python encoding name for this cmap subtable based on its platformID,
281		platEncID, and language.  If encoding for these values is not known, by default
282		``None`` is returned.  That can be overridden by passing a value to the ``default``
283		argument.
284
285		Note that if you want to choose a "preferred" cmap subtable, most of the time
286		``self.isUnicode()`` is what you want as that one only returns true for the modern,
287		commonly used, Unicode-compatible triplets, not the legacy ones.
288		"""
289		return getEncoding(self.platformID, self.platEncID, self.language, default)
290
291	def isUnicode(self):
292		"""Returns true if the characters are interpreted as Unicode codepoints."""
293		return (self.platformID == 0 or
294			(self.platformID == 3 and self.platEncID in [0, 1, 10]))
295
296	def isSymbol(self):
297		"""Returns true if the subtable is for the Symbol encoding (3,0)"""
298		return self.platformID == 3 and self.platEncID == 0
299
300	def _writeCodes(self, codes, writer):
301		isUnicode = self.isUnicode()
302		for code, name in codes:
303			writer.simpletag("map", code=hex(code), name=name)
304			if isUnicode:
305				writer.comment(Unicode[code])
306			writer.newline()
307
308	def __lt__(self, other):
309		if not isinstance(other, CmapSubtable):
310			return NotImplemented
311
312		# implemented so that list.sort() sorts according to the spec.
313		selfTuple = (
314			getattr(self, "platformID", None),
315			getattr(self, "platEncID", None),
316			getattr(self, "language", None),
317			self.__dict__)
318		otherTuple = (
319			getattr(other, "platformID", None),
320			getattr(other, "platEncID", None),
321			getattr(other, "language", None),
322			other.__dict__)
323		return selfTuple < otherTuple
324
325
326class cmap_format_0(CmapSubtable):
327
328	def decompile(self, data, ttFont):
329		# we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
330		# If not, someone is calling the subtable decompile() directly, and must provide both args.
331		if data is not None and ttFont is not None:
332			self.decompileHeader(data, ttFont)
333		else:
334			assert (data is None and ttFont is None), "Need both data and ttFont arguments"
335		data = self.data # decompileHeader assigns the data after the header to self.data
336		assert 262 == self.length, "Format 0 cmap subtable not 262 bytes"
337		gids = array.array("B")
338		gids.frombytes(self.data)
339		charCodes = list(range(len(gids)))
340		self.cmap = _make_map(self.ttFont, charCodes, gids)
341
342	def compile(self, ttFont):
343		if self.data:
344			return struct.pack(">HHH", 0, 262, self.language) + self.data
345
346		cmap = self.cmap
347		assert set(cmap.keys()).issubset(range(256))
348		getGlyphID = ttFont.getGlyphID
349		valueList = [getGlyphID(cmap[i]) if i in cmap else 0 for i in range(256)]
350
351		gids = array.array("B", valueList)
352		data = struct.pack(">HHH", 0, 262, self.language) + gids.tobytes()
353		assert len(data) == 262
354		return data
355
356	def fromXML(self, name, attrs, content, ttFont):
357		self.language = safeEval(attrs["language"])
358		if not hasattr(self, "cmap"):
359			self.cmap = {}
360		cmap = self.cmap
361		for element in content:
362			if not isinstance(element, tuple):
363				continue
364			name, attrs, content = element
365			if name != "map":
366				continue
367			cmap[safeEval(attrs["code"])] = attrs["name"]
368
369
# struct format of one format 2 subheader record, in field order:
# firstCode, entryCount, idDelta (the only signed field), idRangeOffset.
subHeaderFormat = ">HHhH"


class SubHeader(object):
	"""Plain record for one subheader of a format 2 cmap subtable.

	The four header fields start out as ``None``; ``glyphIndexArray`` starts
	as a fresh empty list for every instance.
	"""

	def __init__(self):
		self.firstCode = self.entryCount = None
		self.idDelta = self.idRangeOffset = None
		self.glyphIndexArray = []
378
379class cmap_format_2(CmapSubtable):
380
381	def setIDDelta(self, subHeader):
382		subHeader.idDelta = 0
383		# find the minGI which is not zero.
384		minGI = subHeader.glyphIndexArray[0]
385		for gid in subHeader.glyphIndexArray:
386			if (gid != 0) and (gid < minGI):
387				minGI = gid
388		# The lowest gid in glyphIndexArray, after subtracting idDelta, must be 1.
389		# idDelta is a short, and must be between -32K and 32K. minGI can be between 1 and 64K.
390		# We would like to pick an idDelta such that the first glyphArray GID is 1,
391		# so that we are more likely to be able to combine glypharray GID subranges.
392		# This means that we have a problem when minGI is > 32K
393		# Since the final gi is reconstructed from the glyphArray GID by:
394		#    (short)finalGID = (gid + idDelta) % 0x10000),
395		# we can get from a glypharray GID of 1 to a final GID of 65K by subtracting 2, and casting the
396		# negative number to an unsigned short.
397
398		if (minGI > 1):
399			if minGI > 0x7FFF:
400				subHeader.idDelta = -(0x10000 - minGI) -1
401			else:
402				subHeader.idDelta = minGI -1
403			idDelta = subHeader.idDelta
404			for i in range(subHeader.entryCount):
405				gid = subHeader.glyphIndexArray[i]
406				if gid > 0:
407					subHeader.glyphIndexArray[i] = gid - idDelta
408
409	def decompile(self, data, ttFont):
410		# we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
411		# If not, someone is calling the subtable decompile() directly, and must provide both args.
412		if data is not None and ttFont is not None:
413			self.decompileHeader(data, ttFont)
414		else:
415			assert (data is None and ttFont is None), "Need both data and ttFont arguments"
416
417		data = self.data # decompileHeader assigns the data after the header to self.data
418		subHeaderKeys = []
419		maxSubHeaderindex = 0
420		# get the key array, and determine the number of subHeaders.
421		allKeys = array.array("H")
422		allKeys.frombytes(data[:512])
423		data = data[512:]
424		if sys.byteorder != "big": allKeys.byteswap()
425		subHeaderKeys = [ key//8 for key in allKeys]
426		maxSubHeaderindex = max(subHeaderKeys)
427
428		#Load subHeaders
429		subHeaderList = []
430		pos = 0
431		for i in range(maxSubHeaderindex + 1):
432			subHeader = SubHeader()
433			(subHeader.firstCode, subHeader.entryCount, subHeader.idDelta, \
434				subHeader.idRangeOffset) = struct.unpack(subHeaderFormat, data[pos:pos + 8])
435			pos += 8
436			giDataPos = pos + subHeader.idRangeOffset-2
437			giList = array.array("H")
438			giList.frombytes(data[giDataPos:giDataPos + subHeader.entryCount*2])
439			if sys.byteorder != "big": giList.byteswap()
440			subHeader.glyphIndexArray = giList
441			subHeaderList.append(subHeader)
442		# How this gets processed.
443		# Charcodes may be one or two bytes.
444		# The first byte of a charcode is mapped through the subHeaderKeys, to select
445		# a subHeader. For any subheader but 0, the next byte is then mapped through the
446		# selected subheader. If subheader Index 0 is selected, then the byte itself is
447		# mapped through the subheader, and there is no second byte.
448		# Then assume that the subsequent byte is the first byte of the next charcode,and repeat.
449		#
450		# Each subheader references a range in the glyphIndexArray whose length is entryCount.
451		# The range in glyphIndexArray referenced by a sunheader may overlap with the range in glyphIndexArray
452		# referenced by another subheader.
453		# The only subheader that will be referenced by more than one first-byte value is the subheader
454		# that maps the entire range of glyphID values to glyphIndex 0, e.g notdef:
455		#	 {firstChar 0, EntryCount 0,idDelta 0,idRangeOffset xx}
456		# A byte being mapped though a subheader is treated as in index into a mapping of array index to font glyphIndex.
457		# A subheader specifies a subrange within (0...256) by the
458		# firstChar and EntryCount values. If the byte value is outside the subrange, then the glyphIndex is zero
459		# (e.g. glyph not in font).
460		# If the byte index is in the subrange, then an offset index is calculated as (byteIndex - firstChar).
461		# The index to glyphIndex mapping is a subrange of the glyphIndexArray. You find the start of the subrange by
462		# counting idRangeOffset bytes from the idRangeOffset word. The first value in this subrange is the
463		# glyphIndex for the index firstChar. The offset index should then be used in this array to get the glyphIndex.
464		# Example for Logocut-Medium
465		# first byte of charcode = 129; selects subheader 1.
466		# subheader 1 = {firstChar 64, EntryCount 108,idDelta 42,idRangeOffset 0252}
467		# second byte of charCode = 66
468		# the index offset = 66-64 = 2.
469		# The subrange of the glyphIndexArray starting at 0x0252 bytes from the idRangeOffset word is:
470		# [glyphIndexArray index], [subrange array index] = glyphIndex
471		# [256], [0]=1 	from charcode [129, 64]
472		# [257], [1]=2  	from charcode [129, 65]
473		# [258], [2]=3  	from charcode [129, 66]
474		# [259], [3]=4  	from charcode [129, 67]
475		# So, the glyphIndex = 3 from the array. Then if idDelta is not zero and the glyph ID is not zero,
476		# add it to the glyphID to get the final glyphIndex
477		# value. In this case the final glyph index = 3+ 42 -> 45 for the final glyphIndex. Whew!
478
479		self.data = b""
480		cmap = {}
481		notdefGI = 0
482		for firstByte in range(256):
483			subHeadindex = subHeaderKeys[firstByte]
484			subHeader = subHeaderList[subHeadindex]
485			if subHeadindex == 0:
486				if (firstByte < subHeader.firstCode) or (firstByte >= subHeader.firstCode + subHeader.entryCount):
487					continue # gi is notdef.
488				else:
489					charCode = firstByte
490					offsetIndex = firstByte - subHeader.firstCode
491					gi = subHeader.glyphIndexArray[offsetIndex]
492					if gi != 0:
493						gi = (gi + subHeader.idDelta) % 0x10000
494					else:
495						continue # gi is notdef.
496				cmap[charCode] = gi
497			else:
498				if subHeader.entryCount:
499					charCodeOffset = firstByte * 256 + subHeader.firstCode
500					for offsetIndex in range(subHeader.entryCount):
501						charCode = charCodeOffset + offsetIndex
502						gi = subHeader.glyphIndexArray[offsetIndex]
503						if gi != 0:
504							gi = (gi + subHeader.idDelta) % 0x10000
505						else:
506							continue
507						cmap[charCode] = gi
508				# If not subHeader.entryCount, then all char codes with this first byte are
509				# mapped to .notdef. We can skip this subtable, and leave the glyphs un-encoded, which is the
510				# same as mapping it to .notdef.
511
512		gids = list(cmap.values())
513		charCodes = list(cmap.keys())
514		self.cmap = _make_map(self.ttFont, charCodes, gids)
515
516	def compile(self, ttFont):
517		if self.data:
518			return struct.pack(">HHH", self.format, self.length, self.language) + self.data
519		kEmptyTwoCharCodeRange = -1
520		notdefGI = 0
521
522		items = sorted(self.cmap.items())
523		charCodes = [item[0] for item in items]
524		names = [item[1] for item in items]
525		nameMap = ttFont.getReverseGlyphMap()
526		try:
527			gids = [nameMap[name] for name in names]
528		except KeyError:
529			nameMap = ttFont.getReverseGlyphMap(rebuild=True)
530			try:
531				gids = [nameMap[name] for name in names]
532			except KeyError:
533				# allow virtual GIDs in format 2 tables
534				gids = []
535				for name in names:
536					try:
537						gid = nameMap[name]
538					except KeyError:
539						try:
540							if (name[:3] == 'gid'):
541								gid = int(name[3:])
542							else:
543								gid = ttFont.getGlyphID(name)
544						except:
545							raise KeyError(name)
546
547					gids.append(gid)
548
549		# Process the (char code to gid) item list in char code order.
550		# By definition, all one byte char codes map to subheader 0.
551		# For all the two byte char codes, we assume that the first byte maps maps to the empty subhead (with an entry count of 0,
552		# which defines all char codes in its range to map to notdef) unless proven otherwise.
553		# Note that since the char code items are processed in char code order, all the char codes with the
554		# same first byte are in sequential order.
555
556		subHeaderKeys = [kEmptyTwoCharCodeRange for x in range(256)] # list of indices into subHeaderList.
557		subHeaderList = []
558
559		# We force this subheader entry 0 to exist in the subHeaderList in the case where some one comes up
560		# with a cmap where all the one byte char codes map to notdef,
561		# with the result that the subhead 0 would not get created just by processing the item list.
562		charCode = charCodes[0]
563		if charCode > 255:
564			subHeader = SubHeader()
565			subHeader.firstCode = 0
566			subHeader.entryCount = 0
567			subHeader.idDelta = 0
568			subHeader.idRangeOffset = 0
569			subHeaderList.append(subHeader)
570
571		lastFirstByte = -1
572		items = zip(charCodes, gids)
573		for charCode, gid in items:
574			if gid == 0:
575				continue
576			firstbyte = charCode >> 8
577			secondByte = charCode & 0x00FF
578
579			if firstbyte != lastFirstByte: # Need to update the current subhead, and start a new one.
580				if lastFirstByte > -1:
581					# fix GI's and iDelta of current subheader.
582					self.setIDDelta(subHeader)
583
584					# If it was sunheader 0 for one-byte charCodes, then we need to set the subHeaderKeys value to zero
585					# for the indices matching the char codes.
586					if lastFirstByte == 0:
587						for index in range(subHeader.entryCount):
588							charCode = subHeader.firstCode + index
589							subHeaderKeys[charCode] = 0
590
591					assert (subHeader.entryCount == len(subHeader.glyphIndexArray)), "Error - subhead entry count does not match len of glyphID subrange."
592				# init new subheader
593				subHeader = SubHeader()
594				subHeader.firstCode = secondByte
595				subHeader.entryCount = 1
596				subHeader.glyphIndexArray.append(gid)
597				subHeaderList.append(subHeader)
598				subHeaderKeys[firstbyte] = len(subHeaderList) -1
599				lastFirstByte = firstbyte
600			else:
601				# need to fill in with notdefs all the code points between the last charCode and the current charCode.
602				codeDiff = secondByte - (subHeader.firstCode + subHeader.entryCount)
603				for i in range(codeDiff):
604					subHeader.glyphIndexArray.append(notdefGI)
605				subHeader.glyphIndexArray.append(gid)
606				subHeader.entryCount = subHeader.entryCount + codeDiff + 1
607
608		# fix GI's and iDelta of last subheader that we we added to the subheader array.
609		self.setIDDelta(subHeader)
610
611		# Now we add a final subheader for the subHeaderKeys which maps to empty two byte charcode ranges.
612		subHeader = SubHeader()
613		subHeader.firstCode = 0
614		subHeader.entryCount = 0
615		subHeader.idDelta = 0
616		subHeader.idRangeOffset = 2
617		subHeaderList.append(subHeader)
618		emptySubheadIndex = len(subHeaderList) - 1
619		for index in range(256):
620			if subHeaderKeys[index] == kEmptyTwoCharCodeRange:
621				subHeaderKeys[index] = emptySubheadIndex
622		# Since this is the last subheader, the GlyphIndex Array starts two bytes after the start of the
623		# idRangeOffset word of this subHeader. We can safely point to the first entry in the GlyphIndexArray,
624		# since the first subrange of the GlyphIndexArray is for subHeader 0, which always starts with
625		# charcode 0 and GID 0.
626
627		idRangeOffset = (len(subHeaderList)-1)*8 + 2 # offset to beginning of glyphIDArray from first subheader idRangeOffset.
628		subheadRangeLen = len(subHeaderList) -1 # skip last special empty-set subheader; we've already hardocodes its idRangeOffset to 2.
629		for index in range(subheadRangeLen):
630			subHeader = subHeaderList[index]
631			subHeader.idRangeOffset = 0
632			for j in range(index):
633				prevSubhead = subHeaderList[j]
634				if prevSubhead.glyphIndexArray == subHeader.glyphIndexArray: # use the glyphIndexArray subarray
635					subHeader.idRangeOffset = prevSubhead.idRangeOffset - (index-j)*8
636					subHeader.glyphIndexArray = []
637					break
638			if subHeader.idRangeOffset == 0: # didn't find one.
639				subHeader.idRangeOffset = idRangeOffset
640				idRangeOffset = (idRangeOffset - 8) + subHeader.entryCount*2 # one less subheader, one more subArray.
641			else:
642				idRangeOffset = idRangeOffset - 8  # one less subheader
643
644		# Now we can write out the data!
645		length = 6 + 512 + 8*len(subHeaderList) # header, 256 subHeaderKeys, and subheader array.
646		for subhead in 	subHeaderList[:-1]:
647			length = length + len(subhead.glyphIndexArray)*2  # We can't use subhead.entryCount, as some of the subhead may share subArrays.
648		dataList = [struct.pack(">HHH", 2, length, self.language)]
649		for index in subHeaderKeys:
650			dataList.append(struct.pack(">H", index*8))
651		for subhead in 	subHeaderList:
652			dataList.append(struct.pack(subHeaderFormat, subhead.firstCode, subhead.entryCount, subhead.idDelta, subhead.idRangeOffset))
653		for subhead in 	subHeaderList[:-1]:
654			for gi in subhead.glyphIndexArray:
655				dataList.append(struct.pack(">H", gi))
656		data = bytesjoin(dataList)
657		assert (len(data) == length), "Error: cmap format 2 is not same length as calculated! actual: " + str(len(data))+ " calc : " + str(length)
658		return data
659
660	def fromXML(self, name, attrs, content, ttFont):
661		self.language = safeEval(attrs["language"])
662		if not hasattr(self, "cmap"):
663			self.cmap = {}
664		cmap = self.cmap
665
666		for element in content:
667			if not isinstance(element, tuple):
668				continue
669			name, attrs, content = element
670			if name != "map":
671				continue
672			cmap[safeEval(attrs["code"])] = attrs["name"]
673
674
# struct format string for seven big-endian unsigned 16-bit values.
# NOTE(review): presumably the fixed-size leading fields of a format 4
# subtable (format, length, language, segCountX2, searchRange, entrySelector,
# rangeShift); its use is outside this excerpt -- confirm against
# cmap_format_4.compile().
cmap_format_4_format = ">7H"

# Layout of the variable-length arrays read by cmap_format_4.decompile():
#uint16  endCode[segCount]          # Ending character code for each segment, last = 0xFFFF.
#uint16  reservedPad                # This value should be zero
#uint16  startCode[segCount]        # Starting character code for each segment
#uint16  idDelta[segCount]          # Delta for all character codes in segment
#uint16  idRangeOffset[segCount]    # Offset in bytes to glyph indexArray, or 0
#uint16  glyphIndexArray[variable]  # Glyph index array
683
def splitRange(startCode, endCode, cmap):
	"""Split ``startCode..endCode`` into cheaper-to-store sub-segments.

	``cmap`` maps every character code in the (inclusive) range to a glyph
	ID. The range is split into subranges with consecutive glyph IDs in such
	a way that the cmap4 subtable can be stored "most" efficiently. This is
	a heuristic, not a proven optimum.

	Returns:
		A ``(start, end)`` pair of lists. ``end`` holds the last code of
		every resulting segment; ``start`` holds the first code of every
		segment except the first one (which starts at ``startCode``), so
		``len(start) + 1 == len(end)``. When splitting does not pay off the
		result is ``([], [endCode])``.
	"""
	if startCode == endCode:
		return [], [endCode]

	# Pass 1: gather the runs in which the glyph IDs are consecutive.
	runs = []
	runStart = None  # first code of the run in progress, if any
	prevCode = startCode
	prevID = cmap[startCode]
	for code in range(startCode + 1, endCode + 1):
		glyphID = cmap[code]
		if glyphID - 1 == prevID:
			if runStart is None:
				runStart = prevCode
		elif runStart is not None:
			runs.append((runStart, prevCode))
			runStart = None
		prevCode, prevID = code, glyphID
	if runStart is not None:
		runs.append((runStart, prevCode))
	assert prevCode == endCode

	# Pass 2: filter out the runs that would only make the data bigger.
	# A new segment costs 8 bytes, not using a new segment costs 2 bytes per
	# character.
	worthwhile = []
	for first, last in runs:
		if first == startCode and last == endCode:
			break  # the whole range, we're fine
		if first == startCode or last == endCode:
			threshold = 4  # split costs one more segment
		else:
			threshold = 8  # split costs two more segments
		if last - first + 1 > threshold:
			worthwhile.append((first, last))

	if not worthwhile:
		return [], [endCode]

	# Pass 3: cover the whole range, adding the segments before, between and
	# after the kept runs -- those in which the glyph IDs are _not_
	# consecutive.
	segments = []
	nextCode = startCode
	for first, last in worthwhile:
		if first != nextCode:
			segments.append((nextCode, first - 1))
		segments.append((first, last))
		nextCode = last + 1
	if nextCode != endCode + 1:
		segments.append((nextCode, endCode))

	# Transform the segments into startCode/endCode lists; the first start
	# code is implied by the caller's startCode and therefore left out.
	start = [first for first, _ in segments][1:]
	end = [last for _, last in segments]

	assert len(start) + 1 == len(end)
	return start, end
761
762
class cmap_format_4(CmapSubtable):
	"""Format 4 subtable: 16-bit character codes grouped into segments.

	Each segment covers a run of consecutive character codes
	(``startCode`` .. ``endCode``).  The glyph IDs of a segment are stored
	either as a single ``idDelta`` that is added to every character code
	(modulo 0x10000), or, when they are not consecutive, explicitly in the
	trailing glyph index array that is addressed through ``idRangeOffset``.
	The last segment is always the 0xFFFF terminator.
	"""

	def decompile(self, data, ttFont):
		"""Decode the segment arrays into ``self.cmap``.

		``data`` and ``ttFont`` must either both be given, in which case the
		header is parsed first through ``decompileHeader``, or both be
		``None``, in which case the bytes already stored in ``self.data``
		are decoded.  On return ``self.cmap`` maps character codes to glyph
		names (codes that resolve to glyph ID 0 are left out) and
		``self.data`` is reset to ``None``.
		"""
		# we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
		# If not, someone is calling the subtable decompile() directly, and must provide both args.
		if data is not None and ttFont is not None:
			self.decompileHeader(data, ttFont)
		else:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"

		data = self.data # decompileHeader assigns the data after the header to self.data
		# Only segCountX2 is needed for decoding; compile() recomputes the
		# three search fields from the segment count.
		(segCountX2, searchRange, entrySelector, rangeShift) = \
					struct.unpack(">4H", data[:8])
		data = data[8:]
		segCount = segCountX2 // 2

		allCodes = array.array("H")
		allCodes.frombytes(data)
		self.data = data = None

		# The words were written big-endian (see compile()); array("H") holds
		# them in native byte order.
		if sys.byteorder != "big": allCodes.byteswap()

		# divide the data
		endCode = allCodes[:segCount]
		allCodes = allCodes[segCount+1:]  # the +1 is skipping the reservedPad field
		startCode = allCodes[:segCount]
		allCodes = allCodes[segCount:]
		idDelta = allCodes[:segCount]
		allCodes = allCodes[segCount:]
		idRangeOffset = allCodes[:segCount]
		glyphIndexArray = allCodes[segCount:]
		lenGIArray = len(glyphIndexArray)

		# build 2-byte character mapping
		charCodes = []
		gids = []
		for i in range(len(startCode) - 1):	# don't do 0xffff!
			start = startCode[i]
			delta = idDelta[i]
			rangeOffset = idRangeOffset[i]
			# rangeOffset is a byte offset counted from this segment's own
			# idRangeOffset slot.  Converted to words, and with the
			# (len(idRangeOffset) - i) slots that still precede the glyph
			# index array subtracted, it becomes the glyphIndexArray index of
			# the segment's first code; ``- start`` lets us add the character
			# code directly below.
			partial = rangeOffset // 2 - start + i - len(idRangeOffset)

			rangeCharCodes = list(range(startCode[i], endCode[i] + 1))
			charCodes.extend(rangeCharCodes)
			if rangeOffset == 0:
				# Consecutive glyph IDs: charCode + delta, wrapped to 16 bits.
				gids.extend([(charCode + delta) & 0xFFFF for charCode in rangeCharCodes])
			else:
				for charCode in rangeCharCodes:
					index = charCode + partial
					assert (index < lenGIArray), "In format 4 cmap, range (%d), the calculated index (%d) into the glyph index array is not less than the length of the array (%d) !" % (i, index, lenGIArray)
					if glyphIndexArray[index] != 0:  # if not missing glyph
						glyphID = glyphIndexArray[index] + delta
					else:
						glyphID = 0  # missing glyph
					gids.append(glyphID & 0xFFFF)

		self.cmap = _make_map(self.ttFont, charCodes, gids)

	def compile(self, ttFont):
		"""Return the binary form of this subtable.

		If the subtable still holds its undecoded ``self.data``, those bytes
		are written back behind a freshly packed format/length/language
		header.  Otherwise the segments are rebuilt from ``self.cmap``.

		Glyph names are resolved through ``ttFont.getReverseGlyphMap()``.
		If a name is missing the map is rebuilt and the lookup retried; if
		that fails too, names are resolved one by one, where a name of the
		form ``gid<N>`` is taken as the literal glyph ID ``N`` ("virtual
		GIDs") and any other name goes through ``ttFont.getGlyphID()``.

		Raises:
			KeyError: a glyph name in ``self.cmap`` cannot be resolved.
		"""
		if self.data:
			return struct.pack(">HHH", self.format, self.length, self.language) + self.data

		charCodes = list(self.cmap.keys())
		if not charCodes:
			# Empty mapping: only the terminating segment is written.
			startCode = [0xffff]
			endCode = [0xffff]
		else:
			charCodes.sort()
			names = [self.cmap[code] for code in charCodes]
			nameMap = ttFont.getReverseGlyphMap()
			try:
				gids = [nameMap[name] for name in names]
			except KeyError:
				nameMap = ttFont.getReverseGlyphMap(rebuild=True)
				try:
					gids = [nameMap[name] for name in names]
				except KeyError:
					# allow virtual GIDs in format 4 tables
					gids = []
					for name in names:
						try:
							gid = nameMap[name]
						except KeyError:
							try:
								if (name[:3] == 'gid'):
									gid = int(name[3:])
								else:
									gid = ttFont.getGlyphID(name)
							except Exception:
								# Report every resolution failure uniformly as a
								# KeyError on the glyph name, but let
								# KeyboardInterrupt/SystemExit propagate.
								raise KeyError(name)

						gids.append(gid)
			cmap = {}  # code:glyphID mapping
			for code, gid in zip(charCodes, gids):
				cmap[code] = gid

			# Build startCode and endCode lists.
			# Split the char codes in ranges of consecutive char codes, then split
			# each range in more ranges of consecutive/not consecutive glyph IDs.
			# See splitRange().
			lastCode = charCodes[0]
			endCode = []
			startCode = [lastCode]
			for charCode in charCodes[1:]:  # skip the first code, it's the first start code
				if charCode == lastCode + 1:
					lastCode = charCode
					continue
				start, end = splitRange(startCode[-1], lastCode, cmap)
				startCode.extend(start)
				endCode.extend(end)
				startCode.append(charCode)
				lastCode = charCode
			start, end = splitRange(startCode[-1], lastCode, cmap)
			startCode.extend(start)
			endCode.extend(end)
			startCode.append(0xffff)
			endCode.append(0xffff)

		# Build idDelta, idRangeOffset and the glyph index array, one entry per
		# segment.  ``cmap`` is only touched for real segments, so the empty
		# case above (where it is never assigned) does not reach it.
		idDelta = []
		idRangeOffset = []
		glyphIndexArray = []
		for i in range(len(endCode)-1):  # skip the closing codes (0xffff)
			indices = []
			for charCode in range(startCode[i], endCode[i] + 1):
				indices.append(cmap[charCode])
			if (indices == list(range(indices[0], indices[0] + len(indices)))):
				# Consecutive glyph IDs: a delta is enough.
				idDelta.append((indices[0] - startCode[i]) % 0x10000)
				idRangeOffset.append(0)
			else:
				# Byte offset from this segment's idRangeOffset slot to its
				# first glyph index: the (len(endCode) - i) remaining slots
				# plus the glyph indices already emitted, times two bytes.
				idDelta.append(0)
				idRangeOffset.append(2 * (len(endCode) + len(glyphIndexArray) - i))
				glyphIndexArray.extend(indices)
		idDelta.append(1)  # 0xffff + 1 == (tadaa!) 0. So this end code maps to .notdef
		idRangeOffset.append(0)

		# Header fields derived from the segment count.
		segCount = len(endCode)
		segCountX2 = segCount * 2
		searchRange, entrySelector, rangeShift = getSearchRange(segCount, 2)

		# endCode array, the zero reservedPad word, then startCode array.
		charCodeArray = array.array("H", endCode + [0] + startCode)
		idDeltaArray = array.array("H", idDelta)
		restArray = array.array("H", idRangeOffset + glyphIndexArray)
		if sys.byteorder != "big": charCodeArray.byteswap()
		if sys.byteorder != "big": idDeltaArray.byteswap()
		if sys.byteorder != "big": restArray.byteswap()
		data = charCodeArray.tobytes() + idDeltaArray.tobytes() + restArray.tobytes()

		length = struct.calcsize(cmap_format_4_format) + len(data)
		header = struct.pack(cmap_format_4_format, self.format, length, self.language,
				segCountX2, searchRange, entrySelector, rangeShift)
		return header + data

	def fromXML(self, name, attrs, content, ttFont):
		"""Read the ``language`` attribute and the ``<map>`` children.

		Each ``<map code=... name=...>`` element adds one entry to
		``self.cmap`` (created if it does not exist yet).  Non-tuple items in
		``content`` are skipped; any child element that is not a ``map``
		trips an assertion.
		"""
		self.language = safeEval(attrs["language"])
		if not hasattr(self, "cmap"):
			self.cmap = {}
		cmap = self.cmap

		for element in content:
			if not isinstance(element, tuple):
				continue
			nameMap, attrsMap, dummyContent = element
			if nameMap != "map":
				assert 0, "Unrecognized keyword in cmap subtable"
			cmap[safeEval(attrsMap["code"])] = attrsMap["name"]
932
933
class cmap_format_6(CmapSubtable):
	"""Format 6 subtable: a single contiguous run of 16-bit character codes.

	After the common header the binary form stores the first character code,
	an entry count, and one 16-bit glyph ID per entry.
	"""

	def decompile(self, data, ttFont):
		"""Decode the glyph ID run into ``self.cmap``.

		``data`` and ``ttFont`` are either both supplied (the header is then
		parsed through ``decompileHeader``) or both ``None`` (the bytes
		already held in ``self.data`` are decoded).  ``self.data`` is reset
		to ``None`` afterwards.
		"""
		# Normally reached through the subtable's __getattr__ with both
		# arguments None; a direct caller has to pass both of them.
		if data is None or ttFont is None:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"
		else:
			self.decompileHeader(data, ttFont)

		body = self.data  # bytes following the subtable header
		firstCode, entryCount = struct.unpack(">HH", body[:4])
		# Read exactly entryCount glyph IDs: the remaining length is not
		# guaranteed to equal 2 * entryCount (not true in Apple's Helvetica).
		glyphIDs = array.array("H")
		glyphIDs.frombytes(body[4:4 + 2 * entryCount])
		if sys.byteorder != "big":
			glyphIDs.byteswap()
		self.data = None

		codes = list(range(firstCode, firstCode + len(glyphIDs)))
		self.cmap = _make_map(self.ttFont, codes, glyphIDs)

	def compile(self, ttFont):
		"""Return the binary form of this subtable.

		Undecoded ``self.data`` is written back behind a repacked header.
		Otherwise one glyph ID is emitted for every code between the
		smallest and the largest mapped character code; codes in that span
		without a mapping get glyph ID 0.
		"""
		if self.data:
			return struct.pack(">HHH", self.format, self.length, self.language) + self.data

		mapping = self.cmap
		sortedCodes = sorted(mapping.keys())
		firstCode = 0
		entryCount = 0
		payload = b""
		if sortedCodes:  # yes, there are empty cmap tables.
			firstCode = sortedCodes[0]
			entryCount = sortedCodes[-1] - firstCode + 1
			glyphIDs = array.array("H", [
				ttFont.getGlyphID(mapping[code]) if code in mapping else 0
				for code in range(firstCode, firstCode + entryCount)
			])
			if sys.byteorder != "big":
				glyphIDs.byteswap()
			payload = glyphIDs.tobytes()

		# 10 == size of the five 16-bit header fields packed here.
		header = struct.pack(
			">HHHHH", 6, len(payload) + 10, self.language, firstCode, entryCount)
		return header + payload

	def fromXML(self, name, attrs, content, ttFont):
		"""Read the ``language`` attribute and the ``<map>`` children.

		Every ``<map code=... name=...>`` child adds one entry to
		``self.cmap`` (created if missing); everything else in ``content``
		is ignored.
		"""
		self.language = safeEval(attrs["language"])
		if not hasattr(self, "cmap"):
			self.cmap = {}
		mapping = self.cmap

		for item in content:
			if not isinstance(item, tuple):
				continue
			childName, childAttrs, _childContent = item
			if childName == "map":
				mapping[safeEval(childAttrs["code"])] = childAttrs["name"]
992
993
class cmap_format_12_or_13(CmapSubtable):
	"""Shared implementation of the format 12 and format 13 subtables.

	Both formats store a list of groups, each packed as three 32-bit values
	``(startCharCode, endCharCode, glyphID)``.  Subclasses supply the parts
	that differ:

	- ``_format_step``: glyph ID increment between two consecutive
	  character codes of one group;
	- ``_computeGIDs(startingGlyph, numberOfGlyphs)``: expand a group's
	  glyph ID into one glyph ID per character code;
	- ``_IsInSameRun(glyphID, lastGlyphID, charCode, lastCharCode)``:
	  whether a mapping extends the group currently being built.
	"""

	def __init__(self, format):
		"""Create an empty subtable of the given ``format`` number."""
		self.format = format
		self.reserved = 0
		self.data = None
		self.ttFont = None

	def decompileHeader(self, data, ttFont):
		"""Parse the 16-byte header and keep the group records in ``self.data``.

		Asserts that the length of ``data``, the length stored in the header
		and the size implied by the group count (12 bytes each) all agree.
		"""
		format, reserved, length, language, nGroups = struct.unpack(">HHLLL", data[:16])
		assert len(data) == (16 + nGroups*12) == (length), "corrupt cmap table format %d (data length: %d, header length: %d)" % (self.format, len(data), length)
		self.format = format
		self.reserved = reserved
		self.length = length
		self.language = language
		self.nGroups = nGroups
		self.data = data[16:]
		self.ttFont = ttFont

	def decompile(self, data, ttFont):
		"""Expand the group records into ``self.cmap``.

		``data`` and ``ttFont`` are either both supplied (the header is then
		parsed through ``decompileHeader``) or both ``None`` (the records
		already held in ``self.data`` are decoded).  ``self.data`` is reset
		to ``None`` afterwards.
		"""
		# we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
		# If not, someone is calling the subtable decompile() directly, and must provide both args.
		if data is not None and ttFont is not None:
			self.decompileHeader(data, ttFont)
		else:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"

		data = self.data # decompileHeader assigns the data after the header to self.data
		charCodes = []
		gids = []
		pos = 0
		for _ in range(self.nGroups):
			startCharCode, endCharCode, glyphID = struct.unpack(">LLL", data[pos:pos+12])
			pos += 12
			lenGroup = 1 + endCharCode - startCharCode
			charCodes.extend(range(startCharCode, endCharCode + 1))
			gids.extend(self._computeGIDs(glyphID, lenGroup))
		self.data = data = None
		self.cmap = _make_map(self.ttFont, charCodes, gids)

	def compile(self, ttFont):
		"""Return the binary form of this subtable.

		Undecoded ``self.data`` is written back behind a repacked header.
		Otherwise ``self.cmap`` is sorted by character code and folded into
		groups using the subclass's ``_IsInSameRun``.  An empty ``self.cmap``
		produces a valid subtable with zero groups.

		Glyph names are resolved through ``ttFont.getReverseGlyphMap()``.
		If a name is missing the map is rebuilt and the lookup retried; if
		that fails too, names are resolved one by one, where a name of the
		form ``gid<N>`` is taken as the literal glyph ID ``N`` ("virtual
		GIDs") and any other name goes through ``ttFont.getGlyphID()``.

		Raises:
			KeyError: a glyph name in ``self.cmap`` cannot be resolved.
		"""
		if self.data:
			return struct.pack(">HHLLL", self.format, self.reserved, self.length, self.language, self.nGroups) + self.data
		charCodes = list(self.cmap.keys())
		names = list(self.cmap.values())
		nameMap = ttFont.getReverseGlyphMap()
		try:
			gids = [nameMap[name] for name in names]
		except KeyError:
			nameMap = ttFont.getReverseGlyphMap(rebuild=True)
			try:
				gids = [nameMap[name] for name in names]
			except KeyError:
				# allow virtual GIDs in format 12 tables
				gids = []
				for name in names:
					try:
						gid = nameMap[name]
					except KeyError:
						try:
							if (name[:3] == 'gid'):
								gid = int(name[3:])
							else:
								gid = ttFont.getGlyphID(name)
						except Exception:
							# Report every resolution failure uniformly as a
							# KeyError on the glyph name, but let
							# KeyboardInterrupt/SystemExit propagate.
							raise KeyError(name)

					gids.append(gid)

		cmap = {}  # code:glyphID mapping
		for code, gid in zip(charCodes, gids):
			cmap[code] = gid

		charCodes.sort()
		dataList = []
		if charCodes:  # an empty mapping simply yields no groups
			startCharCode = charCodes[0]
			startGlyphID = cmap[startCharCode]
			# Seed the "previous" values so that the first code is seen as
			# continuing the (still empty) first group.
			lastGlyphID = startGlyphID - self._format_step
			lastCharCode = startCharCode - 1
			for charCode in charCodes:
				glyphID = cmap[charCode]
				if not self._IsInSameRun(glyphID, lastGlyphID, charCode, lastCharCode):
					# Close the finished group and start a new one here.
					dataList.append(struct.pack(">LLL", startCharCode, lastCharCode, startGlyphID))
					startCharCode = charCode
					startGlyphID = glyphID
				lastGlyphID = glyphID
				lastCharCode = charCode
			dataList.append(struct.pack(">LLL", startCharCode, lastCharCode, startGlyphID))
		nGroups = len(dataList)
		data = bytesjoin(dataList)
		lengthSubtable = len(data) + 16
		assert len(data) == (nGroups*12) == (lengthSubtable-16)
		return struct.pack(">HHLLL", self.format, self.reserved, lengthSubtable, self.language, nGroups) + data

	def toXML(self, writer, ttFont):
		"""Write the header fields as attributes and the mappings sorted by code."""
		writer.begintag(self.__class__.__name__, [
				("platformID", self.platformID),
				("platEncID", self.platEncID),
				("format", self.format),
				("reserved", self.reserved),
				("length", self.length),
				("language", self.language),
				("nGroups", self.nGroups),
				])
		writer.newline()
		codes = sorted(self.cmap.items())
		self._writeCodes(codes, writer)
		writer.endtag(self.__class__.__name__)
		writer.newline()

	def fromXML(self, name, attrs, content, ttFont):
		"""Read the header attributes and the ``<map>`` children.

		Every ``<map code=... name=...>`` child adds one entry to
		``self.cmap`` (created if missing); everything else in ``content``
		is ignored.
		"""
		self.format = safeEval(attrs["format"])
		self.reserved = safeEval(attrs["reserved"])
		self.length = safeEval(attrs["length"])
		self.language = safeEval(attrs["language"])
		self.nGroups = safeEval(attrs["nGroups"])
		if not hasattr(self, "cmap"):
			self.cmap = {}
		cmap = self.cmap

		for element in content:
			if not isinstance(element, tuple):
				continue
			name, attrs, content = element
			if name != "map":
				continue
			cmap[safeEval(attrs["code"])] = attrs["name"]
1126
1127
class cmap_format_12(cmap_format_12_or_13):
	"""Format 12 flavour: inside a group the glyph ID grows with the character code."""

	# Glyph ID increment between consecutive character codes of a group.
	_format_step = 1

	def __init__(self, format=12):
		"""Create an empty subtable; ``format`` defaults to 12."""
		cmap_format_12_or_13.__init__(self, format)

	def _computeGIDs(self, startingGlyph, numberOfGlyphs):
		"""Return ``numberOfGlyphs`` consecutive glyph IDs starting at ``startingGlyph``."""
		return [startingGlyph + offset for offset in range(numberOfGlyphs)]

	def _IsInSameRun(self, glyphID, lastGlyphID, charCode, lastCharCode):
		"""Tell whether both the character code and the glyph ID advanced by exactly one."""
		if charCode != lastCharCode + 1:
			return False
		return glyphID == lastGlyphID + 1
1140
1141
class cmap_format_13(cmap_format_12_or_13):
	"""Format 13 flavour: every character code of a group maps to the same glyph ID."""

	# Glyph ID increment between consecutive character codes of a group.
	_format_step = 0

	def __init__(self, format=13):
		"""Create an empty subtable; ``format`` defaults to 13."""
		cmap_format_12_or_13.__init__(self, format)

	def _computeGIDs(self, startingGlyph, numberOfGlyphs):
		"""Return ``startingGlyph`` repeated ``numberOfGlyphs`` times."""
		return [startingGlyph for _ in range(numberOfGlyphs)]

	def _IsInSameRun(self, glyphID, lastGlyphID, charCode, lastCharCode):
		"""Tell whether the character code advanced by one while the glyph ID stayed put."""
		if charCode != lastCharCode + 1:
			return False
		return glyphID == lastGlyphID
1154
1155
def cvtToUVS(threeByteString):
	"""Decode a 3-byte big-endian byte string into an integer.

	The value is widened to four bytes with a leading zero byte so that it
	can be unpacked as an unsigned 32-bit number.
	"""
	padded = b"\0" + threeByteString
	return struct.unpack(">L", padded)[0]
1160
def cvtFromUVS(val):
	"""Encode ``val`` as a 3-byte big-endian byte string.

	``val`` must fit in 24 bits (checked with an assertion); it is packed as
	an unsigned 32-bit number and the leading byte is dropped.
	"""
	assert 0 <= val < 0x1000000
	return struct.pack(">L", val)[1:]
1165
1166
class cmap_format_14(CmapSubtable):
	"""Format 14 subtable: Unicode Variation Sequences.

	The mappings live in ``self.uvsDict``, keyed by variation selector code
	point.  Each value is a list of ``(baseCodePoint, glyphName)`` tuples; a
	``glyphName`` of ``None`` marks a "default" mapping, which is stored as
	a code point range without a glyph ID.  ``self.cmap`` is kept as an
	empty dict so that code expecting every subtable to have one keeps
	working.
	"""

	def decompileHeader(self, data, ttFont):
		"""Parse the 10-byte header and keep the remaining bytes in ``self.data``."""
		format, length, numVarSelectorRecords = struct.unpack(">HLL", data[:10])
		self.data = data[10:]
		self.length = length
		self.numVarSelectorRecords = numVarSelectorRecords
		self.ttFont = ttFont
		self.language = 0xFF # has no language.

	def decompile(self, data, ttFont):
		"""Decode the variation selector records into ``self.uvsDict``.

		``data`` and ``ttFont`` are either both supplied (the header is then
		parsed through ``decompileHeader``) or both ``None`` (the bytes
		already held in ``self.data`` are decoded).
		"""
		if data is not None and ttFont is not None:
			self.decompileHeader(data, ttFont)
		else:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"
		data = self.data

		self.cmap = {} # so that clients that expect this to exist in a cmap table won't fail.
		uvsDict = {}
		recOffset = 0
		for _ in range(self.numVarSelectorRecords):
			uvs, defOVSOffset, nonDefUVSOffset = struct.unpack(">3sLL", data[recOffset:recOffset +11])
			recOffset += 11
			varUVS = cvtToUVS(uvs)
			if defOVSOffset:
				# Stored offsets count from the start of the subtable, but
				# ``data`` no longer contains the 10-byte header.
				startOffset = defOVSOffset - 10
				numValues, = struct.unpack(">L", data[startOffset:startOffset+4])
				startOffset +=4
				for _r in range(numValues):
					uv, addtlCnt = struct.unpack(">3sB", data[startOffset:startOffset+4])
					startOffset += 4
					firstBaseUV = cvtToUVS(uv)
					cnt = addtlCnt+1
					# Default mappings carry no glyph: record None for each
					# code point of the range.
					localUVList = [(baseUV, None) for baseUV in range(firstBaseUV, firstBaseUV+cnt)]
					uvsDict.setdefault(varUVS, []).extend(localUVList)

			if nonDefUVSOffset:
				startOffset = nonDefUVSOffset - 10
				numRecs, = struct.unpack(">L", data[startOffset:startOffset+4])
				startOffset +=4
				localUVList = []
				for _r in range(numRecs):
					uv, gid = struct.unpack(">3sH", data[startOffset:startOffset+5])
					startOffset += 5
					uv = cvtToUVS(uv)
					glyphName = self.ttFont.getGlyphName(gid)
					localUVList.append((uv, glyphName))
				uvsDict.setdefault(varUVS, []).extend(localUVList)

		self.uvsDict = uvsDict

	def toXML(self, writer, ttFont):
		"""Write one ``<map uv=... uvs=... [name=...]>`` element per mapping.

		Selectors are written in ascending order; within a selector the
		default mappings (no ``name``) come first.  The lists stored in
		``self.uvsDict`` are sorted in place.
		"""
		writer.begintag(self.__class__.__name__, [
				("platformID", self.platformID),
				("platEncID", self.platEncID),
				])
		writer.newline()
		uvsDict = self.uvsDict
		uvsList = sorted(uvsDict.keys())
		for uvs in uvsList:
			uvList = uvsDict[uvs]
			uvList.sort(key=lambda item: (item[1] is not None, item[0], item[1]))
			for uv, gname in uvList:
				attrs = [("uv", hex(uv)), ("uvs", hex(uvs))]
				if gname is not None:
					attrs.append(("name", gname))
				writer.simpletag("map", attrs)
				writer.newline()
		writer.endtag(self.__class__.__name__)
		writer.newline()

	def fromXML(self, name, attrs, content, ttFont):
		"""Read the ``<map>`` children into ``self.uvsDict``.

		Mappings are appended to an already existing ``self.uvsDict``, or to
		a new one if the subtable has none yet.  A missing ``name``
		attribute denotes a default mapping.
		"""
		self.language = 0xFF # provide a value so that CmapSubtable.__lt__() won't fail
		if not hasattr(self, "cmap"):
			self.cmap = {} # so that clients that expect this to exist in a cmap table won't fail.
		if not hasattr(self, "uvsDict"):
			self.uvsDict = {}
		# Bind the local unconditionally: it is needed below whether or not
		# the dict already existed.
		uvsDict = self.uvsDict

		# For backwards compatibility reasons we accept "None" as an indicator
		# for "default mapping", unless the font actually has a glyph named
		# "None".
		_hasGlyphNamedNone = None

		for element in content:
			if not isinstance(element, tuple):
				continue
			childName, childAttrs, _childContent = element
			if childName != "map":
				continue
			uvs = safeEval(childAttrs["uvs"])
			uv = safeEval(childAttrs["uv"])
			gname = childAttrs.get("name")
			if gname == "None":
				# Look the glyph order up lazily, and only once.
				if _hasGlyphNamedNone is None:
					_hasGlyphNamedNone = "None" in ttFont.getGlyphOrder()
				if not _hasGlyphNamedNone:
					gname = None
			uvsDict.setdefault(uvs, []).append((uv, gname))

	def compile(self, ttFont):
		"""Return the binary form of this subtable.

		Undecoded ``self.data`` is written back behind a repacked header.
		Otherwise the records are rebuilt from ``self.uvsDict`` and
		``self.numVarSelectorRecords`` / ``self.length`` are updated.

		Default mappings are merged into ranges of consecutive code points.
		A range record stores its extra length in one unsigned byte, so runs
		longer than 256 code points are split into several records.
		"""
		if self.data:
			return struct.pack(">HLL", self.format, self.length, self.numVarSelectorRecords) + self.data

		uvsDict = self.uvsDict
		uvsList = sorted(uvsDict.keys())
		self.numVarSelectorRecords = len(uvsList)
		offset = 10 + self.numVarSelectorRecords*11 # current value is end of VarSelectorRecords block.
		data = []
		varSelectorRecords =[]
		for uvs in uvsList:
			entryList = uvsDict[uvs]

			defList = sorted(entry[0] for entry in entryList if entry[1] is None)
			if defList:
				defOVSOffset = offset

				defRecs = []
				runStart = defList[0]
				runLength = 0  # code points collected for the current range
				for uv in defList:
					# Flush when the run is interrupted, or when it has reached
					# the 256 code points one record can describe.
					if runLength and (uv != runStart + runLength or runLength == 256):
						defRecs.append(struct.pack(">3sB", cvtFromUVS(runStart), runLength - 1))
						runStart = uv
						runLength = 0
					runLength += 1
				defRecs.append(struct.pack(">3sB", cvtFromUVS(runStart), runLength - 1))

				numDefRecs = len(defRecs)
				data.append(struct.pack(">L", numDefRecs))
				data.extend(defRecs)
				offset += 4 + numDefRecs*4
			else:
				defOVSOffset = 0

			ndefList = [entry for entry in entryList if entry[1] is not None]
			if ndefList:
				nonDefUVSOffset = offset
				ndefList.sort()
				numNonDefRecs = len(ndefList)
				data.append(struct.pack(">L", numNonDefRecs))
				offset += 4 + numNonDefRecs*5

				for uv, gname in ndefList:
					gid = ttFont.getGlyphID(gname)
					ndrec = struct.pack(">3sH", cvtFromUVS(uv), gid)
					data.append(ndrec)
			else:
				nonDefUVSOffset = 0

			vrec = struct.pack(">3sLL", cvtFromUVS(uvs), defOVSOffset, nonDefUVSOffset)
			varSelectorRecords.append(vrec)

		data = bytesjoin(varSelectorRecords) + bytesjoin(data)
		self.length = 10 + len(data)
		headerdata = struct.pack(">HLL", self.format, self.length, self.numVarSelectorRecords)

		return headerdata + data
1341
1342
class cmap_format_unknown(CmapSubtable):
	"""Opaque subtable that carries its raw bytes in ``self.data``.

	Nothing is decoded: the bytes are hex-dumped to XML and written back
	unchanged on compile.  Presumably used for formats that have no entry in
	``cmap_classes`` -- verify against the code that instantiates subtables.
	"""

	def toXML(self, writer, ttFont):
		"""Dump the raw bytes as hex inside a ``cmap_format_<format>`` element."""
		# Keep the "cmap_format_" prefix of the class name and append the
		# actual format number.
		tagName = self.__class__.__name__[:len("cmap_format_")] + str(self.format)
		tagAttrs = [
			("platformID", self.platformID),
			("platEncID", self.platEncID),
		]
		writer.begintag(tagName, tagAttrs)
		writer.newline()
		writer.dumphex(self.data)
		writer.endtag(tagName)
		writer.newline()

	def fromXML(self, name, attrs, content, ttFont):
		"""Restore the raw bytes from the hex dump; ``self.cmap`` stays empty."""
		self.data = readHex(content)
		self.cmap = {}

	def decompileHeader(self, data, ttFont):
		"""Store ``data`` untouched and set a placeholder language."""
		self.language = 0  # dummy value
		self.data = data

	def decompile(self, data, ttFont):
		"""Run ``decompileHeader`` when called directly; otherwise do nothing.

		``data`` and ``ttFont`` must be both given or both ``None``.
		"""
		# Normally reached through the subtable's __getattr__ with both
		# arguments None; a direct caller has to pass both of them.
		if data is None or ttFont is None:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"
		else:
			self.decompileHeader(data, ttFont)

	def compile(self, ttFont):
		"""Return the stored bytes, or ``None`` when there are none."""
		return self.data or None
1377
# Subtable classes implemented in this module, keyed by the numeric cmap
# subtable format.
cmap_classes = {
	# formats 0 through 6
	0: cmap_format_0, 2: cmap_format_2, 4: cmap_format_4, 6: cmap_format_6,
	# formats 12 and 13 share cmap_format_12_or_13
	12: cmap_format_12, 13: cmap_format_13,
	# Unicode Variation Sequences
	14: cmap_format_14,
}
1387