• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1from fontTools.misc.textTools import bytesjoin, safeEval, readHex
2from fontTools.misc.encodingTools import getEncoding
3from fontTools.ttLib import getSearchRange
4from fontTools.unicode import Unicode
5from . import DefaultTable
6import sys
7import struct
8import array
9import logging
10
11
# Module-level logger; table__c_m_a_p.decompile() uses it to report
# zero-length subtables that it skips.
log = logging.getLogger(__name__)
13
14
15def _make_map(font, chars, gids):
16	assert len(chars) == len(gids)
17	glyphNames = font.getGlyphNameMany(gids)
18	cmap = {}
19	for char,gid,name in zip(chars,gids,glyphNames):
20		if gid == 0:
21			continue
22		cmap[char] = name
23	return cmap
24
class table__c_m_a_p(DefaultTable.DefaultTable):
	"""Character to Glyph Index Mapping Table

	This class represents the `cmap <https://docs.microsoft.com/en-us/typography/opentype/spec/cmap>`_
	table, which maps between input characters (in Unicode or other system encodings)
	and glyphs within the font. The ``cmap`` table contains one or more subtables
	which determine the mapping of characters to glyphs across different platforms
	and encoding systems.

	``table__c_m_a_p`` objects expose an accessor ``.tables`` which provides access
	to the subtables, although it is normally easier to retrieve individual subtables
	through the utility methods described below. To add new subtables to a font,
	first determine the subtable format (if in doubt use format 4 for glyphs within
	the BMP, format 12 for glyphs outside the BMP, and format 14 for Unicode Variation
	Sequences) construct subtable objects with ``CmapSubtable.newSubtable(format)``,
	and append them to the ``.tables`` list.

	Within a subtable, the mapping of characters to glyphs is provided by the ``.cmap``
	attribute.

	Example::

		cmap4_0_3 = CmapSubtable.newSubtable(4)
		cmap4_0_3.platformID = 0
		cmap4_0_3.platEncID = 3
		cmap4_0_3.language = 0
		cmap4_0_3.cmap = { 0xC1: "Aacute" }

		cmap = newTable("cmap")
		cmap.tableVersion = 0
		cmap.tables = [cmap4_0_3]
	"""

	def getcmap(self, platformID, platEncID):
		"""Returns the first subtable which matches the given platform and encoding.

		Args:
			platformID (int): The platform ID. Use 0 for Unicode, 1 for Macintosh
				(deprecated for new fonts), 2 for ISO (deprecated) and 3 for Windows.
			platEncID (int): Encoding ID. Interpretation depends on the platform ID.
				See the OpenType specification for details.

		Returns:
			An object which is a subclass of :py:class:`CmapSubtable` if a matching
			subtable is found within the font, or ``None`` otherwise.
		"""

		for subtable in self.tables:
			if (subtable.platformID == platformID and
					subtable.platEncID == platEncID):
				return subtable
		return None # not found

	def getBestCmap(self, cmapPreferences=((3, 10), (0, 6), (0, 4), (3, 1), (0, 3), (0, 2), (0, 1), (0, 0))):
		"""Returns the 'best' Unicode cmap dictionary available in the font
		or ``None``, if no Unicode cmap subtable is available.

		By default it will search for the following (platformID, platEncID)
		pairs in order::

				(3, 10), # Windows Unicode full repertoire
				(0, 6),  # Unicode full repertoire (format 13 subtable)
				(0, 4),  # Unicode 2.0 full repertoire
				(3, 1),  # Windows Unicode BMP
				(0, 3),  # Unicode 2.0 BMP
				(0, 2),  # Unicode ISO/IEC 10646
				(0, 1),  # Unicode 1.1
				(0, 0)   # Unicode 1.0

		This particular order matches what HarfBuzz uses to choose what
		subtable to use by default. This order prefers the largest-repertoire
		subtable, and among those, prefers the Windows-platform over the
		Unicode-platform as the former has wider support.

		This order can be customized via the ``cmapPreferences`` argument.
		"""
		for platformID, platEncID in cmapPreferences:
			cmapSubtable = self.getcmap(platformID, platEncID)
			if cmapSubtable is not None:
				return cmapSubtable.cmap
		return None  # None of the requested cmap subtables were found

	def buildReversed(self):
		"""Builds a reverse mapping dictionary

		Iterates over all Unicode cmap tables and returns a dictionary mapping
		glyphs to sets of codepoints, such as::

			{
				'one': {0x31}
				'A': {0x41,0x391}
			}

		The values are sets of Unicode codepoints because
		some fonts map different codepoints to the same glyph.
		For example, ``U+0041 LATIN CAPITAL LETTER A`` and ``U+0391
		GREEK CAPITAL LETTER ALPHA`` are sometimes the same glyph.
		"""
		result = {}
		for subtable in self.tables:
			if subtable.isUnicode():
				for codepoint, name in subtable.cmap.items():
					result.setdefault(name, set()).add(codepoint)
		return result

	def decompile(self, data, ttFont):
		"""Parse the table header and encoding records found in ``data``.

		Sets ``self.tableVersion`` and fills ``self.tables`` with one subtable
		object per encoding record. Records whose subtable reports a zero
		length are logged and skipped. Records that point at the same offset
		share a single ``cmap`` dictionary. Subtable bodies are decompiled
		eagerly only when ``ttFont.lazy is False``.
		"""
		tableVersion, numSubTables = struct.unpack(">HH", data[:4])
		self.tableVersion = int(tableVersion)
		self.tables = tables = []
		# Maps a subtable offset to the index *in ``tables``* of the first
		# subtable that was read from that offset.
		seenOffsets = {}
		for i in range(numSubTables):
			platformID, platEncID, offset = struct.unpack(
					">HHl", data[4+i*8:4+(i+1)*8])
			platformID, platEncID = int(platformID), int(platEncID)
			format, length = struct.unpack(">HH", data[offset:offset+4])
			# Formats 8/10/12/13 and 14 carry a 32-bit length at a different
			# position, so the header has to be re-read for them.
			if format in [8,10,12,13]:
				format, reserved, length = struct.unpack(">HHL", data[offset:offset+8])
			elif format in [14]:
				format, length = struct.unpack(">HL", data[offset:offset+6])

			if not length:
				log.error(
					"cmap subtable is reported as having zero length: platformID %s, "
					"platEncID %s, format %s offset %s. Skipping table.",
					platformID, platEncID, format, offset)
				continue
			table = CmapSubtable.newSubtable(format)
			table.platformID = platformID
			table.platEncID = platEncID
			# Note that by default we decompile only the subtable header info;
			# any other data gets decompiled only when an attribute of the
			# subtable is referenced.
			table.decompileHeader(data[offset:offset+int(length)], ttFont)
			if offset in seenOffsets:
				table.data = None # Mark as decompiled
				table.cmap = tables[seenOffsets[offset]].cmap
			else:
				# Record the position in ``tables``, not the encoding record
				# index ``i``: zero-length records are skipped above, so the
				# two numbers diverge as soon as one record has been dropped.
				seenOffsets[offset] = len(tables)
			tables.append(table)
		if ttFont.lazy is False:  # Be lazy for None and True
			self.ensureDecompiled()

	def ensureDecompiled(self, recurse=False):
		"""Force every subtable in ``self.tables`` to decompile its data."""
		# The recurse argument is unused, but part of the signature of
		# ensureDecompiled across the library.
		for st in self.tables:
			st.ensureDecompiled()

	def compile(self, ttFont):
		"""Return the binary ``cmap`` table built from ``self.tables``.

		Subtables are sorted in place first. A subtable whose ``cmap`` object
		was already emitted, or which compiles to bytes identical to an earlier
		subtable, reuses the earlier offset instead of being written again.
		"""
		self.tables.sort()  # sort according to the spec; see CmapSubtable.__lt__()
		numSubTables = len(self.tables)
		totalOffset = 4 + 8 * numSubTables
		data = struct.pack(">HH", self.tableVersion, numSubTables)
		tableData = b""
		seen = {}  # Some tables are the same object reference. Don't compile them twice.
		done = {}  # Some tables are different objects, but compile to the same data chunk
		for table in self.tables:
			offset = seen.get(id(table.cmap))
			if offset is None:
				chunk = table.compile(ttFont)
				offset = done.get(chunk)
				if offset is None:
					offset = seen[id(table.cmap)] = done[chunk] = totalOffset + len(tableData)
					tableData = tableData + chunk
			data = data + struct.pack(">HHl", table.platformID, table.platEncID, offset)
		return data + tableData

	def toXML(self, writer, ttFont):
		"""Write the table version followed by every subtable to ``writer``."""
		writer.simpletag("tableVersion", version=self.tableVersion)
		writer.newline()
		for table in self.tables:
			table.toXML(writer, ttFont)

	def fromXML(self, name, attrs, content, ttFont):
		"""Consume one XML element: ``tableVersion`` or a ``cmap_format_N`` subtable.

		Elements with any other name are ignored. A subtable element creates a
		new subtable of format ``N`` and appends it to ``self.tables``.
		"""
		if name == "tableVersion":
			self.tableVersion = safeEval(attrs["version"])
			return
		if not name.startswith("cmap_format_"):
			return
		if not hasattr(self, "tables"):
			self.tables = []
		format = safeEval(name[12:])  # the text after the "cmap_format_" prefix
		table = CmapSubtable.newSubtable(format)
		table.platformID = safeEval(attrs["platformID"])
		table.platEncID = safeEval(attrs["platEncID"])
		table.fromXML(name, attrs, content, ttFont)
		self.tables.append(table)
212
213
class CmapSubtable(object):
	"""Base class for all cmap subtable formats.

	Subclasses which handle the individual subtable formats are named
	``cmap_format_0``, ``cmap_format_2`` etc. Use :py:meth:`getSubtableClass`
	to retrieve the concrete subclass, or :py:meth:`newSubtable` to get a
	new subtable object for a given format.

	The object exposes a ``.cmap`` attribute, which contains a dictionary mapping
	character codepoints to glyph names.
	"""

	@staticmethod
	def getSubtableClass(format):
		"""Return the subtable class for a format."""
		return cmap_classes.get(format, cmap_format_unknown)

	@staticmethod
	def newSubtable(format):
		"""Return a new instance of a subtable for the given format."""
		subtableClass = CmapSubtable.getSubtableClass(format)
		return subtableClass(format)

	def __init__(self, format):
		self.format = format
		# Raw, not-yet-decompiled subtable bytes (without the header), or
		# None once the subtable has been decompiled / built from scratch.
		self.data = None
		self.ttFont = None
		self.platformID = None  #: The platform ID of this subtable
		self.platEncID = None   #: The encoding ID of this subtable (interpretation depends on ``platformID``)
		self.language = None    #: The language ID of this subtable (Macintosh platform only)

	def ensureDecompiled(self, recurse=False):
		"""Decompile the saved raw data, if any, and then discard it."""
		# The recurse argument is unused, but part of the signature of
		# ensureDecompiled across the library.
		if self.data is None:
			return
		self.decompile(None, None) # use saved data.
		# Once this table has been decompiled, make sure we don't just return
		# the original data. Also avoids recursion when called with an
		# attribute that the cmap subtable doesn't have.
		self.data = None

	def __getattr__(self, attr):
		"""Lazily decompile the subtable when a missing attribute is requested.

		Raises:
			AttributeError: for dunder names, or when there is no raw data
				left to decompile.
		"""
		# allow lazy decompilation of subtables.
		if attr.startswith('__'): # don't handle requests for member functions like '__lt__'
			raise AttributeError(attr)
		if self.data is None:
			raise AttributeError(attr)
		self.ensureDecompiled()
		return getattr(self, attr)

	def decompileHeader(self, data, ttFont):
		"""Read the 6-byte (format, length, language) header from ``data``.

		The remaining bytes are stored in ``self.data`` for later (lazy)
		decompilation, and ``ttFont`` is remembered on the instance.
		"""
		format, length, language = struct.unpack(">HHH", data[:6])
		assert len(data) == length, "corrupt cmap table format %d (data length: %d, header length: %d)" % (format, len(data), length)
		self.format = int(format)
		self.length = int(length)
		self.language = int(language)
		self.data = data[6:]
		self.ttFont = ttFont

	def toXML(self, writer, ttFont):
		"""Write this subtable as an XML element with one ``map`` tag per code."""
		writer.begintag(self.__class__.__name__, [
				("platformID", self.platformID),
				("platEncID", self.platEncID),
				("language", self.language),
				])
		writer.newline()
		codes = sorted(self.cmap.items())
		self._writeCodes(codes, writer)
		writer.endtag(self.__class__.__name__)
		writer.newline()

	def getEncoding(self, default=None):
		"""Returns the Python encoding name for this cmap subtable based on its platformID,
		platEncID, and language.  If encoding for these values is not known, by default
		``None`` is returned.  That can be overridden by passing a value to the ``default``
		argument.

		Note that if you want to choose a "preferred" cmap subtable, most of the time
		``self.isUnicode()`` is what you want as that one only returns true for the modern,
		commonly used, Unicode-compatible triplets, not the legacy ones.
		"""
		return getEncoding(self.platformID, self.platEncID, self.language, default)

	def isUnicode(self):
		"""Returns true if the characters are interpreted as Unicode codepoints."""
		return (self.platformID == 0 or
			(self.platformID == 3 and self.platEncID in [0, 1, 10]))

	def isSymbol(self):
		"""Returns true if the subtable is for the Symbol encoding (3,0)"""
		return self.platformID == 3 and self.platEncID == 0

	def _writeCodes(self, codes, writer):
		"""Write ``(code, name)`` pairs as ``map`` tags, with a Unicode-name comment for Unicode subtables."""
		isUnicode = self.isUnicode()
		for code, name in codes:
			writer.simpletag("map", code=hex(code), name=name)
			if isUnicode:
				writer.comment(Unicode[code])
			writer.newline()

	def __lt__(self, other):
		"""Order subtables by (platformID, platEncID, language).

		Implemented so that ``list.sort()`` sorts according to the spec.
		Subtables whose three keys are all equal compare as "not less than"
		each other, so a (stable) sort keeps them in their existing order.
		"""
		if not isinstance(other, CmapSubtable):
			return NotImplemented

		# Only the three keys are compared. The instance ``__dict__`` must not
		# be used as a tie-breaker: dicts do not support ``<`` in Python 3, so
		# two subtables with equal keys but different contents would raise
		# TypeError in the middle of a sort.
		selfTuple = (
			getattr(self, "platformID", None),
			getattr(self, "platEncID", None),
			getattr(self, "language", None))
		otherTuple = (
			getattr(other, "platformID", None),
			getattr(other, "platEncID", None),
			getattr(other, "language", None))
		return selfTuple < otherTuple
331
332
class cmap_format_0(CmapSubtable):
	"""cmap subtable format 0: one glyph-ID byte for each character code 0..255."""

	def decompile(self, data, ttFont):
		"""Build ``self.cmap`` from the subtable's glyph-ID byte array.

		Normally reached through the lazy ``__getattr__`` path with both
		arguments ``None``, in which case the data saved by
		``decompileHeader`` is used. A direct caller must pass both ``data``
		and ``ttFont``.
		"""
		haveBoth = data is not None and ttFont is not None
		if haveBoth:
			self.decompileHeader(data, ttFont)
		else:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"
		assert 262 == self.length, "Format 0 cmap subtable not 262 bytes"
		# decompileHeader left everything after the header in self.data.
		glyphIDs = array.array("B")
		glyphIDs.frombytes(self.data)
		codes = list(range(len(glyphIDs)))
		self.cmap = _make_map(self.ttFont, codes, glyphIDs)

	def compile(self, ttFont):
		"""Return the 262-byte binary subtable.

		If raw data is still present (never decompiled) it is written back
		unchanged behind a fresh header; otherwise the glyph-ID array is built
		from ``self.cmap``, using 0 for unmapped codes.
		"""
		if self.data:
			return struct.pack(">HHH", 0, 262, self.language) + self.data

		mapping = self.cmap
		assert set(mapping.keys()).issubset(range(256))
		lookupGlyphID = ttFont.getGlyphID
		glyphIDs = []
		for code in range(256):
			if code in mapping:
				glyphIDs.append(lookupGlyphID(mapping[code]))
			else:
				glyphIDs.append(0)

		packedIDs = array.array("B", glyphIDs).tobytes()
		data = struct.pack(">HHH", 0, 262, self.language) + packedIDs
		assert len(data) == 262
		return data

	def fromXML(self, name, attrs, content, ttFont):
		"""Read the ``language`` attribute and add every ``map`` child to ``self.cmap``."""
		self.language = safeEval(attrs["language"])
		if not hasattr(self, "cmap"):
			self.cmap = {}
		mapping = self.cmap
		for element in content:
			# Non-tuple items (e.g. whitespace strings) are not child elements.
			if isinstance(element, tuple):
				childName, childAttrs, _childContent = element
				if childName == "map":
					mapping[safeEval(childAttrs["code"])] = childAttrs["name"]
375
376
# struct layout of one format 2 subheader, big-endian:
# firstCode (uint16), entryCount (uint16), idDelta (int16), idRangeOffset (uint16).
subHeaderFormat = ">HHhH"


class SubHeader(object):
	"""Plain record for one cmap format 2 subheader plus its glyph index sub-array."""

	def __init__(self):
		# The four packed fields start out unset; callers fill them in.
		self.firstCode = self.entryCount = self.idDelta = self.idRangeOffset = None
		# Fresh list per instance: the glyph indices covered by this subheader.
		self.glyphIndexArray = []
385
class cmap_format_2(CmapSubtable):
	"""cmap subtable format 2: mixed one-byte / two-byte character codes.

	The first byte of a character code selects a subheader through a
	256-entry key array; see the long comment in :py:meth:`decompile` for
	how the lookup works.
	"""

	def setIDDelta(self, subHeader):
		"""Choose ``subHeader.idDelta`` and rebase its glyph index array in place.

		After the call, the smallest non-zero entry of
		``subHeader.glyphIndexArray`` is 1 and the real glyph ID of an entry
		is ``(entry + idDelta) % 0x10000``. Zero entries (notdef) are left
		untouched. An empty array only gets ``idDelta = 0``.
		"""
		subHeader.idDelta = 0
		if not subHeader.glyphIndexArray:
			# Empty subheader (entryCount 0): there is nothing to rebase.
			return
		# find the minGI which is not zero.
		minGI = subHeader.glyphIndexArray[0]
		for gid in subHeader.glyphIndexArray:
			if (gid != 0) and (gid < minGI):
				minGI = gid
		# The lowest gid in glyphIndexArray, after subtracting idDelta, must be 1.
		# idDelta is a short, and must be between -32K and 32K. minGI can be between 1 and 64K.
		# We would like to pick an idDelta such that the first glyphArray GID is 1,
		# so that we are more likely to be able to combine glypharray GID subranges.
		# This means that we have a problem when minGI is > 32K
		# Since the final gi is reconstructed from the glyphArray GID by:
		#    (short)finalGID = (gid + idDelta) % 0x10000),
		# we can get from a glypharray GID of 1 to a final GID of 65K by subtracting 2, and casting the
		# negative number to an unsigned short.

		if (minGI > 1):
			if minGI > 0x7FFF:
				subHeader.idDelta = -(0x10000 - minGI) -1
			else:
				subHeader.idDelta = minGI -1
			idDelta = subHeader.idDelta
			for i in range(subHeader.entryCount):
				gid = subHeader.glyphIndexArray[i]
				if gid > 0:
					# Wrap to 16 bits. With a negative idDelta the plain
					# difference exceeds 0xFFFF and could not be packed as a
					# uint16; for a positive idDelta the modulo is a no-op.
					subHeader.glyphIndexArray[i] = (gid - idDelta) % 0x10000

	def decompile(self, data, ttFont):
		"""Build ``self.cmap`` from the binary subtable data.

		Normally reached through the lazy ``__getattr__`` path with both
		arguments ``None`` (the data saved by ``decompileHeader`` is used).
		A direct caller must pass both ``data`` and ``ttFont``.
		"""
		# we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
		# If not, someone is calling the subtable decompile() directly, and must provide both args.
		if data is not None and ttFont is not None:
			self.decompileHeader(data, ttFont)
		else:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"

		data = self.data # decompileHeader assigns the data after the header to self.data
		# get the key array, and determine the number of subHeaders.
		allKeys = array.array("H")
		allKeys.frombytes(data[:512])
		data = data[512:]
		if sys.byteorder != "big": allKeys.byteswap()
		# Keys are stored as byte offsets (subheader index * 8).
		subHeaderKeys = [ key//8 for key in allKeys]
		maxSubHeaderindex = max(subHeaderKeys)

		#Load subHeaders
		subHeaderList = []
		pos = 0
		for i in range(maxSubHeaderindex + 1):
			subHeader = SubHeader()
			(subHeader.firstCode, subHeader.entryCount, subHeader.idDelta, \
				subHeader.idRangeOffset) = struct.unpack(subHeaderFormat, data[pos:pos + 8])
			pos += 8
			# idRangeOffset is relative to its own field, which is the last
			# two bytes of the subheader just consumed.
			giDataPos = pos + subHeader.idRangeOffset-2
			giList = array.array("H")
			giList.frombytes(data[giDataPos:giDataPos + subHeader.entryCount*2])
			if sys.byteorder != "big": giList.byteswap()
			subHeader.glyphIndexArray = giList
			subHeaderList.append(subHeader)
		# How this gets processed.
		# Charcodes may be one or two bytes.
		# The first byte of a charcode is mapped through the subHeaderKeys, to select
		# a subHeader. For any subheader but 0, the next byte is then mapped through the
		# selected subheader. If subheader Index 0 is selected, then the byte itself is
		# mapped through the subheader, and there is no second byte.
		# Then assume that the subsequent byte is the first byte of the next charcode,and repeat.
		#
		# Each subheader references a range in the glyphIndexArray whose length is entryCount.
		# The range in glyphIndexArray referenced by a subheader may overlap with the range in glyphIndexArray
		# referenced by another subheader.
		# The only subheader that will be referenced by more than one first-byte value is the subheader
		# that maps the entire range of glyphID values to glyphIndex 0, e.g notdef:
		#	 {firstChar 0, EntryCount 0,idDelta 0,idRangeOffset xx}
		# A byte being mapped through a subheader is treated as an index into a mapping of array index to font glyphIndex.
		# A subheader specifies a subrange within (0...256) by the
		# firstChar and EntryCount values. If the byte value is outside the subrange, then the glyphIndex is zero
		# (e.g. glyph not in font).
		# If the byte index is in the subrange, then an offset index is calculated as (byteIndex - firstChar).
		# The index to glyphIndex mapping is a subrange of the glyphIndexArray. You find the start of the subrange by
		# counting idRangeOffset bytes from the idRangeOffset word. The first value in this subrange is the
		# glyphIndex for the index firstChar. The offset index should then be used in this array to get the glyphIndex.
		# Example for Logocut-Medium
		# first byte of charcode = 129; selects subheader 1.
		# subheader 1 = {firstChar 64, EntryCount 108,idDelta 42,idRangeOffset 0252}
		# second byte of charCode = 66
		# the index offset = 66-64 = 2.
		# The subrange of the glyphIndexArray starting at 0x0252 bytes from the idRangeOffset word is:
		# [glyphIndexArray index], [subrange array index] = glyphIndex
		# [256], [0]=1 	from charcode [129, 64]
		# [257], [1]=2  	from charcode [129, 65]
		# [258], [2]=3  	from charcode [129, 66]
		# [259], [3]=4  	from charcode [129, 67]
		# So, the glyphIndex = 3 from the array. Then if idDelta is not zero and the glyph ID is not zero,
		# add it to the glyphID to get the final glyphIndex
		# value. In this case the final glyph index = 3+ 42 -> 45 for the final glyphIndex. Whew!

		self.data = b""
		cmap = {}
		for firstByte in range(256):
			subHeadindex = subHeaderKeys[firstByte]
			subHeader = subHeaderList[subHeadindex]
			if subHeadindex == 0:
				if (firstByte < subHeader.firstCode) or (firstByte >= subHeader.firstCode + subHeader.entryCount):
					continue # gi is notdef.
				else:
					charCode = firstByte
					offsetIndex = firstByte - subHeader.firstCode
					gi = subHeader.glyphIndexArray[offsetIndex]
					if gi != 0:
						gi = (gi + subHeader.idDelta) % 0x10000
					else:
						continue # gi is notdef.
				cmap[charCode] = gi
			else:
				if subHeader.entryCount:
					charCodeOffset = firstByte * 256 + subHeader.firstCode
					for offsetIndex in range(subHeader.entryCount):
						charCode = charCodeOffset + offsetIndex
						gi = subHeader.glyphIndexArray[offsetIndex]
						if gi != 0:
							gi = (gi + subHeader.idDelta) % 0x10000
						else:
							continue
						cmap[charCode] = gi
				# If not subHeader.entryCount, then all char codes with this first byte are
				# mapped to .notdef. We can skip this subtable, and leave the glyphs un-encoded, which is the
				# same as mapping it to .notdef.

		gids = list(cmap.values())
		charCodes = list(cmap.keys())
		self.cmap = _make_map(self.ttFont, charCodes, gids)

	def compile(self, ttFont):
		"""Return the binary format 2 subtable for ``self.cmap``.

		If raw data is still present (never decompiled) it is written back
		unchanged behind the saved header. Glyph names that are not in the
		font's glyph map are accepted as virtual GIDs when they look like
		``gidNNN``.

		Raises:
			KeyError: if a glyph name cannot be resolved to a glyph ID.
		"""
		if self.data:
			return struct.pack(">HHH", self.format, self.length, self.language) + self.data
		kEmptyTwoCharCodeRange = -1
		notdefGI = 0

		items = sorted(self.cmap.items())
		charCodes = [item[0] for item in items]
		names = [item[1] for item in items]
		nameMap = ttFont.getReverseGlyphMap()
		try:
			gids = [nameMap[name] for name in names]
		except KeyError:
			nameMap = ttFont.getReverseGlyphMap(rebuild=True)
			try:
				gids = [nameMap[name] for name in names]
			except KeyError:
				# allow virtual GIDs in format 2 tables
				gids = []
				for name in names:
					try:
						gid = nameMap[name]
					except KeyError:
						try:
							if (name[:3] == 'gid'):
								gid = int(name[3:])
							else:
								gid = ttFont.getGlyphID(name)
						except Exception:
							# Any lookup/parse failure means the name is
							# unknown. Not a bare ``except``, so that
							# KeyboardInterrupt/SystemExit still propagate.
							raise KeyError(name)

					gids.append(gid)

		# Process the (char code to gid) item list in char code order.
		# By definition, all one byte char codes map to subheader 0.
		# For all the two byte char codes, we assume that the first byte maps to the empty subhead (with an entry count of 0,
		# which defines all char codes in its range to map to notdef) unless proven otherwise.
		# Note that since the char code items are processed in char code order, all the char codes with the
		# same first byte are in sequential order.

		# Mappings to notdef are never stored, so drop them before deciding
		# anything about the subheader layout.
		items = [(charCode, gid) for charCode, gid in zip(charCodes, gids) if gid != 0]

		subHeaderKeys = [kEmptyTwoCharCodeRange for x in range(256)] # list of indices into subHeaderList.
		subHeaderList = []

		# We force this subheader entry 0 to exist in the subHeaderList in the case where some one comes up
		# with a cmap where all the one byte char codes map to notdef,
		# with the result that the subhead 0 would not get created just by processing the item list.
		# The decision is made on the notdef-filtered list: otherwise a one-byte code
		# mapped to notdef would suppress the forced entry and the first two-byte
		# subheader would wrongly land at index 0.
		if not items or items[0][0] > 255:
			subHeader = SubHeader()
			subHeader.firstCode = 0
			subHeader.entryCount = 0
			subHeader.idDelta = 0
			subHeader.idRangeOffset = 0
			subHeaderList.append(subHeader)

		lastFirstByte = -1
		for charCode, gid in items:
			firstbyte = charCode >> 8
			secondByte = charCode & 0x00FF

			if firstbyte != lastFirstByte: # Need to update the current subhead, and start a new one.
				if lastFirstByte > -1:
					# fix GI's and iDelta of current subheader.
					self.setIDDelta(subHeader)

					# If it was subheader 0 for one-byte charCodes, then we need to set the subHeaderKeys value to zero
					# for the indices matching the char codes.
					if lastFirstByte == 0:
						for index in range(subHeader.entryCount):
							subHeaderKeys[subHeader.firstCode + index] = 0

					assert (subHeader.entryCount == len(subHeader.glyphIndexArray)), "Error - subhead entry count does not match len of glyphID subrange."
				# init new subheader
				subHeader = SubHeader()
				subHeader.firstCode = secondByte
				subHeader.entryCount = 1
				subHeader.glyphIndexArray.append(gid)
				subHeaderList.append(subHeader)
				subHeaderKeys[firstbyte] = len(subHeaderList) -1
				lastFirstByte = firstbyte
			else:
				# need to fill in with notdefs all the code points between the last charCode and the current charCode.
				codeDiff = secondByte - (subHeader.firstCode + subHeader.entryCount)
				for i in range(codeDiff):
					subHeader.glyphIndexArray.append(notdefGI)
				subHeader.glyphIndexArray.append(gid)
				subHeader.entryCount = subHeader.entryCount + codeDiff + 1

		# fix GI's and iDelta of last subheader that we added to the subheader array.
		self.setIDDelta(subHeader)
		if lastFirstByte == 0:
			# The cmap holds one-byte codes only, so the loop never reached the
			# hand-over that points their keys at subheader 0; do it here.
			for index in range(subHeader.entryCount):
				subHeaderKeys[subHeader.firstCode + index] = 0

		# Now we add a final subheader for the subHeaderKeys which maps to empty two byte charcode ranges.
		subHeader = SubHeader()
		subHeader.firstCode = 0
		subHeader.entryCount = 0
		subHeader.idDelta = 0
		subHeader.idRangeOffset = 2
		subHeaderList.append(subHeader)
		emptySubheadIndex = len(subHeaderList) - 1
		for index in range(256):
			if subHeaderKeys[index] == kEmptyTwoCharCodeRange:
				subHeaderKeys[index] = emptySubheadIndex
		# Since this is the last subheader, the GlyphIndex Array starts two bytes after the start of the
		# idRangeOffset word of this subHeader. We can safely point to the first entry in the GlyphIndexArray,
		# since the first subrange of the GlyphIndexArray is for subHeader 0, which always starts with
		# charcode 0 and GID 0.

		idRangeOffset = (len(subHeaderList)-1)*8 + 2 # offset to beginning of glyphIDArray from first subheader idRangeOffset.
		subheadRangeLen = len(subHeaderList) -1 # skip last special empty-set subheader; we've already hardcoded its idRangeOffset to 2.
		for index in range(subheadRangeLen):
			subHeader = subHeaderList[index]
			subHeader.idRangeOffset = 0
			for j in range(index):
				prevSubhead = subHeaderList[j]
				if prevSubhead.glyphIndexArray == subHeader.glyphIndexArray: # use the glyphIndexArray subarray
					subHeader.idRangeOffset = prevSubhead.idRangeOffset - (index-j)*8
					subHeader.glyphIndexArray = []
					break
			if subHeader.idRangeOffset == 0: # didn't find one.
				subHeader.idRangeOffset = idRangeOffset
				idRangeOffset = (idRangeOffset - 8) + subHeader.entryCount*2 # one less subheader, one more subArray.
			else:
				idRangeOffset = idRangeOffset - 8  # one less subheader

		# Now we can write out the data!
		length = 6 + 512 + 8*len(subHeaderList) # header, 256 subHeaderKeys, and subheader array.
		for subhead in subHeaderList[:-1]:
			length = length + len(subhead.glyphIndexArray)*2  # We can't use subhead.entryCount, as some of the subhead may share subArrays.
		dataList = [struct.pack(">HHH", 2, length, self.language)]
		for index in subHeaderKeys:
			dataList.append(struct.pack(">H", index*8))
		for subhead in subHeaderList:
			dataList.append(struct.pack(subHeaderFormat, subhead.firstCode, subhead.entryCount, subhead.idDelta, subhead.idRangeOffset))
		for subhead in subHeaderList[:-1]:
			for gi in subhead.glyphIndexArray:
				dataList.append(struct.pack(">H", gi))
		data = bytesjoin(dataList)
		assert (len(data) == length), "Error: cmap format 2 is not same length as calculated! actual: " + str(len(data))+ " calc : " + str(length)
		return data

	def fromXML(self, name, attrs, content, ttFont):
		"""Read the ``language`` attribute and add every ``map`` child to ``self.cmap``."""
		self.language = safeEval(attrs["language"])
		if not hasattr(self, "cmap"):
			self.cmap = {}
		cmap = self.cmap

		for element in content:
			if not isinstance(element, tuple):
				continue
			name, attrs, content = element
			if name != "map":
				continue
			cmap[safeEval(attrs["code"])] = attrs["name"]
680
681
# struct format string for seven consecutive big-endian uint16 values.
# NOTE(review): presumably the fixed leading fields of a format 4 subtable
# (format, length, language, segCountX2, searchRange, entrySelector,
# rangeShift); the code that uses it is outside this view, so confirm
# against cmap_format_4.compile().
cmap_format_4_format = ">7H"

# Variable-length arrays of a format 4 subtable, in the order in which
# cmap_format_4.decompile() slices them:
#uint16  endCode[segCount]          # Ending character code for each segment, last = 0xFFFF.
#uint16  reservedPad                # This value should be zero
#uint16  startCode[segCount]        # Starting character code for each segment
#uint16  idDelta[segCount]          # Delta for all character codes in segment
#uint16  idRangeOffset[segCount]    # Offset in bytes to glyph indexArray, or 0
#uint16  glyphIndexArray[variable]  # Glyph index array
690
def splitRange(startCode, endCode, cmap):
	"""Split ``startCode..endCode`` into segments for compact cmap4 storage.

	``cmap`` maps every character code in the (inclusive) range to a glyph
	ID. Runs of codes whose glyph IDs are consecutive are candidates for
	their own segment; a run is only split off when it is long enough to
	pay for the extra segment(s).

	Returns a ``(start, end)`` pair of lists: ``end`` holds the end code of
	every resulting segment, ``start`` the start codes of all segments
	except the first (whose start is ``startCode``), so that
	``len(start) + 1 == len(end)``.
	"""
	# Heuristic, not a proven optimum: it is meant to never make a table
	# bigger while making many of them smaller.
	if startCode == endCode:
		return [], [endCode]

	prevGlyphID = cmap[startCode]
	prevCode = startCode
	inRun = False
	runStart = None
	runs = []

	# Gather subranges in which the glyph IDs are consecutive.
	for code in range(startCode + 1, endCode + 1):
		glyphID = cmap[code]
		isConsecutive = (glyphID - 1 == prevGlyphID)

		if isConsecutive and not inRun:
			inRun = True
			runStart = prevCode
		elif not isConsecutive and inRun:
			inRun = False
			runs.append((runStart, prevCode))
			runStart = None

		prevGlyphID = glyphID
		prevCode = code

	if inRun:
		runs.append((runStart, prevCode))
	assert prevCode == endCode

	# Keep only the runs that save space. A new segment costs 8 bytes, not
	# using a new segment costs 2 bytes per character.
	keptRuns = []
	for first, last in runs:
		if first == startCode and last == endCode:
			break  # the whole range, we're fine
		touchesEdge = (first == startCode or last == endCode)
		# A run at an edge costs one more segment, one in the middle costs two.
		threshold = 4 if touchesEdge else 8
		if (last - first + 1) > threshold:
			keptRuns.append((first, last))

	if not keptRuns:
		return [], [endCode]

	# Make the list cover the range from its very first to its very last code.
	if keptRuns[0][0] != startCode:
		keptRuns.insert(0, (startCode, keptRuns[0][0] - 1))
	if keptRuns[-1][1] != endCode:
		keptRuns.append((keptRuns[-1][1] + 1, endCode))

	# Fill the "holes" between kept runs -- those are the segments in which
	# the glyph IDs are _not_ consecutive.
	segments = [keptRuns[0]]
	for run in keptRuns[1:]:
		previousEnd = segments[-1][1]
		if previousEnd + 1 != run[0]:
			segments.append((previousEnd + 1, run[0] - 1))
		segments.append(run)

	# Transform the segments into startCode/endCode lists; the first start
	# code is dropped because it equals ``startCode``.
	start = [first for first, _ in segments]
	end = [last for _, last in segments]
	start.pop(0)

	assert len(start) + 1 == len(end)
	return start, end
768
769
class cmap_format_4(CmapSubtable):
	"""Format 4 cmap subtable (segment mapping for 16-bit character codes).

	The binary body consists of parallel per-segment arrays (endCode,
	startCode, idDelta, idRangeOffset) followed by a glyph index array.
	After decompilation ``self.cmap`` maps character codes to glyph names.
	"""

	def decompile(self, data, ttFont):
		"""Parse the subtable body into ``self.cmap``.

		Pass both *data* and *ttFont* to decompile directly (the header is
		parsed first), or ``None`` for both to decompile the body that
		``decompileHeader`` previously stored in ``self.data``.
		"""
		# we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
		# If not, someone is calling the subtable decompile() directly, and must provide both args.
		if data is not None and ttFont is not None:
			self.decompileHeader(data, ttFont)
		else:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"

		data = self.data # decompileHeader assigns the data after the header to self.data
		(segCountX2, searchRange, entrySelector, rangeShift) = \
					struct.unpack(">4H", data[:8])
		data = data[8:]
		segCount = segCountX2 // 2

		# Everything after the four search fields is an array of 16-bit words.
		allCodes = array.array("H")
		allCodes.frombytes(data)
		self.data = data = None

		# The words are read in native order; swap when the host is not big-endian.
		if sys.byteorder != "big": allCodes.byteswap()

		# divide the data
		endCode = allCodes[:segCount]
		allCodes = allCodes[segCount+1:]  # the +1 is skipping the reservedPad field
		startCode = allCodes[:segCount]
		allCodes = allCodes[segCount:]
		idDelta = allCodes[:segCount]
		allCodes = allCodes[segCount:]
		idRangeOffset = allCodes[:segCount]
		glyphIndexArray = allCodes[segCount:]
		lenGIArray = len(glyphIndexArray)

		# build 2-byte character mapping
		charCodes = []
		gids = []
		for i in range(len(startCode) - 1):	# don't do 0xffff!
			start = startCode[i]
			delta = idDelta[i]
			rangeOffset = idRangeOffset[i]
			# rangeOffset is a byte offset counted from this segment's own
			# idRangeOffset slot (see compile()); "partial" converts it into
			# the amount to add to a char code to index glyphIndexArray.
			partial = rangeOffset // 2 - start + i - len(idRangeOffset)

			rangeCharCodes = list(range(startCode[i], endCode[i] + 1))
			charCodes.extend(rangeCharCodes)
			if rangeOffset == 0:
				# Glyph IDs follow directly from the char code, modulo 65536.
				gids.extend([(charCode + delta) & 0xFFFF for charCode in rangeCharCodes])
			else:
				for charCode in rangeCharCodes:
					index = charCode + partial
					assert (index < lenGIArray), "In format 4 cmap, range (%d), the calculated index (%d) into the glyph index array is not less than the length of the array (%d) !" % (i, index, lenGIArray)
					if glyphIndexArray[index] != 0:  # if not missing glyph
						glyphID = glyphIndexArray[index] + delta
					else:
						glyphID = 0  # missing glyph
					gids.append(glyphID & 0xFFFF)

		self.cmap = _make_map(self.ttFont, charCodes, gids)

	def compile(self, ttFont):
		"""Return the binary form of this subtable.

		If an undecompiled body is still held in ``self.data`` it is re-emitted
		behind the stored header fields. Otherwise the segment arrays are
		rebuilt from ``self.cmap``.

		Raises ``KeyError`` (carrying the glyph name) when a glyph name can be
		resolved neither through the font's reverse glyph map, nor as a
		virtual ``gidNNN`` name, nor through ``ttFont.getGlyphID``.
		"""
		if self.data:
			return struct.pack(">HHH", self.format, self.length, self.language) + self.data

		charCodes = list(self.cmap.keys())
		if not charCodes:
			# Empty mapping: only the closing 0xffff segment is written.
			startCode = [0xffff]
			endCode = [0xffff]
		else:
			charCodes.sort()
			names = [self.cmap[code] for code in charCodes]
			nameMap = ttFont.getReverseGlyphMap()
			try:
				gids = [nameMap[name] for name in names]
			except KeyError:
				# Retry once with a freshly rebuilt reverse map.
				nameMap = ttFont.getReverseGlyphMap(rebuild=True)
				try:
					gids = [nameMap[name] for name in names]
				except KeyError:
					# allow virtual GIDs in format 4 tables
					gids = []
					for name in names:
						try:
							gid = nameMap[name]
						except KeyError:
							try:
								if (name[:3] == 'gid'):
									gid = int(name[3:])
								else:
									gid = ttFont.getGlyphID(name)
							except Exception:
								# Report any resolution failure as a missing
								# glyph name; unlike a bare "except:", this
								# lets KeyboardInterrupt/SystemExit propagate.
								raise KeyError(name)

						gids.append(gid)
			cmap = {}  # code:glyphID mapping
			for code, gid in zip(charCodes, gids):
				cmap[code] = gid

			# Build startCode and endCode lists.
			# Split the char codes in ranges of consecutive char codes, then split
			# each range in more ranges of consecutive/not consecutive glyph IDs.
			# See splitRange().
			lastCode = charCodes[0]
			endCode = []
			startCode = [lastCode]
			for charCode in charCodes[1:]:  # skip the first code, it's the first start code
				if charCode == lastCode + 1:
					lastCode = charCode
					continue
				start, end = splitRange(startCode[-1], lastCode, cmap)
				startCode.extend(start)
				endCode.extend(end)
				startCode.append(charCode)
				lastCode = charCode
			start, end = splitRange(startCode[-1], lastCode, cmap)
			startCode.extend(start)
			endCode.extend(end)
			startCode.append(0xffff)
			endCode.append(0xffff)

		# build up rest of cruft
		idDelta = []
		idRangeOffset = []
		glyphIndexArray = []
		for i in range(len(endCode)-1):  # skip the closing codes (0xffff)
			indices = []
			for charCode in range(startCode[i], endCode[i] + 1):
				indices.append(cmap[charCode])
			if (indices == list(range(indices[0], indices[0] + len(indices)))):
				# Consecutive glyph IDs: a single delta covers the whole segment.
				idDelta.append((indices[0] - startCode[i]) % 0x10000)
				idRangeOffset.append(0)
			else:
				# Non-consecutive glyph IDs: store them in glyphIndexArray.
				# The offset is in bytes, measured from this segment's
				# idRangeOffset slot: the remaining slots (len(endCode) - i)
				# plus the glyph indices already emitted.
				idDelta.append(0)
				idRangeOffset.append(2 * (len(endCode) + len(glyphIndexArray) - i))
				glyphIndexArray.extend(indices)
		idDelta.append(1)  # 0xffff + 1 == (tadaa!) 0. So this end code maps to .notdef
		idRangeOffset.append(0)

		# Insane.
		segCount = len(endCode)
		segCountX2 = segCount * 2
		searchRange, entrySelector, rangeShift = getSearchRange(segCount, 2)

		# The single [0] between endCode and startCode is the reservedPad word.
		charCodeArray = array.array("H", endCode + [0] + startCode)
		idDeltaArray = array.array("H", idDelta)
		restArray = array.array("H", idRangeOffset + glyphIndexArray)
		if sys.byteorder != "big": charCodeArray.byteswap()
		if sys.byteorder != "big": idDeltaArray.byteswap()
		if sys.byteorder != "big": restArray.byteswap()
		data = charCodeArray.tobytes() + idDeltaArray.tobytes() + restArray.tobytes()

		length = struct.calcsize(cmap_format_4_format) + len(data)
		header = struct.pack(cmap_format_4_format, self.format, length, self.language,
				segCountX2, searchRange, entrySelector, rangeShift)
		return header + data

	def fromXML(self, name, attrs, content, ttFont):
		"""Populate ``self.language`` and ``self.cmap`` from parsed XML.

		Non-tuple items in *content* are skipped. Every tuple item must be a
		``map`` element carrying ``code`` and ``name`` attributes; any other
		element name trips an assertion.
		"""
		self.language = safeEval(attrs["language"])
		if not hasattr(self, "cmap"):
			self.cmap = {}
		cmap = self.cmap

		for element in content:
			if not isinstance(element, tuple):
				continue
			nameMap, attrsMap, dummyContent = element
			if nameMap != "map":
				assert 0, "Unrecognized keyword in cmap subtable"
			cmap[safeEval(attrsMap["code"])] = attrsMap["name"]
937
938
class cmap_format_6(CmapSubtable):
	"""Format 6 cmap subtable: a dense array of 16-bit glyph IDs covering one
	contiguous run of character codes that starts at ``firstCode``.
	"""

	def decompile(self, data, ttFont):
		"""Turn the binary body into ``self.cmap``.

		Supply both *data* and *ttFont* for a direct call, or ``None`` for both
		to decode the body already stored in ``self.data``.
		"""
		# Normally reached lazily through the subtable __getattr__ hook, where
		# both arguments are None; a direct caller has to provide both.
		if data is None or ttFont is None:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"
		else:
			self.decompileHeader(data, ttFont)

		body = self.data  # decompileHeader leaves the post-header bytes here
		firstCode, entryCount = struct.unpack(">HH", body[:4])
		firstCode = int(firstCode)
		payload = body[4:]
		# The payload length is not guaranteed to equal 2 * entryCount (the
		# original author noted Apple's Helvetica as a counter-example), so
		# only the declared number of entries is read.
		glyphIDs = array.array("H")
		glyphIDs.frombytes(payload[:2 * int(entryCount)])
		if sys.byteorder != "big":
			glyphIDs.byteswap()
		self.data = None

		stopCode = firstCode + len(glyphIDs)
		self.cmap = _make_map(self.ttFont, list(range(firstCode, stopCode)), glyphIDs)

	def compile(self, ttFont):
		"""Return the binary form of this subtable.

		An undecompiled body in ``self.data`` is re-emitted behind the stored
		header fields. Otherwise one glyph ID is written for every code from
		the lowest to the highest mapped code, with 0 for unmapped codes.
		"""
		if self.data:
			return struct.pack(">HHH", self.format, self.length, self.language) + self.data

		mapping = self.cmap
		ordered = sorted(mapping.keys())
		if not ordered:
			# Empty cmap subtables do exist.
			firstCode = 0
			entryCount = 0
			payload = b""
		else:
			firstCode = ordered[0]
			glyphList = []
			for code in range(firstCode, ordered[-1] + 1):
				if code in mapping:
					glyphList.append(ttFont.getGlyphID(mapping[code]))
				else:
					glyphList.append(0)  # hole in the range
			entryCount = len(glyphList)
			packed = array.array("H", glyphList)
			if sys.byteorder != "big":
				packed.byteswap()
			payload = packed.tobytes()

		# The fixed part of the subtable is five uint16 fields (10 bytes).
		header = struct.pack(">HHHHH",
				6, len(payload) + 10, self.language, firstCode, entryCount)
		return header + payload

	def fromXML(self, name, attrs, content, ttFont):
		"""Populate ``self.language`` and ``self.cmap`` from parsed XML.

		Only tuple items named ``map`` contribute; everything else is ignored.
		"""
		self.language = safeEval(attrs["language"])
		if not hasattr(self, "cmap"):
			self.cmap = {}
		mapping = self.cmap

		for child in content:
			if isinstance(child, tuple):
				tag, childAttrs, _ = child
				if tag == "map":
					mapping[safeEval(childAttrs["code"])] = childAttrs["name"]
997
998
class cmap_format_12_or_13(CmapSubtable):
	"""Shared implementation of the format 12 and format 13 cmap subtables.

	Both formats store a list of 12-byte groups, each holding three 32-bit
	fields: startCharCode, endCharCode and a glyph ID. Subclasses provide
	``_format_step``, ``_computeGIDs()`` and ``_IsInSameRun()`` to define how
	glyph IDs progress inside a group.
	"""

	def __init__(self, format):
		self.format = format
		self.reserved = 0
		self.data = None
		self.ttFont = None

	def decompileHeader(self, data, ttFont):
		"""Parse the 16-byte header and keep the group records in ``self.data``.

		Asserts that the data length, the declared length and the group count
		agree with each other.
		"""
		format, reserved, length, language, nGroups = struct.unpack(">HHLLL", data[:16])
		assert len(data) == (16 + nGroups*12) == (length), "corrupt cmap table format %d (data length: %d, header length: %d)" % (self.format, len(data), length)
		self.format = format
		self.reserved = reserved
		self.length = length
		self.language = language
		self.nGroups = nGroups
		self.data = data[16:]
		self.ttFont = ttFont

	def decompile(self, data, ttFont):
		"""Expand the group records into ``self.cmap``.

		Pass both *data* and *ttFont* to decompile directly, or ``None`` for
		both to decompile the records already stored in ``self.data``.
		"""
		# we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
		# If not, someone is calling the subtable decompile() directly, and must provide both args.
		if data is not None and ttFont is not None:
			self.decompileHeader(data, ttFont)
		else:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"

		data = self.data # decompileHeader assigns the data after the header to self.data
		charCodes = []
		gids = []
		pos = 0
		for i in range(self.nGroups):
			startCharCode, endCharCode, glyphID = struct.unpack(">LLL",data[pos:pos+12] )
			pos += 12
			lenGroup = 1 + endCharCode - startCharCode
			charCodes.extend(range(startCharCode, endCharCode +1))
			gids.extend(self._computeGIDs(glyphID, lenGroup))
		self.data = data = None
		self.cmap = _make_map(self.ttFont, charCodes, gids)

	def compile(self, ttFont):
		"""Return the binary form of this subtable.

		An undecompiled body in ``self.data`` is re-emitted behind the stored
		header fields. Otherwise ``self.cmap`` is converted to glyph IDs and
		folded into groups using the subclass's ``_IsInSameRun()``. An empty
		mapping yields a header-only subtable with zero groups.

		Raises ``KeyError`` (carrying the glyph name) when a glyph name can be
		resolved neither through the font's reverse glyph map, nor as a
		virtual ``gidNNN`` name, nor through ``ttFont.getGlyphID``.
		"""
		if self.data:
			return struct.pack(">HHLLL", self.format, self.reserved, self.length, self.language, self.nGroups) + self.data
		charCodes = list(self.cmap.keys())
		if not charCodes:
			# Formats 4 and 6 accept an empty mapping; do the same here rather
			# than failing with IndexError on charCodes[0] below.
			return struct.pack(">HHLLL", self.format, self.reserved, 16, self.language, 0)
		names = list(self.cmap.values())
		nameMap = ttFont.getReverseGlyphMap()
		try:
			gids = [nameMap[name] for name in names]
		except KeyError:
			# Retry once with a freshly rebuilt reverse map.
			nameMap = ttFont.getReverseGlyphMap(rebuild=True)
			try:
				gids = [nameMap[name] for name in names]
			except KeyError:
				# allow virtual GIDs in format 12 tables
				gids = []
				for name in names:
					try:
						gid = nameMap[name]
					except KeyError:
						try:
							if (name[:3] == 'gid'):
								gid = int(name[3:])
							else:
								gid = ttFont.getGlyphID(name)
						except Exception:
							# Report any resolution failure as a missing glyph
							# name; unlike a bare "except:", this lets
							# KeyboardInterrupt/SystemExit propagate.
							raise KeyError(name)

					gids.append(gid)

		cmap = {}  # code:glyphID mapping
		for code, gid in zip(charCodes, gids):
			cmap[code] = gid

		charCodes.sort()
		startCharCode = charCodes[0]
		startGlyphID = cmap[startCharCode]
		# Seed the "previous" values so that the first code continues the run
		# instead of closing an empty one.
		lastGlyphID = startGlyphID - self._format_step
		lastCharCode = startCharCode - 1
		nGroups = 0
		dataList = []
		for charCode in charCodes:
			glyphID = cmap[charCode]
			if not self._IsInSameRun(glyphID, lastGlyphID, charCode, lastCharCode):
				# The run ended at the previous code: emit it and start anew.
				dataList.append(struct.pack(">LLL", startCharCode, lastCharCode, startGlyphID))
				startCharCode = charCode
				startGlyphID = glyphID
				nGroups = nGroups + 1
			lastGlyphID = glyphID
			lastCharCode = charCode
		# Flush the run that was still open when the loop finished.
		dataList.append(struct.pack(">LLL", startCharCode, lastCharCode, startGlyphID))
		nGroups = nGroups + 1
		data = bytesjoin(dataList)
		lengthSubtable = len(data) +16
		assert len(data) == (nGroups*12) == (lengthSubtable-16)
		return struct.pack(">HHLLL", self.format, self.reserved, lengthSubtable, self.language, nGroups) + data

	def toXML(self, writer, ttFont):
		"""Write the subtable header attributes and its code mappings as XML."""
		writer.begintag(self.__class__.__name__, [
				("platformID", self.platformID),
				("platEncID", self.platEncID),
				("format", self.format),
				("reserved", self.reserved),
				("length", self.length),
				("language", self.language),
				("nGroups", self.nGroups),
				])
		writer.newline()
		codes = sorted(self.cmap.items())
		self._writeCodes(codes, writer)
		writer.endtag(self.__class__.__name__)
		writer.newline()

	def fromXML(self, name, attrs, content, ttFont):
		"""Populate the header fields and ``self.cmap`` from parsed XML.

		Only tuple items named ``map`` contribute; everything else is ignored.
		"""
		self.format = safeEval(attrs["format"])
		self.reserved = safeEval(attrs["reserved"])
		self.length = safeEval(attrs["length"])
		self.language = safeEval(attrs["language"])
		self.nGroups = safeEval(attrs["nGroups"])
		if not hasattr(self, "cmap"):
			self.cmap = {}
		cmap = self.cmap

		for element in content:
			if not isinstance(element, tuple):
				continue
			name, attrs, content = element
			if name != "map":
				continue
			cmap[safeEval(attrs["code"])] = attrs["name"]
1131
1132
class cmap_format_12(cmap_format_12_or_13):
	"""Format 12 flavour of the shared 12/13 implementation: within a group
	the glyph ID advances by one for each successive character code.
	"""

	# Glyph ID increment between two neighbouring codes of one group.
	_format_step = 1

	def __init__(self, format=12):
		super().__init__(format)

	def _computeGIDs(self, startingGlyph, numberOfGlyphs):
		"""Return the consecutive glyph IDs covered by one group."""
		stop = startingGlyph + numberOfGlyphs
		return [*range(startingGlyph, stop)]

	def _IsInSameRun(self, glyphID, lastGlyphID, charCode, lastCharCode):
		"""Tell whether this code/glyph pair directly extends the previous one."""
		if glyphID != lastGlyphID + 1:
			return False
		return charCode == lastCharCode + 1
1145
1146
class cmap_format_13(cmap_format_12_or_13):
	"""Format 13 flavour of the shared 12/13 implementation: every character
	code of a group maps to the same glyph ID.
	"""

	# Glyph ID increment between two neighbouring codes of one group.
	_format_step = 0

	def __init__(self, format=13):
		super().__init__(format)

	def _computeGIDs(self, startingGlyph, numberOfGlyphs):
		"""Return the group's glyph ID repeated once per character code."""
		return [startingGlyph for _ in range(numberOfGlyphs)]

	def _IsInSameRun(self, glyphID, lastGlyphID, charCode, lastCharCode):
		"""Tell whether this code continues the previous one with the same glyph."""
		if glyphID != lastGlyphID:
			return False
		return charCode == lastCharCode + 1
1159
1160
def cvtToUVS(threeByteString):
	"""Decode a 3-byte big-endian byte string into an integer."""
	# Left-pad with a zero byte so the value can be read as one 32-bit word.
	(value,) = struct.unpack(">L", b"\0" + threeByteString)
	return value
1165
def cvtFromUVS(val):
	"""Encode *val* (which must fit in 24 bits) as a 3-byte big-endian string."""
	assert 0 <= val < 0x1000000
	# Pack as a 32-bit word, then drop the leading byte, which is always zero.
	return struct.pack(">L", val)[1:]
1170
1171
class cmap_format_14(CmapSubtable):
	"""Format 14 cmap subtable (Unicode Variation Sequences).

	The decoded form lives in ``self.uvsDict``, which maps a variation
	selector to a list of ``(baseCodePoint, glyphName)`` pairs. A glyph name
	of ``None`` marks a default mapping. ``self.cmap`` is kept as an empty
	dict only so that clients expecting the attribute do not fail.
	"""

	def decompileHeader(self, data, ttFont):
		"""Parse the 10-byte header and keep the remaining bytes in ``self.data``."""
		format, length, numVarSelectorRecords = struct.unpack(">HLL", data[:10])
		self.data = data[10:]
		self.length = length
		self.numVarSelectorRecords = numVarSelectorRecords
		self.ttFont = ttFont
		self.language = 0xFF # has no language.

	def decompile(self, data, ttFont):
		"""Decode the variation selector records into ``self.uvsDict``.

		Pass both *data* and *ttFont* to decompile directly, or ``None`` for
		both to decode the bytes already stored in ``self.data``.
		"""
		if data is not None and ttFont is not None:
			self.decompileHeader(data, ttFont)
		else:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"
		data = self.data

		self.cmap = {} # so that clients that expect this to exist in a cmap table won't fail.
		uvsDict = {}
		recOffset = 0
		for n in range(self.numVarSelectorRecords):
			uvs, defOVSOffset, nonDefUVSOffset = struct.unpack(">3sLL", data[recOffset:recOffset +11])
			recOffset += 11
			varUVS = cvtToUVS(uvs)
			if defOVSOffset:
				# Stored offsets count from the start of the subtable, whereas
				# "data" starts after the 10-byte header.
				startOffset = defOVSOffset - 10
				numValues, = struct.unpack(">L", data[startOffset:startOffset+4])
				startOffset +=4
				for r in range(numValues):
					uv, addtlCnt = struct.unpack(">3sB", data[startOffset:startOffset+4])
					startOffset += 4
					firstBaseUV = cvtToUVS(uv)
					cnt = addtlCnt+1
					baseUVList = list(range(firstBaseUV, firstBaseUV+cnt))
					glyphList = [None]*cnt  # None marks a default mapping
					localUVList = zip(baseUVList, glyphList)
					try:
						uvsDict[varUVS].extend(localUVList)
					except KeyError:
						uvsDict[varUVS] = list(localUVList)

			if nonDefUVSOffset:
				startOffset = nonDefUVSOffset - 10
				numRecs, = struct.unpack(">L", data[startOffset:startOffset+4])
				startOffset +=4
				localUVList = []
				for r in range(numRecs):
					uv, gid = struct.unpack(">3sH", data[startOffset:startOffset+5])
					startOffset += 5
					uv = cvtToUVS(uv)
					glyphName = self.ttFont.getGlyphName(gid)
					localUVList.append((uv, glyphName))
				try:
					uvsDict[varUVS].extend(localUVList)
				except KeyError:
					uvsDict[varUVS] = localUVList

		self.uvsDict = uvsDict
		# Drop the raw bytes, as the format 4/6/12 decompilers do, so that
		# compile() rebuilds from uvsDict instead of re-emitting stale data.
		self.data = None

	def toXML(self, writer, ttFont):
		"""Write one ``map`` element per (selector, base code point) pair.

		Entries of each selector are sorted in place, default mappings first.
		"""
		writer.begintag(self.__class__.__name__, [
				("platformID", self.platformID),
				("platEncID", self.platEncID),
				])
		writer.newline()
		uvsDict = self.uvsDict
		uvsList = sorted(uvsDict.keys())
		for uvs in uvsList:
			uvList = uvsDict[uvs]
			uvList.sort(key=lambda item: (item[1] is not None, item[0], item[1]))
			for uv, gname in uvList:
				attrs = [("uv", hex(uv)), ("uvs", hex(uvs))]
				if gname is not None:
					attrs.append(("name", gname))
				writer.simpletag("map", attrs)
				writer.newline()
		writer.endtag(self.__class__.__name__)
		writer.newline()

	def fromXML(self, name, attrs, content, ttFont):
		"""Add the ``map`` elements found in *content* to ``self.uvsDict``.

		Existing entries are kept, so the method can be called repeatedly on
		the same object.
		"""
		self.language = 0xFF # provide a value so that CmapSubtable.__lt__() won't fail
		if not hasattr(self, "cmap"):
			self.cmap = {} # so that clients that expect this to exist in a cmap table won't fail.
		if not hasattr(self, "uvsDict"):
			self.uvsDict = {}
		# Bind the local name unconditionally: it used to be assigned only
		# when uvsDict had just been created, which raised UnboundLocalError
		# below whenever the attribute already existed.
		uvsDict = self.uvsDict

		# For backwards compatibility reasons we accept "None" as an indicator
		# for "default mapping", unless the font actually has a glyph named
		# "None".
		_hasGlyphNamedNone = None

		for element in content:
			if not isinstance(element, tuple):
				continue
			name, attrs, content = element
			if name != "map":
				continue
			uvs = safeEval(attrs["uvs"])
			uv = safeEval(attrs["uv"])
			gname = attrs.get("name")
			if gname == "None":
				# Look the glyph order up lazily, at most once per call.
				if _hasGlyphNamedNone is None:
					_hasGlyphNamedNone = "None" in ttFont.getGlyphOrder()
				if not _hasGlyphNamedNone:
					gname = None
			try:
				uvsDict[uvs].append((uv, gname))
			except KeyError:
				uvsDict[uvs] = [(uv, gname)]

	def compile(self, ttFont):
		"""Return the binary form of this subtable.

		An undecompiled body in ``self.data`` is re-emitted behind the stored
		header fields. Otherwise the table is rebuilt from ``self.uvsDict``,
		and ``self.length`` and ``self.numVarSelectorRecords`` are updated.
		"""
		if self.data:
			return struct.pack(">HLL", self.format, self.length, self.numVarSelectorRecords) + self.data

		uvsDict = self.uvsDict
		uvsList = sorted(uvsDict.keys())
		self.numVarSelectorRecords = len(uvsList)
		offset = 10 + self.numVarSelectorRecords*11 # current value is end of VarSelectorRecords block.
		data = []
		varSelectorRecords =[]
		for uvs in uvsList:
			entryList = uvsDict[uvs]

			defList = [entry for entry in entryList if entry[1] is None]
			if defList:
				defList = [entry[0] for entry in defList]
				defOVSOffset = offset
				defList.sort()

				# Fold consecutive code points into (start, additionalCount)
				# ranges. cnt is the distance of the current entry from lastUV
				# as long as the run is unbroken.
				lastUV = defList[0]
				cnt = -1
				defRecs = []
				for defEntry in defList:
					cnt +=1
					# additionalCount is stored in a single byte, so a run is
					# also closed once it spans 256 code points; otherwise
					# struct.pack would fail on a count above 255.
					if cnt > 255 or (lastUV+cnt) != defEntry:
						rec = struct.pack(">3sB", cvtFromUVS(lastUV), cnt-1)
						lastUV = defEntry
						defRecs.append(rec)
						cnt = 0

				rec = struct.pack(">3sB", cvtFromUVS(lastUV), cnt)
				defRecs.append(rec)

				numDefRecs = len(defRecs)
				data.append(struct.pack(">L", numDefRecs))
				data.extend(defRecs)
				offset += 4 + numDefRecs*4
			else:
				defOVSOffset = 0

			ndefList = [entry for entry in entryList if entry[1] is not None]
			if ndefList:
				nonDefUVSOffset = offset
				ndefList.sort()
				numNonDefRecs = len(ndefList)
				data.append(struct.pack(">L", numNonDefRecs))
				offset += 4 + numNonDefRecs*5

				for uv, gname in ndefList:
					gid = ttFont.getGlyphID(gname)
					ndrec = struct.pack(">3sH", cvtFromUVS(uv), gid)
					data.append(ndrec)
			else:
				nonDefUVSOffset = 0

			vrec = struct.pack(">3sLL", cvtFromUVS(uvs), defOVSOffset, nonDefUVSOffset)
			varSelectorRecords.append(vrec)

		data = bytesjoin(varSelectorRecords) + bytesjoin(data)
		self.length = 10 + len(data)
		headerdata = struct.pack(">HLL", self.format, self.length, self.numVarSelectorRecords)

		return headerdata + data
1346
1347
class cmap_format_unknown(CmapSubtable):
	"""Subtable whose format is not interpreted: the raw bytes are kept in
	``self.data`` and written back unchanged.
	"""

	def toXML(self, writer, ttFont):
		"""Dump the raw bytes as hex inside a tag named after the format."""
		tagName = self.__class__.__name__[:12] + str(self.format)
		tagAttrs = [
				("platformID", self.platformID),
				("platEncID", self.platEncID),
				]
		writer.begintag(tagName, tagAttrs)
		writer.newline()
		writer.dumphex(self.data)
		writer.endtag(tagName)
		writer.newline()

	def fromXML(self, name, attrs, content, ttFont):
		"""Restore the raw bytes from a hex dump; the mapping stays empty."""
		self.data = readHex(content)
		self.cmap = {}

	def decompileHeader(self, data, ttFont):
		"""Store *data* untouched; there is no header to parse."""
		self.language = 0  # dummy value
		self.data = data

	def decompile(self, data, ttFont):
		"""Validate the arguments and, for a direct call, store the raw data."""
		# Normally reached lazily through the subtable __getattr__ hook, where
		# both arguments are None; a direct caller has to provide both.
		if data is None or ttFont is None:
			assert (data is None and ttFont is None), "Need both data and ttFont arguments"
			return
		self.decompileHeader(data, ttFont)

	def compile(self, ttFont):
		"""Return the stored raw bytes, or None when there are none."""
		return self.data or None
1382
# Registry mapping each supported cmap subtable format number to the class
# that implements it. Format numbers missing here have no dedicated class;
# presumably they are handled by cmap_format_unknown -- confirm at the lookup site.
cmap_classes = {
		0: cmap_format_0,
		2: cmap_format_2,
		4: cmap_format_4,
		6: cmap_format_6,
		12: cmap_format_12,
		13: cmap_format_13,
		14: cmap_format_14,
}
1392