• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2014  Google, Inc.
3  *
4  *  This is part of HarfBuzz, a text shaping library.
5  *
6  * Permission is hereby granted, without written agreement and without
7  * license or royalty fees, to use, copy, modify, and distribute this
8  * software and its documentation for any purpose, provided that the
9  * above copyright notice and the following two paragraphs appear in
10  * all copies of this software.
11  *
12  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
16  * DAMAGE.
17  *
18  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
21  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
23  *
24  * Google Author(s): Behdad Esfahbod
25  */
26 
27 #ifndef HB_OT_CMAP_TABLE_HH
28 #define HB_OT_CMAP_TABLE_HH
29 
30 #include "hb-open-type.hh"
31 #include "hb-set.hh"
32 
33 /*
34  * cmap -- Character to Glyph Index Mapping
35  * https://docs.microsoft.com/en-us/typography/opentype/spec/cmap
36  */
37 #define HB_OT_TAG_cmap HB_TAG('c','m','a','p')
38 
39 namespace OT {
40 
41 
42 struct CmapSubtableFormat0
43 {
get_glyphOT::CmapSubtableFormat044   bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
45   {
46     hb_codepoint_t gid = codepoint < 256 ? glyphIdArray[codepoint] : 0;
47     if (!gid)
48       return false;
49     *glyph = gid;
50     return true;
51   }
52 
get_languageOT::CmapSubtableFormat053   unsigned get_language () const
54   {
55     return language;
56   }
57 
collect_unicodesOT::CmapSubtableFormat058   void collect_unicodes (hb_set_t *out) const
59   {
60     for (unsigned int i = 0; i < 256; i++)
61       if (glyphIdArray[i])
62 	out->add (i);
63   }
64 
collect_mappingOT::CmapSubtableFormat065   void collect_mapping (hb_set_t *unicodes, /* OUT */
66 			hb_map_t *mapping /* OUT */) const
67   {
68     for (unsigned i = 0; i < 256; i++)
69       if (glyphIdArray[i])
70       {
71 	hb_codepoint_t glyph = glyphIdArray[i];
72 	unicodes->add (i);
73 	mapping->set (i, glyph);
74       }
75   }
76 
sanitizeOT::CmapSubtableFormat077   bool sanitize (hb_sanitize_context_t *c) const
78   {
79     TRACE_SANITIZE (this);
80     return_trace (c->check_struct (this));
81   }
82 
83   protected:
84   HBUINT16	format;		/* Format number is set to 0. */
85   HBUINT16	length;		/* Byte length of this subtable. */
86   HBUINT16	language;	/* Ignore. */
87   HBUINT8	glyphIdArray[256];/* An array that maps character
88 				 * code to glyph index values. */
89   public:
90   DEFINE_SIZE_STATIC (6 + 256);
91 };
92 
93 struct CmapSubtableFormat4
94 {
95 
96   template<typename Iterator,
97 	   hb_requires (hb_is_iterator (Iterator))>
serialize_endcode_arrayOT::CmapSubtableFormat498   HBUINT16* serialize_endcode_array (hb_serialize_context_t *c,
99 				     Iterator it)
100   {
101     HBUINT16 *endCode = c->start_embed<HBUINT16> ();
102     hb_codepoint_t prev_endcp = 0xFFFF;
103 
104     for (const auto& _ : +it)
105     {
106       if (prev_endcp != 0xFFFF && prev_endcp + 1u != _.first)
107       {
108 	HBUINT16 end_code;
109 	end_code = prev_endcp;
110 	c->copy<HBUINT16> (end_code);
111       }
112       prev_endcp = _.first;
113     }
114 
115     {
116       // last endCode
117       HBUINT16 endcode;
118       endcode = prev_endcp;
119       if (unlikely (!c->copy<HBUINT16> (endcode))) return nullptr;
120       // There must be a final entry with end_code == 0xFFFF.
121       if (prev_endcp != 0xFFFF)
122       {
123 	HBUINT16 finalcode;
124 	finalcode = 0xFFFF;
125 	if (unlikely (!c->copy<HBUINT16> (finalcode))) return nullptr;
126       }
127     }
128 
129     return endCode;
130   }
131 
132   template<typename Iterator,
133 	   hb_requires (hb_is_iterator (Iterator))>
serialize_startcode_arrayOT::CmapSubtableFormat4134   HBUINT16* serialize_startcode_array (hb_serialize_context_t *c,
135 				       Iterator it)
136   {
137     HBUINT16 *startCode = c->start_embed<HBUINT16> ();
138     hb_codepoint_t prev_cp = 0xFFFF;
139 
140     for (const auto& _ : +it)
141     {
142       if (prev_cp == 0xFFFF || prev_cp + 1u != _.first)
143       {
144 	HBUINT16 start_code;
145 	start_code = _.first;
146 	c->copy<HBUINT16> (start_code);
147       }
148 
149       prev_cp = _.first;
150     }
151 
152     // There must be a final entry with end_code == 0xFFFF.
153     if (it.len () == 0 || prev_cp != 0xFFFF)
154     {
155       HBUINT16 finalcode;
156       finalcode = 0xFFFF;
157       if (unlikely (!c->copy<HBUINT16> (finalcode))) return nullptr;
158     }
159 
160     return startCode;
161   }
162 
163   template<typename Iterator,
164 	   hb_requires (hb_is_iterator (Iterator))>
serialize_idDelta_arrayOT::CmapSubtableFormat4165   HBINT16* serialize_idDelta_array (hb_serialize_context_t *c,
166 				    Iterator it,
167 				    HBUINT16 *endCode,
168 				    HBUINT16 *startCode,
169 				    unsigned segcount)
170   {
171     unsigned i = 0;
172     hb_codepoint_t last_gid = 0, start_gid = 0, last_cp = 0xFFFF;
173     bool use_delta = true;
174 
175     HBINT16 *idDelta = c->start_embed<HBINT16> ();
176     if ((char *)idDelta - (char *)startCode != (int) segcount * (int) HBINT16::static_size)
177       return nullptr;
178 
179     for (const auto& _ : +it)
180     {
181       if (_.first == startCode[i])
182       {
183 	use_delta = true;
184 	start_gid = _.second;
185       }
186       else if (_.second != last_gid + 1) use_delta = false;
187 
188       if (_.first == endCode[i])
189       {
190 	HBINT16 delta;
191 	if (use_delta) delta = (int)start_gid - (int)startCode[i];
192 	else delta = 0;
193 	c->copy<HBINT16> (delta);
194 
195 	i++;
196       }
197 
198       last_gid = _.second;
199       last_cp = _.first;
200     }
201 
202     if (it.len () == 0 || last_cp != 0xFFFF)
203     {
204       HBINT16 delta;
205       delta = 1;
206       if (unlikely (!c->copy<HBINT16> (delta))) return nullptr;
207     }
208 
209     return idDelta;
210   }
211 
212   template<typename Iterator,
213 	   hb_requires (hb_is_iterator (Iterator))>
serialize_rangeoffset_glyidOT::CmapSubtableFormat4214   HBUINT16* serialize_rangeoffset_glyid (hb_serialize_context_t *c,
215 					 Iterator it,
216 					 HBUINT16 *endCode,
217 					 HBUINT16 *startCode,
218 					 HBINT16 *idDelta,
219 					 unsigned segcount)
220   {
221     hb_hashmap_t<hb_codepoint_t, hb_codepoint_t> cp_to_gid;
222     + it | hb_sink (cp_to_gid);
223 
224     HBUINT16 *idRangeOffset = c->allocate_size<HBUINT16> (HBUINT16::static_size * segcount);
225     if (unlikely (!c->check_success (idRangeOffset))) return nullptr;
226     if (unlikely ((char *)idRangeOffset - (char *)idDelta != (int) segcount * (int) HBINT16::static_size)) return nullptr;
227 
228     for (unsigned i : + hb_range (segcount)
229              | hb_filter ([&] (const unsigned _) { return idDelta[_] == 0; }))
230     {
231       idRangeOffset[i] = 2 * (c->start_embed<HBUINT16> () - idRangeOffset - i);
232       for (hb_codepoint_t cp = startCode[i]; cp <= endCode[i]; cp++)
233       {
234         HBUINT16 gid;
235         gid = cp_to_gid[cp];
236         c->copy<HBUINT16> (gid);
237       }
238     }
239 
240     return idRangeOffset;
241   }
242 
243   template<typename Iterator,
244 	   hb_requires (hb_is_iterator (Iterator))>
serializeOT::CmapSubtableFormat4245   void serialize (hb_serialize_context_t *c,
246 		  Iterator it)
247   {
248     auto format4_iter =
249     + it
250     | hb_filter ([&] (const hb_pair_t<hb_codepoint_t, hb_codepoint_t> _)
251 		 { return _.first <= 0xFFFF; })
252     ;
253 
254     if (format4_iter.len () == 0) return;
255 
256     unsigned table_initpos = c->length ();
257     if (unlikely (!c->extend_min (this))) return;
258     this->format = 4;
259 
260     //serialize endCode[]
261     HBUINT16 *endCode = serialize_endcode_array (c, format4_iter);
262     if (unlikely (!endCode)) return;
263 
264     unsigned segcount = (c->length () - min_size) / HBUINT16::static_size;
265 
266     // 2 bytes of padding.
267     if (unlikely (!c->allocate_size<HBUINT16> (HBUINT16::static_size))) return; // 2 bytes of padding.
268 
269    // serialize startCode[]
270     HBUINT16 *startCode = serialize_startcode_array (c, format4_iter);
271     if (unlikely (!startCode)) return;
272 
273     //serialize idDelta[]
274     HBINT16 *idDelta = serialize_idDelta_array (c, format4_iter, endCode, startCode, segcount);
275     if (unlikely (!idDelta)) return;
276 
277     HBUINT16 *idRangeOffset = serialize_rangeoffset_glyid (c, format4_iter, endCode, startCode, idDelta, segcount);
278     if (unlikely (!c->check_success (idRangeOffset))) return;
279 
280     this->length = c->length () - table_initpos;
281     if ((long long) this->length != (long long) c->length () - table_initpos)
282     {
283       // Length overflowed. Discard the current object before setting the error condition, otherwise
284       // discard is a noop which prevents the higher level code from reverting the serializer to the
285       // pre-error state in cmap4 overflow handling code.
286       c->pop_discard ();
287       c->err (HB_SERIALIZE_ERROR_INT_OVERFLOW);
288       return;
289     }
290 
291     this->segCountX2 = segcount * 2;
292     this->entrySelector = hb_max (1u, hb_bit_storage (segcount)) - 1;
293     this->searchRange = 2 * (1u << this->entrySelector);
294     this->rangeShift = segcount * 2 > this->searchRange
295 		       ? 2 * segcount - this->searchRange
296 		       : 0;
297   }
298 
get_languageOT::CmapSubtableFormat4299   unsigned get_language () const
300   {
301     return language;
302   }
303 
304   struct accelerator_t
305   {
accelerator_tOT::CmapSubtableFormat4::accelerator_t306     accelerator_t () {}
accelerator_tOT::CmapSubtableFormat4::accelerator_t307     accelerator_t (const CmapSubtableFormat4 *subtable) { init (subtable); }
~accelerator_tOT::CmapSubtableFormat4::accelerator_t308     ~accelerator_t () { fini (); }
309 
initOT::CmapSubtableFormat4::accelerator_t310     void init (const CmapSubtableFormat4 *subtable)
311     {
312       segCount = subtable->segCountX2 / 2;
313       endCount = subtable->values.arrayZ;
314       startCount = endCount + segCount + 1;
315       idDelta = startCount + segCount;
316       idRangeOffset = idDelta + segCount;
317       glyphIdArray = idRangeOffset + segCount;
318       glyphIdArrayLength = (subtable->length - 16 - 8 * segCount) / 2;
319     }
finiOT::CmapSubtableFormat4::accelerator_t320     void fini () {}
321 
get_glyphOT::CmapSubtableFormat4::accelerator_t322     bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
323     {
324       struct CustomRange
325       {
326 	int cmp (hb_codepoint_t k,
327 		 unsigned distance) const
328 	{
329 	  if (k > last) return +1;
330 	  if (k < (&last)[distance]) return -1;
331 	  return 0;
332 	}
333 	HBUINT16 last;
334       };
335 
336       const HBUINT16 *found = hb_bsearch (codepoint,
337 					  this->endCount,
338 					  this->segCount,
339 					  2,
340 					  _hb_cmp_method<hb_codepoint_t, CustomRange, unsigned>,
341 					  this->segCount + 1);
342       if (!found)
343 	return false;
344       unsigned int i = found - endCount;
345 
346       hb_codepoint_t gid;
347       unsigned int rangeOffset = this->idRangeOffset[i];
348       if (rangeOffset == 0)
349 	gid = codepoint + this->idDelta[i];
350       else
351       {
352 	/* Somebody has been smoking... */
353 	unsigned int index = rangeOffset / 2 + (codepoint - this->startCount[i]) + i - this->segCount;
354 	if (unlikely (index >= this->glyphIdArrayLength))
355 	  return false;
356 	gid = this->glyphIdArray[index];
357 	if (unlikely (!gid))
358 	  return false;
359 	gid += this->idDelta[i];
360       }
361       gid &= 0xFFFFu;
362       if (!gid)
363 	return false;
364       *glyph = gid;
365       return true;
366     }
367 
get_glyph_funcOT::CmapSubtableFormat4::accelerator_t368     HB_INTERNAL static bool get_glyph_func (const void *obj, hb_codepoint_t codepoint, hb_codepoint_t *glyph)
369     { return ((const accelerator_t *) obj)->get_glyph (codepoint, glyph); }
370 
collect_unicodesOT::CmapSubtableFormat4::accelerator_t371     void collect_unicodes (hb_set_t *out) const
372     {
373       unsigned int count = this->segCount;
374       if (count && this->startCount[count - 1] == 0xFFFFu)
375 	count--; /* Skip sentinel segment. */
376       for (unsigned int i = 0; i < count; i++)
377       {
378 	hb_codepoint_t start = this->startCount[i];
379 	hb_codepoint_t end = this->endCount[i];
380 	unsigned int rangeOffset = this->idRangeOffset[i];
381 	if (rangeOffset == 0)
382 	{
383 	  for (hb_codepoint_t codepoint = start; codepoint <= end; codepoint++)
384 	  {
385 	    hb_codepoint_t gid = (codepoint + this->idDelta[i]) & 0xFFFFu;
386 	    if (unlikely (!gid))
387 	      continue;
388 	    out->add (codepoint);
389 	  }
390 	}
391 	else
392 	{
393 	  for (hb_codepoint_t codepoint = start; codepoint <= end; codepoint++)
394 	  {
395 	    unsigned int index = rangeOffset / 2 + (codepoint - this->startCount[i]) + i - this->segCount;
396 	    if (unlikely (index >= this->glyphIdArrayLength))
397 	      break;
398 	    hb_codepoint_t gid = this->glyphIdArray[index];
399 	    if (unlikely (!gid))
400 	      continue;
401 	    out->add (codepoint);
402 	  }
403 	}
404       }
405     }
406 
collect_mappingOT::CmapSubtableFormat4::accelerator_t407     void collect_mapping (hb_set_t *unicodes, /* OUT */
408 			  hb_map_t *mapping /* OUT */) const
409     {
410       unsigned count = this->segCount;
411       if (count && this->startCount[count - 1] == 0xFFFFu)
412 	count--; /* Skip sentinel segment. */
413       for (unsigned i = 0; i < count; i++)
414       {
415 	hb_codepoint_t start = this->startCount[i];
416 	hb_codepoint_t end = this->endCount[i];
417 	unsigned rangeOffset = this->idRangeOffset[i];
418 	if (rangeOffset == 0)
419 	{
420 	  for (hb_codepoint_t codepoint = start; codepoint <= end; codepoint++)
421 	  {
422 	    hb_codepoint_t gid = (codepoint + this->idDelta[i]) & 0xFFFFu;
423 	    if (unlikely (!gid))
424 	      continue;
425 	    unicodes->add (codepoint);
426 	    mapping->set (codepoint, gid);
427 	  }
428 	}
429 	else
430 	{
431 	  for (hb_codepoint_t codepoint = start; codepoint <= end; codepoint++)
432 	  {
433 	    unsigned index = rangeOffset / 2 + (codepoint - this->startCount[i]) + i - this->segCount;
434 	    if (unlikely (index >= this->glyphIdArrayLength))
435 	      break;
436 	    hb_codepoint_t gid = this->glyphIdArray[index];
437 	    if (unlikely (!gid))
438 	      continue;
439 	    unicodes->add (codepoint);
440 	    mapping->set (codepoint, gid);
441 	  }
442 	}
443       }
444     }
445 
446     const HBUINT16 *endCount;
447     const HBUINT16 *startCount;
448     const HBUINT16 *idDelta;
449     const HBUINT16 *idRangeOffset;
450     const HBUINT16 *glyphIdArray;
451     unsigned int segCount;
452     unsigned int glyphIdArrayLength;
453   };
454 
get_glyphOT::CmapSubtableFormat4455   bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
456   {
457     accelerator_t accel (this);
458     return accel.get_glyph_func (&accel, codepoint, glyph);
459   }
collect_unicodesOT::CmapSubtableFormat4460   void collect_unicodes (hb_set_t *out) const
461   {
462     accelerator_t accel (this);
463     accel.collect_unicodes (out);
464   }
465 
collect_mappingOT::CmapSubtableFormat4466   void collect_mapping (hb_set_t *unicodes, /* OUT */
467 			hb_map_t *mapping /* OUT */) const
468   {
469     accelerator_t accel (this);
470     accel.collect_mapping (unicodes, mapping);
471   }
472 
sanitizeOT::CmapSubtableFormat4473   bool sanitize (hb_sanitize_context_t *c) const
474   {
475     TRACE_SANITIZE (this);
476     if (unlikely (!c->check_struct (this)))
477       return_trace (false);
478 
479     if (unlikely (!c->check_range (this, length)))
480     {
481       /* Some broken fonts have too long of a "length" value.
482        * If that is the case, just change the value to truncate
483        * the subtable at the end of the blob. */
484       uint16_t new_length = (uint16_t) hb_min ((uintptr_t) 65535,
485 					       (uintptr_t) (c->end -
486 							    (char *) this));
487       if (!c->try_set (&length, new_length))
488 	return_trace (false);
489     }
490 
491     return_trace (16 + 4 * (unsigned int) segCountX2 <= length);
492   }
493 
494 
495 
496   protected:
497   HBUINT16	format;		/* Format number is set to 4. */
498   HBUINT16	length;		/* This is the length in bytes of the
499 				 * subtable. */
500   HBUINT16	language;	/* Ignore. */
501   HBUINT16	segCountX2;	/* 2 x segCount. */
502   HBUINT16	searchRange;	/* 2 * (2**floor(log2(segCount))) */
503   HBUINT16	entrySelector;	/* log2(searchRange/2) */
504   HBUINT16	rangeShift;	/* 2 x segCount - searchRange */
505 
506   UnsizedArrayOf<HBUINT16>
507 		values;
508 #if 0
509   HBUINT16	endCount[segCount];	/* End characterCode for each segment,
510 					 * last=0xFFFFu. */
511   HBUINT16	reservedPad;		/* Set to 0. */
512   HBUINT16	startCount[segCount];	/* Start character code for each segment. */
513   HBINT16		idDelta[segCount];	/* Delta for all character codes in segment. */
514   HBUINT16	idRangeOffset[segCount];/* Offsets into glyphIdArray or 0 */
515   UnsizedArrayOf<HBUINT16>
516 		glyphIdArray;	/* Glyph index array (arbitrary length) */
517 #endif
518 
519   public:
520   DEFINE_SIZE_ARRAY (14, values);
521 };
522 
523 struct CmapSubtableLongGroup
524 {
525   friend struct CmapSubtableFormat12;
526   friend struct CmapSubtableFormat13;
527   template<typename U>
528   friend struct CmapSubtableLongSegmented;
529   friend struct cmap;
530 
cmpOT::CmapSubtableLongGroup531   int cmp (hb_codepoint_t codepoint) const
532   {
533     if (codepoint < startCharCode) return -1;
534     if (codepoint > endCharCode)   return +1;
535     return 0;
536   }
537 
sanitizeOT::CmapSubtableLongGroup538   bool sanitize (hb_sanitize_context_t *c) const
539   {
540     TRACE_SANITIZE (this);
541     return_trace (c->check_struct (this));
542   }
543 
544   private:
545   HBUINT32		startCharCode;	/* First character code in this group. */
546   HBUINT32		endCharCode;	/* Last character code in this group. */
547   HBUINT32		glyphID;	/* Glyph index; interpretation depends on
548 					 * subtable format. */
549   public:
550   DEFINE_SIZE_STATIC (12);
551 };
552 DECLARE_NULL_NAMESPACE_BYTES (OT, CmapSubtableLongGroup);
553 
554 template <typename UINT>
555 struct CmapSubtableTrimmed
556 {
get_glyphOT::CmapSubtableTrimmed557   bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
558   {
559     /* Rely on our implicit array bound-checking. */
560     hb_codepoint_t gid = glyphIdArray[codepoint - startCharCode];
561     if (!gid)
562       return false;
563     *glyph = gid;
564     return true;
565   }
566 
get_languageOT::CmapSubtableTrimmed567   unsigned get_language () const
568   {
569     return language;
570   }
571 
collect_unicodesOT::CmapSubtableTrimmed572   void collect_unicodes (hb_set_t *out) const
573   {
574     hb_codepoint_t start = startCharCode;
575     unsigned int count = glyphIdArray.len;
576     for (unsigned int i = 0; i < count; i++)
577       if (glyphIdArray[i])
578 	out->add (start + i);
579   }
580 
collect_mappingOT::CmapSubtableTrimmed581   void collect_mapping (hb_set_t *unicodes, /* OUT */
582 			hb_map_t *mapping /* OUT */) const
583   {
584     hb_codepoint_t start_cp = startCharCode;
585     unsigned count = glyphIdArray.len;
586     for (unsigned i = 0; i < count; i++)
587       if (glyphIdArray[i])
588       {
589 	hb_codepoint_t unicode = start_cp + i;
590 	hb_codepoint_t glyphid = glyphIdArray[i];
591 	unicodes->add (unicode);
592 	mapping->set (unicode, glyphid);
593       }
594   }
595 
sanitizeOT::CmapSubtableTrimmed596   bool sanitize (hb_sanitize_context_t *c) const
597   {
598     TRACE_SANITIZE (this);
599     return_trace (c->check_struct (this) && glyphIdArray.sanitize (c));
600   }
601 
602   protected:
603   UINT		formatReserved;	/* Subtable format and (maybe) padding. */
604   UINT		length;		/* Byte length of this subtable. */
605   UINT		language;	/* Ignore. */
606   UINT		startCharCode;	/* First character code covered. */
607   ArrayOf<HBGlyphID16, UINT>
608 		glyphIdArray;	/* Array of glyph index values for character
609 				 * codes in the range. */
610   public:
611   DEFINE_SIZE_ARRAY (5 * sizeof (UINT), glyphIdArray);
612 };
613 
614 struct CmapSubtableFormat6  : CmapSubtableTrimmed<HBUINT16> {};
615 struct CmapSubtableFormat10 : CmapSubtableTrimmed<HBUINT32 > {};
616 
617 template <typename T>
618 struct CmapSubtableLongSegmented
619 {
620   friend struct cmap;
621 
get_glyphOT::CmapSubtableLongSegmented622   bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
623   {
624     hb_codepoint_t gid = T::group_get_glyph (groups.bsearch (codepoint), codepoint);
625     if (!gid)
626       return false;
627     *glyph = gid;
628     return true;
629   }
630 
get_languageOT::CmapSubtableLongSegmented631   unsigned get_language () const
632   {
633     return language;
634   }
635 
collect_unicodesOT::CmapSubtableLongSegmented636   void collect_unicodes (hb_set_t *out, unsigned int num_glyphs) const
637   {
638     for (unsigned int i = 0; i < this->groups.len; i++)
639     {
640       hb_codepoint_t start = this->groups[i].startCharCode;
641       hb_codepoint_t end = hb_min ((hb_codepoint_t) this->groups[i].endCharCode,
642 				   (hb_codepoint_t) HB_UNICODE_MAX);
643       hb_codepoint_t gid = this->groups[i].glyphID;
644       if (!gid)
645       {
646 	/* Intention is: if (hb_is_same (T, CmapSubtableFormat13)) continue; */
647 	if (! T::group_get_glyph (this->groups[i], end)) continue;
648 	start++;
649 	gid++;
650       }
651       if (unlikely ((unsigned int) gid >= num_glyphs)) continue;
652       if (unlikely ((unsigned int) (gid + end - start) >= num_glyphs))
653 	end = start + (hb_codepoint_t) num_glyphs - gid;
654 
655       out->add_range (start, end);
656     }
657   }
658 
collect_mappingOT::CmapSubtableLongSegmented659   void collect_mapping (hb_set_t *unicodes, /* OUT */
660 			hb_map_t *mapping, /* OUT */
661 			unsigned num_glyphs) const
662   {
663     for (unsigned i = 0; i < this->groups.len; i++)
664     {
665       hb_codepoint_t start = this->groups[i].startCharCode;
666       hb_codepoint_t end = hb_min ((hb_codepoint_t) this->groups[i].endCharCode,
667 				   (hb_codepoint_t) HB_UNICODE_MAX);
668       hb_codepoint_t gid = this->groups[i].glyphID;
669       if (!gid)
670       {
671 	/* Intention is: if (hb_is_same (T, CmapSubtableFormat13)) continue; */
672 	if (! T::group_get_glyph (this->groups[i], end)) continue;
673 	start++;
674 	gid++;
675       }
676       if (unlikely ((unsigned int) gid >= num_glyphs)) continue;
677       if (unlikely ((unsigned int) (gid + end - start) >= num_glyphs))
678 	end = start + (hb_codepoint_t) num_glyphs - gid;
679 
680       for (unsigned cp = start; cp <= end; cp++)
681       {
682 	unicodes->add (cp);
683 	mapping->set (cp, gid);
684 	gid++;
685       }
686     }
687   }
688 
sanitizeOT::CmapSubtableLongSegmented689   bool sanitize (hb_sanitize_context_t *c) const
690   {
691     TRACE_SANITIZE (this);
692     return_trace (c->check_struct (this) && groups.sanitize (c));
693   }
694 
695   protected:
696   HBUINT16	format;		/* Subtable format; set to 12. */
697   HBUINT16	reserved;	/* Reserved; set to 0. */
698   HBUINT32	length;		/* Byte length of this subtable. */
699   HBUINT32	language;	/* Ignore. */
700   SortedArray32Of<CmapSubtableLongGroup>
701 		groups;		/* Groupings. */
702   public:
703   DEFINE_SIZE_ARRAY (16, groups);
704 };
705 
706 struct CmapSubtableFormat12 : CmapSubtableLongSegmented<CmapSubtableFormat12>
707 {
group_get_glyphOT::CmapSubtableFormat12708   static hb_codepoint_t group_get_glyph (const CmapSubtableLongGroup &group,
709 					 hb_codepoint_t u)
710   { return likely (group.startCharCode <= group.endCharCode) ?
711 	   group.glyphID + (u - group.startCharCode) : 0; }
712 
713 
714   template<typename Iterator,
715 	   hb_requires (hb_is_iterator (Iterator))>
serializeOT::CmapSubtableFormat12716   void serialize (hb_serialize_context_t *c,
717 		  Iterator it)
718   {
719     if (it.len () == 0) return;
720     unsigned table_initpos = c->length ();
721     if (unlikely (!c->extend_min (this))) return;
722 
723     hb_codepoint_t startCharCode = 0xFFFF, endCharCode = 0xFFFF;
724     hb_codepoint_t glyphID = 0;
725 
726     for (const auto& _ : +it)
727     {
728       if (startCharCode == 0xFFFF)
729       {
730 	startCharCode = _.first;
731 	endCharCode = _.first;
732 	glyphID = _.second;
733       }
734       else if (!_is_gid_consecutive (endCharCode, startCharCode, glyphID, _.first, _.second))
735       {
736 	CmapSubtableLongGroup  grouprecord;
737 	grouprecord.startCharCode = startCharCode;
738 	grouprecord.endCharCode = endCharCode;
739 	grouprecord.glyphID = glyphID;
740 	c->copy<CmapSubtableLongGroup> (grouprecord);
741 
742 	startCharCode = _.first;
743 	endCharCode = _.first;
744 	glyphID = _.second;
745       }
746       else
747 	endCharCode = _.first;
748     }
749 
750     CmapSubtableLongGroup record;
751     record.startCharCode = startCharCode;
752     record.endCharCode = endCharCode;
753     record.glyphID = glyphID;
754     c->copy<CmapSubtableLongGroup> (record);
755 
756     this->format = 12;
757     this->reserved = 0;
758     this->length = c->length () - table_initpos;
759     this->groups.len = (this->length - min_size)/CmapSubtableLongGroup::static_size;
760   }
761 
get_sub_table_sizeOT::CmapSubtableFormat12762   static size_t get_sub_table_size (const hb_sorted_vector_t<CmapSubtableLongGroup> &groups_data)
763   { return 16 + 12 * groups_data.length; }
764 
765   private:
_is_gid_consecutiveOT::CmapSubtableFormat12766   static bool _is_gid_consecutive (hb_codepoint_t endCharCode,
767 				   hb_codepoint_t startCharCode,
768 				   hb_codepoint_t glyphID,
769 				   hb_codepoint_t cp,
770 				   hb_codepoint_t new_gid)
771   {
772     return (cp - 1 == endCharCode) &&
773 	new_gid == glyphID + (cp - startCharCode);
774   }
775 
776 };
777 
778 struct CmapSubtableFormat13 : CmapSubtableLongSegmented<CmapSubtableFormat13>
779 {
group_get_glyphOT::CmapSubtableFormat13780   static hb_codepoint_t group_get_glyph (const CmapSubtableLongGroup &group,
781 					 hb_codepoint_t u HB_UNUSED)
782   { return group.glyphID; }
783 };
784 
/* Result of a variation-selector lookup. */
enum glyph_variant_t
{
  GLYPH_VARIANT_NOT_FOUND   = 0, /* No entry for the codepoint. */
  GLYPH_VARIANT_FOUND       = 1, /* A specific glyph was found. */
  GLYPH_VARIANT_USE_DEFAULT = 2  /* Codepoint is listed in the default UVS. */
};
791 
792 struct UnicodeValueRange
793 {
cmpOT::UnicodeValueRange794   int cmp (const hb_codepoint_t &codepoint) const
795   {
796     if (codepoint < startUnicodeValue) return -1;
797     if (codepoint > startUnicodeValue + additionalCount) return +1;
798     return 0;
799   }
800 
sanitizeOT::UnicodeValueRange801   bool sanitize (hb_sanitize_context_t *c) const
802   {
803     TRACE_SANITIZE (this);
804     return_trace (c->check_struct (this));
805   }
806 
807   HBUINT24	startUnicodeValue;	/* First value in this range. */
808   HBUINT8	additionalCount;	/* Number of additional values in this
809 					 * range. */
810   public:
811   DEFINE_SIZE_STATIC (4);
812 };
813 
814 struct DefaultUVS : SortedArray32Of<UnicodeValueRange>
815 {
collect_unicodesOT::DefaultUVS816   void collect_unicodes (hb_set_t *out) const
817   {
818     unsigned int count = len;
819     for (unsigned int i = 0; i < count; i++)
820     {
821       hb_codepoint_t first = arrayZ[i].startUnicodeValue;
822       hb_codepoint_t last = hb_min ((hb_codepoint_t) (first + arrayZ[i].additionalCount),
823 				    (hb_codepoint_t) HB_UNICODE_MAX);
824       out->add_range (first, last);
825     }
826   }
827 
copyOT::DefaultUVS828   DefaultUVS* copy (hb_serialize_context_t *c,
829 		    const hb_set_t *unicodes) const
830   {
831     DefaultUVS *out = c->start_embed<DefaultUVS> ();
832     if (unlikely (!out)) return nullptr;
833     auto snap = c->snapshot ();
834 
835     HBUINT32 len;
836     len = 0;
837     if (unlikely (!c->copy<HBUINT32> (len))) return nullptr;
838     unsigned init_len = c->length ();
839 
840     hb_codepoint_t lastCode = HB_MAP_VALUE_INVALID;
841     int count = -1;
842 
843     for (const UnicodeValueRange& _ : as_array ())
844     {
845       for (const unsigned addcnt : hb_range ((unsigned) _.additionalCount + 1))
846       {
847 	unsigned curEntry = (unsigned) _.startUnicodeValue + addcnt;
848 	if (!unicodes->has (curEntry)) continue;
849 	count += 1;
850 	if (lastCode == HB_MAP_VALUE_INVALID)
851 	  lastCode = curEntry;
852 	else if (lastCode + count != curEntry)
853 	{
854 	  UnicodeValueRange rec;
855 	  rec.startUnicodeValue = lastCode;
856 	  rec.additionalCount = count - 1;
857 	  c->copy<UnicodeValueRange> (rec);
858 
859 	  lastCode = curEntry;
860 	  count = 0;
861 	}
862       }
863     }
864 
865     if (lastCode != HB_MAP_VALUE_INVALID)
866     {
867       UnicodeValueRange rec;
868       rec.startUnicodeValue = lastCode;
869       rec.additionalCount = count;
870       c->copy<UnicodeValueRange> (rec);
871     }
872 
873     if (c->length () - init_len == 0)
874     {
875       c->revert (snap);
876       return nullptr;
877     }
878     else
879     {
880       if (unlikely (!c->check_assign (out->len,
881                                       (c->length () - init_len) / UnicodeValueRange::static_size,
882                                       HB_SERIALIZE_ERROR_INT_OVERFLOW))) return nullptr;
883       return out;
884     }
885   }
886 
887   public:
888   DEFINE_SIZE_ARRAY (4, *this);
889 };
890 
891 struct UVSMapping
892 {
cmpOT::UVSMapping893   int cmp (const hb_codepoint_t &codepoint) const
894   { return unicodeValue.cmp (codepoint); }
895 
sanitizeOT::UVSMapping896   bool sanitize (hb_sanitize_context_t *c) const
897   {
898     TRACE_SANITIZE (this);
899     return_trace (c->check_struct (this));
900   }
901 
902   HBUINT24	unicodeValue;	/* Base Unicode value of the UVS */
903   HBGlyphID16	glyphID;	/* Glyph ID of the UVS */
904   public:
905   DEFINE_SIZE_STATIC (5);
906 };
907 
908 struct NonDefaultUVS : SortedArray32Of<UVSMapping>
909 {
collect_unicodesOT::NonDefaultUVS910   void collect_unicodes (hb_set_t *out) const
911   {
912     for (const auto& a : as_array ())
913       out->add (a.unicodeValue);
914   }
915 
collect_mappingOT::NonDefaultUVS916   void collect_mapping (hb_set_t *unicodes, /* OUT */
917 			hb_map_t *mapping /* OUT */) const
918   {
919     for (const auto& a : as_array ())
920     {
921       hb_codepoint_t unicode = a.unicodeValue;
922       hb_codepoint_t glyphid = a.glyphID;
923       unicodes->add (unicode);
924       mapping->set (unicode, glyphid);
925     }
926   }
927 
closure_glyphsOT::NonDefaultUVS928   void closure_glyphs (const hb_set_t      *unicodes,
929 		       hb_set_t            *glyphset) const
930   {
931     + as_array ()
932     | hb_filter (unicodes, &UVSMapping::unicodeValue)
933     | hb_map (&UVSMapping::glyphID)
934     | hb_sink (glyphset)
935     ;
936   }
937 
copyOT::NonDefaultUVS938   NonDefaultUVS* copy (hb_serialize_context_t *c,
939 		       const hb_set_t *unicodes,
940 		       const hb_set_t *glyphs_requested,
941 		       const hb_map_t *glyph_map) const
942   {
943     NonDefaultUVS *out = c->start_embed<NonDefaultUVS> ();
944     if (unlikely (!out)) return nullptr;
945 
946     auto it =
947     + as_array ()
948     | hb_filter ([&] (const UVSMapping& _)
949 		 {
950 		   return unicodes->has (_.unicodeValue) || glyphs_requested->has (_.glyphID);
951 		 })
952     ;
953 
954     if (!it) return nullptr;
955 
956     HBUINT32 len;
957     len = it.len ();
958     if (unlikely (!c->copy<HBUINT32> (len))) return nullptr;
959 
960     for (const UVSMapping& _ : it)
961     {
962       UVSMapping mapping;
963       mapping.unicodeValue = _.unicodeValue;
964       mapping.glyphID = glyph_map->get (_.glyphID);
965       c->copy<UVSMapping> (mapping);
966     }
967 
968     return out;
969   }
970 
971   public:
972   DEFINE_SIZE_ARRAY (4, *this);
973 };
974 
975 struct VariationSelectorRecord
976 {
get_glyphOT::VariationSelectorRecord977   glyph_variant_t get_glyph (hb_codepoint_t codepoint,
978 			     hb_codepoint_t *glyph,
979 			     const void *base) const
980   {
981     if ((base+defaultUVS).bfind (codepoint))
982       return GLYPH_VARIANT_USE_DEFAULT;
983     const UVSMapping &nonDefault = (base+nonDefaultUVS).bsearch (codepoint);
984     if (nonDefault.glyphID)
985     {
986       *glyph = nonDefault.glyphID;
987        return GLYPH_VARIANT_FOUND;
988     }
989     return GLYPH_VARIANT_NOT_FOUND;
990   }
991 
VariationSelectorRecordOT::VariationSelectorRecord992   VariationSelectorRecord(const VariationSelectorRecord& other)
993   {
994     *this = other;
995   }
996 
operator =OT::VariationSelectorRecord997   void operator= (const VariationSelectorRecord& other)
998   {
999     varSelector = other.varSelector;
1000     HBUINT32 offset = other.defaultUVS;
1001     defaultUVS = offset;
1002     offset = other.nonDefaultUVS;
1003     nonDefaultUVS = offset;
1004   }
1005 
collect_unicodesOT::VariationSelectorRecord1006   void collect_unicodes (hb_set_t *out, const void *base) const
1007   {
1008     (base+defaultUVS).collect_unicodes (out);
1009     (base+nonDefaultUVS).collect_unicodes (out);
1010   }
1011 
collect_mappingOT::VariationSelectorRecord1012   void collect_mapping (const void *base,
1013 			hb_set_t *unicodes, /* OUT */
1014 			hb_map_t *mapping /* OUT */) const
1015   {
1016     (base+defaultUVS).collect_unicodes (unicodes);
1017     (base+nonDefaultUVS).collect_mapping (unicodes, mapping);
1018   }
1019 
cmpOT::VariationSelectorRecord1020   int cmp (const hb_codepoint_t &variation_selector) const
1021   { return varSelector.cmp (variation_selector); }
1022 
sanitizeOT::VariationSelectorRecord1023   bool sanitize (hb_sanitize_context_t *c, const void *base) const
1024   {
1025     TRACE_SANITIZE (this);
1026     return_trace (c->check_struct (this) &&
1027 		  defaultUVS.sanitize (c, base) &&
1028 		  nonDefaultUVS.sanitize (c, base));
1029   }
1030 
1031   hb_pair_t<unsigned, unsigned>
copyOT::VariationSelectorRecord1032   copy (hb_serialize_context_t *c,
1033 	const hb_set_t *unicodes,
1034 	const hb_set_t *glyphs_requested,
1035 	const hb_map_t *glyph_map,
1036 	const void *base) const
1037   {
1038     auto snap = c->snapshot ();
1039     auto *out = c->embed<VariationSelectorRecord> (*this);
1040     if (unlikely (!out)) return hb_pair (0, 0);
1041 
1042     out->defaultUVS = 0;
1043     out->nonDefaultUVS = 0;
1044 
1045     unsigned non_default_uvs_objidx = 0;
1046     if (nonDefaultUVS != 0)
1047     {
1048       c->push ();
1049       if (c->copy (base+nonDefaultUVS, unicodes, glyphs_requested, glyph_map))
1050 	non_default_uvs_objidx = c->pop_pack ();
1051       else c->pop_discard ();
1052     }
1053 
1054     unsigned default_uvs_objidx = 0;
1055     if (defaultUVS != 0)
1056     {
1057       c->push ();
1058       if (c->copy (base+defaultUVS, unicodes))
1059 	default_uvs_objidx = c->pop_pack ();
1060       else c->pop_discard ();
1061     }
1062 
1063 
1064     if (!default_uvs_objidx && !non_default_uvs_objidx)
1065       c->revert (snap);
1066 
1067     return hb_pair (default_uvs_objidx, non_default_uvs_objidx);
1068   }
1069 
1070   HBUINT24	varSelector;	/* Variation selector. */
1071   Offset32To<DefaultUVS>
1072 		defaultUVS;	/* Offset to Default UVS Table.  May be 0. */
1073   Offset32To<NonDefaultUVS>
1074 		nonDefaultUVS;	/* Offset to Non-Default UVS Table.  May be 0. */
1075   public:
1076   DEFINE_SIZE_STATIC (11);
1077 };
1078 
1079 struct CmapSubtableFormat14
1080 {
get_glyph_variantOT::CmapSubtableFormat141081   glyph_variant_t get_glyph_variant (hb_codepoint_t codepoint,
1082 				     hb_codepoint_t variation_selector,
1083 				     hb_codepoint_t *glyph) const
1084   { return record.bsearch (variation_selector).get_glyph (codepoint, glyph, this); }
1085 
collect_variation_selectorsOT::CmapSubtableFormat141086   void collect_variation_selectors (hb_set_t *out) const
1087   {
1088     for (const auto& a : record.as_array ())
1089       out->add (a.varSelector);
1090   }
collect_variation_unicodesOT::CmapSubtableFormat141091   void collect_variation_unicodes (hb_codepoint_t variation_selector,
1092 				   hb_set_t *out) const
1093   { record.bsearch (variation_selector).collect_unicodes (out, this); }
1094 
serializeOT::CmapSubtableFormat141095   void serialize (hb_serialize_context_t *c,
1096 		  const hb_set_t *unicodes,
1097 		  const hb_set_t *glyphs_requested,
1098 		  const hb_map_t *glyph_map,
1099 		  const void *base)
1100   {
1101     auto snap = c->snapshot ();
1102     unsigned table_initpos = c->length ();
1103     const char* init_tail = c->tail;
1104 
1105     if (unlikely (!c->extend_min (this))) return;
1106     this->format = 14;
1107 
1108     auto src_tbl = reinterpret_cast<const CmapSubtableFormat14*> (base);
1109 
1110     /*
1111      * Some versions of OTS require that offsets are in order. Due to the use
1112      * of push()/pop_pack() serializing the variation records in order results
1113      * in the offsets being in reverse order (first record has the largest
1114      * offset). While this is perfectly valid, it will cause some versions of
1115      * OTS to consider this table bad.
1116      *
1117      * So to prevent this issue we serialize the variation records in reverse
1118      * order, so that the offsets are ordered from small to large. Since
1119      * variation records are supposed to be in increasing order of varSelector
1120      * we then have to reverse the order of the written variation selector
1121      * records after everything is finalized.
1122      */
1123     hb_vector_t<hb_pair_t<unsigned, unsigned>> obj_indices;
1124     for (int i = src_tbl->record.len - 1; i >= 0; i--)
1125     {
1126       hb_pair_t<unsigned, unsigned> result = src_tbl->record[i].copy (c, unicodes, glyphs_requested, glyph_map, base);
1127       if (result.first || result.second)
1128 	obj_indices.push (result);
1129     }
1130 
1131     if (c->length () - table_initpos == CmapSubtableFormat14::min_size)
1132     {
1133       c->revert (snap);
1134       return;
1135     }
1136 
1137     if (unlikely (!c->check_success (!obj_indices.in_error ())))
1138       return;
1139 
1140     int tail_len = init_tail - c->tail;
1141     c->check_assign (this->length, c->length () - table_initpos + tail_len,
1142                      HB_SERIALIZE_ERROR_INT_OVERFLOW);
1143     c->check_assign (this->record.len,
1144 		     (c->length () - table_initpos - CmapSubtableFormat14::min_size) /
1145 		     VariationSelectorRecord::static_size,
1146                      HB_SERIALIZE_ERROR_INT_OVERFLOW);
1147 
1148     /* Correct the incorrect write order by reversing the order of the variation
1149        records array. */
1150     _reverse_variation_records ();
1151 
1152     /* Now that records are in the right order, we can set up the offsets. */
1153     _add_links_to_variation_records (c, obj_indices);
1154   }
1155 
_reverse_variation_recordsOT::CmapSubtableFormat141156   void _reverse_variation_records ()
1157   {
1158     record.as_array ().reverse ();
1159   }
1160 
_add_links_to_variation_recordsOT::CmapSubtableFormat141161   void _add_links_to_variation_records (hb_serialize_context_t *c,
1162 					const hb_vector_t<hb_pair_t<unsigned, unsigned>>& obj_indices)
1163   {
1164     for (unsigned i = 0; i < obj_indices.length; i++)
1165     {
1166       /*
1167        * Since the record array has been reversed (see comments in copy())
1168        * but obj_indices has not been, the indices at obj_indices[i]
1169        * are for the variation record at record[j].
1170        */
1171       int j = obj_indices.length - 1 - i;
1172       c->add_link (record[j].defaultUVS, obj_indices[i].first);
1173       c->add_link (record[j].nonDefaultUVS, obj_indices[i].second);
1174     }
1175   }
1176 
closure_glyphsOT::CmapSubtableFormat141177   void closure_glyphs (const hb_set_t      *unicodes,
1178 		       hb_set_t            *glyphset) const
1179   {
1180     + hb_iter (record)
1181     | hb_filter (hb_bool, &VariationSelectorRecord::nonDefaultUVS)
1182     | hb_map (&VariationSelectorRecord::nonDefaultUVS)
1183     | hb_map (hb_add (this))
1184     | hb_apply ([=] (const NonDefaultUVS& _) { _.closure_glyphs (unicodes, glyphset); })
1185     ;
1186   }
1187 
collect_unicodesOT::CmapSubtableFormat141188   void collect_unicodes (hb_set_t *out) const
1189   {
1190     for (const VariationSelectorRecord& _ : record)
1191       _.collect_unicodes (out, this);
1192   }
1193 
collect_mappingOT::CmapSubtableFormat141194   void collect_mapping (hb_set_t *unicodes, /* OUT */
1195 			hb_map_t *mapping /* OUT */) const
1196   {
1197     for (const VariationSelectorRecord& _ : record)
1198       _.collect_mapping (this, unicodes, mapping);
1199   }
1200 
sanitizeOT::CmapSubtableFormat141201   bool sanitize (hb_sanitize_context_t *c) const
1202   {
1203     TRACE_SANITIZE (this);
1204     return_trace (c->check_struct (this) &&
1205 		  record.sanitize (c, this));
1206   }
1207 
1208   protected:
1209   HBUINT16	format;		/* Format number is set to 14. */
1210   HBUINT32	length;		/* Byte length of this subtable. */
1211   SortedArray32Of<VariationSelectorRecord>
1212 		record;		/* Variation selector records; sorted
1213 				 * in increasing order of `varSelector'. */
1214   public:
1215   DEFINE_SIZE_ARRAY (10, record);
1216 };
1217 
1218 struct CmapSubtable
1219 {
1220   /* Note: We intentionally do NOT implement subtable formats 2 and 8. */
1221 
get_glyphOT::CmapSubtable1222   bool get_glyph (hb_codepoint_t codepoint,
1223 		  hb_codepoint_t *glyph) const
1224   {
1225     switch (u.format) {
1226     case  0: return u.format0 .get_glyph (codepoint, glyph);
1227     case  4: return u.format4 .get_glyph (codepoint, glyph);
1228     case  6: return u.format6 .get_glyph (codepoint, glyph);
1229     case 10: return u.format10.get_glyph (codepoint, glyph);
1230     case 12: return u.format12.get_glyph (codepoint, glyph);
1231     case 13: return u.format13.get_glyph (codepoint, glyph);
1232     case 14:
1233     default: return false;
1234     }
1235   }
collect_unicodesOT::CmapSubtable1236   void collect_unicodes (hb_set_t *out, unsigned int num_glyphs = UINT_MAX) const
1237   {
1238     switch (u.format) {
1239     case  0: u.format0 .collect_unicodes (out); return;
1240     case  4: u.format4 .collect_unicodes (out); return;
1241     case  6: u.format6 .collect_unicodes (out); return;
1242     case 10: u.format10.collect_unicodes (out); return;
1243     case 12: u.format12.collect_unicodes (out, num_glyphs); return;
1244     case 13: u.format13.collect_unicodes (out, num_glyphs); return;
1245     case 14:
1246     default: return;
1247     }
1248   }
1249 
collect_mappingOT::CmapSubtable1250   void collect_mapping (hb_set_t *unicodes, /* OUT */
1251 			hb_map_t *mapping, /* OUT */
1252 			unsigned num_glyphs = UINT_MAX) const
1253   {
1254     switch (u.format) {
1255     case  0: u.format0 .collect_mapping (unicodes, mapping); return;
1256     case  4: u.format4 .collect_mapping (unicodes, mapping); return;
1257     case  6: u.format6 .collect_mapping (unicodes, mapping); return;
1258     case 10: u.format10.collect_mapping (unicodes, mapping); return;
1259     case 12: u.format12.collect_mapping (unicodes, mapping, num_glyphs); return;
1260     case 13: u.format13.collect_mapping (unicodes, mapping, num_glyphs); return;
1261     case 14:
1262     default: return;
1263     }
1264   }
1265 
get_languageOT::CmapSubtable1266   unsigned get_language () const
1267   {
1268     switch (u.format) {
1269     case  0: return u.format0 .get_language ();
1270     case  4: return u.format4 .get_language ();
1271     case  6: return u.format6 .get_language ();
1272     case 10: return u.format10.get_language ();
1273     case 12: return u.format12.get_language ();
1274     case 13: return u.format13.get_language ();
1275     case 14:
1276     default: return 0;
1277     }
1278   }
1279 
1280   template<typename Iterator,
1281 	   hb_requires (hb_is_iterator (Iterator))>
serializeOT::CmapSubtable1282   void serialize (hb_serialize_context_t *c,
1283 		  Iterator it,
1284 		  unsigned format,
1285 		  const hb_subset_plan_t *plan,
1286 		  const void *base)
1287   {
1288     switch (format) {
1289     case  4: return u.format4.serialize (c, it);
1290     case 12: return u.format12.serialize (c, it);
1291     case 14: return u.format14.serialize (c, plan->unicodes, plan->glyphs_requested, plan->glyph_map, base);
1292     default: return;
1293     }
1294   }
1295 
sanitizeOT::CmapSubtable1296   bool sanitize (hb_sanitize_context_t *c) const
1297   {
1298     TRACE_SANITIZE (this);
1299     if (!u.format.sanitize (c)) return_trace (false);
1300     switch (u.format) {
1301     case  0: return_trace (u.format0 .sanitize (c));
1302     case  4: return_trace (u.format4 .sanitize (c));
1303     case  6: return_trace (u.format6 .sanitize (c));
1304     case 10: return_trace (u.format10.sanitize (c));
1305     case 12: return_trace (u.format12.sanitize (c));
1306     case 13: return_trace (u.format13.sanitize (c));
1307     case 14: return_trace (u.format14.sanitize (c));
1308     default:return_trace (true);
1309     }
1310   }
1311 
1312   public:
1313   union {
1314   HBUINT16		format;		/* Format identifier */
1315   CmapSubtableFormat0	format0;
1316   CmapSubtableFormat4	format4;
1317   CmapSubtableFormat6	format6;
1318   CmapSubtableFormat10	format10;
1319   CmapSubtableFormat12	format12;
1320   CmapSubtableFormat13	format13;
1321   CmapSubtableFormat14	format14;
1322   } u;
1323   public:
1324   DEFINE_SIZE_UNION (2, format);
1325 };
1326 
1327 
1328 struct EncodingRecord
1329 {
cmpOT::EncodingRecord1330   int cmp (const EncodingRecord &other) const
1331   {
1332     int ret;
1333     ret = platformID.cmp (other.platformID);
1334     if (ret) return ret;
1335     ret = encodingID.cmp (other.encodingID);
1336     if (ret) return ret;
1337     return 0;
1338   }
1339 
sanitizeOT::EncodingRecord1340   bool sanitize (hb_sanitize_context_t *c, const void *base) const
1341   {
1342     TRACE_SANITIZE (this);
1343     return_trace (c->check_struct (this) &&
1344 		  subtable.sanitize (c, base));
1345   }
1346 
1347   template<typename Iterator,
1348 	   hb_requires (hb_is_iterator (Iterator))>
copyOT::EncodingRecord1349   EncodingRecord* copy (hb_serialize_context_t *c,
1350 			Iterator it,
1351 			unsigned format,
1352 			const void *base,
1353 			const hb_subset_plan_t *plan,
1354 			/* INOUT */ unsigned *objidx) const
1355   {
1356     TRACE_SERIALIZE (this);
1357     auto snap = c->snapshot ();
1358     auto *out = c->embed (this);
1359     if (unlikely (!out)) return_trace (nullptr);
1360     out->subtable = 0;
1361 
1362     if (*objidx == 0)
1363     {
1364       CmapSubtable *cmapsubtable = c->push<CmapSubtable> ();
1365       unsigned origin_length = c->length ();
1366       cmapsubtable->serialize (c, it, format, plan, &(base+subtable));
1367       if (c->length () - origin_length > 0) *objidx = c->pop_pack ();
1368       else c->pop_discard ();
1369     }
1370 
1371     if (*objidx == 0)
1372     {
1373       c->revert (snap);
1374       return_trace (nullptr);
1375     }
1376 
1377     c->add_link (out->subtable, *objidx);
1378     return_trace (out);
1379   }
1380 
1381   HBUINT16	platformID;	/* Platform ID. */
1382   HBUINT16	encodingID;	/* Platform-specific encoding ID. */
1383   Offset32To<CmapSubtable>
1384 		subtable;	/* Byte offset from beginning of table to the subtable for this encoding. */
1385   public:
1386   DEFINE_SIZE_STATIC (8);
1387 };
1388 
1389 struct cmap
1390 {
1391   static constexpr hb_tag_t tableTag = HB_OT_TAG_cmap;
1392 
1393   template<typename Iterator, typename EncodingRecIter,
1394 	   hb_requires (hb_is_iterator (EncodingRecIter))>
serializeOT::cmap1395   bool serialize (hb_serialize_context_t *c,
1396 		  Iterator it,
1397 		  EncodingRecIter encodingrec_iter,
1398 		  const void *base,
1399 		  const hb_subset_plan_t *plan,
1400                   bool drop_format_4 = false)
1401   {
1402     if (unlikely (!c->extend_min ((*this))))  return false;
1403     this->version = 0;
1404 
1405     unsigned format4objidx = 0, format12objidx = 0, format14objidx = 0;
1406     auto snap = c->snapshot ();
1407 
1408     for (const EncodingRecord& _ : encodingrec_iter)
1409     {
1410       if (c->in_error ())
1411         return false;
1412 
1413       unsigned format = (base+_.subtable).u.format;
1414       if (format != 4 && format != 12 && format != 14) continue;
1415 
1416       hb_set_t unicodes_set;
1417       (base+_.subtable).collect_unicodes (&unicodes_set);
1418 
1419       if (!drop_format_4 && format == 4)
1420       {
1421         c->copy (_, + it | hb_filter (unicodes_set, hb_first), 4u, base, plan, &format4objidx);
1422         if (c->in_error () && c->only_overflow ())
1423         {
1424           // cmap4 overflowed, reset and retry serialization without format 4 subtables.
1425           c->revert (snap);
1426           return serialize (c, it,
1427                             encodingrec_iter,
1428                             base,
1429                             plan,
1430                             true);
1431         }
1432       }
1433 
1434       else if (format == 12)
1435       {
1436         if (_can_drop (_, unicodes_set, base, + it | hb_map (hb_first), encodingrec_iter)) continue;
1437         c->copy (_, + it | hb_filter (unicodes_set, hb_first), 12u, base, plan, &format12objidx);
1438       }
1439       else if (format == 14) c->copy (_, it, 14u, base, plan, &format14objidx);
1440     }
1441     c->check_assign(this->encodingRecord.len,
1442                     (c->length () - cmap::min_size)/EncodingRecord::static_size,
1443                     HB_SERIALIZE_ERROR_INT_OVERFLOW);
1444 
1445     // Fail if format 4 was dropped and there is no cmap12.
1446     return !drop_format_4 || format12objidx;
1447   }
1448 
1449   template<typename Iterator, typename EncodingRecordIterator,
1450       hb_requires (hb_is_iterator (Iterator)),
1451       hb_requires (hb_is_iterator (EncodingRecordIterator))>
_can_dropOT::cmap1452   bool _can_drop (const EncodingRecord& cmap12,
1453                   const hb_set_t& cmap12_unicodes,
1454                   const void* base,
1455                   Iterator subset_unicodes,
1456                   EncodingRecordIterator encoding_records)
1457   {
1458     for (auto cp : + subset_unicodes | hb_filter (cmap12_unicodes))
1459     {
1460       if (cp >= 0x10000) return false;
1461     }
1462 
1463     unsigned target_platform;
1464     unsigned target_encoding;
1465     unsigned target_language = (base+cmap12.subtable).get_language ();
1466 
1467     if (cmap12.platformID == 0 && cmap12.encodingID == 4)
1468     {
1469       target_platform = 0;
1470       target_encoding = 3;
1471     } else if (cmap12.platformID == 3 && cmap12.encodingID == 10) {
1472       target_platform = 3;
1473       target_encoding = 1;
1474     } else {
1475       return false;
1476     }
1477 
1478     for (const auto& _ : encoding_records)
1479     {
1480       if (_.platformID != target_platform
1481           || _.encodingID != target_encoding
1482           || (base+_.subtable).get_language() != target_language)
1483         continue;
1484 
1485       hb_set_t sibling_unicodes;
1486       (base+_.subtable).collect_unicodes (&sibling_unicodes);
1487 
1488       auto cmap12 = + subset_unicodes | hb_filter (cmap12_unicodes);
1489       auto sibling = + subset_unicodes | hb_filter (sibling_unicodes);
1490       for (; cmap12 && sibling; cmap12++, sibling++)
1491       {
1492         unsigned a = *cmap12;
1493         unsigned b = *sibling;
1494         if (a != b) return false;
1495       }
1496 
1497       return !cmap12 && !sibling;
1498     }
1499 
1500     return false;
1501   }
1502 
closure_glyphsOT::cmap1503   void closure_glyphs (const hb_set_t      *unicodes,
1504 		       hb_set_t            *glyphset) const
1505   {
1506     + hb_iter (encodingRecord)
1507     | hb_map (&EncodingRecord::subtable)
1508     | hb_map (hb_add (this))
1509     | hb_filter ([&] (const CmapSubtable& _) { return _.u.format == 14; })
1510     | hb_apply ([=] (const CmapSubtable& _) { _.u.format14.closure_glyphs (unicodes, glyphset); })
1511     ;
1512   }
1513 
subsetOT::cmap1514   bool subset (hb_subset_context_t *c) const
1515   {
1516     TRACE_SUBSET (this);
1517 
1518     cmap *cmap_prime = c->serializer->start_embed<cmap> ();
1519     if (unlikely (!c->serializer->check_success (cmap_prime))) return_trace (false);
1520 
1521     auto encodingrec_iter =
1522     + hb_iter (encodingRecord)
1523     | hb_filter ([&] (const EncodingRecord& _)
1524 		{
1525 		  if ((_.platformID == 0 && _.encodingID == 3) ||
1526 		      (_.platformID == 0 && _.encodingID == 4) ||
1527 		      (_.platformID == 3 && _.encodingID == 1) ||
1528 		      (_.platformID == 3 && _.encodingID == 10) ||
1529 		      (this + _.subtable).u.format == 14)
1530 		    return true;
1531 
1532 		  return false;
1533 		})
1534     ;
1535 
1536     if (unlikely (!encodingrec_iter.len ())) return_trace (false);
1537 
1538     const EncodingRecord *unicode_bmp= nullptr, *unicode_ucs4 = nullptr, *ms_bmp = nullptr, *ms_ucs4 = nullptr;
1539     bool has_format12 = false;
1540 
1541     for (const EncodingRecord& _ : encodingrec_iter)
1542     {
1543       unsigned format = (this + _.subtable).u.format;
1544       if (format == 12) has_format12 = true;
1545 
1546       const EncodingRecord *table = hb_addressof (_);
1547       if      (_.platformID == 0 && _.encodingID ==  3) unicode_bmp = table;
1548       else if (_.platformID == 0 && _.encodingID ==  4) unicode_ucs4 = table;
1549       else if (_.platformID == 3 && _.encodingID ==  1) ms_bmp = table;
1550       else if (_.platformID == 3 && _.encodingID == 10) ms_ucs4 = table;
1551     }
1552 
1553     if (unlikely (!has_format12 && !unicode_bmp && !ms_bmp)) return_trace (false);
1554     if (unlikely (has_format12 && (!unicode_ucs4 && !ms_ucs4))) return_trace (false);
1555 
1556     auto it =
1557     + hb_iter (c->plan->unicodes)
1558     | hb_map ([&] (hb_codepoint_t _)
1559 	      {
1560 		hb_codepoint_t new_gid = HB_MAP_VALUE_INVALID;
1561 		c->plan->new_gid_for_codepoint (_, &new_gid);
1562 		return hb_pair_t<hb_codepoint_t, hb_codepoint_t> (_, new_gid);
1563 	      })
1564     | hb_filter ([&] (const hb_pair_t<hb_codepoint_t, hb_codepoint_t> _)
1565 		 { return (_.second != HB_MAP_VALUE_INVALID); })
1566     ;
1567 
1568     return_trace (cmap_prime->serialize (c->serializer, it, encodingrec_iter, this, c->plan));
1569   }
1570 
find_best_subtableOT::cmap1571   const CmapSubtable *find_best_subtable (bool *symbol = nullptr) const
1572   {
1573     if (symbol) *symbol = false;
1574 
1575     const CmapSubtable *subtable;
1576 
1577     /* Symbol subtable.
1578      * Prefer symbol if available.
1579      * https://github.com/harfbuzz/harfbuzz/issues/1918 */
1580     if ((subtable = this->find_subtable (3, 0)))
1581     {
1582       if (symbol) *symbol = true;
1583       return subtable;
1584     }
1585 
1586     /* 32-bit subtables. */
1587     if ((subtable = this->find_subtable (3, 10))) return subtable;
1588     if ((subtable = this->find_subtable (0, 6))) return subtable;
1589     if ((subtable = this->find_subtable (0, 4))) return subtable;
1590 
1591     /* 16-bit subtables. */
1592     if ((subtable = this->find_subtable (3, 1))) return subtable;
1593     if ((subtable = this->find_subtable (0, 3))) return subtable;
1594     if ((subtable = this->find_subtable (0, 2))) return subtable;
1595     if ((subtable = this->find_subtable (0, 1))) return subtable;
1596     if ((subtable = this->find_subtable (0, 0))) return subtable;
1597 
1598     /* Meh. */
1599     return &Null (CmapSubtable);
1600   }
1601 
1602   struct accelerator_t
1603   {
initOT::cmap::accelerator_t1604     void init (hb_face_t *face)
1605     {
1606       this->table = hb_sanitize_context_t ().reference_table<cmap> (face);
1607       bool symbol;
1608       this->subtable = table->find_best_subtable (&symbol);
1609       this->subtable_uvs = &Null (CmapSubtableFormat14);
1610       {
1611 	const CmapSubtable *st = table->find_subtable (0, 5);
1612 	if (st && st->u.format == 14)
1613 	  subtable_uvs = &st->u.format14;
1614       }
1615 
1616       this->get_glyph_data = subtable;
1617       if (unlikely (symbol))
1618 	this->get_glyph_funcZ = get_glyph_from_symbol<CmapSubtable>;
1619       else
1620       {
1621 	switch (subtable->u.format) {
1622 	/* Accelerate format 4 and format 12. */
1623 	default:
1624 	  this->get_glyph_funcZ = get_glyph_from<CmapSubtable>;
1625 	  break;
1626 	case 12:
1627 	  this->get_glyph_funcZ = get_glyph_from<CmapSubtableFormat12>;
1628 	  break;
1629 	case  4:
1630 	{
1631 	  this->format4_accel.init (&subtable->u.format4);
1632 	  this->get_glyph_data = &this->format4_accel;
1633 	  this->get_glyph_funcZ = this->format4_accel.get_glyph_func;
1634 	  break;
1635 	}
1636 	}
1637       }
1638     }
1639 
finiOT::cmap::accelerator_t1640     void fini () { this->table.destroy (); }
1641 
get_nominal_glyphOT::cmap::accelerator_t1642     bool get_nominal_glyph (hb_codepoint_t  unicode,
1643 			    hb_codepoint_t *glyph) const
1644     {
1645       if (unlikely (!this->get_glyph_funcZ)) return false;
1646       return this->get_glyph_funcZ (this->get_glyph_data, unicode, glyph);
1647     }
get_nominal_glyphsOT::cmap::accelerator_t1648     unsigned int get_nominal_glyphs (unsigned int count,
1649 				     const hb_codepoint_t *first_unicode,
1650 				     unsigned int unicode_stride,
1651 				     hb_codepoint_t *first_glyph,
1652 				     unsigned int glyph_stride) const
1653     {
1654       if (unlikely (!this->get_glyph_funcZ)) return 0;
1655 
1656       hb_cmap_get_glyph_func_t get_glyph_funcZ = this->get_glyph_funcZ;
1657       const void *get_glyph_data = this->get_glyph_data;
1658 
1659       unsigned int done;
1660       for (done = 0;
1661 	   done < count && get_glyph_funcZ (get_glyph_data, *first_unicode, first_glyph);
1662 	   done++)
1663       {
1664 	first_unicode = &StructAtOffsetUnaligned<hb_codepoint_t> (first_unicode, unicode_stride);
1665 	first_glyph = &StructAtOffsetUnaligned<hb_codepoint_t> (first_glyph, glyph_stride);
1666       }
1667       return done;
1668     }
1669 
get_variation_glyphOT::cmap::accelerator_t1670     bool get_variation_glyph (hb_codepoint_t  unicode,
1671 			      hb_codepoint_t  variation_selector,
1672 			      hb_codepoint_t *glyph) const
1673     {
1674       switch (this->subtable_uvs->get_glyph_variant (unicode,
1675 						     variation_selector,
1676 						     glyph))
1677       {
1678 	case GLYPH_VARIANT_NOT_FOUND:	return false;
1679 	case GLYPH_VARIANT_FOUND:	return true;
1680 	case GLYPH_VARIANT_USE_DEFAULT:	break;
1681       }
1682 
1683       return get_nominal_glyph (unicode, glyph);
1684     }
1685 
collect_unicodesOT::cmap::accelerator_t1686     void collect_unicodes (hb_set_t *out, unsigned int num_glyphs) const
1687     { subtable->collect_unicodes (out, num_glyphs); }
collect_mappingOT::cmap::accelerator_t1688     void collect_mapping (hb_set_t *unicodes, hb_map_t *mapping,
1689 			  unsigned num_glyphs = UINT_MAX) const
1690     { subtable->collect_mapping (unicodes, mapping, num_glyphs); }
collect_variation_selectorsOT::cmap::accelerator_t1691     void collect_variation_selectors (hb_set_t *out) const
1692     { subtable_uvs->collect_variation_selectors (out); }
collect_variation_unicodesOT::cmap::accelerator_t1693     void collect_variation_unicodes (hb_codepoint_t variation_selector,
1694 				     hb_set_t *out) const
1695     { subtable_uvs->collect_variation_unicodes (variation_selector, out); }
1696 
1697     protected:
1698     typedef bool (*hb_cmap_get_glyph_func_t) (const void *obj,
1699 					      hb_codepoint_t codepoint,
1700 					      hb_codepoint_t *glyph);
1701 
1702     template <typename Type>
get_glyph_fromOT::cmap::accelerator_t1703     HB_INTERNAL static bool get_glyph_from (const void *obj,
1704 					    hb_codepoint_t codepoint,
1705 					    hb_codepoint_t *glyph)
1706     {
1707       const Type *typed_obj = (const Type *) obj;
1708       return typed_obj->get_glyph (codepoint, glyph);
1709     }
1710 
1711     template <typename Type>
get_glyph_from_symbolOT::cmap::accelerator_t1712     HB_INTERNAL static bool get_glyph_from_symbol (const void *obj,
1713 						   hb_codepoint_t codepoint,
1714 						   hb_codepoint_t *glyph)
1715     {
1716       const Type *typed_obj = (const Type *) obj;
1717       if (likely (typed_obj->get_glyph (codepoint, glyph)))
1718 	return true;
1719 
1720       if (codepoint <= 0x00FFu)
1721       {
1722 	/* For symbol-encoded OpenType fonts, we duplicate the
1723 	 * U+F000..F0FF range at U+0000..U+00FF.  That's what
1724 	 * Windows seems to do, and that's hinted about at:
1725 	 * https://docs.microsoft.com/en-us/typography/opentype/spec/recom
1726 	 * under "Non-Standard (Symbol) Fonts". */
1727 	return typed_obj->get_glyph (0xF000u + codepoint, glyph);
1728       }
1729 
1730       return false;
1731     }
1732 
1733     private:
1734     hb_nonnull_ptr_t<const CmapSubtable> subtable;
1735     hb_nonnull_ptr_t<const CmapSubtableFormat14> subtable_uvs;
1736 
1737     hb_cmap_get_glyph_func_t get_glyph_funcZ;
1738     const void *get_glyph_data;
1739 
1740     CmapSubtableFormat4::accelerator_t format4_accel;
1741 
1742     public:
1743     hb_blob_ptr_t<cmap> table;
1744   };
1745 
1746   protected:
1747 
find_subtableOT::cmap1748   const CmapSubtable *find_subtable (unsigned int platform_id,
1749 				     unsigned int encoding_id) const
1750   {
1751     EncodingRecord key;
1752     key.platformID = platform_id;
1753     key.encodingID = encoding_id;
1754 
1755     const EncodingRecord &result = encodingRecord.bsearch (key);
1756     if (!result.subtable)
1757       return nullptr;
1758 
1759     return &(this+result.subtable);
1760   }
1761 
find_encodingrecOT::cmap1762   const EncodingRecord *find_encodingrec (unsigned int platform_id,
1763 					  unsigned int encoding_id) const
1764   {
1765     EncodingRecord key;
1766     key.platformID = platform_id;
1767     key.encodingID = encoding_id;
1768 
1769     return encodingRecord.as_array ().bsearch (key);
1770   }
1771 
find_subtableOT::cmap1772   bool find_subtable (unsigned format) const
1773   {
1774     auto it =
1775     + hb_iter (encodingRecord)
1776     | hb_map (&EncodingRecord::subtable)
1777     | hb_map (hb_add (this))
1778     | hb_filter ([&] (const CmapSubtable& _) { return _.u.format == format; })
1779     ;
1780 
1781     return it.len ();
1782   }
1783 
1784   public:
1785 
sanitizeOT::cmap1786   bool sanitize (hb_sanitize_context_t *c) const
1787   {
1788     TRACE_SANITIZE (this);
1789     return_trace (c->check_struct (this) &&
1790 		  likely (version == 0) &&
1791 		  encodingRecord.sanitize (c, this));
1792   }
1793 
1794   protected:
1795   HBUINT16	version;	/* Table version number (0). */
1796   SortedArray16Of<EncodingRecord>
1797 		encodingRecord;	/* Encoding tables. */
1798   public:
1799   DEFINE_SIZE_ARRAY (4, encodingRecord);
1800 };
1801 
1802 struct cmap_accelerator_t : cmap::accelerator_t {};
1803 
1804 } /* namespace OT */
1805 
1806 
1807 #endif /* HB_OT_CMAP_TABLE_HH */
1808