• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2014  Google, Inc.
3  *
4  *  This is part of HarfBuzz, a text shaping library.
5  *
6  * Permission is hereby granted, without written agreement and without
7  * license or royalty fees, to use, copy, modify, and distribute this
8  * software and its documentation for any purpose, provided that the
9  * above copyright notice and the following two paragraphs appear in
10  * all copies of this software.
11  *
12  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
16  * DAMAGE.
17  *
18  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
21  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
23  *
24  * Google Author(s): Behdad Esfahbod
25  */
26 
27 #ifndef HB_OT_CMAP_TABLE_HH
28 #define HB_OT_CMAP_TABLE_HH
29 
30 #include "hb-open-type.hh"
31 #include "hb-set.hh"
32 
33 /*
34  * cmap -- Character to Glyph Index Mapping
35  * https://docs.microsoft.com/en-us/typography/opentype/spec/cmap
36  */
37 #define HB_OT_TAG_cmap HB_TAG('c','m','a','p')
38 
39 namespace OT {
40 
41 
42 struct CmapSubtableFormat0
43 {
get_glyphOT::CmapSubtableFormat044   bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
45   {
46     hb_codepoint_t gid = codepoint < 256 ? glyphIdArray[codepoint] : 0;
47     if (!gid)
48       return false;
49     *glyph = gid;
50     return true;
51   }
collect_unicodesOT::CmapSubtableFormat052   void collect_unicodes (hb_set_t *out) const
53   {
54     for (unsigned int i = 0; i < 256; i++)
55       if (glyphIdArray[i])
56 	out->add (i);
57   }
58 
sanitizeOT::CmapSubtableFormat059   bool sanitize (hb_sanitize_context_t *c) const
60   {
61     TRACE_SANITIZE (this);
62     return_trace (c->check_struct (this));
63   }
64 
65   protected:
66   HBUINT16	format;		/* Format number is set to 0. */
67   HBUINT16	length;		/* Byte length of this subtable. */
68   HBUINT16	language;	/* Ignore. */
69   HBUINT8	glyphIdArray[256];/* An array that maps character
70 				 * code to glyph index values. */
71   public:
72   DEFINE_SIZE_STATIC (6 + 256);
73 };
74 
75 struct CmapSubtableFormat4
76 {
77   struct segment_plan
78   {
79     HBUINT16 start_code;
80     HBUINT16 end_code;
81     bool use_delta;
82   };
83 
serializeOT::CmapSubtableFormat484   bool serialize (hb_serialize_context_t *c,
85 		  const hb_subset_plan_t *plan,
86 		  const hb_vector_t<segment_plan> &segments)
87   {
88     TRACE_SERIALIZE (this);
89 
90     if (unlikely (!c->extend_min (*this))) return_trace (false);
91 
92     this->format.set (4);
93     this->length.set (get_sub_table_size (segments));
94 
95     this->segCountX2.set (segments.len * 2);
96     this->entrySelector.set (MAX (1u, hb_bit_storage (segments.len)) - 1);
97     this->searchRange.set (2 * (1u << this->entrySelector));
98     this->rangeShift.set (segments.len * 2 > this->searchRange
99 			  ? 2 * segments.len - this->searchRange
100 			  : 0);
101 
102     HBUINT16 *end_count = c->allocate_size<HBUINT16> (HBUINT16::static_size * segments.len);
103     c->allocate_size<HBUINT16> (HBUINT16::static_size); // 2 bytes of padding.
104     HBUINT16 *start_count = c->allocate_size<HBUINT16> (HBUINT16::static_size * segments.len);
105     HBINT16 *id_delta = c->allocate_size<HBINT16> (HBUINT16::static_size * segments.len);
106     HBUINT16 *id_range_offset = c->allocate_size<HBUINT16> (HBUINT16::static_size * segments.len);
107 
108     if (id_range_offset == nullptr)
109       return_trace (false);
110 
111     for (unsigned int i = 0; i < segments.len; i++)
112     {
113       end_count[i].set (segments[i].end_code);
114       start_count[i].set (segments[i].start_code);
115       if (segments[i].use_delta)
116       {
117 	hb_codepoint_t cp = segments[i].start_code;
118 	hb_codepoint_t start_gid = 0;
119 	if (unlikely (!plan->new_gid_for_codepoint (cp, &start_gid) && cp != 0xFFFF))
120 	  return_trace (false);
121 	id_delta[i].set (start_gid - segments[i].start_code);
122       } else {
123 	id_delta[i].set (0);
124 	unsigned int num_codepoints = segments[i].end_code - segments[i].start_code + 1;
125 	HBUINT16 *glyph_id_array = c->allocate_size<HBUINT16> (HBUINT16::static_size * num_codepoints);
126 	if (glyph_id_array == nullptr)
127 	  return_trace (false);
128 	// From the cmap spec:
129 	//
130 	// id_range_offset[i]/2
131 	// + (cp - segments[i].start_code)
132 	// + (id_range_offset + i)
133 	// =
134 	// glyph_id_array + (cp - segments[i].start_code)
135 	//
136 	// So, solve for id_range_offset[i]:
137 	//
138 	// id_range_offset[i]
139 	// =
140 	// 2 * (glyph_id_array - id_range_offset - i)
141 	id_range_offset[i].set (2 * (
142 	    glyph_id_array - id_range_offset - i));
143 	for (unsigned int j = 0; j < num_codepoints; j++)
144 	{
145 	  hb_codepoint_t cp = segments[i].start_code + j;
146 	  hb_codepoint_t new_gid;
147 	  if (unlikely (!plan->new_gid_for_codepoint (cp, &new_gid)))
148 	    return_trace (false);
149 	  glyph_id_array[j].set (new_gid);
150 	}
151       }
152     }
153 
154     return_trace (true);
155   }
156 
get_sub_table_sizeOT::CmapSubtableFormat4157   static size_t get_sub_table_size (const hb_vector_t<segment_plan> &segments)
158   {
159     size_t segment_size = 0;
160     for (unsigned int i = 0; i < segments.len; i++)
161     {
162       // Parallel array entries
163       segment_size +=
164 	    2  // end count
165 	  + 2  // start count
166 	  + 2  // delta
167 	  + 2; // range offset
168 
169       if (!segments[i].use_delta)
170 	// Add bytes for the glyph index array entries for this segment.
171 	segment_size += (segments[i].end_code - segments[i].start_code + 1) * 2;
172     }
173 
174     return min_size
175 	+ 2 // Padding
176 	+ segment_size;
177   }
178 
create_sub_table_planOT::CmapSubtableFormat4179   static bool create_sub_table_plan (const hb_subset_plan_t *plan,
180 				     hb_vector_t<segment_plan> *segments)
181   {
182     segment_plan *segment = nullptr;
183     hb_codepoint_t last_gid = 0;
184 
185     hb_codepoint_t cp = HB_SET_VALUE_INVALID;
186     while (plan->unicodes->next (&cp)) {
187       hb_codepoint_t new_gid;
188       if (unlikely (!plan->new_gid_for_codepoint (cp, &new_gid)))
189       {
190 	DEBUG_MSG(SUBSET, nullptr, "Unable to find new gid for %04x", cp);
191 	return false;
192       }
193 
194       /* Stop adding to cmap if we are now outside of unicode BMP. */
195       if (cp > 0xFFFF) break;
196 
197       if (!segment ||
198 	  cp != segment->end_code + 1u)
199       {
200 	segment = segments->push ();
201 	segment->start_code.set (cp);
202 	segment->end_code.set (cp);
203 	segment->use_delta = true;
204       } else {
205 	segment->end_code.set (cp);
206 	if (last_gid + 1u != new_gid)
207 	  // gid's are not consecutive in this segment so delta
208 	  // cannot be used.
209 	  segment->use_delta = false;
210       }
211 
212       last_gid = new_gid;
213     }
214 
215     // There must be a final entry with end_code == 0xFFFF. Check if we need to add one.
216     if (segment == nullptr || segment->end_code != 0xFFFF)
217     {
218       segment = segments->push ();
219       segment->start_code.set (0xFFFF);
220       segment->end_code.set (0xFFFF);
221       segment->use_delta = true;
222     }
223 
224     return true;
225   }
226 
227   struct accelerator_t
228   {
accelerator_tOT::CmapSubtableFormat4::accelerator_t229     accelerator_t () {}
accelerator_tOT::CmapSubtableFormat4::accelerator_t230     accelerator_t (const CmapSubtableFormat4 *subtable) { init (subtable); }
~accelerator_tOT::CmapSubtableFormat4::accelerator_t231     ~accelerator_t () { fini (); }
232 
initOT::CmapSubtableFormat4::accelerator_t233     void init (const CmapSubtableFormat4 *subtable)
234     {
235       segCount = subtable->segCountX2 / 2;
236       endCount = subtable->values.arrayZ;
237       startCount = endCount + segCount + 1;
238       idDelta = startCount + segCount;
239       idRangeOffset = idDelta + segCount;
240       glyphIdArray = idRangeOffset + segCount;
241       glyphIdArrayLength = (subtable->length - 16 - 8 * segCount) / 2;
242     }
finiOT::CmapSubtableFormat4::accelerator_t243     void fini () {}
244 
get_glyphOT::CmapSubtableFormat4::accelerator_t245     bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
246     {
247       /* Custom two-array bsearch. */
248       int min = 0, max = (int) this->segCount - 1;
249       const HBUINT16 *startCount = this->startCount;
250       const HBUINT16 *endCount = this->endCount;
251       unsigned int i;
252       while (min <= max)
253       {
254 	int mid = ((unsigned int) min + (unsigned int) max) / 2;
255 	if (codepoint < startCount[mid])
256 	  max = mid - 1;
257 	else if (codepoint > endCount[mid])
258 	  min = mid + 1;
259 	else
260 	{
261 	  i = mid;
262 	  goto found;
263 	}
264       }
265       return false;
266 
267     found:
268       hb_codepoint_t gid;
269       unsigned int rangeOffset = this->idRangeOffset[i];
270       if (rangeOffset == 0)
271 	gid = codepoint + this->idDelta[i];
272       else
273       {
274 	/* Somebody has been smoking... */
275 	unsigned int index = rangeOffset / 2 + (codepoint - this->startCount[i]) + i - this->segCount;
276 	if (unlikely (index >= this->glyphIdArrayLength))
277 	  return false;
278 	gid = this->glyphIdArray[index];
279 	if (unlikely (!gid))
280 	  return false;
281 	gid += this->idDelta[i];
282       }
283       gid &= 0xFFFFu;
284       if (!gid)
285 	return false;
286       *glyph = gid;
287       return true;
288     }
get_glyph_funcOT::CmapSubtableFormat4::accelerator_t289     static bool get_glyph_func (const void *obj, hb_codepoint_t codepoint, hb_codepoint_t *glyph)
290     {
291       return ((const accelerator_t *) obj)->get_glyph (codepoint, glyph);
292     }
collect_unicodesOT::CmapSubtableFormat4::accelerator_t293     void collect_unicodes (hb_set_t *out) const
294     {
295       unsigned int count = this->segCount;
296       if (count && this->startCount[count - 1] == 0xFFFFu)
297 	count--; /* Skip sentinel segment. */
298       for (unsigned int i = 0; i < count; i++)
299       {
300 	unsigned int rangeOffset = this->idRangeOffset[i];
301 	if (rangeOffset == 0)
302 	  out->add_range (this->startCount[i], this->endCount[i]);
303 	else
304 	{
305 	  for (hb_codepoint_t codepoint = this->startCount[i];
306 	       codepoint <= this->endCount[i];
307 	       codepoint++)
308 	  {
309 	    unsigned int index = rangeOffset / 2 + (codepoint - this->startCount[i]) + i - this->segCount;
310 	    if (unlikely (index >= this->glyphIdArrayLength))
311 	      break;
312 	    hb_codepoint_t gid = this->glyphIdArray[index];
313 	    if (unlikely (!gid))
314 	      continue;
315 	    out->add (codepoint);
316 	  }
317 	}
318       }
319     }
320 
321     const HBUINT16 *endCount;
322     const HBUINT16 *startCount;
323     const HBUINT16 *idDelta;
324     const HBUINT16 *idRangeOffset;
325     const HBUINT16 *glyphIdArray;
326     unsigned int segCount;
327     unsigned int glyphIdArrayLength;
328   };
329 
get_glyphOT::CmapSubtableFormat4330   bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
331   {
332     accelerator_t accel (this);
333     return accel.get_glyph_func (&accel, codepoint, glyph);
334   }
collect_unicodesOT::CmapSubtableFormat4335   void collect_unicodes (hb_set_t *out) const
336   {
337     accelerator_t accel (this);
338     accel.collect_unicodes (out);
339   }
340 
sanitizeOT::CmapSubtableFormat4341   bool sanitize (hb_sanitize_context_t *c) const
342   {
343     TRACE_SANITIZE (this);
344     if (unlikely (!c->check_struct (this)))
345       return_trace (false);
346 
347     if (unlikely (!c->check_range (this, length)))
348     {
349       /* Some broken fonts have too long of a "length" value.
350        * If that is the case, just change the value to truncate
351        * the subtable at the end of the blob. */
352       uint16_t new_length = (uint16_t) MIN ((uintptr_t) 65535,
353 					    (uintptr_t) (c->end -
354 							 (char *) this));
355       if (!c->try_set (&length, new_length))
356 	return_trace (false);
357     }
358 
359     return_trace (16 + 4 * (unsigned int) segCountX2 <= length);
360   }
361 
362 
363 
364   protected:
365   HBUINT16	format;		/* Format number is set to 4. */
366   HBUINT16	length;		/* This is the length in bytes of the
367 				 * subtable. */
368   HBUINT16	language;	/* Ignore. */
369   HBUINT16	segCountX2;	/* 2 x segCount. */
370   HBUINT16	searchRange;	/* 2 * (2**floor(log2(segCount))) */
371   HBUINT16	entrySelector;	/* log2(searchRange/2) */
372   HBUINT16	rangeShift;	/* 2 x segCount - searchRange */
373 
374   UnsizedArrayOf<HBUINT16>
375 		values;
376 #if 0
377   HBUINT16	endCount[segCount];	/* End characterCode for each segment,
378 					 * last=0xFFFFu. */
379   HBUINT16	reservedPad;		/* Set to 0. */
380   HBUINT16	startCount[segCount];	/* Start character code for each segment. */
381   HBINT16		idDelta[segCount];	/* Delta for all character codes in segment. */
382   HBUINT16	idRangeOffset[segCount];/* Offsets into glyphIdArray or 0 */
383   UnsizedArrayOf<HBUINT16>
384 		glyphIdArray;	/* Glyph index array (arbitrary length) */
385 #endif
386 
387   public:
388   DEFINE_SIZE_ARRAY (14, values);
389 };
390 
391 struct CmapSubtableLongGroup
392 {
393   friend struct CmapSubtableFormat12;
394   friend struct CmapSubtableFormat13;
395   template<typename U>
396   friend struct CmapSubtableLongSegmented;
397   friend struct cmap;
398 
cmpOT::CmapSubtableLongGroup399   int cmp (hb_codepoint_t codepoint) const
400   {
401     if (codepoint < startCharCode) return -1;
402     if (codepoint > endCharCode)   return +1;
403     return 0;
404   }
405 
sanitizeOT::CmapSubtableLongGroup406   bool sanitize (hb_sanitize_context_t *c) const
407   {
408     TRACE_SANITIZE (this);
409     return_trace (c->check_struct (this));
410   }
411 
412   private:
413   HBUINT32		startCharCode;	/* First character code in this group. */
414   HBUINT32		endCharCode;	/* Last character code in this group. */
415   HBUINT32		glyphID;	/* Glyph index; interpretation depends on
416 					 * subtable format. */
417   public:
418   DEFINE_SIZE_STATIC (12);
419 };
420 DECLARE_NULL_NAMESPACE_BYTES (OT, CmapSubtableLongGroup);
421 
422 template <typename UINT>
423 struct CmapSubtableTrimmed
424 {
get_glyphOT::CmapSubtableTrimmed425   bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
426   {
427     /* Rely on our implicit array bound-checking. */
428     hb_codepoint_t gid = glyphIdArray[codepoint - startCharCode];
429     if (!gid)
430       return false;
431     *glyph = gid;
432     return true;
433   }
collect_unicodesOT::CmapSubtableTrimmed434   void collect_unicodes (hb_set_t *out) const
435   {
436     hb_codepoint_t start = startCharCode;
437     unsigned int count = glyphIdArray.len;
438     for (unsigned int i = 0; i < count; i++)
439       if (glyphIdArray[i])
440 	out->add (start + i);
441   }
442 
sanitizeOT::CmapSubtableTrimmed443   bool sanitize (hb_sanitize_context_t *c) const
444   {
445     TRACE_SANITIZE (this);
446     return_trace (c->check_struct (this) && glyphIdArray.sanitize (c));
447   }
448 
449   protected:
450   UINT		formatReserved;	/* Subtable format and (maybe) padding. */
451   UINT		length;		/* Byte length of this subtable. */
452   UINT		language;	/* Ignore. */
453   UINT		startCharCode;	/* First character code covered. */
454   ArrayOf<GlyphID, UINT>
455 		glyphIdArray;	/* Array of glyph index values for character
456 				 * codes in the range. */
457   public:
458   DEFINE_SIZE_ARRAY (5 * sizeof (UINT), glyphIdArray);
459 };
460 
461 struct CmapSubtableFormat6  : CmapSubtableTrimmed<HBUINT16> {};
462 struct CmapSubtableFormat10 : CmapSubtableTrimmed<HBUINT32 > {};
463 
464 template <typename T>
465 struct CmapSubtableLongSegmented
466 {
467   friend struct cmap;
468 
get_glyphOT::CmapSubtableLongSegmented469   bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
470   {
471     hb_codepoint_t gid = T::group_get_glyph (groups.bsearch (codepoint), codepoint);
472     if (!gid)
473       return false;
474     *glyph = gid;
475     return true;
476   }
477 
collect_unicodesOT::CmapSubtableLongSegmented478   void collect_unicodes (hb_set_t *out) const
479   {
480     for (unsigned int i = 0; i < this->groups.len; i++) {
481       out->add_range (this->groups[i].startCharCode,
482 		      MIN ((hb_codepoint_t) this->groups[i].endCharCode,
483 			   (hb_codepoint_t) HB_UNICODE_MAX));
484     }
485   }
486 
sanitizeOT::CmapSubtableLongSegmented487   bool sanitize (hb_sanitize_context_t *c) const
488   {
489     TRACE_SANITIZE (this);
490     return_trace (c->check_struct (this) && groups.sanitize (c));
491   }
492 
serializeOT::CmapSubtableLongSegmented493   bool serialize (hb_serialize_context_t *c,
494 		  const hb_vector_t<CmapSubtableLongGroup> &group_data)
495   {
496     TRACE_SERIALIZE (this);
497     if (unlikely (!c->extend_min (*this))) return_trace (false);
498     if (unlikely (!groups.serialize (c, group_data.as_array ()))) return_trace (false);
499     return true;
500   }
501 
502   protected:
503   HBUINT16	format;		/* Subtable format; set to 12. */
504   HBUINT16	reserved;	/* Reserved; set to 0. */
505   HBUINT32	length;		/* Byte length of this subtable. */
506   HBUINT32	language;	/* Ignore. */
507   SortedArrayOf<CmapSubtableLongGroup, HBUINT32>
508 		groups;		/* Groupings. */
509   public:
510   DEFINE_SIZE_ARRAY (16, groups);
511 };
512 
513 struct CmapSubtableFormat12 : CmapSubtableLongSegmented<CmapSubtableFormat12>
514 {
group_get_glyphOT::CmapSubtableFormat12515   static hb_codepoint_t group_get_glyph (const CmapSubtableLongGroup &group,
516 					 hb_codepoint_t u)
517   { return likely (group.startCharCode <= group.endCharCode) ?
518 	   group.glyphID + (u - group.startCharCode) : 0; }
519 
520 
serializeOT::CmapSubtableFormat12521   bool serialize (hb_serialize_context_t *c,
522 		  const hb_vector_t<CmapSubtableLongGroup> &groups)
523   {
524     if (unlikely (!c->extend_min (*this))) return false;
525 
526     this->format.set (12);
527     this->reserved.set (0);
528     this->length.set (get_sub_table_size (groups));
529 
530     return CmapSubtableLongSegmented<CmapSubtableFormat12>::serialize (c, groups);
531   }
532 
get_sub_table_sizeOT::CmapSubtableFormat12533   static size_t get_sub_table_size (const hb_vector_t<CmapSubtableLongGroup> &groups)
534   {
535     return 16 + 12 * groups.len;
536   }
537 
create_sub_table_planOT::CmapSubtableFormat12538   static bool create_sub_table_plan (const hb_subset_plan_t *plan,
539 				     hb_vector_t<CmapSubtableLongGroup> *groups)
540   {
541     CmapSubtableLongGroup *group = nullptr;
542 
543     hb_codepoint_t cp = HB_SET_VALUE_INVALID;
544     while (plan->unicodes->next (&cp)) {
545       hb_codepoint_t new_gid;
546       if (unlikely (!plan->new_gid_for_codepoint (cp, &new_gid)))
547       {
548 	DEBUG_MSG(SUBSET, nullptr, "Unable to find new gid for %04x", cp);
549 	return false;
550       }
551 
552       if (!group || !_is_gid_consecutive (group, cp, new_gid))
553       {
554 	group = groups->push ();
555 	group->startCharCode.set (cp);
556 	group->endCharCode.set (cp);
557 	group->glyphID.set (new_gid);
558       }
559       else group->endCharCode.set (cp);
560     }
561 
562     DEBUG_MSG(SUBSET, nullptr, "cmap");
563     for (unsigned int i = 0; i < groups->len; i++) {
564       CmapSubtableLongGroup& group = (*groups)[i];
565       DEBUG_MSG(SUBSET, nullptr, "  %d: U+%04X-U+%04X, gid %d-%d", i, (uint32_t) group.startCharCode, (uint32_t) group.endCharCode, (uint32_t) group.glyphID, (uint32_t) group.glyphID + ((uint32_t) group.endCharCode - (uint32_t) group.startCharCode));
566     }
567 
568     return true;
569   }
570 
571  private:
_is_gid_consecutiveOT::CmapSubtableFormat12572   static bool _is_gid_consecutive (CmapSubtableLongGroup *group,
573 				   hb_codepoint_t cp,
574 				   hb_codepoint_t new_gid)
575   {
576     return (cp - 1 == group->endCharCode) &&
577 	new_gid == group->glyphID + (cp - group->startCharCode);
578   }
579 
580 };
581 
582 struct CmapSubtableFormat13 : CmapSubtableLongSegmented<CmapSubtableFormat13>
583 {
group_get_glyphOT::CmapSubtableFormat13584   static hb_codepoint_t group_get_glyph (const CmapSubtableLongGroup &group,
585 					 hb_codepoint_t u HB_UNUSED)
586   { return group.glyphID; }
587 };
588 
/* Result of a variation-sequence lookup. */
enum glyph_variant_t
{
  GLYPH_VARIANT_NOT_FOUND = 0,	/* No entry for the sequence. */
  GLYPH_VARIANT_FOUND = 1,	/* A specific glyph was stored for the sequence. */
  GLYPH_VARIANT_USE_DEFAULT = 2	/* The sequence is listed in the default UVS table. */
};
595 
596 struct UnicodeValueRange
597 {
cmpOT::UnicodeValueRange598   int cmp (const hb_codepoint_t &codepoint) const
599   {
600     if (codepoint < startUnicodeValue) return -1;
601     if (codepoint > startUnicodeValue + additionalCount) return +1;
602     return 0;
603   }
604 
sanitizeOT::UnicodeValueRange605   bool sanitize (hb_sanitize_context_t *c) const
606   {
607     TRACE_SANITIZE (this);
608     return_trace (c->check_struct (this));
609   }
610 
611   HBUINT24	startUnicodeValue;	/* First value in this range. */
612   HBUINT8	additionalCount;	/* Number of additional values in this
613 					 * range. */
614   public:
615   DEFINE_SIZE_STATIC (4);
616 };
617 
618 struct DefaultUVS : SortedArrayOf<UnicodeValueRange, HBUINT32>
619 {
collect_unicodesOT::DefaultUVS620   void collect_unicodes (hb_set_t *out) const
621   {
622     unsigned int count = len;
623     for (unsigned int i = 0; i < count; i++)
624     {
625       hb_codepoint_t first = arrayZ[i].startUnicodeValue;
626       hb_codepoint_t last = MIN ((hb_codepoint_t) (first + arrayZ[i].additionalCount),
627 				 (hb_codepoint_t) HB_UNICODE_MAX);
628       out->add_range (first, last);
629     }
630   }
631 
632   public:
633   DEFINE_SIZE_ARRAY (4, *this);
634 };
635 
636 struct UVSMapping
637 {
cmpOT::UVSMapping638   int cmp (const hb_codepoint_t &codepoint) const
639   {
640     return unicodeValue.cmp (codepoint);
641   }
642 
sanitizeOT::UVSMapping643   bool sanitize (hb_sanitize_context_t *c) const
644   {
645     TRACE_SANITIZE (this);
646     return_trace (c->check_struct (this));
647   }
648 
649   HBUINT24	unicodeValue;	/* Base Unicode value of the UVS */
650   GlyphID	glyphID;	/* Glyph ID of the UVS */
651   public:
652   DEFINE_SIZE_STATIC (5);
653 };
654 
655 struct NonDefaultUVS : SortedArrayOf<UVSMapping, HBUINT32>
656 {
collect_unicodesOT::NonDefaultUVS657   void collect_unicodes (hb_set_t *out) const
658   {
659     unsigned int count = len;
660     for (unsigned int i = 0; i < count; i++)
661       out->add (arrayZ[i].glyphID);
662   }
663 
664   public:
665   DEFINE_SIZE_ARRAY (4, *this);
666 };
667 
668 struct VariationSelectorRecord
669 {
get_glyphOT::VariationSelectorRecord670   glyph_variant_t get_glyph (hb_codepoint_t codepoint,
671 			     hb_codepoint_t *glyph,
672 			     const void *base) const
673   {
674     if ((base+defaultUVS).bfind (codepoint))
675       return GLYPH_VARIANT_USE_DEFAULT;
676     const UVSMapping &nonDefault = (base+nonDefaultUVS).bsearch (codepoint);
677     if (nonDefault.glyphID)
678     {
679       *glyph = nonDefault.glyphID;
680        return GLYPH_VARIANT_FOUND;
681     }
682     return GLYPH_VARIANT_NOT_FOUND;
683   }
684 
collect_unicodesOT::VariationSelectorRecord685   void collect_unicodes (hb_set_t *out, const void *base) const
686   {
687     (base+defaultUVS).collect_unicodes (out);
688     (base+nonDefaultUVS).collect_unicodes (out);
689   }
690 
cmpOT::VariationSelectorRecord691   int cmp (const hb_codepoint_t &variation_selector) const
692   {
693     return varSelector.cmp (variation_selector);
694   }
695 
sanitizeOT::VariationSelectorRecord696   bool sanitize (hb_sanitize_context_t *c, const void *base) const
697   {
698     TRACE_SANITIZE (this);
699     return_trace (c->check_struct (this) &&
700 		  defaultUVS.sanitize (c, base) &&
701 		  nonDefaultUVS.sanitize (c, base));
702   }
703 
704   HBUINT24	varSelector;	/* Variation selector. */
705   LOffsetTo<DefaultUVS>
706 		defaultUVS;	/* Offset to Default UVS Table.  May be 0. */
707   LOffsetTo<NonDefaultUVS>
708 		nonDefaultUVS;	/* Offset to Non-Default UVS Table.  May be 0. */
709   public:
710   DEFINE_SIZE_STATIC (11);
711 };
712 
713 struct CmapSubtableFormat14
714 {
get_glyph_variantOT::CmapSubtableFormat14715   glyph_variant_t get_glyph_variant (hb_codepoint_t codepoint,
716 				     hb_codepoint_t variation_selector,
717 				     hb_codepoint_t *glyph) const
718   {
719     return record.bsearch (variation_selector).get_glyph (codepoint, glyph, this);
720   }
721 
collect_variation_selectorsOT::CmapSubtableFormat14722   void collect_variation_selectors (hb_set_t *out) const
723   {
724     unsigned int count = record.len;
725     for (unsigned int i = 0; i < count; i++)
726       out->add (record.arrayZ[i].varSelector);
727   }
collect_variation_unicodesOT::CmapSubtableFormat14728   void collect_variation_unicodes (hb_codepoint_t variation_selector,
729 				   hb_set_t *out) const
730   {
731     record.bsearch (variation_selector).collect_unicodes (out, this);
732   }
733 
sanitizeOT::CmapSubtableFormat14734   bool sanitize (hb_sanitize_context_t *c) const
735   {
736     TRACE_SANITIZE (this);
737     return_trace (c->check_struct (this) &&
738 		  record.sanitize (c, this));
739   }
740 
741   protected:
742   HBUINT16	format;		/* Format number is set to 14. */
743   HBUINT32	length;		/* Byte length of this subtable. */
744   SortedArrayOf<VariationSelectorRecord, HBUINT32>
745 		record;		/* Variation selector records; sorted
746 				 * in increasing order of `varSelector'. */
747   public:
748   DEFINE_SIZE_ARRAY (10, record);
749 };
750 
751 struct CmapSubtable
752 {
753   /* Note: We intentionally do NOT implement subtable formats 2 and 8. */
754 
get_glyphOT::CmapSubtable755   bool get_glyph (hb_codepoint_t codepoint,
756 		  hb_codepoint_t *glyph) const
757   {
758     switch (u.format) {
759     case  0: return u.format0 .get_glyph (codepoint, glyph);
760     case  4: return u.format4 .get_glyph (codepoint, glyph);
761     case  6: return u.format6 .get_glyph (codepoint, glyph);
762     case 10: return u.format10.get_glyph (codepoint, glyph);
763     case 12: return u.format12.get_glyph (codepoint, glyph);
764     case 13: return u.format13.get_glyph (codepoint, glyph);
765     case 14:
766     default: return false;
767     }
768   }
collect_unicodesOT::CmapSubtable769   void collect_unicodes (hb_set_t *out) const
770   {
771     switch (u.format) {
772     case  0: u.format0 .collect_unicodes (out); return;
773     case  4: u.format4 .collect_unicodes (out); return;
774     case  6: u.format6 .collect_unicodes (out); return;
775     case 10: u.format10.collect_unicodes (out); return;
776     case 12: u.format12.collect_unicodes (out); return;
777     case 13: u.format13.collect_unicodes (out); return;
778     case 14:
779     default: return;
780     }
781   }
782 
sanitizeOT::CmapSubtable783   bool sanitize (hb_sanitize_context_t *c) const
784   {
785     TRACE_SANITIZE (this);
786     if (!u.format.sanitize (c)) return_trace (false);
787     switch (u.format) {
788     case  0: return_trace (u.format0 .sanitize (c));
789     case  4: return_trace (u.format4 .sanitize (c));
790     case  6: return_trace (u.format6 .sanitize (c));
791     case 10: return_trace (u.format10.sanitize (c));
792     case 12: return_trace (u.format12.sanitize (c));
793     case 13: return_trace (u.format13.sanitize (c));
794     case 14: return_trace (u.format14.sanitize (c));
795     default:return_trace (true);
796     }
797   }
798 
799   public:
800   union {
801   HBUINT16		format;		/* Format identifier */
802   CmapSubtableFormat0	format0;
803   CmapSubtableFormat4	format4;
804   CmapSubtableFormat6	format6;
805   CmapSubtableFormat10	format10;
806   CmapSubtableFormat12	format12;
807   CmapSubtableFormat13	format13;
808   CmapSubtableFormat14	format14;
809   } u;
810   public:
811   DEFINE_SIZE_UNION (2, format);
812 };
813 
814 
815 struct EncodingRecord
816 {
cmpOT::EncodingRecord817   int cmp (const EncodingRecord &other) const
818   {
819     int ret;
820     ret = platformID.cmp (other.platformID);
821     if (ret) return ret;
822     ret = encodingID.cmp (other.encodingID);
823     if (ret) return ret;
824     return 0;
825   }
826 
sanitizeOT::EncodingRecord827   bool sanitize (hb_sanitize_context_t *c, const void *base) const
828   {
829     TRACE_SANITIZE (this);
830     return_trace (c->check_struct (this) &&
831 		  subtable.sanitize (c, base));
832   }
833 
834   HBUINT16	platformID;	/* Platform ID. */
835   HBUINT16	encodingID;	/* Platform-specific encoding ID. */
836   LOffsetTo<CmapSubtable>
837 		subtable;	/* Byte offset from beginning of table to the subtable for this encoding. */
838   public:
839   DEFINE_SIZE_STATIC (8);
840 };
841 
842 struct cmap
843 {
844   enum { tableTag = HB_OT_TAG_cmap };
845 
846   struct subset_plan
847   {
final_sizeOT::cmap::subset_plan848     size_t final_size () const
849     {
850       return 4 // header
851 	  +  8 * 3 // 3 EncodingRecord
852 	  +  CmapSubtableFormat4::get_sub_table_size (this->format4_segments)
853 	  +  CmapSubtableFormat12::get_sub_table_size (this->format12_groups);
854     }
855 
856     hb_vector_t<CmapSubtableFormat4::segment_plan> format4_segments;
857     hb_vector_t<CmapSubtableLongGroup> format12_groups;
858   };
859 
_create_planOT::cmap860   bool _create_plan (const hb_subset_plan_t *plan,
861 		     subset_plan *cmap_plan) const
862   {
863     if (unlikely (!CmapSubtableFormat4::create_sub_table_plan (plan, &cmap_plan->format4_segments)))
864       return false;
865 
866     return CmapSubtableFormat12::create_sub_table_plan (plan, &cmap_plan->format12_groups);
867   }
868 
_subsetOT::cmap869   bool _subset (const hb_subset_plan_t *plan,
870 		const subset_plan &cmap_subset_plan,
871 		size_t dest_sz,
872 		void *dest) const
873   {
874     hb_serialize_context_t c (dest, dest_sz);
875 
876     cmap *table = c.start_serialize<cmap> ();
877     if (unlikely (!c.extend_min (*table)))
878     {
879       return false;
880     }
881 
882     table->version.set (0);
883 
884     if (unlikely (!table->encodingRecord.serialize (&c, /* numTables */ 3)))
885       return false;
886 
887     // TODO(grieger): Convert the below to a for loop
888 
889     // Format 4, Plat 0 Encoding Record
890     EncodingRecord &format4_plat0_rec = table->encodingRecord[0];
891     format4_plat0_rec.platformID.set (0); // Unicode
892     format4_plat0_rec.encodingID.set (3);
893 
894     // Format 4, Plat 3 Encoding Record
895     EncodingRecord &format4_plat3_rec = table->encodingRecord[1];
896     format4_plat3_rec.platformID.set (3); // Windows
897     format4_plat3_rec.encodingID.set (1); // Unicode BMP
898 
899     // Format 12 Encoding Record
900     EncodingRecord &format12_rec = table->encodingRecord[2];
901     format12_rec.platformID.set (3); // Windows
902     format12_rec.encodingID.set (10); // Unicode UCS-4
903 
904     // Write out format 4 sub table
905     {
906       CmapSubtable &subtable = format4_plat0_rec.subtable.serialize (&c, table);
907       format4_plat3_rec.subtable.set (format4_plat0_rec.subtable);
908       subtable.u.format.set (4);
909 
910       CmapSubtableFormat4 &format4 = subtable.u.format4;
911       if (unlikely (!format4.serialize (&c, plan, cmap_subset_plan.format4_segments)))
912 	return false;
913     }
914 
915     // Write out format 12 sub table.
916     {
917       CmapSubtable &subtable = format12_rec.subtable.serialize (&c, table);
918       subtable.u.format.set (12);
919 
920       CmapSubtableFormat12 &format12 = subtable.u.format12;
921       if (unlikely (!format12.serialize (&c, cmap_subset_plan.format12_groups)))
922 	return false;
923     }
924 
925     c.end_serialize ();
926 
927     return true;
928   }
929 
subsetOT::cmap930   bool subset (hb_subset_plan_t *plan) const
931   {
932     subset_plan cmap_subset_plan;
933 
934     if (unlikely (!_create_plan (plan, &cmap_subset_plan)))
935     {
936       DEBUG_MSG(SUBSET, nullptr, "Failed to generate a cmap subsetting plan.");
937       return false;
938     }
939 
940     // We now know how big our blob needs to be
941     size_t dest_sz = cmap_subset_plan.final_size ();
942     void *dest = malloc (dest_sz);
943     if (unlikely (!dest)) {
944       DEBUG_MSG(SUBSET, nullptr, "Unable to alloc %lu for cmap subset output", (unsigned long) dest_sz);
945       return false;
946     }
947 
948     if (unlikely (!_subset (plan, cmap_subset_plan, dest_sz, dest)))
949     {
950       DEBUG_MSG(SUBSET, nullptr, "Failed to perform subsetting of cmap.");
951       free (dest);
952       return false;
953     }
954 
955     // all done, write the blob into dest
956     hb_blob_t *cmap_prime = hb_blob_create ((const char *) dest,
957 					    dest_sz,
958 					    HB_MEMORY_MODE_READONLY,
959 					    dest,
960 					    free);
961     bool result =  plan->add_table (HB_OT_TAG_cmap, cmap_prime);
962     hb_blob_destroy (cmap_prime);
963     return result;
964   }
965 
find_best_subtableOT::cmap966   const CmapSubtable *find_best_subtable (bool *symbol = nullptr) const
967   {
968     if (symbol) *symbol = false;
969 
970     const CmapSubtable *subtable;
971 
972     /* 32-bit subtables. */
973     if ((subtable = this->find_subtable (3, 10))) return subtable;
974     if ((subtable = this->find_subtable (0, 6))) return subtable;
975     if ((subtable = this->find_subtable (0, 4))) return subtable;
976 
977     /* 16-bit subtables. */
978     if ((subtable = this->find_subtable (3, 1))) return subtable;
979     if ((subtable = this->find_subtable (0, 3))) return subtable;
980     if ((subtable = this->find_subtable (0, 2))) return subtable;
981     if ((subtable = this->find_subtable (0, 1))) return subtable;
982     if ((subtable = this->find_subtable (0, 0))) return subtable;
983 
984     /* Symbol subtable. */
985     if ((subtable = this->find_subtable (3, 0)))
986     {
987       if (symbol) *symbol = true;
988       return subtable;
989     }
990 
991     /* Meh. */
992     return &Null (CmapSubtable);
993   }
994 
995   struct accelerator_t
996   {
initOT::cmap::accelerator_t997     void init (hb_face_t *face)
998     {
999       this->table = hb_sanitize_context_t ().reference_table<cmap> (face);
1000       bool symbol;
1001       this->subtable = table->find_best_subtable (&symbol);
1002       this->subtable_uvs = &Null (CmapSubtableFormat14);
1003       {
1004 	const CmapSubtable *st = table->find_subtable (0, 5);
1005 	if (st && st->u.format == 14)
1006 	  subtable_uvs = &st->u.format14;
1007       }
1008 
1009       this->get_glyph_data = subtable;
1010       if (unlikely (symbol))
1011       {
1012 	this->get_glyph_funcZ = get_glyph_from_symbol<CmapSubtable>;
1013       } else {
1014 	switch (subtable->u.format) {
1015 	/* Accelerate format 4 and format 12. */
1016 	default:
1017 	  this->get_glyph_funcZ = get_glyph_from<CmapSubtable>;
1018 	  break;
1019 	case 12:
1020 	  this->get_glyph_funcZ = get_glyph_from<CmapSubtableFormat12>;
1021 	  break;
1022 	case  4:
1023 	  {
1024 	    this->format4_accel.init (&subtable->u.format4);
1025 	    this->get_glyph_data = &this->format4_accel;
1026 	    this->get_glyph_funcZ = this->format4_accel.get_glyph_func;
1027 	  }
1028 	  break;
1029 	}
1030       }
1031     }
1032 
finiOT::cmap::accelerator_t1033     void fini () { this->table.destroy (); }
1034 
get_nominal_glyphOT::cmap::accelerator_t1035     bool get_nominal_glyph (hb_codepoint_t  unicode,
1036 				   hb_codepoint_t *glyph) const
1037     {
1038       if (unlikely (!this->get_glyph_funcZ)) return false;
1039       return this->get_glyph_funcZ (this->get_glyph_data, unicode, glyph);
1040     }
get_nominal_glyphsOT::cmap::accelerator_t1041     unsigned int get_nominal_glyphs (unsigned int count,
1042 				     const hb_codepoint_t *first_unicode,
1043 				     unsigned int unicode_stride,
1044 				     hb_codepoint_t *first_glyph,
1045 				     unsigned int glyph_stride) const
1046     {
1047       if (unlikely (!this->get_glyph_funcZ)) return 0;
1048 
1049       hb_cmap_get_glyph_func_t get_glyph_funcZ = this->get_glyph_funcZ;
1050       const void *get_glyph_data = this->get_glyph_data;
1051 
1052       unsigned int done;
1053       for (done = 0;
1054 	   done < count && get_glyph_funcZ (get_glyph_data, *first_unicode, first_glyph);
1055 	   done++)
1056       {
1057 	first_unicode = &StructAtOffset<hb_codepoint_t> (first_unicode, unicode_stride);
1058 	first_glyph = &StructAtOffset<hb_codepoint_t> (first_glyph, glyph_stride);
1059       }
1060       return done;
1061     }
1062 
get_variation_glyphOT::cmap::accelerator_t1063     bool get_variation_glyph (hb_codepoint_t  unicode,
1064 			      hb_codepoint_t  variation_selector,
1065 			      hb_codepoint_t *glyph) const
1066     {
1067       switch (this->subtable_uvs->get_glyph_variant (unicode,
1068 						     variation_selector,
1069 						     glyph))
1070       {
1071 	case GLYPH_VARIANT_NOT_FOUND:	return false;
1072 	case GLYPH_VARIANT_FOUND:	return true;
1073 	case GLYPH_VARIANT_USE_DEFAULT:	break;
1074       }
1075 
1076       return get_nominal_glyph (unicode, glyph);
1077     }
1078 
collect_unicodesOT::cmap::accelerator_t1079     void collect_unicodes (hb_set_t *out) const
1080     {
1081       subtable->collect_unicodes (out);
1082     }
collect_variation_selectorsOT::cmap::accelerator_t1083     void collect_variation_selectors (hb_set_t *out) const
1084     {
1085       subtable_uvs->collect_variation_selectors (out);
1086     }
collect_variation_unicodesOT::cmap::accelerator_t1087     void collect_variation_unicodes (hb_codepoint_t variation_selector,
1088 				     hb_set_t *out) const
1089     {
1090       subtable_uvs->collect_variation_unicodes (variation_selector, out);
1091     }
1092 
1093     protected:
1094     typedef bool (*hb_cmap_get_glyph_func_t) (const void *obj,
1095 					      hb_codepoint_t codepoint,
1096 					      hb_codepoint_t *glyph);
1097 
1098     template <typename Type>
get_glyph_fromOT::cmap::accelerator_t1099     static bool get_glyph_from (const void *obj,
1100 				hb_codepoint_t codepoint,
1101 				hb_codepoint_t *glyph)
1102     {
1103       const Type *typed_obj = (const Type *) obj;
1104       return typed_obj->get_glyph (codepoint, glyph);
1105     }
1106 
1107     template <typename Type>
get_glyph_from_symbolOT::cmap::accelerator_t1108     static bool get_glyph_from_symbol (const void *obj,
1109 					      hb_codepoint_t codepoint,
1110 					      hb_codepoint_t *glyph)
1111     {
1112       const Type *typed_obj = (const Type *) obj;
1113       if (likely (typed_obj->get_glyph (codepoint, glyph)))
1114 	return true;
1115 
1116       if (codepoint <= 0x00FFu)
1117       {
1118 	/* For symbol-encoded OpenType fonts, we duplicate the
1119 	 * U+F000..F0FF range at U+0000..U+00FF.  That's what
1120 	 * Windows seems to do, and that's hinted about at:
1121 	 * https://docs.microsoft.com/en-us/typography/opentype/spec/recom
1122 	 * under "Non-Standard (Symbol) Fonts". */
1123 	return typed_obj->get_glyph (0xF000u + codepoint, glyph);
1124       }
1125 
1126       return false;
1127     }
1128 
1129     private:
1130     hb_nonnull_ptr_t<const CmapSubtable> subtable;
1131     hb_nonnull_ptr_t<const CmapSubtableFormat14> subtable_uvs;
1132 
1133     hb_cmap_get_glyph_func_t get_glyph_funcZ;
1134     const void *get_glyph_data;
1135 
1136     CmapSubtableFormat4::accelerator_t format4_accel;
1137 
1138     hb_blob_ptr_t<cmap> table;
1139   };
1140 
1141   protected:
1142 
find_subtableOT::cmap1143   const CmapSubtable *find_subtable (unsigned int platform_id,
1144 				     unsigned int encoding_id) const
1145   {
1146     EncodingRecord key;
1147     key.platformID.set (platform_id);
1148     key.encodingID.set (encoding_id);
1149 
1150     const EncodingRecord &result = encodingRecord.bsearch (key);
1151     if (!result.subtable)
1152       return nullptr;
1153 
1154     return &(this+result.subtable);
1155   }
1156 
1157   public:
1158 
sanitizeOT::cmap1159   bool sanitize (hb_sanitize_context_t *c) const
1160   {
1161     TRACE_SANITIZE (this);
1162     return_trace (c->check_struct (this) &&
1163 		  likely (version == 0) &&
1164 		  encodingRecord.sanitize (c, this));
1165   }
1166 
1167   protected:
1168   HBUINT16		version;	/* Table version number (0). */
1169   SortedArrayOf<EncodingRecord>
1170 			encodingRecord;	/* Encoding tables. */
1171   public:
1172   DEFINE_SIZE_ARRAY (4, encodingRecord);
1173 };
1174 
1175 struct cmap_accelerator_t : cmap::accelerator_t {};
1176 
1177 } /* namespace OT */
1178 
1179 
1180 #endif /* HB_OT_CMAP_TABLE_HH */
1181