• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2014  Google, Inc.
3  *
4  *  This is part of HarfBuzz, a text shaping library.
5  *
6  * Permission is hereby granted, without written agreement and without
7  * license or royalty fees, to use, copy, modify, and distribute this
8  * software and its documentation for any purpose, provided that the
9  * above copyright notice and the following two paragraphs appear in
10  * all copies of this software.
11  *
12  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
16  * DAMAGE.
17  *
18  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
21  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
23  *
24  * Google Author(s): Behdad Esfahbod
25  */
26 
27 #ifndef HB_OT_CMAP_TABLE_HH
28 #define HB_OT_CMAP_TABLE_HH
29 
30 #include "hb-open-type-private.hh"
31 
32 
33 namespace OT {
34 
35 
36 /*
37  * cmap -- Character To Glyph Index Mapping Table
38  */
39 
40 #define HB_OT_TAG_cmap HB_TAG('c','m','a','p')
41 
42 
43 struct CmapSubtableFormat0
44 {
get_glyphOT::CmapSubtableFormat045   inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
46   {
47     hb_codepoint_t gid = codepoint < 256 ? glyphIdArray[codepoint] : 0;
48     if (!gid)
49       return false;
50     *glyph = gid;
51     return true;
52   }
53 
sanitizeOT::CmapSubtableFormat054   inline bool sanitize (hb_sanitize_context_t *c) {
55     TRACE_SANITIZE (this);
56     return TRACE_RETURN (c->check_struct (this));
57   }
58 
59   protected:
60   USHORT	format;		/* Format number is set to 0. */
61   USHORT	lengthZ;	/* Byte length of this subtable. */
62   USHORT	languageZ;	/* Ignore. */
63   BYTE		glyphIdArray[256];/* An array that maps character
64 				 * code to glyph index values. */
65   public:
66   DEFINE_SIZE_STATIC (6 + 256);
67 };
68 
69 struct CmapSubtableFormat4
70 {
get_glyphOT::CmapSubtableFormat471   inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
72   {
73     unsigned int segCount;
74     const USHORT *endCount;
75     const USHORT *startCount;
76     const USHORT *idDelta;
77     const USHORT *idRangeOffset;
78     const USHORT *glyphIdArray;
79     unsigned int glyphIdArrayLength;
80 
81     segCount = this->segCountX2 / 2;
82     endCount = this->values;
83     startCount = endCount + segCount + 1;
84     idDelta = startCount + segCount;
85     idRangeOffset = idDelta + segCount;
86     glyphIdArray = idRangeOffset + segCount;
87     glyphIdArrayLength = (this->length - 16 - 8 * segCount) / 2;
88 
89     /* Custom two-array bsearch. */
90     int min = 0, max = (int) segCount - 1;
91     unsigned int i;
92     while (min <= max)
93     {
94       int mid = (min + max) / 2;
95       if (codepoint < startCount[mid])
96         max = mid - 1;
97       else if (codepoint > endCount[mid])
98         min = mid + 1;
99       else
100       {
101 	i = mid;
102 	goto found;
103       }
104     }
105     return false;
106 
107   found:
108     hb_codepoint_t gid;
109     unsigned int rangeOffset = idRangeOffset[i];
110     if (rangeOffset == 0)
111       gid = codepoint + idDelta[i];
112     else
113     {
114       /* Somebody has been smoking... */
115       unsigned int index = rangeOffset / 2 + (codepoint - startCount[i]) + i - segCount;
116       if (unlikely (index >= glyphIdArrayLength))
117 	return false;
118       gid = glyphIdArray[index];
119       if (unlikely (!gid))
120 	return false;
121       gid += idDelta[i];
122     }
123 
124     *glyph = gid & 0xFFFFu;
125     return true;
126   }
127 
sanitizeOT::CmapSubtableFormat4128   inline bool sanitize (hb_sanitize_context_t *c)
129   {
130     TRACE_SANITIZE (this);
131     if (unlikely (!c->check_struct (this)))
132       return TRACE_RETURN (false);
133 
134     if (unlikely (!c->check_range (this, length)))
135     {
136       /* Some broken fonts have too long of a "length" value.
137        * If that is the case, just change the value to truncate
138        * the subtable at the end of the blob. */
139       uint16_t new_length = (uint16_t) MIN ((uintptr_t) 65535,
140 					    (uintptr_t) (c->end -
141 							 (char *) this));
142       if (!c->try_set (&length, new_length))
143 	return TRACE_RETURN (false);
144     }
145 
146     return TRACE_RETURN (16 + 4 * (unsigned int) segCountX2 <= length);
147   }
148 
149   protected:
150   USHORT	format;		/* Format number is set to 4. */
151   USHORT	length;		/* This is the length in bytes of the
152 				 * subtable. */
153   USHORT	languageZ;	/* Ignore. */
154   USHORT	segCountX2;	/* 2 x segCount. */
155   USHORT	searchRangeZ;	/* 2 * (2**floor(log2(segCount))) */
156   USHORT	entrySelectorZ;	/* log2(searchRange/2) */
157   USHORT	rangeShiftZ;	/* 2 x segCount - searchRange */
158 
159   USHORT	values[VAR];
160 #if 0
161   USHORT	endCount[segCount];	/* End characterCode for each segment,
162 					 * last=0xFFFFu. */
163   USHORT	reservedPad;		/* Set to 0. */
164   USHORT	startCount[segCount];	/* Start character code for each segment. */
165   SHORT		idDelta[segCount];	/* Delta for all character codes in segment. */
166   USHORT	idRangeOffset[segCount];/* Offsets into glyphIdArray or 0 */
167   USHORT	glyphIdArray[VAR];	/* Glyph index array (arbitrary length) */
168 #endif
169 
170   public:
171   DEFINE_SIZE_ARRAY (14, values);
172 };
173 
174 struct CmapSubtableLongGroup
175 {
176   friend struct CmapSubtableFormat12;
177   friend struct CmapSubtableFormat13;
178 
cmpOT::CmapSubtableLongGroup179   int cmp (hb_codepoint_t codepoint) const
180   {
181     if (codepoint < startCharCode) return -1;
182     if (codepoint > endCharCode)   return +1;
183     return 0;
184   }
185 
sanitizeOT::CmapSubtableLongGroup186   inline bool sanitize (hb_sanitize_context_t *c) {
187     TRACE_SANITIZE (this);
188     return TRACE_RETURN (c->check_struct (this));
189   }
190 
191   private:
192   ULONG		startCharCode;	/* First character code in this group. */
193   ULONG		endCharCode;	/* Last character code in this group. */
194   ULONG		glyphID;	/* Glyph index; interpretation depends on
195 				 * subtable format. */
196   public:
197   DEFINE_SIZE_STATIC (12);
198 };
199 
200 template <typename UINT>
201 struct CmapSubtableTrimmed
202 {
get_glyphOT::CmapSubtableTrimmed203   inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
204   {
205     /* Rely on our implicit array bound-checking. */
206     hb_codepoint_t gid = glyphIdArray[codepoint - startCharCode];
207     if (!gid)
208       return false;
209     *glyph = gid;
210     return true;
211   }
212 
sanitizeOT::CmapSubtableTrimmed213   inline bool sanitize (hb_sanitize_context_t *c) {
214     TRACE_SANITIZE (this);
215     return TRACE_RETURN (c->check_struct (this) && glyphIdArray.sanitize (c));
216   }
217 
218   protected:
219   UINT		formatReserved;	/* Subtable format and (maybe) padding. */
220   UINT		lengthZ;	/* Byte length of this subtable. */
221   UINT		languageZ;	/* Ignore. */
222   UINT		startCharCode;	/* First character code covered. */
223   ArrayOf<GlyphID, UINT>
224 		glyphIdArray;	/* Array of glyph index values for character
225 				 * codes in the range. */
226   public:
227   DEFINE_SIZE_ARRAY (5 * sizeof (UINT), glyphIdArray);
228 };
229 
230 struct CmapSubtableFormat6  : CmapSubtableTrimmed<USHORT> {};
231 struct CmapSubtableFormat10 : CmapSubtableTrimmed<ULONG > {};
232 
233 template <typename T>
234 struct CmapSubtableLongSegmented
235 {
get_glyphOT::CmapSubtableLongSegmented236   inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
237   {
238     int i = groups.bsearch (codepoint);
239     if (i == -1)
240       return false;
241     *glyph = T::group_get_glyph (groups[i], codepoint);
242     return true;
243   }
244 
sanitizeOT::CmapSubtableLongSegmented245   inline bool sanitize (hb_sanitize_context_t *c) {
246     TRACE_SANITIZE (this);
247     return TRACE_RETURN (c->check_struct (this) && groups.sanitize (c));
248   }
249 
250   protected:
251   USHORT	format;		/* Subtable format; set to 12. */
252   USHORT	reservedZ;	/* Reserved; set to 0. */
253   ULONG		lengthZ;	/* Byte length of this subtable. */
254   ULONG		languageZ;	/* Ignore. */
255   SortedArrayOf<CmapSubtableLongGroup, ULONG>
256 		groups;		/* Groupings. */
257   public:
258   DEFINE_SIZE_ARRAY (16, groups);
259 };
260 
261 struct CmapSubtableFormat12 : CmapSubtableLongSegmented<CmapSubtableFormat12>
262 {
group_get_glyphOT::CmapSubtableFormat12263   static inline hb_codepoint_t group_get_glyph (const CmapSubtableLongGroup &group,
264 						hb_codepoint_t u)
265   { return group.glyphID + (u - group.startCharCode); }
266 };
267 
268 struct CmapSubtableFormat13 : CmapSubtableLongSegmented<CmapSubtableFormat13>
269 {
group_get_glyphOT::CmapSubtableFormat13270   static inline hb_codepoint_t group_get_glyph (const CmapSubtableLongGroup &group,
271 						hb_codepoint_t u HB_UNUSED)
272   { return group.glyphID; }
273 };
274 
/* Outcome of a variation-selector lookup. */
enum glyph_variant_t
{
  GLYPH_VARIANT_NOT_FOUND   = 0,	/* No entry matched. */
  GLYPH_VARIANT_FOUND       = 1,	/* A glyph was stored for the caller. */
  GLYPH_VARIANT_USE_DEFAULT = 2		/* Matched a default-UVS range; no
					 * glyph is stored. */
};
281 
282 struct UnicodeValueRange
283 {
cmpOT::UnicodeValueRange284   inline int cmp (const hb_codepoint_t &codepoint) const
285   {
286     if (codepoint < startUnicodeValue) return -1;
287     if (codepoint > startUnicodeValue + additionalCount) return +1;
288     return 0;
289   }
290 
sanitizeOT::UnicodeValueRange291   inline bool sanitize (hb_sanitize_context_t *c) {
292     TRACE_SANITIZE (this);
293     return TRACE_RETURN (c->check_struct (this));
294   }
295 
296   UINT24	startUnicodeValue;	/* First value in this range. */
297   BYTE		additionalCount;	/* Number of additional values in this
298 					 * range. */
299   public:
300   DEFINE_SIZE_STATIC (4);
301 };
302 
303 typedef SortedArrayOf<UnicodeValueRange, ULONG> DefaultUVS;
304 
305 struct UVSMapping
306 {
cmpOT::UVSMapping307   inline int cmp (const hb_codepoint_t &codepoint) const
308   {
309     return unicodeValue.cmp (codepoint);
310   }
311 
sanitizeOT::UVSMapping312   inline bool sanitize (hb_sanitize_context_t *c) {
313     TRACE_SANITIZE (this);
314     return TRACE_RETURN (c->check_struct (this));
315   }
316 
317   UINT24	unicodeValue;	/* Base Unicode value of the UVS */
318   GlyphID	glyphID;	/* Glyph ID of the UVS */
319   public:
320   DEFINE_SIZE_STATIC (5);
321 };
322 
323 typedef SortedArrayOf<UVSMapping, ULONG> NonDefaultUVS;
324 
325 struct VariationSelectorRecord
326 {
get_glyphOT::VariationSelectorRecord327   inline glyph_variant_t get_glyph (hb_codepoint_t codepoint,
328 				    hb_codepoint_t *glyph,
329 				    const void *base) const
330   {
331     int i;
332     const DefaultUVS &defaults = base+defaultUVS;
333     i = defaults.bsearch (codepoint);
334     if (i != -1)
335       return GLYPH_VARIANT_USE_DEFAULT;
336     const NonDefaultUVS &nonDefaults = base+nonDefaultUVS;
337     i = nonDefaults.bsearch (codepoint);
338     if (i != -1)
339     {
340       *glyph = nonDefaults[i].glyphID;
341        return GLYPH_VARIANT_FOUND;
342     }
343     return GLYPH_VARIANT_NOT_FOUND;
344   }
345 
cmpOT::VariationSelectorRecord346   inline int cmp (const hb_codepoint_t &variation_selector) const
347   {
348     return varSelector.cmp (variation_selector);
349   }
350 
sanitizeOT::VariationSelectorRecord351   inline bool sanitize (hb_sanitize_context_t *c, void *base) {
352     TRACE_SANITIZE (this);
353     return TRACE_RETURN (c->check_struct (this) &&
354 			 defaultUVS.sanitize (c, base) &&
355 			 nonDefaultUVS.sanitize (c, base));
356   }
357 
358   UINT24	varSelector;	/* Variation selector. */
359   OffsetTo<DefaultUVS, ULONG>
360 		defaultUVS;	/* Offset to Default UVS Table. May be 0. */
361   OffsetTo<NonDefaultUVS, ULONG>
362 		nonDefaultUVS;	/* Offset to Non-Default UVS Table. May be 0. */
363   public:
364   DEFINE_SIZE_STATIC (11);
365 };
366 
367 struct CmapSubtableFormat14
368 {
get_glyph_variantOT::CmapSubtableFormat14369   inline glyph_variant_t get_glyph_variant (hb_codepoint_t codepoint,
370 					    hb_codepoint_t variation_selector,
371 					    hb_codepoint_t *glyph) const
372   {
373     return record[record.bsearch(variation_selector)].get_glyph (codepoint, glyph, this);
374   }
375 
sanitizeOT::CmapSubtableFormat14376   inline bool sanitize (hb_sanitize_context_t *c) {
377     TRACE_SANITIZE (this);
378     return TRACE_RETURN (c->check_struct (this) &&
379 			 record.sanitize (c, this));
380   }
381 
382   protected:
383   USHORT	format;		/* Format number is set to 0. */
384   ULONG		lengthZ;	/* Byte length of this subtable. */
385   SortedArrayOf<VariationSelectorRecord, ULONG>
386 		record;		/* Variation selector records; sorted
387 				 * in increasing order of `varSelector'. */
388   public:
389   DEFINE_SIZE_ARRAY (10, record);
390 };
391 
392 struct CmapSubtable
393 {
394   /* Note: We intentionally do NOT implement subtable formats 2 and 8. */
395 
get_glyphOT::CmapSubtable396   inline bool get_glyph (hb_codepoint_t codepoint,
397 			 hb_codepoint_t *glyph) const
398   {
399     switch (u.format) {
400     case  0: return u.format0 .get_glyph(codepoint, glyph);
401     case  4: return u.format4 .get_glyph(codepoint, glyph);
402     case  6: return u.format6 .get_glyph(codepoint, glyph);
403     case 10: return u.format10.get_glyph(codepoint, glyph);
404     case 12: return u.format12.get_glyph(codepoint, glyph);
405     case 13: return u.format13.get_glyph(codepoint, glyph);
406     case 14:
407     default: return false;
408     }
409   }
410 
get_glyph_variantOT::CmapSubtable411   inline glyph_variant_t get_glyph_variant (hb_codepoint_t codepoint,
412 					    hb_codepoint_t variation_selector,
413 					    hb_codepoint_t *glyph) const
414   {
415     switch (u.format) {
416     case 14: return u.format14.get_glyph_variant(codepoint, variation_selector, glyph);
417     default: return GLYPH_VARIANT_NOT_FOUND;
418     }
419   }
420 
sanitizeOT::CmapSubtable421   inline bool sanitize (hb_sanitize_context_t *c) {
422     TRACE_SANITIZE (this);
423     if (!u.format.sanitize (c)) return TRACE_RETURN (false);
424     switch (u.format) {
425     case  0: return TRACE_RETURN (u.format0 .sanitize (c));
426     case  4: return TRACE_RETURN (u.format4 .sanitize (c));
427     case  6: return TRACE_RETURN (u.format6 .sanitize (c));
428     case 10: return TRACE_RETURN (u.format10.sanitize (c));
429     case 12: return TRACE_RETURN (u.format12.sanitize (c));
430     case 13: return TRACE_RETURN (u.format13.sanitize (c));
431     case 14: return TRACE_RETURN (u.format14.sanitize (c));
432     default:return TRACE_RETURN (true);
433     }
434   }
435 
436   protected:
437   union {
438   USHORT		format;		/* Format identifier */
439   CmapSubtableFormat0	format0;
440   CmapSubtableFormat4	format4;
441   CmapSubtableFormat6	format6;
442   CmapSubtableFormat10	format10;
443   CmapSubtableFormat12	format12;
444   CmapSubtableFormat13	format13;
445   CmapSubtableFormat14	format14;
446   } u;
447   public:
448   DEFINE_SIZE_UNION (2, format);
449 };
450 
451 
452 struct EncodingRecord
453 {
cmpOT::EncodingRecord454   inline int cmp (const EncodingRecord &other) const
455   {
456     int ret;
457     ret = platformID.cmp (other.platformID);
458     if (ret) return ret;
459     ret = encodingID.cmp (other.encodingID);
460     if (ret) return ret;
461     return 0;
462   }
463 
sanitizeOT::EncodingRecord464   inline bool sanitize (hb_sanitize_context_t *c, void *base) {
465     TRACE_SANITIZE (this);
466     return TRACE_RETURN (c->check_struct (this) &&
467 			 subtable.sanitize (c, base));
468   }
469 
470   USHORT	platformID;	/* Platform ID. */
471   USHORT	encodingID;	/* Platform-specific encoding ID. */
472   OffsetTo<CmapSubtable, ULONG>
473 		subtable;	/* Byte offset from beginning of table to the subtable for this encoding. */
474   public:
475   DEFINE_SIZE_STATIC (8);
476 };
477 
478 struct cmap
479 {
480   static const hb_tag_t tableTag	= HB_OT_TAG_cmap;
481 
find_subtableOT::cmap482   inline const CmapSubtable *find_subtable (unsigned int platform_id,
483 					    unsigned int encoding_id) const
484   {
485     EncodingRecord key;
486     key.platformID.set (platform_id);
487     key.encodingID.set (encoding_id);
488 
489     /* Note: We can use bsearch, but since it has no performance
490      * implications, we use lsearch and as such accept fonts with
491      * unsorted subtable list. */
492     int result = encodingRecord./*bsearch*/lsearch (key);
493     if (result == -1 || !encodingRecord[result].subtable)
494       return NULL;
495 
496     return &(this+encodingRecord[result].subtable);
497   }
498 
sanitizeOT::cmap499   inline bool sanitize (hb_sanitize_context_t *c) {
500     TRACE_SANITIZE (this);
501     return TRACE_RETURN (c->check_struct (this) &&
502 			 likely (version == 0) &&
503 			 encodingRecord.sanitize (c, this));
504   }
505 
506   USHORT		version;	/* Table version number (0). */
507   SortedArrayOf<EncodingRecord>
508 			encodingRecord;	/* Encoding tables. */
509   public:
510   DEFINE_SIZE_ARRAY (4, encodingRecord);
511 };
512 
513 
514 } /* namespace OT */
515 
516 
517 #endif /* HB_OT_CMAP_TABLE_HH */
518