• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2009 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "cmap.h"
6 
7 #include <algorithm>
8 #include <set>
9 #include <utility>
10 #include <vector>
11 
12 #include "maxp.h"
13 #include "os2.h"
14 
15 // cmap - Character To Glyph Index Mapping Table
16 // http://www.microsoft.com/opentype/otspec/cmap.htm
17 
18 namespace {
19 
// Bookkeeping for a single cmap subtable. The first three fields are read
// from the subtable's encoding record; the last two are read from (or
// derived from) the subtable itself.
struct CMAPSubtableHeader {
  uint16_t platform;  // platform ID from the encoding record
  uint16_t encoding;  // encoding ID from the encoding record
  uint32_t offset;    // offset of the subtable from the start of the cmap data
  uint16_t format;    // format field read from the start of the subtable
  uint32_t length;    // subtable length; set to 0 for formats not handled here
};
27 
// One segment of a format 4 subtable, as collected by ParseFormat4.
struct Subtable314Range {
  uint16_t start_range;      // first code point covered by the segment
  uint16_t end_range;        // last code point covered by the segment
  int16_t id_delta;          // added to the code point when id_range_offset is 0
  uint16_t id_range_offset;  // 0, or a byte offset used to locate the glyph id
  // Position inside the subtable at which id_range_offset was read. A
  // non-zero id_range_offset is interpreted relative to this position.
  uint32_t id_range_offset_offset;
};
35 
// Upper bound on the number of groups accepted in format 12, 13 or 14
// subtables. 0xFFFF is the maximum number of glyphs in a single font file.
const unsigned kMaxCMAPGroups = 0xFFFF;

// Number of glyph id entries in the Mac Roman (format 0) table.
const size_t kFormat0ArraySize = 256;

// Largest valid Unicode code point.
const uint32_t kUnicodeUpperLimit = 0x10FFFF;

// Code point ranges of the Unicode Variation Selectors (UVS):
//   0x180B-0x180D   (3 code points)
//   0xFE00-0xFE0F   (16 code points)
//   0xE0100-0xE01EF (240 code points)
const uint32_t kMongolianVSStart = 0x180B;
const uint32_t kMongolianVSEnd = 0x180D;
const uint32_t kVSStart = 0xFE00;
const uint32_t kVSEnd = 0xFE0F;
const uint32_t kIVSStart = 0xE0100;
const uint32_t kIVSEnd = 0xE01EF;

// The maximum number of UVS records: one per selector listed above
// (3 + 16 + 240 = 259).
const uint32_t kMaxCMAPSelectorRecords =
    (kMongolianVSEnd - kMongolianVSStart + 1) +
    (kVSEnd - kVSStart + 1) +
    (kIVSEnd - kIVSStart + 1);

// Upper limit applied to unicode_value + additional_count in format 14
// default UVS ranges (the largest 24-bit value).
const uint32_t kUVSUpperLimit = 0xFFFFFF;
59 
60 // Parses Format 4 tables
ParseFormat4(ots::OpenTypeFile * file,int platform,int encoding,const uint8_t * data,size_t length,uint16_t num_glyphs)61 bool ParseFormat4(ots::OpenTypeFile *file, int platform, int encoding,
62               const uint8_t *data, size_t length, uint16_t num_glyphs) {
63   ots::Buffer subtable(data, length);
64 
65   // 0.3.4, 3.0.4 or 3.1.4 subtables are complex and, rather than expanding the
66   // whole thing and recompacting it, we validate it and include it verbatim
67   // in the output.
68 
69   if (!file->os2) {
70     return OTS_FAILURE();
71   }
72 
73   if (!subtable.Skip(4)) {
74     return OTS_FAILURE();
75   }
76   uint16_t language = 0;
77   if (!subtable.ReadU16(&language)) {
78     return OTS_FAILURE();
79   }
80   if (language) {
81     // Platform ID 3 (windows) subtables should have language '0'.
82     return OTS_FAILURE();
83   }
84 
85   uint16_t segcountx2, search_range, entry_selector, range_shift;
86   segcountx2 = search_range = entry_selector = range_shift = 0;
87   if (!subtable.ReadU16(&segcountx2) ||
88       !subtable.ReadU16(&search_range) ||
89       !subtable.ReadU16(&entry_selector) ||
90       !subtable.ReadU16(&range_shift)) {
91     return OTS_FAILURE();
92   }
93 
94   if (segcountx2 & 1 || search_range & 1) {
95     return OTS_FAILURE();
96   }
97   const uint16_t segcount = segcountx2 >> 1;
98   // There must be at least one segment according the spec.
99   if (segcount < 1) {
100     return OTS_FAILURE();
101   }
102 
103   // log2segcount is the maximal x s.t. 2^x < segcount
104   unsigned log2segcount = 0;
105   while (1u << (log2segcount + 1) <= segcount) {
106     log2segcount++;
107   }
108 
109   const uint16_t expected_search_range = 2 * 1u << log2segcount;
110   if (expected_search_range != search_range) {
111     return OTS_FAILURE();
112   }
113 
114   if (entry_selector != log2segcount) {
115     return OTS_FAILURE();
116   }
117 
118   const uint16_t expected_range_shift = segcountx2 - search_range;
119   if (range_shift != expected_range_shift) {
120     return OTS_FAILURE();
121   }
122 
123   std::vector<Subtable314Range> ranges(segcount);
124 
125   for (unsigned i = 0; i < segcount; ++i) {
126     if (!subtable.ReadU16(&ranges[i].end_range)) {
127       return OTS_FAILURE();
128     }
129   }
130 
131   uint16_t padding;
132   if (!subtable.ReadU16(&padding)) {
133     return OTS_FAILURE();
134   }
135   if (padding) {
136     return OTS_FAILURE();
137   }
138 
139   for (unsigned i = 0; i < segcount; ++i) {
140     if (!subtable.ReadU16(&ranges[i].start_range)) {
141       return OTS_FAILURE();
142     }
143   }
144   for (unsigned i = 0; i < segcount; ++i) {
145     if (!subtable.ReadS16(&ranges[i].id_delta)) {
146       return OTS_FAILURE();
147     }
148   }
149   for (unsigned i = 0; i < segcount; ++i) {
150     ranges[i].id_range_offset_offset = subtable.offset();
151     if (!subtable.ReadU16(&ranges[i].id_range_offset)) {
152       return OTS_FAILURE();
153     }
154 
155     if (ranges[i].id_range_offset & 1) {
156       // Some font generators seem to put 65535 on id_range_offset
157       // for 0xFFFF-0xFFFF range.
158       // (e.g., many fonts in http://www.princexml.com/fonts/)
159       if (i == segcount - 1u) {
160         OTS_WARNING("bad id_range_offset");
161         ranges[i].id_range_offset = 0;
162         // The id_range_offset value in the transcoded font will not change
163         // since this table is not actually "transcoded" yet.
164       } else {
165         return OTS_FAILURE();
166       }
167     }
168   }
169 
170   // ranges must be ascending order, based on the end_code. Ranges may not
171   // overlap.
172   for (unsigned i = 1; i < segcount; ++i) {
173     if ((i == segcount - 1u) &&
174         (ranges[i - 1].start_range == 0xffff) &&
175         (ranges[i - 1].end_range == 0xffff) &&
176         (ranges[i].start_range == 0xffff) &&
177         (ranges[i].end_range == 0xffff)) {
178       // Some fonts (e.g., Germania.ttf) have multiple 0xffff terminators.
179       // We'll accept them as an exception.
180       OTS_WARNING("multiple 0xffff terminators found");
181       continue;
182     }
183 
184     // Note: some Linux fonts (e.g., LucidaSansOblique.ttf, bsmi00lp.ttf) have
185     // unsorted table...
186     if (ranges[i].end_range <= ranges[i - 1].end_range) {
187       return OTS_FAILURE();
188     }
189     if (ranges[i].start_range <= ranges[i - 1].end_range) {
190       return OTS_FAILURE();
191     }
192 
193     // On many fonts, the value of {first, last}_char_index are incorrect.
194     // Fix them.
195     if (file->os2->first_char_index != 0xFFFF &&
196         ranges[i].start_range != 0xFFFF &&
197         file->os2->first_char_index > ranges[i].start_range) {
198       file->os2->first_char_index = ranges[i].start_range;
199     }
200     if (file->os2->last_char_index != 0xFFFF &&
201         ranges[i].end_range != 0xFFFF &&
202         file->os2->last_char_index < ranges[i].end_range) {
203       file->os2->last_char_index = ranges[i].end_range;
204     }
205   }
206 
207   // The last range must end at 0xffff
208   if (ranges[segcount - 1].end_range != 0xffff) {
209     return OTS_FAILURE();
210   }
211 
212   // A format 4 CMAP subtable is complex. To be safe we simulate a lookup of
213   // each code-point defined in the table and make sure that they are all valid
214   // glyphs and that we don't access anything out-of-bounds.
215   for (unsigned i = 0; i < segcount; ++i) {
216     for (unsigned cp = ranges[i].start_range; cp <= ranges[i].end_range; ++cp) {
217       const uint16_t code_point = cp;
218       if (ranges[i].id_range_offset == 0) {
219         // this is explictly allowed to overflow in the spec
220         const uint16_t glyph = code_point + ranges[i].id_delta;
221         if (glyph >= num_glyphs) {
222           return OTS_FAILURE();
223         }
224       } else {
225         const uint16_t range_delta = code_point - ranges[i].start_range;
226         // this might seem odd, but it's true. The offset is relative to the
227         // location of the offset value itself.
228         const uint32_t glyph_id_offset = ranges[i].id_range_offset_offset +
229                                          ranges[i].id_range_offset +
230                                          range_delta * 2;
231         // We need to be able to access a 16-bit value from this offset
232         if (glyph_id_offset + 1 >= length) {
233           return OTS_FAILURE();
234         }
235         uint16_t glyph;
236         std::memcpy(&glyph, data + glyph_id_offset, 2);
237         glyph = ntohs(glyph);
238         if (glyph >= num_glyphs) {
239           return OTS_FAILURE();
240         }
241       }
242     }
243   }
244 
245   // We accept the table.
246   // TODO(yusukes): transcode the subtable.
247   if (platform == 3 && encoding == 0) {
248     file->cmap->subtable_3_0_4_data = data;
249     file->cmap->subtable_3_0_4_length = length;
250   } else if (platform == 3 && encoding == 1) {
251     file->cmap->subtable_3_1_4_data = data;
252     file->cmap->subtable_3_1_4_length = length;
253   } else if (platform == 0 && encoding == 3) {
254     file->cmap->subtable_0_3_4_data = data;
255     file->cmap->subtable_0_3_4_length = length;
256   } else {
257     return OTS_FAILURE();
258   }
259 
260   return true;
261 }
262 
Parse31012(ots::OpenTypeFile * file,const uint8_t * data,size_t length,uint16_t num_glyphs)263 bool Parse31012(ots::OpenTypeFile *file,
264                 const uint8_t *data, size_t length, uint16_t num_glyphs) {
265   ots::Buffer subtable(data, length);
266 
267   // Format 12 tables are simple. We parse these and fully serialise them
268   // later.
269 
270   if (!subtable.Skip(8)) {
271     return OTS_FAILURE();
272   }
273   uint32_t language = 0;
274   if (!subtable.ReadU32(&language)) {
275     return OTS_FAILURE();
276   }
277   if (language) {
278     return OTS_FAILURE();
279   }
280 
281   uint32_t num_groups = 0;
282   if (!subtable.ReadU32(&num_groups)) {
283     return OTS_FAILURE();
284   }
285   if (num_groups == 0 || num_groups > kMaxCMAPGroups) {
286     return OTS_FAILURE();
287   }
288 
289   std::vector<ots::OpenTypeCMAPSubtableRange> &groups
290       = file->cmap->subtable_3_10_12;
291   groups.resize(num_groups);
292 
293   for (unsigned i = 0; i < num_groups; ++i) {
294     if (!subtable.ReadU32(&groups[i].start_range) ||
295         !subtable.ReadU32(&groups[i].end_range) ||
296         !subtable.ReadU32(&groups[i].start_glyph_id)) {
297       return OTS_FAILURE();
298     }
299 
300     if (groups[i].start_range > kUnicodeUpperLimit ||
301         groups[i].end_range > kUnicodeUpperLimit ||
302         groups[i].start_glyph_id > 0xFFFF) {
303       return OTS_FAILURE();
304     }
305 
306     // [0xD800, 0xDFFF] are surrogate code points.
307     if (groups[i].start_range >= 0xD800 &&
308         groups[i].start_range <= 0xDFFF) {
309       return OTS_FAILURE();
310     }
311     if (groups[i].end_range >= 0xD800 &&
312         groups[i].end_range <= 0xDFFF) {
313       return OTS_FAILURE();
314     }
315     if (groups[i].start_range < 0xD800 &&
316         groups[i].end_range > 0xDFFF) {
317       return OTS_FAILURE();
318     }
319 
320     // We assert that the glyph value is within range. Because of the range
321     // limits, above, we don't need to worry about overflow.
322     if (groups[i].end_range < groups[i].start_range) {
323       return OTS_FAILURE();
324     }
325     if ((groups[i].end_range - groups[i].start_range) +
326         groups[i].start_glyph_id > num_glyphs) {
327       return OTS_FAILURE();
328     }
329   }
330 
331   // the groups must be sorted by start code and may not overlap
332   for (unsigned i = 1; i < num_groups; ++i) {
333     if (groups[i].start_range <= groups[i - 1].start_range) {
334       return OTS_FAILURE();
335     }
336     if (groups[i].start_range <= groups[i - 1].end_range) {
337       return OTS_FAILURE();
338     }
339   }
340 
341   return true;
342 }
343 
Parse31013(ots::OpenTypeFile * file,const uint8_t * data,size_t length,uint16_t num_glyphs)344 bool Parse31013(ots::OpenTypeFile *file,
345                 const uint8_t *data, size_t length, uint16_t num_glyphs) {
346   ots::Buffer subtable(data, length);
347 
348   // Format 13 tables are simple. We parse these and fully serialise them
349   // later.
350 
351   if (!subtable.Skip(8)) {
352     return OTS_FAILURE();
353   }
354   uint16_t language = 0;
355   if (!subtable.ReadU16(&language)) {
356     return OTS_FAILURE();
357   }
358   if (language) {
359     return OTS_FAILURE();
360   }
361 
362   uint32_t num_groups = 0;
363   if (!subtable.ReadU32(&num_groups)) {
364     return OTS_FAILURE();
365   }
366 
367   // We limit the number of groups in the same way as in 3.10.12 tables. See
368   // the comment there in
369   if (num_groups == 0 || num_groups > kMaxCMAPGroups) {
370     return OTS_FAILURE();
371   }
372 
373   std::vector<ots::OpenTypeCMAPSubtableRange> &groups
374       = file->cmap->subtable_3_10_13;
375   groups.resize(num_groups);
376 
377   for (unsigned i = 0; i < num_groups; ++i) {
378     if (!subtable.ReadU32(&groups[i].start_range) ||
379         !subtable.ReadU32(&groups[i].end_range) ||
380         !subtable.ReadU32(&groups[i].start_glyph_id)) {
381       return OTS_FAILURE();
382     }
383 
384     // We conservatively limit all of the values to protect some parsers from
385     // overflows
386     if (groups[i].start_range > kUnicodeUpperLimit ||
387         groups[i].end_range > kUnicodeUpperLimit ||
388         groups[i].start_glyph_id > 0xFFFF) {
389       return OTS_FAILURE();
390     }
391 
392     if (groups[i].start_glyph_id >= num_glyphs) {
393       return OTS_FAILURE();
394     }
395   }
396 
397   // the groups must be sorted by start code and may not overlap
398   for (unsigned i = 1; i < num_groups; ++i) {
399     if (groups[i].start_range <= groups[i - 1].start_range) {
400       return OTS_FAILURE();
401     }
402     if (groups[i].start_range <= groups[i - 1].end_range) {
403       return OTS_FAILURE();
404     }
405   }
406 
407   return true;
408 }
409 
Parse0514(ots::OpenTypeFile * file,const uint8_t * data,size_t length,uint16_t num_glyphs)410 bool Parse0514(ots::OpenTypeFile *file,
411                const uint8_t *data, size_t length, uint16_t num_glyphs) {
412   // Unicode Variation Selector table
413   ots::Buffer subtable(data, length);
414 
415   // Format 14 tables are simple. We parse these and fully serialise them
416   // later.
417 
418   // Skip format (USHORT) and length (ULONG)
419   if (!subtable.Skip(6)) {
420     return OTS_FAILURE();
421   }
422 
423   uint32_t num_records = 0;
424   if (!subtable.ReadU32(&num_records)) {
425     return OTS_FAILURE();
426   }
427   if (num_records == 0 || num_records > kMaxCMAPSelectorRecords) {
428     return OTS_FAILURE();
429   }
430 
431   std::vector<ots::OpenTypeCMAPSubtableVSRecord>& records
432       = file->cmap->subtable_0_5_14;
433   records.resize(num_records);
434 
435   for (unsigned i = 0; i < num_records; ++i) {
436     if (!subtable.ReadU24(&records[i].var_selector) ||
437         !subtable.ReadU32(&records[i].default_offset) ||
438         !subtable.ReadU32(&records[i].non_default_offset)) {
439       return OTS_FAILURE();
440     }
441     // Checks the value of variation selector
442     if (!((records[i].var_selector >= kMongolianVSStart &&
443            records[i].var_selector <= kMongolianVSEnd) ||
444           (records[i].var_selector >= kVSStart &&
445            records[i].var_selector <= kVSEnd) ||
446           (records[i].var_selector >= kIVSStart &&
447            records[i].var_selector <= kIVSEnd))) {
448       return OTS_FAILURE();
449     }
450     if (i > 0 &&
451         records[i-1].var_selector >= records[i].var_selector) {
452       return OTS_FAILURE();
453     }
454 
455     // Checks offsets
456     if (!records[i].default_offset && !records[i].non_default_offset) {
457       return OTS_FAILURE();
458     }
459     if (records[i].default_offset &&
460         records[i].default_offset >= length) {
461       return OTS_FAILURE();
462     }
463     if (records[i].non_default_offset &&
464         records[i].non_default_offset >= length) {
465       return OTS_FAILURE();
466     }
467   }
468 
469   for (unsigned i = 0; i < num_records; ++i) {
470     // Checks default UVS table
471     if (records[i].default_offset) {
472       subtable.set_offset(records[i].default_offset);
473       uint32_t num_ranges = 0;
474       if (!subtable.ReadU32(&num_ranges)) {
475         return OTS_FAILURE();
476       }
477       if (!num_ranges || num_ranges > kMaxCMAPGroups) {
478         return OTS_FAILURE();
479       }
480 
481       uint32_t last_unicode_value = 0;
482       std::vector<ots::OpenTypeCMAPSubtableVSRange>& ranges
483           = records[i].ranges;
484       ranges.resize(num_ranges);
485 
486       for (unsigned j = 0; j < num_ranges; ++j) {
487         if (!subtable.ReadU24(&ranges[j].unicode_value) ||
488             !subtable.ReadU8(&ranges[j].additional_count)) {
489           return OTS_FAILURE();
490         }
491         const uint32_t check_value =
492             ranges[j].unicode_value + ranges[j].additional_count;
493         if (ranges[j].unicode_value == 0 ||
494             ranges[j].unicode_value > kUnicodeUpperLimit ||
495             check_value > kUVSUpperLimit ||
496             (last_unicode_value &&
497              ranges[j].unicode_value <= last_unicode_value)) {
498           return OTS_FAILURE();
499         }
500         last_unicode_value = check_value;
501       }
502     }
503 
504     // Checks non default UVS table
505     if (records[i].non_default_offset) {
506       subtable.set_offset(records[i].non_default_offset);
507       uint32_t num_mappings = 0;
508       if (!subtable.ReadU32(&num_mappings)) {
509         return OTS_FAILURE();
510       }
511       if (!num_mappings || num_mappings > kMaxCMAPGroups) {
512         return OTS_FAILURE();
513       }
514 
515       uint32_t last_unicode_value = 0;
516       std::vector<ots::OpenTypeCMAPSubtableVSMapping>& mappings
517           = records[i].mappings;
518       mappings.resize(num_mappings);
519 
520       for (unsigned j = 0; j < num_mappings; ++j) {
521         if (!subtable.ReadU24(&mappings[j].unicode_value) ||
522             !subtable.ReadU16(&mappings[j].glyph_id)) {
523           return OTS_FAILURE();
524         }
525         if (mappings[j].glyph_id == 0 ||
526             mappings[j].unicode_value == 0 ||
527             mappings[j].unicode_value > kUnicodeUpperLimit ||
528             (last_unicode_value &&
529              mappings[j].unicode_value <= last_unicode_value)) {
530           return OTS_FAILURE();
531         }
532         last_unicode_value = mappings[j].unicode_value;
533       }
534     }
535   }
536 
537   if (subtable.offset() != length) {
538     return OTS_FAILURE();
539   }
540   file->cmap->subtable_0_5_14_length = subtable.offset();
541   return true;
542 }
543 
Parse100(ots::OpenTypeFile * file,const uint8_t * data,size_t length)544 bool Parse100(ots::OpenTypeFile *file, const uint8_t *data, size_t length) {
545   // Mac Roman table
546   ots::Buffer subtable(data, length);
547 
548   if (!subtable.Skip(4)) {
549     return OTS_FAILURE();
550   }
551   uint16_t language = 0;
552   if (!subtable.ReadU16(&language)) {
553     return OTS_FAILURE();
554   }
555   if (language) {
556     // simsun.ttf has non-zero language id.
557     OTS_WARNING("language id should be zero: %u", language);
558   }
559 
560   file->cmap->subtable_1_0_0.reserve(kFormat0ArraySize);
561   for (size_t i = 0; i < kFormat0ArraySize; ++i) {
562     uint8_t glyph_id = 0;
563     if (!subtable.ReadU8(&glyph_id)) {
564       return OTS_FAILURE();
565     }
566     file->cmap->subtable_1_0_0.push_back(glyph_id);
567   }
568 
569   return true;
570 }
571 
572 }  // namespace
573 
574 namespace ots {
575 
ots_cmap_parse(OpenTypeFile * file,const uint8_t * data,size_t length)576 bool ots_cmap_parse(OpenTypeFile *file, const uint8_t *data, size_t length) {
577   Buffer table(data, length);
578   file->cmap = new OpenTypeCMAP;
579 
580   uint16_t version = 0;
581   uint16_t num_tables = 0;
582   if (!table.ReadU16(&version) ||
583       !table.ReadU16(&num_tables)) {
584     return OTS_FAILURE();
585   }
586 
587   if (version != 0) {
588     return OTS_FAILURE();
589   }
590   if (!num_tables) {
591     return OTS_FAILURE();
592   }
593 
594   std::vector<CMAPSubtableHeader> subtable_headers;
595 
596   // read the subtable headers
597   subtable_headers.reserve(num_tables);
598   for (unsigned i = 0; i < num_tables; ++i) {
599     CMAPSubtableHeader subt;
600 
601     if (!table.ReadU16(&subt.platform) ||
602         !table.ReadU16(&subt.encoding) ||
603         !table.ReadU32(&subt.offset)) {
604       return OTS_FAILURE();
605     }
606 
607     subtable_headers.push_back(subt);
608   }
609 
610   const size_t data_offset = table.offset();
611 
612   // make sure that all the offsets are valid.
613   uint32_t last_id = 0;
614   for (unsigned i = 0; i < num_tables; ++i) {
615     if (subtable_headers[i].offset > 1024 * 1024 * 1024) {
616       return OTS_FAILURE();
617     }
618     if (subtable_headers[i].offset < data_offset ||
619         subtable_headers[i].offset >= length) {
620       return OTS_FAILURE();
621     }
622 
623     // check if the table is sorted first by platform ID, then by encoding ID.
624     uint32_t current_id
625         = (subtable_headers[i].platform << 16) + subtable_headers[i].encoding;
626     if ((i != 0) && (last_id >= current_id)) {
627       return OTS_FAILURE();
628     }
629     last_id = current_id;
630   }
631 
632   // the format of the table is the first couple of bytes in the table. The
633   // length of the table is stored in a format-specific way.
634   for (unsigned i = 0; i < num_tables; ++i) {
635     table.set_offset(subtable_headers[i].offset);
636     if (!table.ReadU16(&subtable_headers[i].format)) {
637       return OTS_FAILURE();
638     }
639 
640     uint16_t len = 0;
641     switch (subtable_headers[i].format) {
642       case 0:
643       case 4:
644         if (!table.ReadU16(&len)) {
645           return OTS_FAILURE();
646         }
647         subtable_headers[i].length = len;
648         break;
649       case 12:
650       case 13:
651         if (!table.Skip(2)) {
652           return OTS_FAILURE();
653         }
654         if (!table.ReadU32(&subtable_headers[i].length)) {
655           return OTS_FAILURE();
656         }
657         break;
658       case 14:
659         if (!table.ReadU32(&subtable_headers[i].length)) {
660           return OTS_FAILURE();
661         }
662         break;
663       default:
664         subtable_headers[i].length = 0;
665         break;
666     }
667   }
668 
669   // Now, verify that all the lengths are sane
670   for (unsigned i = 0; i < num_tables; ++i) {
671     if (!subtable_headers[i].length) continue;
672     if (subtable_headers[i].length > 1024 * 1024 * 1024) {
673       return OTS_FAILURE();
674     }
675     // We know that both the offset and length are < 1GB, so the following
676     // addition doesn't overflow
677     const uint32_t end_byte
678         = subtable_headers[i].offset + subtable_headers[i].length;
679     if (end_byte > length) {
680       return OTS_FAILURE();
681     }
682   }
683 
684   // check that the cmap subtables are not overlapping.
685   std::set<std::pair<uint32_t, uint32_t> > uniq_checker;
686   std::vector<std::pair<uint32_t, uint8_t> > overlap_checker;
687   for (unsigned i = 0; i < num_tables; ++i) {
688     const uint32_t end_byte
689         = subtable_headers[i].offset + subtable_headers[i].length;
690 
691     if (!uniq_checker.insert(std::make_pair(subtable_headers[i].offset,
692                                             end_byte)).second) {
693       // Sometimes Unicode table and MS table share exactly the same data.
694       // We'll allow this.
695       continue;
696     }
697     overlap_checker.push_back(
698         std::make_pair(subtable_headers[i].offset,
699                        static_cast<uint8_t>(1) /* start */));
700     overlap_checker.push_back(
701         std::make_pair(end_byte, static_cast<uint8_t>(0) /* end */));
702   }
703   std::sort(overlap_checker.begin(), overlap_checker.end());
704   int overlap_count = 0;
705   for (unsigned i = 0; i < overlap_checker.size(); ++i) {
706     overlap_count += (overlap_checker[i].second ? 1 : -1);
707     if (overlap_count > 1) {
708       return OTS_FAILURE();
709     }
710   }
711 
712   // we grab the number of glyphs in the file from the maxp table to make sure
713   // that the character map isn't referencing anything beyound this range.
714   if (!file->maxp) {
715     return OTS_FAILURE();
716   }
717   const uint16_t num_glyphs = file->maxp->num_glyphs;
718 
719   // We only support a subset of the possible character map tables. Microsoft
720   // 'strongly recommends' that everyone supports the Unicode BMP table with
721   // the UCS-4 table for non-BMP glyphs. We'll pass the following subtables:
722   //   Platform ID   Encoding ID  Format
723   //   0             0            4       (Unicode Default)
724   //   0             3            4       (Unicode BMP)
725   //   0             3            12      (Unicode UCS-4)
726   //   0             5            14      (Unicode Variation Sequences)
727   //   1             0            0       (Mac Roman)
728   //   3             0            4       (MS Symbol)
729   //   3             1            4       (MS Unicode BMP)
730   //   3             10           12      (MS Unicode UCS-4)
731   //   3             10           13      (MS UCS-4 Fallback mapping)
732   //
733   // Note:
734   //  * 0-0-4 table is (usually) written as a 3-1-4 table. If 3-1-4 table
735   //    also exists, the 0-0-4 table is ignored.
736   //  * Unlike 0-0-4 table, 0-3-4 table is written as a 0-3-4 table.
737   //    Some fonts which include 0-5-14 table seems to be required 0-3-4
738   //    table. The 0-3-4 table will be wriiten even if 3-1-4 table also exists.
739   //  * 0-3-12 table is written as a 3-10-12 table. If 3-10-12 table also
740   //    exists, the 0-3-12 table is ignored.
741   //
742 
743   for (unsigned i = 0; i < num_tables; ++i) {
744     if (subtable_headers[i].platform == 0) {
745       // Unicode platform
746 
747       if ((subtable_headers[i].encoding == 0) &&
748           (subtable_headers[i].format == 4)) {
749         // parse and output the 0-0-4 table as 3-1-4 table. Sometimes the 0-0-4
750         // table actually points to MS symbol data and thus should be parsed as
751         // 3-0-4 table (e.g., marqueem.ttf and quixotic.ttf). This error will be
752         // recovered in ots_cmap_serialise().
753         if (!ParseFormat4(file, 3, 1, data + subtable_headers[i].offset,
754                       subtable_headers[i].length, num_glyphs)) {
755           return OTS_FAILURE();
756         }
757       } else if ((subtable_headers[i].encoding == 3) &&
758                  (subtable_headers[i].format == 4)) {
759         // parse and output the 0-3-4 table as 0-3-4 table.
760         if (!ParseFormat4(file, 0, 3, data + subtable_headers[i].offset,
761                       subtable_headers[i].length, num_glyphs)) {
762           return OTS_FAILURE();
763         }
764       } else if ((subtable_headers[i].encoding == 3) &&
765                  (subtable_headers[i].format == 12)) {
766         // parse and output the 0-3-12 table as 3-10-12 table.
767         if (!Parse31012(file, data + subtable_headers[i].offset,
768                         subtable_headers[i].length, num_glyphs)) {
769           return OTS_FAILURE();
770         }
771       } else if ((subtable_headers[i].encoding == 5) &&
772                  (subtable_headers[i].format == 14)) {
773         if (!Parse0514(file, data + subtable_headers[i].offset,
774                        subtable_headers[i].length, num_glyphs)) {
775           return OTS_FAILURE();
776         }
777       }
778     } else if (subtable_headers[i].platform == 1) {
779       // Mac platform
780 
781       if ((subtable_headers[i].encoding == 0) &&
782           (subtable_headers[i].format == 0)) {
783         // parse and output the 1-0-0 table.
784         if (!Parse100(file, data + subtable_headers[i].offset,
785                       subtable_headers[i].length)) {
786           return OTS_FAILURE();
787         }
788       }
789     } else if (subtable_headers[i].platform == 3) {
790       // MS platform
791 
792       switch (subtable_headers[i].encoding) {
793         case 0:
794         case 1:
795           if (subtable_headers[i].format == 4) {
796             // parse 3-0-4 or 3-1-4 table.
797             if (!ParseFormat4(file, subtable_headers[i].platform,
798                           subtable_headers[i].encoding,
799                           data + subtable_headers[i].offset,
800                           subtable_headers[i].length, num_glyphs)) {
801               return OTS_FAILURE();
802             }
803           }
804           break;
805         case 10:
806           if (subtable_headers[i].format == 12) {
807             file->cmap->subtable_3_10_12.clear();
808             if (!Parse31012(file, data + subtable_headers[i].offset,
809                             subtable_headers[i].length, num_glyphs)) {
810               return OTS_FAILURE();
811             }
812           } else if (subtable_headers[i].format == 13) {
813             file->cmap->subtable_3_10_13.clear();
814             if (!Parse31013(file, data + subtable_headers[i].offset,
815                             subtable_headers[i].length, num_glyphs)) {
816               return OTS_FAILURE();
817             }
818           }
819           break;
820       }
821     }
822   }
823 
824   return true;
825 }
826 
ots_cmap_should_serialise(OpenTypeFile * file)827 bool ots_cmap_should_serialise(OpenTypeFile *file) {
828   return file->cmap != NULL;
829 }
830 
ots_cmap_serialise(OTSStream * out,OpenTypeFile * file)831 bool ots_cmap_serialise(OTSStream *out, OpenTypeFile *file) {
832   const bool have_034 = file->cmap->subtable_0_3_4_data != NULL;
833   const bool have_0514 = file->cmap->subtable_0_5_14.size() != 0;
834   const bool have_100 = file->cmap->subtable_1_0_0.size() != 0;
835   const bool have_304 = file->cmap->subtable_3_0_4_data != NULL;
836   // MS Symbol and MS Unicode tables should not co-exist.
837   // See the comment above in 0-0-4 parser.
838   const bool have_314 = (!have_304) && file->cmap->subtable_3_1_4_data;
839   const bool have_31012 = file->cmap->subtable_3_10_12.size() != 0;
840   const bool have_31013 = file->cmap->subtable_3_10_13.size() != 0;
841   const unsigned num_subtables = static_cast<unsigned>(have_034) +
842                                  static_cast<unsigned>(have_0514) +
843                                  static_cast<unsigned>(have_100) +
844                                  static_cast<unsigned>(have_304) +
845                                  static_cast<unsigned>(have_314) +
846                                  static_cast<unsigned>(have_31012) +
847                                  static_cast<unsigned>(have_31013);
848   const off_t table_start = out->Tell();
849 
850   // Some fonts don't have 3-0-4 MS Symbol nor 3-1-4 Unicode BMP tables
851   // (e.g., old fonts for Mac). We don't support them.
852   if (!have_304 && !have_314 && !have_034) {
853     return OTS_FAILURE();
854   }
855 
856   if (!out->WriteU16(0) ||
857       !out->WriteU16(num_subtables)) {
858     return OTS_FAILURE();
859   }
860 
861   const off_t record_offset = out->Tell();
862   if (!out->Pad(num_subtables * 8)) {
863     return OTS_FAILURE();
864   }
865 
866   const off_t offset_034 = out->Tell();
867   if (have_034) {
868     if (!out->Write(file->cmap->subtable_0_3_4_data,
869                     file->cmap->subtable_0_3_4_length)) {
870       return OTS_FAILURE();
871     }
872   }
873 
874   const off_t offset_0514 = out->Tell();
875   if (have_0514) {
876     const std::vector<ots::OpenTypeCMAPSubtableVSRecord> &records
877         = file->cmap->subtable_0_5_14;
878     const unsigned num_records = records.size();
879     if (!out->WriteU16(14) ||
880         !out->WriteU32(file->cmap->subtable_0_5_14_length) ||
881         !out->WriteU32(num_records)) {
882       return OTS_FAILURE();
883     }
884     for (unsigned i = 0; i < num_records; ++i) {
885       if (!out->WriteU24(records[i].var_selector) ||
886           !out->WriteU32(records[i].default_offset) ||
887           !out->WriteU32(records[i].non_default_offset)) {
888         return OTS_FAILURE();
889       }
890     }
891     for (unsigned i = 0; i < num_records; ++i) {
892       if (records[i].default_offset) {
893         const std::vector<ots::OpenTypeCMAPSubtableVSRange> &ranges
894             = records[i].ranges;
895         const unsigned num_ranges = ranges.size();
896         if (!out->Seek(records[i].default_offset + offset_0514) ||
897             !out->WriteU32(num_ranges)) {
898           return OTS_FAILURE();
899         }
900         for (unsigned j = 0; j < num_ranges; ++j) {
901           if (!out->WriteU24(ranges[j].unicode_value) ||
902               !out->WriteU8(ranges[j].additional_count)) {
903             return OTS_FAILURE();
904           }
905         }
906       }
907       if (records[i].non_default_offset) {
908         const std::vector<ots::OpenTypeCMAPSubtableVSMapping> &mappings
909             = records[i].mappings;
910         const unsigned num_mappings = mappings.size();
911         if (!out->Seek(records[i].non_default_offset + offset_0514) ||
912             !out->WriteU32(num_mappings)) {
913           return OTS_FAILURE();
914         }
915         for (unsigned j = 0; j < num_mappings; ++j) {
916           if (!out->WriteU24(mappings[j].unicode_value) ||
917               !out->WriteU16(mappings[j].glyph_id)) {
918             return OTS_FAILURE();
919           }
920         }
921       }
922     }
923   }
924 
925   const off_t offset_100 = out->Tell();
926   if (have_100) {
927     if (!out->WriteU16(0) ||  // format
928         !out->WriteU16(6 + kFormat0ArraySize) ||  // length
929         !out->WriteU16(0)) {  // language
930       return OTS_FAILURE();
931     }
932     if (!out->Write(&(file->cmap->subtable_1_0_0[0]), kFormat0ArraySize)) {
933       return OTS_FAILURE();
934     }
935   }
936 
937   const off_t offset_304 = out->Tell();
938   if (have_304) {
939     if (!out->Write(file->cmap->subtable_3_0_4_data,
940                     file->cmap->subtable_3_0_4_length)) {
941       return OTS_FAILURE();
942     }
943   }
944 
945   const off_t offset_314 = out->Tell();
946   if (have_314) {
947     if (!out->Write(file->cmap->subtable_3_1_4_data,
948                     file->cmap->subtable_3_1_4_length)) {
949       return OTS_FAILURE();
950     }
951   }
952 
953   const off_t offset_31012 = out->Tell();
954   if (have_31012) {
955     std::vector<OpenTypeCMAPSubtableRange> &groups
956         = file->cmap->subtable_3_10_12;
957     const unsigned num_groups = groups.size();
958     if (!out->WriteU16(12) ||
959         !out->WriteU16(0) ||
960         !out->WriteU32(num_groups * 12 + 16) ||
961         !out->WriteU32(0) ||
962         !out->WriteU32(num_groups)) {
963       return OTS_FAILURE();
964     }
965 
966     for (unsigned i = 0; i < num_groups; ++i) {
967       if (!out->WriteU32(groups[i].start_range) ||
968           !out->WriteU32(groups[i].end_range) ||
969           !out->WriteU32(groups[i].start_glyph_id)) {
970         return OTS_FAILURE();
971       }
972     }
973   }
974 
975   const off_t offset_31013 = out->Tell();
976   if (have_31013) {
977     std::vector<OpenTypeCMAPSubtableRange> &groups
978         = file->cmap->subtable_3_10_13;
979     const unsigned num_groups = groups.size();
980     if (!out->WriteU16(13) ||
981         !out->WriteU16(0) ||
982         !out->WriteU32(num_groups * 12 + 14) ||
983         !out->WriteU32(0) ||
984         !out->WriteU32(num_groups)) {
985       return OTS_FAILURE();
986     }
987 
988     for (unsigned i = 0; i < num_groups; ++i) {
989       if (!out->WriteU32(groups[i].start_range) ||
990           !out->WriteU32(groups[i].end_range) ||
991           !out->WriteU32(groups[i].start_glyph_id)) {
992         return OTS_FAILURE();
993       }
994     }
995   }
996 
997   const off_t table_end = out->Tell();
998   // We might have hanging bytes from the above's checksum which the OTSStream
999   // then merges into the table of offsets.
1000   OTSStream::ChecksumState saved_checksum = out->SaveChecksumState();
1001   out->ResetChecksum();
1002 
1003   // Now seek back and write the table of offsets
1004   if (!out->Seek(record_offset)) {
1005     return OTS_FAILURE();
1006   }
1007 
1008   if (have_034) {
1009     if (!out->WriteU16(0) ||
1010         !out->WriteU16(3) ||
1011         !out->WriteU32(offset_034 - table_start)) {
1012       return OTS_FAILURE();
1013     }
1014   }
1015 
1016   if (have_0514) {
1017     if (!out->WriteU16(0) ||
1018         !out->WriteU16(5) ||
1019         !out->WriteU32(offset_0514 - table_start)) {
1020       return OTS_FAILURE();
1021     }
1022   }
1023 
1024   if (have_100) {
1025     if (!out->WriteU16(1) ||
1026         !out->WriteU16(0) ||
1027         !out->WriteU32(offset_100 - table_start)) {
1028       return OTS_FAILURE();
1029     }
1030   }
1031 
1032   if (have_304) {
1033     if (!out->WriteU16(3) ||
1034         !out->WriteU16(0) ||
1035         !out->WriteU32(offset_304 - table_start)) {
1036       return OTS_FAILURE();
1037     }
1038   }
1039 
1040   if (have_314) {
1041     if (!out->WriteU16(3) ||
1042         !out->WriteU16(1) ||
1043         !out->WriteU32(offset_314 - table_start)) {
1044       return OTS_FAILURE();
1045     }
1046   }
1047 
1048   if (have_31012) {
1049     if (!out->WriteU16(3) ||
1050         !out->WriteU16(10) ||
1051         !out->WriteU32(offset_31012 - table_start)) {
1052       return OTS_FAILURE();
1053     }
1054   }
1055 
1056   if (have_31013) {
1057     if (!out->WriteU16(3) ||
1058         !out->WriteU16(10) ||
1059         !out->WriteU32(offset_31013 - table_start)) {
1060       return OTS_FAILURE();
1061     }
1062   }
1063 
1064   if (!out->Seek(table_end)) {
1065     return OTS_FAILURE();
1066   }
1067   out->RestoreChecksum(saved_checksum);
1068 
1069   return true;
1070 }
1071 
ots_cmap_free(OpenTypeFile * file)1072 void ots_cmap_free(OpenTypeFile *file) {
1073   delete file->cmap;
1074 }
1075 
1076 }  // namespace ots
1077