1 /* 2 * Copyright © 2014 Google, Inc. 3 * 4 * This is part of HarfBuzz, a text shaping library. 5 * 6 * Permission is hereby granted, without written agreement and without 7 * license or royalty fees, to use, copy, modify, and distribute this 8 * software and its documentation for any purpose, provided that the 9 * above copyright notice and the following two paragraphs appear in 10 * all copies of this software. 11 * 12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR 13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN 15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 16 * DAMAGE. 17 * 18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, 19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS 21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO 22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 23 * 24 * Google Author(s): Behdad Esfahbod 25 */ 26 27 #ifndef HB_OT_CMAP_TABLE_HH 28 #define HB_OT_CMAP_TABLE_HH 29 30 #include "hb-open-type.hh" 31 #include "hb-set.hh" 32 33 /* 34 * cmap -- Character to Glyph Index Mapping 35 * https://docs.microsoft.com/en-us/typography/opentype/spec/cmap 36 */ 37 #define HB_OT_TAG_cmap HB_TAG('c','m','a','p') 38 39 namespace OT { 40 41 42 struct CmapSubtableFormat0 43 { get_glyphOT::CmapSubtableFormat044 bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const 45 { 46 hb_codepoint_t gid = codepoint < 256 ? glyphIdArray[codepoint] : 0; 47 if (!gid) 48 return false; 49 *glyph = gid; 50 return true; 51 } 52 get_languageOT::CmapSubtableFormat053 unsigned get_language () const 54 { 55 return language; 56 } 57 collect_unicodesOT::CmapSubtableFormat058 void collect_unicodes (hb_set_t *out) const 59 { 60 for (unsigned int i = 0; i < 256; i++) 61 if (glyphIdArray[i]) 62 out->add (i); 63 } 64 collect_mappingOT::CmapSubtableFormat065 void collect_mapping (hb_set_t *unicodes, /* OUT */ 66 hb_map_t *mapping /* OUT */) const 67 { 68 for (unsigned i = 0; i < 256; i++) 69 if (glyphIdArray[i]) 70 { 71 hb_codepoint_t glyph = glyphIdArray[i]; 72 unicodes->add (i); 73 mapping->set (i, glyph); 74 } 75 } 76 sanitizeOT::CmapSubtableFormat077 bool sanitize (hb_sanitize_context_t *c) const 78 { 79 TRACE_SANITIZE (this); 80 return_trace (c->check_struct (this)); 81 } 82 83 protected: 84 HBUINT16 format; /* Format number is set to 0. */ 85 HBUINT16 length; /* Byte length of this subtable. */ 86 HBUINT16 language; /* Ignore. */ 87 HBUINT8 glyphIdArray[256];/* An array that maps character 88 * code to glyph index values. */ 89 public: 90 DEFINE_SIZE_STATIC (6 + 256); 91 }; 92 93 struct CmapSubtableFormat4 94 { 95 96 template<typename Iterator, 97 hb_requires (hb_is_iterator (Iterator))> serialize_endcode_arrayOT::CmapSubtableFormat498 HBUINT16* serialize_endcode_array (hb_serialize_context_t *c, 99 Iterator it) 100 { 101 HBUINT16 *endCode = c->start_embed<HBUINT16> (); 102 hb_codepoint_t prev_endcp = 0xFFFF; 103 104 for (const auto& _ : +it) 105 { 106 if (prev_endcp != 0xFFFF && prev_endcp + 1u != _.first) 107 { 108 HBUINT16 end_code; 109 end_code = prev_endcp; 110 c->copy<HBUINT16> (end_code); 111 } 112 prev_endcp = _.first; 113 } 114 115 { 116 // last endCode 117 HBUINT16 endcode; 118 endcode = prev_endcp; 119 if (unlikely (!c->copy<HBUINT16> (endcode))) return nullptr; 120 // There must be a final entry with end_code == 0xFFFF. 121 if (prev_endcp != 0xFFFF) 122 { 123 HBUINT16 finalcode; 124 finalcode = 0xFFFF; 125 if (unlikely (!c->copy<HBUINT16> (finalcode))) return nullptr; 126 } 127 } 128 129 return endCode; 130 } 131 132 template<typename Iterator, 133 hb_requires (hb_is_iterator (Iterator))> serialize_startcode_arrayOT::CmapSubtableFormat4134 HBUINT16* serialize_startcode_array (hb_serialize_context_t *c, 135 Iterator it) 136 { 137 HBUINT16 *startCode = c->start_embed<HBUINT16> (); 138 hb_codepoint_t prev_cp = 0xFFFF; 139 140 for (const auto& _ : +it) 141 { 142 if (prev_cp == 0xFFFF || prev_cp + 1u != _.first) 143 { 144 HBUINT16 start_code; 145 start_code = _.first; 146 c->copy<HBUINT16> (start_code); 147 } 148 149 prev_cp = _.first; 150 } 151 152 // There must be a final entry with end_code == 0xFFFF. 153 if (it.len () == 0 || prev_cp != 0xFFFF) 154 { 155 HBUINT16 finalcode; 156 finalcode = 0xFFFF; 157 if (unlikely (!c->copy<HBUINT16> (finalcode))) return nullptr; 158 } 159 160 return startCode; 161 } 162 163 template<typename Iterator, 164 hb_requires (hb_is_iterator (Iterator))> serialize_idDelta_arrayOT::CmapSubtableFormat4165 HBINT16* serialize_idDelta_array (hb_serialize_context_t *c, 166 Iterator it, 167 HBUINT16 *endCode, 168 HBUINT16 *startCode, 169 unsigned segcount) 170 { 171 unsigned i = 0; 172 hb_codepoint_t last_gid = 0, start_gid = 0, last_cp = 0xFFFF; 173 bool use_delta = true; 174 175 HBINT16 *idDelta = c->start_embed<HBINT16> (); 176 if ((char *)idDelta - (char *)startCode != (int) segcount * (int) HBINT16::static_size) 177 return nullptr; 178 179 for (const auto& _ : +it) 180 { 181 if (_.first == startCode[i]) 182 { 183 use_delta = true; 184 start_gid = _.second; 185 } 186 else if (_.second != last_gid + 1) use_delta = false; 187 188 if (_.first == endCode[i]) 189 { 190 HBINT16 delta; 191 if (use_delta) delta = (int)start_gid - (int)startCode[i]; 192 else delta = 0; 193 c->copy<HBINT16> (delta); 194 195 i++; 196 } 197 198 last_gid = _.second; 199 last_cp = _.first; 200 } 201 202 if (it.len () == 0 || last_cp != 0xFFFF) 203 { 204 HBINT16 delta; 205 delta = 1; 206 if (unlikely (!c->copy<HBINT16> (delta))) return nullptr; 207 } 208 209 return idDelta; 210 } 211 212 template<typename Iterator, 213 hb_requires (hb_is_iterator (Iterator))> serialize_rangeoffset_glyidOT::CmapSubtableFormat4214 HBUINT16* serialize_rangeoffset_glyid (hb_serialize_context_t *c, 215 Iterator it, 216 HBUINT16 *endCode, 217 HBUINT16 *startCode, 218 HBINT16 *idDelta, 219 unsigned segcount) 220 { 221 hb_hashmap_t<hb_codepoint_t, hb_codepoint_t> cp_to_gid; 222 + it | hb_sink (cp_to_gid); 223 224 HBUINT16 *idRangeOffset = c->allocate_size<HBUINT16> (HBUINT16::static_size * segcount); 225 if (unlikely (!c->check_success (idRangeOffset))) return nullptr; 226 if (unlikely ((char *)idRangeOffset - (char *)idDelta != (int) segcount * (int) HBINT16::static_size)) return nullptr; 227 228 for (unsigned i : + hb_range (segcount) 229 | hb_filter ([&] (const unsigned _) { return idDelta[_] == 0; })) 230 { 231 idRangeOffset[i] = 2 * (c->start_embed<HBUINT16> () - idRangeOffset - i); 232 for (hb_codepoint_t cp = startCode[i]; cp <= endCode[i]; cp++) 233 { 234 HBUINT16 gid; 235 gid = cp_to_gid[cp]; 236 c->copy<HBUINT16> (gid); 237 } 238 } 239 240 return idRangeOffset; 241 } 242 243 template<typename Iterator, 244 hb_requires (hb_is_iterator (Iterator))> serializeOT::CmapSubtableFormat4245 void serialize (hb_serialize_context_t *c, 246 Iterator it) 247 { 248 auto format4_iter = 249 + it 250 | hb_filter ([&] (const hb_pair_t<hb_codepoint_t, hb_codepoint_t> _) 251 { return _.first <= 0xFFFF; }) 252 ; 253 254 if (format4_iter.len () == 0) return; 255 256 unsigned table_initpos = c->length (); 257 if (unlikely (!c->extend_min (this))) return; 258 this->format = 4; 259 260 //serialize endCode[] 261 HBUINT16 *endCode = serialize_endcode_array (c, format4_iter); 262 if (unlikely (!endCode)) return; 263 264 unsigned segcount = (c->length () - min_size) / HBUINT16::static_size; 265 266 // 2 bytes of padding. 267 if (unlikely (!c->allocate_size<HBUINT16> (HBUINT16::static_size))) return; // 2 bytes of padding. 268 269 // serialize startCode[] 270 HBUINT16 *startCode = serialize_startcode_array (c, format4_iter); 271 if (unlikely (!startCode)) return; 272 273 //serialize idDelta[] 274 HBINT16 *idDelta = serialize_idDelta_array (c, format4_iter, endCode, startCode, segcount); 275 if (unlikely (!idDelta)) return; 276 277 HBUINT16 *idRangeOffset = serialize_rangeoffset_glyid (c, format4_iter, endCode, startCode, idDelta, segcount); 278 if (unlikely (!c->check_success (idRangeOffset))) return; 279 280 this->length = c->length () - table_initpos; 281 if ((long long) this->length != (long long) c->length () - table_initpos) 282 { 283 // Length overflowed. Discard the current object before setting the error condition, otherwise 284 // discard is a noop which prevents the higher level code from reverting the serializer to the 285 // pre-error state in cmap4 overflow handling code. 286 c->pop_discard (); 287 c->err (HB_SERIALIZE_ERROR_INT_OVERFLOW); 288 return; 289 } 290 291 this->segCountX2 = segcount * 2; 292 this->entrySelector = hb_max (1u, hb_bit_storage (segcount)) - 1; 293 this->searchRange = 2 * (1u << this->entrySelector); 294 this->rangeShift = segcount * 2 > this->searchRange 295 ? 2 * segcount - this->searchRange 296 : 0; 297 } 298 get_languageOT::CmapSubtableFormat4299 unsigned get_language () const 300 { 301 return language; 302 } 303 304 struct accelerator_t 305 { accelerator_tOT::CmapSubtableFormat4::accelerator_t306 accelerator_t () {} accelerator_tOT::CmapSubtableFormat4::accelerator_t307 accelerator_t (const CmapSubtableFormat4 *subtable) { init (subtable); } ~accelerator_tOT::CmapSubtableFormat4::accelerator_t308 ~accelerator_t () { fini (); } 309 initOT::CmapSubtableFormat4::accelerator_t310 void init (const CmapSubtableFormat4 *subtable) 311 { 312 segCount = subtable->segCountX2 / 2; 313 endCount = subtable->values.arrayZ; 314 startCount = endCount + segCount + 1; 315 idDelta = startCount + segCount; 316 idRangeOffset = idDelta + segCount; 317 glyphIdArray = idRangeOffset + segCount; 318 glyphIdArrayLength = (subtable->length - 16 - 8 * segCount) / 2; 319 } finiOT::CmapSubtableFormat4::accelerator_t320 void fini () {} 321 get_glyphOT::CmapSubtableFormat4::accelerator_t322 bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const 323 { 324 struct CustomRange 325 { 326 int cmp (hb_codepoint_t k, 327 unsigned distance) const 328 { 329 if (k > last) return +1; 330 if (k < (&last)[distance]) return -1; 331 return 0; 332 } 333 HBUINT16 last; 334 }; 335 336 const HBUINT16 *found = hb_bsearch (codepoint, 337 this->endCount, 338 this->segCount, 339 2, 340 _hb_cmp_method<hb_codepoint_t, CustomRange, unsigned>, 341 this->segCount + 1); 342 if (!found) 343 return false; 344 unsigned int i = found - endCount; 345 346 hb_codepoint_t gid; 347 unsigned int rangeOffset = this->idRangeOffset[i]; 348 if (rangeOffset == 0) 349 gid = codepoint + this->idDelta[i]; 350 else 351 { 352 /* Somebody has been smoking... */ 353 unsigned int index = rangeOffset / 2 + (codepoint - this->startCount[i]) + i - this->segCount; 354 if (unlikely (index >= this->glyphIdArrayLength)) 355 return false; 356 gid = this->glyphIdArray[index]; 357 if (unlikely (!gid)) 358 return false; 359 gid += this->idDelta[i]; 360 } 361 gid &= 0xFFFFu; 362 if (!gid) 363 return false; 364 *glyph = gid; 365 return true; 366 } 367 get_glyph_funcOT::CmapSubtableFormat4::accelerator_t368 HB_INTERNAL static bool get_glyph_func (const void *obj, hb_codepoint_t codepoint, hb_codepoint_t *glyph) 369 { return ((const accelerator_t *) obj)->get_glyph (codepoint, glyph); } 370 collect_unicodesOT::CmapSubtableFormat4::accelerator_t371 void collect_unicodes (hb_set_t *out) const 372 { 373 unsigned int count = this->segCount; 374 if (count && this->startCount[count - 1] == 0xFFFFu) 375 count--; /* Skip sentinel segment. */ 376 for (unsigned int i = 0; i < count; i++) 377 { 378 hb_codepoint_t start = this->startCount[i]; 379 hb_codepoint_t end = this->endCount[i]; 380 unsigned int rangeOffset = this->idRangeOffset[i]; 381 if (rangeOffset == 0) 382 { 383 for (hb_codepoint_t codepoint = start; codepoint <= end; codepoint++) 384 { 385 hb_codepoint_t gid = (codepoint + this->idDelta[i]) & 0xFFFFu; 386 if (unlikely (!gid)) 387 continue; 388 out->add (codepoint); 389 } 390 } 391 else 392 { 393 for (hb_codepoint_t codepoint = start; codepoint <= end; codepoint++) 394 { 395 unsigned int index = rangeOffset / 2 + (codepoint - this->startCount[i]) + i - this->segCount; 396 if (unlikely (index >= this->glyphIdArrayLength)) 397 break; 398 hb_codepoint_t gid = this->glyphIdArray[index]; 399 if (unlikely (!gid)) 400 continue; 401 out->add (codepoint); 402 } 403 } 404 } 405 } 406 collect_mappingOT::CmapSubtableFormat4::accelerator_t407 void collect_mapping (hb_set_t *unicodes, /* OUT */ 408 hb_map_t *mapping /* OUT */) const 409 { 410 unsigned count = this->segCount; 411 if (count && this->startCount[count - 1] == 0xFFFFu) 412 count--; /* Skip sentinel segment. */ 413 for (unsigned i = 0; i < count; i++) 414 { 415 hb_codepoint_t start = this->startCount[i]; 416 hb_codepoint_t end = this->endCount[i]; 417 unsigned rangeOffset = this->idRangeOffset[i]; 418 if (rangeOffset == 0) 419 { 420 for (hb_codepoint_t codepoint = start; codepoint <= end; codepoint++) 421 { 422 hb_codepoint_t gid = (codepoint + this->idDelta[i]) & 0xFFFFu; 423 if (unlikely (!gid)) 424 continue; 425 unicodes->add (codepoint); 426 mapping->set (codepoint, gid); 427 } 428 } 429 else 430 { 431 for (hb_codepoint_t codepoint = start; codepoint <= end; codepoint++) 432 { 433 unsigned index = rangeOffset / 2 + (codepoint - this->startCount[i]) + i - this->segCount; 434 if (unlikely (index >= this->glyphIdArrayLength)) 435 break; 436 hb_codepoint_t gid = this->glyphIdArray[index]; 437 if (unlikely (!gid)) 438 continue; 439 unicodes->add (codepoint); 440 mapping->set (codepoint, gid); 441 } 442 } 443 } 444 } 445 446 const HBUINT16 *endCount; 447 const HBUINT16 *startCount; 448 const HBUINT16 *idDelta; 449 const HBUINT16 *idRangeOffset; 450 const HBUINT16 *glyphIdArray; 451 unsigned int segCount; 452 unsigned int glyphIdArrayLength; 453 }; 454 get_glyphOT::CmapSubtableFormat4455 bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const 456 { 457 accelerator_t accel (this); 458 return accel.get_glyph_func (&accel, codepoint, glyph); 459 } collect_unicodesOT::CmapSubtableFormat4460 void collect_unicodes (hb_set_t *out) const 461 { 462 accelerator_t accel (this); 463 accel.collect_unicodes (out); 464 } 465 collect_mappingOT::CmapSubtableFormat4466 void collect_mapping (hb_set_t *unicodes, /* OUT */ 467 hb_map_t *mapping /* OUT */) const 468 { 469 accelerator_t accel (this); 470 accel.collect_mapping (unicodes, mapping); 471 } 472 sanitizeOT::CmapSubtableFormat4473 bool sanitize (hb_sanitize_context_t *c) const 474 { 475 TRACE_SANITIZE (this); 476 if (unlikely (!c->check_struct (this))) 477 return_trace (false); 478 479 if (unlikely (!c->check_range (this, length))) 480 { 481 /* Some broken fonts have too long of a "length" value. 482 * If that is the case, just change the value to truncate 483 * the subtable at the end of the blob. */ 484 uint16_t new_length = (uint16_t) hb_min ((uintptr_t) 65535, 485 (uintptr_t) (c->end - 486 (char *) this)); 487 if (!c->try_set (&length, new_length)) 488 return_trace (false); 489 } 490 491 return_trace (16 + 4 * (unsigned int) segCountX2 <= length); 492 } 493 494 495 496 protected: 497 HBUINT16 format; /* Format number is set to 4. */ 498 HBUINT16 length; /* This is the length in bytes of the 499 * subtable. */ 500 HBUINT16 language; /* Ignore. */ 501 HBUINT16 segCountX2; /* 2 x segCount. */ 502 HBUINT16 searchRange; /* 2 * (2**floor(log2(segCount))) */ 503 HBUINT16 entrySelector; /* log2(searchRange/2) */ 504 HBUINT16 rangeShift; /* 2 x segCount - searchRange */ 505 506 UnsizedArrayOf<HBUINT16> 507 values; 508 #if 0 509 HBUINT16 endCount[segCount]; /* End characterCode for each segment, 510 * last=0xFFFFu. */ 511 HBUINT16 reservedPad; /* Set to 0. */ 512 HBUINT16 startCount[segCount]; /* Start character code for each segment. */ 513 HBINT16 idDelta[segCount]; /* Delta for all character codes in segment. */ 514 HBUINT16 idRangeOffset[segCount];/* Offsets into glyphIdArray or 0 */ 515 UnsizedArrayOf<HBUINT16> 516 glyphIdArray; /* Glyph index array (arbitrary length) */ 517 #endif 518 519 public: 520 DEFINE_SIZE_ARRAY (14, values); 521 }; 522 523 struct CmapSubtableLongGroup 524 { 525 friend struct CmapSubtableFormat12; 526 friend struct CmapSubtableFormat13; 527 template<typename U> 528 friend struct CmapSubtableLongSegmented; 529 friend struct cmap; 530 cmpOT::CmapSubtableLongGroup531 int cmp (hb_codepoint_t codepoint) const 532 { 533 if (codepoint < startCharCode) return -1; 534 if (codepoint > endCharCode) return +1; 535 return 0; 536 } 537 sanitizeOT::CmapSubtableLongGroup538 bool sanitize (hb_sanitize_context_t *c) const 539 { 540 TRACE_SANITIZE (this); 541 return_trace (c->check_struct (this)); 542 } 543 544 private: 545 HBUINT32 startCharCode; /* First character code in this group. */ 546 HBUINT32 endCharCode; /* Last character code in this group. */ 547 HBUINT32 glyphID; /* Glyph index; interpretation depends on 548 * subtable format. */ 549 public: 550 DEFINE_SIZE_STATIC (12); 551 }; 552 DECLARE_NULL_NAMESPACE_BYTES (OT, CmapSubtableLongGroup); 553 554 template <typename UINT> 555 struct CmapSubtableTrimmed 556 { get_glyphOT::CmapSubtableTrimmed557 bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const 558 { 559 /* Rely on our implicit array bound-checking. */ 560 hb_codepoint_t gid = glyphIdArray[codepoint - startCharCode]; 561 if (!gid) 562 return false; 563 *glyph = gid; 564 return true; 565 } 566 get_languageOT::CmapSubtableTrimmed567 unsigned get_language () const 568 { 569 return language; 570 } 571 collect_unicodesOT::CmapSubtableTrimmed572 void collect_unicodes (hb_set_t *out) const 573 { 574 hb_codepoint_t start = startCharCode; 575 unsigned int count = glyphIdArray.len; 576 for (unsigned int i = 0; i < count; i++) 577 if (glyphIdArray[i]) 578 out->add (start + i); 579 } 580 collect_mappingOT::CmapSubtableTrimmed581 void collect_mapping (hb_set_t *unicodes, /* OUT */ 582 hb_map_t *mapping /* OUT */) const 583 { 584 hb_codepoint_t start_cp = startCharCode; 585 unsigned count = glyphIdArray.len; 586 for (unsigned i = 0; i < count; i++) 587 if (glyphIdArray[i]) 588 { 589 hb_codepoint_t unicode = start_cp + i; 590 hb_codepoint_t glyphid = glyphIdArray[i]; 591 unicodes->add (unicode); 592 mapping->set (unicode, glyphid); 593 } 594 } 595 sanitizeOT::CmapSubtableTrimmed596 bool sanitize (hb_sanitize_context_t *c) const 597 { 598 TRACE_SANITIZE (this); 599 return_trace (c->check_struct (this) && glyphIdArray.sanitize (c)); 600 } 601 602 protected: 603 UINT formatReserved; /* Subtable format and (maybe) padding. */ 604 UINT length; /* Byte length of this subtable. */ 605 UINT language; /* Ignore. */ 606 UINT startCharCode; /* First character code covered. */ 607 ArrayOf<HBGlyphID16, UINT> 608 glyphIdArray; /* Array of glyph index values for character 609 * codes in the range. */ 610 public: 611 DEFINE_SIZE_ARRAY (5 * sizeof (UINT), glyphIdArray); 612 }; 613 614 struct CmapSubtableFormat6 : CmapSubtableTrimmed<HBUINT16> {}; 615 struct CmapSubtableFormat10 : CmapSubtableTrimmed<HBUINT32 > {}; 616 617 template <typename T> 618 struct CmapSubtableLongSegmented 619 { 620 friend struct cmap; 621 get_glyphOT::CmapSubtableLongSegmented622 bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const 623 { 624 hb_codepoint_t gid = T::group_get_glyph (groups.bsearch (codepoint), codepoint); 625 if (!gid) 626 return false; 627 *glyph = gid; 628 return true; 629 } 630 get_languageOT::CmapSubtableLongSegmented631 unsigned get_language () const 632 { 633 return language; 634 } 635 collect_unicodesOT::CmapSubtableLongSegmented636 void collect_unicodes (hb_set_t *out, unsigned int num_glyphs) const 637 { 638 for (unsigned int i = 0; i < this->groups.len; i++) 639 { 640 hb_codepoint_t start = this->groups[i].startCharCode; 641 hb_codepoint_t end = hb_min ((hb_codepoint_t) this->groups[i].endCharCode, 642 (hb_codepoint_t) HB_UNICODE_MAX); 643 hb_codepoint_t gid = this->groups[i].glyphID; 644 if (!gid) 645 { 646 /* Intention is: if (hb_is_same (T, CmapSubtableFormat13)) continue; */ 647 if (! T::group_get_glyph (this->groups[i], end)) continue; 648 start++; 649 gid++; 650 } 651 if (unlikely ((unsigned int) gid >= num_glyphs)) continue; 652 if (unlikely ((unsigned int) (gid + end - start) >= num_glyphs)) 653 end = start + (hb_codepoint_t) num_glyphs - gid; 654 655 out->add_range (start, end); 656 } 657 } 658 collect_mappingOT::CmapSubtableLongSegmented659 void collect_mapping (hb_set_t *unicodes, /* OUT */ 660 hb_map_t *mapping, /* OUT */ 661 unsigned num_glyphs) const 662 { 663 for (unsigned i = 0; i < this->groups.len; i++) 664 { 665 hb_codepoint_t start = this->groups[i].startCharCode; 666 hb_codepoint_t end = hb_min ((hb_codepoint_t) this->groups[i].endCharCode, 667 (hb_codepoint_t) HB_UNICODE_MAX); 668 hb_codepoint_t gid = this->groups[i].glyphID; 669 if (!gid) 670 { 671 /* Intention is: if (hb_is_same (T, CmapSubtableFormat13)) continue; */ 672 if (! T::group_get_glyph (this->groups[i], end)) continue; 673 start++; 674 gid++; 675 } 676 if (unlikely ((unsigned int) gid >= num_glyphs)) continue; 677 if (unlikely ((unsigned int) (gid + end - start) >= num_glyphs)) 678 end = start + (hb_codepoint_t) num_glyphs - gid; 679 680 for (unsigned cp = start; cp <= end; cp++) 681 { 682 unicodes->add (cp); 683 mapping->set (cp, gid); 684 gid++; 685 } 686 } 687 } 688 sanitizeOT::CmapSubtableLongSegmented689 bool sanitize (hb_sanitize_context_t *c) const 690 { 691 TRACE_SANITIZE (this); 692 return_trace (c->check_struct (this) && groups.sanitize (c)); 693 } 694 695 protected: 696 HBUINT16 format; /* Subtable format; set to 12. */ 697 HBUINT16 reserved; /* Reserved; set to 0. */ 698 HBUINT32 length; /* Byte length of this subtable. */ 699 HBUINT32 language; /* Ignore. */ 700 SortedArray32Of<CmapSubtableLongGroup> 701 groups; /* Groupings. */ 702 public: 703 DEFINE_SIZE_ARRAY (16, groups); 704 }; 705 706 struct CmapSubtableFormat12 : CmapSubtableLongSegmented<CmapSubtableFormat12> 707 { group_get_glyphOT::CmapSubtableFormat12708 static hb_codepoint_t group_get_glyph (const CmapSubtableLongGroup &group, 709 hb_codepoint_t u) 710 { return likely (group.startCharCode <= group.endCharCode) ? 711 group.glyphID + (u - group.startCharCode) : 0; } 712 713 714 template<typename Iterator, 715 hb_requires (hb_is_iterator (Iterator))> serializeOT::CmapSubtableFormat12716 void serialize (hb_serialize_context_t *c, 717 Iterator it) 718 { 719 if (it.len () == 0) return; 720 unsigned table_initpos = c->length (); 721 if (unlikely (!c->extend_min (this))) return; 722 723 hb_codepoint_t startCharCode = 0xFFFF, endCharCode = 0xFFFF; 724 hb_codepoint_t glyphID = 0; 725 726 for (const auto& _ : +it) 727 { 728 if (startCharCode == 0xFFFF) 729 { 730 startCharCode = _.first; 731 endCharCode = _.first; 732 glyphID = _.second; 733 } 734 else if (!_is_gid_consecutive (endCharCode, startCharCode, glyphID, _.first, _.second)) 735 { 736 CmapSubtableLongGroup grouprecord; 737 grouprecord.startCharCode = startCharCode; 738 grouprecord.endCharCode = endCharCode; 739 grouprecord.glyphID = glyphID; 740 c->copy<CmapSubtableLongGroup> (grouprecord); 741 742 startCharCode = _.first; 743 endCharCode = _.first; 744 glyphID = _.second; 745 } 746 else 747 endCharCode = _.first; 748 } 749 750 CmapSubtableLongGroup record; 751 record.startCharCode = startCharCode; 752 record.endCharCode = endCharCode; 753 record.glyphID = glyphID; 754 c->copy<CmapSubtableLongGroup> (record); 755 756 this->format = 12; 757 this->reserved = 0; 758 this->length = c->length () - table_initpos; 759 this->groups.len = (this->length - min_size)/CmapSubtableLongGroup::static_size; 760 } 761 get_sub_table_sizeOT::CmapSubtableFormat12762 static size_t get_sub_table_size (const hb_sorted_vector_t<CmapSubtableLongGroup> &groups_data) 763 { return 16 + 12 * groups_data.length; } 764 765 private: _is_gid_consecutiveOT::CmapSubtableFormat12766 static bool _is_gid_consecutive (hb_codepoint_t endCharCode, 767 hb_codepoint_t startCharCode, 768 hb_codepoint_t glyphID, 769 hb_codepoint_t cp, 770 hb_codepoint_t new_gid) 771 { 772 return (cp - 1 == endCharCode) && 773 new_gid == glyphID + (cp - startCharCode); 774 } 775 776 }; 777 778 struct CmapSubtableFormat13 : CmapSubtableLongSegmented<CmapSubtableFormat13> 779 { group_get_glyphOT::CmapSubtableFormat13780 static hb_codepoint_t group_get_glyph (const CmapSubtableLongGroup &group, 781 hb_codepoint_t u HB_UNUSED) 782 { return group.glyphID; } 783 }; 784 785 typedef enum 786 { 787 GLYPH_VARIANT_NOT_FOUND = 0, 788 GLYPH_VARIANT_FOUND = 1, 789 GLYPH_VARIANT_USE_DEFAULT = 2 790 } glyph_variant_t; 791 792 struct UnicodeValueRange 793 { cmpOT::UnicodeValueRange794 int cmp (const hb_codepoint_t &codepoint) const 795 { 796 if (codepoint < startUnicodeValue) return -1; 797 if (codepoint > startUnicodeValue + additionalCount) return +1; 798 return 0; 799 } 800 sanitizeOT::UnicodeValueRange801 bool sanitize (hb_sanitize_context_t *c) const 802 { 803 TRACE_SANITIZE (this); 804 return_trace (c->check_struct (this)); 805 } 806 807 HBUINT24 startUnicodeValue; /* First value in this range. */ 808 HBUINT8 additionalCount; /* Number of additional values in this 809 * range. */ 810 public: 811 DEFINE_SIZE_STATIC (4); 812 }; 813 814 struct DefaultUVS : SortedArray32Of<UnicodeValueRange> 815 { collect_unicodesOT::DefaultUVS816 void collect_unicodes (hb_set_t *out) const 817 { 818 unsigned int count = len; 819 for (unsigned int i = 0; i < count; i++) 820 { 821 hb_codepoint_t first = arrayZ[i].startUnicodeValue; 822 hb_codepoint_t last = hb_min ((hb_codepoint_t) (first + arrayZ[i].additionalCount), 823 (hb_codepoint_t) HB_UNICODE_MAX); 824 out->add_range (first, last); 825 } 826 } 827 copyOT::DefaultUVS828 DefaultUVS* copy (hb_serialize_context_t *c, 829 const hb_set_t *unicodes) const 830 { 831 DefaultUVS *out = c->start_embed<DefaultUVS> (); 832 if (unlikely (!out)) return nullptr; 833 auto snap = c->snapshot (); 834 835 HBUINT32 len; 836 len = 0; 837 if (unlikely (!c->copy<HBUINT32> (len))) return nullptr; 838 unsigned init_len = c->length (); 839 840 hb_codepoint_t lastCode = HB_MAP_VALUE_INVALID; 841 int count = -1; 842 843 for (const UnicodeValueRange& _ : as_array ()) 844 { 845 for (const unsigned addcnt : hb_range ((unsigned) _.additionalCount + 1)) 846 { 847 unsigned curEntry = (unsigned) _.startUnicodeValue + addcnt; 848 if (!unicodes->has (curEntry)) continue; 849 count += 1; 850 if (lastCode == HB_MAP_VALUE_INVALID) 851 lastCode = curEntry; 852 else if (lastCode + count != curEntry) 853 { 854 UnicodeValueRange rec; 855 rec.startUnicodeValue = lastCode; 856 rec.additionalCount = count - 1; 857 c->copy<UnicodeValueRange> (rec); 858 859 lastCode = curEntry; 860 count = 0; 861 } 862 } 863 } 864 865 if (lastCode != HB_MAP_VALUE_INVALID) 866 { 867 UnicodeValueRange rec; 868 rec.startUnicodeValue = lastCode; 869 rec.additionalCount = count; 870 c->copy<UnicodeValueRange> (rec); 871 } 872 873 if (c->length () - init_len == 0) 874 { 875 c->revert (snap); 876 return nullptr; 877 } 878 else 879 { 880 if (unlikely (!c->check_assign (out->len, 881 (c->length () - init_len) / UnicodeValueRange::static_size, 882 HB_SERIALIZE_ERROR_INT_OVERFLOW))) return nullptr; 883 return out; 884 } 885 } 886 887 public: 888 DEFINE_SIZE_ARRAY (4, *this); 889 }; 890 891 struct UVSMapping 892 { cmpOT::UVSMapping893 int cmp (const hb_codepoint_t &codepoint) const 894 { return unicodeValue.cmp (codepoint); } 895 sanitizeOT::UVSMapping896 bool sanitize (hb_sanitize_context_t *c) const 897 { 898 TRACE_SANITIZE (this); 899 return_trace (c->check_struct (this)); 900 } 901 902 HBUINT24 unicodeValue; /* Base Unicode value of the UVS */ 903 HBGlyphID16 glyphID; /* Glyph ID of the UVS */ 904 public: 905 DEFINE_SIZE_STATIC (5); 906 }; 907 908 struct NonDefaultUVS : SortedArray32Of<UVSMapping> 909 { collect_unicodesOT::NonDefaultUVS910 void collect_unicodes (hb_set_t *out) const 911 { 912 for (const auto& a : as_array ()) 913 out->add (a.unicodeValue); 914 } 915 collect_mappingOT::NonDefaultUVS916 void collect_mapping (hb_set_t *unicodes, /* OUT */ 917 hb_map_t *mapping /* OUT */) const 918 { 919 for (const auto& a : as_array ()) 920 { 921 hb_codepoint_t unicode = a.unicodeValue; 922 hb_codepoint_t glyphid = a.glyphID; 923 unicodes->add (unicode); 924 mapping->set (unicode, glyphid); 925 } 926 } 927 closure_glyphsOT::NonDefaultUVS928 void closure_glyphs (const hb_set_t *unicodes, 929 hb_set_t *glyphset) const 930 { 931 + as_array () 932 | hb_filter (unicodes, &UVSMapping::unicodeValue) 933 | hb_map (&UVSMapping::glyphID) 934 | hb_sink (glyphset) 935 ; 936 } 937 copyOT::NonDefaultUVS938 NonDefaultUVS* copy (hb_serialize_context_t *c, 939 const hb_set_t *unicodes, 940 const hb_set_t *glyphs_requested, 941 const hb_map_t *glyph_map) const 942 { 943 NonDefaultUVS *out = c->start_embed<NonDefaultUVS> (); 944 if (unlikely (!out)) return nullptr; 945 946 auto it = 947 + as_array () 948 | hb_filter ([&] (const UVSMapping& _) 949 { 950 return unicodes->has (_.unicodeValue) || glyphs_requested->has (_.glyphID); 951 }) 952 ; 953 954 if (!it) return nullptr; 955 956 HBUINT32 len; 957 len = it.len (); 958 if (unlikely (!c->copy<HBUINT32> (len))) return nullptr; 959 960 for (const UVSMapping& _ : it) 961 { 962 UVSMapping mapping; 963 mapping.unicodeValue = _.unicodeValue; 964 mapping.glyphID = glyph_map->get (_.glyphID); 965 c->copy<UVSMapping> (mapping); 966 } 967 968 return out; 969 } 970 971 public: 972 DEFINE_SIZE_ARRAY (4, *this); 973 }; 974 975 struct VariationSelectorRecord 976 { get_glyphOT::VariationSelectorRecord977 glyph_variant_t get_glyph (hb_codepoint_t codepoint, 978 hb_codepoint_t *glyph, 979 const void *base) const 980 { 981 if ((base+defaultUVS).bfind (codepoint)) 982 return GLYPH_VARIANT_USE_DEFAULT; 983 const UVSMapping &nonDefault = (base+nonDefaultUVS).bsearch (codepoint); 984 if (nonDefault.glyphID) 985 { 986 *glyph = nonDefault.glyphID; 987 return GLYPH_VARIANT_FOUND; 988 } 989 return GLYPH_VARIANT_NOT_FOUND; 990 } 991 VariationSelectorRecordOT::VariationSelectorRecord992 VariationSelectorRecord(const VariationSelectorRecord& other) 993 { 994 *this = other; 995 } 996 operator =OT::VariationSelectorRecord997 void operator= (const VariationSelectorRecord& other) 998 { 999 varSelector = other.varSelector; 1000 HBUINT32 offset = other.defaultUVS; 1001 defaultUVS = offset; 1002 offset = other.nonDefaultUVS; 1003 nonDefaultUVS = offset; 1004 } 1005 collect_unicodesOT::VariationSelectorRecord1006 void collect_unicodes (hb_set_t *out, const void *base) const 1007 { 1008 (base+defaultUVS).collect_unicodes (out); 1009 (base+nonDefaultUVS).collect_unicodes (out); 1010 } 1011 collect_mappingOT::VariationSelectorRecord1012 void collect_mapping (const void *base, 1013 hb_set_t *unicodes, /* OUT */ 1014 hb_map_t *mapping /* OUT */) const 1015 { 1016 (base+defaultUVS).collect_unicodes (unicodes); 1017 (base+nonDefaultUVS).collect_mapping (unicodes, mapping); 1018 } 1019 cmpOT::VariationSelectorRecord1020 int cmp (const hb_codepoint_t &variation_selector) const 1021 { return varSelector.cmp (variation_selector); } 1022 sanitizeOT::VariationSelectorRecord1023 bool sanitize (hb_sanitize_context_t *c, const void *base) const 1024 { 1025 TRACE_SANITIZE (this); 1026 return_trace (c->check_struct (this) && 1027 defaultUVS.sanitize (c, base) && 1028 nonDefaultUVS.sanitize (c, base)); 1029 } 1030 1031 hb_pair_t<unsigned, unsigned> copyOT::VariationSelectorRecord1032 copy (hb_serialize_context_t *c, 1033 const hb_set_t *unicodes, 1034 const hb_set_t *glyphs_requested, 1035 const hb_map_t *glyph_map, 1036 const void *base) const 1037 { 1038 auto snap = c->snapshot (); 1039 auto *out = c->embed<VariationSelectorRecord> (*this); 1040 if (unlikely (!out)) return hb_pair (0, 0); 1041 1042 out->defaultUVS = 0; 1043 out->nonDefaultUVS = 0; 1044 1045 unsigned non_default_uvs_objidx = 0; 1046 if (nonDefaultUVS != 0) 1047 { 1048 c->push (); 1049 if (c->copy (base+nonDefaultUVS, unicodes, glyphs_requested, glyph_map)) 1050 non_default_uvs_objidx = c->pop_pack (); 1051 else c->pop_discard (); 1052 } 1053 1054 unsigned default_uvs_objidx = 0; 1055 if (defaultUVS != 0) 1056 { 1057 c->push (); 1058 if (c->copy (base+defaultUVS, unicodes)) 1059 default_uvs_objidx = c->pop_pack (); 1060 else c->pop_discard (); 1061 } 1062 1063 1064 if (!default_uvs_objidx && !non_default_uvs_objidx) 1065 c->revert (snap); 1066 1067 return hb_pair (default_uvs_objidx, non_default_uvs_objidx); 1068 } 1069 1070 HBUINT24 varSelector; /* Variation selector. */ 1071 Offset32To<DefaultUVS> 1072 defaultUVS; /* Offset to Default UVS Table. May be 0. */ 1073 Offset32To<NonDefaultUVS> 1074 nonDefaultUVS; /* Offset to Non-Default UVS Table. May be 0. */ 1075 public: 1076 DEFINE_SIZE_STATIC (11); 1077 }; 1078 1079 struct CmapSubtableFormat14 1080 { get_glyph_variantOT::CmapSubtableFormat141081 glyph_variant_t get_glyph_variant (hb_codepoint_t codepoint, 1082 hb_codepoint_t variation_selector, 1083 hb_codepoint_t *glyph) const 1084 { return record.bsearch (variation_selector).get_glyph (codepoint, glyph, this); } 1085 collect_variation_selectorsOT::CmapSubtableFormat141086 void collect_variation_selectors (hb_set_t *out) const 1087 { 1088 for (const auto& a : record.as_array ()) 1089 out->add (a.varSelector); 1090 } collect_variation_unicodesOT::CmapSubtableFormat141091 void collect_variation_unicodes (hb_codepoint_t variation_selector, 1092 hb_set_t *out) const 1093 { record.bsearch (variation_selector).collect_unicodes (out, this); } 1094 serializeOT::CmapSubtableFormat141095 void serialize (hb_serialize_context_t *c, 1096 const hb_set_t *unicodes, 1097 const hb_set_t *glyphs_requested, 1098 const hb_map_t *glyph_map, 1099 const void *base) 1100 { 1101 auto snap = c->snapshot (); 1102 unsigned table_initpos = c->length (); 1103 const char* init_tail = c->tail; 1104 1105 if (unlikely (!c->extend_min (this))) return; 1106 this->format = 14; 1107 1108 auto src_tbl = reinterpret_cast<const CmapSubtableFormat14*> (base); 1109 1110 /* 1111 * Some versions of OTS require that offsets are in order. Due to the use 1112 * of push()/pop_pack() serializing the variation records in order results 1113 * in the offsets being in reverse order (first record has the largest 1114 * offset). While this is perfectly valid, it will cause some versions of 1115 * OTS to consider this table bad. 1116 * 1117 * So to prevent this issue we serialize the variation records in reverse 1118 * order, so that the offsets are ordered from small to large. Since 1119 * variation records are supposed to be in increasing order of varSelector 1120 * we then have to reverse the order of the written variation selector 1121 * records after everything is finalized. 1122 */ 1123 hb_vector_t<hb_pair_t<unsigned, unsigned>> obj_indices; 1124 for (int i = src_tbl->record.len - 1; i >= 0; i--) 1125 { 1126 hb_pair_t<unsigned, unsigned> result = src_tbl->record[i].copy (c, unicodes, glyphs_requested, glyph_map, base); 1127 if (result.first || result.second) 1128 obj_indices.push (result); 1129 } 1130 1131 if (c->length () - table_initpos == CmapSubtableFormat14::min_size) 1132 { 1133 c->revert (snap); 1134 return; 1135 } 1136 1137 if (unlikely (!c->check_success (!obj_indices.in_error ()))) 1138 return; 1139 1140 int tail_len = init_tail - c->tail; 1141 c->check_assign (this->length, c->length () - table_initpos + tail_len, 1142 HB_SERIALIZE_ERROR_INT_OVERFLOW); 1143 c->check_assign (this->record.len, 1144 (c->length () - table_initpos - CmapSubtableFormat14::min_size) / 1145 VariationSelectorRecord::static_size, 1146 HB_SERIALIZE_ERROR_INT_OVERFLOW); 1147 1148 /* Correct the incorrect write order by reversing the order of the variation 1149 records array. */ 1150 _reverse_variation_records (); 1151 1152 /* Now that records are in the right order, we can set up the offsets. */ 1153 _add_links_to_variation_records (c, obj_indices); 1154 } 1155 _reverse_variation_recordsOT::CmapSubtableFormat141156 void _reverse_variation_records () 1157 { 1158 record.as_array ().reverse (); 1159 } 1160 _add_links_to_variation_recordsOT::CmapSubtableFormat141161 void _add_links_to_variation_records (hb_serialize_context_t *c, 1162 const hb_vector_t<hb_pair_t<unsigned, unsigned>>& obj_indices) 1163 { 1164 for (unsigned i = 0; i < obj_indices.length; i++) 1165 { 1166 /* 1167 * Since the record array has been reversed (see comments in copy()) 1168 * but obj_indices has not been, the indices at obj_indices[i] 1169 * are for the variation record at record[j]. 1170 */ 1171 int j = obj_indices.length - 1 - i; 1172 c->add_link (record[j].defaultUVS, obj_indices[i].first); 1173 c->add_link (record[j].nonDefaultUVS, obj_indices[i].second); 1174 } 1175 } 1176 closure_glyphsOT::CmapSubtableFormat141177 void closure_glyphs (const hb_set_t *unicodes, 1178 hb_set_t *glyphset) const 1179 { 1180 + hb_iter (record) 1181 | hb_filter (hb_bool, &VariationSelectorRecord::nonDefaultUVS) 1182 | hb_map (&VariationSelectorRecord::nonDefaultUVS) 1183 | hb_map (hb_add (this)) 1184 | hb_apply ([=] (const NonDefaultUVS& _) { _.closure_glyphs (unicodes, glyphset); }) 1185 ; 1186 } 1187 collect_unicodesOT::CmapSubtableFormat141188 void collect_unicodes (hb_set_t *out) const 1189 { 1190 for (const VariationSelectorRecord& _ : record) 1191 _.collect_unicodes (out, this); 1192 } 1193 collect_mappingOT::CmapSubtableFormat141194 void collect_mapping (hb_set_t *unicodes, /* OUT */ 1195 hb_map_t *mapping /* OUT */) const 1196 { 1197 for (const VariationSelectorRecord& _ : record) 1198 _.collect_mapping (this, unicodes, mapping); 1199 } 1200 sanitizeOT::CmapSubtableFormat141201 bool sanitize (hb_sanitize_context_t *c) const 1202 { 1203 TRACE_SANITIZE (this); 1204 return_trace (c->check_struct (this) && 1205 record.sanitize (c, this)); 1206 } 1207 1208 protected: 1209 HBUINT16 format; /* Format number is set to 14. */ 1210 HBUINT32 length; /* Byte length of this subtable. */ 1211 SortedArray32Of<VariationSelectorRecord> 1212 record; /* Variation selector records; sorted 1213 * in increasing order of `varSelector'. */ 1214 public: 1215 DEFINE_SIZE_ARRAY (10, record); 1216 }; 1217 1218 struct CmapSubtable 1219 { 1220 /* Note: We intentionally do NOT implement subtable formats 2 and 8. */ 1221 get_glyphOT::CmapSubtable1222 bool get_glyph (hb_codepoint_t codepoint, 1223 hb_codepoint_t *glyph) const 1224 { 1225 switch (u.format) { 1226 case 0: return u.format0 .get_glyph (codepoint, glyph); 1227 case 4: return u.format4 .get_glyph (codepoint, glyph); 1228 case 6: return u.format6 .get_glyph (codepoint, glyph); 1229 case 10: return u.format10.get_glyph (codepoint, glyph); 1230 case 12: return u.format12.get_glyph (codepoint, glyph); 1231 case 13: return u.format13.get_glyph (codepoint, glyph); 1232 case 14: 1233 default: return false; 1234 } 1235 } collect_unicodesOT::CmapSubtable1236 void collect_unicodes (hb_set_t *out, unsigned int num_glyphs = UINT_MAX) const 1237 { 1238 switch (u.format) { 1239 case 0: u.format0 .collect_unicodes (out); return; 1240 case 4: u.format4 .collect_unicodes (out); return; 1241 case 6: u.format6 .collect_unicodes (out); return; 1242 case 10: u.format10.collect_unicodes (out); return; 1243 case 12: u.format12.collect_unicodes (out, num_glyphs); return; 1244 case 13: u.format13.collect_unicodes (out, num_glyphs); return; 1245 case 14: 1246 default: return; 1247 } 1248 } 1249 collect_mappingOT::CmapSubtable1250 void collect_mapping (hb_set_t *unicodes, /* OUT */ 1251 hb_map_t *mapping, /* OUT */ 1252 unsigned num_glyphs = UINT_MAX) const 1253 { 1254 switch (u.format) { 1255 case 0: u.format0 .collect_mapping (unicodes, mapping); return; 1256 case 4: u.format4 .collect_mapping (unicodes, mapping); return; 1257 case 6: u.format6 .collect_mapping (unicodes, mapping); return; 1258 case 10: u.format10.collect_mapping (unicodes, mapping); return; 1259 case 12: u.format12.collect_mapping (unicodes, mapping, num_glyphs); return; 1260 case 13: u.format13.collect_mapping (unicodes, mapping, num_glyphs); return; 1261 case 14: 1262 default: return; 1263 } 1264 } 1265 get_languageOT::CmapSubtable1266 unsigned get_language () const 1267 { 1268 switch (u.format) { 1269 case 0: return u.format0 .get_language (); 1270 case 4: return u.format4 .get_language (); 1271 case 6: return u.format6 .get_language (); 1272 case 10: return u.format10.get_language (); 1273 case 12: return u.format12.get_language (); 1274 case 13: return u.format13.get_language (); 1275 case 14: 1276 default: return 0; 1277 } 1278 } 1279 1280 template<typename Iterator, 1281 hb_requires (hb_is_iterator (Iterator))> serializeOT::CmapSubtable1282 void serialize (hb_serialize_context_t *c, 1283 Iterator it, 1284 unsigned format, 1285 const hb_subset_plan_t *plan, 1286 const void *base) 1287 { 1288 switch (format) { 1289 case 4: return u.format4.serialize (c, it); 1290 case 12: return u.format12.serialize (c, it); 1291 case 14: return u.format14.serialize (c, plan->unicodes, plan->glyphs_requested, plan->glyph_map, base); 1292 default: return; 1293 } 1294 } 1295 sanitizeOT::CmapSubtable1296 bool sanitize (hb_sanitize_context_t *c) const 1297 { 1298 TRACE_SANITIZE (this); 1299 if (!u.format.sanitize (c)) return_trace (false); 1300 switch (u.format) { 1301 case 0: return_trace (u.format0 .sanitize (c)); 1302 case 4: return_trace (u.format4 .sanitize (c)); 1303 case 6: return_trace (u.format6 .sanitize (c)); 1304 case 10: return_trace (u.format10.sanitize (c)); 1305 case 12: return_trace (u.format12.sanitize (c)); 1306 case 13: return_trace (u.format13.sanitize (c)); 1307 case 14: return_trace (u.format14.sanitize (c)); 1308 default:return_trace (true); 1309 } 1310 } 1311 1312 public: 1313 union { 1314 HBUINT16 format; /* Format identifier */ 1315 CmapSubtableFormat0 format0; 1316 CmapSubtableFormat4 format4; 1317 CmapSubtableFormat6 format6; 1318 CmapSubtableFormat10 format10; 1319 CmapSubtableFormat12 format12; 1320 CmapSubtableFormat13 format13; 1321 CmapSubtableFormat14 format14; 1322 } u; 1323 public: 1324 DEFINE_SIZE_UNION (2, format); 1325 }; 1326 1327 1328 struct EncodingRecord 1329 { cmpOT::EncodingRecord1330 int cmp (const EncodingRecord &other) const 1331 { 1332 int ret; 1333 ret = platformID.cmp (other.platformID); 1334 if (ret) return ret; 1335 ret = encodingID.cmp (other.encodingID); 1336 if (ret) return ret; 1337 return 0; 1338 } 1339 sanitizeOT::EncodingRecord1340 bool sanitize (hb_sanitize_context_t *c, const void *base) const 1341 { 1342 TRACE_SANITIZE (this); 1343 return_trace (c->check_struct (this) && 1344 subtable.sanitize (c, base)); 1345 } 1346 1347 template<typename Iterator, 1348 hb_requires (hb_is_iterator (Iterator))> copyOT::EncodingRecord1349 EncodingRecord* copy (hb_serialize_context_t *c, 1350 Iterator it, 1351 unsigned format, 1352 const void *base, 1353 const hb_subset_plan_t *plan, 1354 /* INOUT */ unsigned *objidx) const 1355 { 1356 TRACE_SERIALIZE (this); 1357 auto snap = c->snapshot (); 1358 auto *out = c->embed (this); 1359 if (unlikely (!out)) return_trace (nullptr); 1360 out->subtable = 0; 1361 1362 if (*objidx == 0) 1363 { 1364 CmapSubtable *cmapsubtable = c->push<CmapSubtable> (); 1365 unsigned origin_length = c->length (); 1366 cmapsubtable->serialize (c, it, format, plan, &(base+subtable)); 1367 if (c->length () - origin_length > 0) *objidx = c->pop_pack (); 1368 else c->pop_discard (); 1369 } 1370 1371 if (*objidx == 0) 1372 { 1373 c->revert (snap); 1374 return_trace (nullptr); 1375 } 1376 1377 c->add_link (out->subtable, *objidx); 1378 return_trace (out); 1379 } 1380 1381 HBUINT16 platformID; /* Platform ID. */ 1382 HBUINT16 encodingID; /* Platform-specific encoding ID. */ 1383 Offset32To<CmapSubtable> 1384 subtable; /* Byte offset from beginning of table to the subtable for this encoding. */ 1385 public: 1386 DEFINE_SIZE_STATIC (8); 1387 }; 1388 1389 struct cmap 1390 { 1391 static constexpr hb_tag_t tableTag = HB_OT_TAG_cmap; 1392 1393 template<typename Iterator, typename EncodingRecIter, 1394 hb_requires (hb_is_iterator (EncodingRecIter))> serializeOT::cmap1395 bool serialize (hb_serialize_context_t *c, 1396 Iterator it, 1397 EncodingRecIter encodingrec_iter, 1398 const void *base, 1399 const hb_subset_plan_t *plan, 1400 bool drop_format_4 = false) 1401 { 1402 if (unlikely (!c->extend_min ((*this)))) return false; 1403 this->version = 0; 1404 1405 unsigned format4objidx = 0, format12objidx = 0, format14objidx = 0; 1406 auto snap = c->snapshot (); 1407 1408 for (const EncodingRecord& _ : encodingrec_iter) 1409 { 1410 if (c->in_error ()) 1411 return false; 1412 1413 unsigned format = (base+_.subtable).u.format; 1414 if (format != 4 && format != 12 && format != 14) continue; 1415 1416 hb_set_t unicodes_set; 1417 (base+_.subtable).collect_unicodes (&unicodes_set); 1418 1419 if (!drop_format_4 && format == 4) 1420 { 1421 c->copy (_, + it | hb_filter (unicodes_set, hb_first), 4u, base, plan, &format4objidx); 1422 if (c->in_error () && c->only_overflow ()) 1423 { 1424 // cmap4 overflowed, reset and retry serialization without format 4 subtables. 1425 c->revert (snap); 1426 return serialize (c, it, 1427 encodingrec_iter, 1428 base, 1429 plan, 1430 true); 1431 } 1432 } 1433 1434 else if (format == 12) 1435 { 1436 if (_can_drop (_, unicodes_set, base, + it | hb_map (hb_first), encodingrec_iter)) continue; 1437 c->copy (_, + it | hb_filter (unicodes_set, hb_first), 12u, base, plan, &format12objidx); 1438 } 1439 else if (format == 14) c->copy (_, it, 14u, base, plan, &format14objidx); 1440 } 1441 c->check_assign(this->encodingRecord.len, 1442 (c->length () - cmap::min_size)/EncodingRecord::static_size, 1443 HB_SERIALIZE_ERROR_INT_OVERFLOW); 1444 1445 // Fail if format 4 was dropped and there is no cmap12. 1446 return !drop_format_4 || format12objidx; 1447 } 1448 1449 template<typename Iterator, typename EncodingRecordIterator, 1450 hb_requires (hb_is_iterator (Iterator)), 1451 hb_requires (hb_is_iterator (EncodingRecordIterator))> _can_dropOT::cmap1452 bool _can_drop (const EncodingRecord& cmap12, 1453 const hb_set_t& cmap12_unicodes, 1454 const void* base, 1455 Iterator subset_unicodes, 1456 EncodingRecordIterator encoding_records) 1457 { 1458 for (auto cp : + subset_unicodes | hb_filter (cmap12_unicodes)) 1459 { 1460 if (cp >= 0x10000) return false; 1461 } 1462 1463 unsigned target_platform; 1464 unsigned target_encoding; 1465 unsigned target_language = (base+cmap12.subtable).get_language (); 1466 1467 if (cmap12.platformID == 0 && cmap12.encodingID == 4) 1468 { 1469 target_platform = 0; 1470 target_encoding = 3; 1471 } else if (cmap12.platformID == 3 && cmap12.encodingID == 10) { 1472 target_platform = 3; 1473 target_encoding = 1; 1474 } else { 1475 return false; 1476 } 1477 1478 for (const auto& _ : encoding_records) 1479 { 1480 if (_.platformID != target_platform 1481 || _.encodingID != target_encoding 1482 || (base+_.subtable).get_language() != target_language) 1483 continue; 1484 1485 hb_set_t sibling_unicodes; 1486 (base+_.subtable).collect_unicodes (&sibling_unicodes); 1487 1488 auto cmap12 = + subset_unicodes | hb_filter (cmap12_unicodes); 1489 auto sibling = + subset_unicodes | hb_filter (sibling_unicodes); 1490 for (; cmap12 && sibling; cmap12++, sibling++) 1491 { 1492 unsigned a = *cmap12; 1493 unsigned b = *sibling; 1494 if (a != b) return false; 1495 } 1496 1497 return !cmap12 && !sibling; 1498 } 1499 1500 return false; 1501 } 1502 closure_glyphsOT::cmap1503 void closure_glyphs (const hb_set_t *unicodes, 1504 hb_set_t *glyphset) const 1505 { 1506 + hb_iter (encodingRecord) 1507 | hb_map (&EncodingRecord::subtable) 1508 | hb_map (hb_add (this)) 1509 | hb_filter ([&] (const CmapSubtable& _) { return _.u.format == 14; }) 1510 | hb_apply ([=] (const CmapSubtable& _) { _.u.format14.closure_glyphs (unicodes, glyphset); }) 1511 ; 1512 } 1513 subsetOT::cmap1514 bool subset (hb_subset_context_t *c) const 1515 { 1516 TRACE_SUBSET (this); 1517 1518 cmap *cmap_prime = c->serializer->start_embed<cmap> (); 1519 if (unlikely (!c->serializer->check_success (cmap_prime))) return_trace (false); 1520 1521 auto encodingrec_iter = 1522 + hb_iter (encodingRecord) 1523 | hb_filter ([&] (const EncodingRecord& _) 1524 { 1525 if ((_.platformID == 0 && _.encodingID == 3) || 1526 (_.platformID == 0 && _.encodingID == 4) || 1527 (_.platformID == 3 && _.encodingID == 1) || 1528 (_.platformID == 3 && _.encodingID == 10) || 1529 (this + _.subtable).u.format == 14) 1530 return true; 1531 1532 return false; 1533 }) 1534 ; 1535 1536 if (unlikely (!encodingrec_iter.len ())) return_trace (false); 1537 1538 const EncodingRecord *unicode_bmp= nullptr, *unicode_ucs4 = nullptr, *ms_bmp = nullptr, *ms_ucs4 = nullptr; 1539 bool has_format12 = false; 1540 1541 for (const EncodingRecord& _ : encodingrec_iter) 1542 { 1543 unsigned format = (this + _.subtable).u.format; 1544 if (format == 12) has_format12 = true; 1545 1546 const EncodingRecord *table = hb_addressof (_); 1547 if (_.platformID == 0 && _.encodingID == 3) unicode_bmp = table; 1548 else if (_.platformID == 0 && _.encodingID == 4) unicode_ucs4 = table; 1549 else if (_.platformID == 3 && _.encodingID == 1) ms_bmp = table; 1550 else if (_.platformID == 3 && _.encodingID == 10) ms_ucs4 = table; 1551 } 1552 1553 if (unlikely (!has_format12 && !unicode_bmp && !ms_bmp)) return_trace (false); 1554 if (unlikely (has_format12 && (!unicode_ucs4 && !ms_ucs4))) return_trace (false); 1555 1556 auto it = 1557 + hb_iter (c->plan->unicodes) 1558 | hb_map ([&] (hb_codepoint_t _) 1559 { 1560 hb_codepoint_t new_gid = HB_MAP_VALUE_INVALID; 1561 c->plan->new_gid_for_codepoint (_, &new_gid); 1562 return hb_pair_t<hb_codepoint_t, hb_codepoint_t> (_, new_gid); 1563 }) 1564 | hb_filter ([&] (const hb_pair_t<hb_codepoint_t, hb_codepoint_t> _) 1565 { return (_.second != HB_MAP_VALUE_INVALID); }) 1566 ; 1567 1568 return_trace (cmap_prime->serialize (c->serializer, it, encodingrec_iter, this, c->plan)); 1569 } 1570 find_best_subtableOT::cmap1571 const CmapSubtable *find_best_subtable (bool *symbol = nullptr) const 1572 { 1573 if (symbol) *symbol = false; 1574 1575 const CmapSubtable *subtable; 1576 1577 /* Symbol subtable. 1578 * Prefer symbol if available. 1579 * https://github.com/harfbuzz/harfbuzz/issues/1918 */ 1580 if ((subtable = this->find_subtable (3, 0))) 1581 { 1582 if (symbol) *symbol = true; 1583 return subtable; 1584 } 1585 1586 /* 32-bit subtables. */ 1587 if ((subtable = this->find_subtable (3, 10))) return subtable; 1588 if ((subtable = this->find_subtable (0, 6))) return subtable; 1589 if ((subtable = this->find_subtable (0, 4))) return subtable; 1590 1591 /* 16-bit subtables. */ 1592 if ((subtable = this->find_subtable (3, 1))) return subtable; 1593 if ((subtable = this->find_subtable (0, 3))) return subtable; 1594 if ((subtable = this->find_subtable (0, 2))) return subtable; 1595 if ((subtable = this->find_subtable (0, 1))) return subtable; 1596 if ((subtable = this->find_subtable (0, 0))) return subtable; 1597 1598 /* Meh. */ 1599 return &Null (CmapSubtable); 1600 } 1601 1602 struct accelerator_t 1603 { initOT::cmap::accelerator_t1604 void init (hb_face_t *face) 1605 { 1606 this->table = hb_sanitize_context_t ().reference_table<cmap> (face); 1607 bool symbol; 1608 this->subtable = table->find_best_subtable (&symbol); 1609 this->subtable_uvs = &Null (CmapSubtableFormat14); 1610 { 1611 const CmapSubtable *st = table->find_subtable (0, 5); 1612 if (st && st->u.format == 14) 1613 subtable_uvs = &st->u.format14; 1614 } 1615 1616 this->get_glyph_data = subtable; 1617 if (unlikely (symbol)) 1618 this->get_glyph_funcZ = get_glyph_from_symbol<CmapSubtable>; 1619 else 1620 { 1621 switch (subtable->u.format) { 1622 /* Accelerate format 4 and format 12. */ 1623 default: 1624 this->get_glyph_funcZ = get_glyph_from<CmapSubtable>; 1625 break; 1626 case 12: 1627 this->get_glyph_funcZ = get_glyph_from<CmapSubtableFormat12>; 1628 break; 1629 case 4: 1630 { 1631 this->format4_accel.init (&subtable->u.format4); 1632 this->get_glyph_data = &this->format4_accel; 1633 this->get_glyph_funcZ = this->format4_accel.get_glyph_func; 1634 break; 1635 } 1636 } 1637 } 1638 } 1639 finiOT::cmap::accelerator_t1640 void fini () { this->table.destroy (); } 1641 get_nominal_glyphOT::cmap::accelerator_t1642 bool get_nominal_glyph (hb_codepoint_t unicode, 1643 hb_codepoint_t *glyph) const 1644 { 1645 if (unlikely (!this->get_glyph_funcZ)) return false; 1646 return this->get_glyph_funcZ (this->get_glyph_data, unicode, glyph); 1647 } get_nominal_glyphsOT::cmap::accelerator_t1648 unsigned int get_nominal_glyphs (unsigned int count, 1649 const hb_codepoint_t *first_unicode, 1650 unsigned int unicode_stride, 1651 hb_codepoint_t *first_glyph, 1652 unsigned int glyph_stride) const 1653 { 1654 if (unlikely (!this->get_glyph_funcZ)) return 0; 1655 1656 hb_cmap_get_glyph_func_t get_glyph_funcZ = this->get_glyph_funcZ; 1657 const void *get_glyph_data = this->get_glyph_data; 1658 1659 unsigned int done; 1660 for (done = 0; 1661 done < count && get_glyph_funcZ (get_glyph_data, *first_unicode, first_glyph); 1662 done++) 1663 { 1664 first_unicode = &StructAtOffsetUnaligned<hb_codepoint_t> (first_unicode, unicode_stride); 1665 first_glyph = &StructAtOffsetUnaligned<hb_codepoint_t> (first_glyph, glyph_stride); 1666 } 1667 return done; 1668 } 1669 get_variation_glyphOT::cmap::accelerator_t1670 bool get_variation_glyph (hb_codepoint_t unicode, 1671 hb_codepoint_t variation_selector, 1672 hb_codepoint_t *glyph) const 1673 { 1674 switch (this->subtable_uvs->get_glyph_variant (unicode, 1675 variation_selector, 1676 glyph)) 1677 { 1678 case GLYPH_VARIANT_NOT_FOUND: return false; 1679 case GLYPH_VARIANT_FOUND: return true; 1680 case GLYPH_VARIANT_USE_DEFAULT: break; 1681 } 1682 1683 return get_nominal_glyph (unicode, glyph); 1684 } 1685 collect_unicodesOT::cmap::accelerator_t1686 void collect_unicodes (hb_set_t *out, unsigned int num_glyphs) const 1687 { subtable->collect_unicodes (out, num_glyphs); } collect_mappingOT::cmap::accelerator_t1688 void collect_mapping (hb_set_t *unicodes, hb_map_t *mapping, 1689 unsigned num_glyphs = UINT_MAX) const 1690 { subtable->collect_mapping (unicodes, mapping, num_glyphs); } collect_variation_selectorsOT::cmap::accelerator_t1691 void collect_variation_selectors (hb_set_t *out) const 1692 { subtable_uvs->collect_variation_selectors (out); } collect_variation_unicodesOT::cmap::accelerator_t1693 void collect_variation_unicodes (hb_codepoint_t variation_selector, 1694 hb_set_t *out) const 1695 { subtable_uvs->collect_variation_unicodes (variation_selector, out); } 1696 1697 protected: 1698 typedef bool (*hb_cmap_get_glyph_func_t) (const void *obj, 1699 hb_codepoint_t codepoint, 1700 hb_codepoint_t *glyph); 1701 1702 template <typename Type> get_glyph_fromOT::cmap::accelerator_t1703 HB_INTERNAL static bool get_glyph_from (const void *obj, 1704 hb_codepoint_t codepoint, 1705 hb_codepoint_t *glyph) 1706 { 1707 const Type *typed_obj = (const Type *) obj; 1708 return typed_obj->get_glyph (codepoint, glyph); 1709 } 1710 1711 template <typename Type> get_glyph_from_symbolOT::cmap::accelerator_t1712 HB_INTERNAL static bool get_glyph_from_symbol (const void *obj, 1713 hb_codepoint_t codepoint, 1714 hb_codepoint_t *glyph) 1715 { 1716 const Type *typed_obj = (const Type *) obj; 1717 if (likely (typed_obj->get_glyph (codepoint, glyph))) 1718 return true; 1719 1720 if (codepoint <= 0x00FFu) 1721 { 1722 /* For symbol-encoded OpenType fonts, we duplicate the 1723 * U+F000..F0FF range at U+0000..U+00FF. That's what 1724 * Windows seems to do, and that's hinted about at: 1725 * https://docs.microsoft.com/en-us/typography/opentype/spec/recom 1726 * under "Non-Standard (Symbol) Fonts". */ 1727 return typed_obj->get_glyph (0xF000u + codepoint, glyph); 1728 } 1729 1730 return false; 1731 } 1732 1733 private: 1734 hb_nonnull_ptr_t<const CmapSubtable> subtable; 1735 hb_nonnull_ptr_t<const CmapSubtableFormat14> subtable_uvs; 1736 1737 hb_cmap_get_glyph_func_t get_glyph_funcZ; 1738 const void *get_glyph_data; 1739 1740 CmapSubtableFormat4::accelerator_t format4_accel; 1741 1742 public: 1743 hb_blob_ptr_t<cmap> table; 1744 }; 1745 1746 protected: 1747 find_subtableOT::cmap1748 const CmapSubtable *find_subtable (unsigned int platform_id, 1749 unsigned int encoding_id) const 1750 { 1751 EncodingRecord key; 1752 key.platformID = platform_id; 1753 key.encodingID = encoding_id; 1754 1755 const EncodingRecord &result = encodingRecord.bsearch (key); 1756 if (!result.subtable) 1757 return nullptr; 1758 1759 return &(this+result.subtable); 1760 } 1761 find_encodingrecOT::cmap1762 const EncodingRecord *find_encodingrec (unsigned int platform_id, 1763 unsigned int encoding_id) const 1764 { 1765 EncodingRecord key; 1766 key.platformID = platform_id; 1767 key.encodingID = encoding_id; 1768 1769 return encodingRecord.as_array ().bsearch (key); 1770 } 1771 find_subtableOT::cmap1772 bool find_subtable (unsigned format) const 1773 { 1774 auto it = 1775 + hb_iter (encodingRecord) 1776 | hb_map (&EncodingRecord::subtable) 1777 | hb_map (hb_add (this)) 1778 | hb_filter ([&] (const CmapSubtable& _) { return _.u.format == format; }) 1779 ; 1780 1781 return it.len (); 1782 } 1783 1784 public: 1785 sanitizeOT::cmap1786 bool sanitize (hb_sanitize_context_t *c) const 1787 { 1788 TRACE_SANITIZE (this); 1789 return_trace (c->check_struct (this) && 1790 likely (version == 0) && 1791 encodingRecord.sanitize (c, this)); 1792 } 1793 1794 protected: 1795 HBUINT16 version; /* Table version number (0). */ 1796 SortedArray16Of<EncodingRecord> 1797 encodingRecord; /* Encoding tables. */ 1798 public: 1799 DEFINE_SIZE_ARRAY (4, encodingRecord); 1800 }; 1801 1802 struct cmap_accelerator_t : cmap::accelerator_t {}; 1803 1804 } /* namespace OT */ 1805 1806 1807 #endif /* HB_OT_CMAP_TABLE_HH */ 1808