1 /* 2 * Copyright © 2014 Google, Inc. 3 * 4 * This is part of HarfBuzz, a text shaping library. 5 * 6 * Permission is hereby granted, without written agreement and without 7 * license or royalty fees, to use, copy, modify, and distribute this 8 * software and its documentation for any purpose, provided that the 9 * above copyright notice and the following two paragraphs appear in 10 * all copies of this software. 11 * 12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR 13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN 15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 16 * DAMAGE. 17 * 18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, 19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS 21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO 22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 23 * 24 * Google Author(s): Behdad Esfahbod 25 */ 26 27 #ifndef HB_OT_CMAP_TABLE_HH 28 #define HB_OT_CMAP_TABLE_HH 29 30 #include "hb-open-type.hh" 31 #include "hb-set.hh" 32 33 /* 34 * cmap -- Character to Glyph Index Mapping 35 * https://docs.microsoft.com/en-us/typography/opentype/spec/cmap 36 */ 37 #define HB_OT_TAG_cmap HB_TAG('c','m','a','p') 38 39 namespace OT { 40 41 42 struct CmapSubtableFormat0 43 { get_glyphOT::CmapSubtableFormat044 bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const 45 { 46 hb_codepoint_t gid = codepoint < 256 ? glyphIdArray[codepoint] : 0; 47 if (!gid) 48 return false; 49 *glyph = gid; 50 return true; 51 } collect_unicodesOT::CmapSubtableFormat052 void collect_unicodes (hb_set_t *out) const 53 { 54 for (unsigned int i = 0; i < 256; i++) 55 if (glyphIdArray[i]) 56 out->add (i); 57 } 58 sanitizeOT::CmapSubtableFormat059 bool sanitize (hb_sanitize_context_t *c) const 60 { 61 TRACE_SANITIZE (this); 62 return_trace (c->check_struct (this)); 63 } 64 65 protected: 66 HBUINT16 format; /* Format number is set to 0. */ 67 HBUINT16 length; /* Byte length of this subtable. */ 68 HBUINT16 language; /* Ignore. */ 69 HBUINT8 glyphIdArray[256];/* An array that maps character 70 * code to glyph index values. */ 71 public: 72 DEFINE_SIZE_STATIC (6 + 256); 73 }; 74 75 struct CmapSubtableFormat4 76 { 77 struct segment_plan 78 { 79 HBUINT16 start_code; 80 HBUINT16 end_code; 81 bool use_delta; 82 }; 83 serializeOT::CmapSubtableFormat484 bool serialize (hb_serialize_context_t *c, 85 const hb_subset_plan_t *plan, 86 const hb_vector_t<segment_plan> &segments) 87 { 88 TRACE_SERIALIZE (this); 89 90 if (unlikely (!c->extend_min (*this))) return_trace (false); 91 92 this->format.set (4); 93 this->length.set (get_sub_table_size (segments)); 94 95 this->segCountX2.set (segments.len * 2); 96 this->entrySelector.set (MAX (1u, hb_bit_storage (segments.len)) - 1); 97 this->searchRange.set (2 * (1u << this->entrySelector)); 98 this->rangeShift.set (segments.len * 2 > this->searchRange 99 ? 2 * segments.len - this->searchRange 100 : 0); 101 102 HBUINT16 *end_count = c->allocate_size<HBUINT16> (HBUINT16::static_size * segments.len); 103 c->allocate_size<HBUINT16> (HBUINT16::static_size); // 2 bytes of padding. 104 HBUINT16 *start_count = c->allocate_size<HBUINT16> (HBUINT16::static_size * segments.len); 105 HBINT16 *id_delta = c->allocate_size<HBINT16> (HBUINT16::static_size * segments.len); 106 HBUINT16 *id_range_offset = c->allocate_size<HBUINT16> (HBUINT16::static_size * segments.len); 107 108 if (id_range_offset == nullptr) 109 return_trace (false); 110 111 for (unsigned int i = 0; i < segments.len; i++) 112 { 113 end_count[i].set (segments[i].end_code); 114 start_count[i].set (segments[i].start_code); 115 if (segments[i].use_delta) 116 { 117 hb_codepoint_t cp = segments[i].start_code; 118 hb_codepoint_t start_gid = 0; 119 if (unlikely (!plan->new_gid_for_codepoint (cp, &start_gid) && cp != 0xFFFF)) 120 return_trace (false); 121 id_delta[i].set (start_gid - segments[i].start_code); 122 } else { 123 id_delta[i].set (0); 124 unsigned int num_codepoints = segments[i].end_code - segments[i].start_code + 1; 125 HBUINT16 *glyph_id_array = c->allocate_size<HBUINT16> (HBUINT16::static_size * num_codepoints); 126 if (glyph_id_array == nullptr) 127 return_trace (false); 128 // From the cmap spec: 129 // 130 // id_range_offset[i]/2 131 // + (cp - segments[i].start_code) 132 // + (id_range_offset + i) 133 // = 134 // glyph_id_array + (cp - segments[i].start_code) 135 // 136 // So, solve for id_range_offset[i]: 137 // 138 // id_range_offset[i] 139 // = 140 // 2 * (glyph_id_array - id_range_offset - i) 141 id_range_offset[i].set (2 * ( 142 glyph_id_array - id_range_offset - i)); 143 for (unsigned int j = 0; j < num_codepoints; j++) 144 { 145 hb_codepoint_t cp = segments[i].start_code + j; 146 hb_codepoint_t new_gid; 147 if (unlikely (!plan->new_gid_for_codepoint (cp, &new_gid))) 148 return_trace (false); 149 glyph_id_array[j].set (new_gid); 150 } 151 } 152 } 153 154 return_trace (true); 155 } 156 get_sub_table_sizeOT::CmapSubtableFormat4157 static size_t get_sub_table_size (const hb_vector_t<segment_plan> &segments) 158 { 159 size_t segment_size = 0; 160 for (unsigned int i = 0; i < segments.len; i++) 161 { 162 // Parallel array entries 163 segment_size += 164 2 // end count 165 + 2 // start count 166 + 2 // delta 167 + 2; // range offset 168 169 if (!segments[i].use_delta) 170 // Add bytes for the glyph index array entries for this segment. 171 segment_size += (segments[i].end_code - segments[i].start_code + 1) * 2; 172 } 173 174 return min_size 175 + 2 // Padding 176 + segment_size; 177 } 178 create_sub_table_planOT::CmapSubtableFormat4179 static bool create_sub_table_plan (const hb_subset_plan_t *plan, 180 hb_vector_t<segment_plan> *segments) 181 { 182 segment_plan *segment = nullptr; 183 hb_codepoint_t last_gid = 0; 184 185 hb_codepoint_t cp = HB_SET_VALUE_INVALID; 186 while (plan->unicodes->next (&cp)) { 187 hb_codepoint_t new_gid; 188 if (unlikely (!plan->new_gid_for_codepoint (cp, &new_gid))) 189 { 190 DEBUG_MSG(SUBSET, nullptr, "Unable to find new gid for %04x", cp); 191 return false; 192 } 193 194 /* Stop adding to cmap if we are now outside of unicode BMP. */ 195 if (cp > 0xFFFF) break; 196 197 if (!segment || 198 cp != segment->end_code + 1u) 199 { 200 segment = segments->push (); 201 segment->start_code.set (cp); 202 segment->end_code.set (cp); 203 segment->use_delta = true; 204 } else { 205 segment->end_code.set (cp); 206 if (last_gid + 1u != new_gid) 207 // gid's are not consecutive in this segment so delta 208 // cannot be used. 209 segment->use_delta = false; 210 } 211 212 last_gid = new_gid; 213 } 214 215 // There must be a final entry with end_code == 0xFFFF. Check if we need to add one. 216 if (segment == nullptr || segment->end_code != 0xFFFF) 217 { 218 segment = segments->push (); 219 segment->start_code.set (0xFFFF); 220 segment->end_code.set (0xFFFF); 221 segment->use_delta = true; 222 } 223 224 return true; 225 } 226 227 struct accelerator_t 228 { accelerator_tOT::CmapSubtableFormat4::accelerator_t229 accelerator_t () {} accelerator_tOT::CmapSubtableFormat4::accelerator_t230 accelerator_t (const CmapSubtableFormat4 *subtable) { init (subtable); } ~accelerator_tOT::CmapSubtableFormat4::accelerator_t231 ~accelerator_t () { fini (); } 232 initOT::CmapSubtableFormat4::accelerator_t233 void init (const CmapSubtableFormat4 *subtable) 234 { 235 segCount = subtable->segCountX2 / 2; 236 endCount = subtable->values.arrayZ; 237 startCount = endCount + segCount + 1; 238 idDelta = startCount + segCount; 239 idRangeOffset = idDelta + segCount; 240 glyphIdArray = idRangeOffset + segCount; 241 glyphIdArrayLength = (subtable->length - 16 - 8 * segCount) / 2; 242 } finiOT::CmapSubtableFormat4::accelerator_t243 void fini () {} 244 get_glyphOT::CmapSubtableFormat4::accelerator_t245 bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const 246 { 247 /* Custom two-array bsearch. */ 248 int min = 0, max = (int) this->segCount - 1; 249 const HBUINT16 *startCount = this->startCount; 250 const HBUINT16 *endCount = this->endCount; 251 unsigned int i; 252 while (min <= max) 253 { 254 int mid = ((unsigned int) min + (unsigned int) max) / 2; 255 if (codepoint < startCount[mid]) 256 max = mid - 1; 257 else if (codepoint > endCount[mid]) 258 min = mid + 1; 259 else 260 { 261 i = mid; 262 goto found; 263 } 264 } 265 return false; 266 267 found: 268 hb_codepoint_t gid; 269 unsigned int rangeOffset = this->idRangeOffset[i]; 270 if (rangeOffset == 0) 271 gid = codepoint + this->idDelta[i]; 272 else 273 { 274 /* Somebody has been smoking... */ 275 unsigned int index = rangeOffset / 2 + (codepoint - this->startCount[i]) + i - this->segCount; 276 if (unlikely (index >= this->glyphIdArrayLength)) 277 return false; 278 gid = this->glyphIdArray[index]; 279 if (unlikely (!gid)) 280 return false; 281 gid += this->idDelta[i]; 282 } 283 gid &= 0xFFFFu; 284 if (!gid) 285 return false; 286 *glyph = gid; 287 return true; 288 } get_glyph_funcOT::CmapSubtableFormat4::accelerator_t289 static bool get_glyph_func (const void *obj, hb_codepoint_t codepoint, hb_codepoint_t *glyph) 290 { 291 return ((const accelerator_t *) obj)->get_glyph (codepoint, glyph); 292 } collect_unicodesOT::CmapSubtableFormat4::accelerator_t293 void collect_unicodes (hb_set_t *out) const 294 { 295 unsigned int count = this->segCount; 296 if (count && this->startCount[count - 1] == 0xFFFFu) 297 count--; /* Skip sentinel segment. */ 298 for (unsigned int i = 0; i < count; i++) 299 { 300 unsigned int rangeOffset = this->idRangeOffset[i]; 301 if (rangeOffset == 0) 302 out->add_range (this->startCount[i], this->endCount[i]); 303 else 304 { 305 for (hb_codepoint_t codepoint = this->startCount[i]; 306 codepoint <= this->endCount[i]; 307 codepoint++) 308 { 309 unsigned int index = rangeOffset / 2 + (codepoint - this->startCount[i]) + i - this->segCount; 310 if (unlikely (index >= this->glyphIdArrayLength)) 311 break; 312 hb_codepoint_t gid = this->glyphIdArray[index]; 313 if (unlikely (!gid)) 314 continue; 315 out->add (codepoint); 316 } 317 } 318 } 319 } 320 321 const HBUINT16 *endCount; 322 const HBUINT16 *startCount; 323 const HBUINT16 *idDelta; 324 const HBUINT16 *idRangeOffset; 325 const HBUINT16 *glyphIdArray; 326 unsigned int segCount; 327 unsigned int glyphIdArrayLength; 328 }; 329 get_glyphOT::CmapSubtableFormat4330 bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const 331 { 332 accelerator_t accel (this); 333 return accel.get_glyph_func (&accel, codepoint, glyph); 334 } collect_unicodesOT::CmapSubtableFormat4335 void collect_unicodes (hb_set_t *out) const 336 { 337 accelerator_t accel (this); 338 accel.collect_unicodes (out); 339 } 340 sanitizeOT::CmapSubtableFormat4341 bool sanitize (hb_sanitize_context_t *c) const 342 { 343 TRACE_SANITIZE (this); 344 if (unlikely (!c->check_struct (this))) 345 return_trace (false); 346 347 if (unlikely (!c->check_range (this, length))) 348 { 349 /* Some broken fonts have too long of a "length" value. 350 * If that is the case, just change the value to truncate 351 * the subtable at the end of the blob. */ 352 uint16_t new_length = (uint16_t) MIN ((uintptr_t) 65535, 353 (uintptr_t) (c->end - 354 (char *) this)); 355 if (!c->try_set (&length, new_length)) 356 return_trace (false); 357 } 358 359 return_trace (16 + 4 * (unsigned int) segCountX2 <= length); 360 } 361 362 363 364 protected: 365 HBUINT16 format; /* Format number is set to 4. */ 366 HBUINT16 length; /* This is the length in bytes of the 367 * subtable. */ 368 HBUINT16 language; /* Ignore. */ 369 HBUINT16 segCountX2; /* 2 x segCount. */ 370 HBUINT16 searchRange; /* 2 * (2**floor(log2(segCount))) */ 371 HBUINT16 entrySelector; /* log2(searchRange/2) */ 372 HBUINT16 rangeShift; /* 2 x segCount - searchRange */ 373 374 UnsizedArrayOf<HBUINT16> 375 values; 376 #if 0 377 HBUINT16 endCount[segCount]; /* End characterCode for each segment, 378 * last=0xFFFFu. */ 379 HBUINT16 reservedPad; /* Set to 0. */ 380 HBUINT16 startCount[segCount]; /* Start character code for each segment. */ 381 HBINT16 idDelta[segCount]; /* Delta for all character codes in segment. */ 382 HBUINT16 idRangeOffset[segCount];/* Offsets into glyphIdArray or 0 */ 383 UnsizedArrayOf<HBUINT16> 384 glyphIdArray; /* Glyph index array (arbitrary length) */ 385 #endif 386 387 public: 388 DEFINE_SIZE_ARRAY (14, values); 389 }; 390 391 struct CmapSubtableLongGroup 392 { 393 friend struct CmapSubtableFormat12; 394 friend struct CmapSubtableFormat13; 395 template<typename U> 396 friend struct CmapSubtableLongSegmented; 397 friend struct cmap; 398 cmpOT::CmapSubtableLongGroup399 int cmp (hb_codepoint_t codepoint) const 400 { 401 if (codepoint < startCharCode) return -1; 402 if (codepoint > endCharCode) return +1; 403 return 0; 404 } 405 sanitizeOT::CmapSubtableLongGroup406 bool sanitize (hb_sanitize_context_t *c) const 407 { 408 TRACE_SANITIZE (this); 409 return_trace (c->check_struct (this)); 410 } 411 412 private: 413 HBUINT32 startCharCode; /* First character code in this group. */ 414 HBUINT32 endCharCode; /* Last character code in this group. */ 415 HBUINT32 glyphID; /* Glyph index; interpretation depends on 416 * subtable format. */ 417 public: 418 DEFINE_SIZE_STATIC (12); 419 }; 420 DECLARE_NULL_NAMESPACE_BYTES (OT, CmapSubtableLongGroup); 421 422 template <typename UINT> 423 struct CmapSubtableTrimmed 424 { get_glyphOT::CmapSubtableTrimmed425 bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const 426 { 427 /* Rely on our implicit array bound-checking. */ 428 hb_codepoint_t gid = glyphIdArray[codepoint - startCharCode]; 429 if (!gid) 430 return false; 431 *glyph = gid; 432 return true; 433 } collect_unicodesOT::CmapSubtableTrimmed434 void collect_unicodes (hb_set_t *out) const 435 { 436 hb_codepoint_t start = startCharCode; 437 unsigned int count = glyphIdArray.len; 438 for (unsigned int i = 0; i < count; i++) 439 if (glyphIdArray[i]) 440 out->add (start + i); 441 } 442 sanitizeOT::CmapSubtableTrimmed443 bool sanitize (hb_sanitize_context_t *c) const 444 { 445 TRACE_SANITIZE (this); 446 return_trace (c->check_struct (this) && glyphIdArray.sanitize (c)); 447 } 448 449 protected: 450 UINT formatReserved; /* Subtable format and (maybe) padding. */ 451 UINT length; /* Byte length of this subtable. */ 452 UINT language; /* Ignore. */ 453 UINT startCharCode; /* First character code covered. */ 454 ArrayOf<GlyphID, UINT> 455 glyphIdArray; /* Array of glyph index values for character 456 * codes in the range. */ 457 public: 458 DEFINE_SIZE_ARRAY (5 * sizeof (UINT), glyphIdArray); 459 }; 460 461 struct CmapSubtableFormat6 : CmapSubtableTrimmed<HBUINT16> {}; 462 struct CmapSubtableFormat10 : CmapSubtableTrimmed<HBUINT32 > {}; 463 464 template <typename T> 465 struct CmapSubtableLongSegmented 466 { 467 friend struct cmap; 468 get_glyphOT::CmapSubtableLongSegmented469 bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const 470 { 471 hb_codepoint_t gid = T::group_get_glyph (groups.bsearch (codepoint), codepoint); 472 if (!gid) 473 return false; 474 *glyph = gid; 475 return true; 476 } 477 collect_unicodesOT::CmapSubtableLongSegmented478 void collect_unicodes (hb_set_t *out) const 479 { 480 for (unsigned int i = 0; i < this->groups.len; i++) { 481 out->add_range (this->groups[i].startCharCode, 482 MIN ((hb_codepoint_t) this->groups[i].endCharCode, 483 (hb_codepoint_t) HB_UNICODE_MAX)); 484 } 485 } 486 sanitizeOT::CmapSubtableLongSegmented487 bool sanitize (hb_sanitize_context_t *c) const 488 { 489 TRACE_SANITIZE (this); 490 return_trace (c->check_struct (this) && groups.sanitize (c)); 491 } 492 serializeOT::CmapSubtableLongSegmented493 bool serialize (hb_serialize_context_t *c, 494 const hb_vector_t<CmapSubtableLongGroup> &group_data) 495 { 496 TRACE_SERIALIZE (this); 497 if (unlikely (!c->extend_min (*this))) return_trace (false); 498 if (unlikely (!groups.serialize (c, group_data.as_array ()))) return_trace (false); 499 return true; 500 } 501 502 protected: 503 HBUINT16 format; /* Subtable format; set to 12. */ 504 HBUINT16 reserved; /* Reserved; set to 0. */ 505 HBUINT32 length; /* Byte length of this subtable. */ 506 HBUINT32 language; /* Ignore. */ 507 SortedArrayOf<CmapSubtableLongGroup, HBUINT32> 508 groups; /* Groupings. */ 509 public: 510 DEFINE_SIZE_ARRAY (16, groups); 511 }; 512 513 struct CmapSubtableFormat12 : CmapSubtableLongSegmented<CmapSubtableFormat12> 514 { group_get_glyphOT::CmapSubtableFormat12515 static hb_codepoint_t group_get_glyph (const CmapSubtableLongGroup &group, 516 hb_codepoint_t u) 517 { return likely (group.startCharCode <= group.endCharCode) ? 518 group.glyphID + (u - group.startCharCode) : 0; } 519 520 serializeOT::CmapSubtableFormat12521 bool serialize (hb_serialize_context_t *c, 522 const hb_vector_t<CmapSubtableLongGroup> &groups) 523 { 524 if (unlikely (!c->extend_min (*this))) return false; 525 526 this->format.set (12); 527 this->reserved.set (0); 528 this->length.set (get_sub_table_size (groups)); 529 530 return CmapSubtableLongSegmented<CmapSubtableFormat12>::serialize (c, groups); 531 } 532 get_sub_table_sizeOT::CmapSubtableFormat12533 static size_t get_sub_table_size (const hb_vector_t<CmapSubtableLongGroup> &groups) 534 { 535 return 16 + 12 * groups.len; 536 } 537 create_sub_table_planOT::CmapSubtableFormat12538 static bool create_sub_table_plan (const hb_subset_plan_t *plan, 539 hb_vector_t<CmapSubtableLongGroup> *groups) 540 { 541 CmapSubtableLongGroup *group = nullptr; 542 543 hb_codepoint_t cp = HB_SET_VALUE_INVALID; 544 while (plan->unicodes->next (&cp)) { 545 hb_codepoint_t new_gid; 546 if (unlikely (!plan->new_gid_for_codepoint (cp, &new_gid))) 547 { 548 DEBUG_MSG(SUBSET, nullptr, "Unable to find new gid for %04x", cp); 549 return false; 550 } 551 552 if (!group || !_is_gid_consecutive (group, cp, new_gid)) 553 { 554 group = groups->push (); 555 group->startCharCode.set (cp); 556 group->endCharCode.set (cp); 557 group->glyphID.set (new_gid); 558 } 559 else group->endCharCode.set (cp); 560 } 561 562 DEBUG_MSG(SUBSET, nullptr, "cmap"); 563 for (unsigned int i = 0; i < groups->len; i++) { 564 CmapSubtableLongGroup& group = (*groups)[i]; 565 DEBUG_MSG(SUBSET, nullptr, " %d: U+%04X-U+%04X, gid %d-%d", i, (uint32_t) group.startCharCode, (uint32_t) group.endCharCode, (uint32_t) group.glyphID, (uint32_t) group.glyphID + ((uint32_t) group.endCharCode - (uint32_t) group.startCharCode)); 566 } 567 568 return true; 569 } 570 571 private: _is_gid_consecutiveOT::CmapSubtableFormat12572 static bool _is_gid_consecutive (CmapSubtableLongGroup *group, 573 hb_codepoint_t cp, 574 hb_codepoint_t new_gid) 575 { 576 return (cp - 1 == group->endCharCode) && 577 new_gid == group->glyphID + (cp - group->startCharCode); 578 } 579 580 }; 581 582 struct CmapSubtableFormat13 : CmapSubtableLongSegmented<CmapSubtableFormat13> 583 { group_get_glyphOT::CmapSubtableFormat13584 static hb_codepoint_t group_get_glyph (const CmapSubtableLongGroup &group, 585 hb_codepoint_t u HB_UNUSED) 586 { return group.glyphID; } 587 }; 588 589 typedef enum 590 { 591 GLYPH_VARIANT_NOT_FOUND = 0, 592 GLYPH_VARIANT_FOUND = 1, 593 GLYPH_VARIANT_USE_DEFAULT = 2 594 } glyph_variant_t; 595 596 struct UnicodeValueRange 597 { cmpOT::UnicodeValueRange598 int cmp (const hb_codepoint_t &codepoint) const 599 { 600 if (codepoint < startUnicodeValue) return -1; 601 if (codepoint > startUnicodeValue + additionalCount) return +1; 602 return 0; 603 } 604 sanitizeOT::UnicodeValueRange605 bool sanitize (hb_sanitize_context_t *c) const 606 { 607 TRACE_SANITIZE (this); 608 return_trace (c->check_struct (this)); 609 } 610 611 HBUINT24 startUnicodeValue; /* First value in this range. */ 612 HBUINT8 additionalCount; /* Number of additional values in this 613 * range. */ 614 public: 615 DEFINE_SIZE_STATIC (4); 616 }; 617 618 struct DefaultUVS : SortedArrayOf<UnicodeValueRange, HBUINT32> 619 { collect_unicodesOT::DefaultUVS620 void collect_unicodes (hb_set_t *out) const 621 { 622 unsigned int count = len; 623 for (unsigned int i = 0; i < count; i++) 624 { 625 hb_codepoint_t first = arrayZ[i].startUnicodeValue; 626 hb_codepoint_t last = MIN ((hb_codepoint_t) (first + arrayZ[i].additionalCount), 627 (hb_codepoint_t) HB_UNICODE_MAX); 628 out->add_range (first, last); 629 } 630 } 631 632 public: 633 DEFINE_SIZE_ARRAY (4, *this); 634 }; 635 636 struct UVSMapping 637 { cmpOT::UVSMapping638 int cmp (const hb_codepoint_t &codepoint) const 639 { 640 return unicodeValue.cmp (codepoint); 641 } 642 sanitizeOT::UVSMapping643 bool sanitize (hb_sanitize_context_t *c) const 644 { 645 TRACE_SANITIZE (this); 646 return_trace (c->check_struct (this)); 647 } 648 649 HBUINT24 unicodeValue; /* Base Unicode value of the UVS */ 650 GlyphID glyphID; /* Glyph ID of the UVS */ 651 public: 652 DEFINE_SIZE_STATIC (5); 653 }; 654 655 struct NonDefaultUVS : SortedArrayOf<UVSMapping, HBUINT32> 656 { collect_unicodesOT::NonDefaultUVS657 void collect_unicodes (hb_set_t *out) const 658 { 659 unsigned int count = len; 660 for (unsigned int i = 0; i < count; i++) 661 out->add (arrayZ[i].glyphID); 662 } 663 664 public: 665 DEFINE_SIZE_ARRAY (4, *this); 666 }; 667 668 struct VariationSelectorRecord 669 { get_glyphOT::VariationSelectorRecord670 glyph_variant_t get_glyph (hb_codepoint_t codepoint, 671 hb_codepoint_t *glyph, 672 const void *base) const 673 { 674 if ((base+defaultUVS).bfind (codepoint)) 675 return GLYPH_VARIANT_USE_DEFAULT; 676 const UVSMapping &nonDefault = (base+nonDefaultUVS).bsearch (codepoint); 677 if (nonDefault.glyphID) 678 { 679 *glyph = nonDefault.glyphID; 680 return GLYPH_VARIANT_FOUND; 681 } 682 return GLYPH_VARIANT_NOT_FOUND; 683 } 684 collect_unicodesOT::VariationSelectorRecord685 void collect_unicodes (hb_set_t *out, const void *base) const 686 { 687 (base+defaultUVS).collect_unicodes (out); 688 (base+nonDefaultUVS).collect_unicodes (out); 689 } 690 cmpOT::VariationSelectorRecord691 int cmp (const hb_codepoint_t &variation_selector) const 692 { 693 return varSelector.cmp (variation_selector); 694 } 695 sanitizeOT::VariationSelectorRecord696 bool sanitize (hb_sanitize_context_t *c, const void *base) const 697 { 698 TRACE_SANITIZE (this); 699 return_trace (c->check_struct (this) && 700 defaultUVS.sanitize (c, base) && 701 nonDefaultUVS.sanitize (c, base)); 702 } 703 704 HBUINT24 varSelector; /* Variation selector. */ 705 LOffsetTo<DefaultUVS> 706 defaultUVS; /* Offset to Default UVS Table. May be 0. */ 707 LOffsetTo<NonDefaultUVS> 708 nonDefaultUVS; /* Offset to Non-Default UVS Table. May be 0. */ 709 public: 710 DEFINE_SIZE_STATIC (11); 711 }; 712 713 struct CmapSubtableFormat14 714 { get_glyph_variantOT::CmapSubtableFormat14715 glyph_variant_t get_glyph_variant (hb_codepoint_t codepoint, 716 hb_codepoint_t variation_selector, 717 hb_codepoint_t *glyph) const 718 { 719 return record.bsearch (variation_selector).get_glyph (codepoint, glyph, this); 720 } 721 collect_variation_selectorsOT::CmapSubtableFormat14722 void collect_variation_selectors (hb_set_t *out) const 723 { 724 unsigned int count = record.len; 725 for (unsigned int i = 0; i < count; i++) 726 out->add (record.arrayZ[i].varSelector); 727 } collect_variation_unicodesOT::CmapSubtableFormat14728 void collect_variation_unicodes (hb_codepoint_t variation_selector, 729 hb_set_t *out) const 730 { 731 record.bsearch (variation_selector).collect_unicodes (out, this); 732 } 733 sanitizeOT::CmapSubtableFormat14734 bool sanitize (hb_sanitize_context_t *c) const 735 { 736 TRACE_SANITIZE (this); 737 return_trace (c->check_struct (this) && 738 record.sanitize (c, this)); 739 } 740 741 protected: 742 HBUINT16 format; /* Format number is set to 14. */ 743 HBUINT32 length; /* Byte length of this subtable. */ 744 SortedArrayOf<VariationSelectorRecord, HBUINT32> 745 record; /* Variation selector records; sorted 746 * in increasing order of `varSelector'. */ 747 public: 748 DEFINE_SIZE_ARRAY (10, record); 749 }; 750 751 struct CmapSubtable 752 { 753 /* Note: We intentionally do NOT implement subtable formats 2 and 8. */ 754 get_glyphOT::CmapSubtable755 bool get_glyph (hb_codepoint_t codepoint, 756 hb_codepoint_t *glyph) const 757 { 758 switch (u.format) { 759 case 0: return u.format0 .get_glyph (codepoint, glyph); 760 case 4: return u.format4 .get_glyph (codepoint, glyph); 761 case 6: return u.format6 .get_glyph (codepoint, glyph); 762 case 10: return u.format10.get_glyph (codepoint, glyph); 763 case 12: return u.format12.get_glyph (codepoint, glyph); 764 case 13: return u.format13.get_glyph (codepoint, glyph); 765 case 14: 766 default: return false; 767 } 768 } collect_unicodesOT::CmapSubtable769 void collect_unicodes (hb_set_t *out) const 770 { 771 switch (u.format) { 772 case 0: u.format0 .collect_unicodes (out); return; 773 case 4: u.format4 .collect_unicodes (out); return; 774 case 6: u.format6 .collect_unicodes (out); return; 775 case 10: u.format10.collect_unicodes (out); return; 776 case 12: u.format12.collect_unicodes (out); return; 777 case 13: u.format13.collect_unicodes (out); return; 778 case 14: 779 default: return; 780 } 781 } 782 sanitizeOT::CmapSubtable783 bool sanitize (hb_sanitize_context_t *c) const 784 { 785 TRACE_SANITIZE (this); 786 if (!u.format.sanitize (c)) return_trace (false); 787 switch (u.format) { 788 case 0: return_trace (u.format0 .sanitize (c)); 789 case 4: return_trace (u.format4 .sanitize (c)); 790 case 6: return_trace (u.format6 .sanitize (c)); 791 case 10: return_trace (u.format10.sanitize (c)); 792 case 12: return_trace (u.format12.sanitize (c)); 793 case 13: return_trace (u.format13.sanitize (c)); 794 case 14: return_trace (u.format14.sanitize (c)); 795 default:return_trace (true); 796 } 797 } 798 799 public: 800 union { 801 HBUINT16 format; /* Format identifier */ 802 CmapSubtableFormat0 format0; 803 CmapSubtableFormat4 format4; 804 CmapSubtableFormat6 format6; 805 CmapSubtableFormat10 format10; 806 CmapSubtableFormat12 format12; 807 CmapSubtableFormat13 format13; 808 CmapSubtableFormat14 format14; 809 } u; 810 public: 811 DEFINE_SIZE_UNION (2, format); 812 }; 813 814 815 struct EncodingRecord 816 { cmpOT::EncodingRecord817 int cmp (const EncodingRecord &other) const 818 { 819 int ret; 820 ret = platformID.cmp (other.platformID); 821 if (ret) return ret; 822 ret = encodingID.cmp (other.encodingID); 823 if (ret) return ret; 824 return 0; 825 } 826 sanitizeOT::EncodingRecord827 bool sanitize (hb_sanitize_context_t *c, const void *base) const 828 { 829 TRACE_SANITIZE (this); 830 return_trace (c->check_struct (this) && 831 subtable.sanitize (c, base)); 832 } 833 834 HBUINT16 platformID; /* Platform ID. */ 835 HBUINT16 encodingID; /* Platform-specific encoding ID. */ 836 LOffsetTo<CmapSubtable> 837 subtable; /* Byte offset from beginning of table to the subtable for this encoding. */ 838 public: 839 DEFINE_SIZE_STATIC (8); 840 }; 841 842 struct cmap 843 { 844 enum { tableTag = HB_OT_TAG_cmap }; 845 846 struct subset_plan 847 { final_sizeOT::cmap::subset_plan848 size_t final_size () const 849 { 850 return 4 // header 851 + 8 * 3 // 3 EncodingRecord 852 + CmapSubtableFormat4::get_sub_table_size (this->format4_segments) 853 + CmapSubtableFormat12::get_sub_table_size (this->format12_groups); 854 } 855 856 hb_vector_t<CmapSubtableFormat4::segment_plan> format4_segments; 857 hb_vector_t<CmapSubtableLongGroup> format12_groups; 858 }; 859 _create_planOT::cmap860 bool _create_plan (const hb_subset_plan_t *plan, 861 subset_plan *cmap_plan) const 862 { 863 if (unlikely (!CmapSubtableFormat4::create_sub_table_plan (plan, &cmap_plan->format4_segments))) 864 return false; 865 866 return CmapSubtableFormat12::create_sub_table_plan (plan, &cmap_plan->format12_groups); 867 } 868 _subsetOT::cmap869 bool _subset (const hb_subset_plan_t *plan, 870 const subset_plan &cmap_subset_plan, 871 size_t dest_sz, 872 void *dest) const 873 { 874 hb_serialize_context_t c (dest, dest_sz); 875 876 cmap *table = c.start_serialize<cmap> (); 877 if (unlikely (!c.extend_min (*table))) 878 { 879 return false; 880 } 881 882 table->version.set (0); 883 884 if (unlikely (!table->encodingRecord.serialize (&c, /* numTables */ 3))) 885 return false; 886 887 // TODO(grieger): Convert the below to a for loop 888 889 // Format 4, Plat 0 Encoding Record 890 EncodingRecord &format4_plat0_rec = table->encodingRecord[0]; 891 format4_plat0_rec.platformID.set (0); // Unicode 892 format4_plat0_rec.encodingID.set (3); 893 894 // Format 4, Plat 3 Encoding Record 895 EncodingRecord &format4_plat3_rec = table->encodingRecord[1]; 896 format4_plat3_rec.platformID.set (3); // Windows 897 format4_plat3_rec.encodingID.set (1); // Unicode BMP 898 899 // Format 12 Encoding Record 900 EncodingRecord &format12_rec = table->encodingRecord[2]; 901 format12_rec.platformID.set (3); // Windows 902 format12_rec.encodingID.set (10); // Unicode UCS-4 903 904 // Write out format 4 sub table 905 { 906 CmapSubtable &subtable = format4_plat0_rec.subtable.serialize (&c, table); 907 format4_plat3_rec.subtable.set (format4_plat0_rec.subtable); 908 subtable.u.format.set (4); 909 910 CmapSubtableFormat4 &format4 = subtable.u.format4; 911 if (unlikely (!format4.serialize (&c, plan, cmap_subset_plan.format4_segments))) 912 return false; 913 } 914 915 // Write out format 12 sub table. 916 { 917 CmapSubtable &subtable = format12_rec.subtable.serialize (&c, table); 918 subtable.u.format.set (12); 919 920 CmapSubtableFormat12 &format12 = subtable.u.format12; 921 if (unlikely (!format12.serialize (&c, cmap_subset_plan.format12_groups))) 922 return false; 923 } 924 925 c.end_serialize (); 926 927 return true; 928 } 929 subsetOT::cmap930 bool subset (hb_subset_plan_t *plan) const 931 { 932 subset_plan cmap_subset_plan; 933 934 if (unlikely (!_create_plan (plan, &cmap_subset_plan))) 935 { 936 DEBUG_MSG(SUBSET, nullptr, "Failed to generate a cmap subsetting plan."); 937 return false; 938 } 939 940 // We now know how big our blob needs to be 941 size_t dest_sz = cmap_subset_plan.final_size (); 942 void *dest = malloc (dest_sz); 943 if (unlikely (!dest)) { 944 DEBUG_MSG(SUBSET, nullptr, "Unable to alloc %lu for cmap subset output", (unsigned long) dest_sz); 945 return false; 946 } 947 948 if (unlikely (!_subset (plan, cmap_subset_plan, dest_sz, dest))) 949 { 950 DEBUG_MSG(SUBSET, nullptr, "Failed to perform subsetting of cmap."); 951 free (dest); 952 return false; 953 } 954 955 // all done, write the blob into dest 956 hb_blob_t *cmap_prime = hb_blob_create ((const char *) dest, 957 dest_sz, 958 HB_MEMORY_MODE_READONLY, 959 dest, 960 free); 961 bool result = plan->add_table (HB_OT_TAG_cmap, cmap_prime); 962 hb_blob_destroy (cmap_prime); 963 return result; 964 } 965 find_best_subtableOT::cmap966 const CmapSubtable *find_best_subtable (bool *symbol = nullptr) const 967 { 968 if (symbol) *symbol = false; 969 970 const CmapSubtable *subtable; 971 972 /* 32-bit subtables. */ 973 if ((subtable = this->find_subtable (3, 10))) return subtable; 974 if ((subtable = this->find_subtable (0, 6))) return subtable; 975 if ((subtable = this->find_subtable (0, 4))) return subtable; 976 977 /* 16-bit subtables. */ 978 if ((subtable = this->find_subtable (3, 1))) return subtable; 979 if ((subtable = this->find_subtable (0, 3))) return subtable; 980 if ((subtable = this->find_subtable (0, 2))) return subtable; 981 if ((subtable = this->find_subtable (0, 1))) return subtable; 982 if ((subtable = this->find_subtable (0, 0))) return subtable; 983 984 /* Symbol subtable. */ 985 if ((subtable = this->find_subtable (3, 0))) 986 { 987 if (symbol) *symbol = true; 988 return subtable; 989 } 990 991 /* Meh. */ 992 return &Null (CmapSubtable); 993 } 994 995 struct accelerator_t 996 { initOT::cmap::accelerator_t997 void init (hb_face_t *face) 998 { 999 this->table = hb_sanitize_context_t ().reference_table<cmap> (face); 1000 bool symbol; 1001 this->subtable = table->find_best_subtable (&symbol); 1002 this->subtable_uvs = &Null (CmapSubtableFormat14); 1003 { 1004 const CmapSubtable *st = table->find_subtable (0, 5); 1005 if (st && st->u.format == 14) 1006 subtable_uvs = &st->u.format14; 1007 } 1008 1009 this->get_glyph_data = subtable; 1010 if (unlikely (symbol)) 1011 { 1012 this->get_glyph_funcZ = get_glyph_from_symbol<CmapSubtable>; 1013 } else { 1014 switch (subtable->u.format) { 1015 /* Accelerate format 4 and format 12. */ 1016 default: 1017 this->get_glyph_funcZ = get_glyph_from<CmapSubtable>; 1018 break; 1019 case 12: 1020 this->get_glyph_funcZ = get_glyph_from<CmapSubtableFormat12>; 1021 break; 1022 case 4: 1023 { 1024 this->format4_accel.init (&subtable->u.format4); 1025 this->get_glyph_data = &this->format4_accel; 1026 this->get_glyph_funcZ = this->format4_accel.get_glyph_func; 1027 } 1028 break; 1029 } 1030 } 1031 } 1032 finiOT::cmap::accelerator_t1033 void fini () { this->table.destroy (); } 1034 get_nominal_glyphOT::cmap::accelerator_t1035 bool get_nominal_glyph (hb_codepoint_t unicode, 1036 hb_codepoint_t *glyph) const 1037 { 1038 if (unlikely (!this->get_glyph_funcZ)) return false; 1039 return this->get_glyph_funcZ (this->get_glyph_data, unicode, glyph); 1040 } get_nominal_glyphsOT::cmap::accelerator_t1041 unsigned int get_nominal_glyphs (unsigned int count, 1042 const hb_codepoint_t *first_unicode, 1043 unsigned int unicode_stride, 1044 hb_codepoint_t *first_glyph, 1045 unsigned int glyph_stride) const 1046 { 1047 if (unlikely (!this->get_glyph_funcZ)) return 0; 1048 1049 hb_cmap_get_glyph_func_t get_glyph_funcZ = this->get_glyph_funcZ; 1050 const void *get_glyph_data = this->get_glyph_data; 1051 1052 unsigned int done; 1053 for (done = 0; 1054 done < count && get_glyph_funcZ (get_glyph_data, *first_unicode, first_glyph); 1055 done++) 1056 { 1057 first_unicode = &StructAtOffset<hb_codepoint_t> (first_unicode, unicode_stride); 1058 first_glyph = &StructAtOffset<hb_codepoint_t> (first_glyph, glyph_stride); 1059 } 1060 return done; 1061 } 1062 get_variation_glyphOT::cmap::accelerator_t1063 bool get_variation_glyph (hb_codepoint_t unicode, 1064 hb_codepoint_t variation_selector, 1065 hb_codepoint_t *glyph) const 1066 { 1067 switch (this->subtable_uvs->get_glyph_variant (unicode, 1068 variation_selector, 1069 glyph)) 1070 { 1071 case GLYPH_VARIANT_NOT_FOUND: return false; 1072 case GLYPH_VARIANT_FOUND: return true; 1073 case GLYPH_VARIANT_USE_DEFAULT: break; 1074 } 1075 1076 return get_nominal_glyph (unicode, glyph); 1077 } 1078 collect_unicodesOT::cmap::accelerator_t1079 void collect_unicodes (hb_set_t *out) const 1080 { 1081 subtable->collect_unicodes (out); 1082 } collect_variation_selectorsOT::cmap::accelerator_t1083 void collect_variation_selectors (hb_set_t *out) const 1084 { 1085 subtable_uvs->collect_variation_selectors (out); 1086 } collect_variation_unicodesOT::cmap::accelerator_t1087 void collect_variation_unicodes (hb_codepoint_t variation_selector, 1088 hb_set_t *out) const 1089 { 1090 subtable_uvs->collect_variation_unicodes (variation_selector, out); 1091 } 1092 1093 protected: 1094 typedef bool (*hb_cmap_get_glyph_func_t) (const void *obj, 1095 hb_codepoint_t codepoint, 1096 hb_codepoint_t *glyph); 1097 1098 template <typename Type> get_glyph_fromOT::cmap::accelerator_t1099 static bool get_glyph_from (const void *obj, 1100 hb_codepoint_t codepoint, 1101 hb_codepoint_t *glyph) 1102 { 1103 const Type *typed_obj = (const Type *) obj; 1104 return typed_obj->get_glyph (codepoint, glyph); 1105 } 1106 1107 template <typename Type> get_glyph_from_symbolOT::cmap::accelerator_t1108 static bool get_glyph_from_symbol (const void *obj, 1109 hb_codepoint_t codepoint, 1110 hb_codepoint_t *glyph) 1111 { 1112 const Type *typed_obj = (const Type *) obj; 1113 if (likely (typed_obj->get_glyph (codepoint, glyph))) 1114 return true; 1115 1116 if (codepoint <= 0x00FFu) 1117 { 1118 /* For symbol-encoded OpenType fonts, we duplicate the 1119 * U+F000..F0FF range at U+0000..U+00FF. That's what 1120 * Windows seems to do, and that's hinted about at: 1121 * https://docs.microsoft.com/en-us/typography/opentype/spec/recom 1122 * under "Non-Standard (Symbol) Fonts". */ 1123 return typed_obj->get_glyph (0xF000u + codepoint, glyph); 1124 } 1125 1126 return false; 1127 } 1128 1129 private: 1130 hb_nonnull_ptr_t<const CmapSubtable> subtable; 1131 hb_nonnull_ptr_t<const CmapSubtableFormat14> subtable_uvs; 1132 1133 hb_cmap_get_glyph_func_t get_glyph_funcZ; 1134 const void *get_glyph_data; 1135 1136 CmapSubtableFormat4::accelerator_t format4_accel; 1137 1138 hb_blob_ptr_t<cmap> table; 1139 }; 1140 1141 protected: 1142 find_subtableOT::cmap1143 const CmapSubtable *find_subtable (unsigned int platform_id, 1144 unsigned int encoding_id) const 1145 { 1146 EncodingRecord key; 1147 key.platformID.set (platform_id); 1148 key.encodingID.set (encoding_id); 1149 1150 const EncodingRecord &result = encodingRecord.bsearch (key); 1151 if (!result.subtable) 1152 return nullptr; 1153 1154 return &(this+result.subtable); 1155 } 1156 1157 public: 1158 sanitizeOT::cmap1159 bool sanitize (hb_sanitize_context_t *c) const 1160 { 1161 TRACE_SANITIZE (this); 1162 return_trace (c->check_struct (this) && 1163 likely (version == 0) && 1164 encodingRecord.sanitize (c, this)); 1165 } 1166 1167 protected: 1168 HBUINT16 version; /* Table version number (0). */ 1169 SortedArrayOf<EncodingRecord> 1170 encodingRecord; /* Encoding tables. */ 1171 public: 1172 DEFINE_SIZE_ARRAY (4, encodingRecord); 1173 }; 1174 1175 struct cmap_accelerator_t : cmap::accelerator_t {}; 1176 1177 } /* namespace OT */ 1178 1179 1180 #endif /* HB_OT_CMAP_TABLE_HH */ 1181