• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2013 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define LOG_TAG "Minikin"
18 
#include "minikin/FontCollection.h"

#include <log/log.h>
#include <unicode/unorm2.h>

#include <algorithm>
#include <utility>

#include "Locale.h"
#include "LocaleListCache.h"
#include "MinikinInternal.h"
#include "minikin/Characters.h"
#include "minikin/Emoji.h"
#include "minikin/FontFileParser.h"
32 
33 using std::vector;
34 
35 namespace minikin {
36 
// Returns the larger of the two arguments; when neither compares greater, `b` is returned.
template <typename T>
static inline T max(T a, T b) {
    if (a > b) {
        return a;
    }
    return b;
}
41 
// Variation selector U+FE0F: requests the color (emoji) presentation of the base character.
42 const uint32_t EMOJI_STYLE_VS = 0xFE0F;
// Variation selector U+FE0E: requests the text presentation of the base character.
43 const uint32_t TEXT_STYLE_VS = 0xFE0E;
44 
// Source of FontCollection::mId values; post-incremented by init() and by the BufferReader
// constructor each time a collection is built.
45 static std::atomic<uint32_t> gNextCollectionId = {0};
46 
47 namespace {
48 
isEmojiBreak(uint32_t prevCh,uint32_t ch)49 inline bool isEmojiBreak(uint32_t prevCh, uint32_t ch) {
50     return !(isEmojiModifier(ch) || (isRegionalIndicator(prevCh) && isRegionalIndicator(ch)) ||
51              isKeyCap(ch) || isTagChar(ch) || ch == CHAR_ZWJ || prevCh == CHAR_ZWJ);
52 }
53 
54 // Lower is better
getGlyphScore(U16StringPiece text,uint32_t start,uint32_t end,const HbFontUniquePtr & font)55 uint32_t getGlyphScore(U16StringPiece text, uint32_t start, uint32_t end,
56                        const HbFontUniquePtr& font) {
57     HbBufferUniquePtr buffer(hb_buffer_create());
58     hb_buffer_set_direction(buffer.get(), HB_DIRECTION_LTR);
59     hb_buffer_add_utf16(buffer.get(), text.data() + start, end - start, 0, end - start);
60     hb_buffer_guess_segment_properties(buffer.get());
61 
62     unsigned int numGlyphs = -1;
63     hb_shape(font.get(), buffer.get(), nullptr, 0);
64     hb_glyph_info_t* info = hb_buffer_get_glyph_infos(buffer.get(), &numGlyphs);
65 
66     // HarfBuzz squashed unsupported tag sequence into first emoji glyph. So, we cannot use glyph
67     // count for the font selection score. Give extra score if the base score is different from the
68     // first glyph.
69     if (numGlyphs == 1) {
70         constexpr uint32_t TAG_SEQUENCE_FALLBACK_PENALTY = 0x10000;
71 
72         uint32_t ch = 0;
73         const uint16_t* string = text.data();
74         const uint32_t string_size = text.size();
75         uint32_t readLength = 0;
76 
77         U16_NEXT(string, readLength, string_size, ch);
78         if (U_IS_SURROGATE(ch)) {
79             return numGlyphs;  // Broken surrogate pair.
80         }
81 
82         if (readLength >= string_size) {
83             return numGlyphs;  // No more characters remaining.
84         }
85 
86         uint32_t nextCh = 0;
87         U16_NEXT(string, readLength, string_size, nextCh);
88 
89         if (!isTagChar(nextCh)) {
90             return numGlyphs;  // Not a tag sequence.
91         }
92 
93         uint32_t composedGlyphId = info[0].codepoint;
94 
95         // Shape only the first base emoji.
96         hb_buffer_reset(buffer.get());
97         hb_buffer_set_direction(buffer.get(), HB_DIRECTION_LTR);
98         hb_buffer_add_codepoints(buffer.get(), &ch, 1, 0, 1);
99         hb_buffer_guess_segment_properties(buffer.get());
100 
101         unsigned int numGlyphs = -1;
102         hb_shape(font.get(), buffer.get(), nullptr, 0);
103         info = hb_buffer_get_glyph_infos(buffer.get(), &numGlyphs);
104 
105         if (numGlyphs != 1) {
106             // If the single code point of the first base emoji is decomposed to multiple glyphs,
107             // we don't support it.
108             return numGlyphs;
109         }
110 
111         uint32_t baseGlyphId = info[0].codepoint;
112         if (composedGlyphId == baseGlyphId) {
113             return numGlyphs + TAG_SEQUENCE_FALLBACK_PENALTY;
114         } else {
115             return numGlyphs;
116         }
117     }
118 
119     return numGlyphs;
120 }
121 
122 }  // namespace
123 
FontCollection(std::shared_ptr<FontFamily> && typeface)124 FontCollection::FontCollection(std::shared_ptr<FontFamily>&& typeface) : mMaxChar(0) {
125     std::vector<std::shared_ptr<FontFamily>> typefaces;
126     typefaces.push_back(typeface);
127     init(typefaces);
128 }
129 
FontCollection(const vector<std::shared_ptr<FontFamily>> & typefaces)130 FontCollection::FontCollection(const vector<std::shared_ptr<FontFamily>>& typefaces) : mMaxChar(0) {
131     init(typefaces);
132 }
133 
init(const vector<std::shared_ptr<FontFamily>> & typefaces)134 void FontCollection::init(const vector<std::shared_ptr<FontFamily>>& typefaces) {
135     mId = gNextCollectionId++;
136     vector<uint32_t> lastChar;
137     size_t nTypefaces = typefaces.size();
138     const FontStyle defaultStyle;
139     for (size_t i = 0; i < nTypefaces; i++) {
140         const std::shared_ptr<FontFamily>& family = typefaces[i];
141         if (family->getClosestMatch(defaultStyle).font == nullptr) {
142             continue;
143         }
144         const SparseBitSet& coverage = family->getCoverage();
145         mFamilies.push_back(family);  // emplace_back would be better
146         if (family->hasVSTable()) {
147             mVSFamilyVec.push_back(family);
148         }
149         mMaxChar = max(mMaxChar, coverage.length());
150         lastChar.push_back(coverage.nextSetBit(0));
151 
152         const std::unordered_set<AxisTag>& supportedAxes = family->supportedAxes();
153         mSupportedAxes.insert(supportedAxes.begin(), supportedAxes.end());
154     }
155     nTypefaces = mFamilies.size();
156     MINIKIN_ASSERT(nTypefaces > 0, "Font collection must have at least one valid typeface");
157     MINIKIN_ASSERT(nTypefaces <= MAX_FAMILY_COUNT,
158                    "Font collection may only have up to %d font families.", MAX_FAMILY_COUNT);
159     size_t nPages = (mMaxChar + kPageMask) >> kLogCharsPerPage;
160     // TODO: Use variation selector map for mRanges construction.
161     // A font can have a glyph for a base code point and variation selector pair but no glyph for
162     // the base code point without variation selector. The family won't be listed in the range in
163     // this case.
164     mOwnedRanges = std::make_unique<Range[]>(nPages);
165     mRanges = mOwnedRanges.get();
166     mRangesCount = nPages;
167     for (size_t i = 0; i < nPages; i++) {
168         Range* range = &mOwnedRanges[i];
169         range->start = mOwnedFamilyVec.size();
170         for (size_t j = 0; j < nTypefaces; j++) {
171             if (lastChar[j] < (i + 1) << kLogCharsPerPage) {
172                 const std::shared_ptr<FontFamily>& family = mFamilies[j];
173                 mOwnedFamilyVec.push_back(static_cast<uint8_t>(j));
174                 uint32_t nextChar = family->getCoverage().nextSetBit((i + 1) << kLogCharsPerPage);
175                 lastChar[j] = nextChar;
176             }
177         }
178         range->end = mOwnedFamilyVec.size();
179     }
180     // See the comment in Range for more details.
181     LOG_ALWAYS_FATAL_IF(mOwnedFamilyVec.size() >= 0xFFFF,
182                         "Exceeded the maximum indexable cmap coverage.");
183     mFamilyVec = mOwnedFamilyVec.data();
184     mFamilyVecCount = mOwnedFamilyVec.size();
185 }
186 
FontCollection(BufferReader * reader,const std::vector<std::shared_ptr<FontFamily>> & families)187 FontCollection::FontCollection(BufferReader* reader,
188                                const std::vector<std::shared_ptr<FontFamily>>& families) {
189     mId = gNextCollectionId++;
190     mMaxChar = reader->read<uint32_t>();
191     uint32_t familiesCount = reader->read<uint32_t>();
192     mFamilies.reserve(familiesCount);
193     for (uint32_t i = 0; i < familiesCount; i++) {
194         uint32_t index = reader->read<uint32_t>();
195         if (index >= families.size()) {
196             ALOGE("Invalid FontFamily index: %zu", (size_t)index);
197         } else {
198             mFamilies.push_back(families[index]);
199             if (families[index]->hasVSTable()) {
200                 mVSFamilyVec.push_back(families[index]);
201             }
202         }
203     }
204     // Range is two packed uint16_t
205     static_assert(sizeof(Range) == 4);
206     std::tie(mRanges, mRangesCount) = reader->readArray<Range>();
207     std::tie(mFamilyVec, mFamilyVecCount) = reader->readArray<uint8_t>();
208     const auto& [axesPtr, axesCount] = reader->readArray<AxisTag>();
209     mSupportedAxes.insert(axesPtr, axesPtr + axesCount);
210 }
211 
writeTo(BufferWriter * writer,const std::unordered_map<std::shared_ptr<FontFamily>,uint32_t> & fontFamilyToIndexMap) const212 void FontCollection::writeTo(BufferWriter* writer,
213                              const std::unordered_map<std::shared_ptr<FontFamily>, uint32_t>&
214                                      fontFamilyToIndexMap) const {
215     writer->write<uint32_t>(mMaxChar);
216     writer->write<uint32_t>(mFamilies.size());
217     for (const std::shared_ptr<FontFamily>& fontFamily : mFamilies) {
218         auto it = fontFamilyToIndexMap.find(fontFamily);
219         if (it == fontFamilyToIndexMap.end()) {
220             ALOGE("fontFamily not found in fontFamilyToIndexMap");
221             writer->write<uint32_t>(-1);
222         } else {
223             writer->write<uint32_t>(it->second);
224         }
225     }
226     writer->writeArray<Range>(mRanges, mRangesCount);
227     writer->writeArray<uint8_t>(mFamilyVec, mFamilyVecCount);
228     // No need to serialize mVSFamilyVec as it can be reconstructed easily from mFamilies.
229     std::vector<AxisTag> axes(mSupportedAxes.begin(), mSupportedAxes.end());
230     // Sort axes to be deterministic.
231     std::sort(axes.begin(), axes.end());
232     writer->writeArray<AxisTag>(axes.data(), axes.size());
233 }
234 
235 // static
collectAllFontFamilies(const std::vector<std::shared_ptr<FontCollection>> & fontCollections,std::vector<std::shared_ptr<FontFamily>> * outAllFontFamilies,std::unordered_map<std::shared_ptr<FontFamily>,uint32_t> * outFontFamilyToIndexMap)236 void FontCollection::collectAllFontFamilies(
237         const std::vector<std::shared_ptr<FontCollection>>& fontCollections,
238         std::vector<std::shared_ptr<FontFamily>>* outAllFontFamilies,
239         std::unordered_map<std::shared_ptr<FontFamily>, uint32_t>* outFontFamilyToIndexMap) {
240     for (const auto& fontCollection : fontCollections) {
241         for (const std::shared_ptr<FontFamily>& fontFamily : fontCollection->mFamilies) {
242             bool inserted =
243                     outFontFamilyToIndexMap->emplace(fontFamily, outAllFontFamilies->size()).second;
244             if (inserted) {
245                 outAllFontFamilies->push_back(fontFamily);
246             }
247         }
248     }
249 }
250 
251 // Special scores for the font fallback.
// kUnsupportedFontScore: the family supports neither the variation sequence nor its base
// character. getFamilyForChar() never selects a family with this score.
252 const uint32_t kUnsupportedFontScore = 0;
// kFirstFontScore: the family is the first one in the collection (or a custom fallback) and
// supports the request. getFamilyForChar() returns as soon as it sees this score.
253 const uint32_t kFirstFontScore = UINT32_MAX;
254 
255 // Calculates a font score.
256 // The score of the font family is based on three subscores.
257 //  - Coverage Score: How well the font family covers the given character or variation sequence.
258 //  - Locale Score: How well the font family is appropriate for the locale.
259 //  - Variant Score: Whether the font family matches the variant. Note that this variant is not the
260 //    one in BCP47. This is our own font variant (e.g., elegant, compact).
261 //
262 // Then, there is a priority for these three subscores as follow:
263 //   Coverage Score > Locale Score > Variant Score
264 // The returned score reflects this priority order.
265 //
266 // Note that there are two special scores.
267 //  - kUnsupportedFontScore: When the font family doesn't support the variation sequence or even its
268 //    base character.
269 //  - kFirstFontScore: When the font is the first font family in the collection and it supports the
270 //    given character or variation sequence.
calcFamilyScore(uint32_t ch,uint32_t vs,FamilyVariant variant,uint32_t localeListId,const std::shared_ptr<FontFamily> & fontFamily) const271 uint32_t FontCollection::calcFamilyScore(uint32_t ch, uint32_t vs, FamilyVariant variant,
272                                          uint32_t localeListId,
273                                          const std::shared_ptr<FontFamily>& fontFamily) const {
274     const uint32_t coverageScore = calcCoverageScore(ch, vs, localeListId, fontFamily);
275     if (coverageScore == kFirstFontScore || coverageScore == kUnsupportedFontScore) {
276         // No need to calculate other scores.
277         return coverageScore;
278     }
279 
280     const uint32_t localeScore = calcLocaleMatchingScore(localeListId, *fontFamily);
281     const uint32_t variantScore = calcVariantMatchingScore(variant, *fontFamily);
282 
283     // Subscores are encoded into 31 bits representation to meet the subscore priority.
284     // The highest 2 bits are for coverage score, then following 28 bits are for locale score,
285     // then the last 1 bit is for variant score.
286     return coverageScore << 29 | localeScore << 1 | variantScore;
287 }
288 
289 // Calculates a font score based on variation sequence coverage.
290 // - Returns kUnsupportedFontScore if the font doesn't support the variation sequence or its base
291 //   character.
292 // - Returns kFirstFontScore if the font family is the first font family in the collection and it
293 //   supports the given character or variation sequence.
294 // - Returns 3 if the font family supports the variation sequence.
295 // - Returns 2 if the vs is a color variation selector (U+FE0F) and if the font is an emoji font.
296 // - Returns 2 if the vs is a text variation selector (U+FE0E) and if the font is not an emoji font.
297 // - Returns 1 if the variation selector is not specified or if the font family only supports the
298 //   variation sequence's base character.
calcCoverageScore(uint32_t ch,uint32_t vs,uint32_t localeListId,const std::shared_ptr<FontFamily> & fontFamily) const299 uint32_t FontCollection::calcCoverageScore(uint32_t ch, uint32_t vs, uint32_t localeListId,
300                                            const std::shared_ptr<FontFamily>& fontFamily) const {
301     const bool hasVSGlyph = (vs != 0) && fontFamily->hasGlyph(ch, vs);
302     if (!hasVSGlyph && !fontFamily->getCoverage().get(ch)) {
303         // The font doesn't support either variation sequence or even the base character.
304         return kUnsupportedFontScore;
305     }
306 
307     if ((vs == 0 || hasVSGlyph) && (mFamilies[0] == fontFamily || fontFamily->isCustomFallback())) {
308         // If the first font family supports the given character or variation sequence, always use
309         // it.
310         return kFirstFontScore;
311     }
312 
313     if (vs != 0 && hasVSGlyph) {
314         return 3;
315     }
316 
317     bool colorEmojiRequest;
318     if (vs == EMOJI_STYLE_VS) {
319         colorEmojiRequest = true;
320     } else if (vs == TEXT_STYLE_VS) {
321         colorEmojiRequest = false;
322     } else {
323         switch (LocaleListCache::getById(localeListId).getEmojiStyle()) {
324             case EmojiStyle::EMOJI:
325                 colorEmojiRequest = true;
326                 break;
327             case EmojiStyle::TEXT:
328                 colorEmojiRequest = false;
329                 break;
330             case EmojiStyle::EMPTY:
331             case EmojiStyle::DEFAULT:
332             default:
333                 // Do not give any extra score for the default emoji style.
334                 return 1;
335                 break;
336         }
337     }
338 
339     return colorEmojiRequest == fontFamily->isColorEmojiFamily() ? 2 : 1;
340 }
341 
342 // Calculate font scores based on the script matching, subtag matching and primary locale matching.
343 //
344 // 1. If only the font's language matches or there is no matches between requested font and
345 //    supported font, then the font obtains a score of 0.
346 // 2. Without a match in language, considering subtag may change font's EmojiStyle over script,
347 //    a match in subtag gets a score of 2 and a match in scripts gains a score of 1.
348 // 3. Regarding to two elements matchings, language-and-subtag matching has a score of 4, while
349 //    language-and-script obtains a socre of 3 with the same reason above.
350 //
351 // If two locales in the requested list have the same locale score, the font matching with higher
352 // priority locale gets a higher score. For example, in the case the user requested locale list is
353 // "ja-Jpan,en-Latn". The score of for the font of "ja-Jpan" gets a higher score than the font of
354 // "en-Latn".
355 //
356 // To achieve score calculation with priorities, the locale score is determined as follows:
357 //   LocaleScore = s(0) * 5^(m - 1) + s(1) * 5^(m - 2) + ... + s(m - 2) * 5 + s(m - 1)
358 // Here, m is the maximum number of locales to be compared, and s(i) is the i-th locale's matching
359 // score. The possible values of s(i) are 0, 1, 2, 3 and 4.
calcLocaleMatchingScore(uint32_t userLocaleListId,const FontFamily & fontFamily)360 uint32_t FontCollection::calcLocaleMatchingScore(uint32_t userLocaleListId,
361                                                  const FontFamily& fontFamily) {
362     const LocaleList& localeList = LocaleListCache::getById(userLocaleListId);
363     const LocaleList& fontLocaleList = LocaleListCache::getById(fontFamily.localeListId());
364 
365     const size_t maxCompareNum = std::min(localeList.size(), FONT_LOCALE_LIMIT);
366     uint32_t score = 0;
367     for (size_t i = 0; i < maxCompareNum; ++i) {
368         score = score * 5u + localeList[i].calcScoreFor(fontLocaleList);
369     }
370     return score;
371 }
372 
373 // Calculates a font score based on variant ("compact" or "elegant") matching.
374 //  - Returns 1 if the font doesn't have variant or the variant matches with the text style.
375 //  - No score if the font has a variant but it doesn't match with the text style.
calcVariantMatchingScore(FamilyVariant variant,const FontFamily & fontFamily)376 uint32_t FontCollection::calcVariantMatchingScore(FamilyVariant variant,
377                                                   const FontFamily& fontFamily) {
378     const FamilyVariant familyVariant = fontFamily.variant();
379     if (familyVariant == FamilyVariant::DEFAULT) {
380         return 1;
381     }
382     if (familyVariant == variant) {
383         return 1;
384     }
385     if (variant == FamilyVariant::DEFAULT && familyVariant == FamilyVariant::COMPACT) {
386         // If default is requested, prefer compat variation.
387         return 1;
388     }
389     return 0;
390 }
391 
392 // Implement heuristic for choosing best-match font. Here are the rules:
393 // 1. If first font in the collection has the character, it wins.
394 // 2. Calculate a score for the font family. See comments in calcFamilyScore for the detail.
395 // 3. Highest score wins, with ties resolved to the first font.
396 // This method never returns nullptr.
getFamilyForChar(uint32_t ch,uint32_t vs,uint32_t localeListId,FamilyVariant variant) const397 FontCollection::FamilyMatchResult FontCollection::getFamilyForChar(uint32_t ch, uint32_t vs,
398                                                                    uint32_t localeListId,
399                                                                    FamilyVariant variant) const {
400     if (ch >= mMaxChar) {
401         return FamilyMatchResult::Builder().add(0).build();
402     }
403 
404     Range range = mRanges[ch >> kLogCharsPerPage];
405 
406     if (vs != 0) {
407         range = {0, static_cast<uint16_t>(mFamilies.size())};
408     }
409 
410     uint32_t bestScore = kUnsupportedFontScore;
411     FamilyMatchResult::Builder builder;
412 
413     for (size_t i = range.start; i < range.end; i++) {
414         const uint8_t familyIndex = vs == 0 ? mFamilyVec[i] : i;
415         const std::shared_ptr<FontFamily>& family = mFamilies[familyIndex];
416         const uint32_t score = calcFamilyScore(ch, vs, variant, localeListId, family);
417         if (score == kFirstFontScore) {
418             // If the first font family supports the given character or variation sequence, always
419             // use it.
420             return builder.add(familyIndex).build();
421         }
422         if (score != kUnsupportedFontScore && score >= bestScore) {
423             if (score > bestScore) {
424                 builder.reset();
425                 bestScore = score;
426             }
427             builder.add(familyIndex);
428         }
429     }
430     if (builder.empty()) {
431         UErrorCode errorCode = U_ZERO_ERROR;
432         const UNormalizer2* normalizer = unorm2_getNFDInstance(&errorCode);
433         if (U_SUCCESS(errorCode)) {
434             UChar decomposed[4];
435             int len = unorm2_getRawDecomposition(normalizer, ch, decomposed, 4, &errorCode);
436             if (U_SUCCESS(errorCode) && len > 0) {
437                 int off = 0;
438                 U16_NEXT_UNSAFE(decomposed, off, ch);
439                 return getFamilyForChar(ch, vs, localeListId, variant);
440             }
441         }
442         return FamilyMatchResult::Builder().add(0).build();
443     }
444     return builder.build();
445 }
446 
447 // Characters where we want to continue using existing font run for (or stick to the next run if
448 // they start a string), even if the font does not support them explicitly. These are handled
449 // properly by Minikin or HarfBuzz even if the font does not explicitly support them and it's
450 // usually meaningless to switch to a different font to display them.
doesNotNeedFontSupport(uint32_t c)451 static bool doesNotNeedFontSupport(uint32_t c) {
452     return c == 0x00AD                      // SOFT HYPHEN
453            || c == 0x034F                   // COMBINING GRAPHEME JOINER
454            || c == 0x061C                   // ARABIC LETTER MARK
455            || (0x200C <= c && c <= 0x200F)  // ZERO WIDTH NON-JOINER..RIGHT-TO-LEFT MARK
456            || (0x202A <= c && c <= 0x202E)  // LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE
457            || (0x2066 <= c && c <= 0x2069)  // LEFT-TO-RIGHT ISOLATE..POP DIRECTIONAL ISOLATE
458            || c == 0xFEFF                   // BYTE ORDER MARK
459            || isVariationSelector(c);
460 }
461 
// Punctuation and symbols that keep using the existing font run instead of triggering a fresh
// best-match computation over the fallback list.
static const uint32_t stickyAllowlist[] = {
        '!',
        ',',
        '-',
        '.',
        ':',
        ';',
        '?',
        0x00A0,  // NBSP
        0x2010,  // HYPHEN
        0x2011,  // NB_HYPHEN
        0x202F,  // NNBSP
        0x2640,  // FEMALE_SIGN,
        0x2642,  // MALE_SIGN,
        0x2695,  // STAFF_OF_AESCULAPIUS
};

// Returns true if `c` is one of the code points listed in stickyAllowlist.
static bool isStickyAllowlisted(uint32_t c) {
    for (const uint32_t allowed : stickyAllowlist) {
        if (allowed == c) {
            return true;
        }
    }
    return false;
}
481 
isCombining(uint32_t c)482 static inline bool isCombining(uint32_t c) {
483     return (U_GET_GC_MASK(c) & U_GC_M_MASK) != 0;
484 }
485 
hasVariationSelector(uint32_t baseCodepoint,uint32_t variationSelector) const486 bool FontCollection::hasVariationSelector(uint32_t baseCodepoint,
487                                           uint32_t variationSelector) const {
488     if (!isVariationSelector(variationSelector)) {
489         return false;
490     }
491     if (baseCodepoint >= mMaxChar) {
492         return false;
493     }
494 
495     // Currently mRanges can not be used here since it isn't aware of the variation sequence.
496     for (size_t i = 0; i < mVSFamilyVec.size(); i++) {
497         if (mVSFamilyVec[i]->hasGlyph(baseCodepoint, variationSelector)) {
498             return true;
499         }
500     }
501 
502     // Even if there is no cmap format 14 subtable entry for the given sequence, should return true
503     // for <char, text presentation selector> case since we have special fallback rule for the
504     // sequence. Note that we don't need to restrict this to already standardized variation
505     // sequences, since Unicode is adding variation sequences more frequently now and may even move
506     // towards allowing text and emoji variation selectors on any character.
507     if (variationSelector == TEXT_STYLE_VS) {
508         for (size_t i = 0; i < mFamilies.size(); ++i) {
509             if (!mFamilies[i]->isColorEmojiFamily() && mFamilies[i]->hasGlyph(baseCodepoint, 0)) {
510                 return true;
511             }
512         }
513     }
514 
515     return false;
516 }
517 
// U+FFFD; itemize() substitutes it for unpaired surrogates found while decoding the input text.
518 constexpr uint32_t REPLACEMENT_CHARACTER = 0xFFFD;
519 
intersect(FontCollection::FamilyMatchResult l,FontCollection::FamilyMatchResult r)520 FontCollection::FamilyMatchResult FontCollection::FamilyMatchResult::intersect(
521         FontCollection::FamilyMatchResult l, FontCollection::FamilyMatchResult r) {
522     if (l == r) {
523         return l;
524     }
525 
526     uint32_t li = 0;
527     uint32_t ri = 0;
528     FamilyMatchResult::Builder b;
529     while (li < l.size() && ri < r.size()) {
530         if (l[li] < r[ri]) {
531             li++;
532         } else if (l[li] > r[ri]) {
533             ri++;
534         } else {  // l[li] == r[ri]
535             b.add(l[li]);
536             li++;
537             ri++;
538         }
539     }
540     return b.build();
541 }
542 
itemize(U16StringPiece text,FontStyle,uint32_t localeListId,FamilyVariant familyVariant,uint32_t runMax) const543 std::vector<FontCollection::Run> FontCollection::itemize(U16StringPiece text, FontStyle,
544                                                          uint32_t localeListId,
545                                                          FamilyVariant familyVariant,
546                                                          uint32_t runMax) const {
547     const uint16_t* string = text.data();
548     const uint32_t string_size = text.size();
549 
550     FamilyMatchResult lastFamilyIndices = FamilyMatchResult();
551 
552     if (string_size == 0) {
553         return std::vector<Run>();
554     }
555 
556     const uint32_t kEndOfString = 0xFFFFFFFF;
557     std::vector<Run> result;
558     Run* run = nullptr;
559 
560     uint32_t nextCh = 0;
561     uint32_t prevCh = 0;
562     size_t nextUtf16Pos = 0;
563     size_t readLength = 0;
564     U16_NEXT(string, readLength, string_size, nextCh);
565     if (U_IS_SURROGATE(nextCh)) {
566         nextCh = REPLACEMENT_CHARACTER;
567     }
568 
569     do {
570         const uint32_t ch = nextCh;
571         const size_t utf16Pos = nextUtf16Pos;
572         nextUtf16Pos = readLength;
573         if (readLength < string_size) {
574             U16_NEXT(string, readLength, string_size, nextCh);
575             if (U_IS_SURROGATE(nextCh)) {
576                 nextCh = REPLACEMENT_CHARACTER;
577             }
578         } else {
579             nextCh = kEndOfString;
580         }
581 
582         bool shouldContinueRun = false;
583         if (doesNotNeedFontSupport(ch)) {
584             // Always continue if the character is a format character not needed to be in the font.
585             shouldContinueRun = true;
586         } else if (!lastFamilyIndices.empty() && (isStickyAllowlisted(ch) || isCombining(ch))) {
587             // Continue using existing font as long as it has coverage and is whitelisted.
588 
589             const std::shared_ptr<FontFamily>& lastFamily = mFamilies[lastFamilyIndices[0]];
590             if (lastFamily->isColorEmojiFamily()) {
591                 // If the last family is color emoji font, find the longest family.
592                 shouldContinueRun = false;
593                 for (uint8_t ix : lastFamilyIndices) {
594                     shouldContinueRun |= mFamilies[ix]->getCoverage().get(ch);
595                 }
596             } else {
597                 shouldContinueRun = lastFamily->getCoverage().get(ch);
598             }
599         }
600 
601         if (!shouldContinueRun) {
602             FamilyMatchResult familyIndices = getFamilyForChar(
603                     ch, isVariationSelector(nextCh) ? nextCh : 0, localeListId, familyVariant);
604             bool breakRun;
605             if (utf16Pos == 0 || lastFamilyIndices.empty()) {
606                 breakRun = true;
607             } else {
608                 const std::shared_ptr<FontFamily>& lastFamily = mFamilies[lastFamilyIndices[0]];
609                 if (lastFamily->isColorEmojiFamily()) {
610                     FamilyMatchResult intersection =
611                             FamilyMatchResult::intersect(familyIndices, lastFamilyIndices);
612                     if (intersection.empty()) {
613                         breakRun = true;  // None of last family can draw the given char.
614                     } else {
615                         breakRun = isEmojiBreak(prevCh, ch);
616                         if (!breakRun) {
617                             // To select sequence supported families, update family indices with the
618                             // intersection between the supported families between prev char and
619                             // current char.
620                             familyIndices = intersection;
621                             lastFamilyIndices = intersection;
622                             run->familyMatch = intersection;
623                         }
624                     }
625                 } else {
626                     breakRun = familyIndices[0] != lastFamilyIndices[0];
627                 }
628             }
629 
630             if (breakRun) {
631                 size_t start = utf16Pos;
632                 // Workaround for combining marks and emoji modifiers until we implement
633                 // per-cluster font selection: if a combining mark or an emoji modifier is found in
634                 // a different font that also supports the previous character, attach previous
635                 // character to the new run. U+20E3 COMBINING ENCLOSING KEYCAP, used in emoji, is
636                 // handled properly by this since it's a combining mark too.
637                 if (utf16Pos != 0 &&
638                     (isCombining(ch) || (isEmojiModifier(ch) && isEmojiBase(prevCh)))) {
639                     for (uint8_t ix : familyIndices) {
640                         if (mFamilies[ix]->getCoverage().get(prevCh)) {
641                             const size_t prevChLength = U16_LENGTH(prevCh);
642                             if (run != nullptr) {
643                                 run->end -= prevChLength;
644                                 if (run->start == run->end) {
645                                     result.pop_back();
646                                 }
647                             }
648                             start -= prevChLength;
649                             break;
650                         }
651                     }
652                 }
653                 if (lastFamilyIndices.empty()) {
654                     // This is the first family ever assigned. We are either seeing the very first
655                     // character (which means start would already be zero), or we have only seen
656                     // characters that don't need any font support (which means we need to adjust
657                     // start to be 0 to include those characters).
658                     start = 0;
659                 }
660                 result.push_back({familyIndices, static_cast<int>(start), 0});
661                 run = &result.back();
662                 lastFamilyIndices = run->familyMatch;
663             }
664         }
665         prevCh = ch;
666         if (run != nullptr) {
667             run->end = nextUtf16Pos;  // exclusive
668         }
669 
670         // Stop searching the remaining characters if the result length gets runMax + 2.
671         // When result.size gets runMax + 2 here, the run between [0, runMax) was finalized.
672         // If the result.size() equals to runMax, the run may be still expanding.
673         // if the result.size() equals to runMax + 2, the last run may be removed and the last run
674         // may be extended into the previous run by the above workaround.
675         if (result.size() >= 2 && runMax == result.size() - 2) {
676             break;
677         }
678     } while (nextCh != kEndOfString);
679 
680     if (lastFamilyIndices.empty()) {
681         // No character needed any font support, so it doesn't really matter which font they end up
682         // getting displayed in. We put the whole string in one run, using the first font.
683         result.push_back(
684                 {FamilyMatchResult::Builder().add(0).build(), 0, static_cast<int>(string_size)});
685     }
686 
687     if (result.size() > runMax) {
688         // The itemization has terminated since it reaches the runMax. Remove last unfinalized runs.
689         return std::vector<Run>(result.begin(), result.begin() + runMax);
690     }
691 
692     return result;
693 }
694 
getBestFont(U16StringPiece text,const Run & run,FontStyle style)695 FakedFont FontCollection::getBestFont(U16StringPiece text, const Run& run, FontStyle style) {
696     uint8_t bestIndex = 0;
697     uint32_t bestScore = 0xFFFFFFFF;
698 
699     const std::shared_ptr<FontFamily>& family = mFamilies[run.familyMatch[0]];
700     if (family->isColorEmojiFamily() && run.familyMatch.size() > 1) {
701         for (size_t i = 0; i < run.familyMatch.size(); ++i) {
702             const std::shared_ptr<FontFamily>& family = mFamilies[run.familyMatch[i]];
703             const HbFontUniquePtr& font = family->getFont(0)->baseFont();
704             uint32_t score = getGlyphScore(text, run.start, run.end, font);
705 
706             if (score < bestScore) {
707                 bestIndex = run.familyMatch[i];
708                 bestScore = score;
709             }
710         }
711     } else {
712         bestIndex = run.familyMatch[0];
713     }
714     return mFamilies[bestIndex]->getClosestMatch(style);
715 }
716 
baseFontFaked(FontStyle style)717 FakedFont FontCollection::baseFontFaked(FontStyle style) {
718     return mFamilies[0]->getClosestMatch(style);
719 }
720 
createCollectionWithVariation(const std::vector<FontVariation> & variations)721 std::shared_ptr<FontCollection> FontCollection::createCollectionWithVariation(
722         const std::vector<FontVariation>& variations) {
723     if (variations.empty() || mSupportedAxes.empty()) {
724         return nullptr;
725     }
726 
727     bool hasSupportedAxis = false;
728     for (const FontVariation& variation : variations) {
729         if (mSupportedAxes.find(variation.axisTag) != mSupportedAxes.end()) {
730             hasSupportedAxis = true;
731             break;
732         }
733     }
734     if (!hasSupportedAxis) {
735         // None of variation axes are supported by this font collection.
736         return nullptr;
737     }
738 
739     std::vector<std::shared_ptr<FontFamily>> families;
740     for (const std::shared_ptr<FontFamily>& family : mFamilies) {
741         std::shared_ptr<FontFamily> newFamily = family->createFamilyWithVariation(variations);
742         if (newFamily) {
743             families.push_back(newFamily);
744         } else {
745             families.push_back(family);
746         }
747     }
748 
749     return std::shared_ptr<FontCollection>(new FontCollection(families));
750 }
751 
getId() const752 uint32_t FontCollection::getId() const {
753     return mId;
754 }
755 
756 }  // namespace minikin
757