• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2013 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define LOG_TAG "Minikin"
18 
19 #include "minikin/FontCollection.h"
20 
21 #include <log/log.h>
22 #include <unicode/unorm2.h>
23 
24 #include <algorithm>
25 #include <unordered_set>
26 
27 #include "Locale.h"
28 #include "LocaleListCache.h"
29 #include "MinikinInternal.h"
30 #include "minikin/Characters.h"
31 #include "minikin/Emoji.h"
32 #include "minikin/FontFileParser.h"
33 
34 using std::vector;
35 
36 namespace minikin {
37 
// Returns |a| when it compares greater than |b|, otherwise |b|.
template <typename T>
static inline T max(T a, T b) {
    if (a > b) {
        return a;
    }
    return b;
}
42 
// Variation selectors that explicitly request emoji (U+FE0F) or text (U+FE0E) presentation.
constexpr uint32_t EMOJI_STYLE_VS = 0xFE0F;
constexpr uint32_t TEXT_STYLE_VS = 0xFE0E;

// Source of FontCollection ids: every constructed collection takes the current value and
// post-increments it.
static std::atomic<uint32_t> gNextCollectionId{0};
47 
48 namespace {
49 
isEmojiBreak(uint32_t prevCh,uint32_t ch)50 inline bool isEmojiBreak(uint32_t prevCh, uint32_t ch) {
51     return !(isEmojiModifier(ch) || (isRegionalIndicator(prevCh) && isRegionalIndicator(ch)) ||
52              isKeyCap(ch) || isTagChar(ch) || ch == CHAR_ZWJ || prevCh == CHAR_ZWJ);
53 }
54 
55 // Lower is better
getGlyphScore(U16StringPiece text,uint32_t start,uint32_t end,const HbFontUniquePtr & font)56 uint32_t getGlyphScore(U16StringPiece text, uint32_t start, uint32_t end,
57                        const HbFontUniquePtr& font) {
58     HbBufferUniquePtr buffer(hb_buffer_create());
59     hb_buffer_set_direction(buffer.get(), HB_DIRECTION_LTR);
60     hb_buffer_add_utf16(buffer.get(), text.data() + start, end - start, 0, end - start);
61     hb_buffer_guess_segment_properties(buffer.get());
62 
63     unsigned int numGlyphs = -1;
64     hb_shape(font.get(), buffer.get(), nullptr, 0);
65     hb_glyph_info_t* info = hb_buffer_get_glyph_infos(buffer.get(), &numGlyphs);
66 
67     // HarfBuzz squashed unsupported tag sequence into first emoji glyph. So, we cannot use glyph
68     // count for the font selection score. Give extra score if the base score is different from the
69     // first glyph.
70     if (numGlyphs == 1) {
71         constexpr uint32_t TAG_SEQUENCE_FALLBACK_PENALTY = 0x10000;
72 
73         uint32_t ch = 0;
74         const uint16_t* string = text.data();
75         const uint32_t string_size = text.size();
76         uint32_t readLength = 0;
77 
78         U16_NEXT(string, readLength, string_size, ch);
79         if (U_IS_SURROGATE(ch)) {
80             return numGlyphs;  // Broken surrogate pair.
81         }
82 
83         if (readLength >= string_size) {
84             return numGlyphs;  // No more characters remaining.
85         }
86 
87         uint32_t nextCh = 0;
88         U16_NEXT(string, readLength, string_size, nextCh);
89 
90         if (!isTagChar(nextCh)) {
91             return numGlyphs;  // Not a tag sequence.
92         }
93 
94         uint32_t composedGlyphId = info[0].codepoint;
95 
96         // Shape only the first base emoji.
97         hb_buffer_reset(buffer.get());
98         hb_buffer_set_direction(buffer.get(), HB_DIRECTION_LTR);
99         hb_buffer_add_codepoints(buffer.get(), &ch, 1, 0, 1);
100         hb_buffer_guess_segment_properties(buffer.get());
101 
102         unsigned int numGlyphs = -1;
103         hb_shape(font.get(), buffer.get(), nullptr, 0);
104         info = hb_buffer_get_glyph_infos(buffer.get(), &numGlyphs);
105 
106         if (numGlyphs != 1) {
107             // If the single code point of the first base emoji is decomposed to multiple glyphs,
108             // we don't support it.
109             return numGlyphs;
110         }
111 
112         uint32_t baseGlyphId = info[0].codepoint;
113         if (composedGlyphId == baseGlyphId) {
114             return numGlyphs + TAG_SEQUENCE_FALLBACK_PENALTY;
115         } else {
116             return numGlyphs;
117         }
118     }
119 
120     return numGlyphs;
121 }
122 
123 }  // namespace
124 
125 // static
create(std::shared_ptr<FontFamily> && typeface)126 std::shared_ptr<FontCollection> FontCollection::create(std::shared_ptr<FontFamily>&& typeface) {
127     std::vector<std::shared_ptr<FontFamily>> typefaces;
128     typefaces.push_back(typeface);
129     return create(typefaces);
130 }
131 
132 // static
create(const vector<std::shared_ptr<FontFamily>> & typefaces)133 std::shared_ptr<FontCollection> FontCollection::create(
134         const vector<std::shared_ptr<FontFamily>>& typefaces) {
135     // TODO(b/174672300): Revert back to make_shared.
136     return std::shared_ptr<FontCollection>(new FontCollection(typefaces));
137 }
138 
FontCollection(const vector<std::shared_ptr<FontFamily>> & typefaces)139 FontCollection::FontCollection(const vector<std::shared_ptr<FontFamily>>& typefaces)
140         : mMaxChar(0), mSupportedAxes(nullptr) {
141     init(typefaces);
142 }
143 
init(const vector<std::shared_ptr<FontFamily>> & typefaces)144 void FontCollection::init(const vector<std::shared_ptr<FontFamily>>& typefaces) {
145     mId = gNextCollectionId++;
146     vector<uint32_t> lastChar;
147     size_t nTypefaces = typefaces.size();
148     const FontStyle defaultStyle;
149     auto families = std::make_shared<vector<std::shared_ptr<FontFamily>>>();
150     std::unordered_set<AxisTag> supportedAxesSet;
151     for (size_t i = 0; i < nTypefaces; i++) {
152         const std::shared_ptr<FontFamily>& family = typefaces[i];
153         if (family->getClosestMatch(defaultStyle).font == nullptr) {
154             continue;
155         }
156         const SparseBitSet& coverage = family->getCoverage();
157         families->emplace_back(family);
158         if (family->hasVSTable()) {
159             mVSFamilyVec.push_back(family);
160         }
161         mMaxChar = max(mMaxChar, coverage.length());
162         lastChar.push_back(coverage.nextSetBit(0));
163 
164         for (size_t i = 0; i < family->getSupportedAxesCount(); i++) {
165             supportedAxesSet.insert(family->getSupportedAxisAt(i));
166         }
167     }
168     // mMaybeSharedFamilies is not shared.
169     mMaybeSharedFamilies = families;
170     mFamilyCount = families->size();
171     mFamilyIndices = nullptr;
172     MINIKIN_ASSERT(mFamilyCount > 0, "Font collection must have at least one valid typeface");
173     MINIKIN_ASSERT(mFamilyCount <= MAX_FAMILY_COUNT,
174                    "Font collection may only have up to %d font families.", MAX_FAMILY_COUNT);
175     // Although OpenType supports up to 2^16-1 axes per font,
176     // mSupportedAxesCount may exceed 2^16-1 as we have multiple fonts.
177     mSupportedAxesCount = static_cast<uint32_t>(supportedAxesSet.size());
178     if (mSupportedAxesCount > 0) {
179         mSupportedAxes = sortedArrayFromSet(supportedAxesSet);
180     }
181     size_t nPages = (mMaxChar + kPageMask) >> kLogCharsPerPage;
182     // TODO: Use variation selector map for mRanges construction.
183     // A font can have a glyph for a base code point and variation selector pair but no glyph for
184     // the base code point without variation selector. The family won't be listed in the range in
185     // this case.
186     mOwnedRanges = std::make_unique<Range[]>(nPages);
187     mRanges = mOwnedRanges.get();
188     mRangesCount = nPages;
189     for (size_t i = 0; i < nPages; i++) {
190         Range* range = &mOwnedRanges[i];
191         range->start = mOwnedFamilyVec.size();
192         for (size_t j = 0; j < getFamilyCount(); j++) {
193             if (lastChar[j] < (i + 1) << kLogCharsPerPage) {
194                 const std::shared_ptr<FontFamily>& family = getFamilyAt(j);
195                 mOwnedFamilyVec.push_back(static_cast<uint8_t>(j));
196                 uint32_t nextChar = family->getCoverage().nextSetBit((i + 1) << kLogCharsPerPage);
197                 lastChar[j] = nextChar;
198             }
199         }
200         range->end = mOwnedFamilyVec.size();
201     }
202     // See the comment in Range for more details.
203     LOG_ALWAYS_FATAL_IF(mOwnedFamilyVec.size() >= 0xFFFF,
204                         "Exceeded the maximum indexable cmap coverage.");
205     mFamilyVec = mOwnedFamilyVec.data();
206     mFamilyVecCount = mOwnedFamilyVec.size();
207 }
208 
FontCollection(BufferReader * reader,const std::shared_ptr<std::vector<std::shared_ptr<FontFamily>>> & families)209 FontCollection::FontCollection(
210         BufferReader* reader,
211         const std::shared_ptr<std::vector<std::shared_ptr<FontFamily>>>& families)
212         : mSupportedAxes(nullptr) {
213     mId = gNextCollectionId++;
214     mMaxChar = reader->read<uint32_t>();
215     mMaybeSharedFamilies = families;
216     std::tie(mFamilyIndices, mFamilyCount) = reader->readArray<uint32_t>();
217     for (size_t i = 0; i < getFamilyCount(); i++) {
218         const auto& family = getFamilyAt(i);
219         if (family->hasVSTable()) mVSFamilyVec.emplace_back(family);
220     }
221     // Range is two packed uint16_t
222     static_assert(sizeof(Range) == 4);
223     std::tie(mRanges, mRangesCount) = reader->readArray<Range>();
224     std::tie(mFamilyVec, mFamilyVecCount) = reader->readArray<uint8_t>();
225     const auto& [axesPtr, axesCount] = reader->readArray<AxisTag>();
226     mSupportedAxesCount = axesCount;
227     if (axesCount > 0) {
228         mSupportedAxes = std::unique_ptr<AxisTag[]>(new AxisTag[axesCount]);
229         std::copy(axesPtr, axesPtr + axesCount, mSupportedAxes.get());
230     }
231 }
232 
writeTo(BufferWriter * writer,const std::unordered_map<std::shared_ptr<FontFamily>,uint32_t> & fontFamilyToIndexMap) const233 void FontCollection::writeTo(BufferWriter* writer,
234                              const std::unordered_map<std::shared_ptr<FontFamily>, uint32_t>&
235                                      fontFamilyToIndexMap) const {
236     writer->write<uint32_t>(mMaxChar);
237     std::vector<uint32_t> indices;
238     indices.reserve(getFamilyCount());
239     for (size_t i = 0; i < getFamilyCount(); ++i) {
240         const std::shared_ptr<FontFamily>& fontFamily = getFamilyAt(i);
241         auto it = fontFamilyToIndexMap.find(fontFamily);
242         if (it == fontFamilyToIndexMap.end()) {
243             ALOGE("fontFamily not found in fontFamilyToIndexMap");
244         } else {
245             indices.push_back(it->second);
246         }
247     }
248     writer->writeArray<uint32_t>(indices.data(), indices.size());
249     writer->writeArray<Range>(mRanges, mRangesCount);
250     writer->writeArray<uint8_t>(mFamilyVec, mFamilyVecCount);
251     // No need to serialize mVSFamilyVec as it can be reconstructed easily from mFamilies.
252     writer->writeArray<AxisTag>(mSupportedAxes.get(), mSupportedAxesCount);
253 }
254 
255 // static
readVector(BufferReader * reader)256 std::vector<std::shared_ptr<FontCollection>> FontCollection::readVector(BufferReader* reader) {
257     auto allFontFamilies = std::make_shared<std::vector<std::shared_ptr<FontFamily>>>(
258             FontFamily::readVector(reader));
259     uint32_t count = reader->read<uint32_t>();
260     std::vector<std::shared_ptr<FontCollection>> fontCollections;
261     fontCollections.reserve(count);
262     for (uint32_t i = 0; i < count; i++) {
263         fontCollections.emplace_back(new FontCollection(reader, allFontFamilies));
264     }
265     return fontCollections;
266 }
267 
268 // static
writeVector(BufferWriter * writer,const std::vector<std::shared_ptr<FontCollection>> & fontCollections)269 void FontCollection::writeVector(
270         BufferWriter* writer, const std::vector<std::shared_ptr<FontCollection>>& fontCollections) {
271     std::vector<std::shared_ptr<FontFamily>> allFontFamilies;
272     // Note: operator== for shared_ptr compares raw pointer values.
273     std::unordered_map<std::shared_ptr<FontFamily>, uint32_t> fontFamilyToIndexMap;
274     collectAllFontFamilies(fontCollections, &allFontFamilies, &fontFamilyToIndexMap);
275 
276     FontFamily::writeVector(writer, allFontFamilies);
277     writer->write<uint32_t>(fontCollections.size());
278     for (const auto& fontCollection : fontCollections) {
279         fontCollection->writeTo(writer, fontFamilyToIndexMap);
280     }
281 }
282 
283 // static
collectAllFontFamilies(const std::vector<std::shared_ptr<FontCollection>> & fontCollections,std::vector<std::shared_ptr<FontFamily>> * outAllFontFamilies,std::unordered_map<std::shared_ptr<FontFamily>,uint32_t> * outFontFamilyToIndexMap)284 void FontCollection::collectAllFontFamilies(
285         const std::vector<std::shared_ptr<FontCollection>>& fontCollections,
286         std::vector<std::shared_ptr<FontFamily>>* outAllFontFamilies,
287         std::unordered_map<std::shared_ptr<FontFamily>, uint32_t>* outFontFamilyToIndexMap) {
288     for (const auto& fontCollection : fontCollections) {
289         for (size_t i = 0; i < fontCollection->getFamilyCount(); ++i) {
290             const std::shared_ptr<FontFamily>& fontFamily = fontCollection->getFamilyAt(i);
291             bool inserted =
292                     outFontFamilyToIndexMap->emplace(fontFamily, outAllFontFamilies->size()).second;
293             if (inserted) {
294                 outAllFontFamilies->push_back(fontFamily);
295             }
296         }
297     }
298 }
299 
// Sentinel scores for the font fallback: the lowest value marks a family that cannot be used, the
// highest value marks a family that is selected immediately.
constexpr uint32_t kUnsupportedFontScore = 0;
constexpr uint32_t kFirstFontScore = UINT32_MAX;
303 
304 // Calculates a font score.
305 // The score of the font family is based on three subscores.
306 //  - Coverage Score: How well the font family covers the given character or variation sequence.
307 //  - Locale Score: How well the font family is appropriate for the locale.
308 //  - Variant Score: Whether the font family matches the variant. Note that this variant is not the
309 //    one in BCP47. This is our own font variant (e.g., elegant, compact).
310 //
311 // Then, there is a priority for these three subscores as follow:
312 //   Coverage Score > Locale Score > Variant Score
313 // The returned score reflects this priority order.
314 //
315 // Note that there are two special scores.
316 //  - kUnsupportedFontScore: When the font family doesn't support the variation sequence or even its
317 //    base character.
318 //  - kFirstFontScore: When the font is the first font family in the collection and it supports the
319 //    given character or variation sequence.
calcFamilyScore(uint32_t ch,uint32_t vs,FamilyVariant variant,uint32_t localeListId,const std::shared_ptr<FontFamily> & fontFamily) const320 uint32_t FontCollection::calcFamilyScore(uint32_t ch, uint32_t vs, FamilyVariant variant,
321                                          uint32_t localeListId,
322                                          const std::shared_ptr<FontFamily>& fontFamily) const {
323     const uint32_t coverageScore = calcCoverageScore(ch, vs, localeListId, fontFamily);
324     if (coverageScore == kFirstFontScore || coverageScore == kUnsupportedFontScore) {
325         // No need to calculate other scores.
326         return coverageScore;
327     }
328 
329     const uint32_t localeScore = calcLocaleMatchingScore(localeListId, *fontFamily);
330     const uint32_t variantScore = calcVariantMatchingScore(variant, *fontFamily);
331 
332     // Subscores are encoded into 31 bits representation to meet the subscore priority.
333     // The highest 2 bits are for coverage score, then following 28 bits are for locale score,
334     // then the last 1 bit is for variant score.
335     return coverageScore << 29 | localeScore << 1 | variantScore;
336 }
337 
338 // Returns true if
339 //  - the fontFamily is a developer specified custom fallback.
340 //  - no custom fallback is provided and the fontFamily is a default fallback.
isPrimaryFamily(const std::shared_ptr<FontFamily> & fontFamily) const341 bool FontCollection::isPrimaryFamily(const std::shared_ptr<FontFamily>& fontFamily) const {
342     // If the font family is provided by developers, it is primary.
343     if (fontFamily->isCustomFallback()) {
344         return true;
345     }
346 
347     if (getFamilyAt(0)->isCustomFallback()) {
348         return false;
349     } else {
350         return fontFamily->isDefaultFallback();
351     }
352 }
353 
354 // Calculates a font score based on variation sequence coverage.
355 // - Returns kUnsupportedFontScore if the font doesn't support the variation sequence or its base
356 //   character.
357 // - Returns kFirstFontScore if the font family is the first font family in the collection and it
358 //   supports the given character or variation sequence.
359 // - Returns 3 if the font family supports the variation sequence.
360 // - Returns 2 if the vs is a color variation selector (U+FE0F) and if the font is an emoji font.
361 // - Returns 2 if the vs is a text variation selector (U+FE0E) and if the font is not an emoji font.
362 // - Returns 1 if the variation selector is not specified or if the font family only supports the
363 //   variation sequence's base character.
calcCoverageScore(uint32_t ch,uint32_t vs,uint32_t localeListId,const std::shared_ptr<FontFamily> & fontFamily) const364 uint32_t FontCollection::calcCoverageScore(uint32_t ch, uint32_t vs, uint32_t localeListId,
365                                            const std::shared_ptr<FontFamily>& fontFamily) const {
366     const bool hasVSGlyph = (vs != 0) && fontFamily->hasGlyph(ch, vs);
367     if (!hasVSGlyph && !fontFamily->getCoverage().get(ch)) {
368         // The font doesn't support either variation sequence or even the base character.
369         return kUnsupportedFontScore;
370     }
371 
372     if ((vs == 0 || hasVSGlyph) && isPrimaryFamily(fontFamily)) {
373         // If the first font family supports the given character or variation sequence, always use
374         // it.
375         return kFirstFontScore;
376     }
377 
378     if (vs != 0 && hasVSGlyph) {
379         return 3;
380     }
381 
382     bool colorEmojiRequest;
383     if (vs == EMOJI_STYLE_VS) {
384         colorEmojiRequest = true;
385     } else if (vs == TEXT_STYLE_VS) {
386         colorEmojiRequest = false;
387     } else {
388         switch (LocaleListCache::getById(localeListId).getEmojiStyle()) {
389             case EmojiStyle::EMOJI:
390                 colorEmojiRequest = true;
391                 break;
392             case EmojiStyle::TEXT:
393                 colorEmojiRequest = false;
394                 break;
395             case EmojiStyle::EMPTY:
396             case EmojiStyle::DEFAULT:
397             default:
398                 // Do not give any extra score for the default emoji style.
399                 return 1;
400                 break;
401         }
402     }
403 
404     return colorEmojiRequest == fontFamily->isColorEmojiFamily() ? 2 : 1;
405 }
406 
407 // Calculate font scores based on the script matching, subtag matching and primary locale matching.
408 //
409 // 1. If only the font's language matches or there is no matches between requested font and
410 //    supported font, then the font obtains a score of 0.
411 // 2. Without a match in language, considering subtag may change font's EmojiStyle over script,
412 //    a match in subtag gets a score of 2 and a match in scripts gains a score of 1.
413 // 3. Regarding to two elements matchings, language-and-subtag matching has a score of 4, while
414 //    language-and-script obtains a socre of 3 with the same reason above.
415 //
416 // If two locales in the requested list have the same locale score, the font matching with higher
417 // priority locale gets a higher score. For example, in the case the user requested locale list is
418 // "ja-Jpan,en-Latn". The score of for the font of "ja-Jpan" gets a higher score than the font of
419 // "en-Latn".
420 //
421 // To achieve score calculation with priorities, the locale score is determined as follows:
422 //   LocaleScore = s(0) * 5^(m - 1) + s(1) * 5^(m - 2) + ... + s(m - 2) * 5 + s(m - 1)
423 // Here, m is the maximum number of locales to be compared, and s(i) is the i-th locale's matching
424 // score. The possible values of s(i) are 0, 1, 2, 3 and 4.
calcLocaleMatchingScore(uint32_t userLocaleListId,const FontFamily & fontFamily)425 uint32_t FontCollection::calcLocaleMatchingScore(uint32_t userLocaleListId,
426                                                  const FontFamily& fontFamily) {
427     const LocaleList& localeList = LocaleListCache::getById(userLocaleListId);
428     const LocaleList& fontLocaleList = LocaleListCache::getById(fontFamily.localeListId());
429 
430     const size_t maxCompareNum = std::min(localeList.size(), FONT_LOCALE_LIMIT);
431     uint32_t score = 0;
432     for (size_t i = 0; i < maxCompareNum; ++i) {
433         score = score * 5u + localeList[i].calcScoreFor(fontLocaleList);
434     }
435     return score;
436 }
437 
438 // Calculates a font score based on variant ("compact" or "elegant") matching.
439 //  - Returns 1 if the font doesn't have variant or the variant matches with the text style.
440 //  - No score if the font has a variant but it doesn't match with the text style.
calcVariantMatchingScore(FamilyVariant variant,const FontFamily & fontFamily)441 uint32_t FontCollection::calcVariantMatchingScore(FamilyVariant variant,
442                                                   const FontFamily& fontFamily) {
443     const FamilyVariant familyVariant = fontFamily.variant();
444     if (familyVariant == FamilyVariant::DEFAULT) {
445         return 1;
446     }
447     if (familyVariant == variant) {
448         return 1;
449     }
450     if (variant == FamilyVariant::DEFAULT && familyVariant == FamilyVariant::COMPACT) {
451         // If default is requested, prefer compat variation.
452         return 1;
453     }
454     return 0;
455 }
456 
457 // Implement heuristic for choosing best-match font. Here are the rules:
458 // 1. If first font in the collection has the character, it wins.
459 // 2. Calculate a score for the font family. See comments in calcFamilyScore for the detail.
460 // 3. Highest score wins, with ties resolved to the first font.
461 // This method never returns nullptr.
getFamilyForChar(uint32_t ch,uint32_t vs,uint32_t localeListId,FamilyVariant variant) const462 FontCollection::FamilyMatchResult FontCollection::getFamilyForChar(uint32_t ch, uint32_t vs,
463                                                                    uint32_t localeListId,
464                                                                    FamilyVariant variant) const {
465     if (ch >= mMaxChar) {
466         return FamilyMatchResult::Builder().add(0).build();
467     }
468 
469     Range range = mRanges[ch >> kLogCharsPerPage];
470 
471     if (vs != 0) {
472         range = {0, static_cast<uint16_t>(getFamilyCount())};
473     }
474 
475     uint32_t bestScore = kUnsupportedFontScore;
476     FamilyMatchResult::Builder builder;
477 
478     for (size_t i = range.start; i < range.end; i++) {
479         const uint8_t familyIndex = vs == 0 ? mFamilyVec[i] : i;
480         const std::shared_ptr<FontFamily>& family = getFamilyAt(familyIndex);
481         const uint32_t score = calcFamilyScore(ch, vs, variant, localeListId, family);
482         if (score == kFirstFontScore) {
483             // If the first font family supports the given character or variation sequence, always
484             // use it.
485             return builder.add(familyIndex).build();
486         }
487         if (score != kUnsupportedFontScore && score >= bestScore) {
488             if (score > bestScore) {
489                 builder.reset();
490                 bestScore = score;
491             }
492             builder.add(familyIndex);
493         }
494     }
495     if (builder.empty()) {
496         UErrorCode errorCode = U_ZERO_ERROR;
497         const UNormalizer2* normalizer = unorm2_getNFDInstance(&errorCode);
498         if (U_SUCCESS(errorCode)) {
499             UChar decomposed[4];
500             int len = unorm2_getRawDecomposition(normalizer, ch, decomposed, 4, &errorCode);
501             if (U_SUCCESS(errorCode) && len > 0) {
502                 int off = 0;
503                 U16_NEXT_UNSAFE(decomposed, off, ch);
504                 return getFamilyForChar(ch, vs, localeListId, variant);
505             }
506         }
507         return FamilyMatchResult::Builder().add(0).build();
508     }
509     return builder.build();
510 }
511 
512 // Characters where we want to continue using existing font run for (or stick to the next run if
513 // they start a string), even if the font does not support them explicitly. These are handled
514 // properly by Minikin or HarfBuzz even if the font does not explicitly support them and it's
515 // usually meaningless to switch to a different font to display them.
doesNotNeedFontSupport(uint32_t c)516 static bool doesNotNeedFontSupport(uint32_t c) {
517     return c == 0x00AD                      // SOFT HYPHEN
518            || c == 0x034F                   // COMBINING GRAPHEME JOINER
519            || c == 0x061C                   // ARABIC LETTER MARK
520            || (0x200C <= c && c <= 0x200F)  // ZERO WIDTH NON-JOINER..RIGHT-TO-LEFT MARK
521            || (0x202A <= c && c <= 0x202E)  // LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE
522            || (0x2066 <= c && c <= 0x2069)  // LEFT-TO-RIGHT ISOLATE..POP DIRECTIONAL ISOLATE
523            || c == 0xFEFF                   // BYTE ORDER MARK
524            || isVariationSelector(c);
525 }
526 
// Characters that keep using the existing font run instead of triggering a new best-match lookup
// in the fallback list.
static const uint32_t stickyAllowlist[] = {
        '!',    ',', '-', '.', ':', ';', '?',
        0x00A0,  // NBSP
        0x2010,  // HYPHEN
        0x2011,  // NB_HYPHEN
        0x202F,  // NNBSP
        0x2640,  // FEMALE_SIGN,
        0x2642,  // MALE_SIGN,
        0x2695,  // STAFF_OF_AESCULAPIUS
};

// Returns true when |c| is one of the entries of stickyAllowlist.
static bool isStickyAllowlisted(uint32_t c) {
    for (const uint32_t allowed : stickyAllowlist) {
        if (allowed == c) {
            return true;
        }
    }
    return false;
}
546 
isCombining(uint32_t c)547 static inline bool isCombining(uint32_t c) {
548     return (U_GET_GC_MASK(c) & U_GC_M_MASK) != 0;
549 }
550 
hasVariationSelector(uint32_t baseCodepoint,uint32_t variationSelector) const551 bool FontCollection::hasVariationSelector(uint32_t baseCodepoint,
552                                           uint32_t variationSelector) const {
553     if (!isVariationSelector(variationSelector)) {
554         return false;
555     }
556     if (baseCodepoint >= mMaxChar) {
557         return false;
558     }
559 
560     // Currently mRanges can not be used here since it isn't aware of the variation sequence.
561     for (size_t i = 0; i < mVSFamilyVec.size(); i++) {
562         if (mVSFamilyVec[i]->hasGlyph(baseCodepoint, variationSelector)) {
563             return true;
564         }
565     }
566 
567     // Even if there is no cmap format 14 subtable entry for the given sequence, should return true
568     // for <char, text presentation selector> case since we have special fallback rule for the
569     // sequence. Note that we don't need to restrict this to already standardized variation
570     // sequences, since Unicode is adding variation sequences more frequently now and may even move
571     // towards allowing text and emoji variation selectors on any character.
572     if (variationSelector == TEXT_STYLE_VS) {
573         for (size_t i = 0; i < getFamilyCount(); ++i) {
574             const std::shared_ptr<FontFamily>& family = getFamilyAt(i);
575             if (!family->isColorEmojiFamily() && family->hasGlyph(baseCodepoint, 0)) {
576                 return true;
577             }
578         }
579     }
580 
581     return false;
582 }
583 
// U+FFFD; itemize() substitutes it for unpaired surrogates found while decoding the text.
constexpr uint32_t REPLACEMENT_CHARACTER{0xFFFD};
585 
intersect(FontCollection::FamilyMatchResult l,FontCollection::FamilyMatchResult r)586 FontCollection::FamilyMatchResult FontCollection::FamilyMatchResult::intersect(
587         FontCollection::FamilyMatchResult l, FontCollection::FamilyMatchResult r) {
588     if (l == r) {
589         return l;
590     }
591 
592     uint32_t li = 0;
593     uint32_t ri = 0;
594     FamilyMatchResult::Builder b;
595     while (li < l.size() && ri < r.size()) {
596         if (l[li] < r[ri]) {
597             li++;
598         } else if (l[li] > r[ri]) {
599             ri++;
600         } else {  // l[li] == r[ri]
601             b.add(l[li]);
602             li++;
603             ri++;
604         }
605     }
606     return b.build();
607 }
608 
itemize(U16StringPiece text,FontStyle,uint32_t localeListId,FamilyVariant familyVariant,uint32_t runMax) const609 std::vector<FontCollection::Run> FontCollection::itemize(U16StringPiece text, FontStyle,
610                                                          uint32_t localeListId,
611                                                          FamilyVariant familyVariant,
612                                                          uint32_t runMax) const {
613     const uint16_t* string = text.data();
614     const uint32_t string_size = text.size();
615 
616     FamilyMatchResult lastFamilyIndices = FamilyMatchResult();
617 
618     if (string_size == 0) {
619         return std::vector<Run>();
620     }
621 
622     const uint32_t kEndOfString = 0xFFFFFFFF;
623     std::vector<Run> result;
624     Run* run = nullptr;
625 
626     uint32_t nextCh = 0;
627     uint32_t prevCh = 0;
628     size_t nextUtf16Pos = 0;
629     size_t readLength = 0;
630     U16_NEXT(string, readLength, string_size, nextCh);
631     if (U_IS_SURROGATE(nextCh)) {
632         nextCh = REPLACEMENT_CHARACTER;
633     }
634 
635     do {
636         const uint32_t ch = nextCh;
637         const size_t utf16Pos = nextUtf16Pos;
638         nextUtf16Pos = readLength;
639         if (readLength < string_size) {
640             U16_NEXT(string, readLength, string_size, nextCh);
641             if (U_IS_SURROGATE(nextCh)) {
642                 nextCh = REPLACEMENT_CHARACTER;
643             }
644         } else {
645             nextCh = kEndOfString;
646         }
647 
648         bool shouldContinueRun = false;
649         if (doesNotNeedFontSupport(ch)) {
650             // Always continue if the character is a format character not needed to be in the font.
651             shouldContinueRun = true;
652         } else if (!lastFamilyIndices.empty() && (isStickyAllowlisted(ch) || isCombining(ch))) {
653             // Continue using existing font as long as it has coverage and is whitelisted.
654 
655             const std::shared_ptr<FontFamily>& lastFamily = getFamilyAt(lastFamilyIndices[0]);
656             if (lastFamily->isColorEmojiFamily()) {
657                 // If the last family is color emoji font, find the longest family.
658                 shouldContinueRun = false;
659                 for (uint8_t ix : lastFamilyIndices) {
660                     shouldContinueRun |= getFamilyAt(ix)->getCoverage().get(ch);
661                 }
662             } else {
663                 shouldContinueRun = lastFamily->getCoverage().get(ch);
664             }
665         }
666 
667         if (!shouldContinueRun) {
668             FamilyMatchResult familyIndices = getFamilyForChar(
669                     ch, isVariationSelector(nextCh) ? nextCh : 0, localeListId, familyVariant);
670             bool breakRun;
671             if (utf16Pos == 0 || lastFamilyIndices.empty()) {
672                 breakRun = true;
673             } else {
674                 const std::shared_ptr<FontFamily>& lastFamily = getFamilyAt(lastFamilyIndices[0]);
675                 if (lastFamily->isColorEmojiFamily()) {
676                     FamilyMatchResult intersection =
677                             FamilyMatchResult::intersect(familyIndices, lastFamilyIndices);
678                     if (intersection.empty()) {
679                         breakRun = true;  // None of last family can draw the given char.
680                     } else {
681                         breakRun = isEmojiBreak(prevCh, ch);
682                         if (!breakRun) {
683                             // To select sequence supported families, update family indices with the
684                             // intersection between the supported families between prev char and
685                             // current char.
686                             familyIndices = intersection;
687                             lastFamilyIndices = intersection;
688                             run->familyMatch = intersection;
689                         }
690                     }
691                 } else {
692                     breakRun = familyIndices[0] != lastFamilyIndices[0];
693                 }
694             }
695 
696             if (breakRun) {
697                 size_t start = utf16Pos;
698                 // Workaround for combining marks and emoji modifiers until we implement
699                 // per-cluster font selection: if a combining mark or an emoji modifier is found in
700                 // a different font that also supports the previous character, attach previous
701                 // character to the new run. U+20E3 COMBINING ENCLOSING KEYCAP, used in emoji, is
702                 // handled properly by this since it's a combining mark too.
703                 if (utf16Pos != 0 &&
704                     (isCombining(ch) || (isEmojiModifier(ch) && isEmojiBase(prevCh)))) {
705                     for (uint8_t ix : familyIndices) {
706                         if (getFamilyAt(ix)->getCoverage().get(prevCh)) {
707                             const size_t prevChLength = U16_LENGTH(prevCh);
708                             if (run != nullptr) {
709                                 run->end -= prevChLength;
710                                 if (run->start == run->end) {
711                                     result.pop_back();
712                                 }
713                             }
714                             start -= prevChLength;
715                             break;
716                         }
717                     }
718                 }
719                 if (lastFamilyIndices.empty()) {
720                     // This is the first family ever assigned. We are either seeing the very first
721                     // character (which means start would already be zero), or we have only seen
722                     // characters that don't need any font support (which means we need to adjust
723                     // start to be 0 to include those characters).
724                     start = 0;
725                 }
726                 result.push_back({familyIndices, static_cast<int>(start), 0});
727                 run = &result.back();
728                 lastFamilyIndices = run->familyMatch;
729             }
730         }
731         prevCh = ch;
732         if (run != nullptr) {
733             run->end = nextUtf16Pos;  // exclusive
734         }
735 
736         // Stop searching the remaining characters if the result length gets runMax + 2.
737         // When result.size gets runMax + 2 here, the run between [0, runMax) was finalized.
738         // If the result.size() equals to runMax, the run may be still expanding.
739         // if the result.size() equals to runMax + 2, the last run may be removed and the last run
740         // may be exntended the previous run with above workaround.
741         if (result.size() >= 2 && runMax == result.size() - 2) {
742             break;
743         }
744     } while (nextCh != kEndOfString);
745 
746     if (lastFamilyIndices.empty()) {
747         // No character needed any font support, so it doesn't really matter which font they end up
748         // getting displayed in. We put the whole string in one run, using the first font.
749         result.push_back(
750                 {FamilyMatchResult::Builder().add(0).build(), 0, static_cast<int>(string_size)});
751     }
752 
753     if (result.size() > runMax) {
754         // The itemization has terminated since it reaches the runMax. Remove last unfinalized runs.
755         return std::vector<Run>(result.begin(), result.begin() + runMax);
756     }
757 
758     return result;
759 }
760 
getBestFont(U16StringPiece text,const Run & run,FontStyle style)761 FakedFont FontCollection::getBestFont(U16StringPiece text, const Run& run, FontStyle style) {
762     uint8_t bestIndex = 0;
763     uint32_t bestScore = 0xFFFFFFFF;
764 
765     const std::shared_ptr<FontFamily>& family = getFamilyAt(run.familyMatch[0]);
766     if (family->isColorEmojiFamily() && run.familyMatch.size() > 1) {
767         for (size_t i = 0; i < run.familyMatch.size(); ++i) {
768             const std::shared_ptr<FontFamily>& family = getFamilyAt(run.familyMatch[i]);
769             const HbFontUniquePtr& font = family->getFont(0)->baseFont();
770             uint32_t score = getGlyphScore(text, run.start, run.end, font);
771 
772             if (score < bestScore) {
773                 bestIndex = run.familyMatch[i];
774                 bestScore = score;
775             }
776         }
777     } else {
778         bestIndex = run.familyMatch[0];
779     }
780     return getFamilyAt(bestIndex)->getClosestMatch(style);
781 }
782 
baseFontFaked(FontStyle style)783 FakedFont FontCollection::baseFontFaked(FontStyle style) {
784     return getFamilyAt(0)->getClosestMatch(style);
785 }
786 
createCollectionWithVariation(const std::vector<FontVariation> & variations)787 std::shared_ptr<FontCollection> FontCollection::createCollectionWithVariation(
788         const std::vector<FontVariation>& variations) {
789     if (variations.empty() || mSupportedAxesCount == 0) {
790         return nullptr;
791     }
792 
793     bool hasSupportedAxis = false;
794     for (const FontVariation& variation : variations) {
795         if (std::binary_search(mSupportedAxes.get(), mSupportedAxes.get() + mSupportedAxesCount,
796                                variation.axisTag)) {
797             hasSupportedAxis = true;
798             break;
799         }
800     }
801     if (!hasSupportedAxis) {
802         // None of variation axes are supported by this font collection.
803         return nullptr;
804     }
805 
806     std::vector<std::shared_ptr<FontFamily>> families;
807     for (size_t i = 0; i < getFamilyCount(); ++i) {
808         const std::shared_ptr<FontFamily>& family = getFamilyAt(i);
809         std::shared_ptr<FontFamily> newFamily = family->createFamilyWithVariation(variations);
810         if (newFamily) {
811             families.push_back(newFamily);
812         } else {
813             families.push_back(family);
814         }
815     }
816 
817     return std::shared_ptr<FontCollection>(new FontCollection(families));
818 }
819 
createCollectionWithFamilies(std::vector<std::shared_ptr<FontFamily>> && families) const820 std::shared_ptr<FontCollection> FontCollection::createCollectionWithFamilies(
821         std::vector<std::shared_ptr<FontFamily>>&& families) const {
822     families.reserve(families.size() + getFamilyCount());
823     for (size_t i = 0; i < getFamilyCount(); i++) {
824         families.push_back(getFamilyAt(i));
825     }
826     return FontCollection::create(families);
827 }
828 
getId() const829 uint32_t FontCollection::getId() const {
830     return mId;
831 }
832 
833 }  // namespace minikin
834