1 /*
2 * Copyright (C) 2013 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #define LOG_TAG "Minikin"
18
19 #include "minikin/FontCollection.h"
20
21 #include <log/log.h>
22 #include <unicode/unorm2.h>
23
24 #include <algorithm>
25 #include <unordered_set>
26
27 #include "Locale.h"
28 #include "LocaleListCache.h"
29 #include "MinikinInternal.h"
30 #include "minikin/Characters.h"
31 #include "minikin/Emoji.h"
32 #include "minikin/FontFileParser.h"
33
34 using std::vector;
35
36 namespace minikin {
37
// Returns the larger of the two arguments; |b| is returned when neither is greater.
template <typename T>
static inline T max(T a, T b) {
    if (a > b) {
        return a;
    }
    return b;
}
42
// Variation selectors used below to request an emoji-style (U+FE0F) or a text-style (U+FE0E)
// rendering of the preceding base character.
constexpr uint32_t EMOJI_STYLE_VS = 0xFE0F;
constexpr uint32_t TEXT_STYLE_VS = 0xFE0E;

// Source of FontCollection ids: each constructed collection takes the current value and
// post-increments the counter.
static std::atomic<uint32_t> gNextCollectionId{0};
47
48 namespace {
49
isEmojiBreak(uint32_t prevCh,uint32_t ch)50 inline bool isEmojiBreak(uint32_t prevCh, uint32_t ch) {
51 return !(isEmojiModifier(ch) || (isRegionalIndicator(prevCh) && isRegionalIndicator(ch)) ||
52 isKeyCap(ch) || isTagChar(ch) || ch == CHAR_ZWJ || prevCh == CHAR_ZWJ);
53 }
54
55 // Lower is better
getGlyphScore(U16StringPiece text,uint32_t start,uint32_t end,const HbFontUniquePtr & font)56 uint32_t getGlyphScore(U16StringPiece text, uint32_t start, uint32_t end,
57 const HbFontUniquePtr& font) {
58 HbBufferUniquePtr buffer(hb_buffer_create());
59 hb_buffer_set_direction(buffer.get(), HB_DIRECTION_LTR);
60 hb_buffer_add_utf16(buffer.get(), text.data() + start, end - start, 0, end - start);
61 hb_buffer_guess_segment_properties(buffer.get());
62
63 unsigned int numGlyphs = -1;
64 hb_shape(font.get(), buffer.get(), nullptr, 0);
65 hb_glyph_info_t* info = hb_buffer_get_glyph_infos(buffer.get(), &numGlyphs);
66
67 // HarfBuzz squashed unsupported tag sequence into first emoji glyph. So, we cannot use glyph
68 // count for the font selection score. Give extra score if the base score is different from the
69 // first glyph.
70 if (numGlyphs == 1) {
71 constexpr uint32_t TAG_SEQUENCE_FALLBACK_PENALTY = 0x10000;
72
73 uint32_t ch = 0;
74 const uint16_t* string = text.data();
75 const uint32_t string_size = text.size();
76 uint32_t readLength = 0;
77
78 U16_NEXT(string, readLength, string_size, ch);
79 if (U_IS_SURROGATE(ch)) {
80 return numGlyphs; // Broken surrogate pair.
81 }
82
83 if (readLength >= string_size) {
84 return numGlyphs; // No more characters remaining.
85 }
86
87 uint32_t nextCh = 0;
88 U16_NEXT(string, readLength, string_size, nextCh);
89
90 if (!isTagChar(nextCh)) {
91 return numGlyphs; // Not a tag sequence.
92 }
93
94 uint32_t composedGlyphId = info[0].codepoint;
95
96 // Shape only the first base emoji.
97 hb_buffer_reset(buffer.get());
98 hb_buffer_set_direction(buffer.get(), HB_DIRECTION_LTR);
99 hb_buffer_add_codepoints(buffer.get(), &ch, 1, 0, 1);
100 hb_buffer_guess_segment_properties(buffer.get());
101
102 unsigned int numGlyphs = -1;
103 hb_shape(font.get(), buffer.get(), nullptr, 0);
104 info = hb_buffer_get_glyph_infos(buffer.get(), &numGlyphs);
105
106 if (numGlyphs != 1) {
107 // If the single code point of the first base emoji is decomposed to multiple glyphs,
108 // we don't support it.
109 return numGlyphs;
110 }
111
112 uint32_t baseGlyphId = info[0].codepoint;
113 if (composedGlyphId == baseGlyphId) {
114 return numGlyphs + TAG_SEQUENCE_FALLBACK_PENALTY;
115 } else {
116 return numGlyphs;
117 }
118 }
119
120 return numGlyphs;
121 }
122
123 } // namespace
124
125 // static
create(std::shared_ptr<FontFamily> && typeface)126 std::shared_ptr<FontCollection> FontCollection::create(std::shared_ptr<FontFamily>&& typeface) {
127 std::vector<std::shared_ptr<FontFamily>> typefaces;
128 typefaces.push_back(typeface);
129 return create(typefaces);
130 }
131
132 // static
create(const vector<std::shared_ptr<FontFamily>> & typefaces)133 std::shared_ptr<FontCollection> FontCollection::create(
134 const vector<std::shared_ptr<FontFamily>>& typefaces) {
135 // TODO(b/174672300): Revert back to make_shared.
136 return std::shared_ptr<FontCollection>(new FontCollection(typefaces));
137 }
138
FontCollection(const vector<std::shared_ptr<FontFamily>> & typefaces)139 FontCollection::FontCollection(const vector<std::shared_ptr<FontFamily>>& typefaces)
140 : mMaxChar(0), mSupportedAxes(nullptr) {
141 init(typefaces);
142 }
143
init(const vector<std::shared_ptr<FontFamily>> & typefaces)144 void FontCollection::init(const vector<std::shared_ptr<FontFamily>>& typefaces) {
145 mId = gNextCollectionId++;
146 vector<uint32_t> lastChar;
147 size_t nTypefaces = typefaces.size();
148 const FontStyle defaultStyle;
149 auto families = std::make_shared<vector<std::shared_ptr<FontFamily>>>();
150 std::unordered_set<AxisTag> supportedAxesSet;
151 for (size_t i = 0; i < nTypefaces; i++) {
152 const std::shared_ptr<FontFamily>& family = typefaces[i];
153 if (family->getClosestMatch(defaultStyle).font == nullptr) {
154 continue;
155 }
156 const SparseBitSet& coverage = family->getCoverage();
157 families->emplace_back(family);
158 if (family->hasVSTable()) {
159 mVSFamilyVec.push_back(family);
160 }
161 mMaxChar = max(mMaxChar, coverage.length());
162 lastChar.push_back(coverage.nextSetBit(0));
163
164 for (size_t i = 0; i < family->getSupportedAxesCount(); i++) {
165 supportedAxesSet.insert(family->getSupportedAxisAt(i));
166 }
167 }
168 // mMaybeSharedFamilies is not shared.
169 mMaybeSharedFamilies = families;
170 mFamilyCount = families->size();
171 mFamilyIndices = nullptr;
172 MINIKIN_ASSERT(mFamilyCount > 0, "Font collection must have at least one valid typeface");
173 MINIKIN_ASSERT(mFamilyCount <= MAX_FAMILY_COUNT,
174 "Font collection may only have up to %d font families.", MAX_FAMILY_COUNT);
175 // Although OpenType supports up to 2^16-1 axes per font,
176 // mSupportedAxesCount may exceed 2^16-1 as we have multiple fonts.
177 mSupportedAxesCount = static_cast<uint32_t>(supportedAxesSet.size());
178 if (mSupportedAxesCount > 0) {
179 mSupportedAxes = sortedArrayFromSet(supportedAxesSet);
180 }
181 size_t nPages = (mMaxChar + kPageMask) >> kLogCharsPerPage;
182 // TODO: Use variation selector map for mRanges construction.
183 // A font can have a glyph for a base code point and variation selector pair but no glyph for
184 // the base code point without variation selector. The family won't be listed in the range in
185 // this case.
186 mOwnedRanges = std::make_unique<Range[]>(nPages);
187 mRanges = mOwnedRanges.get();
188 mRangesCount = nPages;
189 for (size_t i = 0; i < nPages; i++) {
190 Range* range = &mOwnedRanges[i];
191 range->start = mOwnedFamilyVec.size();
192 for (size_t j = 0; j < getFamilyCount(); j++) {
193 if (lastChar[j] < (i + 1) << kLogCharsPerPage) {
194 const std::shared_ptr<FontFamily>& family = getFamilyAt(j);
195 mOwnedFamilyVec.push_back(static_cast<uint8_t>(j));
196 uint32_t nextChar = family->getCoverage().nextSetBit((i + 1) << kLogCharsPerPage);
197 lastChar[j] = nextChar;
198 }
199 }
200 range->end = mOwnedFamilyVec.size();
201 }
202 // See the comment in Range for more details.
203 LOG_ALWAYS_FATAL_IF(mOwnedFamilyVec.size() >= 0xFFFF,
204 "Exceeded the maximum indexable cmap coverage.");
205 mFamilyVec = mOwnedFamilyVec.data();
206 mFamilyVecCount = mOwnedFamilyVec.size();
207 }
208
FontCollection(BufferReader * reader,const std::shared_ptr<std::vector<std::shared_ptr<FontFamily>>> & families)209 FontCollection::FontCollection(
210 BufferReader* reader,
211 const std::shared_ptr<std::vector<std::shared_ptr<FontFamily>>>& families)
212 : mSupportedAxes(nullptr) {
213 mId = gNextCollectionId++;
214 mMaxChar = reader->read<uint32_t>();
215 mMaybeSharedFamilies = families;
216 std::tie(mFamilyIndices, mFamilyCount) = reader->readArray<uint32_t>();
217 for (size_t i = 0; i < getFamilyCount(); i++) {
218 const auto& family = getFamilyAt(i);
219 if (family->hasVSTable()) mVSFamilyVec.emplace_back(family);
220 }
221 // Range is two packed uint16_t
222 static_assert(sizeof(Range) == 4);
223 std::tie(mRanges, mRangesCount) = reader->readArray<Range>();
224 std::tie(mFamilyVec, mFamilyVecCount) = reader->readArray<uint8_t>();
225 const auto& [axesPtr, axesCount] = reader->readArray<AxisTag>();
226 mSupportedAxesCount = axesCount;
227 if (axesCount > 0) {
228 mSupportedAxes = std::unique_ptr<AxisTag[]>(new AxisTag[axesCount]);
229 std::copy(axesPtr, axesPtr + axesCount, mSupportedAxes.get());
230 }
231 }
232
writeTo(BufferWriter * writer,const std::unordered_map<std::shared_ptr<FontFamily>,uint32_t> & fontFamilyToIndexMap) const233 void FontCollection::writeTo(BufferWriter* writer,
234 const std::unordered_map<std::shared_ptr<FontFamily>, uint32_t>&
235 fontFamilyToIndexMap) const {
236 writer->write<uint32_t>(mMaxChar);
237 std::vector<uint32_t> indices;
238 indices.reserve(getFamilyCount());
239 for (size_t i = 0; i < getFamilyCount(); ++i) {
240 const std::shared_ptr<FontFamily>& fontFamily = getFamilyAt(i);
241 auto it = fontFamilyToIndexMap.find(fontFamily);
242 if (it == fontFamilyToIndexMap.end()) {
243 ALOGE("fontFamily not found in fontFamilyToIndexMap");
244 } else {
245 indices.push_back(it->second);
246 }
247 }
248 writer->writeArray<uint32_t>(indices.data(), indices.size());
249 writer->writeArray<Range>(mRanges, mRangesCount);
250 writer->writeArray<uint8_t>(mFamilyVec, mFamilyVecCount);
251 // No need to serialize mVSFamilyVec as it can be reconstructed easily from mFamilies.
252 writer->writeArray<AxisTag>(mSupportedAxes.get(), mSupportedAxesCount);
253 }
254
255 // static
readVector(BufferReader * reader)256 std::vector<std::shared_ptr<FontCollection>> FontCollection::readVector(BufferReader* reader) {
257 auto allFontFamilies = std::make_shared<std::vector<std::shared_ptr<FontFamily>>>(
258 FontFamily::readVector(reader));
259 uint32_t count = reader->read<uint32_t>();
260 std::vector<std::shared_ptr<FontCollection>> fontCollections;
261 fontCollections.reserve(count);
262 for (uint32_t i = 0; i < count; i++) {
263 fontCollections.emplace_back(new FontCollection(reader, allFontFamilies));
264 }
265 return fontCollections;
266 }
267
268 // static
writeVector(BufferWriter * writer,const std::vector<std::shared_ptr<FontCollection>> & fontCollections)269 void FontCollection::writeVector(
270 BufferWriter* writer, const std::vector<std::shared_ptr<FontCollection>>& fontCollections) {
271 std::vector<std::shared_ptr<FontFamily>> allFontFamilies;
272 // Note: operator== for shared_ptr compares raw pointer values.
273 std::unordered_map<std::shared_ptr<FontFamily>, uint32_t> fontFamilyToIndexMap;
274 collectAllFontFamilies(fontCollections, &allFontFamilies, &fontFamilyToIndexMap);
275
276 FontFamily::writeVector(writer, allFontFamilies);
277 writer->write<uint32_t>(fontCollections.size());
278 for (const auto& fontCollection : fontCollections) {
279 fontCollection->writeTo(writer, fontFamilyToIndexMap);
280 }
281 }
282
283 // static
collectAllFontFamilies(const std::vector<std::shared_ptr<FontCollection>> & fontCollections,std::vector<std::shared_ptr<FontFamily>> * outAllFontFamilies,std::unordered_map<std::shared_ptr<FontFamily>,uint32_t> * outFontFamilyToIndexMap)284 void FontCollection::collectAllFontFamilies(
285 const std::vector<std::shared_ptr<FontCollection>>& fontCollections,
286 std::vector<std::shared_ptr<FontFamily>>* outAllFontFamilies,
287 std::unordered_map<std::shared_ptr<FontFamily>, uint32_t>* outFontFamilyToIndexMap) {
288 for (const auto& fontCollection : fontCollections) {
289 for (size_t i = 0; i < fontCollection->getFamilyCount(); ++i) {
290 const std::shared_ptr<FontFamily>& fontFamily = fontCollection->getFamilyAt(i);
291 bool inserted =
292 outFontFamilyToIndexMap->emplace(fontFamily, outAllFontFamilies->size()).second;
293 if (inserted) {
294 outAllFontFamilies->push_back(fontFamily);
295 }
296 }
297 }
298 }
299
// Special scores for the font fallback: the lowest possible value marks a family that cannot be
// used at all, the highest possible value marks a family that must be used immediately.
constexpr uint32_t kUnsupportedFontScore = 0;
constexpr uint32_t kFirstFontScore = UINT32_MAX;
303
304 // Calculates a font score.
305 // The score of the font family is based on three subscores.
306 // - Coverage Score: How well the font family covers the given character or variation sequence.
307 // - Locale Score: How well the font family is appropriate for the locale.
308 // - Variant Score: Whether the font family matches the variant. Note that this variant is not the
309 // one in BCP47. This is our own font variant (e.g., elegant, compact).
310 //
311 // Then, there is a priority for these three subscores as follow:
312 // Coverage Score > Locale Score > Variant Score
313 // The returned score reflects this priority order.
314 //
315 // Note that there are two special scores.
316 // - kUnsupportedFontScore: When the font family doesn't support the variation sequence or even its
317 // base character.
318 // - kFirstFontScore: When the font is the first font family in the collection and it supports the
319 // given character or variation sequence.
calcFamilyScore(uint32_t ch,uint32_t vs,FamilyVariant variant,uint32_t localeListId,const std::shared_ptr<FontFamily> & fontFamily) const320 uint32_t FontCollection::calcFamilyScore(uint32_t ch, uint32_t vs, FamilyVariant variant,
321 uint32_t localeListId,
322 const std::shared_ptr<FontFamily>& fontFamily) const {
323 const uint32_t coverageScore = calcCoverageScore(ch, vs, localeListId, fontFamily);
324 if (coverageScore == kFirstFontScore || coverageScore == kUnsupportedFontScore) {
325 // No need to calculate other scores.
326 return coverageScore;
327 }
328
329 const uint32_t localeScore = calcLocaleMatchingScore(localeListId, *fontFamily);
330 const uint32_t variantScore = calcVariantMatchingScore(variant, *fontFamily);
331
332 // Subscores are encoded into 31 bits representation to meet the subscore priority.
333 // The highest 2 bits are for coverage score, then following 28 bits are for locale score,
334 // then the last 1 bit is for variant score.
335 return coverageScore << 29 | localeScore << 1 | variantScore;
336 }
337
338 // Returns true if
339 // - the fontFamily is a developer specified custom fallback.
340 // - no custom fallback is provided and the fontFamily is a default fallback.
isPrimaryFamily(const std::shared_ptr<FontFamily> & fontFamily) const341 bool FontCollection::isPrimaryFamily(const std::shared_ptr<FontFamily>& fontFamily) const {
342 // If the font family is provided by developers, it is primary.
343 if (fontFamily->isCustomFallback()) {
344 return true;
345 }
346
347 if (getFamilyAt(0)->isCustomFallback()) {
348 return false;
349 } else {
350 return fontFamily->isDefaultFallback();
351 }
352 }
353
354 // Calculates a font score based on variation sequence coverage.
355 // - Returns kUnsupportedFontScore if the font doesn't support the variation sequence or its base
356 // character.
357 // - Returns kFirstFontScore if the font family is the first font family in the collection and it
358 // supports the given character or variation sequence.
359 // - Returns 3 if the font family supports the variation sequence.
360 // - Returns 2 if the vs is a color variation selector (U+FE0F) and if the font is an emoji font.
361 // - Returns 2 if the vs is a text variation selector (U+FE0E) and if the font is not an emoji font.
362 // - Returns 1 if the variation selector is not specified or if the font family only supports the
363 // variation sequence's base character.
calcCoverageScore(uint32_t ch,uint32_t vs,uint32_t localeListId,const std::shared_ptr<FontFamily> & fontFamily) const364 uint32_t FontCollection::calcCoverageScore(uint32_t ch, uint32_t vs, uint32_t localeListId,
365 const std::shared_ptr<FontFamily>& fontFamily) const {
366 const bool hasVSGlyph = (vs != 0) && fontFamily->hasGlyph(ch, vs);
367 if (!hasVSGlyph && !fontFamily->getCoverage().get(ch)) {
368 // The font doesn't support either variation sequence or even the base character.
369 return kUnsupportedFontScore;
370 }
371
372 if ((vs == 0 || hasVSGlyph) && isPrimaryFamily(fontFamily)) {
373 // If the first font family supports the given character or variation sequence, always use
374 // it.
375 return kFirstFontScore;
376 }
377
378 if (vs != 0 && hasVSGlyph) {
379 return 3;
380 }
381
382 bool colorEmojiRequest;
383 if (vs == EMOJI_STYLE_VS) {
384 colorEmojiRequest = true;
385 } else if (vs == TEXT_STYLE_VS) {
386 colorEmojiRequest = false;
387 } else {
388 switch (LocaleListCache::getById(localeListId).getEmojiStyle()) {
389 case EmojiStyle::EMOJI:
390 colorEmojiRequest = true;
391 break;
392 case EmojiStyle::TEXT:
393 colorEmojiRequest = false;
394 break;
395 case EmojiStyle::EMPTY:
396 case EmojiStyle::DEFAULT:
397 default:
398 // Do not give any extra score for the default emoji style.
399 return 1;
400 break;
401 }
402 }
403
404 return colorEmojiRequest == fontFamily->isColorEmojiFamily() ? 2 : 1;
405 }
406
407 // Calculate font scores based on the script matching, subtag matching and primary locale matching.
408 //
// 1. If only the font's language matches, or there is no match at all between the requested
//    locale and the supported locale, then the font obtains a score of 0.
// 2. Without a match in language, and considering that a subtag may change the font's EmojiStyle
//    whereas a script does not, a match in subtag gets a score of 2 and a match in script gets a
//    score of 1.
// 3. With two matching elements, language-and-subtag matching has a score of 4, while
//    language-and-script matching obtains a score of 3, for the same reason as above.
415 //
416 // If two locales in the requested list have the same locale score, the font matching with higher
417 // priority locale gets a higher score. For example, in the case the user requested locale list is
418 // "ja-Jpan,en-Latn". The score of for the font of "ja-Jpan" gets a higher score than the font of
419 // "en-Latn".
420 //
421 // To achieve score calculation with priorities, the locale score is determined as follows:
422 // LocaleScore = s(0) * 5^(m - 1) + s(1) * 5^(m - 2) + ... + s(m - 2) * 5 + s(m - 1)
423 // Here, m is the maximum number of locales to be compared, and s(i) is the i-th locale's matching
424 // score. The possible values of s(i) are 0, 1, 2, 3 and 4.
calcLocaleMatchingScore(uint32_t userLocaleListId,const FontFamily & fontFamily)425 uint32_t FontCollection::calcLocaleMatchingScore(uint32_t userLocaleListId,
426 const FontFamily& fontFamily) {
427 const LocaleList& localeList = LocaleListCache::getById(userLocaleListId);
428 const LocaleList& fontLocaleList = LocaleListCache::getById(fontFamily.localeListId());
429
430 const size_t maxCompareNum = std::min(localeList.size(), FONT_LOCALE_LIMIT);
431 uint32_t score = 0;
432 for (size_t i = 0; i < maxCompareNum; ++i) {
433 score = score * 5u + localeList[i].calcScoreFor(fontLocaleList);
434 }
435 return score;
436 }
437
438 // Calculates a font score based on variant ("compact" or "elegant") matching.
439 // - Returns 1 if the font doesn't have variant or the variant matches with the text style.
440 // - No score if the font has a variant but it doesn't match with the text style.
calcVariantMatchingScore(FamilyVariant variant,const FontFamily & fontFamily)441 uint32_t FontCollection::calcVariantMatchingScore(FamilyVariant variant,
442 const FontFamily& fontFamily) {
443 const FamilyVariant familyVariant = fontFamily.variant();
444 if (familyVariant == FamilyVariant::DEFAULT) {
445 return 1;
446 }
447 if (familyVariant == variant) {
448 return 1;
449 }
450 if (variant == FamilyVariant::DEFAULT && familyVariant == FamilyVariant::COMPACT) {
451 // If default is requested, prefer compat variation.
452 return 1;
453 }
454 return 0;
455 }
456
457 // Implement heuristic for choosing best-match font. Here are the rules:
458 // 1. If first font in the collection has the character, it wins.
459 // 2. Calculate a score for the font family. See comments in calcFamilyScore for the detail.
460 // 3. Highest score wins, with ties resolved to the first font.
// The returned match result always contains at least one family index.
getFamilyForChar(uint32_t ch,uint32_t vs,uint32_t localeListId,FamilyVariant variant) const462 FontCollection::FamilyMatchResult FontCollection::getFamilyForChar(uint32_t ch, uint32_t vs,
463 uint32_t localeListId,
464 FamilyVariant variant) const {
465 if (ch >= mMaxChar) {
466 return FamilyMatchResult::Builder().add(0).build();
467 }
468
469 Range range = mRanges[ch >> kLogCharsPerPage];
470
471 if (vs != 0) {
472 range = {0, static_cast<uint16_t>(getFamilyCount())};
473 }
474
475 uint32_t bestScore = kUnsupportedFontScore;
476 FamilyMatchResult::Builder builder;
477
478 for (size_t i = range.start; i < range.end; i++) {
479 const uint8_t familyIndex = vs == 0 ? mFamilyVec[i] : i;
480 const std::shared_ptr<FontFamily>& family = getFamilyAt(familyIndex);
481 const uint32_t score = calcFamilyScore(ch, vs, variant, localeListId, family);
482 if (score == kFirstFontScore) {
483 // If the first font family supports the given character or variation sequence, always
484 // use it.
485 return builder.add(familyIndex).build();
486 }
487 if (score != kUnsupportedFontScore && score >= bestScore) {
488 if (score > bestScore) {
489 builder.reset();
490 bestScore = score;
491 }
492 builder.add(familyIndex);
493 }
494 }
495 if (builder.empty()) {
496 UErrorCode errorCode = U_ZERO_ERROR;
497 const UNormalizer2* normalizer = unorm2_getNFDInstance(&errorCode);
498 if (U_SUCCESS(errorCode)) {
499 UChar decomposed[4];
500 int len = unorm2_getRawDecomposition(normalizer, ch, decomposed, 4, &errorCode);
501 if (U_SUCCESS(errorCode) && len > 0) {
502 int off = 0;
503 U16_NEXT_UNSAFE(decomposed, off, ch);
504 return getFamilyForChar(ch, vs, localeListId, variant);
505 }
506 }
507 return FamilyMatchResult::Builder().add(0).build();
508 }
509 return builder.build();
510 }
511
512 // Characters where we want to continue using existing font run for (or stick to the next run if
513 // they start a string), even if the font does not support them explicitly. These are handled
514 // properly by Minikin or HarfBuzz even if the font does not explicitly support them and it's
515 // usually meaningless to switch to a different font to display them.
doesNotNeedFontSupport(uint32_t c)516 static bool doesNotNeedFontSupport(uint32_t c) {
517 return c == 0x00AD // SOFT HYPHEN
518 || c == 0x034F // COMBINING GRAPHEME JOINER
519 || c == 0x061C // ARABIC LETTER MARK
520 || (0x200C <= c && c <= 0x200F) // ZERO WIDTH NON-JOINER..RIGHT-TO-LEFT MARK
521 || (0x202A <= c && c <= 0x202E) // LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE
522 || (0x2066 <= c && c <= 0x2069) // LEFT-TO-RIGHT ISOLATE..POP DIRECTIONAL ISOLATE
523 || c == 0xFEFF // BYTE ORDER MARK
524 || isVariationSelector(c);
525 }
526
// Characters that stay in the current font run, as long as that font covers them, instead of
// triggering a new best-match computation in the fallback list.
static const uint32_t stickyAllowlist[] = {
        '!',    ',', '-', '.', ':', ';', '?',
        0x00A0,  // NBSP
        0x2010,  // HYPHEN
        0x2011,  // NB_HYPHEN
        0x202F,  // NNBSP
        0x2640,  // FEMALE_SIGN
        0x2642,  // MALE_SIGN
        0x2695,  // STAFF_OF_AESCULAPIUS
};

// Returns true if |c| is one of the characters listed in stickyAllowlist.
static bool isStickyAllowlisted(uint32_t c) {
    for (const uint32_t allowed : stickyAllowlist) {
        if (allowed == c) {
            return true;
        }
    }
    return false;
}
546
isCombining(uint32_t c)547 static inline bool isCombining(uint32_t c) {
548 return (U_GET_GC_MASK(c) & U_GC_M_MASK) != 0;
549 }
550
hasVariationSelector(uint32_t baseCodepoint,uint32_t variationSelector) const551 bool FontCollection::hasVariationSelector(uint32_t baseCodepoint,
552 uint32_t variationSelector) const {
553 if (!isVariationSelector(variationSelector)) {
554 return false;
555 }
556 if (baseCodepoint >= mMaxChar) {
557 return false;
558 }
559
560 // Currently mRanges can not be used here since it isn't aware of the variation sequence.
561 for (size_t i = 0; i < mVSFamilyVec.size(); i++) {
562 if (mVSFamilyVec[i]->hasGlyph(baseCodepoint, variationSelector)) {
563 return true;
564 }
565 }
566
567 // Even if there is no cmap format 14 subtable entry for the given sequence, should return true
568 // for <char, text presentation selector> case since we have special fallback rule for the
569 // sequence. Note that we don't need to restrict this to already standardized variation
570 // sequences, since Unicode is adding variation sequences more frequently now and may even move
571 // towards allowing text and emoji variation selectors on any character.
572 if (variationSelector == TEXT_STYLE_VS) {
573 for (size_t i = 0; i < getFamilyCount(); ++i) {
574 const std::shared_ptr<FontFamily>& family = getFamilyAt(i);
575 if (!family->isColorEmojiFamily() && family->hasGlyph(baseCodepoint, 0)) {
576 return true;
577 }
578 }
579 }
580
581 return false;
582 }
583
// U+FFFD; itemize() substitutes it for any surrogate code point that U16_NEXT could not pair.
constexpr uint32_t REPLACEMENT_CHARACTER = 0xFFFD;
585
intersect(FontCollection::FamilyMatchResult l,FontCollection::FamilyMatchResult r)586 FontCollection::FamilyMatchResult FontCollection::FamilyMatchResult::intersect(
587 FontCollection::FamilyMatchResult l, FontCollection::FamilyMatchResult r) {
588 if (l == r) {
589 return l;
590 }
591
592 uint32_t li = 0;
593 uint32_t ri = 0;
594 FamilyMatchResult::Builder b;
595 while (li < l.size() && ri < r.size()) {
596 if (l[li] < r[ri]) {
597 li++;
598 } else if (l[li] > r[ri]) {
599 ri++;
600 } else { // l[li] == r[ri]
601 b.add(l[li]);
602 li++;
603 ri++;
604 }
605 }
606 return b.build();
607 }
608
// Splits |text| into runs of consecutive UTF-16 code units that are drawn with the same set of
// candidate font families.
//
// Parameters:
//   text          - the UTF-16 text to itemize.
//   (FontStyle)   - unnamed and unused by this function.
//   localeListId  - locale list id forwarded to getFamilyForChar().
//   familyVariant - family variant forwarded to getFamilyForChar().
//   runMax        - maximum number of runs to return; itemization stops early once enough runs
//                   have been finalized.
//
// Returns the runs in text order. Each run holds the matched family indices, its start offset and
// its exclusive end offset, both in UTF-16 code units. An empty |text| yields an empty vector.
// Surrogate code points that cannot be paired are treated as REPLACEMENT_CHARACTER.
std::vector<FontCollection::Run> FontCollection::itemize(U16StringPiece text, FontStyle,
                                                         uint32_t localeListId,
                                                         FamilyVariant familyVariant,
                                                         uint32_t runMax) const {
    const uint16_t* string = text.data();
    const uint32_t string_size = text.size();

    // Families of the current run; empty until the first character that needs a font is seen.
    FamilyMatchResult lastFamilyIndices = FamilyMatchResult();

    if (string_size == 0) {
        return std::vector<Run>();
    }

    // Sentinel stored in nextCh once the whole string has been consumed.
    const uint32_t kEndOfString = 0xFFFFFFFF;
    std::vector<Run> result;
    // Points at the last element of |result|; it is refreshed after every push_back.
    Run* run = nullptr;

    // The loop works with a one code point lookahead: |nextCh| is decoded one iteration before it
    // becomes |ch|, so that a following variation selector can be taken into account.
    uint32_t nextCh = 0;
    uint32_t prevCh = 0;
    size_t nextUtf16Pos = 0;
    size_t readLength = 0;
    U16_NEXT(string, readLength, string_size, nextCh);
    if (U_IS_SURROGATE(nextCh)) {
        nextCh = REPLACEMENT_CHARACTER;
    }

    do {
        const uint32_t ch = nextCh;
        const size_t utf16Pos = nextUtf16Pos;
        nextUtf16Pos = readLength;
        if (readLength < string_size) {
            U16_NEXT(string, readLength, string_size, nextCh);
            if (U_IS_SURROGATE(nextCh)) {
                nextCh = REPLACEMENT_CHARACTER;
            }
        } else {
            nextCh = kEndOfString;
        }

        bool shouldContinueRun = false;
        if (doesNotNeedFontSupport(ch)) {
            // Always continue if the character is a format character not needed to be in the font.
            shouldContinueRun = true;
        } else if (!lastFamilyIndices.empty() && (isStickyAllowlisted(ch) || isCombining(ch))) {
            // Continue using the existing font as long as it has coverage and the character is
            // allowlisted (or is a combining mark).

            const std::shared_ptr<FontFamily>& lastFamily = getFamilyAt(lastFamilyIndices[0]);
            if (lastFamily->isColorEmojiFamily()) {
                // If the last family is a color emoji font, continue if any of the candidate
                // families of the current run covers the character.
                shouldContinueRun = false;
                for (uint8_t ix : lastFamilyIndices) {
                    shouldContinueRun |= getFamilyAt(ix)->getCoverage().get(ch);
                }
            } else {
                shouldContinueRun = lastFamily->getCoverage().get(ch);
            }
        }

        if (!shouldContinueRun) {
            // The variation selector, if any, is the code point that follows |ch|.
            FamilyMatchResult familyIndices = getFamilyForChar(
                    ch, isVariationSelector(nextCh) ? nextCh : 0, localeListId, familyVariant);
            bool breakRun;
            if (utf16Pos == 0 || lastFamilyIndices.empty()) {
                breakRun = true;
            } else {
                const std::shared_ptr<FontFamily>& lastFamily = getFamilyAt(lastFamilyIndices[0]);
                if (lastFamily->isColorEmojiFamily()) {
                    FamilyMatchResult intersection =
                            FamilyMatchResult::intersect(familyIndices, lastFamilyIndices);
                    if (intersection.empty()) {
                        breakRun = true;  // None of last family can draw the given char.
                    } else {
                        breakRun = isEmojiBreak(prevCh, ch);
                        if (!breakRun) {
                            // To select sequence supported families, update family indices with the
                            // intersection between the supported families between prev char and
                            // current char.
                            familyIndices = intersection;
                            lastFamilyIndices = intersection;
                            run->familyMatch = intersection;
                        }
                    }
                } else {
                    // For non-emoji runs only the first (preferred) family is compared.
                    breakRun = familyIndices[0] != lastFamilyIndices[0];
                }
            }

            if (breakRun) {
                size_t start = utf16Pos;
                // Workaround for combining marks and emoji modifiers until we implement
                // per-cluster font selection: if a combining mark or an emoji modifier is found in
                // a different font that also supports the previous character, attach previous
                // character to the new run. U+20E3 COMBINING ENCLOSING KEYCAP, used in emoji, is
                // handled properly by this since it's a combining mark too.
                if (utf16Pos != 0 &&
                    (isCombining(ch) || (isEmojiModifier(ch) && isEmojiBase(prevCh)))) {
                    for (uint8_t ix : familyIndices) {
                        if (getFamilyAt(ix)->getCoverage().get(prevCh)) {
                            const size_t prevChLength = U16_LENGTH(prevCh);
                            if (run != nullptr) {
                                // Give the previous character back; drop the previous run
                                // entirely if that leaves it empty.
                                run->end -= prevChLength;
                                if (run->start == run->end) {
                                    result.pop_back();
                                }
                            }
                            start -= prevChLength;
                            break;
                        }
                    }
                }
                if (lastFamilyIndices.empty()) {
                    // This is the first family ever assigned. We are either seeing the very first
                    // character (which means start would already be zero), or we have only seen
                    // characters that don't need any font support (which means we need to adjust
                    // start to be 0 to include those characters).
                    start = 0;
                }
                // The end offset is a placeholder here; it is set right below.
                result.push_back({familyIndices, static_cast<int>(start), 0});
                run = &result.back();
                lastFamilyIndices = run->familyMatch;
            }
        }
        prevCh = ch;
        if (run != nullptr) {
            run->end = nextUtf16Pos;  // exclusive
        }

        // Stop searching the remaining characters if the result length gets runMax + 2.
        // When result.size gets runMax + 2 here, the run between [0, runMax) was finalized.
        // If the result.size() equals to runMax, the run may be still expanding.
        // If the result.size() equals to runMax + 1, the last run may be removed and the previous
        // run may be extended by the workaround above.
        if (result.size() >= 2 && runMax == result.size() - 2) {
            break;
        }
    } while (nextCh != kEndOfString);

    if (lastFamilyIndices.empty()) {
        // No character needed any font support, so it doesn't really matter which font they end up
        // getting displayed in. We put the whole string in one run, using the first font.
        result.push_back(
                {FamilyMatchResult::Builder().add(0).build(), 0, static_cast<int>(string_size)});
    }

    if (result.size() > runMax) {
        // The itemization has terminated since it reaches the runMax. Remove last unfinalized runs.
        return std::vector<Run>(result.begin(), result.begin() + runMax);
    }

    return result;
}
760
getBestFont(U16StringPiece text,const Run & run,FontStyle style)761 FakedFont FontCollection::getBestFont(U16StringPiece text, const Run& run, FontStyle style) {
762 uint8_t bestIndex = 0;
763 uint32_t bestScore = 0xFFFFFFFF;
764
765 const std::shared_ptr<FontFamily>& family = getFamilyAt(run.familyMatch[0]);
766 if (family->isColorEmojiFamily() && run.familyMatch.size() > 1) {
767 for (size_t i = 0; i < run.familyMatch.size(); ++i) {
768 const std::shared_ptr<FontFamily>& family = getFamilyAt(run.familyMatch[i]);
769 const HbFontUniquePtr& font = family->getFont(0)->baseFont();
770 uint32_t score = getGlyphScore(text, run.start, run.end, font);
771
772 if (score < bestScore) {
773 bestIndex = run.familyMatch[i];
774 bestScore = score;
775 }
776 }
777 } else {
778 bestIndex = run.familyMatch[0];
779 }
780 return getFamilyAt(bestIndex)->getClosestMatch(style);
781 }
782
baseFontFaked(FontStyle style)783 FakedFont FontCollection::baseFontFaked(FontStyle style) {
784 return getFamilyAt(0)->getClosestMatch(style);
785 }
786
createCollectionWithVariation(const std::vector<FontVariation> & variations)787 std::shared_ptr<FontCollection> FontCollection::createCollectionWithVariation(
788 const std::vector<FontVariation>& variations) {
789 if (variations.empty() || mSupportedAxesCount == 0) {
790 return nullptr;
791 }
792
793 bool hasSupportedAxis = false;
794 for (const FontVariation& variation : variations) {
795 if (std::binary_search(mSupportedAxes.get(), mSupportedAxes.get() + mSupportedAxesCount,
796 variation.axisTag)) {
797 hasSupportedAxis = true;
798 break;
799 }
800 }
801 if (!hasSupportedAxis) {
802 // None of variation axes are supported by this font collection.
803 return nullptr;
804 }
805
806 std::vector<std::shared_ptr<FontFamily>> families;
807 for (size_t i = 0; i < getFamilyCount(); ++i) {
808 const std::shared_ptr<FontFamily>& family = getFamilyAt(i);
809 std::shared_ptr<FontFamily> newFamily = family->createFamilyWithVariation(variations);
810 if (newFamily) {
811 families.push_back(newFamily);
812 } else {
813 families.push_back(family);
814 }
815 }
816
817 return std::shared_ptr<FontCollection>(new FontCollection(families));
818 }
819
createCollectionWithFamilies(std::vector<std::shared_ptr<FontFamily>> && families) const820 std::shared_ptr<FontCollection> FontCollection::createCollectionWithFamilies(
821 std::vector<std::shared_ptr<FontFamily>>&& families) const {
822 families.reserve(families.size() + getFamilyCount());
823 for (size_t i = 0; i < getFamilyCount(); i++) {
824 families.push_back(getFamilyAt(i));
825 }
826 return FontCollection::create(families);
827 }
828
// Returns the id that was taken from the file-level atomic counter when this collection was
// constructed.
uint32_t FontCollection::getId() const {
    return mId;
}
832
833 } // namespace minikin
834