1 /*
2 * Copyright (C) 2013 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 // #define VERBOSE_DEBUG
18
19 #define LOG_TAG "Minikin"
20
#include <algorithm>
#include <utility>

#include <log/log.h>
#include "unicode/unistr.h"
#include "unicode/unorm2.h"
#include "unicode/utf16.h"

#include <minikin/Emoji.h>
#include <minikin/FontCollection.h>
#include "FontLanguage.h"
#include "FontLanguageListCache.h"
#include "MinikinInternal.h"
33
34 using std::vector;
35
36 namespace minikin {
37
// Returns the larger of |a| and |b|. When |a| is not strictly greater than
// |b| (including when they compare equal), |b| is returned.
template <typename T>
static inline T max(T a, T b) {
  if (a > b) {
    return a;
  }
  return b;
}
42
// Variation selectors that get special treatment during font fallback:
// U+FE0F asks for the emoji (color) presentation of the base character and
// U+FE0E asks for its text presentation.
constexpr uint32_t EMOJI_STYLE_VS = 0xFE0F;
constexpr uint32_t TEXT_STYLE_VS = 0xFE0E;
45
46 uint32_t FontCollection::sNextId = 0;
47
48 // libtxt: return a locale string for a language list ID
GetFontLocale(uint32_t langListId)49 std::string GetFontLocale(uint32_t langListId) {
50 const FontLanguages& langs = FontLanguageListCache::getById(langListId);
51 return langs.size() ? langs[0].getString() : "";
52 }
53
FontCollection(std::shared_ptr<FontFamily> && typeface)54 FontCollection::FontCollection(std::shared_ptr<FontFamily>&& typeface)
55 : mMaxChar(0) {
56 std::vector<std::shared_ptr<FontFamily>> typefaces;
57 typefaces.push_back(typeface);
58 init(typefaces);
59 }
60
FontCollection(const vector<std::shared_ptr<FontFamily>> & typefaces)61 FontCollection::FontCollection(
62 const vector<std::shared_ptr<FontFamily>>& typefaces)
63 : mMaxChar(0) {
64 init(typefaces);
65 }
66
init(const vector<std::shared_ptr<FontFamily>> & typefaces)67 void FontCollection::init(
68 const vector<std::shared_ptr<FontFamily>>& typefaces) {
69 std::scoped_lock _l(gMinikinLock);
70 mId = sNextId++;
71 vector<uint32_t> lastChar;
72 size_t nTypefaces = typefaces.size();
73 #ifdef VERBOSE_DEBUG
74 ALOGD("nTypefaces = %zd\n", nTypefaces);
75 #endif
76 const FontStyle defaultStyle;
77 for (size_t i = 0; i < nTypefaces; i++) {
78 const std::shared_ptr<FontFamily>& family = typefaces[i];
79 if (family->getClosestMatch(defaultStyle).font == nullptr) {
80 continue;
81 }
82 const SparseBitSet& coverage = family->getCoverage();
83 mFamilies.push_back(family); // emplace_back would be better
84 if (family->hasVSTable()) {
85 mVSFamilyVec.push_back(family);
86 }
87 mMaxChar = max(mMaxChar, coverage.length());
88 lastChar.push_back(coverage.nextSetBit(0));
89
90 const std::unordered_set<AxisTag>& supportedAxes = family->supportedAxes();
91 mSupportedAxes.insert(supportedAxes.begin(), supportedAxes.end());
92 }
93 nTypefaces = mFamilies.size();
94 LOG_ALWAYS_FATAL_IF(nTypefaces == 0,
95 "Font collection must have at least one valid typeface");
96 LOG_ALWAYS_FATAL_IF(nTypefaces > 254,
97 "Font collection may only have up to 254 font families.");
98 size_t nPages = (mMaxChar + kPageMask) >> kLogCharsPerPage;
99 // TODO: Use variation selector map for mRanges construction.
100 // A font can have a glyph for a base code point and variation selector pair
101 // but no glyph for the base code point without variation selector. The family
102 // won't be listed in the range in this case.
103 for (size_t i = 0; i < nPages; i++) {
104 Range dummy;
105 mRanges.push_back(dummy);
106 Range* range = &mRanges.back();
107 #ifdef VERBOSE_DEBUG
108 ALOGD("i=%zd: range start = %zd\n", i, offset);
109 #endif
110 range->start = mFamilyVec.size();
111 for (size_t j = 0; j < nTypefaces; j++) {
112 if (lastChar[j] < (i + 1) << kLogCharsPerPage) {
113 const std::shared_ptr<FontFamily>& family = mFamilies[j];
114 mFamilyVec.push_back(static_cast<uint8_t>(j));
115 uint32_t nextChar =
116 family->getCoverage().nextSetBit((i + 1) << kLogCharsPerPage);
117 #ifdef VERBOSE_DEBUG
118 ALOGD("nextChar = %d (j = %zd)\n", nextChar, j);
119 #endif
120 lastChar[j] = nextChar;
121 }
122 }
123 range->end = mFamilyVec.size();
124 }
125 // See the comment in Range for more details.
126 LOG_ALWAYS_FATAL_IF(mFamilyVec.size() >= 0xFFFF,
127 "Exceeded the maximum indexable cmap coverage.");
128 }
129
// Sentinel scores used by the font fallback.
// - kUnsupportedFontScore: the family supports neither the variation sequence
//   nor its base character.
// - kFirstFontScore: the family is the first one in the collection and
//   supports the requested character or variation sequence.
constexpr uint32_t kUnsupportedFontScore = 0;
constexpr uint32_t kFirstFontScore = UINT32_MAX;
133
134 // Calculates a font score.
135 // The score of the font family is based on three subscores.
136 // - Coverage Score: How well the font family covers the given character or
137 // variation sequence.
138 // - Language Score: How well the font family is appropriate for the language.
139 // - Variant Score: Whether the font family matches the variant. Note that this
140 // variant is not the
141 // one in BCP47. This is our own font variant (e.g., elegant, compact).
142 //
143 // Then, there is a priority for these three subscores as follow:
144 // Coverage Score > Language Score > Variant Score
145 // The returned score reflects this priority order.
146 //
147 // Note that there are two special scores.
148 // - kUnsupportedFontScore: When the font family doesn't support the variation
149 // sequence or even its
150 // base character.
151 // - kFirstFontScore: When the font is the first font family in the collection
152 // and it supports the
153 // given character or variation sequence.
calcFamilyScore(uint32_t ch,uint32_t vs,int variant,uint32_t langListId,const std::shared_ptr<FontFamily> & fontFamily) const154 uint32_t FontCollection::calcFamilyScore(
155 uint32_t ch,
156 uint32_t vs,
157 int variant,
158 uint32_t langListId,
159 const std::shared_ptr<FontFamily>& fontFamily) const {
160 const uint32_t coverageScore = calcCoverageScore(ch, vs, fontFamily);
161 if (coverageScore == kFirstFontScore ||
162 coverageScore == kUnsupportedFontScore) {
163 // No need to calculate other scores.
164 return coverageScore;
165 }
166
167 const uint32_t languageScore =
168 calcLanguageMatchingScore(langListId, *fontFamily);
169 const uint32_t variantScore = calcVariantMatchingScore(variant, *fontFamily);
170
171 // Subscores are encoded into 31 bits representation to meet the subscore
172 // priority. The highest 2 bits are for coverage score, then following 28 bits
173 // are for language score, then the last 1 bit is for variant score.
174 return coverageScore << 29 | languageScore << 1 | variantScore;
175 }
176
177 // Calculates a font score based on variation sequence coverage.
178 // - Returns kUnsupportedFontScore if the font doesn't support the variation
179 // sequence or its base
180 // character.
181 // - Returns kFirstFontScore if the font family is the first font family in the
182 // collection and it
183 // supports the given character or variation sequence.
184 // - Returns 3 if the font family supports the variation sequence.
185 // - Returns 2 if the vs is a color variation selector (U+FE0F) and if the font
186 // is an emoji font.
187 // - Returns 2 if the vs is a text variation selector (U+FE0E) and if the font
188 // is not an emoji font.
189 // - Returns 1 if the variation selector is not specified or if the font family
190 // only supports the
191 // variation sequence's base character.
calcCoverageScore(uint32_t ch,uint32_t vs,const std::shared_ptr<FontFamily> & fontFamily) const192 uint32_t FontCollection::calcCoverageScore(
193 uint32_t ch,
194 uint32_t vs,
195 const std::shared_ptr<FontFamily>& fontFamily) const {
196 const bool hasVSGlyph = (vs != 0) && fontFamily->hasGlyph(ch, vs);
197 if (!hasVSGlyph && !fontFamily->getCoverage().get(ch)) {
198 // The font doesn't support either variation sequence or even the base
199 // character.
200 return kUnsupportedFontScore;
201 }
202
203 if ((vs == 0 || hasVSGlyph) && mFamilies[0] == fontFamily) {
204 // If the first font family supports the given character or variation
205 // sequence, always use it.
206 return kFirstFontScore;
207 }
208
209 if (vs == 0) {
210 return 1;
211 }
212
213 if (hasVSGlyph) {
214 return 3;
215 }
216
217 if (vs == EMOJI_STYLE_VS || vs == TEXT_STYLE_VS) {
218 const FontLanguages& langs =
219 FontLanguageListCache::getById(fontFamily->langId());
220 bool hasEmojiFlag = false;
221 for (size_t i = 0; i < langs.size(); ++i) {
222 if (langs[i].getEmojiStyle() == FontLanguage::EMSTYLE_EMOJI) {
223 hasEmojiFlag = true;
224 break;
225 }
226 }
227
228 if (vs == EMOJI_STYLE_VS) {
229 return hasEmojiFlag ? 2 : 1;
230 } else { // vs == TEXT_STYLE_VS
231 return hasEmojiFlag ? 1 : 2;
232 }
233 }
234 return 1;
235 }
236
237 // Calculate font scores based on the script matching, subtag matching and
238 // primary language matching.
239 //
240 // 1. If only the font's language matches or there is no matches between
241 // requested font and
242 // supported font, then the font obtains a score of 0.
243 // 2. Without a match in language, considering subtag may change font's
244 // EmojiStyle over script,
245 // a match in subtag gets a score of 2 and a match in scripts gains a score
246 // of 1.
247 // 3. Regarding to two elements matchings, language-and-subtag matching has a
248 // score of 4, while
249 // language-and-script obtains a socre of 3 with the same reason above.
250 //
251 // If two languages in the requested list have the same language score, the font
252 // matching with higher priority language gets a higher score. For example, in
253 // the case the user requested language list is "ja-Jpan,en-Latn". The score of
254 // for the font of "ja-Jpan" gets a higher score than the font of "en-Latn".
255 //
256 // To achieve score calculation with priorities, the language score is
257 // determined as follows:
258 // LanguageScore = s(0) * 5^(m - 1) + s(1) * 5^(m - 2) + ... + s(m - 2) * 5 +
259 // s(m - 1)
260 // Here, m is the maximum number of languages to be compared, and s(i) is the
261 // i-th language's matching score. The possible values of s(i) are 0, 1, 2, 3
262 // and 4.
calcLanguageMatchingScore(uint32_t userLangListId,const FontFamily & fontFamily)263 uint32_t FontCollection::calcLanguageMatchingScore(
264 uint32_t userLangListId,
265 const FontFamily& fontFamily) {
266 const FontLanguages& langList =
267 FontLanguageListCache::getById(userLangListId);
268 const FontLanguages& fontLanguages =
269 FontLanguageListCache::getById(fontFamily.langId());
270
271 const size_t maxCompareNum = std::min(langList.size(), FONT_LANGUAGES_LIMIT);
272 uint32_t score = 0;
273 for (size_t i = 0; i < maxCompareNum; ++i) {
274 score = score * 5u + langList[i].calcScoreFor(fontLanguages);
275 }
276 return score;
277 }
278
279 // Calculates a font score based on variant ("compact" or "elegant") matching.
280 // - Returns 1 if the font doesn't have variant or the variant matches with the
281 // text style.
282 // - No score if the font has a variant but it doesn't match with the text
283 // style.
calcVariantMatchingScore(int variant,const FontFamily & fontFamily)284 uint32_t FontCollection::calcVariantMatchingScore(
285 int variant,
286 const FontFamily& fontFamily) {
287 return (fontFamily.variant() == 0 || fontFamily.variant() == variant) ? 1 : 0;
288 }
289
290 // Implement heuristic for choosing best-match font. Here are the rules:
291 // 1. If first font in the collection has the character, it wins.
292 // 2. Calculate a score for the font family. See comments in calcFamilyScore for
293 // the detail.
294 // 3. Highest score wins, with ties resolved to the first font.
295 // This method never returns nullptr.
getFamilyForChar(uint32_t ch,uint32_t vs,uint32_t langListId,int variant) const296 const std::shared_ptr<FontFamily>& FontCollection::getFamilyForChar(
297 uint32_t ch,
298 uint32_t vs,
299 uint32_t langListId,
300 int variant) const {
301 // First find font in hw theme.
302 for (size_t k = 0; k < mFamilies.size(); k++) {
303 if (NULL == mFamilies[k]) {
304 continue;
305 }
306 if (mFamilies[k]->getHwFontFamilyType() > 0) {
307 if(mFamilies[k] ->getCoverage().get(ch)) {
308 return mFamilies[k];
309 }
310 }
311 }
312
313 if (mFallbackFontProvider) {
314 const std::shared_ptr<FontFamily>& fallback =
315 mFallbackFontProvider->matchFallbackFontFromHwFont(ch,
316 GetFontLocale(langListId));
317 if (fallback) {
318 return fallback;
319 }
320 }
321
322 if (ch >= mMaxChar) {
323 // libtxt: check if the fallback font provider can match this character
324 if (mFallbackFontProvider) {
325 const std::shared_ptr<FontFamily>& fallback =
326 mFallbackFontProvider->matchFallbackFont(ch,
327 GetFontLocale(langListId));
328 if (fallback) {
329 return fallback;
330 }
331 }
332 return mFamilies[0];
333 }
334
335 // 0x1000 to 0x109f is range of Burmese, force ch to 0x1000 or 0x1050 to match Z-encoding and U-encoding of Burmese.
336 if (ch >= 0x1000 && ch <= 0x109f) {
337 const uint32_t ZAWGYI_START_CODE = 0x1000;
338 const uint32_t UNICODE_START_CODE = 0x1050;
339 ch = mIsZawgyiMyanmar ? ZAWGYI_START_CODE : UNICODE_START_CODE; // 0x1050 is exist in U-encoding of Burmese only.
340 }
341
342 Range range = mRanges[ch >> kLogCharsPerPage];
343
344 if (vs != 0) {
345 range = {0, static_cast<uint16_t>(mFamilies.size())};
346 }
347
348 #ifdef VERBOSE_DEBUG
349 ALOGD("querying range %zd:%zd\n", range.start, range.end);
350 #endif
351 int bestFamilyIndex = -1;
352 uint32_t bestScore = kUnsupportedFontScore;
353 for (size_t i = range.start; i < range.end; i++) {
354 const std::shared_ptr<FontFamily>& family =
355 vs == 0 ? mFamilies[mFamilyVec[i]] : mFamilies[i];
356 const uint32_t score = calcFamilyScore(ch, vs, variant, langListId, family);
357 if (score == kFirstFontScore) {
358 // If the first font family supports the given character or variation
359 // sequence, always use it.
360 return family;
361 }
362 if (score > bestScore) {
363 bestScore = score;
364 bestFamilyIndex = i;
365 }
366 }
367 if (bestFamilyIndex == -1) {
368 // libtxt: check if the fallback font provider can match this character
369 if (mFallbackFontProvider) {
370 const std::shared_ptr<FontFamily>& fallback =
371 mFallbackFontProvider->matchFallbackFont(ch,
372 GetFontLocale(langListId));
373 if (fallback) {
374 return fallback;
375 }
376 }
377
378 UErrorCode errorCode = U_ZERO_ERROR;
379 const UNormalizer2* normalizer = unorm2_getNFDInstance(&errorCode);
380 if (U_SUCCESS(errorCode)) {
381 UChar decomposed[4];
382 int len =
383 unorm2_getRawDecomposition(normalizer, ch, decomposed, 4, &errorCode);
384 if (U_SUCCESS(errorCode) && len > 0) {
385 int off = 0;
386 U16_NEXT_UNSAFE(decomposed, off, ch);
387 return getFamilyForChar(ch, vs, langListId, variant);
388 }
389 }
390 return mFamilies[0];
391 }
392 return vs == 0 ? mFamilies[mFamilyVec[bestFamilyIndex]]
393 : mFamilies[bestFamilyIndex];
394 }
395
// Code points that receive special handling while itemizing text into font
// runs. Note that U+200C is the zero width NON-joiner and U+200D is the zero
// width joiner.
const uint32_t NBSP = 0x00A0;                  // NO-BREAK SPACE
const uint32_t SOFT_HYPHEN = 0x00AD;           // SOFT HYPHEN
const uint32_t ZWNJ = 0x200C;                  // ZERO WIDTH NON-JOINER
const uint32_t ZWJ = 0x200D;                   // ZERO WIDTH JOINER
const uint32_t HYPHEN = 0x2010;                // HYPHEN
const uint32_t NB_HYPHEN = 0x2011;             // NON-BREAKING HYPHEN
const uint32_t NNBSP = 0x202F;                 // NARROW NO-BREAK SPACE
const uint32_t FEMALE_SIGN = 0x2640;           // FEMALE SIGN
const uint32_t MALE_SIGN = 0x2642;             // MALE SIGN
const uint32_t STAFF_OF_AESCULAPIUS = 0x2695;  // STAFF OF AESCULAPIUS
406
407 // Characters where we want to continue using existing font run instead of
408 // recomputing the best match in the fallback list.
409 static const uint32_t stickyWhitelist[] = {
410 '!', ',', '-', '.',
411 ':', ';', '?', NBSP,
412 ZWJ, ZWNJ, HYPHEN, NB_HYPHEN,
413 NNBSP, FEMALE_SIGN, MALE_SIGN, STAFF_OF_AESCULAPIUS};
414
isStickyWhitelisted(uint32_t c)415 static bool isStickyWhitelisted(uint32_t c) {
416 for (size_t i = 0; i < sizeof(stickyWhitelist) / sizeof(stickyWhitelist[0]);
417 i++) {
418 if (stickyWhitelist[i] == c)
419 return true;
420 }
421 return false;
422 }
423
// Returns true when |c| lies in one of the two variation selector ranges
// checked here: U+FE00..U+FE0F or U+E0100..U+E01EF.
static bool isVariationSelector(uint32_t c) {
  if (c >= 0xFE00 && c <= 0xFE0F) {
    return true;
  }
  return c >= 0xE0100 && c <= 0xE01EF;
}
427
hasVariationSelector(uint32_t baseCodepoint,uint32_t variationSelector) const428 bool FontCollection::hasVariationSelector(uint32_t baseCodepoint,
429 uint32_t variationSelector) const {
430 if (!isVariationSelector(variationSelector)) {
431 return false;
432 }
433 if (baseCodepoint >= mMaxChar) {
434 return false;
435 }
436
437 std::scoped_lock _l(gMinikinLock);
438
439 // Currently mRanges can not be used here since it isn't aware of the
440 // variation sequence.
441 for (size_t i = 0; i < mVSFamilyVec.size(); i++) {
442 if (mVSFamilyVec[i]->hasGlyph(baseCodepoint, variationSelector)) {
443 return true;
444 }
445 }
446
447 // Even if there is no cmap format 14 subtable entry for the given sequence,
448 // should return true for <char, text presentation selector> case since we
449 // have special fallback rule for the sequence. Note that we don't need to
450 // restrict this to already standardized variation sequences, since Unicode is
451 // adding variation sequences more frequently now and may even move towards
452 // allowing text and emoji variation selectors on any character.
453 if (variationSelector == TEXT_STYLE_VS) {
454 for (size_t i = 0; i < mFamilies.size(); ++i) {
455 if (!mFamilies[i]->isColorEmojiFamily() &&
456 mFamilies[i]->hasGlyph(baseCodepoint, 0)) {
457 return true;
458 }
459 }
460 }
461
462 return false;
463 }
464
itemize(const uint16_t * string,size_t string_size,FontStyle style,vector<Run> * result) const465 void FontCollection::itemize(const uint16_t* string,
466 size_t string_size,
467 FontStyle style,
468 vector<Run>* result) const {
469 const uint32_t langListId = style.getLanguageListId();
470 int variant = style.getVariant();
471 const FontFamily* lastFamily = nullptr;
472 Run* run = NULL;
473
474 if (string_size == 0) {
475 return;
476 }
477
478 const uint32_t kEndOfString = 0xFFFFFFFF;
479
480 uint32_t nextCh = 0;
481 uint32_t prevCh = 0;
482 size_t nextUtf16Pos = 0;
483 size_t readLength = 0;
484 U16_NEXT(string, readLength, string_size, nextCh);
485
486 do {
487 const uint32_t ch = nextCh;
488 const size_t utf16Pos = nextUtf16Pos;
489 nextUtf16Pos = readLength;
490 if (readLength < string_size) {
491 U16_NEXT(string, readLength, string_size, nextCh);
492 } else {
493 nextCh = kEndOfString;
494 }
495
496 bool shouldContinueRun = false;
497 if (lastFamily != nullptr) {
498 if (isStickyWhitelisted(ch)) {
499 // Continue using existing font as long as it has coverage and is
500 // whitelisted
501 shouldContinueRun = lastFamily->getCoverage().get(ch);
502 } else if (ch == SOFT_HYPHEN || isVariationSelector(ch)) {
503 // Always continue if the character is the soft hyphen or a variation
504 // selector.
505 shouldContinueRun = true;
506 }
507 }
508
509 if (!shouldContinueRun) {
510 const std::shared_ptr<FontFamily>& family = getFamilyForChar(
511 ch, isVariationSelector(nextCh) ? nextCh : 0, langListId, variant);
512 if (utf16Pos == 0 || family.get() != lastFamily) {
513 size_t start = utf16Pos;
514 // Workaround for combining marks and emoji modifiers until we implement
515 // per-cluster font selection: if a combining mark or an emoji modifier
516 // is found in a different font that also supports the previous
517 // character, attach previous character to the new run. U+20E3 COMBINING
518 // ENCLOSING KEYCAP, used in emoji, is handled properly by this since
519 // it's a combining mark too.
520 if (utf16Pos != 0 &&
521 ((U_GET_GC_MASK(ch) & U_GC_M_MASK) != 0 ||
522 (isEmojiModifier(ch) && isEmojiBase(prevCh))) &&
523 family != nullptr && family->getCoverage().get(prevCh)) {
524 const size_t prevChLength = U16_LENGTH(prevCh);
525 run->end -= prevChLength;
526 if (run->start == run->end) {
527 result->pop_back();
528 }
529 start -= prevChLength;
530 }
531 result->push_back(
532 {family->getClosestMatch(style), static_cast<int>(start), 0});
533 run = &result->back();
534 lastFamily = family.get();
535 }
536 }
537 prevCh = ch;
538 run->end = nextUtf16Pos; // exclusive
539 } while (nextCh != kEndOfString);
540 }
541
baseFontFaked(FontStyle style)542 FakedFont FontCollection::baseFontFaked(FontStyle style) {
543 return mFamilies[0]->getClosestMatch(style);
544 }
545
createCollectionWithVariation(const std::vector<FontVariation> & variations)546 std::shared_ptr<FontCollection> FontCollection::createCollectionWithVariation(
547 const std::vector<FontVariation>& variations) {
548 if (variations.empty() || mSupportedAxes.empty()) {
549 return nullptr;
550 }
551
552 bool hasSupportedAxis = false;
553 for (const FontVariation& variation : variations) {
554 if (mSupportedAxes.find(variation.axisTag) != mSupportedAxes.end()) {
555 hasSupportedAxis = true;
556 break;
557 }
558 }
559 if (!hasSupportedAxis) {
560 // None of variation axes are supported by this font collection.
561 return nullptr;
562 }
563
564 std::vector<std::shared_ptr<FontFamily>> families;
565 for (const std::shared_ptr<FontFamily>& family : mFamilies) {
566 std::shared_ptr<FontFamily> newFamily =
567 family->createFamilyWithVariation(variations);
568 if (newFamily) {
569 families.push_back(newFamily);
570 } else {
571 families.push_back(family);
572 }
573 }
574
575 return std::shared_ptr<FontCollection>(new FontCollection(families));
576 }
577
getId() const578 uint32_t FontCollection::getId() const {
579 return mId;
580 }
581
582 } // namespace minikin
583