• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 
17 // #define VERBOSE_DEBUG
18 
19 #define LOG_TAG "Minikin"
20 
#include <algorithm>
#include <utility>

#include <log/log.h>
#include "unicode/unistr.h"
#include "unicode/unorm2.h"
#include "unicode/utf16.h"

#include <minikin/Emoji.h>
#include <minikin/FontCollection.h>
#include "FontLanguage.h"
#include "FontLanguageListCache.h"
#include "MinikinInternal.h"
33 
34 using std::vector;
35 
36 namespace minikin {
37 
// Returns the larger of the two arguments; when neither compares greater,
// the second argument is returned.
template <typename T>
static inline T max(T a, T b) {
  if (a > b) {
    return a;
  }
  return b;
}
42 
// Variation selectors that request a presentation style for the preceding
// base character: U+FE0F asks for emoji style, U+FE0E for text style.
constexpr uint32_t EMOJI_STYLE_VS = 0xFE0F;
constexpr uint32_t TEXT_STYLE_VS = 0xFE0E;
45 
46 uint32_t FontCollection::sNextId = 0;
47 
48 // libtxt: return a locale string for a language list ID
GetFontLocale(uint32_t langListId)49 std::string GetFontLocale(uint32_t langListId) {
50   const FontLanguages& langs = FontLanguageListCache::getById(langListId);
51   return langs.size() ? langs[0].getString() : "";
52 }
53 
FontCollection(std::shared_ptr<FontFamily> && typeface)54 FontCollection::FontCollection(std::shared_ptr<FontFamily>&& typeface)
55     : mMaxChar(0) {
56   std::vector<std::shared_ptr<FontFamily>> typefaces;
57   typefaces.push_back(typeface);
58   init(typefaces);
59 }
60 
FontCollection(const vector<std::shared_ptr<FontFamily>> & typefaces)61 FontCollection::FontCollection(
62     const vector<std::shared_ptr<FontFamily>>& typefaces)
63     : mMaxChar(0) {
64   init(typefaces);
65 }
66 
init(const vector<std::shared_ptr<FontFamily>> & typefaces)67 void FontCollection::init(
68     const vector<std::shared_ptr<FontFamily>>& typefaces) {
69   std::scoped_lock _l(gMinikinLock);
70   mId = sNextId++;
71   vector<uint32_t> lastChar;
72   size_t nTypefaces = typefaces.size();
73 #ifdef VERBOSE_DEBUG
74   ALOGD("nTypefaces = %zd\n", nTypefaces);
75 #endif
76   const FontStyle defaultStyle;
77   for (size_t i = 0; i < nTypefaces; i++) {
78     const std::shared_ptr<FontFamily>& family = typefaces[i];
79     if (family->getClosestMatch(defaultStyle).font == nullptr) {
80       continue;
81     }
82     const SparseBitSet& coverage = family->getCoverage();
83     mFamilies.push_back(family);  // emplace_back would be better
84     if (family->hasVSTable()) {
85       mVSFamilyVec.push_back(family);
86     }
87     mMaxChar = max(mMaxChar, coverage.length());
88     lastChar.push_back(coverage.nextSetBit(0));
89 
90     const std::unordered_set<AxisTag>& supportedAxes = family->supportedAxes();
91     mSupportedAxes.insert(supportedAxes.begin(), supportedAxes.end());
92   }
93   nTypefaces = mFamilies.size();
94   LOG_ALWAYS_FATAL_IF(nTypefaces == 0,
95                       "Font collection must have at least one valid typeface");
96   LOG_ALWAYS_FATAL_IF(nTypefaces > 254,
97                       "Font collection may only have up to 254 font families.");
98   size_t nPages = (mMaxChar + kPageMask) >> kLogCharsPerPage;
99   // TODO: Use variation selector map for mRanges construction.
100   // A font can have a glyph for a base code point and variation selector pair
101   // but no glyph for the base code point without variation selector. The family
102   // won't be listed in the range in this case.
103   for (size_t i = 0; i < nPages; i++) {
104     Range dummy;
105     mRanges.push_back(dummy);
106     Range* range = &mRanges.back();
107 #ifdef VERBOSE_DEBUG
108     ALOGD("i=%zd: range start = %zd\n", i, offset);
109 #endif
110     range->start = mFamilyVec.size();
111     for (size_t j = 0; j < nTypefaces; j++) {
112       if (lastChar[j] < (i + 1) << kLogCharsPerPage) {
113         const std::shared_ptr<FontFamily>& family = mFamilies[j];
114         mFamilyVec.push_back(static_cast<uint8_t>(j));
115         uint32_t nextChar =
116             family->getCoverage().nextSetBit((i + 1) << kLogCharsPerPage);
117 #ifdef VERBOSE_DEBUG
118         ALOGD("nextChar = %d (j = %zd)\n", nextChar, j);
119 #endif
120         lastChar[j] = nextChar;
121       }
122     }
123     range->end = mFamilyVec.size();
124   }
125   // See the comment in Range for more details.
126   LOG_ALWAYS_FATAL_IF(mFamilyVec.size() >= 0xFFFF,
127                       "Exceeded the maximum indexable cmap coverage.");
128 }
129 
// Sentinel scores used by the font fallback logic.
//  - kUnsupportedFontScore: returned when a family covers neither the
//    variation sequence nor its base character.
//  - kFirstFontScore: returned when the first family of the collection covers
//    the requested character or variation sequence.
constexpr uint32_t kUnsupportedFontScore = 0;
constexpr uint32_t kFirstFontScore = UINT32_MAX;
133 
134 // Calculates a font score.
135 // The score of the font family is based on three subscores.
136 //  - Coverage Score: How well the font family covers the given character or
137 //  variation sequence.
138 //  - Language Score: How well the font family is appropriate for the language.
139 //  - Variant Score: Whether the font family matches the variant. Note that this
140 //  variant is not the
141 //    one in BCP47. This is our own font variant (e.g., elegant, compact).
142 //
143 // Then, there is a priority for these three subscores as follow:
144 //   Coverage Score > Language Score > Variant Score
145 // The returned score reflects this priority order.
146 //
147 // Note that there are two special scores.
148 //  - kUnsupportedFontScore: When the font family doesn't support the variation
149 //  sequence or even its
150 //    base character.
151 //  - kFirstFontScore: When the font is the first font family in the collection
152 //  and it supports the
153 //    given character or variation sequence.
calcFamilyScore(uint32_t ch,uint32_t vs,int variant,uint32_t langListId,const std::shared_ptr<FontFamily> & fontFamily) const154 uint32_t FontCollection::calcFamilyScore(
155     uint32_t ch,
156     uint32_t vs,
157     int variant,
158     uint32_t langListId,
159     const std::shared_ptr<FontFamily>& fontFamily) const {
160   const uint32_t coverageScore = calcCoverageScore(ch, vs, fontFamily);
161   if (coverageScore == kFirstFontScore ||
162       coverageScore == kUnsupportedFontScore) {
163     // No need to calculate other scores.
164     return coverageScore;
165   }
166 
167   const uint32_t languageScore =
168       calcLanguageMatchingScore(langListId, *fontFamily);
169   const uint32_t variantScore = calcVariantMatchingScore(variant, *fontFamily);
170 
171   // Subscores are encoded into 31 bits representation to meet the subscore
172   // priority. The highest 2 bits are for coverage score, then following 28 bits
173   // are for language score, then the last 1 bit is for variant score.
174   return coverageScore << 29 | languageScore << 1 | variantScore;
175 }
176 
177 // Calculates a font score based on variation sequence coverage.
178 // - Returns kUnsupportedFontScore if the font doesn't support the variation
179 // sequence or its base
180 //   character.
181 // - Returns kFirstFontScore if the font family is the first font family in the
182 // collection and it
183 //   supports the given character or variation sequence.
184 // - Returns 3 if the font family supports the variation sequence.
185 // - Returns 2 if the vs is a color variation selector (U+FE0F) and if the font
186 // is an emoji font.
187 // - Returns 2 if the vs is a text variation selector (U+FE0E) and if the font
188 // is not an emoji font.
189 // - Returns 1 if the variation selector is not specified or if the font family
190 // only supports the
191 //   variation sequence's base character.
calcCoverageScore(uint32_t ch,uint32_t vs,const std::shared_ptr<FontFamily> & fontFamily) const192 uint32_t FontCollection::calcCoverageScore(
193     uint32_t ch,
194     uint32_t vs,
195     const std::shared_ptr<FontFamily>& fontFamily) const {
196   const bool hasVSGlyph = (vs != 0) && fontFamily->hasGlyph(ch, vs);
197   if (!hasVSGlyph && !fontFamily->getCoverage().get(ch)) {
198     // The font doesn't support either variation sequence or even the base
199     // character.
200     return kUnsupportedFontScore;
201   }
202 
203   if ((vs == 0 || hasVSGlyph) && mFamilies[0] == fontFamily) {
204     // If the first font family supports the given character or variation
205     // sequence, always use it.
206     return kFirstFontScore;
207   }
208 
209   if (vs == 0) {
210     return 1;
211   }
212 
213   if (hasVSGlyph) {
214     return 3;
215   }
216 
217   if (vs == EMOJI_STYLE_VS || vs == TEXT_STYLE_VS) {
218     const FontLanguages& langs =
219         FontLanguageListCache::getById(fontFamily->langId());
220     bool hasEmojiFlag = false;
221     for (size_t i = 0; i < langs.size(); ++i) {
222       if (langs[i].getEmojiStyle() == FontLanguage::EMSTYLE_EMOJI) {
223         hasEmojiFlag = true;
224         break;
225       }
226     }
227 
228     if (vs == EMOJI_STYLE_VS) {
229       return hasEmojiFlag ? 2 : 1;
230     } else {  // vs == TEXT_STYLE_VS
231       return hasEmojiFlag ? 1 : 2;
232     }
233   }
234   return 1;
235 }
236 
237 // Calculate font scores based on the script matching, subtag matching and
238 // primary language matching.
239 //
240 // 1. If only the font's language matches or there is no matches between
241 // requested font and
242 //    supported font, then the font obtains a score of 0.
243 // 2. Without a match in language, considering subtag may change font's
244 // EmojiStyle over script,
245 //    a match in subtag gets a score of 2 and a match in scripts gains a score
246 //    of 1.
247 // 3. Regarding to two elements matchings, language-and-subtag matching has a
248 // score of 4, while
249 //    language-and-script obtains a socre of 3 with the same reason above.
250 //
251 // If two languages in the requested list have the same language score, the font
252 // matching with higher priority language gets a higher score. For example, in
253 // the case the user requested language list is "ja-Jpan,en-Latn". The score of
254 // for the font of "ja-Jpan" gets a higher score than the font of "en-Latn".
255 //
256 // To achieve score calculation with priorities, the language score is
257 // determined as follows:
258 //   LanguageScore = s(0) * 5^(m - 1) + s(1) * 5^(m - 2) + ... + s(m - 2) * 5 +
259 //   s(m - 1)
260 // Here, m is the maximum number of languages to be compared, and s(i) is the
261 // i-th language's matching score. The possible values of s(i) are 0, 1, 2, 3
262 // and 4.
calcLanguageMatchingScore(uint32_t userLangListId,const FontFamily & fontFamily)263 uint32_t FontCollection::calcLanguageMatchingScore(
264     uint32_t userLangListId,
265     const FontFamily& fontFamily) {
266   const FontLanguages& langList =
267       FontLanguageListCache::getById(userLangListId);
268   const FontLanguages& fontLanguages =
269       FontLanguageListCache::getById(fontFamily.langId());
270 
271   const size_t maxCompareNum = std::min(langList.size(), FONT_LANGUAGES_LIMIT);
272   uint32_t score = 0;
273   for (size_t i = 0; i < maxCompareNum; ++i) {
274     score = score * 5u + langList[i].calcScoreFor(fontLanguages);
275   }
276   return score;
277 }
278 
279 // Calculates a font score based on variant ("compact" or "elegant") matching.
280 //  - Returns 1 if the font doesn't have variant or the variant matches with the
281 //  text style.
282 //  - No score if the font has a variant but it doesn't match with the text
283 //  style.
calcVariantMatchingScore(int variant,const FontFamily & fontFamily)284 uint32_t FontCollection::calcVariantMatchingScore(
285     int variant,
286     const FontFamily& fontFamily) {
287   return (fontFamily.variant() == 0 || fontFamily.variant() == variant) ? 1 : 0;
288 }
289 
290 // Implement heuristic for choosing best-match font. Here are the rules:
291 // 1. If first font in the collection has the character, it wins.
292 // 2. Calculate a score for the font family. See comments in calcFamilyScore for
293 // the detail.
294 // 3. Highest score wins, with ties resolved to the first font.
295 // This method never returns nullptr.
getFamilyForChar(uint32_t ch,uint32_t vs,uint32_t langListId,int variant) const296 const std::shared_ptr<FontFamily>& FontCollection::getFamilyForChar(
297     uint32_t ch,
298     uint32_t vs,
299     uint32_t langListId,
300     int variant) const {
301   // First find font in hw theme.
302   for (size_t k = 0; k < mFamilies.size(); k++) {
303     if (NULL == mFamilies[k]) {
304       continue;
305     }
306     if (mFamilies[k]->getHwFontFamilyType() > 0) {
307       if(mFamilies[k] ->getCoverage().get(ch)) {
308         return mFamilies[k];
309       }
310     }
311   }
312 
313   if (mFallbackFontProvider) {
314     const std::shared_ptr<FontFamily>& fallback =
315         mFallbackFontProvider->matchFallbackFontFromHwFont(ch,
316                                                            GetFontLocale(langListId));
317     if (fallback) {
318       return fallback;
319     }
320   }
321 
322   if (ch >= mMaxChar) {
323     // libtxt: check if the fallback font provider can match this character
324     if (mFallbackFontProvider) {
325       const std::shared_ptr<FontFamily>& fallback =
326           mFallbackFontProvider->matchFallbackFont(ch,
327                                                    GetFontLocale(langListId));
328       if (fallback) {
329         return fallback;
330       }
331     }
332     return mFamilies[0];
333   }
334 
335   // 0x1000 to 0x109f is range of Burmese, force ch to 0x1000 or 0x1050 to match Z-encoding and U-encoding of Burmese.
336   if (ch >= 0x1000 && ch <= 0x109f) {
337     const uint32_t ZAWGYI_START_CODE = 0x1000;
338     const uint32_t UNICODE_START_CODE = 0x1050;
339     ch = mIsZawgyiMyanmar ? ZAWGYI_START_CODE : UNICODE_START_CODE; // 0x1050 is exist in U-encoding of Burmese only.
340   }
341 
342   Range range = mRanges[ch >> kLogCharsPerPage];
343 
344   if (vs != 0) {
345     range = {0, static_cast<uint16_t>(mFamilies.size())};
346   }
347 
348 #ifdef VERBOSE_DEBUG
349   ALOGD("querying range %zd:%zd\n", range.start, range.end);
350 #endif
351   int bestFamilyIndex = -1;
352   uint32_t bestScore = kUnsupportedFontScore;
353   for (size_t i = range.start; i < range.end; i++) {
354     const std::shared_ptr<FontFamily>& family =
355         vs == 0 ? mFamilies[mFamilyVec[i]] : mFamilies[i];
356     const uint32_t score = calcFamilyScore(ch, vs, variant, langListId, family);
357     if (score == kFirstFontScore) {
358       // If the first font family supports the given character or variation
359       // sequence, always use it.
360       return family;
361     }
362     if (score > bestScore) {
363       bestScore = score;
364       bestFamilyIndex = i;
365     }
366   }
367   if (bestFamilyIndex == -1) {
368     // libtxt: check if the fallback font provider can match this character
369     if (mFallbackFontProvider) {
370       const std::shared_ptr<FontFamily>& fallback =
371           mFallbackFontProvider->matchFallbackFont(ch,
372                                                    GetFontLocale(langListId));
373       if (fallback) {
374         return fallback;
375       }
376     }
377 
378     UErrorCode errorCode = U_ZERO_ERROR;
379     const UNormalizer2* normalizer = unorm2_getNFDInstance(&errorCode);
380     if (U_SUCCESS(errorCode)) {
381       UChar decomposed[4];
382       int len =
383           unorm2_getRawDecomposition(normalizer, ch, decomposed, 4, &errorCode);
384       if (U_SUCCESS(errorCode) && len > 0) {
385         int off = 0;
386         U16_NEXT_UNSAFE(decomposed, off, ch);
387         return getFamilyForChar(ch, vs, langListId, variant);
388       }
389     }
390     return mFamilies[0];
391   }
392   return vs == 0 ? mFamilies[mFamilyVec[bestFamilyIndex]]
393                  : mFamilies[bestFamilyIndex];
394 }
395 
// Code points referenced by the run itemization logic.
// ZWNJ/ZWJ previously carried each other's values; the names now match the
// Unicode assignments (U+200C ZERO WIDTH NON-JOINER, U+200D ZERO WIDTH
// JOINER). Both stay in the sticky whitelist, so itemization is unaffected.
constexpr uint32_t NBSP = 0x00A0;
constexpr uint32_t SOFT_HYPHEN = 0x00AD;
constexpr uint32_t ZWNJ = 0x200C;
constexpr uint32_t ZWJ = 0x200D;
constexpr uint32_t HYPHEN = 0x2010;
constexpr uint32_t NB_HYPHEN = 0x2011;
constexpr uint32_t NNBSP = 0x202F;
constexpr uint32_t FEMALE_SIGN = 0x2640;
constexpr uint32_t MALE_SIGN = 0x2642;
constexpr uint32_t STAFF_OF_AESCULAPIUS = 0x2695;

// Characters where we want to continue using existing font run instead of
// recomputing the best match in the fallback list.
static constexpr uint32_t stickyWhitelist[] = {
    '!',   ',',         '-',       '.',
    ':',   ';',         '?',       NBSP,
    ZWNJ,  ZWJ,         HYPHEN,    NB_HYPHEN,
    NNBSP, FEMALE_SIGN, MALE_SIGN, STAFF_OF_AESCULAPIUS};

// Returns true when |c| is one of the entries of stickyWhitelist.
static bool isStickyWhitelisted(uint32_t c) {
  for (const uint32_t entry : stickyWhitelist) {
    if (entry == c) {
      return true;
    }
  }
  return false;
}
423 
// Returns true when |c| lies in one of the two variation selector ranges:
// U+FE00..U+FE0F or U+E0100..U+E01EF (both inclusive).
static bool isVariationSelector(uint32_t c) {
  if (c >= 0xFE00 && c <= 0xFE0F) {
    return true;
  }
  return c >= 0xE0100 && c <= 0xE01EF;
}
427 
hasVariationSelector(uint32_t baseCodepoint,uint32_t variationSelector) const428 bool FontCollection::hasVariationSelector(uint32_t baseCodepoint,
429                                           uint32_t variationSelector) const {
430   if (!isVariationSelector(variationSelector)) {
431     return false;
432   }
433   if (baseCodepoint >= mMaxChar) {
434     return false;
435   }
436 
437   std::scoped_lock _l(gMinikinLock);
438 
439   // Currently mRanges can not be used here since it isn't aware of the
440   // variation sequence.
441   for (size_t i = 0; i < mVSFamilyVec.size(); i++) {
442     if (mVSFamilyVec[i]->hasGlyph(baseCodepoint, variationSelector)) {
443       return true;
444     }
445   }
446 
447   // Even if there is no cmap format 14 subtable entry for the given sequence,
448   // should return true for <char, text presentation selector> case since we
449   // have special fallback rule for the sequence. Note that we don't need to
450   // restrict this to already standardized variation sequences, since Unicode is
451   // adding variation sequences more frequently now and may even move towards
452   // allowing text and emoji variation selectors on any character.
453   if (variationSelector == TEXT_STYLE_VS) {
454     for (size_t i = 0; i < mFamilies.size(); ++i) {
455       if (!mFamilies[i]->isColorEmojiFamily() &&
456           mFamilies[i]->hasGlyph(baseCodepoint, 0)) {
457         return true;
458       }
459     }
460   }
461 
462   return false;
463 }
464 
itemize(const uint16_t * string,size_t string_size,FontStyle style,vector<Run> * result) const465 void FontCollection::itemize(const uint16_t* string,
466                              size_t string_size,
467                              FontStyle style,
468                              vector<Run>* result) const {
469   const uint32_t langListId = style.getLanguageListId();
470   int variant = style.getVariant();
471   const FontFamily* lastFamily = nullptr;
472   Run* run = NULL;
473 
474   if (string_size == 0) {
475     return;
476   }
477 
478   const uint32_t kEndOfString = 0xFFFFFFFF;
479 
480   uint32_t nextCh = 0;
481   uint32_t prevCh = 0;
482   size_t nextUtf16Pos = 0;
483   size_t readLength = 0;
484   U16_NEXT(string, readLength, string_size, nextCh);
485 
486   do {
487     const uint32_t ch = nextCh;
488     const size_t utf16Pos = nextUtf16Pos;
489     nextUtf16Pos = readLength;
490     if (readLength < string_size) {
491       U16_NEXT(string, readLength, string_size, nextCh);
492     } else {
493       nextCh = kEndOfString;
494     }
495 
496     bool shouldContinueRun = false;
497     if (lastFamily != nullptr) {
498       if (isStickyWhitelisted(ch)) {
499         // Continue using existing font as long as it has coverage and is
500         // whitelisted
501         shouldContinueRun = lastFamily->getCoverage().get(ch);
502       } else if (ch == SOFT_HYPHEN || isVariationSelector(ch)) {
503         // Always continue if the character is the soft hyphen or a variation
504         // selector.
505         shouldContinueRun = true;
506       }
507     }
508 
509     if (!shouldContinueRun) {
510       const std::shared_ptr<FontFamily>& family = getFamilyForChar(
511           ch, isVariationSelector(nextCh) ? nextCh : 0, langListId, variant);
512       if (utf16Pos == 0 || family.get() != lastFamily) {
513         size_t start = utf16Pos;
514         // Workaround for combining marks and emoji modifiers until we implement
515         // per-cluster font selection: if a combining mark or an emoji modifier
516         // is found in a different font that also supports the previous
517         // character, attach previous character to the new run. U+20E3 COMBINING
518         // ENCLOSING KEYCAP, used in emoji, is handled properly by this since
519         // it's a combining mark too.
520         if (utf16Pos != 0 &&
521             ((U_GET_GC_MASK(ch) & U_GC_M_MASK) != 0 ||
522              (isEmojiModifier(ch) && isEmojiBase(prevCh))) &&
523             family != nullptr && family->getCoverage().get(prevCh)) {
524           const size_t prevChLength = U16_LENGTH(prevCh);
525           run->end -= prevChLength;
526           if (run->start == run->end) {
527             result->pop_back();
528           }
529           start -= prevChLength;
530         }
531         result->push_back(
532             {family->getClosestMatch(style), static_cast<int>(start), 0});
533         run = &result->back();
534         lastFamily = family.get();
535       }
536     }
537     prevCh = ch;
538     run->end = nextUtf16Pos;  // exclusive
539   } while (nextCh != kEndOfString);
540 }
541 
baseFontFaked(FontStyle style)542 FakedFont FontCollection::baseFontFaked(FontStyle style) {
543   return mFamilies[0]->getClosestMatch(style);
544 }
545 
createCollectionWithVariation(const std::vector<FontVariation> & variations)546 std::shared_ptr<FontCollection> FontCollection::createCollectionWithVariation(
547     const std::vector<FontVariation>& variations) {
548   if (variations.empty() || mSupportedAxes.empty()) {
549     return nullptr;
550   }
551 
552   bool hasSupportedAxis = false;
553   for (const FontVariation& variation : variations) {
554     if (mSupportedAxes.find(variation.axisTag) != mSupportedAxes.end()) {
555       hasSupportedAxis = true;
556       break;
557     }
558   }
559   if (!hasSupportedAxis) {
560     // None of variation axes are supported by this font collection.
561     return nullptr;
562   }
563 
564   std::vector<std::shared_ptr<FontFamily>> families;
565   for (const std::shared_ptr<FontFamily>& family : mFamilies) {
566     std::shared_ptr<FontFamily> newFamily =
567         family->createFamilyWithVariation(variations);
568     if (newFamily) {
569       families.push_back(newFamily);
570     } else {
571       families.push_back(family);
572     }
573   }
574 
575   return std::shared_ptr<FontCollection>(new FontCollection(families));
576 }
577 
getId() const578 uint32_t FontCollection::getId() const {
579   return mId;
580 }
581 
582 }  // namespace minikin
583