• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2013 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 // #define VERBOSE_DEBUG
18 
19 #define LOG_TAG "Minikin"
20 
#include <algorithm>
#include <utility>

#include <log/log.h>
#include "unicode/unistr.h"
#include "unicode/unorm2.h"

#include "FontLanguage.h"
#include "FontLanguageListCache.h"
#include "MinikinInternal.h"
#include <minikin/Emoji.h>
#include <minikin/FontCollection.h>
32 
33 using std::vector;
34 
35 namespace minikin {
36 
// Returns the larger of the two arguments; when neither is greater, the second one is returned.
template <typename T>
static inline T max(T a, T b) {
    if (a > b) {
        return a;
    }
    return b;
}
41 
// Variation selectors that ask for a particular presentation of the preceding base character.
constexpr uint32_t EMOJI_STYLE_VS = 0xFE0F;  // color (emoji) presentation
constexpr uint32_t TEXT_STYLE_VS = 0xFE0E;   // text presentation
44 
45 uint32_t FontCollection::sNextId = 0;
46 
FontCollection(std::shared_ptr<FontFamily> && typeface)47 FontCollection::FontCollection(std::shared_ptr<FontFamily>&& typeface) : mMaxChar(0) {
48     std::vector<std::shared_ptr<FontFamily>> typefaces;
49     typefaces.push_back(typeface);
50     init(typefaces);
51 }
52 
FontCollection(const vector<std::shared_ptr<FontFamily>> & typefaces)53 FontCollection::FontCollection(const vector<std::shared_ptr<FontFamily>>& typefaces) :
54     mMaxChar(0) {
55     init(typefaces);
56 }
57 
init(const vector<std::shared_ptr<FontFamily>> & typefaces)58 void FontCollection::init(const vector<std::shared_ptr<FontFamily>>& typefaces) {
59     android::AutoMutex _l(gMinikinLock);
60     mId = sNextId++;
61     vector<uint32_t> lastChar;
62     size_t nTypefaces = typefaces.size();
63 #ifdef VERBOSE_DEBUG
64     ALOGD("nTypefaces = %zd\n", nTypefaces);
65 #endif
66     const FontStyle defaultStyle;
67     for (size_t i = 0; i < nTypefaces; i++) {
68         const std::shared_ptr<FontFamily>& family = typefaces[i];
69         if (family->getClosestMatch(defaultStyle).font == nullptr) {
70             continue;
71         }
72         const SparseBitSet& coverage = family->getCoverage();
73         mFamilies.push_back(family);  // emplace_back would be better
74         if (family->hasVSTable()) {
75             mVSFamilyVec.push_back(family);
76         }
77         mMaxChar = max(mMaxChar, coverage.length());
78         lastChar.push_back(coverage.nextSetBit(0));
79 
80         const std::unordered_set<AxisTag>& supportedAxes = family->supportedAxes();
81         mSupportedAxes.insert(supportedAxes.begin(), supportedAxes.end());
82     }
83     nTypefaces = mFamilies.size();
84     LOG_ALWAYS_FATAL_IF(nTypefaces == 0,
85         "Font collection must have at least one valid typeface");
86     LOG_ALWAYS_FATAL_IF(nTypefaces > 254,
87         "Font collection may only have up to 254 font families.");
88     size_t nPages = (mMaxChar + kPageMask) >> kLogCharsPerPage;
89     // TODO: Use variation selector map for mRanges construction.
90     // A font can have a glyph for a base code point and variation selector pair but no glyph for
91     // the base code point without variation selector. The family won't be listed in the range in
92     // this case.
93     for (size_t i = 0; i < nPages; i++) {
94         Range dummy;
95         mRanges.push_back(dummy);
96         Range* range = &mRanges.back();
97 #ifdef VERBOSE_DEBUG
98         ALOGD("i=%zd: range start = %zd\n", i, offset);
99 #endif
100         range->start = mFamilyVec.size();
101         for (size_t j = 0; j < nTypefaces; j++) {
102             if (lastChar[j] < (i + 1) << kLogCharsPerPage) {
103                 const std::shared_ptr<FontFamily>& family = mFamilies[j];
104                 mFamilyVec.push_back(static_cast<uint8_t>(j));
105                 uint32_t nextChar = family->getCoverage().nextSetBit((i + 1) << kLogCharsPerPage);
106 #ifdef VERBOSE_DEBUG
107                 ALOGD("nextChar = %d (j = %zd)\n", nextChar, j);
108 #endif
109                 lastChar[j] = nextChar;
110             }
111         }
112         range->end = mFamilyVec.size();
113     }
114     // See the comment in Range for more details.
115     LOG_ALWAYS_FATAL_IF(mFamilyVec.size() >= 0xFFFF,
116         "Exceeded the maximum indexable cmap coverage.");
117 }
118 
// Sentinel scores used by the font fallback: the lowest possible value marks a family that cannot
// render the character, the highest possible value marks the first family of the collection.
constexpr uint32_t kUnsupportedFontScore = 0;
constexpr uint32_t kFirstFontScore = UINT32_MAX;
122 
123 // Calculates a font score.
124 // The score of the font family is based on three subscores.
125 //  - Coverage Score: How well the font family covers the given character or variation sequence.
126 //  - Language Score: How well the font family is appropriate for the language.
127 //  - Variant Score: Whether the font family matches the variant. Note that this variant is not the
128 //    one in BCP47. This is our own font variant (e.g., elegant, compact).
129 //
130 // Then, there is a priority for these three subscores as follow:
131 //   Coverage Score > Language Score > Variant Score
132 // The returned score reflects this priority order.
133 //
134 // Note that there are two special scores.
135 //  - kUnsupportedFontScore: When the font family doesn't support the variation sequence or even its
136 //    base character.
137 //  - kFirstFontScore: When the font is the first font family in the collection and it supports the
138 //    given character or variation sequence.
calcFamilyScore(uint32_t ch,uint32_t vs,int variant,uint32_t langListId,const std::shared_ptr<FontFamily> & fontFamily) const139 uint32_t FontCollection::calcFamilyScore(uint32_t ch, uint32_t vs, int variant, uint32_t langListId,
140         const std::shared_ptr<FontFamily>& fontFamily) const {
141 
142     const uint32_t coverageScore = calcCoverageScore(ch, vs, fontFamily);
143     if (coverageScore == kFirstFontScore || coverageScore == kUnsupportedFontScore) {
144         // No need to calculate other scores.
145         return coverageScore;
146     }
147 
148     const uint32_t languageScore = calcLanguageMatchingScore(langListId, *fontFamily);
149     const uint32_t variantScore = calcVariantMatchingScore(variant, *fontFamily);
150 
151     // Subscores are encoded into 31 bits representation to meet the subscore priority.
152     // The highest 2 bits are for coverage score, then following 28 bits are for language score,
153     // then the last 1 bit is for variant score.
154     return coverageScore << 29 | languageScore << 1 | variantScore;
155 }
156 
157 // Calculates a font score based on variation sequence coverage.
158 // - Returns kUnsupportedFontScore if the font doesn't support the variation sequence or its base
159 //   character.
160 // - Returns kFirstFontScore if the font family is the first font family in the collection and it
161 //   supports the given character or variation sequence.
162 // - Returns 3 if the font family supports the variation sequence.
163 // - Returns 2 if the vs is a color variation selector (U+FE0F) and if the font is an emoji font.
164 // - Returns 2 if the vs is a text variation selector (U+FE0E) and if the font is not an emoji font.
165 // - Returns 1 if the variation selector is not specified or if the font family only supports the
166 //   variation sequence's base character.
calcCoverageScore(uint32_t ch,uint32_t vs,const std::shared_ptr<FontFamily> & fontFamily) const167 uint32_t FontCollection::calcCoverageScore(uint32_t ch, uint32_t vs,
168         const std::shared_ptr<FontFamily>& fontFamily) const {
169     const bool hasVSGlyph = (vs != 0) && fontFamily->hasGlyph(ch, vs);
170     if (!hasVSGlyph && !fontFamily->getCoverage().get(ch)) {
171         // The font doesn't support either variation sequence or even the base character.
172         return kUnsupportedFontScore;
173     }
174 
175     if ((vs == 0 || hasVSGlyph) && mFamilies[0] == fontFamily) {
176         // If the first font family supports the given character or variation sequence, always use
177         // it.
178         return kFirstFontScore;
179     }
180 
181     if (vs == 0) {
182         return 1;
183     }
184 
185     if (hasVSGlyph) {
186         return 3;
187     }
188 
189     if (vs == EMOJI_STYLE_VS || vs == TEXT_STYLE_VS) {
190         const FontLanguages& langs = FontLanguageListCache::getById(fontFamily->langId());
191         bool hasEmojiFlag = false;
192         for (size_t i = 0; i < langs.size(); ++i) {
193             if (langs[i].getEmojiStyle() == FontLanguage::EMSTYLE_EMOJI) {
194                 hasEmojiFlag = true;
195                 break;
196             }
197         }
198 
199         if (vs == EMOJI_STYLE_VS) {
200             return hasEmojiFlag ? 2 : 1;
201         } else {  // vs == TEXT_STYLE_VS
202             return hasEmojiFlag ? 1 : 2;
203         }
204     }
205     return 1;
206 }
207 
208 // Calculate font scores based on the script matching, subtag matching and primary langauge matching.
209 //
210 // 1. If only the font's language matches or there is no matches between requested font and
211 //    supported font, then the font obtains a score of 0.
212 // 2. Without a match in language, considering subtag may change font's EmojiStyle over script,
213 //    a match in subtag gets a score of 2 and a match in scripts gains a score of 1.
214 // 3. Regarding to two elements matchings, language-and-subtag matching has a score of 4, while
215 //    language-and-script obtains a socre of 3 with the same reason above.
216 //
217 // If two languages in the requested list have the same language score, the font matching with
218 // higher priority language gets a higher score. For example, in the case the user requested
219 // language list is "ja-Jpan,en-Latn". The score of for the font of "ja-Jpan" gets a higher score
220 // than the font of "en-Latn".
221 //
222 // To achieve score calculation with priorities, the language score is determined as follows:
223 //   LanguageScore = s(0) * 5^(m - 1) + s(1) * 5^(m - 2) + ... + s(m - 2) * 5 + s(m - 1)
224 // Here, m is the maximum number of languages to be compared, and s(i) is the i-th language's
225 // matching score. The possible values of s(i) are 0, 1, 2, 3 and 4.
calcLanguageMatchingScore(uint32_t userLangListId,const FontFamily & fontFamily)226 uint32_t FontCollection::calcLanguageMatchingScore(
227         uint32_t userLangListId, const FontFamily& fontFamily) {
228     const FontLanguages& langList = FontLanguageListCache::getById(userLangListId);
229     const FontLanguages& fontLanguages = FontLanguageListCache::getById(fontFamily.langId());
230 
231     const size_t maxCompareNum = std::min(langList.size(), FONT_LANGUAGES_LIMIT);
232     uint32_t score = 0;
233     for (size_t i = 0; i < maxCompareNum; ++i) {
234         score = score * 5u + langList[i].calcScoreFor(fontLanguages);
235     }
236     return score;
237 }
238 
239 // Calculates a font score based on variant ("compact" or "elegant") matching.
240 //  - Returns 1 if the font doesn't have variant or the variant matches with the text style.
241 //  - No score if the font has a variant but it doesn't match with the text style.
calcVariantMatchingScore(int variant,const FontFamily & fontFamily)242 uint32_t FontCollection::calcVariantMatchingScore(int variant, const FontFamily& fontFamily) {
243     return (fontFamily.variant() == 0 || fontFamily.variant() == variant) ? 1 : 0;
244 }
245 
246 // Implement heuristic for choosing best-match font. Here are the rules:
247 // 1. If first font in the collection has the character, it wins.
248 // 2. Calculate a score for the font family. See comments in calcFamilyScore for the detail.
249 // 3. Highest score wins, with ties resolved to the first font.
250 // This method never returns nullptr.
getFamilyForChar(uint32_t ch,uint32_t vs,uint32_t langListId,int variant) const251 const std::shared_ptr<FontFamily>& FontCollection::getFamilyForChar(uint32_t ch, uint32_t vs,
252             uint32_t langListId, int variant) const {
253     if (ch >= mMaxChar) {
254         return mFamilies[0];
255     }
256 
257     Range range = mRanges[ch >> kLogCharsPerPage];
258 
259     if (vs != 0) {
260         range = { 0, static_cast<uint16_t>(mFamilies.size()) };
261     }
262 
263 #ifdef VERBOSE_DEBUG
264     ALOGD("querying range %zd:%zd\n", range.start, range.end);
265 #endif
266     int bestFamilyIndex = -1;
267     uint32_t bestScore = kUnsupportedFontScore;
268     for (size_t i = range.start; i < range.end; i++) {
269         const std::shared_ptr<FontFamily>& family =
270                 vs == 0 ? mFamilies[mFamilyVec[i]] : mFamilies[i];
271         const uint32_t score = calcFamilyScore(ch, vs, variant, langListId, family);
272         if (score == kFirstFontScore) {
273             // If the first font family supports the given character or variation sequence, always
274             // use it.
275             return family;
276         }
277         if (score > bestScore) {
278             bestScore = score;
279             bestFamilyIndex = i;
280         }
281     }
282     if (bestFamilyIndex == -1) {
283         UErrorCode errorCode = U_ZERO_ERROR;
284         const UNormalizer2* normalizer = unorm2_getNFDInstance(&errorCode);
285         if (U_SUCCESS(errorCode)) {
286             UChar decomposed[4];
287             int len = unorm2_getRawDecomposition(normalizer, ch, decomposed, 4, &errorCode);
288             if (U_SUCCESS(errorCode) && len > 0) {
289                 int off = 0;
290                 U16_NEXT_UNSAFE(decomposed, off, ch);
291                 return getFamilyForChar(ch, vs, langListId, variant);
292             }
293         }
294         return mFamilies[0];
295     }
296     return vs == 0 ? mFamilies[mFamilyVec[bestFamilyIndex]] : mFamilies[bestFamilyIndex];
297 }
298 
// Code points referenced by the run-continuation rules used while itemizing text.
const uint32_t NBSP = 0x00A0;
const uint32_t SOFT_HYPHEN = 0x00AD;
const uint32_t ZWNJ = 0x200C;  // U+200C ZERO WIDTH NON-JOINER
const uint32_t ZWJ = 0x200D;   // U+200D ZERO WIDTH JOINER
const uint32_t HYPHEN = 0x2010;
const uint32_t NB_HYPHEN = 0x2011;
const uint32_t NNBSP = 0x202F;
const uint32_t FEMALE_SIGN = 0x2640;
const uint32_t MALE_SIGN = 0x2642;
const uint32_t STAFF_OF_AESCULAPIUS = 0x2695;

// Characters where we want to continue using existing font run instead of
// recomputing the best match in the fallback list.
static const uint32_t stickyWhitelist[] = {
        '!', ',', '-', '.', ':', ';', '?', NBSP, ZWJ, ZWNJ,
        HYPHEN, NB_HYPHEN, NNBSP, FEMALE_SIGN, MALE_SIGN, STAFF_OF_AESCULAPIUS };

// Returns true when c is one of the code points listed in stickyWhitelist.
static bool isStickyWhitelisted(uint32_t c) {
    for (const uint32_t sticky : stickyWhitelist) {
        if (sticky == c) return true;
    }
    return false;
}
322 
hasVariationSelector(uint32_t baseCodepoint,uint32_t variationSelector) const323 bool FontCollection::hasVariationSelector(uint32_t baseCodepoint,
324         uint32_t variationSelector) const {
325     if (!isVariationSelector(variationSelector)) {
326         return false;
327     }
328     if (baseCodepoint >= mMaxChar) {
329         return false;
330     }
331 
332     // Currently mRanges can not be used here since it isn't aware of the variation sequence.
333     for (size_t i = 0; i < mVSFamilyVec.size(); i++) {
334         if (mVSFamilyVec[i]->hasGlyph(baseCodepoint, variationSelector)) {
335             return true;
336         }
337     }
338 
339     // TODO: We can remove this lock by precomputing color emoji information.
340     android::AutoMutex _l(gMinikinLock);
341 
342     // Even if there is no cmap format 14 subtable entry for the given sequence, should return true
343     // for <char, text presentation selector> case since we have special fallback rule for the
344     // sequence. Note that we don't need to restrict this to already standardized variation
345     // sequences, since Unicode is adding variation sequences more frequently now and may even move
346     // towards allowing text and emoji variation selectors on any character.
347     if (variationSelector == TEXT_STYLE_VS) {
348         for (size_t i = 0; i < mFamilies.size(); ++i) {
349             if (!mFamilies[i]->isColorEmojiFamily() && mFamilies[i]->hasGlyph(baseCodepoint, 0)) {
350                 return true;
351             }
352         }
353     }
354 
355     return false;
356 }
357 
itemize(const uint16_t * string,size_t string_size,FontStyle style,vector<Run> * result) const358 void FontCollection::itemize(const uint16_t *string, size_t string_size, FontStyle style,
359         vector<Run>* result) const {
360     const uint32_t langListId = style.getLanguageListId();
361     int variant = style.getVariant();
362     const FontFamily* lastFamily = nullptr;
363     Run* run = NULL;
364 
365     if (string_size == 0) {
366         return;
367     }
368 
369     const uint32_t kEndOfString = 0xFFFFFFFF;
370 
371     uint32_t nextCh = 0;
372     uint32_t prevCh = 0;
373     size_t nextUtf16Pos = 0;
374     size_t readLength = 0;
375     U16_NEXT(string, readLength, string_size, nextCh);
376 
377     do {
378         const uint32_t ch = nextCh;
379         const size_t utf16Pos = nextUtf16Pos;
380         nextUtf16Pos = readLength;
381         if (readLength < string_size) {
382             U16_NEXT(string, readLength, string_size, nextCh);
383         } else {
384             nextCh = kEndOfString;
385         }
386 
387         bool shouldContinueRun = false;
388         if (lastFamily != nullptr) {
389             if (isStickyWhitelisted(ch)) {
390                 // Continue using existing font as long as it has coverage and is whitelisted
391                 shouldContinueRun = lastFamily->getCoverage().get(ch);
392             } else if (ch == SOFT_HYPHEN || isVariationSelector(ch)) {
393                 // Always continue if the character is the soft hyphen or a variation selector.
394                 shouldContinueRun = true;
395             }
396         }
397 
398         if (!shouldContinueRun) {
399             const std::shared_ptr<FontFamily>& family = getFamilyForChar(
400                     ch, isVariationSelector(nextCh) ? nextCh : 0, langListId, variant);
401             if (utf16Pos == 0 || family.get() != lastFamily) {
402                 size_t start = utf16Pos;
403                 // Workaround for combining marks and emoji modifiers until we implement
404                 // per-cluster font selection: if a combining mark or an emoji modifier is found in
405                 // a different font that also supports the previous character, attach previous
406                 // character to the new run. U+20E3 COMBINING ENCLOSING KEYCAP, used in emoji, is
407                 // handled properly by this since it's a combining mark too.
408                 if (utf16Pos != 0 &&
409                         ((U_GET_GC_MASK(ch) & U_GC_M_MASK) != 0 ||
410                          (isEmojiModifier(ch) && isEmojiBase(prevCh))) &&
411                         family != nullptr && family->getCoverage().get(prevCh)) {
412                     const size_t prevChLength = U16_LENGTH(prevCh);
413                     run->end -= prevChLength;
414                     if (run->start == run->end) {
415                         result->pop_back();
416                     }
417                     start -= prevChLength;
418                 }
419                 result->push_back({family->getClosestMatch(style), static_cast<int>(start), 0});
420                 run = &result->back();
421                 lastFamily = family.get();
422             }
423         }
424         prevCh = ch;
425         run->end = nextUtf16Pos;  // exclusive
426     } while (nextCh != kEndOfString);
427 }
428 
baseFontFaked(FontStyle style)429 FakedFont FontCollection::baseFontFaked(FontStyle style) {
430     return mFamilies[0]->getClosestMatch(style);
431 }
432 
createCollectionWithVariation(const std::vector<FontVariation> & variations)433 std::shared_ptr<FontCollection> FontCollection::createCollectionWithVariation(
434         const std::vector<FontVariation>& variations) {
435     if (variations.empty() || mSupportedAxes.empty()) {
436         return nullptr;
437     }
438 
439     bool hasSupportedAxis = false;
440     for (const FontVariation& variation : variations) {
441         if (mSupportedAxes.find(variation.axisTag) != mSupportedAxes.end()) {
442             hasSupportedAxis = true;
443             break;
444         }
445     }
446     if (!hasSupportedAxis) {
447         // None of variation axes are supported by this font collection.
448         return nullptr;
449     }
450 
451     std::vector<std::shared_ptr<FontFamily> > families;
452     for (const std::shared_ptr<FontFamily>& family : mFamilies) {
453         std::shared_ptr<FontFamily> newFamily = family->createFamilyWithVariation(variations);
454         if (newFamily) {
455             families.push_back(newFamily);
456         } else {
457             families.push_back(family);
458         }
459     }
460 
461     return std::shared_ptr<FontCollection>(new FontCollection(families));
462 }
463 
getId() const464 uint32_t FontCollection::getId() const {
465     return mId;
466 }
467 
468 }  // namespace minikin
469