1 /*
2 * Copyright (C) 2013 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 // #define VERBOSE_DEBUG
18
19 #define LOG_TAG "Minikin"
20 #include <cutils/log.h>
21 #include <algorithm>
22
23 #include "unicode/unistr.h"
24 #include "unicode/unorm2.h"
25
26 #include "FontLanguage.h"
27 #include "FontLanguageListCache.h"
28 #include "MinikinInternal.h"
29 #include <minikin/FontCollection.h>
30
31 using std::vector;
32
33 namespace android {
34
// Returns the larger of the two arguments as decided by operator>; when neither compares greater,
// the second argument is returned.
template <typename T>
static inline T max(T a, T b) {
    if (a > b) {
        return a;
    }
    return b;
}
39
// Variation selectors that get dedicated handling in the font fallback below:
// U+FE0F is treated as the color (emoji style) variation selector and U+FE0E as the text style
// variation selector.
const uint32_t EMOJI_STYLE_VS = 0xFE0F;
const uint32_t TEXT_STYLE_VS = 0xFE0E;
42
// Table of base code points consulted by isEmojiStyleVSBase().
// See http://www.unicode.org/Public/9.0.0/ucd/StandardizedVariants.txt
// U+2640, U+2642 and U+2695 are now in the emoji category but are not listed in the above file, so
// they were added manually.
// Must be kept sorted in ascending order because the table is searched with std::binary_search.
const uint32_t EMOJI_STYLE_VS_BASES[] = {
    0x0023, 0x002A, 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039,
    0x00A9, 0x00AE, 0x203C, 0x2049, 0x2122, 0x2139, 0x2194, 0x2195, 0x2196, 0x2197, 0x2198, 0x2199,
    0x21A9, 0x21AA, 0x231A, 0x231B, 0x2328, 0x23CF, 0x23ED, 0x23EE, 0x23EF, 0x23F1, 0x23F2, 0x23F8,
    0x23F9, 0x23FA, 0x24C2, 0x25AA, 0x25AB, 0x25B6, 0x25C0, 0x25FB, 0x25FC, 0x25FD, 0x25FE, 0x2600,
    0x2601, 0x2602, 0x2603, 0x2604, 0x260E, 0x2611, 0x2614, 0x2615, 0x2618, 0x261D, 0x2620, 0x2622,
    0x2623, 0x2626, 0x262A, 0x262E, 0x262F, 0x2638, 0x2639, 0x263A, 0x2640, 0x2642, 0x2648, 0x2649,
    0x264A, 0x264B, 0x264C, 0x264D, 0x264E, 0x264F, 0x2650, 0x2651, 0x2652, 0x2653, 0x2660, 0x2663,
    0x2665, 0x2666, 0x2668, 0x267B, 0x267F, 0x2692, 0x2693, 0x2694, 0x2695, 0x2696, 0x2697, 0x2699,
    0x269B, 0x269C, 0x26A0, 0x26A1, 0x26AA, 0x26AB, 0x26B0, 0x26B1, 0x26BD, 0x26BE, 0x26C4, 0x26C5,
    0x26C8, 0x26CF, 0x26D1, 0x26D3, 0x26D4, 0x26E9, 0x26EA, 0x26F0, 0x26F1, 0x26F2, 0x26F3, 0x26F4,
    0x26F5, 0x26F7, 0x26F8, 0x26F9, 0x26FA, 0x26FD, 0x2702, 0x2708, 0x2709, 0x270C, 0x270D, 0x270F,
    0x2712, 0x2714, 0x2716, 0x271D, 0x2721, 0x2733, 0x2734, 0x2744, 0x2747, 0x2757, 0x2763, 0x2764,
    0x27A1, 0x2934, 0x2935, 0x2B05, 0x2B06, 0x2B07, 0x2B1B, 0x2B1C, 0x2B50, 0x2B55, 0x3030, 0x303D,
    0x3297, 0x3299, 0x1F004, 0x1F170, 0x1F171, 0x1F17E, 0x1F17F, 0x1F202, 0x1F21A, 0x1F22F, 0x1F237,
    0x1F321, 0x1F324, 0x1F325, 0x1F326, 0x1F327, 0x1F328, 0x1F329, 0x1F32A, 0x1F32B, 0x1F32C,
    0x1F336, 0x1F37D, 0x1F396, 0x1F397, 0x1F399, 0x1F39A, 0x1F39B, 0x1F39E, 0x1F39F, 0x1F3CB,
    0x1F3CC, 0x1F3CD, 0x1F3CE, 0x1F3D4, 0x1F3D5, 0x1F3D6, 0x1F3D7, 0x1F3D8, 0x1F3D9, 0x1F3DA,
    0x1F3DB, 0x1F3DC, 0x1F3DD, 0x1F3DE, 0x1F3DF, 0x1F3F3, 0x1F3F5, 0x1F3F7, 0x1F43F, 0x1F441,
    0x1F4FD, 0x1F549, 0x1F54A, 0x1F56F, 0x1F570, 0x1F573, 0x1F574, 0x1F575, 0x1F576, 0x1F577,
    0x1F578, 0x1F579, 0x1F587, 0x1F58A, 0x1F58B, 0x1F58C, 0x1F58D, 0x1F590, 0x1F5A5, 0x1F5A8,
    0x1F5B1, 0x1F5B2, 0x1F5BC, 0x1F5C2, 0x1F5C3, 0x1F5C4, 0x1F5D1, 0x1F5D2, 0x1F5D3, 0x1F5DC,
    0x1F5DD, 0x1F5DE, 0x1F5E1, 0x1F5E3, 0x1F5E8, 0x1F5EF, 0x1F5F3, 0x1F5FA, 0x1F6CB, 0x1F6CD,
    0x1F6CE, 0x1F6CF, 0x1F6E0, 0x1F6E1, 0x1F6E2, 0x1F6E3, 0x1F6E4, 0x1F6E5, 0x1F6E9, 0x1F6F0,
    0x1F6F3,
};
73
isEmojiStyleVSBase(uint32_t cp)74 static bool isEmojiStyleVSBase(uint32_t cp) {
75 const size_t length = sizeof(EMOJI_STYLE_VS_BASES) / sizeof(EMOJI_STYLE_VS_BASES[0]);
76 return std::binary_search(EMOJI_STYLE_VS_BASES, EMOJI_STYLE_VS_BASES + length, cp);
77 }
78
// Source of per-collection ids: the constructor assigns the current value to mId and then
// increments it, while holding gMinikinLock.
uint32_t FontCollection::sNextId = 0;
80
FontCollection(const vector<FontFamily * > & typefaces)81 FontCollection::FontCollection(const vector<FontFamily*>& typefaces) :
82 mMaxChar(0) {
83 AutoMutex _l(gMinikinLock);
84 mId = sNextId++;
85 vector<uint32_t> lastChar;
86 size_t nTypefaces = typefaces.size();
87 #ifdef VERBOSE_DEBUG
88 ALOGD("nTypefaces = %zd\n", nTypefaces);
89 #endif
90 const FontStyle defaultStyle;
91 for (size_t i = 0; i < nTypefaces; i++) {
92 FontFamily* family = typefaces[i];
93 MinikinFont* typeface = family->getClosestMatch(defaultStyle).font;
94 if (typeface == NULL) {
95 continue;
96 }
97 family->RefLocked();
98 const SparseBitSet* coverage = family->getCoverage();
99 if (coverage == nullptr) {
100 family->UnrefLocked();
101 continue;
102 }
103 mFamilies.push_back(family); // emplace_back would be better
104 if (family->hasVSTable()) {
105 mVSFamilyVec.push_back(family);
106 }
107 mMaxChar = max(mMaxChar, coverage->length());
108 lastChar.push_back(coverage->nextSetBit(0));
109 }
110 nTypefaces = mFamilies.size();
111 LOG_ALWAYS_FATAL_IF(nTypefaces == 0,
112 "Font collection must have at least one valid typeface");
113 size_t nPages = (mMaxChar + kPageMask) >> kLogCharsPerPage;
114 size_t offset = 0;
115 // TODO: Use variation selector map for mRanges construction.
116 // A font can have a glyph for a base code point and variation selector pair but no glyph for
117 // the base code point without variation selector. The family won't be listed in the range in
118 // this case.
119 for (size_t i = 0; i < nPages; i++) {
120 Range dummy;
121 mRanges.push_back(dummy);
122 Range* range = &mRanges.back();
123 #ifdef VERBOSE_DEBUG
124 ALOGD("i=%zd: range start = %zd\n", i, offset);
125 #endif
126 range->start = offset;
127 for (size_t j = 0; j < nTypefaces; j++) {
128 if (lastChar[j] < (i + 1) << kLogCharsPerPage) {
129 FontFamily* family = mFamilies[j];
130 mFamilyVec.push_back(family);
131 offset++;
132 uint32_t nextChar = family->getCoverage()->nextSetBit((i + 1) << kLogCharsPerPage);
133 #ifdef VERBOSE_DEBUG
134 ALOGD("nextChar = %d (j = %zd)\n", nextChar, j);
135 #endif
136 lastChar[j] = nextChar;
137 }
138 }
139 range->end = offset;
140 }
141 }
142
~FontCollection()143 FontCollection::~FontCollection() {
144 for (size_t i = 0; i < mFamilies.size(); i++) {
145 mFamilies[i]->UnrefLocked();
146 }
147 }
148
// Special scores for the font fallback.
// kUnsupportedFontScore is the lowest possible score; a family with this score is never selected
// by the "score > bestScore" comparison in getFamilyForChar.
// kFirstFontScore marks the first family of the collection; getFamilyForChar returns such a
// family immediately.
const uint32_t kUnsupportedFontScore = 0;
const uint32_t kFirstFontScore = UINT32_MAX;
152
153 // Calculates a font score.
154 // The score of the font family is based on three subscores.
155 // - Coverage Score: How well the font family covers the given character or variation sequence.
156 // - Language Score: How well the font family is appropriate for the language.
157 // - Variant Score: Whether the font family matches the variant. Note that this variant is not the
158 // one in BCP47. This is our own font variant (e.g., elegant, compact).
159 //
160 // Then, there is a priority for these three subscores as follow:
161 // Coverage Score > Language Score > Variant Score
162 // The returned score reflects this priority order.
163 //
164 // Note that there are two special scores.
165 // - kUnsupportedFontScore: When the font family doesn't support the variation sequence or even its
166 // base character.
167 // - kFirstFontScore: When the font is the first font family in the collection and it supports the
168 // given character or variation sequence.
calcFamilyScore(uint32_t ch,uint32_t vs,int variant,uint32_t langListId,FontFamily * fontFamily) const169 uint32_t FontCollection::calcFamilyScore(uint32_t ch, uint32_t vs, int variant, uint32_t langListId,
170 FontFamily* fontFamily) const {
171
172 const uint32_t coverageScore = calcCoverageScore(ch, vs, fontFamily);
173 if (coverageScore == kFirstFontScore || coverageScore == kUnsupportedFontScore) {
174 // No need to calculate other scores.
175 return coverageScore;
176 }
177
178 const uint32_t languageScore = calcLanguageMatchingScore(langListId, *fontFamily);
179 const uint32_t variantScore = calcVariantMatchingScore(variant, *fontFamily);
180
181 // Subscores are encoded into 31 bits representation to meet the subscore priority.
182 // The highest 2 bits are for coverage score, then following 28 bits are for language score,
183 // then the last 1 bit is for variant score.
184 return coverageScore << 29 | languageScore << 1 | variantScore;
185 }
186
187 // Calculates a font score based on variation sequence coverage.
188 // - Returns kUnsupportedFontScore if the font doesn't support the variation sequence or its base
189 // character.
190 // - Returns kFirstFontScore if the font family is the first font family in the collection and it
191 // supports the given character or variation sequence.
192 // - Returns 3 if the font family supports the variation sequence.
193 // - Returns 2 if the vs is a color variation selector (U+FE0F) and if the font is an emoji font.
194 // - Returns 2 if the vs is a text variation selector (U+FE0E) and if the font is not an emoji font.
195 // - Returns 1 if the variation selector is not specified or if the font family only supports the
196 // variation sequence's base character.
calcCoverageScore(uint32_t ch,uint32_t vs,FontFamily * fontFamily) const197 uint32_t FontCollection::calcCoverageScore(uint32_t ch, uint32_t vs, FontFamily* fontFamily) const {
198 const bool hasVSGlyph = (vs != 0) && fontFamily->hasGlyph(ch, vs);
199 if (!hasVSGlyph && !fontFamily->getCoverage()->get(ch)) {
200 // The font doesn't support either variation sequence or even the base character.
201 return kUnsupportedFontScore;
202 }
203
204 if ((vs == 0 || hasVSGlyph) && mFamilies[0] == fontFamily) {
205 // If the first font family supports the given character or variation sequence, always use
206 // it.
207 return kFirstFontScore;
208 }
209
210 if (vs == 0) {
211 return 1;
212 }
213
214 if (hasVSGlyph) {
215 return 3;
216 }
217
218 if (vs == EMOJI_STYLE_VS || vs == TEXT_STYLE_VS) {
219 const FontLanguages& langs = FontLanguageListCache::getById(fontFamily->langId());
220 bool hasEmojiFlag = false;
221 for (size_t i = 0; i < langs.size(); ++i) {
222 if (langs[i].hasEmojiFlag()) {
223 hasEmojiFlag = true;
224 break;
225 }
226 }
227
228 if (vs == EMOJI_STYLE_VS) {
229 return hasEmojiFlag ? 2 : 1;
230 } else { // vs == TEXT_STYLE_VS
231 return hasEmojiFlag ? 1 : 2;
232 }
233 }
234 return 1;
235 }
236
237 // Calculates font scores based on the script matching and primary langauge matching.
238 //
239 // If the font's script doesn't support the requested script, the font gets a score of 0. If the
240 // font's script supports the requested script and the font has the same primary language as the
241 // requested one, the font gets a score of 2. If the font's script supports the requested script
242 // but the primary language is different from the requested one, the font gets a score of 1.
243 //
244 // If two languages in the requested list have the same language score, the font matching with
245 // higher priority language gets a higher score. For example, in the case the user requested
246 // language list is "ja-Jpan,en-Latn". The score of for the font of "ja-Jpan" gets a higher score
247 // than the font of "en-Latn".
248 //
249 // To achieve the above two conditions, the language score is determined as follows:
250 // LanguageScore = s(0) * 3^(m - 1) + s(1) * 3^(m - 2) + ... + s(m - 2) * 3 + s(m - 1)
251 // Here, m is the maximum number of languages to be compared, and s(i) is the i-th language's
252 // matching score. The possible values of s(i) are 0, 1 and 2.
calcLanguageMatchingScore(uint32_t userLangListId,const FontFamily & fontFamily)253 uint32_t FontCollection::calcLanguageMatchingScore(
254 uint32_t userLangListId, const FontFamily& fontFamily) {
255 const FontLanguages& langList = FontLanguageListCache::getById(userLangListId);
256 const FontLanguages& fontLanguages = FontLanguageListCache::getById(fontFamily.langId());
257
258 const size_t maxCompareNum = std::min(langList.size(), FONT_LANGUAGES_LIMIT);
259 uint32_t score = 0;
260 for (size_t i = 0; i < maxCompareNum; ++i) {
261 score = score * 3u + langList[i].calcScoreFor(fontLanguages);
262 }
263 return score;
264 }
265
266 // Calculates a font score based on variant ("compact" or "elegant") matching.
267 // - Returns 1 if the font doesn't have variant or the variant matches with the text style.
268 // - No score if the font has a variant but it doesn't match with the text style.
calcVariantMatchingScore(int variant,const FontFamily & fontFamily)269 uint32_t FontCollection::calcVariantMatchingScore(int variant, const FontFamily& fontFamily) {
270 return (fontFamily.variant() == 0 || fontFamily.variant() == variant) ? 1 : 0;
271 }
272
273 // Implement heuristic for choosing best-match font. Here are the rules:
274 // 1. If first font in the collection has the character, it wins.
275 // 2. Calculate a score for the font family. See comments in calcFamilyScore for the detail.
276 // 3. Highest score wins, with ties resolved to the first font.
277 // This method never returns nullptr.
getFamilyForChar(uint32_t ch,uint32_t vs,uint32_t langListId,int variant) const278 FontFamily* FontCollection::getFamilyForChar(uint32_t ch, uint32_t vs,
279 uint32_t langListId, int variant) const {
280 if (ch >= mMaxChar) {
281 return mFamilies[0];
282 }
283
284 const std::vector<FontFamily*>* familyVec = &mFamilyVec;
285 Range range = mRanges[ch >> kLogCharsPerPage];
286
287 std::vector<FontFamily*> familyVecForVS;
288 if (vs != 0) {
289 // If variation selector is specified, need to search for both the variation sequence and
290 // its base codepoint. Compute the union vector of them.
291 familyVecForVS = mVSFamilyVec;
292 familyVecForVS.insert(familyVecForVS.end(),
293 mFamilyVec.begin() + range.start, mFamilyVec.begin() + range.end);
294 std::sort(familyVecForVS.begin(), familyVecForVS.end());
295 auto last = std::unique(familyVecForVS.begin(), familyVecForVS.end());
296 familyVecForVS.erase(last, familyVecForVS.end());
297
298 familyVec = &familyVecForVS;
299 range = { 0, familyVecForVS.size() };
300 }
301
302 #ifdef VERBOSE_DEBUG
303 ALOGD("querying range %zd:%zd\n", range.start, range.end);
304 #endif
305 FontFamily* bestFamily = nullptr;
306 uint32_t bestScore = kUnsupportedFontScore;
307 for (size_t i = range.start; i < range.end; i++) {
308 FontFamily* family = (*familyVec)[i];
309 const uint32_t score = calcFamilyScore(ch, vs, variant, langListId, family);
310 if (score == kFirstFontScore) {
311 // If the first font family supports the given character or variation sequence, always
312 // use it.
313 return family;
314 }
315 if (score > bestScore) {
316 bestScore = score;
317 bestFamily = family;
318 }
319 }
320 if (bestFamily == nullptr) {
321 UErrorCode errorCode = U_ZERO_ERROR;
322 const UNormalizer2* normalizer = unorm2_getNFDInstance(&errorCode);
323 if (U_SUCCESS(errorCode)) {
324 UChar decomposed[4];
325 int len = unorm2_getRawDecomposition(normalizer, ch, decomposed, 4, &errorCode);
326 if (U_SUCCESS(errorCode) && len > 0) {
327 int off = 0;
328 U16_NEXT_UNSAFE(decomposed, off, ch);
329 return getFamilyForChar(ch, vs, langListId, variant);
330 }
331 }
332 bestFamily = mFamilies[0];
333 }
334 return bestFamily;
335 }
336
const uint32_t NBSP = 0xa0;                    // U+00A0 NO-BREAK SPACE
// The two values below used to be swapped: U+200D is ZERO WIDTH JOINER and U+200C is
// ZERO WIDTH NON-JOINER. Both are whitelisted, so whitelist membership is unaffected.
const uint32_t ZWJ = 0x200d;                   // U+200D ZERO WIDTH JOINER
const uint32_t ZWNJ = 0x200c;                  // U+200C ZERO WIDTH NON-JOINER
const uint32_t HYPHEN = 0x2010;                // U+2010 HYPHEN
const uint32_t NB_HYPHEN = 0x2011;             // U+2011 NON-BREAKING HYPHEN
const uint32_t FEMALE_SIGN = 0x2640;           // U+2640 FEMALE SIGN
const uint32_t MALE_SIGN = 0x2642;             // U+2642 MALE SIGN
const uint32_t STAFF_OF_AESCULAPIUS = 0x2695;  // U+2695 STAFF OF AESCULAPIUS

// Characters where we want to continue using existing font run instead of
// recomputing the best match in the fallback list.
static const uint32_t stickyWhitelist[] = { '!', ',', '-', '.', ':', ';', '?', NBSP, ZWJ, ZWNJ,
        HYPHEN, NB_HYPHEN, FEMALE_SIGN, MALE_SIGN, STAFF_OF_AESCULAPIUS };

// Returns true if c is one of the code points listed in stickyWhitelist. The list is short, so a
// linear scan is used.
static bool isStickyWhitelisted(uint32_t c) {
    for (const uint32_t whitelisted : stickyWhitelist) {
        if (whitelisted == c) return true;
    }
    return false;
}
357
// Returns true if c lies in one of the two variation selector ranges:
// U+FE00..U+FE0F or U+E0100..U+E01EF (both inclusive).
static bool isVariationSelector(uint32_t c) {
    if (c >= 0xFE00 && c <= 0xFE0F) {
        return true;
    }
    return c >= 0xE0100 && c <= 0xE01EF;
}
361
hasVariationSelector(uint32_t baseCodepoint,uint32_t variationSelector) const362 bool FontCollection::hasVariationSelector(uint32_t baseCodepoint,
363 uint32_t variationSelector) const {
364 if (!isVariationSelector(variationSelector)) {
365 return false;
366 }
367 if (baseCodepoint >= mMaxChar) {
368 return false;
369 }
370
371 AutoMutex _l(gMinikinLock);
372
373 // Currently mRanges can not be used here since it isn't aware of the variation sequence.
374 for (size_t i = 0; i < mVSFamilyVec.size(); i++) {
375 if (mVSFamilyVec[i]->hasGlyph(baseCodepoint, variationSelector)) {
376 return true;
377 }
378 }
379
380 // Even if there is no cmap format 14 subtable entry for the given sequence, should return true
381 // for emoji + U+FE0E case since we have special fallback rule for the sequence.
382 if (isEmojiStyleVSBase(baseCodepoint) && variationSelector == TEXT_STYLE_VS) {
383 for (size_t i = 0; i < mFamilies.size(); ++i) {
384 if (!mFamilies[i]->isColorEmojiFamily() && variationSelector == TEXT_STYLE_VS &&
385 mFamilies[i]->hasGlyph(baseCodepoint, 0)) {
386 return true;
387 }
388 }
389 }
390
391 return false;
392 }
393
// Splits a UTF-16 string into runs that each use a single font and appends them to *result.
//
//  string / string_size: the UTF-16 text and its length in code units. Nothing is appended when
//      string_size is 0.
//  style: supplies the language list id and variant used for family selection, and is used to
//      pick the concrete font of each run via getClosestMatch().
//  result: receives the runs; Run::start and Run::end are UTF-16 offsets, end being exclusive.
//
// The loop decodes one code point ahead (nextCh) so that a variation selector following the
// current character can be passed to getFamilyForChar().
void FontCollection::itemize(const uint16_t *string, size_t string_size, FontStyle style,
        vector<Run>* result) const {
    const uint32_t langListId = style.getLanguageListId();
    int variant = style.getVariant();
    FontFamily* lastFamily = NULL;
    // Points at the run currently being extended; refreshed after every push_back on *result.
    Run* run = NULL;

    if (string_size == 0) {
        return;
    }

    // Sentinel stored in nextCh once the whole string has been decoded.
    const uint32_t kEndOfString = 0xFFFFFFFF;

    uint32_t nextCh = 0;
    uint32_t prevCh = 0;
    size_t nextUtf16Pos = 0;
    size_t readLength = 0;
    // Prime the look-ahead with the first code point.
    U16_NEXT(string, readLength, string_size, nextCh);

    do {
        // ch starts at utf16Pos and ends (exclusive) at nextUtf16Pos.
        const uint32_t ch = nextCh;
        const size_t utf16Pos = nextUtf16Pos;
        nextUtf16Pos = readLength;
        if (readLength < string_size) {
            U16_NEXT(string, readLength, string_size, nextCh);
        } else {
            nextCh = kEndOfString;
        }

        bool shouldContinueRun = false;
        if (lastFamily != nullptr) {
            if (isStickyWhitelisted(ch)) {
                // Continue using existing font as long as it has coverage and is whitelisted
                shouldContinueRun = lastFamily->getCoverage()->get(ch);
            } else if (isVariationSelector(ch)) {
                // Always continue if the character is a variation selector.
                shouldContinueRun = true;
            }
        }

        if (!shouldContinueRun) {
            FontFamily* family = getFamilyForChar(ch, isVariationSelector(nextCh) ? nextCh : 0,
                    langListId, variant);
            // The first character always opens a run; later ones only when the family changes.
            if (utf16Pos == 0 || family != lastFamily) {
                size_t start = utf16Pos;
                // Workaround for combining marks and emoji modifiers until we implement
                // per-cluster font selection: if a combining mark or an emoji modifier is found in
                // a different font that also supports the previous character, attach previous
                // character to the new run. U+20E3 COMBINING ENCLOSING KEYCAP, used in emoji, is
                // handled properly by this since it's a combining mark too.
                if (utf16Pos != 0 &&
                        ((U_GET_GC_MASK(ch) & U_GC_M_MASK) != 0 ||
                         (isEmojiModifier(ch) && isEmojiBase(prevCh))) &&
                        family && family->getCoverage()->get(prevCh)) {
                    const size_t prevChLength = U16_LENGTH(prevCh);
                    // Take the previous character away from the old run; drop that run if it
                    // became empty.
                    run->end -= prevChLength;
                    if (run->start == run->end) {
                        result->pop_back();
                    }
                    start -= prevChLength;
                }
                Run dummy;
                result->push_back(dummy);
                run = &result->back();
                run->fakedFont = family->getClosestMatch(style);
                lastFamily = family;
                run->start = start;
            }
        }
        prevCh = ch;
        run->end = nextUtf16Pos;  // exclusive
    } while (nextCh != kEndOfString);
}
467
baseFont(FontStyle style)468 MinikinFont* FontCollection::baseFont(FontStyle style) {
469 return baseFontFaked(style).font;
470 }
471
baseFontFaked(FontStyle style)472 FakedFont FontCollection::baseFontFaked(FontStyle style) {
473 return mFamilies[0]->getClosestMatch(style);
474 }
475
getId() const476 uint32_t FontCollection::getId() const {
477 return mId;
478 }
479
480 } // namespace android
481