1 /* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef MINIKIN_LOCALE_LIST_H 18 #define MINIKIN_LOCALE_LIST_H 19 20 #include <string> 21 #include <vector> 22 23 #include <hb.h> 24 25 #include "StringPiece.h" 26 27 namespace minikin { 28 29 // Due to the limits in font fallback score calculation, we can't use anything more than 12 locales. 30 const size_t FONT_LOCALE_LIMIT = 12; 31 32 // The language or region code is encoded to 15 bits. 33 constexpr uint16_t NO_LANGUAGE = 0x7fff; 34 constexpr uint16_t NO_REGION = 0x7fff; 35 // The script code is encoded to 20 bits. 36 constexpr uint32_t NO_SCRIPT = 0xfffff; 37 38 class LocaleList; 39 40 // Enum for making sub-locale from FontLangauge. 41 enum class SubtagBits : uint8_t { 42 EMPTY = 0b00000000, 43 LANGUAGE = 0b00000001, 44 SCRIPT = 0b00000010, 45 REGION = 0b00000100, 46 VARIANT = 0b00001000, 47 EMOJI = 0b00010000, 48 ALL = 0b00011111, 49 }; 50 51 inline constexpr SubtagBits operator&(SubtagBits l, SubtagBits r) { 52 return static_cast<SubtagBits>(static_cast<uint8_t>(l) & static_cast<uint8_t>(r)); 53 } 54 inline constexpr SubtagBits operator|(SubtagBits l, SubtagBits r) { 55 return static_cast<SubtagBits>(static_cast<uint8_t>(l) | static_cast<uint8_t>(r)); 56 } 57 58 // Enum for emoji style. 59 enum class EmojiStyle : uint8_t { 60 EMPTY = 0, // No emoji style is specified. 61 DEFAULT = 1, // Default emoji style is specified. 62 EMOJI = 2, // Emoji (color) emoji style is specified. 63 TEXT = 3, // Text (black/white) emoji style is specified. 64 }; 65 66 // Enum for line break style. 67 enum class LineBreakStyle : uint8_t { 68 EMPTY = 0, // No line break style is specified. 69 LOOSE = 1, // line break style is loose. 70 NORMAL = 2, // line break style is normal. 71 STRICT = 3, // line break style is strict. 72 }; 73 74 // Locale is a compact representation of a BCP 47 language tag. 75 // It does not capture all possible information, only what directly affects text layout: 76 // font rendering, hyphenation, word breaking, etc. 77 struct Locale { 78 public: 79 enum class Variant : uint16_t { 80 NO_VARIANT = 0x0000, 81 GERMAN_1901_ORTHOGRAPHY = 0x0001, 82 GERMAN_1996_ORTHOGRAPHY = 0x0002, 83 }; 84 85 // Default constructor creates the unsupported locale. LocaleLocale86 Locale() 87 : mScript(NO_SCRIPT), 88 mLanguage(NO_LANGUAGE), 89 mRegion(NO_REGION), 90 mSubScriptBits(0ul), 91 mVariant(Variant::NO_VARIANT), 92 mEmojiStyle(EmojiStyle::EMPTY), 93 mLBStyle(LineBreakStyle::EMPTY) {} 94 95 // Parse from string 96 Locale(const StringPiece& buf); 97 98 bool operator==(const Locale other) const { 99 return !isUnsupported() && isEqualScript(other) && mLanguage == other.mLanguage && 100 mRegion == other.mRegion && mVariant == other.mVariant && 101 mLBStyle == other.mLBStyle && mEmojiStyle == other.mEmojiStyle; 102 } 103 104 bool operator!=(const Locale other) const { return !(*this == other); } 105 hasLanguageLocale106 inline bool hasLanguage() const { return mLanguage != NO_LANGUAGE; } hasScriptLocale107 inline bool hasScript() const { return mScript != NO_SCRIPT; } hasRegionLocale108 inline bool hasRegion() const { return mRegion != NO_REGION; } hasVariantLocale109 inline bool hasVariant() const { return mVariant != Variant::NO_VARIANT; } hasLBStyleLocale110 inline bool hasLBStyle() const { return mLBStyle != LineBreakStyle::EMPTY; } hasEmojiStyleLocale111 inline bool hasEmojiStyle() const { return mEmojiStyle != EmojiStyle::EMPTY; } 112 isSupportedLocale113 inline bool isSupported() const { 114 return hasLanguage() || hasScript() || hasRegion() || hasVariant() || hasLBStyle() || 115 hasEmojiStyle(); 116 } 117 isUnsupportedLocale118 inline bool isUnsupported() const { return !isSupported(); } 119 getEmojiStyleLocale120 EmojiStyle getEmojiStyle() const { return mEmojiStyle; } 121 122 bool isEqualScript(const Locale& other) const; 123 124 // Returns true if this script supports the given script. For example, ja-Jpan supports Hira, 125 // ja-Hira doesn't support Jpan. 126 bool supportsHbScript(hb_script_t script) const; 127 128 std::string getString() const; 129 130 // Calculates a matching score. This score represents how well the input locales cover this 131 // locale. The maximum score in the locale list is returned. 132 // 0 = no match, 1 = script match, 2 = script and primary language match. 133 int calcScoreFor(const LocaleList& supported) const; 134 135 // Identifier pattern: 136 // |-------|-------|-------|-------|-------|-------|-------|-------| 137 // lllllllllllllll Language Code 138 // ssssssssssssssssssss Script Code 139 // rrrrrrrrrrrrrrr Region Code 140 // ee Emoji Style 141 // bb Line Break Style 142 // XXXXXXXX Free 143 // vv German Variant getIdentifierLocale144 uint64_t getIdentifier() const { 145 return ((uint64_t)mLanguage << 49) | ((uint64_t)mScript << 29) | ((uint64_t)mRegion << 14) | 146 ((uint64_t)mEmojiStyle << 12) | ((uint64_t)mLBStyle << 10) | (uint64_t)mVariant; 147 } 148 149 Locale getPartialLocale(SubtagBits bits) const; 150 151 private: 152 friend class LocaleList; // for LocaleList constructor 153 154 // ISO 15924 compliant script code. The 4 chars script code are packed into a 20 bit integer. 155 // If not specified, this is kInvalidScript. 156 uint32_t mScript; 157 158 // ISO 639-1 or ISO 639-2 compliant language code. 159 // The two- or three-letter language code is packed into a 15 bit integer. 160 // mLanguage = 0 means the Locale is unsupported. 161 uint16_t mLanguage; 162 163 // ISO 3166-1 or UN M.49 compliant region code. The two-letter or three-digit region code is 164 // packed into a 15 bit integer. 165 uint16_t mRegion; 166 167 // For faster comparing, use 7 bits for specific scripts. 168 static const uint8_t kBopomofoFlag = 1u; 169 static const uint8_t kHanFlag = 1u << 1; 170 static const uint8_t kHangulFlag = 1u << 2; 171 static const uint8_t kHiraganaFlag = 1u << 3; 172 static const uint8_t kKatakanaFlag = 1u << 4; 173 static const uint8_t kSimplifiedChineseFlag = 1u << 5; 174 static const uint8_t kTraditionalChineseFlag = 1u << 6; 175 uint8_t mSubScriptBits; 176 177 Variant mVariant; 178 179 EmojiStyle mEmojiStyle; 180 LineBreakStyle mLBStyle; 181 182 void resolveUnicodeExtension(const char* buf, size_t length); 183 184 static uint8_t scriptToSubScriptBits(uint32_t rawScript); 185 186 static LineBreakStyle resolveLineBreakStyle(const char* buf, size_t length); 187 static EmojiStyle resolveEmojiStyle(const char* buf, size_t length); 188 static EmojiStyle scriptToEmojiStyle(uint32_t script); 189 190 // Returns true if the provide subscript bits has the requested subscript bits. 191 // Note that this function returns false if the requested subscript bits are empty. 192 static bool supportsScript(uint8_t providedBits, uint8_t requestedBits); 193 }; 194 195 // An immutable list of locale. 196 class LocaleList { 197 public: 198 explicit LocaleList(std::vector<Locale>&& locales); LocaleList()199 LocaleList() 200 : mUnionOfSubScriptBits(0), 201 mIsAllTheSameLocale(false), 202 mEmojiStyle(EmojiStyle::EMPTY) {} 203 LocaleList(LocaleList&&) = default; 204 size()205 size_t size() const { return mLocales.size(); } empty()206 bool empty() const { return mLocales.empty(); } 207 const Locale& operator[](size_t n) const { return mLocales[n]; } 208 getHbLanguage(size_t n)209 hb_language_t getHbLanguage(size_t n) const { return mHbLangs[n]; } 210 211 // Returns an effective emoji style of this locale list. 212 // The effective means the first non empty emoji style in the list. getEmojiStyle()213 EmojiStyle getEmojiStyle() const { return mEmojiStyle; } 214 215 private: 216 friend struct Locale; // for calcScoreFor 217 218 std::vector<Locale> mLocales; 219 220 // The languages to be passed to HarfBuzz shaper. 221 std::vector<hb_language_t> mHbLangs; 222 uint8_t mUnionOfSubScriptBits; 223 bool mIsAllTheSameLocale; 224 EmojiStyle mEmojiStyle; 225 getUnionOfSubScriptBits()226 uint8_t getUnionOfSubScriptBits() const { return mUnionOfSubScriptBits; } isAllTheSameLocale()227 bool isAllTheSameLocale() const { return mIsAllTheSameLocale; } 228 229 // Do not copy and assign. 230 LocaleList(const LocaleList&) = delete; 231 void operator=(const LocaleList&) = delete; 232 }; 233 234 } // namespace minikin 235 236 #endif // MINIKIN_LOCALE_LIST_H 237