1 /* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef MINIKIN_FONT_LANGUAGE_H 18 #define MINIKIN_FONT_LANGUAGE_H 19 20 #include <string> 21 #include <vector> 22 23 #include <hb.h> 24 25 namespace minikin { 26 27 // Due to the limits in font fallback score calculation, we can't use anything 28 // more than 12 languages. 29 const size_t FONT_LANGUAGES_LIMIT = 12; 30 31 // The language or region code is encoded to 15 bits. 32 const uint16_t INVALID_CODE = 0x7fff; 33 34 class FontLanguages; 35 36 // FontLanguage is a compact representation of a BCP 47 language tag. It 37 // does not capture all possible information, only what directly affects 38 // font rendering. 39 struct FontLanguage { 40 public: 41 enum EmojiStyle : uint8_t { 42 EMSTYLE_EMPTY = 0, 43 EMSTYLE_DEFAULT = 1, 44 EMSTYLE_EMOJI = 2, 45 EMSTYLE_TEXT = 3, 46 }; 47 // Default constructor creates the unsupported language. FontLanguageFontLanguage48 FontLanguage() 49 : mScript(0ul), 50 mLanguage(INVALID_CODE), 51 mRegion(INVALID_CODE), 52 mHbLanguage(HB_LANGUAGE_INVALID), 53 mSubScriptBits(0ul), 54 mEmojiStyle(EMSTYLE_EMPTY) {} 55 56 // Parse from string 57 FontLanguage(const char* buf, size_t length); 58 59 bool operator==(const FontLanguage other) const { 60 return !isUnsupported() && isEqualScript(other) && 61 mLanguage == other.mLanguage && mRegion == other.mRegion && 62 mEmojiStyle == other.mEmojiStyle; 63 } 64 65 bool operator!=(const FontLanguage other) const { return !(*this == other); } 66 isUnsupportedFontLanguage67 bool isUnsupported() const { return mLanguage == INVALID_CODE; } getEmojiStyleFontLanguage68 EmojiStyle getEmojiStyle() const { return mEmojiStyle; } getHbLanguageFontLanguage69 hb_language_t getHbLanguage() const { return mHbLanguage; } 70 71 bool isEqualScript(const FontLanguage& other) const; 72 73 // Returns true if this script supports the given script. For example, ja-Jpan 74 // supports Hira, ja-Hira doesn't support Jpan. 75 bool supportsHbScript(hb_script_t script) const; 76 77 std::string getString() const; 78 79 // Calculates a matching score. This score represents how well the input 80 // languages cover this language. The maximum score in the language list is 81 // returned. 0 = no match, 1 = script match, 2 = script and primary language 82 // match. 83 int calcScoreFor(const FontLanguages& supported) const; 84 getIdentifierFontLanguage85 uint64_t getIdentifier() const { 86 return ((uint64_t)mLanguage << 49) | ((uint64_t)mScript << 17) | 87 ((uint64_t)mRegion << 2) | mEmojiStyle; 88 } 89 90 private: 91 friend class FontLanguages; // for FontLanguages constructor 92 93 // ISO 15924 compliant script code. The 4 chars script code are packed into a 94 // 32 bit integer. 95 uint32_t mScript; 96 97 // ISO 639-1 or ISO 639-2 compliant language code. 98 // The two- or three-letter language code is packed into a 15 bit integer. 99 // mLanguage = 0 means the FontLanguage is unsupported. 100 uint16_t mLanguage; 101 102 // ISO 3166-1 or UN M.49 compliant region code. The two-letter or three-digit 103 // region code is packed into a 15 bit integer. 104 uint16_t mRegion; 105 106 // The language to be passed HarfBuzz shaper. 107 hb_language_t mHbLanguage; 108 109 // For faster comparing, use 7 bits for specific scripts. 110 static const uint8_t kBopomofoFlag = 1u; 111 static const uint8_t kHanFlag = 1u << 1; 112 static const uint8_t kHangulFlag = 1u << 2; 113 static const uint8_t kHiraganaFlag = 1u << 3; 114 static const uint8_t kKatakanaFlag = 1u << 4; 115 static const uint8_t kSimplifiedChineseFlag = 1u << 5; 116 static const uint8_t kTraditionalChineseFlag = 1u << 6; 117 uint8_t mSubScriptBits; 118 119 EmojiStyle mEmojiStyle; 120 121 static uint8_t scriptToSubScriptBits(uint32_t script); 122 123 static EmojiStyle resolveEmojiStyle(const char* buf, 124 size_t length, 125 uint32_t script); 126 127 // Returns true if the provide subscript bits has the requested subscript 128 // bits. Note that this function returns false if the requested subscript bits 129 // are empty. 130 static bool supportsScript(uint8_t providedBits, uint8_t requestedBits); 131 }; 132 133 // An immutable list of languages. 134 class FontLanguages { 135 public: 136 explicit FontLanguages(std::vector<FontLanguage>&& languages); FontLanguages()137 FontLanguages() : mUnionOfSubScriptBits(0), mIsAllTheSameLanguage(false) {} 138 FontLanguages(FontLanguages&&) = default; 139 size()140 size_t size() const { return mLanguages.size(); } empty()141 bool empty() const { return mLanguages.empty(); } 142 const FontLanguage& operator[](size_t n) const { return mLanguages[n]; } 143 144 private: 145 friend struct FontLanguage; // for calcScoreFor 146 147 std::vector<FontLanguage> mLanguages; 148 uint8_t mUnionOfSubScriptBits; 149 bool mIsAllTheSameLanguage; 150 getUnionOfSubScriptBits()151 uint8_t getUnionOfSubScriptBits() const { return mUnionOfSubScriptBits; } isAllTheSameLanguage()152 bool isAllTheSameLanguage() const { return mIsAllTheSameLanguage; } 153 154 // Do not copy and assign. 155 FontLanguages(const FontLanguages&) = delete; 156 void operator=(const FontLanguages&) = delete; 157 }; 158 159 } // namespace minikin 160 161 #endif // MINIKIN_FONT_LANGUAGE_H 162