/* * Copyright (C) 2015 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef MINIKIN_LOCALE_LIST_H #define MINIKIN_LOCALE_LIST_H #include #include #include #include "StringPiece.h" #include "minikin/LineBreakStyle.h" namespace minikin { // Due to the limits in font fallback score calculation, we can't use anything more than 12 locales. const size_t FONT_LOCALE_LIMIT = 12; // The language or region code is encoded to 15 bits. constexpr uint16_t NO_LANGUAGE = 0x7fff; constexpr uint16_t NO_REGION = 0x7fff; // The script code is encoded to 20 bits. constexpr uint32_t NO_SCRIPT = 0xfffff; class LocaleList; // Enum for making sub-locale from FontLangauge. enum class SubtagBits : uint8_t { EMPTY = 0b00000000, LANGUAGE = 0b00000001, SCRIPT = 0b00000010, REGION = 0b00000100, VARIANT = 0b00001000, EMOJI = 0b00010000, ALL = 0b00011111, }; inline constexpr SubtagBits operator&(SubtagBits l, SubtagBits r) { return static_cast(static_cast(l) & static_cast(r)); } inline constexpr SubtagBits operator|(SubtagBits l, SubtagBits r) { return static_cast(static_cast(l) | static_cast(r)); } // Enum for emoji style. enum class EmojiStyle : uint8_t { EMPTY = 0, // No emoji style is specified. DEFAULT = 1, // Default emoji style is specified. EMOJI = 2, // Emoji (color) emoji style is specified. TEXT = 3, // Text (black/white) emoji style is specified. }; // Locale is a compact representation of a BCP 47 language tag. // It does not capture all possible information, only what directly affects text layout: // font rendering, hyphenation, word breaking, etc. struct Locale { public: enum class Variant : uint16_t { NO_VARIANT = 0x0000, GERMAN_1901_ORTHOGRAPHY = 0x0001, GERMAN_1996_ORTHOGRAPHY = 0x0002, }; // Default constructor creates the unsupported locale. Locale() : mScript(NO_SCRIPT), mLanguage(NO_LANGUAGE), mRegion(NO_REGION), mSubScriptBits(0ul), mVariant(Variant::NO_VARIANT), mEmojiStyle(EmojiStyle::EMPTY) {} // Parse from string Locale(const StringPiece& buf); // Parse from identifier. See getIdentifier() for the identifier format. explicit Locale(uint64_t identifier) : mScript(extractBits(identifier, 29, 20)), mLanguage(extractBits(identifier, 49, 15)), mRegion(extractBits(identifier, 14, 15)), mSubScriptBits(scriptToSubScriptBits(mScript)), mVariant(static_cast(extractBits(identifier, 0, 2))), mEmojiStyle(static_cast(extractBits(identifier, 12, 2))) {} bool operator==(const Locale& other) const { return !isUnsupported() && isEqualScript(other) && mLanguage == other.mLanguage && mRegion == other.mRegion && mVariant == other.mVariant && mEmojiStyle == other.mEmojiStyle; } bool operator!=(const Locale other) const { return !(*this == other); } inline bool hasLanguage() const { return mLanguage != NO_LANGUAGE; } inline bool hasScript() const { return mScript != NO_SCRIPT; } inline bool hasRegion() const { return mRegion != NO_REGION; } inline bool hasVariant() const { return mVariant != Variant::NO_VARIANT; } inline bool hasEmojiStyle() const { return mEmojiStyle != EmojiStyle::EMPTY; } inline bool isSupported() const { return hasLanguage() || hasScript() || hasRegion() || hasVariant() || hasEmojiStyle(); } inline bool isUnsupported() const { return !isSupported(); } EmojiStyle getEmojiStyle() const { return mEmojiStyle; } bool isEqualScript(const Locale& other) const; // Returns true if this script supports the given script. For example, ja-Jpan supports Hira, // ja-Hira doesn't support Jpan. bool supportsScript(uint32_t script) const; std::string getString() const; std::string getStringWithLineBreakOption(LineBreakStyle lbStyle, LineBreakWordStyle lbWordStyle) const; // Calculates a matching score. This score represents how well the input locales cover this // locale. The maximum score in the locale list is returned. // 0 = no match, 1 = script match, 2 = script and primary language match. int calcScoreFor(const LocaleList& supported) const; // Identifier pattern: // |-------|-------|-------|-------|-------|-------|-------|-------| // lllllllllllllll Language Code (15 bits) // ssssssssssssssssssss Script Code (20 bits) // rrrrrrrrrrrrrrr Region Code (15 bits) // ee Emoji Style (2 bits) // XXXXXXXXXX Free (10 bits) // vv German Variant (2 bits) uint64_t getIdentifier() const { return ((uint64_t)mLanguage << 49) | ((uint64_t)mScript << 29) | ((uint64_t)mRegion << 14) | ((uint64_t)mEmojiStyle << 12) | (uint64_t)mVariant; } Locale getPartialLocale(SubtagBits bits) const; private: friend class LocaleList; // for LocaleList constructor // ISO 15924 compliant script code. The 4 chars script code are packed into a 20 bit integer. // If not specified, this is kInvalidScript. uint32_t mScript; // ISO 639-1 or ISO 639-2 compliant language code. // The two- or three-letter language code is packed into a 15 bit integer. // mLanguage = 0 means the Locale is unsupported. uint16_t mLanguage; // ISO 3166-1 or UN M.49 compliant region code. The two-letter or three-digit region code is // packed into a 15 bit integer. uint16_t mRegion; // For faster comparing, use 7 bits for specific scripts. static const uint8_t kBopomofoFlag = 1u; static const uint8_t kHanFlag = 1u << 1; static const uint8_t kHangulFlag = 1u << 2; static const uint8_t kHiraganaFlag = 1u << 3; static const uint8_t kKatakanaFlag = 1u << 4; static const uint8_t kSimplifiedChineseFlag = 1u << 5; static const uint8_t kTraditionalChineseFlag = 1u << 6; uint8_t mSubScriptBits; Variant mVariant; EmojiStyle mEmojiStyle; void resolveUnicodeExtension(const char* buf, size_t length); inline static uint64_t extractBits(uint64_t value, uint8_t shift, uint8_t nBits) { return (value >> shift) & ((1 << nBits) - 1); } int buildLocaleString(char* buf) const; static uint8_t scriptToSubScriptBits(uint32_t rawScript); static EmojiStyle resolveEmojiStyle(const char* buf, size_t length); static EmojiStyle scriptToEmojiStyle(uint32_t script); // Returns true if the provide subscript bits has the requested subscript bits. // Note that this function returns false if the requested subscript bits are empty. static bool supportsScript(uint8_t providedBits, uint8_t requestedBits); }; // An immutable list of locale. class LocaleList { public: explicit LocaleList(std::vector&& locales); LocaleList() : mUnionOfSubScriptBits(0), mIsAllTheSameLocale(false), mEmojiStyle(EmojiStyle::EMPTY) {} LocaleList(LocaleList&&) = default; size_t size() const { return mLocales.size(); } bool empty() const { return mLocales.empty(); } const Locale& operator[](size_t n) const { return mLocales[n]; } hb_language_t getHbLanguage(size_t n) const { return mHbLangs[n]; } // Returns an effective emoji style of this locale list. // The effective means the first non empty emoji style in the list. EmojiStyle getEmojiStyle() const { return mEmojiStyle; } private: friend struct Locale; // for calcScoreFor std::vector mLocales; // The languages to be passed to HarfBuzz shaper. std::vector mHbLangs; uint8_t mUnionOfSubScriptBits; bool mIsAllTheSameLocale; EmojiStyle mEmojiStyle; uint8_t getUnionOfSubScriptBits() const { return mUnionOfSubScriptBits; } bool isAllTheSameLocale() const { return mIsAllTheSameLocale; } // Do not copy and assign. LocaleList(const LocaleList&) = delete; void operator=(const LocaleList&) = delete; }; } // namespace minikin #endif // MINIKIN_LOCALE_LIST_H