// Copyright 2018 The Chromium Authors // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. // // This file defines a helper class for selecting a supported language from a // set of candidates. It is used to get localized strings that are directly // embedded into the executable / library instead of stored in external // .pak files. #include "base/win/embedded_i18n/language_selector.h" #include #include #include #include "base/check_op.h" #include "base/memory/raw_ptr.h" #include "base/ranges/algorithm.h" #include "base/strings/string_util.h" #include "base/strings/utf_string_conversions.h" #include "base/win/i18n.h" namespace base { namespace win { namespace i18n { namespace { using LangToOffset = LanguageSelector::LangToOffset; // Holds pointers to LangToOffset pairs for specific languages that are the // targets of exceptions (where one language is mapped to another) or wildcards // (where a raw language identifier is mapped to a specific localization). struct AvailableLanguageAliases { raw_ptr en_gb_language_offset; raw_ptr en_us_language_offset; raw_ptr es_language_offset; raw_ptr es_419_language_offset; raw_ptr fil_language_offset; raw_ptr iw_language_offset; raw_ptr no_language_offset; raw_ptr pt_br_language_offset; raw_ptr zh_cn_language_offset; raw_ptr zh_tw_language_offset; }; #if DCHECK_IS_ON() // Returns true if the items in the given range are sorted and lower cased. bool IsArraySortedAndLowerCased(span languages_to_offset) { return std::is_sorted(languages_to_offset.begin(), languages_to_offset.end()) && base::ranges::all_of(languages_to_offset, [](const auto& lang) { auto language = AsStringPiece16(lang.first); return ToLowerASCII(language) == language; }); } #endif // DCHECK_IS_ON() // Determines the availability of all languages that may be used as aliases in // GetAliasedLanguageOffset or GetCompatibleNeutralLanguageOffset AvailableLanguageAliases DetermineAvailableAliases( span languages_to_offset) { AvailableLanguageAliases available_aliases = {}; for (const LangToOffset& lang_to_offset : languages_to_offset) { if (lang_to_offset.first == L"en-gb") available_aliases.en_gb_language_offset = &lang_to_offset; else if (lang_to_offset.first == L"en-us") available_aliases.en_us_language_offset = &lang_to_offset; else if (lang_to_offset.first == L"es") available_aliases.es_language_offset = &lang_to_offset; else if (lang_to_offset.first == L"es-419") available_aliases.es_419_language_offset = &lang_to_offset; else if (lang_to_offset.first == L"fil") available_aliases.fil_language_offset = &lang_to_offset; else if (lang_to_offset.first == L"iw") available_aliases.iw_language_offset = &lang_to_offset; else if (lang_to_offset.first == L"no") available_aliases.no_language_offset = &lang_to_offset; else if (lang_to_offset.first == L"pt-br") available_aliases.pt_br_language_offset = &lang_to_offset; else if (lang_to_offset.first == L"zh-cn") available_aliases.zh_cn_language_offset = &lang_to_offset; else if (lang_to_offset.first == L"zh-tw") available_aliases.zh_tw_language_offset = &lang_to_offset; } // Fallback language must exist. DCHECK(available_aliases.en_us_language_offset); return available_aliases; } // Returns true if a LangToOffset entry can be found in |languages_to_offset| // that matches the |language| exactly. |offset| will store the offset of the // language that matches if any. |languages_to_offset| must be sorted by // language and all languages must lower case. bool GetExactLanguageOffset(span languages_to_offset, const std::wstring& language, const LangToOffset** matched_language_to_offset) { DCHECK(matched_language_to_offset); // Binary search in the sorted arrays to find the offset corresponding // to a given language |name|. auto search_result = std::lower_bound( languages_to_offset.begin(), languages_to_offset.end(), language, [](const LangToOffset& left, const std::wstring& to_find) { return left.first < to_find; }); if (languages_to_offset.end() != search_result && search_result->first == language) { *matched_language_to_offset = &*search_result; return true; } return false; } // Returns true if the current language can be aliased to another language. bool GetAliasedLanguageOffset(const AvailableLanguageAliases& available_aliases, const std::wstring& language, const LangToOffset** matched_language_to_offset) { DCHECK(matched_language_to_offset); // Alias some English variants to British English (all others wildcard to // US). if (available_aliases.en_gb_language_offset && (language == L"en-au" || language == L"en-ca" || language == L"en-nz" || language == L"en-za")) { *matched_language_to_offset = available_aliases.en_gb_language_offset; return true; } // Alias es-es to es (all others wildcard to es-419). if (available_aliases.es_language_offset && language == L"es-es") { *matched_language_to_offset = available_aliases.es_language_offset; return true; } // Google web properties use iw for he. Handle both just to be safe. if (available_aliases.iw_language_offset && language == L"he") { *matched_language_to_offset = available_aliases.iw_language_offset; return true; } // Google web properties use no for nb. Handle both just to be safe. if (available_aliases.no_language_offset && language == L"nb") { *matched_language_to_offset = available_aliases.no_language_offset; return true; } // Some Google web properties use tl for fil. Handle both just to be safe. // They're not completely identical, but alias it here. if (available_aliases.fil_language_offset && language == L"tl") { *matched_language_to_offset = available_aliases.fil_language_offset; return true; } if (available_aliases.zh_cn_language_offset && // Pre-Vista alias for Chinese w/ script subtag. (language == L"zh-chs" || // Vista+ alias for Chinese w/ script subtag. language == L"zh-hans" || // Although the wildcard entry for zh would result in this, alias zh-sg // so that it will win if it precedes another valid tag in a list of // candidates. language == L"zh-sg")) { *matched_language_to_offset = available_aliases.zh_cn_language_offset; return true; } if (available_aliases.zh_tw_language_offset && // Pre-Vista alias for Chinese w/ script subtag. (language == L"zh-cht" || // Vista+ alias for Chinese w/ script subtag. language == L"zh-hant" || // Alias Hong Kong and Macau to Taiwan. language == L"zh-hk" || language == L"zh-mo")) { *matched_language_to_offset = available_aliases.zh_tw_language_offset; return true; } return false; } // Returns true if the current neutral language can be aliased to another // language. bool GetCompatibleNeutralLanguageOffset( const AvailableLanguageAliases& available_aliases, const std::wstring& neutral_language, const LangToOffset** matched_language_to_offset) { DCHECK(matched_language_to_offset); if (available_aliases.en_us_language_offset && neutral_language == L"en") { // Use the U.S. region for anything English. *matched_language_to_offset = available_aliases.en_us_language_offset; return true; } if (available_aliases.es_419_language_offset && neutral_language == L"es") { // Use the Latin American region for anything Spanish. *matched_language_to_offset = available_aliases.es_419_language_offset; return true; } if (available_aliases.pt_br_language_offset && neutral_language == L"pt") { // Use the Brazil region for anything Portugese. *matched_language_to_offset = available_aliases.pt_br_language_offset; return true; } if (available_aliases.zh_cn_language_offset && neutral_language == L"zh") { // Use the P.R.C. region for anything Chinese. *matched_language_to_offset = available_aliases.zh_cn_language_offset; return true; } return false; } // Runs through the set of candidates, sending their downcased representation // through |select_predicate|. Returns true if the predicate selects a // candidate, in which case |matched_name| is assigned the value of the // candidate and |matched_offset| is assigned the language offset of the // selected translation. // static bool SelectIf(const std::vector& candidates, span languages_to_offset, const AvailableLanguageAliases& available_aliases, const LangToOffset** matched_language_to_offset, std::wstring* matched_name) { DCHECK(matched_language_to_offset); DCHECK(matched_name); // Note: always perform the exact match first so that an alias is never // selected in place of a future translation. // An earlier candidate entry matching on an exact match or alias match takes // precedence over a later candidate entry matching on an exact match. for (const std::wstring& scan : candidates) { std::wstring lower_case_candidate = AsWString(ToLowerASCII(AsStringPiece16(scan))); if (GetExactLanguageOffset(languages_to_offset, lower_case_candidate, matched_language_to_offset) || GetAliasedLanguageOffset(available_aliases, lower_case_candidate, matched_language_to_offset)) { matched_name->assign(scan); return true; } } // If no candidate matches exactly or by alias, try to match by locale neutral // language. for (const std::wstring& scan : candidates) { std::wstring lower_case_candidate = AsWString(ToLowerASCII(AsStringPiece16(scan))); // Extract the locale neutral language from the language to search and try // to find an exact match for that language in the provided table. std::wstring neutral_language = lower_case_candidate.substr(0, lower_case_candidate.find(L'-')); if (GetCompatibleNeutralLanguageOffset(available_aliases, neutral_language, matched_language_to_offset)) { matched_name->assign(scan); return true; } } return false; } void SelectLanguageMatchingCandidate( const std::vector& candidates, span languages_to_offset, size_t* selected_offset, std::wstring* matched_candidate, std::wstring* selected_language) { DCHECK(selected_offset); DCHECK(matched_candidate); DCHECK(selected_language); DCHECK(!languages_to_offset.empty()); DCHECK_EQ(static_cast(*selected_offset), languages_to_offset.size()); DCHECK(matched_candidate->empty()); DCHECK(selected_language->empty()); // Note: While DCHECK_IS_ON() seems redundant here, this is required to avoid // compilation errors, since IsArraySortedAndLowerCased is not defined // otherwise. #if DCHECK_IS_ON() DCHECK(IsArraySortedAndLowerCased(languages_to_offset)) << "languages_to_offset is not sorted and lower cased"; #endif // DCHECK_IS_ON() // Get which languages that are commonly used as aliases and wildcards are // available for use to match candidates. AvailableLanguageAliases available_aliases = DetermineAvailableAliases(languages_to_offset); // The fallback must exist. DCHECK(available_aliases.en_us_language_offset); // Try to find the first matching candidate from all the language mappings // that are given. Failing that, used en-us as the fallback language. const LangToOffset* matched_language_to_offset = nullptr; if (!SelectIf(candidates, languages_to_offset, available_aliases, &matched_language_to_offset, matched_candidate)) { matched_language_to_offset = available_aliases.en_us_language_offset; *matched_candidate = std::wstring(available_aliases.en_us_language_offset->first); } DCHECK(matched_language_to_offset); // Get the real language being used for the matched candidate. *selected_language = std::wstring(matched_language_to_offset->first); *selected_offset = matched_language_to_offset->second; } std::vector GetCandidatesFromSystem( std::wstring_view preferred_language) { std::vector candidates; // Get the initial candidate list for this particular implementation (if // applicable). if (!preferred_language.empty()) candidates.emplace_back(preferred_language); // Now try the UI languages. Use the thread preferred ones since that will // kindly return us a list of all kinds of fallbacks. win::i18n::GetThreadPreferredUILanguageList(&candidates); return candidates; } } // namespace LanguageSelector::LanguageSelector(std::wstring_view preferred_language, span languages_to_offset) : LanguageSelector(GetCandidatesFromSystem(preferred_language), languages_to_offset) {} LanguageSelector::LanguageSelector(const std::vector& candidates, span languages_to_offset) : selected_offset_(languages_to_offset.size()) { SelectLanguageMatchingCandidate(candidates, languages_to_offset, &selected_offset_, &matched_candidate_, &selected_language_); } LanguageSelector::~LanguageSelector() = default; } // namespace i18n } // namespace win } // namespace base