1 /*
2 * Copyright (c) 2025 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15 #include "locale_helper.h"
16 #include <sstream>
17 #include "unicode/localebuilder.h"
18 #include "i18n_hilog.h"
19 #include "locale_info.h"
20 #include "utils.h"
21
22 namespace OHOS {
23 namespace Global {
24 namespace I18n {
25 namespace {
// Small integral constants used as subtag lengths and index offsets by the validators in this file.
constexpr uint8_t INTL_INDEX_TWO = 2;
constexpr uint8_t INTL_INDEX_THREE = 3;
constexpr uint8_t INTL_INDEX_FOUR = 4;
constexpr uint8_t INTL_INDEX_FIVE = 5;
constexpr uint8_t INTL_INDEX_EIGHT = 8;

// Inclusive length bounds for one dash-separated segment of a calendar code.
constexpr int32_t MIN_CALENDAR_LENGTH = 3;
constexpr int32_t MAX_CALENDAR_LENGTH = 8;

// Exact length required of a currency code.
constexpr int32_t CURRENCY_LENGTH = 3;
34
/**
 * Lower-cases a single ASCII letter.
 *
 * Only 'A'..'Z' are mapped to 'a'..'z' (by setting bit 0x20); every other value is
 * returned unchanged. The previous implementation OR-ed 0x20 into every input, which
 * silently rewrote non-letters that lack that bit (for example '@' became '`' and
 * '_' became 0x7F) when the function was applied to whole strings via std::transform.
 *
 * @param c Character value to convert.
 * @return The lower-cased value for upper-case ASCII letters, otherwise c itself.
 */
inline constexpr int AsciiAlphaToLower(uint32_t c)
{
    constexpr uint32_t FLAG = 0x20;
    constexpr uint32_t UPPER_FIRST = static_cast<uint32_t>('A');
    constexpr uint32_t UPPER_LAST = static_cast<uint32_t>('Z');
    const bool isUpperAlpha = (c >= UPPER_FIRST) && (c <= UPPER_LAST);
    return static_cast<int>(isUpperAlpha ? (c | FLAG) : c);
}
40
/**
 * Tests whether value lies in the inclusive interval [start, end].
 *
 * Returns false when the bounds are inverted (start > end) or when T is narrower than U,
 * because the bounds are converted to T before the comparison.
 */
template<typename T, typename U>
bool InRange(T value, U start, U end)
{
    const bool boundsInverted = start > end;
    const bool wouldNarrowBounds = sizeof(T) < sizeof(U);
    if (boundsInverted || wouldNarrowBounds) {
        return false;
    }
    const T lower = static_cast<T>(start);
    const T upper = static_cast<T>(end);
    if (value >= lower) {
        return value <= upper;
    }
    return false;
}
49
IsDigit(const std::string & str,size_t min,size_t max)50 bool IsDigit(const std::string &str, size_t min, size_t max)
51 {
52 if (!InRange(str.length(), min, max)) {
53 return false;
54 }
55 for (char i : str) {
56 if (!InRange(i, '0', '9')) {
57 return false;
58 }
59 }
60 return true;
61 }
62
IsAsciiAlpha(char ch)63 bool IsAsciiAlpha(char ch)
64 {
65 return InRange(ch, 'A', 'Z') || InRange(ch, 'a', 'z');
66 }
67
IsAlphanum(const std::string & str,size_t min,size_t max)68 bool IsAlphanum(const std::string &str, size_t min, size_t max)
69 {
70 if (!InRange(str.length(), min, max)) {
71 return false;
72 }
73 for (char i : str) {
74 if (!IsAsciiAlpha(i) && !InRange(i, '0', '9')) {
75 return false;
76 }
77 }
78 return true;
79 }
80
IsThirdDigitAlphanum(const std::string & value)81 bool IsThirdDigitAlphanum(const std::string &value)
82 {
83 return value.length() == INTL_INDEX_FOUR && InRange(value[0], '0', '9') &&
84 IsAlphanum(value.substr(1), INTL_INDEX_THREE, INTL_INDEX_THREE);
85 }
86
IsVariantSubtag(const std::string & value)87 bool IsVariantSubtag(const std::string &value)
88 {
89 return IsThirdDigitAlphanum(value) || IsAlphanum(value, INTL_INDEX_FIVE, INTL_INDEX_EIGHT);
90 }
91
IsLanguageSubtag(const std::string & value)92 bool IsLanguageSubtag(const std::string &value)
93 {
94 return LocaleHelper::IsAlpha(value, INTL_INDEX_TWO, INTL_INDEX_THREE) ||
95 LocaleHelper::IsAlpha(value, INTL_INDEX_FIVE, INTL_INDEX_EIGHT);
96 }
97
IsScriptSubtag(const std::string & value)98 bool IsScriptSubtag(const std::string &value)
99 {
100 return LocaleHelper::IsAlpha(value, INTL_INDEX_FOUR, INTL_INDEX_FOUR);
101 }
102
IsRegionSubtag(const std::string & value)103 bool IsRegionSubtag(const std::string &value)
104 {
105 return LocaleHelper::IsAlpha(value, INTL_INDEX_TWO, INTL_INDEX_TWO) ||
106 IsDigit(value, INTL_INDEX_THREE, INTL_INDEX_THREE);
107 }
108
IsExtensionSingleton(const std::string & value)109 bool IsExtensionSingleton(const std::string &value)
110 {
111 return IsAlphanum(value, 1, 1);
112 }
113 }
114
115 std::string LocaleHelper::defaultLocale = "";
116 std::mutex LocaleHelper::defaultLocaleMutex;
117
118 const std::unordered_map<std::string, std::unordered_set<std::string>> LocaleHelper::KEY_TO_OPTION_NAME = {
119 { "localeMatcher", { "best fit", "lookup" } },
120 { "style", { "narrow", "short", "long" }},
121 { "type", { "language", "region", "script", "currency", "calendar", "dateTimeField" } },
122 { "fallback", { "code", "none" } },
123 { "languageDisplay", { "dialect", "standard" } },
124 { "usage", { "sort", "search" } },
125 { "numeric", { "false", "true" } },
126 { "caseFirst", { "false", "upper", "lower" } },
127 { "sensitivity", { "base", "accent", "case", "variant" } },
128 { "ignorePunctuation", { "true", "false" } },
129 { "hourCycle", { "h11", "h12", "h23", "h24" } },
130 };
131
IsValidOptionName(const std::string & key,const std::string & option)132 bool LocaleHelper::IsValidOptionName(const std::string& key, const std::string& option)
133 {
134 auto optionIter = KEY_TO_OPTION_NAME.find(key);
135 if (optionIter == KEY_TO_OPTION_NAME.end()) {
136 return false;
137 }
138 return optionIter->second.find(option) != optionIter->second.end();
139 }
140
ParseOptionWithoutCheck(const std::map<std::string,std::string> & options,const std::string & key,const std::string & defaultOption)141 std::string LocaleHelper::ParseOptionWithoutCheck(const std::map<std::string, std::string> &options,
142 const std::string &key,
143 const std::string &defaultOption)
144 {
145 auto optionsIter = options.find(key);
146 if (optionsIter == options.end()) {
147 return defaultOption;
148 }
149 return optionsIter->second;
150 }
151
ParseOption(const std::map<std::string,std::string> & options,const std::string & key,const std::string & defaultOption,bool isOptional,I18nErrorCode & status)152 std::string LocaleHelper::ParseOption(const std::map<std::string, std::string>& options,
153 const std::string& key,
154 const std::string& defaultOption,
155 bool isOptional,
156 I18nErrorCode& status)
157 {
158 auto optionsMapIter = KEY_TO_OPTION_NAME.find(key);
159 if (optionsMapIter == KEY_TO_OPTION_NAME.end()) {
160 return defaultOption;
161 }
162 auto optionsIter = options.find(key);
163 if (optionsIter == options.end()) {
164 if (!isOptional) {
165 status = I18nErrorCode::MISSING_PARAM;
166 HILOG_ERROR_I18N("LocaleHelper::ParseOption: missing param %{public}s.", key.c_str());
167 }
168 return defaultOption;
169 }
170 std::string option = optionsIter->second;
171 const std::unordered_set<std::string>& optionsName = optionsMapIter->second;
172 auto nameIter = optionsName.find(option);
173 if (nameIter == optionsName.end()) {
174 status = I18nErrorCode::INVALID_PARAM;
175 return option;
176 }
177 return option;
178 }
179
BestAvailableLocale(const std::set<std::string> & availableLocales,const std::string & locale)180 std::string LocaleHelper::BestAvailableLocale(const std::set<std::string> &availableLocales,
181 const std::string &locale)
182 {
183 std::string localeCandidate = locale;
184 std::string undefined = std::string();
185 while (true) {
186 for (const std::string& itemStr : availableLocales) {
187 if (itemStr == localeCandidate) {
188 return localeCandidate;
189 }
190 }
191 size_t pos = localeCandidate.rfind('-');
192 if (pos == std::string::npos) {
193 return undefined;
194 }
195 if (pos >= INTL_INDEX_TWO && localeCandidate[pos - INTL_INDEX_TWO] == '-') {
196 pos -= INTL_INDEX_TWO;
197 }
198 localeCandidate.resize(pos);
199 }
200 }
201
LookupSupportedLocales(const std::set<std::string> & availableLocales,const std::vector<std::string> & requestLocales)202 std::vector<std::string> LocaleHelper::LookupSupportedLocales(const std::set<std::string> &availableLocales,
203 const std::vector<std::string> &requestLocales)
204 {
205 std::vector<std::string> resultLocales;
206 std::unordered_set<std::string> visitedLocales;
207 for (const auto& candidateLocale : requestLocales) {
208 std::string availableLocale = LocaleHelper::BestAvailableLocale(availableLocales, candidateLocale);
209 if (!availableLocale.empty() && visitedLocales.find(candidateLocale) == visitedLocales.end()) {
210 resultLocales.push_back(candidateLocale);
211 visitedLocales.insert(candidateLocale);
212 }
213 }
214 return resultLocales;
215 }
216
CanonicalizeLocaleList(const std::vector<std::string> & locales,I18nErrorCode & status)217 std::vector<std::string> LocaleHelper::CanonicalizeLocaleList(const std::vector<std::string> &locales,
218 I18nErrorCode &status)
219 {
220 std::vector<std::string> resultLocales;
221 for (const std::string& locale : locales) {
222 if (locale.empty() || !LocaleHelper::IsStructurallyValidLanguageTag(locale)) {
223 status = I18nErrorCode::INVALID_LOCALE_TAG;
224 return {};
225 }
226 std::string localeStr = locale;
227 std::transform(localeStr.begin(), localeStr.end(), localeStr.begin(), AsciiAlphaToLower);
228 UErrorCode icuStatus = U_ZERO_ERROR;
229 icu::Locale formalLocale = icu::Locale::forLanguageTag(localeStr.c_str(), icuStatus);
230 if (U_FAILURE(icuStatus) || formalLocale.isBogus()) {
231 status = I18nErrorCode::INVALID_LOCALE_TAG;
232 return {};
233 }
234 formalLocale = icu::LocaleBuilder().setLocale(formalLocale).build(icuStatus);
235 // Canonicalize the locale ID of this object according to CLDR.
236 formalLocale.canonicalize(icuStatus);
237 if ((U_FAILURE(icuStatus)) || (formalLocale.isBogus())) {
238 status = I18nErrorCode::INVALID_LOCALE_TAG;
239 return {};
240 }
241 std::string languageTag = formalLocale.toLanguageTag<std::string>(icuStatus);
242 if (U_FAILURE(icuStatus)) {
243 status = I18nErrorCode::INVALID_LOCALE_TAG;
244 return {};
245 }
246 resultLocales.push_back(languageTag);
247 }
248 return resultLocales;
249 }
250
IsUnicodeScriptSubtag(const std::string & value)251 bool LocaleHelper::IsUnicodeScriptSubtag(const std::string& value)
252 {
253 UErrorCode status = U_ZERO_ERROR;
254 icu::LocaleBuilder builder;
255 builder.setScript(value).build(status);
256 return U_SUCCESS(status);
257 }
258
IsUnicodeRegionSubtag(const std::string & value)259 bool LocaleHelper::IsUnicodeRegionSubtag(const std::string& value)
260 {
261 UErrorCode status = U_ZERO_ERROR;
262 icu::LocaleBuilder builder;
263 builder.setRegion(value).build(status);
264 return U_SUCCESS(status);
265 }
266
IsWellFormedCurrencyCode(const std::string & currency)267 bool LocaleHelper::IsWellFormedCurrencyCode(const std::string ¤cy)
268 {
269 if (currency.length() != CURRENCY_LENGTH) {
270 return false;
271 }
272 for (char c : currency) {
273 if (!IsAsciiAlpha(c)) {
274 return false;
275 }
276 }
277 return true;
278 }
279
IsWellFormedCalendarCode(const std::string & calendar)280 bool LocaleHelper::IsWellFormedCalendarCode(const std::string &calendar)
281 {
282 std::string value = calendar;
283 while (true) {
284 std::size_t found_dash = value.find('-');
285 if (found_dash == std::string::npos) {
286 return IsAlphanum(value, MIN_CALENDAR_LENGTH, MAX_CALENDAR_LENGTH);
287 }
288 if (!IsAlphanum(value.substr(0, found_dash), MIN_CALENDAR_LENGTH, MAX_CALENDAR_LENGTH)) {
289 return false;
290 }
291 value = value.substr(found_dash + 1);
292 }
293 }
294
IsStructurallyValidLanguageTag(const std::string & tag)295 bool LocaleHelper::IsStructurallyValidLanguageTag(const std::string& tag)
296 {
297 std::string tagCollection = tag;
298 std::vector<std::string> containers;
299 std::string substring;
300 std::set<std::string> uniqueSubtags;
301 size_t address = 1;
302 for (auto it = tagCollection.begin(); it != tagCollection.end(); it++) {
303 if (*it != '-' && it != tagCollection.end() - 1) {
304 substring += *it;
305 continue;
306 }
307 if (it == tagCollection.end() - 1) {
308 substring += *it;
309 }
310 containers.push_back(substring);
311 if (IsVariantSubtag(substring)) {
312 std::transform(substring.begin(), substring.end(), substring.begin(), AsciiAlphaToLower);
313 if (!uniqueSubtags.insert(substring).second) {
314 return false;
315 }
316 }
317 substring.clear();
318 }
319 bool result = DealwithLanguageTag(containers, address);
320 return result;
321 }
322
DealwithLanguageTag(const std::vector<std::string> & containers,size_t & address)323 bool LocaleHelper::DealwithLanguageTag(const std::vector<std::string> &containers, size_t &address)
324 {
325 // The abstract operation returns true if locale can be generated from the ABNF grammar in section 2.1 of the RFC,
326 // starting with Language-Tag, and does not contain duplicate variant or singleton subtags
327 // If language tag is empty, return false.
328 if (containers.empty()) {
329 return false;
330 }
331
332 // a. if the first tag is not language, return false.
333 if (!IsLanguageSubtag(containers[0])) {
334 return false;
335 }
336
337 // if the tag include language only, like "zh" or "de", return true;
338 if (containers.size() == 1) {
339 return true;
340 }
341
342 // Else, then
343 // if is unique singleton subtag, script and region tag.
344 if (IsExtensionSingleton(containers[1])) {
345 return true;
346 }
347
348 if (IsScriptSubtag(containers[address])) {
349 address++;
350 if (containers.size() == address) {
351 return true;
352 }
353 }
354
355 if (IsRegionSubtag(containers[address])) {
356 address++;
357 }
358
359 for (size_t i = address; i < containers.size(); i++) {
360 if (IsExtensionSingleton(containers[i])) {
361 return true;
362 }
363 if (!IsVariantSubtag(containers[i])) {
364 return false;
365 }
366 }
367 return true;
368 }
369
IsAlpha(const std::string & str,size_t min,size_t max)370 bool LocaleHelper::IsAlpha(const std::string &str, size_t min, size_t max)
371 {
372 if (!InRange(str.length(), min, max)) {
373 return false;
374 }
375 for (char c : str) {
376 if (!IsAsciiAlpha(c)) {
377 return false;
378 }
379 }
380 return true;
381 }
382
IsNormativeCalendar(const std::string & value)383 bool LocaleHelper::IsNormativeCalendar(const std::string &value)
384 {
385 return IsWellAlphaNumList(value);
386 }
387
IsNormativeNumberingSystem(const std::string & value)388 bool LocaleHelper::IsNormativeNumberingSystem(const std::string &value)
389 {
390 return IsWellAlphaNumList(value);
391 }
392
IsWellAlphaNumList(const std::string & value)393 bool LocaleHelper::IsWellAlphaNumList(const std::string &value)
394 {
395 if (value.length() < INTL_INDEX_THREE) {
396 return false;
397 }
398 char lastChar = value[value.length() - 1];
399 if (lastChar == '-') {
400 return false;
401 }
402 std::vector<std::string> items;
403 Split(value, "-", items);
404 for (auto &item : items) {
405 if (!IsAlphanum(item, INTL_INDEX_THREE, INTL_INDEX_EIGHT)) {
406 return false;
407 }
408 }
409 return true;
410 }
411
TruncateDouble(double number)412 double LocaleHelper::TruncateDouble(double number)
413 {
414 if (std::isnan(number)) {
415 return 0;
416 }
417 if (!std::isfinite(number)) {
418 return number;
419 }
420 // -0 to +0
421 if (Eq(number, 0)) {
422 return 0;
423 }
424 double ret = Geq(number, 0) ? std::floor(number) : std::ceil(number);
425 return ret;
426 }
427
GetAvailableLocales()428 std::set<std::string> LocaleHelper::GetAvailableLocales()
429 {
430 std::set<std::string> result;
431 int32_t count = 0;
432 const icu::Locale* locales = icu::Locale::getAvailableLocales(count);
433 if (count < 1 || locales == nullptr) {
434 HILOG_ERROR_I18N("LocaleHelper::GetAvailableLocales: Get available locales failed.");
435 return result;
436 }
437 for (int i = 0; i < count; i++) {
438 const char* name = locales[i].getName();
439 if (name != nullptr) {
440 result.insert(name);
441 }
442 }
443 return result;
444 }
445
SupportedLocalesOf(const std::vector<std::string> & requestLocales,const std::map<std::string,std::string> & configs,I18nErrorCode & status)446 std::vector<std::string> LocaleHelper::SupportedLocalesOf(const std::vector<std::string> &requestLocales,
447 const std::map<std::string, std::string> &configs, I18nErrorCode &status)
448 {
449 std::vector<std::string> undefined = {};
450 auto requestedLocales = CanonicalizeLocaleList(requestLocales, status);
451 if (status != I18nErrorCode::SUCCESS) {
452 return undefined;
453 }
454 std::string localeMatcher = ParseOption(configs, "localeMatcher", "best fit", true, status);
455 if (status != I18nErrorCode::SUCCESS) {
456 return undefined;
457 }
458 std::set<std::string> availableLocales = GetAvailableLocales();
459 return LookupSupportedLocales(availableLocales, requestedLocales);
460 }
461
CheckParamLocales(const std::vector<std::string> & localeArray)462 std::string LocaleHelper::CheckParamLocales(const std::vector<std::string> &localeArray)
463 {
464 I18nErrorCode i18nStatus = I18nErrorCode::SUCCESS;
465 std::vector<std::string> requestedLocales = CanonicalizeLocaleList(localeArray, i18nStatus);
466 if (i18nStatus != I18nErrorCode::SUCCESS) {
467 HILOG_ERROR_I18N("LocaleHelper::CheckParamLocales: CanonicalizeLocaleList failed");
468 return "invalid locale";
469 }
470 return "";
471 }
472
DefaultLocale()473 std::string LocaleHelper::DefaultLocale()
474 {
475 if (!defaultLocale.empty()) {
476 return defaultLocale;
477 }
478 std::lock_guard<std::mutex> defaultLocaleLock(defaultLocaleMutex);
479 if (!defaultLocale.empty()) {
480 return defaultLocale;
481 }
482 icu::Locale icuDefaultLocale;
483 if (strcmp(icuDefaultLocale.getName(), "en_US_POSIX") == 0 || strcmp(icuDefaultLocale.getName(), "c") == 0) {
484 defaultLocale = "en-US";
485 } else if (icuDefaultLocale.isBogus()) {
486 defaultLocale = "und";
487 } else {
488 UErrorCode icuStatus = U_ZERO_ERROR;
489 defaultLocale = icuDefaultLocale.toLanguageTag<std::string>(icuStatus);
490 if (U_FAILURE(icuStatus)) {
491 HILOG_ERROR_I18N("LocaleHelper::DefaultLocale: toLanguageTag failed.");
492 }
493 }
494 return defaultLocale;
495 }
496
LookupMatcher(const std::set<std::string> & availableLocales,const std::vector<std::string> & requestedLocales)497 std::string LocaleHelper::LookupMatcher(const std::set<std::string>& availableLocales,
498 const std::vector<std::string>& requestedLocales)
499 {
500 for (const std::string& locale : requestedLocales) {
501 std::string availableLocale = BestAvailableLocale(availableLocales, locale);
502 if (!availableLocale.empty()) {
503 return availableLocale;
504 }
505 }
506 return "";
507 }
508 } // namespace I18n
509 } // namespace Global
510 } // namespace OHOS
511