• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright (c) 2024-2025 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "plugins/ets/stdlib/native/core/IntlLocaleMatch.h"
17 #include "plugins/ets/stdlib/native/core/IntlCommon.h"
18 #include "plugins/ets/stdlib/native/core/IntlLanguageTag.h"
19 #include "plugins/ets/stdlib/native/core/IntlLocale.h"
20 #include "libpandabase/macros.h"
21 #include "unicode/locid.h"
22 #include "unicode/localebuilder.h"
23 #include "unicode/localematcher.h"
24 #include "stdlib_ani_helpers.h"
25 
26 #include <algorithm>
27 #include <cassert>
28 #include <cstddef>
29 #include <cstring>
30 #include <memory>
31 #include <string>
32 #include <array>
33 #include <set>
34 #include <sstream>
35 
36 namespace ark::ets::stdlib {
37 
38 template <typename... Args>
ThrowRangeError(ani_env * env,Args &&...args)39 static void ThrowRangeError(ani_env *env, Args &&...args)
40 {
41     std::stringstream message;
42     (message << ... << args);
43     ThrowNewError(env, "Lstd/core/RangeError;", message.str().c_str(), "Lstd/core/String;:V");
44 }
45 
GetAvailableLocales()46 std::vector<std::string> GetAvailableLocales()
47 {
48     int32_t availableCount;
49     std::vector<std::string> availableLocales;
50     const icu::Locale *locales = icu::Locale::getAvailableLocales(availableCount);
51     availableLocales.reserve(availableCount);
52     for (int32_t i = 0; i < availableCount; ++i) {
53         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
54         availableLocales.emplace_back(intl::ToStdStringLanguageTag(locales[i]));
55     }
56     return availableLocales;
57 }
58 
/**
 * BestAvailableLocale lookup: finds the longest prefix of `locale`, cut at
 * subtag boundaries, that is present in `availableLocales`.
 *
 * @param availableLocales Language tags that are supported; compared with exact
 *                         string equality.
 * @param locale           Requested language tag (expected without Unicode
 *                         extension sequences).
 * @return The matching prefix of `locale`, or an empty string when neither the
 *         tag nor any of its truncations is available.
 */
std::string BestAvailableLocale(const std::vector<std::string> &availableLocales, const std::string &locale)
{
    // Offset mandated by step 2.c below: a '-' found two characters before the
    // last '-' means the subtag in between is a single-character singleton.
    constexpr size_t SINGLETON_OFFSET = 2;

    // 1. Let candidate be locale.
    std::string candidate = locale;
    // 2. Repeat,
    while (!candidate.empty()) {
        // a. If availableLocales contains an element equal to candidate, return candidate.
        if (std::find(availableLocales.begin(), availableLocales.end(), candidate) != availableLocales.end()) {
            return candidate;
        }
        // b. Let pos be the character index of the last occurrence of "-" (U+002D) within candidate.
        //    If that character does not occur, return undefined.
        size_t pos = candidate.rfind('-');
        if (pos == std::string::npos) {
            return {};
        }
        // c. If pos >= 2 and the character "-" occurs at index pos-2 of candidate, decrease pos by 2.
        //    This drops a singleton subtag (e.g. "-u") together with the subtag that followed it.
        if (pos >= SINGLETON_OFFSET && candidate[pos - SINGLETON_OFFSET] == '-') {
            pos -= SINGLETON_OFFSET;
        }
        // d. Let candidate be the substring of candidate from position 0, inclusive, to position pos, exclusive.
        candidate.resize(pos);
    }
    // Reached only when the candidate became empty (including an empty input tag).
    return {};
}
89 
GetDefaultLocaleTag()90 static std::string GetDefaultLocaleTag()
91 {
92     icu::Locale defaultLocale;
93 
94     const char *defaultLocaleName = defaultLocale.getName();
95     if (strcmp(defaultLocaleName, "en_US_POSIX") == 0 || strcmp(defaultLocaleName, "c") == 0) {
96         return "en-US";
97     }
98 
99     if (defaultLocale.isBogus() == TRUE) {
100         return "und";
101     }
102 
103     UErrorCode error = U_ZERO_ERROR;
104     auto defaultLocaleTag = defaultLocale.toLanguageTag<std::string>(error);
105     ANI_FATAL_IF(U_FAILURE(error));
106 
107     return defaultLocaleTag;
108 }
109 
ToStringList(ani_env * env,ani_array_ref aniList)110 static std::vector<std::string> ToStringList(ani_env *env, ani_array_ref aniList)
111 {
112     ani_size len;
113     ANI_FATAL_IF_ERROR(env->Array_GetLength(aniList, &len));
114 
115     std::vector<std::string> result;
116     result.reserve(len);
117 
118     for (ani_size i = 0; i < len; i++) {
119         ani_ref aniRef;
120         ANI_FATAL_IF_ERROR(env->Array_Get_Ref(aniList, i, &aniRef));
121 
122         auto item = ConvertFromAniString(env, reinterpret_cast<ani_string>(aniRef));
123         result.push_back(item);
124     }
125     return result;
126 }
127 
ToAniStrArray(ani_env * env,std::vector<std::string> strings)128 static ani_array_ref ToAniStrArray(ani_env *env, std::vector<std::string> strings)
129 {
130     ani_class stringClass;
131     ANI_FATAL_IF_ERROR(env->FindClass("Lstd/core/String;", &stringClass));
132 
133     ani_array_ref array;
134     if (strings.empty()) {
135         ANI_FATAL_IF_ERROR(env->Array_New_Ref(stringClass, 0, nullptr, &array));
136         return array;
137     }
138     auto first = intl::StdStrToAni(env, strings[0]);
139     ANI_FATAL_IF_ERROR(env->Array_New_Ref(stringClass, strings.size(), first, &array));
140     for (size_t i = 1; i < strings.size(); ++i) {
141         auto item = intl::StdStrToAni(env, strings[i]);
142         ANI_FATAL_IF_ERROR(env->Array_Set_Ref(array, i, item));
143     }
144     return array;
145 }
146 
CanonicalizeLocaleList(ani_env * env,std::vector<std::string> & seen,std::vector<std::string> & requestedLocales)147 ani_status CanonicalizeLocaleList(ani_env *env, std::vector<std::string> &seen,
148                                   std::vector<std::string> &requestedLocales)
149 {
150     if (seen.empty()) {
151         return ANI_PENDING_ERROR;
152     }
153     auto len = seen.size();
154     for (size_t i = 0; i < len; i++) {
155         std::string localeCStr = seen[i];
156         if (!intl::IsStructurallyValidLanguageTag(localeCStr)) {
157             ThrowRangeError(env, "invalid locale");
158             return ANI_PENDING_ERROR;
159         }
160         if (localeCStr.length() == 0) {
161             ThrowRangeError(env, "invalid locale");
162             return ANI_PENDING_ERROR;
163         }
164 
165         std::transform(localeCStr.begin(), localeCStr.end(), localeCStr.begin(), intl::AsciiAlphaToLower);
166         UErrorCode success = U_ZERO_ERROR;
167         icu::Locale formalLocale = icu::Locale::forLanguageTag(seen[i], success);
168         if ((U_FAILURE(success) != 0) || (formalLocale.isBogus() != 0)) {
169             ThrowRangeError(env, "invalid locale");
170             return ANI_PENDING_ERROR;
171         }
172         // Resets the LocaleBuilder to match the locale.
173         // Returns an instance of Locale created from the fields set on this builder.
174         formalLocale = icu::LocaleBuilder().setLocale(formalLocale).build(success);
175         // Canonicalize the locale ID of this object according to CLDR.
176         formalLocale.canonicalize(success);
177         if ((U_FAILURE(success) != 0) || (formalLocale.isBogus() != 0)) {
178             ThrowRangeError(env, "invalid locale");
179             return ANI_PENDING_ERROR;
180         }
181         std::string languageTag = intl::ToStdStringLanguageTag(formalLocale);
182         if (std::find(requestedLocales.begin(), requestedLocales.end(), languageTag) == requestedLocales.end()) {
183             requestedLocales.push_back(languageTag);
184         }
185     }
186     return ANI_OK;
187 }
188 
BuildLocaleMatcher(UErrorCode & success)189 static icu::LocaleMatcher BuildLocaleMatcher(UErrorCode &success)
190 {
191     UErrorCode error = U_ZERO_ERROR;
192 
193     const icu::Locale defaultLocale = icu::Locale::forLanguageTag(GetDefaultLocaleTag(), error);
194     ANI_FATAL_IF(U_FAILURE(error));
195 
196     icu::LocaleMatcher::Builder builder;
197     builder.setDefaultLocale(&defaultLocale);
198 
199     int32_t count;
200     const icu::Locale *availableLocales = icu::Locale::getAvailableLocales(count);
201     for (int32_t i = 0; i < count; i++) {
202         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
203         builder.addSupportedLocale(availableLocales[i]);
204     }
205 
206     return builder.build(success);
207 }
208 
GetLocale(ani_env * env,std::string & locTag)209 icu::Locale GetLocale(ani_env *env, std::string &locTag)
210 {
211     UErrorCode status = U_ZERO_ERROR;
212     icu::Locale locale = icu::Locale::forLanguageTag(icu::StringPiece(locTag.c_str()), status);
213     if (UNLIKELY(U_FAILURE(status))) {
214         const auto errorMessage = std::string("Language tag '").append(locTag).append("' is invalid or not supported");
215         ThrowNewError(env, "Lstd/core/RuntimeException;", errorMessage.c_str(), "Lstd/core/String;:V");
216         return nullptr;
217     }
218     return locale;
219 }
220 
StdCoreIntlBestFitLocale(ani_env * env,ani_class klass,ani_array_ref locales)221 ani_string StdCoreIntlBestFitLocale(ani_env *env, [[maybe_unused]] ani_class klass, ani_array_ref locales)
222 {
223     auto tags = ToStringList(env, locales);
224     for (const auto &tag : tags) {
225         if (!intl::IsStructurallyValidLanguageTag(tag)) {
226             ThrowRangeError(env, "Incorrect locale information provided");
227         }
228     }
229     auto success = UErrorCode::U_ZERO_ERROR;
230     auto matcher = BuildLocaleMatcher(success);
231     if (UNLIKELY(U_FAILURE(success))) {
232         ThrowNewError(env, "Lstd/core/RuntimeException;", "Unable to build locale matcher", "Lstd/core/String;:V");
233         return nullptr;
234     }
235     auto it = intl::LanguageTagListIterator(tags);
236     auto bestfit = matcher.getBestMatchResult(it, success);
237     if (UNLIKELY(U_FAILURE(success))) {
238         ThrowNewError(env, "Lstd/core/RuntimeException;", "Unable to get best match result", "Lstd/core/String;:V");
239         return nullptr;
240     }
241     auto locale = bestfit.makeResolvedLocale(success);
242     if (UNLIKELY(U_FAILURE(success))) {
243         ThrowNewError(env, "Lstd/core/RuntimeException;", "Unable to make resolved locale", "Lstd/core/String;:V");
244         return nullptr;
245     }
246     auto tag = locale.toLanguageTag<std::string>(success);
247     if (UNLIKELY(U_FAILURE(success))) {
248         ThrowNewError(env, "Lstd/core/RuntimeException;", "Unable to convert locale into language tag",
249                       "Lstd/core/String;:V");
250         return nullptr;
251     }
252     if (tag == "en_US_POSIX" || tag == "c") {
253         tag = "en-US";
254     }
255     return intl::StdStrToAni(env, tag);
256 }
257 
LookupLocales(std::vector<std::string> & availableLocales,std::vector<std::string> & requestedLocales)258 std::vector<std::string> LookupLocales(std::vector<std::string> &availableLocales,
259                                        std::vector<std::string> &requestedLocales)
260 {
261     auto length = requestedLocales.size();
262     std::vector<std::string> convertedLocales;
263     convertedLocales.reserve(length);
264     std::vector<uint32_t> indexAvailableLocales;
265     indexAvailableLocales.reserve(length);
266     for (uint32_t i = 0; i < length; ++i) {
267         convertedLocales.push_back(requestedLocales[i]);
268     }
269     // 1. For each element locale of requestedLocales in List order, do
270     //    a. Let noExtensionsLocale be the String value that is locale with all Unicode locale extension sequences
271     //       removed.
272     //    b. Let availableLocale be BestAvailableLocale(availableLocales, noExtensionsLocale).
273     //    c. If availableLocale is not undefined, append locale to the end of result.
274 
275     for (uint32_t i = 0; i < length; ++i) {
276         intl::ParsedLocale foundationResult = intl::HandleLocale(convertedLocales[i]);
277         std::string availableLocale = BestAvailableLocale(availableLocales, foundationResult.base);
278         if (!availableLocale.empty()) {
279             indexAvailableLocales.push_back(i);
280         }
281     }
282     // 2. Let result be a new empty List.
283     std::vector<std::string> result;
284     result.reserve(length);
285     for (unsigned int indexAvailableLocale : indexAvailableLocales) {
286         result.push_back(requestedLocales[indexAvailableLocale]);
287     }
288     // 3. Return result.
289     return result;
290 }
291 
StdCoreIntlBestFitLocales(ani_env * env,ani_class klass,ani_array_ref locales)292 ani_array_ref StdCoreIntlBestFitLocales(ani_env *env, [[maybe_unused]] ani_class klass, ani_array_ref locales)
293 {
294     auto tags = ToStringList(env, locales);
295 
296     auto success = UErrorCode::U_ZERO_ERROR;
297     auto matcher = BuildLocaleMatcher(success);
298     if (UNLIKELY(U_FAILURE(success))) {
299         ThrowNewError(env, "Lstd/core/RuntimeException;", "Unable to build locale matcher", "Lstd/core/String;:V");
300         return nullptr;
301     }
302 
303     auto result = std::vector<std::string>();
304     for (const auto &tag : tags) {
305         if (!intl::IsStructurallyValidLanguageTag(tag)) {
306             ThrowRangeError(env, "Incorrect locale information provided");
307             return nullptr;
308         }
309         success = UErrorCode::U_ZERO_ERROR;
310         auto desired = icu::Locale::forLanguageTag(tag, success);
311         auto matched = matcher.getBestMatchResult(desired, success);
312         if (UNLIKELY(U_FAILURE(success))) {
313             continue;
314         }
315         if (matched.getSupportedIndex() < 0) {
316             continue;
317         }
318         auto bestfit = desired.toLanguageTag<std::string>(success);
319         if (UNLIKELY(U_FAILURE(success))) {
320             continue;
321         }
322         result.push_back(bestfit);
323     }
324     return ToAniStrArray(env, result);
325 }
326 
LookupLocale(const std::string & locTag,const icu::Locale * availableLocales,const int32_t count)327 static std::string LookupLocale(const std::string &locTag, const icu::Locale *availableLocales, const int32_t count)
328 {
329     UErrorCode success = U_ZERO_ERROR;
330     auto locP = icu::StringPiece(locTag.c_str());
331     icu::Locale requestedLoc = icu::Locale::forLanguageTag(locP, success);
332     if (UNLIKELY(U_FAILURE(success))) {
333         return std::string();
334     }
335     for (int32_t i = 0; i < count; i++) {
336         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
337         if (LIKELY(availableLocales[i] == requestedLoc)) {
338             return requestedLoc.toLanguageTag<std::string>(success);
339         }
340     }
341     return std::string();
342 }
343 
StdCoreIntlLookupLocale(ani_env * env,ani_class klass,ani_array_ref locales)344 ani_string StdCoreIntlLookupLocale(ani_env *env, [[maybe_unused]] ani_class klass, ani_array_ref locales)
345 {
346     UErrorCode success = U_ZERO_ERROR;
347     ani_ref locale;
348     int32_t availableCount;
349     std::string bestLoc;
350     const icu::Locale *availableLocales = icu::Locale::getAvailableLocales(availableCount);
351     if (UNLIKELY(U_FAILURE(success))) {
352         return CreateUtf8String(env, "", 0);
353     }
354 
355     ani_size len;
356     ANI_FATAL_IF_ERROR(env->Array_GetLength(locales, &len));
357 
358     for (ani_size i = 0; i < len; i++) {
359         ANI_FATAL_IF_ERROR(env->Array_Get_Ref(locales, i, &locale));
360 
361         auto locTag = ConvertFromAniString(env, reinterpret_cast<ani_string>(locale));
362         if (!intl::IsStructurallyValidLanguageTag(locTag)) {
363             ThrowRangeError(env, "Incorrect locale information provided");
364         }
365 
366         bestLoc = LookupLocale(locTag, availableLocales, availableCount);
367         if (!bestLoc.empty()) {
368             return intl::StdStrToAni(env, bestLoc);
369         }
370     }
371     return intl::StdStrToAni(env, GetDefaultLocaleTag());
372 }
373 
StdCoreIntlLookupLocales(ani_env * env,ani_class klass,ani_array_ref locales)374 ani_array_ref StdCoreIntlLookupLocales(ani_env *env, [[maybe_unused]] ani_class klass, ani_array_ref locales)
375 {
376     auto tags = ToStringList(env, locales);
377     auto availableLocales = GetAvailableLocales();
378     std::vector<std::string> requestedLocales;
379     ani_status status = CanonicalizeLocaleList(env, tags, requestedLocales);
380     if (status != ANI_OK) {
381         return nullptr;
382     }
383     auto result = LookupLocales(availableLocales, requestedLocales);
384     return ToAniStrArray(env, result);
385 }
386 
RegisterIntlLocaleMatch(ani_env * env)387 ani_status RegisterIntlLocaleMatch(ani_env *env)
388 {
389     const auto methods = std::array {ani_native_function {"bestFitLocale", "[Lstd/core/String;:Lstd/core/String;",
390                                                           reinterpret_cast<void *>(StdCoreIntlBestFitLocale)},
391                                      ani_native_function {"lookupLocale", "[Lstd/core/String;:Lstd/core/String;",
392                                                           reinterpret_cast<void *>(StdCoreIntlLookupLocale)},
393                                      ani_native_function {"bestFitLocales", "[Lstd/core/String;:[Lstd/core/String;",
394                                                           reinterpret_cast<void *>(StdCoreIntlBestFitLocales)},
395                                      ani_native_function {"lookupLocales", "[Lstd/core/String;:[Lstd/core/String;",
396                                                           reinterpret_cast<void *>(StdCoreIntlLookupLocales)}};
397 
398     ani_class localeMatchClass;
399     ANI_FATAL_IF_ERROR(env->FindClass("Lstd/core/LocaleMatch;", &localeMatchClass));
400 
401     return env->Class_BindNativeMethods(localeMatchClass, methods.data(), methods.size());
402 }
403 
404 }  // namespace ark::ets::stdlib
405