1 /**
2 * Copyright (c) 2024-2025 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "plugins/ets/stdlib/native/core/IntlLocaleMatch.h"
17 #include "plugins/ets/stdlib/native/core/IntlCommon.h"
18 #include "plugins/ets/stdlib/native/core/IntlLanguageTag.h"
19 #include "plugins/ets/stdlib/native/core/IntlLocale.h"
20 #include "libpandabase/macros.h"
21 #include "unicode/locid.h"
22 #include "unicode/localebuilder.h"
23 #include "unicode/localematcher.h"
24 #include "stdlib_ani_helpers.h"
25
26 #include <algorithm>
27 #include <cassert>
28 #include <cstddef>
29 #include <cstring>
30 #include <memory>
31 #include <string>
32 #include <array>
33 #include <set>
34 #include <sstream>
35
36 namespace ark::ets::stdlib {
37
38 template <typename... Args>
ThrowRangeError(ani_env * env,Args &&...args)39 static void ThrowRangeError(ani_env *env, Args &&...args)
40 {
41 std::stringstream message;
42 (message << ... << args);
43 ThrowNewError(env, "Lstd/core/RangeError;", message.str().c_str(), "Lstd/core/String;:V");
44 }
45
GetAvailableLocales()46 std::vector<std::string> GetAvailableLocales()
47 {
48 int32_t availableCount;
49 std::vector<std::string> availableLocales;
50 const icu::Locale *locales = icu::Locale::getAvailableLocales(availableCount);
51 availableLocales.reserve(availableCount);
52 for (int32_t i = 0; i < availableCount; ++i) {
53 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
54 availableLocales.emplace_back(intl::ToStdStringLanguageTag(locales[i]));
55 }
56 return availableLocales;
57 }
58
BestAvailableLocale(const std::vector<std::string> & availableLocales,const std::string & locale)59 std::string BestAvailableLocale(const std::vector<std::string> &availableLocales, const std::string &locale)
60 {
61 // 1. Let candidate be locale.
62 std::string localeCandidate = locale;
63 std::string undefined = std::string();
64 // 2. Repeat,
65 uint32_t length = availableLocales.size();
66 while (!localeCandidate.empty()) {
67 // a. If availableLocales contains an element equal to candidate, return candidate.
68 for (uint32_t i = 0; i < length; ++i) {
69 const std::string &itemStr = availableLocales[i];
70 if (itemStr == localeCandidate) {
71 return localeCandidate;
72 }
73 }
74 // b. Let pos be the character index of the last occurrence of "-" (U+002D) within candidate.
75 // If that character does not occur, return undefined.
76 size_t pos = localeCandidate.rfind('-');
77 if (pos == std::string::npos) {
78 return undefined;
79 }
80 // c. If pos ≥ 2 and the character "-" occurs at index pos-2 of candidate, decrease pos by 2.
81 if (pos >= intl::INTL_INDEX_TWO && localeCandidate[pos - intl::INTL_INDEX_TWO] == '-') {
82 pos -= intl::INTL_INDEX_TWO;
83 }
84 // d. Let candidate be the substring of candidate from position 0, inclusive, to position pos, exclusive.
85 localeCandidate.resize(pos);
86 }
87 return undefined;
88 }
89
GetDefaultLocaleTag()90 static std::string GetDefaultLocaleTag()
91 {
92 icu::Locale defaultLocale;
93
94 const char *defaultLocaleName = defaultLocale.getName();
95 if (strcmp(defaultLocaleName, "en_US_POSIX") == 0 || strcmp(defaultLocaleName, "c") == 0) {
96 return "en-US";
97 }
98
99 if (defaultLocale.isBogus() == TRUE) {
100 return "und";
101 }
102
103 UErrorCode error = U_ZERO_ERROR;
104 auto defaultLocaleTag = defaultLocale.toLanguageTag<std::string>(error);
105 ANI_FATAL_IF(U_FAILURE(error));
106
107 return defaultLocaleTag;
108 }
109
ToStringList(ani_env * env,ani_array_ref aniList)110 static std::vector<std::string> ToStringList(ani_env *env, ani_array_ref aniList)
111 {
112 ani_size len;
113 ANI_FATAL_IF_ERROR(env->Array_GetLength(aniList, &len));
114
115 std::vector<std::string> result;
116 result.reserve(len);
117
118 for (ani_size i = 0; i < len; i++) {
119 ani_ref aniRef;
120 ANI_FATAL_IF_ERROR(env->Array_Get_Ref(aniList, i, &aniRef));
121
122 auto item = ConvertFromAniString(env, reinterpret_cast<ani_string>(aniRef));
123 result.push_back(item);
124 }
125 return result;
126 }
127
ToAniStrArray(ani_env * env,std::vector<std::string> strings)128 static ani_array_ref ToAniStrArray(ani_env *env, std::vector<std::string> strings)
129 {
130 ani_class stringClass;
131 ANI_FATAL_IF_ERROR(env->FindClass("Lstd/core/String;", &stringClass));
132
133 ani_array_ref array;
134 if (strings.empty()) {
135 ANI_FATAL_IF_ERROR(env->Array_New_Ref(stringClass, 0, nullptr, &array));
136 return array;
137 }
138 auto first = intl::StdStrToAni(env, strings[0]);
139 ANI_FATAL_IF_ERROR(env->Array_New_Ref(stringClass, strings.size(), first, &array));
140 for (size_t i = 1; i < strings.size(); ++i) {
141 auto item = intl::StdStrToAni(env, strings[i]);
142 ANI_FATAL_IF_ERROR(env->Array_Set_Ref(array, i, item));
143 }
144 return array;
145 }
146
CanonicalizeLocaleList(ani_env * env,std::vector<std::string> & seen,std::vector<std::string> & requestedLocales)147 ani_status CanonicalizeLocaleList(ani_env *env, std::vector<std::string> &seen,
148 std::vector<std::string> &requestedLocales)
149 {
150 if (seen.empty()) {
151 return ANI_PENDING_ERROR;
152 }
153 auto len = seen.size();
154 for (size_t i = 0; i < len; i++) {
155 std::string localeCStr = seen[i];
156 if (!intl::IsStructurallyValidLanguageTag(localeCStr)) {
157 ThrowRangeError(env, "invalid locale");
158 return ANI_PENDING_ERROR;
159 }
160 if (localeCStr.length() == 0) {
161 ThrowRangeError(env, "invalid locale");
162 return ANI_PENDING_ERROR;
163 }
164
165 std::transform(localeCStr.begin(), localeCStr.end(), localeCStr.begin(), intl::AsciiAlphaToLower);
166 UErrorCode success = U_ZERO_ERROR;
167 icu::Locale formalLocale = icu::Locale::forLanguageTag(seen[i], success);
168 if ((U_FAILURE(success) != 0) || (formalLocale.isBogus() != 0)) {
169 ThrowRangeError(env, "invalid locale");
170 return ANI_PENDING_ERROR;
171 }
172 // Resets the LocaleBuilder to match the locale.
173 // Returns an instance of Locale created from the fields set on this builder.
174 formalLocale = icu::LocaleBuilder().setLocale(formalLocale).build(success);
175 // Canonicalize the locale ID of this object according to CLDR.
176 formalLocale.canonicalize(success);
177 if ((U_FAILURE(success) != 0) || (formalLocale.isBogus() != 0)) {
178 ThrowRangeError(env, "invalid locale");
179 return ANI_PENDING_ERROR;
180 }
181 std::string languageTag = intl::ToStdStringLanguageTag(formalLocale);
182 if (std::find(requestedLocales.begin(), requestedLocales.end(), languageTag) == requestedLocales.end()) {
183 requestedLocales.push_back(languageTag);
184 }
185 }
186 return ANI_OK;
187 }
188
BuildLocaleMatcher(UErrorCode & success)189 static icu::LocaleMatcher BuildLocaleMatcher(UErrorCode &success)
190 {
191 UErrorCode error = U_ZERO_ERROR;
192
193 const icu::Locale defaultLocale = icu::Locale::forLanguageTag(GetDefaultLocaleTag(), error);
194 ANI_FATAL_IF(U_FAILURE(error));
195
196 icu::LocaleMatcher::Builder builder;
197 builder.setDefaultLocale(&defaultLocale);
198
199 int32_t count;
200 const icu::Locale *availableLocales = icu::Locale::getAvailableLocales(count);
201 for (int32_t i = 0; i < count; i++) {
202 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
203 builder.addSupportedLocale(availableLocales[i]);
204 }
205
206 return builder.build(success);
207 }
208
GetLocale(ani_env * env,std::string & locTag)209 icu::Locale GetLocale(ani_env *env, std::string &locTag)
210 {
211 UErrorCode status = U_ZERO_ERROR;
212 icu::Locale locale = icu::Locale::forLanguageTag(icu::StringPiece(locTag.c_str()), status);
213 if (UNLIKELY(U_FAILURE(status))) {
214 const auto errorMessage = std::string("Language tag '").append(locTag).append("' is invalid or not supported");
215 ThrowNewError(env, "Lstd/core/RuntimeException;", errorMessage.c_str(), "Lstd/core/String;:V");
216 return nullptr;
217 }
218 return locale;
219 }
220
StdCoreIntlBestFitLocale(ani_env * env,ani_class klass,ani_array_ref locales)221 ani_string StdCoreIntlBestFitLocale(ani_env *env, [[maybe_unused]] ani_class klass, ani_array_ref locales)
222 {
223 auto tags = ToStringList(env, locales);
224 for (const auto &tag : tags) {
225 if (!intl::IsStructurallyValidLanguageTag(tag)) {
226 ThrowRangeError(env, "Incorrect locale information provided");
227 }
228 }
229 auto success = UErrorCode::U_ZERO_ERROR;
230 auto matcher = BuildLocaleMatcher(success);
231 if (UNLIKELY(U_FAILURE(success))) {
232 ThrowNewError(env, "Lstd/core/RuntimeException;", "Unable to build locale matcher", "Lstd/core/String;:V");
233 return nullptr;
234 }
235 auto it = intl::LanguageTagListIterator(tags);
236 auto bestfit = matcher.getBestMatchResult(it, success);
237 if (UNLIKELY(U_FAILURE(success))) {
238 ThrowNewError(env, "Lstd/core/RuntimeException;", "Unable to get best match result", "Lstd/core/String;:V");
239 return nullptr;
240 }
241 auto locale = bestfit.makeResolvedLocale(success);
242 if (UNLIKELY(U_FAILURE(success))) {
243 ThrowNewError(env, "Lstd/core/RuntimeException;", "Unable to make resolved locale", "Lstd/core/String;:V");
244 return nullptr;
245 }
246 auto tag = locale.toLanguageTag<std::string>(success);
247 if (UNLIKELY(U_FAILURE(success))) {
248 ThrowNewError(env, "Lstd/core/RuntimeException;", "Unable to convert locale into language tag",
249 "Lstd/core/String;:V");
250 return nullptr;
251 }
252 if (tag == "en_US_POSIX" || tag == "c") {
253 tag = "en-US";
254 }
255 return intl::StdStrToAni(env, tag);
256 }
257
LookupLocales(std::vector<std::string> & availableLocales,std::vector<std::string> & requestedLocales)258 std::vector<std::string> LookupLocales(std::vector<std::string> &availableLocales,
259 std::vector<std::string> &requestedLocales)
260 {
261 auto length = requestedLocales.size();
262 std::vector<std::string> convertedLocales;
263 convertedLocales.reserve(length);
264 std::vector<uint32_t> indexAvailableLocales;
265 indexAvailableLocales.reserve(length);
266 for (uint32_t i = 0; i < length; ++i) {
267 convertedLocales.push_back(requestedLocales[i]);
268 }
269 // 1. For each element locale of requestedLocales in List order, do
270 // a. Let noExtensionsLocale be the String value that is locale with all Unicode locale extension sequences
271 // removed.
272 // b. Let availableLocale be BestAvailableLocale(availableLocales, noExtensionsLocale).
273 // c. If availableLocale is not undefined, append locale to the end of result.
274
275 for (uint32_t i = 0; i < length; ++i) {
276 intl::ParsedLocale foundationResult = intl::HandleLocale(convertedLocales[i]);
277 std::string availableLocale = BestAvailableLocale(availableLocales, foundationResult.base);
278 if (!availableLocale.empty()) {
279 indexAvailableLocales.push_back(i);
280 }
281 }
282 // 2. Let result be a new empty List.
283 std::vector<std::string> result;
284 result.reserve(length);
285 for (unsigned int indexAvailableLocale : indexAvailableLocales) {
286 result.push_back(requestedLocales[indexAvailableLocale]);
287 }
288 // 3. Return result.
289 return result;
290 }
291
StdCoreIntlBestFitLocales(ani_env * env,ani_class klass,ani_array_ref locales)292 ani_array_ref StdCoreIntlBestFitLocales(ani_env *env, [[maybe_unused]] ani_class klass, ani_array_ref locales)
293 {
294 auto tags = ToStringList(env, locales);
295
296 auto success = UErrorCode::U_ZERO_ERROR;
297 auto matcher = BuildLocaleMatcher(success);
298 if (UNLIKELY(U_FAILURE(success))) {
299 ThrowNewError(env, "Lstd/core/RuntimeException;", "Unable to build locale matcher", "Lstd/core/String;:V");
300 return nullptr;
301 }
302
303 auto result = std::vector<std::string>();
304 for (const auto &tag : tags) {
305 if (!intl::IsStructurallyValidLanguageTag(tag)) {
306 ThrowRangeError(env, "Incorrect locale information provided");
307 return nullptr;
308 }
309 success = UErrorCode::U_ZERO_ERROR;
310 auto desired = icu::Locale::forLanguageTag(tag, success);
311 auto matched = matcher.getBestMatchResult(desired, success);
312 if (UNLIKELY(U_FAILURE(success))) {
313 continue;
314 }
315 if (matched.getSupportedIndex() < 0) {
316 continue;
317 }
318 auto bestfit = desired.toLanguageTag<std::string>(success);
319 if (UNLIKELY(U_FAILURE(success))) {
320 continue;
321 }
322 result.push_back(bestfit);
323 }
324 return ToAniStrArray(env, result);
325 }
326
LookupLocale(const std::string & locTag,const icu::Locale * availableLocales,const int32_t count)327 static std::string LookupLocale(const std::string &locTag, const icu::Locale *availableLocales, const int32_t count)
328 {
329 UErrorCode success = U_ZERO_ERROR;
330 auto locP = icu::StringPiece(locTag.c_str());
331 icu::Locale requestedLoc = icu::Locale::forLanguageTag(locP, success);
332 if (UNLIKELY(U_FAILURE(success))) {
333 return std::string();
334 }
335 for (int32_t i = 0; i < count; i++) {
336 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
337 if (LIKELY(availableLocales[i] == requestedLoc)) {
338 return requestedLoc.toLanguageTag<std::string>(success);
339 }
340 }
341 return std::string();
342 }
343
StdCoreIntlLookupLocale(ani_env * env,ani_class klass,ani_array_ref locales)344 ani_string StdCoreIntlLookupLocale(ani_env *env, [[maybe_unused]] ani_class klass, ani_array_ref locales)
345 {
346 UErrorCode success = U_ZERO_ERROR;
347 ani_ref locale;
348 int32_t availableCount;
349 std::string bestLoc;
350 const icu::Locale *availableLocales = icu::Locale::getAvailableLocales(availableCount);
351 if (UNLIKELY(U_FAILURE(success))) {
352 return CreateUtf8String(env, "", 0);
353 }
354
355 ani_size len;
356 ANI_FATAL_IF_ERROR(env->Array_GetLength(locales, &len));
357
358 for (ani_size i = 0; i < len; i++) {
359 ANI_FATAL_IF_ERROR(env->Array_Get_Ref(locales, i, &locale));
360
361 auto locTag = ConvertFromAniString(env, reinterpret_cast<ani_string>(locale));
362 if (!intl::IsStructurallyValidLanguageTag(locTag)) {
363 ThrowRangeError(env, "Incorrect locale information provided");
364 }
365
366 bestLoc = LookupLocale(locTag, availableLocales, availableCount);
367 if (!bestLoc.empty()) {
368 return intl::StdStrToAni(env, bestLoc);
369 }
370 }
371 return intl::StdStrToAni(env, GetDefaultLocaleTag());
372 }
373
StdCoreIntlLookupLocales(ani_env * env,ani_class klass,ani_array_ref locales)374 ani_array_ref StdCoreIntlLookupLocales(ani_env *env, [[maybe_unused]] ani_class klass, ani_array_ref locales)
375 {
376 auto tags = ToStringList(env, locales);
377 auto availableLocales = GetAvailableLocales();
378 std::vector<std::string> requestedLocales;
379 ani_status status = CanonicalizeLocaleList(env, tags, requestedLocales);
380 if (status != ANI_OK) {
381 return nullptr;
382 }
383 auto result = LookupLocales(availableLocales, requestedLocales);
384 return ToAniStrArray(env, result);
385 }
386
RegisterIntlLocaleMatch(ani_env * env)387 ani_status RegisterIntlLocaleMatch(ani_env *env)
388 {
389 const auto methods = std::array {ani_native_function {"bestFitLocale", "[Lstd/core/String;:Lstd/core/String;",
390 reinterpret_cast<void *>(StdCoreIntlBestFitLocale)},
391 ani_native_function {"lookupLocale", "[Lstd/core/String;:Lstd/core/String;",
392 reinterpret_cast<void *>(StdCoreIntlLookupLocale)},
393 ani_native_function {"bestFitLocales", "[Lstd/core/String;:[Lstd/core/String;",
394 reinterpret_cast<void *>(StdCoreIntlBestFitLocales)},
395 ani_native_function {"lookupLocales", "[Lstd/core/String;:[Lstd/core/String;",
396 reinterpret_cast<void *>(StdCoreIntlLookupLocales)}};
397
398 ani_class localeMatchClass;
399 ANI_FATAL_IF_ERROR(env->FindClass("Lstd/core/LocaleMatch;", &localeMatchClass));
400
401 return env->Class_BindNativeMethods(localeMatchClass, methods.data(), methods.size());
402 }
403
404 } // namespace ark::ets::stdlib
405