• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright (c) 2024-2025 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "plugins/ets/stdlib/native/core/IntlDisplayNames.h"
17 #include "plugins/ets/stdlib/native/core/IntlCommon.h"
18 #include "plugins/ets/stdlib/native/core/IntlState.h"
19 #include "plugins/ets/stdlib/native/core/stdlib_ani_helpers.h"
20 #include "libpandabase/macros.h"
21 #include "unicode/numberformatter.h"
22 #include "unicode/numberrangeformatter.h"
23 #include "unicode/locid.h"
24 #include "unicode/unistr.h"
25 #include "unicode/uloc.h"
26 #include "unicode/ucurr.h"
27 #include "unicode/uldnames.h"
28 #include "unicode/udisplaycontext.h"
29 #include "IntlCommon.h"
30 
31 #include <string>
32 #include <string_view>
33 #include <cstdlib>
34 #include <array>
35 #include <optional>
36 #include <algorithm>
37 #include <memory>
38 #include <cctype>
39 
40 namespace ark::ets::stdlib::intl {
41 
/**
 * @enum CodeType
 * @brief Kinds of codes for which a localized display name can be requested
 *
 * Each enumerator corresponds to one of the type strings accepted by
 * StringToCodeType ("language", "script", "region", "currency").
 */
enum class CodeType {
    LANGUAGE,  ///< Language code (ISO 639)
    SCRIPT,    ///< Script code (ISO 15924)
    REGION,    ///< Region/Country code (ISO 3166)
    CURRENCY   ///< Currency code (ISO 4217)
};
59 
/// Exact length of an ISO 15924 script code (e.g. "Latn").
constexpr int32_t ISO_SCRIPT_CODE_LENGTH = 4;
/// Exact length of an ISO 3166-1 alpha-2 region code (e.g. "US").
constexpr int32_t ISO_REGION_ALPHA_LENGTH = 2;
/// Exact length of a UN M49 numeric region code (e.g. "840").
constexpr int32_t ISO_REGION_NUMERIC_LENGTH = 3;
/// Exact length of an ISO 4217 currency code (e.g. "USD").
constexpr int32_t ISO_CURRENCY_CODE_LENGTH = 3;
/// Capacity (in chars) of the stack buffers used for canonicalized locale identifiers.
constexpr int32_t BUFFER_CAPACITY_SMALL = 100;
/// Capacity (in UChars) of the stack buffers used for display names.
constexpr int32_t BUFFER_CAPACITY_LARGE = 200;

constexpr size_t FIRST_CHAR_INDEX = 0;
constexpr size_t SECOND_CHAR_INDEX = 1;
constexpr size_t THIRD_CHAR_INDEX = 2;
constexpr size_t FOURTH_CHAR_INDEX = 3;

// The ISO/UN codes validated below are pure ASCII. The <cctype> classifiers are deliberately
// avoided: they are locale dependent, are not constexpr, and have undefined behaviour when
// given a negative `char` (i.e. any byte of a non-ASCII UTF-8 sequence).

/// @brief Returns true if @p c is an ASCII uppercase letter ('A'..'Z')
[[nodiscard]] static constexpr bool IsAsciiUpperLetter(char c) noexcept
{
    return c >= 'A' && c <= 'Z';
}

/// @brief Returns true if @p c is an ASCII lowercase letter ('a'..'z')
[[nodiscard]] static constexpr bool IsAsciiLowerLetter(char c) noexcept
{
    return c >= 'a' && c <= 'z';
}

/// @brief Returns true if @p c is an ASCII decimal digit ('0'..'9')
[[nodiscard]] static constexpr bool IsAsciiDecimalDigit(char c) noexcept
{
    return c >= '0' && c <= '9';
}

/**
 * @brief Validates the shape of a script code according to ISO 15924
 *
 * Script codes must be exactly 4 ASCII letters with the first letter uppercase and
 * the remaining letters lowercase (e.g., "Latn", "Cyrl", "Arab"). Only the shape is
 * checked; the code is not looked up in any registry.
 *
 * @param code The script code to validate
 * @return true if the code has the shape of an ISO 15924 script code, false otherwise
 */
[[nodiscard]] constexpr bool IsValidScriptCode(std::string_view code) noexcept
{
    if (code.length() != ISO_SCRIPT_CODE_LENGTH) {
        return false;
    }

    // Title case: one uppercase letter followed by three lowercase letters
    return IsAsciiUpperLetter(code[FIRST_CHAR_INDEX]) && IsAsciiLowerLetter(code[SECOND_CHAR_INDEX]) &&
           IsAsciiLowerLetter(code[THIRD_CHAR_INDEX]) && IsAsciiLowerLetter(code[FOURTH_CHAR_INDEX]);
}

/**
 * @brief Validates the shape of a region code according to ISO 3166 or UN M49
 *
 * Valid region codes are either:
 * - Two ASCII uppercase letters (ISO 3166-1 alpha-2, e.g., "US", "JP", "DE")
 * - Three ASCII digits (UN M49 numeric code, e.g., "840", "392", "276")
 *
 * Only the shape is checked; the code is not looked up in any registry.
 *
 * @param code The region code to validate
 * @return true if the code has the shape of a region code, false otherwise
 */
[[nodiscard]] constexpr bool IsValidRegionCode(std::string_view code) noexcept
{
    if (code.length() == ISO_REGION_ALPHA_LENGTH) {
        return IsAsciiUpperLetter(code[FIRST_CHAR_INDEX]) && IsAsciiUpperLetter(code[SECOND_CHAR_INDEX]);
    }

    if (code.length() == ISO_REGION_NUMERIC_LENGTH) {
        return IsAsciiDecimalDigit(code[FIRST_CHAR_INDEX]) && IsAsciiDecimalDigit(code[SECOND_CHAR_INDEX]) &&
               IsAsciiDecimalDigit(code[THIRD_CHAR_INDEX]);
    }

    return false;
}

/**
 * @brief Validates the shape of a currency code according to ISO 4217
 *
 * Currency codes must be exactly 3 ASCII uppercase letters (e.g., "USD", "EUR", "JPY").
 * Only the shape is checked; the code is not looked up in any registry.
 *
 * @param code The currency code to validate
 * @return true if the code has the shape of an ISO 4217 currency code, false otherwise
 */
[[nodiscard]] constexpr bool IsValidCurrencyCode(std::string_view code) noexcept
{
    if (code.length() != ISO_CURRENCY_CODE_LENGTH) {
        return false;
    }

    return IsAsciiUpperLetter(code[FIRST_CHAR_INDEX]) && IsAsciiUpperLetter(code[SECOND_CHAR_INDEX]) &&
           IsAsciiUpperLetter(code[THIRD_CHAR_INDEX]);
}
135 
136 /**
137  * @brief Validates a language code according to BCP 47 standard
138  *
139  * Performs basic validation for language codes according to BCP 47.
140  * Language codes should be 2-3 letters (ISO 639) or 5-8 characters for language-extlang.
141  *
142  * @param code The language code to validate
143  * @return true if the code appears to be a valid BCP 47 language code, false otherwise
144  */
IsValidLanguageCode(std::string_view code)145 [[nodiscard]] bool IsValidLanguageCode(std::string_view code)
146 {
147     // Basic validation for language codes according to BCP 47
148     // Language codes should be 2-3 letters (ISO 639) or 5-8 characters for language-extlang
149     if (code.empty()) {
150         return false;
151     }
152 
153     // Check if the code uses valid characters
154     if (!std::all_of(code.begin(), code.end(), [](char c) { return (std::isalpha(c) != 0) || c == '-' || c == '_'; })) {
155         return false;
156     }
157 
158     // Additional validation: try canonicalizing with ICU and check for errors
159     UErrorCode status = U_ZERO_ERROR;
160     std::array<char, BUFFER_CAPACITY_SMALL> canonical {};
161 
162     uloc_canonicalize(std::string(code).c_str(), canonical.data(), BUFFER_CAPACITY_SMALL, &status);
163 
164     return (U_SUCCESS(status) != 0);
165 }
166 
167 /**
168  * @brief Canonicalizes a language code according to BCP 47 standard
169  *
170  * Attempts to convert a language code to its canonical form using ICU.
171  * If canonicalization fails, the original code is returned.
172  *
173  * @param code The language code to canonicalize
174  * @return The canonicalized language code, or the original if canonicalization fails
175  */
CanonicalizeLanguageCode(std::string_view code)176 [[nodiscard]] std::string CanonicalizeLanguageCode(std::string_view code)
177 {
178     UErrorCode status = U_ZERO_ERROR;
179     std::array<char, BUFFER_CAPACITY_SMALL> canonical {};
180 
181     uloc_canonicalize(std::string(code).c_str(), canonical.data(), BUFFER_CAPACITY_SMALL, &status);
182 
183     if ((U_FAILURE(status) != 0)) {
184         return std::string(code);  // Return original if canonicalization fails
185     }
186 
187     return std::string(canonical.data());
188 }
189 
190 /**
191  * @brief Converts a string type identifier to the corresponding CodeType enum
192  *
193  * @param typeStr The type string to convert ("language", "script", "region", or "currency")
194  * @param isValid Reference to a boolean that will be set to false if the type is invalid
195  * @return The corresponding CodeType enum value
196  */
StringToCodeType(std::string_view typeStr,bool & isValid)197 [[nodiscard]] CodeType StringToCodeType(std::string_view typeStr, bool &isValid) noexcept
198 {
199     isValid = true;
200 
201     if (typeStr == "language") {
202         return CodeType::LANGUAGE;
203     }
204     if (typeStr == "script") {
205         return CodeType::SCRIPT;
206     }
207     if (typeStr == "region") {
208         return CodeType::REGION;
209     }
210     if (typeStr == "currency") {
211         return CodeType::CURRENCY;
212     }
213 
214     isValid = false;
215     return CodeType::LANGUAGE;  // Default value, not used when isValid is false
216 }
217 
218 /**
219  * @brief Generic template function to retrieve a display name using ICU
220  *
221  * This function handles the common pattern of calling an ICU function to get a
222  * display name, dealing with buffer allocation, error checking, and result conversion.
223  *
224  * @tparam Func Type of the function to call (usually a function pointer or lambda)
225  * @param displayNames The ICU ULocaleDisplayNames object to use
226  * @param code The code to get the display name for
227  * @param getNameFunc The function to call to get the display name
228  * @return An optional containing the Unicode string result, or std::nullopt if the operation failed
229  */
230 template <typename Func>
GetDisplayName(ULocaleDisplayNames * displayNames,const char * code,Func getNameFunc)231 std::optional<icu::UnicodeString> GetDisplayName(ULocaleDisplayNames *displayNames, const char *code, Func getNameFunc)
232 {
233     UErrorCode status = U_ZERO_ERROR;
234     std::array<UChar, BUFFER_CAPACITY_LARGE> buffer {};
235 
236     int32_t length = getNameFunc(displayNames, code, buffer.data(), BUFFER_CAPACITY_LARGE, &status);
237     if ((U_SUCCESS(status) != 0) && length > 0) {
238         return icu::UnicodeString(buffer.data(), length);
239     }
240     return std::nullopt;
241 }
242 
243 /**
244  * @brief Validates code and returns canonicalized form if valid
245  *
246  * @param env The environment
247  * @param codeType The type of code to validate
248  * @param codeStr The code string to validate
249  * @param fallbackStr The fallback behavior if validation fails
250  * @param isValid Reference to a boolean that will be set to the validation result
251  * @return The canonicalized code if valid, or the original code otherwise
252  */
ValidateAndCanonicalizeCode(ani_env * env,CodeType codeType,const std::string & codeStr,const std::string & fallbackStr,bool & isValid)253 static std::string ValidateAndCanonicalizeCode(ani_env *env, CodeType codeType, const std::string &codeStr,
254                                                const std::string &fallbackStr, bool &isValid)
255 {
256     std::string canonicalizedCode = codeStr;
257 
258     switch (codeType) {
259         case CodeType::LANGUAGE:
260             isValid = IsValidLanguageCode(codeStr);
261             if (isValid) {
262                 canonicalizedCode = CanonicalizeLanguageCode(codeStr);
263             }
264             break;
265         case CodeType::SCRIPT:
266             isValid = IsValidScriptCode(codeStr);
267             break;
268         case CodeType::REGION:
269             isValid = IsValidRegionCode(codeStr);
270             break;
271         case CodeType::CURRENCY:
272             isValid = IsValidCurrencyCode(codeStr);
273             break;
274     }
275 
276     if (!isValid) {
277         if (fallbackStr == "code") {
278             return codeStr;
279         }
280         ThrowNewError(env, "Lstd/core/RangeError;", ("Invalid code: " + codeStr).c_str(), "Lstd/core/String;:V");
281     }
282 
283     return canonicalizedCode;
284 }
285 
286 /**
287  * @brief Gets the display name based on the code type
288  *
289  * @param displayNames The ICU ULocaleDisplayNames object to use
290  * @param codeType The type of code
291  * @param canonicalizedCode The canonicalized code to get the display name for
292  * @return An optional containing the Unicode string result
293  */
GetDisplayNameByType(ULocaleDisplayNames * displayNames,CodeType codeType,const std::string & canonicalizedCode)294 static std::optional<icu::UnicodeString> GetDisplayNameByType(ULocaleDisplayNames *displayNames, CodeType codeType,
295                                                               const std::string &canonicalizedCode)
296 {
297     std::optional<icu::UnicodeString> result;
298 
299     switch (codeType) {
300         case CodeType::LANGUAGE:
301             result = GetDisplayName(displayNames, canonicalizedCode.c_str(), uldn_localeDisplayName);
302             break;
303         case CodeType::SCRIPT:
304             result = GetDisplayName(displayNames, canonicalizedCode.c_str(), uldn_scriptDisplayName);
305             break;
306         case CodeType::REGION:
307             result = GetDisplayName(displayNames, canonicalizedCode.c_str(), uldn_regionDisplayName);
308             break;
309         case CodeType::CURRENCY: {
310             result = GetDisplayName(
311                 displayNames, canonicalizedCode.c_str(),
312                 [](ULocaleDisplayNames *ldn, const char *code, UChar *dest, int32_t destCapacity, UErrorCode *uStatus) {
313                     return uldn_keyValueDisplayName(ldn, "currency", code, dest, destCapacity, uStatus);
314                 });
315             if (!result) {
316                 // Fallback: just use the currency code
317                 result = icu::UnicodeString(canonicalizedCode.c_str(), static_cast<int32_t>(canonicalizedCode.length()),
318                                             "UTF-8");
319             }
320             break;
321         }
322     }
323 
324     return result;
325 }
326 
327 /// @brief Safely closes a ULocaleDisplayNames object
SafeCloseDisplayNames(ULocaleDisplayNames * displayNames)328 static inline void SafeCloseDisplayNames(ULocaleDisplayNames *displayNames)
329 {
330     if (displayNames != nullptr) {
331         uldn_close(displayNames);
332     }
333 }
334 
335 /**
336  * @brief Native implementation of DisplayNames.of() method
337  *
338  * Retrieves a localized display name for a given code based on the specified type and locale.
339  *
340  * @param env The environment
341  * @param klass The class from which this method was called (unused)
342  * @param locale The BCP 47 language tag for the desired locale
343  * @param type The type of code ("language", "script", "region", or "currency")
344  * @param code The code to get the display name for
345  * @param style The style of the display name ("long" or "short")
346  * @param fallback The fallback behavior if the display name cannot be found ("code" or "none")
347  * @param languageDisplay How to display language names with dialects ("dialect" or "standard")
348  * @return The localized display name as an ani_string, or nullptr if not found and fallback is "none"
349  */
StdCoreIntlDisplayNamesOf(ani_env * env,ani_class klass,ani_string locale,ani_string type,ani_string code,ani_string style,ani_string fallback,ani_string languageDisplay)350 static ani_string StdCoreIntlDisplayNamesOf(ani_env *env, [[maybe_unused]] ani_class klass, ani_string locale,
351                                             ani_string type, ani_string code, ani_string style, ani_string fallback,
352                                             ani_string languageDisplay)
353 {
354     auto localeStr = ConvertFromAniString(env, locale);
355     auto typeStr = ConvertFromAniString(env, type);
356     auto codeStr = ConvertFromAniString(env, code);
357     auto styleStr = ConvertFromAniString(env, style);
358     auto fallbackStr = ConvertFromAniString(env, fallback);
359     auto languageDisplayStr = (languageDisplay != nullptr) ? ConvertFromAniString(env, languageDisplay) : "dialect";
360 
361     bool isValidCodeType = true;
362     auto codeType = StringToCodeType(typeStr, isValidCodeType);
363 
364     if (!isValidCodeType) {
365         ThrowNewError(env, "Lstd/core/RangeError;", ("Invalid type: " + typeStr).c_str(), "Lstd/core/String;:V");
366         return nullptr;
367     }
368 
369     bool isValidCode = true;
370     std::string canonicalizedCode = ValidateAndCanonicalizeCode(env, codeType, codeStr, fallbackStr, isValidCode);
371     if (!isValidCode) {
372         return (fallbackStr == "code") ? StdStrToAni(env, codeStr) : nullptr;
373     }
374 
375     // Set up ICU locale
376     UErrorCode status = U_ZERO_ERROR;
377     auto icuLocale = icu::Locale::forLanguageTag(localeStr, status);
378     if ((U_FAILURE(status) != 0)) {
379         ThrowNewError(env, "Lstd/core/RangeError;", ("Invalid locale tag: " + localeStr).c_str(),
380                       "Lstd/core/String;:V");
381         return nullptr;
382     }
383 
384     // Determine ICU dialect handling and open display names
385     auto dialectHandling = (styleStr == "short") ? ULDN_DIALECT_NAMES : ULDN_STANDARD_NAMES;
386     ULocaleDisplayNames *displayNames = uldn_open(localeStr.c_str(), dialectHandling, &status);
387 
388     // Handle failure to create display names
389     if ((U_FAILURE(status) != 0) || displayNames == nullptr) {
390         SafeCloseDisplayNames(displayNames);
391         return (fallbackStr == "code") ? StdStrToAni(env, codeStr) : nullptr;
392     }
393 
394     std::optional<icu::UnicodeString> result = GetDisplayNameByType(displayNames, codeType, canonicalizedCode);
395 
396     ani_string retVal = nullptr;
397     bool hasValidResult = result.has_value() && (result->isEmpty() == 0);
398     if (hasValidResult) {
399         retVal = UnicodeToAniStr(env, *result);
400     } else if (fallbackStr == "code") {
401         retVal = StdStrToAni(env, codeStr);
402     } else {
403         ThrowNewError(env, "Lstd/core/RangeError;", ("Invalid code: " + codeStr).c_str(), "Lstd/core/String;:V");
404     }
405 
406     SafeCloseDisplayNames(displayNames);
407     return retVal;
408 }
409 
410 /**
411  * @brief Registers the native methods for the Intl.DisplayNames class
412  *
413  * This function binds the native C++ implementation of the DisplayNames methods
414  *
415  * @param env The environment
416  * @return ani_status indicating success or failure of the registration
417  */
RegisterIntlDisplayNames(ani_env * env)418 ani_status RegisterIntlDisplayNames(ani_env *env)
419 {
420     const auto methods = std::array {
421         ani_native_function {"ofNative",
422                              "Lstd/core/String;Lstd/core/String;Lstd/core/String;Lstd/core/String;Lstd/core/"
423                              "String;Lstd/core/String;:Lstd/core/String;",
424                              reinterpret_cast<void *>(StdCoreIntlDisplayNamesOf)},
425     };
426 
427     ani_class displayNamesClass;
428     ANI_FATAL_IF_ERROR(env->FindClass("Lstd/core/Intl/DisplayNames;", &displayNamesClass));
429 
430     return env->Class_BindNativeMethods(displayNamesClass, methods.data(), methods.size());
431 }
432 
433 }  // namespace ark::ets::stdlib::intl
434