• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright (c) 2025 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "IntlCollator.h"
17 #include "IntlCommon.h"
18 #include "ani.h"
19 #include "plugins/ets/runtime/ets_exceptions.h"
20 #include "libpandabase/macros.h"
21 #include "stdlib_ani_helpers.h"
22 #include <unicode/coll.h>
23 #include <unicode/locid.h>
24 #include <unicode/stringpiece.h>
25 #include <unicode/translit.h>
26 
27 #include <algorithm>
28 #include <string>
29 #include <array>
30 #include "IntlLocaleMatch.h"
31 
32 namespace ark::ets::stdlib::intl {
33 
34 // https://stackoverflow.com/questions/2992066/code-to-strip-diacritical-marks-using-icu
RemoveAccents(ani_env * env,const std::string & str)35 std::string RemoveAccents(ani_env *env, const std::string &str)
36 {
37     // UTF-8 std::string -> UTF-16 UnicodeString
38     icu::UnicodeString source = icu::UnicodeString::fromUTF8(icu::StringPiece(str));
39 
40     // Transliterate UTF-16 UnicodeString
41     UErrorCode status = U_ZERO_ERROR;
42     icu::Transliterator *accentsConverter =
43         icu::Transliterator::createInstance("NFD; [:M:] Remove; NFC", UTRANS_FORWARD, status);
44     accentsConverter->transliterate(source);
45     delete accentsConverter;
46     if (UNLIKELY(U_FAILURE(status))) {
47         ThrowNewError(env, "Lstd/core/RuntimeException;", "Removing accents failed, transliterate failed",
48                       "Lstd/core/String;:V");
49         return std::string();
50     }
51 
52     // UTF-16 UnicodeString -> UTF-8 std::string
53     std::string result;
54     source.toUTF8String(result);
55 
56     return result;
57 }
58 
/**
 * Removes, in place, every byte of text that ispunct() classifies as punctuation.
 *
 * @param text string to filter; modified in place.
 */
void RemovePunctuation(std::string &text)
{
    // Classify each byte as unsigned char: passing a negative char value (any UTF-8 byte >= 0x80
    // on platforms where char is signed) to ispunct is undefined behaviour.
    const auto isPunctuation = [](unsigned char ch) { return ispunct(ch) != 0; };
    text.erase(std::remove_if(text.begin(), text.end(), isPunctuation), text.end());
}
63 
StdCoreIntlCollatorRemoveAccents(ani_env * env,ani_class klass,ani_string etsText)64 ani_string StdCoreIntlCollatorRemoveAccents(ani_env *env, [[maybe_unused]] ani_class klass, ani_string etsText)
65 {
66     std::string text = ConvertFromAniString(env, etsText);
67     text = RemoveAccents(env, text);
68 
69     ani_boolean unhandledExc;
70     ANI_FATAL_IF_ERROR(env->ExistUnhandledError(&unhandledExc));
71     if (unhandledExc != 0U) {
72         return nullptr;
73     }
74 
75     return StdStrToAni(env, text);
76 }
77 
StdCoreIntlCollatorRemovePunctuation(ani_env * env,ani_class klass,ani_string etsText)78 ani_string StdCoreIntlCollatorRemovePunctuation(ani_env *env, [[maybe_unused]] ani_class klass, ani_string etsText)
79 {
80     std::string text = ConvertFromAniString(env, etsText);
81     RemovePunctuation(text);
82     return StdStrToAni(env, text);
83 }
84 
StdCoreIntlCollatorLocaleCmp(ani_env * env,ani_class klass,ani_string collationIn,ani_string langIn,ani_string firstStr,ani_string secondStr)85 ani_double StdCoreIntlCollatorLocaleCmp(ani_env *env, [[maybe_unused]] ani_class klass, ani_string collationIn,
86                                         ani_string langIn, ani_string firstStr, ani_string secondStr)
87 {
88     auto collation = ConvertFromAniString(env, collationIn);
89     auto lang = ConvertFromAniString(env, langIn);
90     auto str1 = ConvertFromAniString(env, firstStr);
91     auto str2 = ConvertFromAniString(env, secondStr);
92 
93     auto locale = GetLocale(env, lang);
94     UErrorCode status = U_ZERO_ERROR;
95     icu::StringPiece collationName = "collation";
96     icu::StringPiece collationValue = collation.c_str();
97     locale.setUnicodeKeywordValue(collationName, collationValue, status);
98     if (UNLIKELY(U_FAILURE(status))) {
99         const auto errorMessage = std::string("Collation '").append(collation).append("' is invalid or not supported");
100         ThrowNewError(env, "Lstd/core/RuntimeException;", errorMessage.c_str(), "Lstd/core/String;:V");
101         return 0;
102     }
103 
104     status = U_ZERO_ERROR;
105     std::unique_ptr<icu::Collator> collator(icu::Collator::createInstance(locale, status));
106     if (UNLIKELY(U_FAILURE(status))) {
107         icu::UnicodeString dispName;
108         locale.getDisplayName(dispName);
109         std::string localeName;
110         dispName.toUTF8String(localeName);
111         const auto errorMessage = std::string("Failed to create the collator for ").append(localeName);
112         ThrowNewError(env, "Lstd/core/RuntimeException;", errorMessage.c_str(), "Lstd/core/String;:V");
113     }
114 
115     auto strPiece1 = icu::StringPiece(str1.c_str());
116     auto strPiece2 = icu::StringPiece(str2.c_str());
117     if ((strPiece1.empty() != 0) && (strPiece2.empty() != 0)) {
118         auto res = collator->compareUTF8(strPiece1, strPiece2, status);
119         if (UNLIKELY(U_FAILURE(status))) {
120             ThrowNewError(env, "Lstd/core/RuntimeException;", "Comparison failed", "Lstd/core/String;:V");
121         }
122         return res;
123     }
124 
125     icu::UnicodeString source = StdStrToUnicode(str1);
126     icu::UnicodeString target = StdStrToUnicode(str2);
127     auto res = collator->compare(source, target, status);
128     if (UNLIKELY(U_FAILURE(status))) {
129         ThrowNewError(env, "Lstd/core/RuntimeException;", "Comparison failed", "Lstd/core/String;:V");
130     }
131     return res;
132 }
133 
RegisterIntlCollator(ani_env * env)134 ani_status RegisterIntlCollator(ani_env *env)
135 {
136     const auto methods =
137         std::array {ani_native_function {"removePunctuation", "Lstd/core/String;:Lstd/core/String;",
138                                          reinterpret_cast<void *>(StdCoreIntlCollatorRemovePunctuation)},
139                     ani_native_function {"removeAccents", "Lstd/core/String;:Lstd/core/String;",
140                                          reinterpret_cast<void *>(StdCoreIntlCollatorRemoveAccents)},
141                     ani_native_function {"compareByCollation",
142                                          "Lstd/core/String;Lstd/core/String;Lstd/core/String;Lstd/core/String;:D",
143                                          reinterpret_cast<void *>(StdCoreIntlCollatorLocaleCmp)}};
144 
145     ani_class collatorClass;
146     ANI_FATAL_IF_ERROR(env->FindClass("Lstd/core/Intl/Collator;", &collatorClass));
147     return env->Class_BindNativeMethods(collatorClass, methods.data(), methods.size());
148 }
149 
150 }  // namespace ark::ets::stdlib::intl
151