1 /**
2 * Copyright (c) 2025 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
#include "IntlCollator.h"
#include "IntlCommon.h"
#include "ani.h"
#include "plugins/ets/runtime/ets_exceptions.h"
#include "libpandabase/macros.h"
#include "stdlib_ani_helpers.h"
#include <unicode/coll.h>
#include <unicode/locid.h>
#include <unicode/stringpiece.h>
#include <unicode/translit.h>

#include <algorithm>
#include <array>
#include <cctype>
#include <memory>
#include <string>
#include "IntlLocaleMatch.h"
31
32 namespace ark::ets::stdlib::intl {
33
34 // https://stackoverflow.com/questions/2992066/code-to-strip-diacritical-marks-using-icu
RemoveAccents(ani_env * env,const std::string & str)35 std::string RemoveAccents(ani_env *env, const std::string &str)
36 {
37 // UTF-8 std::string -> UTF-16 UnicodeString
38 icu::UnicodeString source = icu::UnicodeString::fromUTF8(icu::StringPiece(str));
39
40 // Transliterate UTF-16 UnicodeString
41 UErrorCode status = U_ZERO_ERROR;
42 icu::Transliterator *accentsConverter =
43 icu::Transliterator::createInstance("NFD; [:M:] Remove; NFC", UTRANS_FORWARD, status);
44 accentsConverter->transliterate(source);
45 delete accentsConverter;
46 if (UNLIKELY(U_FAILURE(status))) {
47 ThrowNewError(env, "Lstd/core/RuntimeException;", "Removing accents failed, transliterate failed",
48 "Lstd/core/String;:V");
49 return std::string();
50 }
51
52 // UTF-16 UnicodeString -> UTF-8 std::string
53 std::string result;
54 source.toUTF8String(result);
55
56 return result;
57 }
58
/**
 * Removes, in place, every byte of `text` that the C library classifies as punctuation.
 *
 * @param text string to filter; modified in place. Bytes outside the ASCII range (for example
 *             the bytes of multi-byte UTF-8 sequences) are handed to the classifier as
 *             unsigned values.
 */
void RemovePunctuation(std::string &text)
{
    // The <cctype> classifiers require an argument representable as unsigned char (or EOF).
    // Plain char may be signed, so non-ASCII bytes would be passed as negative values, which
    // is undefined behaviour; convert each byte explicitly before classification.
    const auto isPunctuation = [](char ch) { return std::ispunct(static_cast<unsigned char>(ch)) != 0; };
    text.erase(std::remove_if(text.begin(), text.end(), isPunctuation), text.end());
}
63
StdCoreIntlCollatorRemoveAccents(ani_env * env,ani_class klass,ani_string etsText)64 ani_string StdCoreIntlCollatorRemoveAccents(ani_env *env, [[maybe_unused]] ani_class klass, ani_string etsText)
65 {
66 std::string text = ConvertFromAniString(env, etsText);
67 text = RemoveAccents(env, text);
68
69 ani_boolean unhandledExc;
70 ANI_FATAL_IF_ERROR(env->ExistUnhandledError(&unhandledExc));
71 if (unhandledExc != 0U) {
72 return nullptr;
73 }
74
75 return StdStrToAni(env, text);
76 }
77
StdCoreIntlCollatorRemovePunctuation(ani_env * env,ani_class klass,ani_string etsText)78 ani_string StdCoreIntlCollatorRemovePunctuation(ani_env *env, [[maybe_unused]] ani_class klass, ani_string etsText)
79 {
80 std::string text = ConvertFromAniString(env, etsText);
81 RemovePunctuation(text);
82 return StdStrToAni(env, text);
83 }
84
StdCoreIntlCollatorLocaleCmp(ani_env * env,ani_class klass,ani_string collationIn,ani_string langIn,ani_string firstStr,ani_string secondStr)85 ani_double StdCoreIntlCollatorLocaleCmp(ani_env *env, [[maybe_unused]] ani_class klass, ani_string collationIn,
86 ani_string langIn, ani_string firstStr, ani_string secondStr)
87 {
88 auto collation = ConvertFromAniString(env, collationIn);
89 auto lang = ConvertFromAniString(env, langIn);
90 auto str1 = ConvertFromAniString(env, firstStr);
91 auto str2 = ConvertFromAniString(env, secondStr);
92
93 auto locale = GetLocale(env, lang);
94 UErrorCode status = U_ZERO_ERROR;
95 icu::StringPiece collationName = "collation";
96 icu::StringPiece collationValue = collation.c_str();
97 locale.setUnicodeKeywordValue(collationName, collationValue, status);
98 if (UNLIKELY(U_FAILURE(status))) {
99 const auto errorMessage = std::string("Collation '").append(collation).append("' is invalid or not supported");
100 ThrowNewError(env, "Lstd/core/RuntimeException;", errorMessage.c_str(), "Lstd/core/String;:V");
101 return 0;
102 }
103
104 status = U_ZERO_ERROR;
105 std::unique_ptr<icu::Collator> collator(icu::Collator::createInstance(locale, status));
106 if (UNLIKELY(U_FAILURE(status))) {
107 icu::UnicodeString dispName;
108 locale.getDisplayName(dispName);
109 std::string localeName;
110 dispName.toUTF8String(localeName);
111 const auto errorMessage = std::string("Failed to create the collator for ").append(localeName);
112 ThrowNewError(env, "Lstd/core/RuntimeException;", errorMessage.c_str(), "Lstd/core/String;:V");
113 }
114
115 auto strPiece1 = icu::StringPiece(str1.c_str());
116 auto strPiece2 = icu::StringPiece(str2.c_str());
117 if ((strPiece1.empty() != 0) && (strPiece2.empty() != 0)) {
118 auto res = collator->compareUTF8(strPiece1, strPiece2, status);
119 if (UNLIKELY(U_FAILURE(status))) {
120 ThrowNewError(env, "Lstd/core/RuntimeException;", "Comparison failed", "Lstd/core/String;:V");
121 }
122 return res;
123 }
124
125 icu::UnicodeString source = StdStrToUnicode(str1);
126 icu::UnicodeString target = StdStrToUnicode(str2);
127 auto res = collator->compare(source, target, status);
128 if (UNLIKELY(U_FAILURE(status))) {
129 ThrowNewError(env, "Lstd/core/RuntimeException;", "Comparison failed", "Lstd/core/String;:V");
130 }
131 return res;
132 }
133
RegisterIntlCollator(ani_env * env)134 ani_status RegisterIntlCollator(ani_env *env)
135 {
136 const auto methods =
137 std::array {ani_native_function {"removePunctuation", "Lstd/core/String;:Lstd/core/String;",
138 reinterpret_cast<void *>(StdCoreIntlCollatorRemovePunctuation)},
139 ani_native_function {"removeAccents", "Lstd/core/String;:Lstd/core/String;",
140 reinterpret_cast<void *>(StdCoreIntlCollatorRemoveAccents)},
141 ani_native_function {"compareByCollation",
142 "Lstd/core/String;Lstd/core/String;Lstd/core/String;Lstd/core/String;:D",
143 reinterpret_cast<void *>(StdCoreIntlCollatorLocaleCmp)}};
144
145 ani_class collatorClass;
146 ANI_FATAL_IF_ERROR(env->FindClass("Lstd/core/Intl/Collator;", &collatorClass));
147 return env->Class_BindNativeMethods(collatorClass, methods.data(), methods.size());
148 }
149
150 } // namespace ark::ets::stdlib::intl
151