1 /*
2 * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15 #include "collator.h"
16
17 #include <map>
18 #include <set>
19 #include <string>
20 #include <utility>
21 #include <vector>
22
23 #include "unicode/strenum.h"
24 #include "unicode/errorcode.h"
25 #include "unicode/locid.h"
26 #include "unicode/stringpiece.h"
27 #include "unicode/ucol.h"
28 #include "unicode/uloc.h"
29 #include "unicode/unistr.h"
30 #include "unicode/urename.h"
31
32 #include "i18n_hilog.h"
33 #include "locale_config.h"
34 #include "locale_helper.h"
35
36 namespace OHOS {
37 namespace Global {
38 namespace I18n {
GetAvailableLocales()39 std::set<std::string> Collator::GetAvailableLocales()
40 {
41 std::set<std::string> allLocales;
42 int32_t count = 0;
43 const icu::Locale *localeList = icu::Collator::getAvailableLocales(count);
44 if (localeList == nullptr) {
45 HILOG_ERROR_I18N("Collator::GetAvailableLocales: getAvailableLocales nullptr.");
46 return allLocales;
47 }
48 for (int32_t i = 0; i < count; ++i) {
49 allLocales.insert(localeList[i].getName());
50 }
51 return allLocales;
52 }
53
SupportedLocalesOf(const std::vector<std::string> & requestLocales,const std::map<std::string,std::string> & configs,I18nErrorCode & status)54 std::vector<std::string> Collator::SupportedLocalesOf(const std::vector<std::string> &requestLocales,
55 const std::map<std::string, std::string> &configs,
56 I18nErrorCode &status)
57 {
58 std::vector<std::string> undefined = {};
59 auto requestedLocales = LocaleHelper::CanonicalizeLocaleList(requestLocales, status);
60 if (status != I18nErrorCode::SUCCESS) {
61 return undefined;
62 }
63
64 std::string localeMatcher = LocaleHelper::ParseOption(configs, "localeMatcher", "best fit", true, status);
65 if (status != I18nErrorCode::SUCCESS) {
66 return undefined;
67 }
68 std::set<std::string> availableLocales = GetAvailableLocales();
69 return LocaleHelper::LookupSupportedLocales(availableLocales, requestedLocales);
70 }
71
ParseAllOptions(std::map<std::string,std::string> & options)72 void Collator::ParseAllOptions(std::map<std::string, std::string> &options)
73 {
74 localeMatcher = LocaleHelper::ParseOption(options, "localeMatcher", "best fit", true, i18nStatus);
75 usage = LocaleHelper::ParseOption(options, "usage", "sort", true, i18nStatus);
76 sensitivity = LocaleHelper::ParseOption(options, "sensitivity", "variant", true, i18nStatus);
77 ignorePunctuation = LocaleHelper::ParseOption(options, "ignorePunctuation", "false", true, i18nStatus);
78 numeric = LocaleHelper::ParseOption(options, "numeric", "", true, i18nStatus);
79 caseFirst = LocaleHelper::ParseOption(options, "caseFirst", "", true, i18nStatus);
80 collation = LocaleHelper::ParseOptionWithoutCheck(options, "collation", "");
81 }
82
Collator(std::vector<std::string> & localeTags,std::map<std::string,std::string> & options,const std::string & defaultLocale)83 Collator::Collator(std::vector<std::string> &localeTags, std::map<std::string, std::string> &options,
84 const std::string &defaultLocale)
85 {
86 i18nStatus = I18nErrorCode::SUCCESS;
87 auto requestedLocales = LocaleHelper::CanonicalizeLocaleList(localeTags, i18nStatus);
88 if (i18nStatus != I18nErrorCode::SUCCESS) {
89 return;
90 }
91 Init(requestedLocales, options, defaultLocale);
92 }
93
Collator(std::vector<std::string> & localeTags,std::map<std::string,std::string> & options)94 Collator::Collator(std::vector<std::string> &localeTags, std::map<std::string, std::string> &options)
95 {
96 Init(localeTags, options, LocaleConfig::GetEffectiveLocale());
97 }
98
Init(std::vector<std::string> & localeTags,std::map<std::string,std::string> & options,const std::string & defaultLocale)99 void Collator::Init(std::vector<std::string> &localeTags, std::map<std::string, std::string> &options,
100 const std::string &defaultLocale)
101 {
102 i18nStatus = I18nErrorCode::SUCCESS;
103 ParseAllOptions(options);
104
105 localeTags.push_back(defaultLocale);
106 for (size_t i = 0; i < localeTags.size(); i++) {
107 std::string curLocale = localeTags[i];
108 UErrorCode status = U_ZERO_ERROR;
109 locale = icu::Locale::forLanguageTag(icu::StringPiece(curLocale), status);
110 if (U_FAILURE(status)) {
111 continue;
112 }
113 if (LocaleInfo::allValidLocales.count(locale.getLanguage()) > 0) {
114 localeInfo = std::make_unique<LocaleInfo>(curLocale, options);
115 if (!localeInfo->InitSuccess()) {
116 continue;
117 }
118 locale = localeInfo->GetLocale();
119 localeStr = localeInfo->GetBaseName();
120 createSuccess = InitCollator();
121 if (!createSuccess) {
122 continue;
123 }
124 break;
125 }
126 }
127 }
128
IsValidCollation(std::string & collation)129 bool Collator::IsValidCollation(std::string &collation)
130 {
131 const char *currentCollation = uloc_toLegacyType("collation", collation.c_str());
132 if (currentCollation == nullptr) {
133 return false;
134 }
135 UErrorCode status = U_ZERO_ERROR;
136 std::unique_ptr<icu::StringEnumeration> enumeration(
137 icu::Collator::getKeywordValuesForLocale("collation", icu::Locale(locale.getBaseName()), false, status));
138 if (U_FAILURE(status)) {
139 return false;
140 }
141 int length;
142 const char *validCollations = nullptr;
143 if (enumeration != nullptr) {
144 validCollations = enumeration->next(&length, status);
145 }
146 while (validCollations != nullptr) {
147 if (!strcmp(validCollations, currentCollation)) {
148 return true;
149 }
150 if (enumeration != nullptr) {
151 validCollations = enumeration->next(&length, status);
152 }
153 }
154 return false;
155 }
156
SetCollation()157 void Collator::SetCollation()
158 {
159 UErrorCode status = U_ZERO_ERROR;
160 if (!collation.empty()) {
161 if (IsValidCollation(collation)) {
162 locale.setUnicodeKeywordValue("co", collation, status);
163 } else {
164 collation = "default";
165 locale.setUnicodeKeywordValue("co", nullptr, status);
166 }
167 } else {
168 collation = localeInfo->GetCollation();
169 if (!collation.empty()) {
170 if (IsValidCollation(collation)) {
171 locale.setUnicodeKeywordValue("co", collation, status);
172 } else {
173 locale.setUnicodeKeywordValue("co", nullptr, status);
174 collation = "default";
175 }
176 } else {
177 locale.setUnicodeKeywordValue("co", nullptr, status);
178 collation = "default";
179 }
180 }
181 if (U_FAILURE(status)) {
182 HILOG_ERROR_I18N("Collator::SetCollation: Set unicode key word value failed.");
183 }
184 }
185
SetUsage()186 void Collator::SetUsage()
187 {
188 if (usage == "search") {
189 collation = "default";
190 UErrorCode status = U_ZERO_ERROR;
191 locale.setUnicodeKeywordValue("co", nullptr, status);
192 if (U_FAILURE(status)) {
193 HILOG_ERROR_I18N("Collator::SetUsage: Set unicode key word value failed.");
194 }
195 }
196 }
197
SetNumeric()198 void Collator::SetNumeric()
199 {
200 if (!collatorPtr) {
201 return;
202 }
203 if (numeric.empty()) {
204 numeric = localeInfo->GetNumeric();
205 if (numeric != "true" && numeric != "false") {
206 numeric = "false";
207 }
208 }
209 UErrorCode status = U_ZERO_ERROR;
210 if (numeric == "true") {
211 collatorPtr->setAttribute(UColAttribute::UCOL_NUMERIC_COLLATION,
212 UColAttributeValue::UCOL_ON, status);
213 } else {
214 collatorPtr->setAttribute(UColAttribute::UCOL_NUMERIC_COLLATION,
215 UColAttributeValue::UCOL_OFF, status);
216 }
217 if (U_FAILURE(status)) {
218 HILOG_ERROR_I18N("Collator::SetNumeric: Set attribute failed.");
219 }
220 }
221
SetCaseFirst()222 void Collator::SetCaseFirst()
223 {
224 if (!collatorPtr) {
225 return;
226 }
227 if (caseFirst.empty()) {
228 caseFirst = localeInfo->GetCaseFirst();
229 if (caseFirst != "upper" && caseFirst != "lower" && caseFirst != "false") {
230 caseFirst = "false";
231 }
232 }
233 UErrorCode status = U_ZERO_ERROR;
234 if (caseFirst == "upper") {
235 collatorPtr->setAttribute(UColAttribute::UCOL_CASE_FIRST,
236 UColAttributeValue::UCOL_UPPER_FIRST, status);
237 } else if (caseFirst == "lower") {
238 collatorPtr->setAttribute(UColAttribute::UCOL_CASE_FIRST,
239 UColAttributeValue::UCOL_LOWER_FIRST, status);
240 } else {
241 collatorPtr->setAttribute(UColAttribute::UCOL_CASE_FIRST,
242 UColAttributeValue::UCOL_OFF, status);
243 }
244 if (U_FAILURE(status)) {
245 HILOG_ERROR_I18N("Collator::SetCaseFirst: Set attribute failed.");
246 }
247 }
248
SetSensitivity()249 void Collator::SetSensitivity()
250 {
251 if (!collatorPtr) {
252 return;
253 }
254 if (sensitivity == "base") {
255 collatorPtr->setStrength(icu::Collator::PRIMARY);
256 } else if (sensitivity == "accent") {
257 collatorPtr->setStrength(icu::Collator::SECONDARY);
258 } else if (sensitivity == "case") {
259 collatorPtr->setStrength(icu::Collator::PRIMARY);
260 UErrorCode status = U_ZERO_ERROR;
261 collatorPtr->setAttribute(UColAttribute::UCOL_CASE_LEVEL,
262 UColAttributeValue::UCOL_ON, status);
263 } else {
264 collatorPtr->setStrength(icu::Collator::TERTIARY);
265 }
266 }
267
SetIgnorePunctuation()268 void Collator::SetIgnorePunctuation()
269 {
270 if (!collatorPtr) {
271 return;
272 }
273 if (ignorePunctuation == "true") {
274 UErrorCode status = U_ZERO_ERROR;
275 collatorPtr->setAttribute(UColAttribute::UCOL_ALTERNATE_HANDLING,
276 UColAttributeValue::UCOL_SHIFTED, status);
277 if (U_FAILURE(status)) {
278 HILOG_ERROR_I18N("Collator::SetIgnorePunctuation: Set attribute failed.");
279 }
280 }
281 }
282
InitCollator()283 bool Collator::InitCollator()
284 {
285 SetCollation();
286 SetUsage();
287 UErrorCode status = UErrorCode::U_ZERO_ERROR;
288 collatorPtr = icu::Collator::createInstance(locale, status);
289 if (!U_SUCCESS(status) || collatorPtr == nullptr) {
290 if (collatorPtr != nullptr) {
291 delete collatorPtr;
292 collatorPtr = nullptr;
293 }
294 return false;
295 }
296 SetNumeric();
297 SetCaseFirst();
298 SetSensitivity();
299 SetIgnorePunctuation();
300 return true;
301 }
302
~Collator()303 Collator::~Collator()
304 {
305 if (collatorPtr != nullptr) {
306 delete collatorPtr;
307 collatorPtr = nullptr;
308 }
309 }
310
Compare(const std::string & first,const std::string & second)311 CompareResult Collator::Compare(const std::string &first, const std::string &second)
312 {
313 if (!collatorPtr) {
314 return CompareResult::INVALID;
315 }
316 icu::Collator::EComparisonResult result = collatorPtr->compare(icu::UnicodeString(first.data(), first.length()),
317 icu::UnicodeString(second.data(), second.length()));
318 if (result == icu::Collator::EComparisonResult::LESS) {
319 return CompareResult::SMALLER;
320 } else if (result == icu::Collator::EComparisonResult::EQUAL) {
321 return CompareResult::EQUAL;
322 } else {
323 return CompareResult::GREATER;
324 }
325 }
326
ResolvedOptions(std::map<std::string,std::string> & options)327 void Collator::ResolvedOptions(std::map<std::string, std::string> &options)
328 {
329 options.insert(std::pair<std::string, std::string>("localeMatcher", localeMatcher));
330 options.insert(std::pair<std::string, std::string>("locale", localeStr));
331 options.insert(std::pair<std::string, std::string>("usage", usage));
332 options.insert(std::pair<std::string, std::string>("sensitivity", sensitivity));
333 options.insert(std::pair<std::string, std::string>("ignorePunctuation", ignorePunctuation));
334 options.insert(std::pair<std::string, std::string>("numeric", numeric));
335 options.insert(std::pair<std::string, std::string>("caseFirst", caseFirst));
336 options.insert(std::pair<std::string, std::string>("collation", collation));
337 }
338
GetError() const339 I18nErrorCode Collator::GetError() const
340 {
341 return i18nStatus;
342 }
343 } // namespace I18n
344 } // namespace Global
345 } // namespace OHOS
346