1 /*
2 * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15 #include "collator.h"
16
17 #include <stringpiece.h>
18
19 #include "locale_config.h"
20 #include "map"
21 #include "set"
22 #include "strenum.h"
23 #include "string"
24 #include "unicode/errorcode.h"
25 #include "unicode/locid.h"
26 #include "unicode/ucol.h"
27 #include "unicode/uloc.h"
28 #include "unistr.h"
29 #include "urename.h"
30 #include "utility"
31 #include "vector"
32
33 namespace OHOS {
34 namespace Global {
35 namespace I18n {
ParseOption(std::map<std::string,std::string> & options,const std::string & key)36 std::string Collator::ParseOption(std::map<std::string, std::string> &options, const std::string &key)
37 {
38 std::map<std::string, std::string>::iterator it = options.find(key);
39 if (it != options.end()) {
40 return it->second;
41 } else {
42 return "";
43 }
44 }
45
ParseAllOptions(std::map<std::string,std::string> & options)46 void Collator::ParseAllOptions(std::map<std::string, std::string> &options)
47 {
48 localeMatcher = ParseOption(options, "localeMatcher");
49 if (localeMatcher == "") {
50 localeMatcher = "best fit";
51 }
52
53 usage = ParseOption(options, "usage");
54 if (usage == "") {
55 usage = "sort";
56 }
57
58 sensitivity = ParseOption(options, "sensitivity");
59 if (sensitivity == "") {
60 sensitivity = "variant";
61 }
62
63 ignorePunctuation = ParseOption(options, "ignorePunctuation");
64 if (ignorePunctuation == "") {
65 ignorePunctuation = "false";
66 }
67
68 numeric = ParseOption(options, "numeric");
69 caseFirst = ParseOption(options, "caseFirst");
70 collation = ParseOption(options, "collation");
71 }
72
Collator(std::vector<std::string> & localeTags,std::map<std::string,std::string> & options)73 Collator::Collator(std::vector<std::string> &localeTags, std::map<std::string, std::string> &options)
74 {
75 ParseAllOptions(options);
76 UErrorCode status = U_ZERO_ERROR;
77 localeTags.push_back(LocaleConfig::GetSystemLocale());
78 for (size_t i = 0; i < localeTags.size(); i++) {
79 std::string curLocale = localeTags[i];
80 locale = icu::Locale::forLanguageTag(icu::StringPiece(curLocale), status);
81 if (status != U_ZERO_ERROR) {
82 status = U_ZERO_ERROR;
83 continue;
84 }
85 if (LocaleInfo::allValidLocales.count(locale.getLanguage()) > 0) {
86 localeInfo = std::make_unique<LocaleInfo>(curLocale, options);
87 if (!localeInfo->InitSuccess()) {
88 continue;
89 }
90 locale = localeInfo->GetLocale();
91 localeStr = localeInfo->GetBaseName();
92 createSuccess = InitCollator();
93 if (!createSuccess) {
94 continue;
95 }
96 break;
97 }
98 }
99 }
100
IsValidCollation(std::string & collation)101 bool Collator::IsValidCollation(std::string &collation)
102 {
103 UErrorCode status = U_ZERO_ERROR;
104 const char *currentCollation = uloc_toLegacyType("collation", collation.c_str());
105 if (currentCollation != nullptr) {
106 std::unique_ptr<icu::StringEnumeration> enumeration(
107 icu::Collator::getKeywordValuesForLocale("collation", icu::Locale(locale.getBaseName()), false, status));
108 if (!U_SUCCESS(status)) {
109 return false;
110 }
111 int length;
112 const char *validCollations = nullptr;
113 if (enumeration != nullptr) {
114 validCollations = enumeration->next(&length, status);
115 }
116 while (validCollations != nullptr) {
117 if (!strcmp(validCollations, currentCollation)) {
118 return true;
119 }
120 if (enumeration != nullptr) {
121 validCollations = enumeration->next(&length, status);
122 }
123 }
124 }
125 return false;
126 }
127
SetCollation()128 void Collator::SetCollation()
129 {
130 UErrorCode status = U_ZERO_ERROR;
131 if (collation != "") {
132 if (IsValidCollation(collation)) {
133 locale.setUnicodeKeywordValue("co", collation, status);
134 } else {
135 collation = "default";
136 locale.setUnicodeKeywordValue("co", nullptr, status);
137 }
138 } else {
139 collation = localeInfo->GetCollation();
140 if (collation != "") {
141 if (IsValidCollation(collation)) {
142 locale.setUnicodeKeywordValue("co", collation, status);
143 } else {
144 locale.setUnicodeKeywordValue("co", nullptr, status);
145 collation = "default";
146 }
147 } else {
148 locale.setUnicodeKeywordValue("co", nullptr, status);
149 collation = "default";
150 }
151 }
152 }
153
SetUsage()154 void Collator::SetUsage()
155 {
156 if (usage == "search") {
157 collation = "default";
158 UErrorCode status = U_ZERO_ERROR;
159 locale.setUnicodeKeywordValue("co", nullptr, status);
160 }
161 }
162
SetNumeric()163 void Collator::SetNumeric()
164 {
165 if (!collatorPtr) {
166 return;
167 }
168 if (numeric == "") {
169 numeric = localeInfo->GetNumeric();
170 if (numeric != "true" && numeric != "false") {
171 numeric = "false";
172 }
173 }
174 UErrorCode status = U_ZERO_ERROR;
175 if (numeric == "true") {
176 collatorPtr->setAttribute(UColAttribute::UCOL_NUMERIC_COLLATION,
177 UColAttributeValue::UCOL_ON, status);
178 } else {
179 collatorPtr->setAttribute(UColAttribute::UCOL_NUMERIC_COLLATION,
180 UColAttributeValue::UCOL_OFF, status);
181 }
182 }
183
SetCaseFirst()184 void Collator::SetCaseFirst()
185 {
186 if (!collatorPtr) {
187 return;
188 }
189 if (caseFirst == "") {
190 caseFirst = localeInfo->GetCaseFirst();
191 if (caseFirst != "upper" && caseFirst != "lower" && caseFirst != "false") {
192 caseFirst = "false";
193 }
194 }
195 UErrorCode status = U_ZERO_ERROR;
196 if (caseFirst == "upper") {
197 collatorPtr->setAttribute(UColAttribute::UCOL_CASE_FIRST,
198 UColAttributeValue::UCOL_UPPER_FIRST, status);
199 } else if (caseFirst == "lower") {
200 collatorPtr->setAttribute(UColAttribute::UCOL_CASE_FIRST,
201 UColAttributeValue::UCOL_LOWER_FIRST, status);
202 } else {
203 collatorPtr->setAttribute(UColAttribute::UCOL_CASE_FIRST,
204 UColAttributeValue::UCOL_OFF, status);
205 }
206 }
207
SetSensitivity()208 void Collator::SetSensitivity()
209 {
210 if (!collatorPtr) {
211 return;
212 }
213 if (sensitivity == "base") {
214 collatorPtr->setStrength(icu::Collator::PRIMARY);
215 } else if (sensitivity == "accent") {
216 collatorPtr->setStrength(icu::Collator::SECONDARY);
217 } else if (sensitivity == "case") {
218 collatorPtr->setStrength(icu::Collator::PRIMARY);
219 UErrorCode status = U_ZERO_ERROR;
220 collatorPtr->setAttribute(UColAttribute::UCOL_CASE_LEVEL,
221 UColAttributeValue::UCOL_ON, status);
222 } else {
223 collatorPtr->setStrength(icu::Collator::TERTIARY);
224 }
225 }
226
SetIgnorePunctuation()227 void Collator::SetIgnorePunctuation()
228 {
229 if (!collatorPtr) {
230 return;
231 }
232 if (ignorePunctuation == "true") {
233 UErrorCode status = U_ZERO_ERROR;
234 collatorPtr->setAttribute(UColAttribute::UCOL_ALTERNATE_HANDLING,
235 UColAttributeValue::UCOL_SHIFTED, status);
236 }
237 }
238
InitCollator()239 bool Collator::InitCollator()
240 {
241 SetCollation();
242 SetUsage();
243 UErrorCode status = UErrorCode::U_ZERO_ERROR;
244 collatorPtr = icu::Collator::createInstance(locale, status);
245 if (!U_SUCCESS(status) || collatorPtr == nullptr) {
246 if (collatorPtr != nullptr) {
247 delete collatorPtr;
248 collatorPtr = nullptr;
249 }
250 return false;
251 }
252 SetNumeric();
253 SetCaseFirst();
254 SetSensitivity();
255 SetIgnorePunctuation();
256 return true;
257 }
258
~Collator()259 Collator::~Collator()
260 {
261 if (collatorPtr != nullptr) {
262 delete collatorPtr;
263 collatorPtr = nullptr;
264 }
265 }
266
Compare(const std::string & first,const std::string & second)267 CompareResult Collator::Compare(const std::string &first, const std::string &second)
268 {
269 if (!collatorPtr) {
270 return CompareResult::INVALID;
271 }
272 icu::Collator::EComparisonResult result = collatorPtr->compare(icu::UnicodeString(first.data(), first.length()),
273 icu::UnicodeString(second.data(), second.length()));
274 if (result == icu::Collator::EComparisonResult::LESS) {
275 return CompareResult::SMALLER;
276 } else if (result == icu::Collator::EComparisonResult::EQUAL) {
277 return CompareResult::EQUAL;
278 } else {
279 return CompareResult::GREATER;
280 }
281 }
282
ResolvedOptions(std::map<std::string,std::string> & options)283 void Collator::ResolvedOptions(std::map<std::string, std::string> &options)
284 {
285 options.insert(std::pair<std::string, std::string>("localeMatcher", localeMatcher));
286 options.insert(std::pair<std::string, std::string>("locale", localeStr));
287 options.insert(std::pair<std::string, std::string>("usage", usage));
288 options.insert(std::pair<std::string, std::string>("sensitivity", sensitivity));
289 options.insert(std::pair<std::string, std::string>("ignorePunctuation", ignorePunctuation));
290 options.insert(std::pair<std::string, std::string>("numeric", numeric));
291 options.insert(std::pair<std::string, std::string>("caseFirst", caseFirst));
292 options.insert(std::pair<std::string, std::string>("collation", collation));
293 }
294 } // namespace I18n
295 } // namespace Global
296 } // namespace OHOS
297