• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
15 #include "collator.h"
16 
17 #include <stringpiece.h>
18 
19 #include "locale_config.h"
20 #include "map"
21 #include "set"
22 #include "strenum.h"
23 #include "string"
24 #include "unicode/errorcode.h"
25 #include "unicode/locid.h"
26 #include "unicode/ucol.h"
27 #include "unicode/uloc.h"
28 #include "unistr.h"
29 #include "urename.h"
30 #include "utility"
31 #include "vector"
32 
33 namespace OHOS {
34 namespace Global {
35 namespace I18n {
ParseOption(std::map<std::string,std::string> & options,const std::string & key)36 std::string Collator::ParseOption(std::map<std::string, std::string> &options, const std::string &key)
37 {
38     std::map<std::string, std::string>::iterator it = options.find(key);
39     if (it != options.end()) {
40         return it->second;
41     } else {
42         return "";
43     }
44 }
45 
ParseAllOptions(std::map<std::string,std::string> & options)46 void Collator::ParseAllOptions(std::map<std::string, std::string> &options)
47 {
48     localeMatcher = ParseOption(options, "localeMatcher");
49     if (localeMatcher == "") {
50         localeMatcher = "best fit";
51     }
52 
53     usage = ParseOption(options, "usage");
54     if (usage == "") {
55         usage = "sort";
56     }
57 
58     sensitivity = ParseOption(options, "sensitivity");
59     if (sensitivity == "") {
60         sensitivity = "variant";
61     }
62 
63     ignorePunctuation = ParseOption(options, "ignorePunctuation");
64     if (ignorePunctuation == "") {
65         ignorePunctuation = "false";
66     }
67 
68     numeric = ParseOption(options, "numeric");
69     caseFirst = ParseOption(options, "caseFirst");
70     collation = ParseOption(options, "collation");
71 }
72 
Collator(std::vector<std::string> & localeTags,std::map<std::string,std::string> & options)73 Collator::Collator(std::vector<std::string> &localeTags, std::map<std::string, std::string> &options)
74 {
75     ParseAllOptions(options);
76     UErrorCode status = U_ZERO_ERROR;
77     localeTags.push_back(LocaleConfig::GetSystemLocale());
78     for (size_t i = 0; i < localeTags.size(); i++) {
79         std::string curLocale = localeTags[i];
80         locale = icu::Locale::forLanguageTag(icu::StringPiece(curLocale), status);
81         if (status != U_ZERO_ERROR) {
82             status = U_ZERO_ERROR;
83             continue;
84         }
85         if (LocaleInfo::allValidLocales.count(locale.getLanguage()) > 0) {
86             localeInfo = std::make_unique<LocaleInfo>(curLocale, options);
87             if (!localeInfo->InitSuccess()) {
88                 continue;
89             }
90             locale = localeInfo->GetLocale();
91             localeStr = localeInfo->GetBaseName();
92             createSuccess = InitCollator();
93             if (!createSuccess) {
94                 continue;
95             }
96             break;
97         }
98     }
99 }
100 
IsValidCollation(std::string & collation)101 bool Collator::IsValidCollation(std::string &collation)
102 {
103     UErrorCode status = U_ZERO_ERROR;
104     const char *currentCollation = uloc_toLegacyType("collation", collation.c_str());
105     if (currentCollation != nullptr) {
106         std::unique_ptr<icu::StringEnumeration> enumeration(
107             icu::Collator::getKeywordValuesForLocale("collation", icu::Locale(locale.getBaseName()), false, status));
108         if (!U_SUCCESS(status)) {
109             return false;
110         }
111         int length;
112         const char *validCollations = nullptr;
113         if (enumeration != nullptr) {
114             validCollations = enumeration->next(&length, status);
115         }
116         while (validCollations != nullptr) {
117             if (!strcmp(validCollations, currentCollation)) {
118                 return true;
119             }
120             if (enumeration != nullptr) {
121                 validCollations = enumeration->next(&length, status);
122             }
123         }
124     }
125     return false;
126 }
127 
SetCollation()128 void Collator::SetCollation()
129 {
130     UErrorCode status = U_ZERO_ERROR;
131     if (collation != "") {
132         if (IsValidCollation(collation)) {
133             locale.setUnicodeKeywordValue("co", collation, status);
134         } else {
135             collation = "default";
136             locale.setUnicodeKeywordValue("co", nullptr, status);
137         }
138     } else {
139         collation = localeInfo->GetCollation();
140         if (collation != "") {
141             if (IsValidCollation(collation)) {
142                 locale.setUnicodeKeywordValue("co", collation, status);
143             } else {
144                 locale.setUnicodeKeywordValue("co", nullptr, status);
145                 collation = "default";
146             }
147         } else {
148             locale.setUnicodeKeywordValue("co", nullptr, status);
149             collation = "default";
150         }
151     }
152 }
153 
SetUsage()154 void Collator::SetUsage()
155 {
156     if (usage == "search") {
157         collation = "default";
158         UErrorCode status = U_ZERO_ERROR;
159         locale.setUnicodeKeywordValue("co", nullptr, status);
160     }
161 }
162 
SetNumeric()163 void Collator::SetNumeric()
164 {
165     if (!collatorPtr) {
166         return;
167     }
168     if (numeric == "") {
169         numeric = localeInfo->GetNumeric();
170         if (numeric != "true" && numeric != "false") {
171             numeric = "false";
172         }
173     }
174     UErrorCode status = U_ZERO_ERROR;
175     if (numeric == "true") {
176         collatorPtr->setAttribute(UColAttribute::UCOL_NUMERIC_COLLATION,
177             UColAttributeValue::UCOL_ON, status);
178     } else {
179         collatorPtr->setAttribute(UColAttribute::UCOL_NUMERIC_COLLATION,
180             UColAttributeValue::UCOL_OFF, status);
181     }
182 }
183 
SetCaseFirst()184 void Collator::SetCaseFirst()
185 {
186     if (!collatorPtr) {
187         return;
188     }
189     if (caseFirst == "") {
190         caseFirst = localeInfo->GetCaseFirst();
191         if (caseFirst != "upper" && caseFirst != "lower" && caseFirst != "false") {
192             caseFirst = "false";
193         }
194     }
195     UErrorCode status = U_ZERO_ERROR;
196     if (caseFirst == "upper") {
197         collatorPtr->setAttribute(UColAttribute::UCOL_CASE_FIRST,
198             UColAttributeValue::UCOL_UPPER_FIRST, status);
199     } else if (caseFirst == "lower") {
200         collatorPtr->setAttribute(UColAttribute::UCOL_CASE_FIRST,
201             UColAttributeValue::UCOL_LOWER_FIRST, status);
202     } else {
203         collatorPtr->setAttribute(UColAttribute::UCOL_CASE_FIRST,
204             UColAttributeValue::UCOL_OFF, status);
205     }
206 }
207 
SetSensitivity()208 void Collator::SetSensitivity()
209 {
210     if (!collatorPtr) {
211         return;
212     }
213     if (sensitivity == "base") {
214         collatorPtr->setStrength(icu::Collator::PRIMARY);
215     } else if (sensitivity == "accent") {
216         collatorPtr->setStrength(icu::Collator::SECONDARY);
217     } else if (sensitivity == "case") {
218         collatorPtr->setStrength(icu::Collator::PRIMARY);
219         UErrorCode status = U_ZERO_ERROR;
220         collatorPtr->setAttribute(UColAttribute::UCOL_CASE_LEVEL,
221             UColAttributeValue::UCOL_ON, status);
222     } else {
223         collatorPtr->setStrength(icu::Collator::TERTIARY);
224     }
225 }
226 
SetIgnorePunctuation()227 void Collator::SetIgnorePunctuation()
228 {
229     if (!collatorPtr) {
230         return;
231     }
232     if (ignorePunctuation == "true") {
233         UErrorCode status = U_ZERO_ERROR;
234         collatorPtr->setAttribute(UColAttribute::UCOL_ALTERNATE_HANDLING,
235             UColAttributeValue::UCOL_SHIFTED, status);
236     }
237 }
238 
InitCollator()239 bool Collator::InitCollator()
240 {
241     SetCollation();
242     SetUsage();
243     UErrorCode status = UErrorCode::U_ZERO_ERROR;
244     collatorPtr = icu::Collator::createInstance(locale, status);
245     if (!U_SUCCESS(status) || collatorPtr == nullptr) {
246         if (collatorPtr != nullptr) {
247             delete collatorPtr;
248             collatorPtr = nullptr;
249         }
250         return false;
251     }
252     SetNumeric();
253     SetCaseFirst();
254     SetSensitivity();
255     SetIgnorePunctuation();
256     return true;
257 }
258 
~Collator()259 Collator::~Collator()
260 {
261     if (collatorPtr != nullptr) {
262         delete collatorPtr;
263         collatorPtr = nullptr;
264     }
265 }
266 
Compare(const std::string & first,const std::string & second)267 CompareResult Collator::Compare(const std::string &first, const std::string &second)
268 {
269     if (!collatorPtr) {
270         return CompareResult::INVALID;
271     }
272     icu::Collator::EComparisonResult result = collatorPtr->compare(icu::UnicodeString(first.data(), first.length()),
273         icu::UnicodeString(second.data(), second.length()));
274     if (result == icu::Collator::EComparisonResult::LESS) {
275         return CompareResult::SMALLER;
276     } else if (result == icu::Collator::EComparisonResult::EQUAL) {
277         return CompareResult::EQUAL;
278     } else {
279         return CompareResult::GREATER;
280     }
281 }
282 
ResolvedOptions(std::map<std::string,std::string> & options)283 void Collator::ResolvedOptions(std::map<std::string, std::string> &options)
284 {
285     options.insert(std::pair<std::string, std::string>("localeMatcher", localeMatcher));
286     options.insert(std::pair<std::string, std::string>("locale", localeStr));
287     options.insert(std::pair<std::string, std::string>("usage", usage));
288     options.insert(std::pair<std::string, std::string>("sensitivity", sensitivity));
289     options.insert(std::pair<std::string, std::string>("ignorePunctuation", ignorePunctuation));
290     options.insert(std::pair<std::string, std::string>("numeric", numeric));
291     options.insert(std::pair<std::string, std::string>("caseFirst", caseFirst));
292     options.insert(std::pair<std::string, std::string>("collation", collation));
293 }
294 } // namespace I18n
295 } // namespace Global
296 } // namespace OHOS
297