1 /*
2 * Copyright (c) 2021 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "ecmascript/js_collator.h"
17
18 #include "ecmascript/global_env.h"
19 #include "ecmascript/mem/c_string.h"
20 #include "ecmascript/mem/barriers-inl.h"
21
22 #include "unicode/udata.h"
23
24 namespace panda::ecmascript {
25 // NOLINTNEXTLINE (readability-identifier-naming, fuchsia-statically-constructed-objects)
26 const CString JSCollator::uIcuDataColl = U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "coll";
27 const std::map<std::string, CaseFirstOption> JSCollator::caseFirstMap = {
28 {"upper", CaseFirstOption::UPPER},
29 {"lower", CaseFirstOption::LOWER},
30 {"false", CaseFirstOption::FALSE_OPTION}
31 };
32 const std::map<CaseFirstOption, UColAttributeValue> JSCollator::uColAttributeValueMap = {
33 {CaseFirstOption::UPPER, UCOL_UPPER_FIRST},
34 {CaseFirstOption::LOWER, UCOL_LOWER_FIRST},
35 {CaseFirstOption::FALSE_OPTION, UCOL_OFF},
36 {CaseFirstOption::UNDEFINED, UCOL_OFF}
37 };
38
GetAvailableLocales(JSThread * thread)39 JSHandle<TaggedArray> JSCollator::GetAvailableLocales(JSThread *thread)
40 {
41 const char *key = nullptr;
42 const char *path = JSCollator::uIcuDataColl.c_str();
43 JSHandle<TaggedArray> availableLocales = JSLocale::GetAvailableLocales(thread, key, path);
44 return availableLocales;
45 }
46
47 /* static */
SetIcuCollator(JSThread * thread,const JSHandle<JSCollator> & collator,icu::Collator * icuCollator,const DeleteEntryPoint & callback)48 void JSCollator::SetIcuCollator(JSThread *thread, const JSHandle<JSCollator> &collator,
49 icu::Collator *icuCollator, const DeleteEntryPoint &callback)
50 {
51 EcmaVM *ecmaVm = thread->GetEcmaVM();
52 ObjectFactory *factory = ecmaVm->GetFactory();
53
54 ASSERT(icuCollator != nullptr);
55 JSTaggedValue data = collator->GetIcuField();
56 if (data.IsJSNativePointer()) {
57 JSNativePointer *native = JSNativePointer::Cast(data.GetTaggedObject());
58 native->ResetExternalPointer(icuCollator);
59 return;
60 }
61 JSHandle<JSNativePointer> pointer = factory->NewJSNativePointer(icuCollator, callback);
62 collator->SetIcuField(thread, pointer.GetTaggedValue());
63 }
64
InitializeCollator(JSThread * thread,const JSHandle<JSCollator> & collator,const JSHandle<JSTaggedValue> & locales,const JSHandle<JSTaggedValue> & options,bool forIcuCache)65 JSHandle<JSCollator> JSCollator::InitializeCollator(JSThread *thread,
66 const JSHandle<JSCollator> &collator,
67 const JSHandle<JSTaggedValue> &locales,
68 const JSHandle<JSTaggedValue> &options,
69 bool forIcuCache)
70 {
71 EcmaVM *ecmaVm = thread->GetEcmaVM();
72 ObjectFactory *factory = ecmaVm->GetFactory();
73 const GlobalEnvConstants *globalConst = thread->GlobalConstants();
74 // 1. Let requestedLocales be ? CanonicalizeLocaleList(locales).
75 JSHandle<TaggedArray> requestedLocales = JSLocale::CanonicalizeLocaleList(thread, locales);
76 RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
77
78 // 2. If options is undefined, then
79 // a. Let options be ObjectCreate(null).
80 // 3. Else,
81 // a. Let options be ? ToObject(options).
82 JSHandle<JSObject> optionsObject;
83 if (options->IsUndefined()) {
84 optionsObject = factory->CreateNullJSObject();
85 } else {
86 optionsObject = JSTaggedValue::ToObject(thread, options);
87 RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
88 }
89 // 4. Let usage be ? GetOption(options, "usage", "string", « "sort", "search" », "sort").
90 auto usage = JSLocale::GetOptionOfString<UsageOption>(thread, optionsObject, globalConst->GetHandledUsageString(),
91 {UsageOption::SORT, UsageOption::SEARCH}, {"sort", "search"},
92 UsageOption::SORT);
93 RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
94 collator->SetUsage(usage);
95
96 // 5. Let matcher be ? GetOption(options, "localeMatcher", "string", « "lookup", "best fit" », "best fit").
97 auto matcher = JSLocale::GetOptionOfString<LocaleMatcherOption>(
98 thread, optionsObject, globalConst->GetHandledLocaleMatcherString(),
99 {LocaleMatcherOption::LOOKUP, LocaleMatcherOption::BEST_FIT}, {"lookup", "best fit"},
100 LocaleMatcherOption::BEST_FIT);
101 RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
102
103 // 6. Let collation be ? GetOption(options, "collation", "string", undefined, undefined).
104 // 7. If collation is not undefined, then
105 // a. If collation does not match the Unicode Locale Identifier type nonterminal, throw a RangeError exception.
106 JSHandle<JSTaggedValue> collation =
107 JSLocale::GetOption(thread, optionsObject, globalConst->GetHandledCollationString(), OptionType::STRING,
108 globalConst->GetHandledUndefined(), globalConst->GetHandledUndefined());
109 RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
110 collator->SetCollation(thread, collation);
111 std::string collationStr;
112 if (!collation->IsUndefined()) {
113 JSHandle<EcmaString> collationEcmaStr = JSHandle<EcmaString>::Cast(collation);
114 collationStr = JSLocale::ConvertToStdString(collationEcmaStr);
115 if (!JSLocale::IsWellAlphaNumList(collationStr)) {
116 THROW_RANGE_ERROR_AND_RETURN(thread, "invalid collation", collator);
117 }
118 }
119
120 // 8. Let numeric be ? GetOption(options, "numeric", "boolean", undefined, undefined).
121 bool numeric = false;
122 bool foundNumeric =
123 JSLocale::GetOptionOfBool(thread, optionsObject, globalConst->GetHandledNumericString(), false, &numeric);
124 RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
125 collator->SetNumeric(numeric);
126
127 // 14. Let caseFirst be ? GetOption(options, "caseFirst", "string", « "upper", "lower", "false" », undefined).
128 CaseFirstOption caseFirst = JSLocale::GetOptionOfString<CaseFirstOption>(
129 thread, optionsObject, globalConst->GetHandledCaseFirstString(),
130 {CaseFirstOption::UPPER, CaseFirstOption::LOWER, CaseFirstOption::FALSE_OPTION}, {"upper", "lower", "false"},
131 CaseFirstOption::UNDEFINED);
132 RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
133 collator->SetCaseFirst(caseFirst);
134
135 // 16. Let relevantExtensionKeys be %Collator%.[[RelevantExtensionKeys]].
136 std::set<std::string> relevantExtensionKeys = {"co", "kn", "kf"};
137
138 // 17. Let r be ResolveLocale(%Collator%.[[AvailableLocales]], requestedLocales, opt,
139 // %Collator%.[[RelevantExtensionKeys]], localeData).
140 JSHandle<TaggedArray> availableLocales;
141 if (requestedLocales->GetLength() == 0) {
142 availableLocales = factory->EmptyArray();
143 } else {
144 availableLocales = GetAvailableLocales(thread);
145 }
146 ResolvedLocale r =
147 JSLocale::ResolveLocale(thread, availableLocales, requestedLocales, matcher, relevantExtensionKeys);
148 icu::Locale icuLocale = r.localeData;
149 JSHandle<EcmaString> localeStr = JSLocale::ToLanguageTag(thread, icuLocale);
150 collator->SetLocale(thread, localeStr.GetTaggedValue());
151 ASSERT_PRINT(!icuLocale.isBogus(), "icuLocale is bogus");
152
153 // If collation is undefined iterate RelevantExtensionKeys to find "co"
154 // if found, set ICU collator UnicodeKeyword to iterator->second
155 UErrorCode status = U_ZERO_ERROR;
156 if (!collation->IsUndefined()) {
157 auto extensionIter = r.extensions.find("co");
158 if (extensionIter != r.extensions.end() && extensionIter->second != collationStr) {
159 icuLocale.setUnicodeKeywordValue("co", nullptr, status);
160 ASSERT_PRINT(U_SUCCESS(status), "icuLocale set co failed");
161 }
162 }
163
164 // If usage is serach set co-serach to icu locale key word value
165 // Eles set collation string to icu locale key word value
166 if (usage == UsageOption::SEARCH) {
167 icuLocale.setUnicodeKeywordValue("co", "search", status);
168 ASSERT(U_SUCCESS(status));
169 } else {
170 if (!collationStr.empty() && JSLocale::IsWellCollation(icuLocale, collationStr)) {
171 icuLocale.setUnicodeKeywordValue("co", collationStr, status);
172 ASSERT(U_SUCCESS(status));
173 }
174 }
175
176 std::unique_ptr<icu::Collator> icuCollator(icu::Collator::createInstance(icuLocale, status));
177 if (U_FAILURE(status) || icuCollator == nullptr) { // NOLINT(readability-implicit-bool-conversion)
178 status = U_ZERO_ERROR;
179 icu::Locale localeName(icuLocale.getBaseName());
180 icuCollator.reset(icu::Collator::createInstance(localeName, status));
181 if (U_FAILURE(status) || icuCollator == nullptr) { // NOLINT(readability-implicit-bool-conversion)
182 THROW_RANGE_ERROR_AND_RETURN(thread, "invalid collation", collator);
183 }
184 }
185 ASSERT(U_SUCCESS(status));
186 icu::Locale collatorLocale(icuCollator->getLocale(ULOC_VALID_LOCALE, status));
187
188 icuCollator->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
189 ASSERT(U_SUCCESS(status));
190
191 // If numeric is found set ICU collator UCOL_NUMERIC_COLLATION to numeric
192 // Else iterate RelevantExtensionKeys to find "kn"
193 // if found, set ICU collator UCOL_NUMERIC_COLLATION to iterator->second
194 status = U_ZERO_ERROR;
195 if (foundNumeric) {
196 ASSERT(icuCollator.get() != nullptr);
197 icuCollator.get()->setAttribute(UCOL_NUMERIC_COLLATION, numeric ? UCOL_ON : UCOL_OFF, status);
198 ASSERT(U_SUCCESS(status));
199 } else {
200 auto extensionIter = r.extensions.find("kn");
201 if (extensionIter != r.extensions.end()) {
202 ASSERT(icuCollator.get() != nullptr);
203 bool found = (extensionIter->second == "true");
204 collator->SetNumeric(found);
205 icuCollator.get()->setAttribute(UCOL_NUMERIC_COLLATION, found ? UCOL_ON : UCOL_OFF, status);
206 ASSERT(U_SUCCESS(status));
207 }
208 }
209
210 // If caseFirst is not undefined set ICU collator UColAttributeValue to caseFirst
211 // Else iterate RelevantExtensionKeys to find "kf"
212 // if found, set ICU collator UColAttributeValue to iterator->second
213 status = U_ZERO_ERROR;
214 if (caseFirst != CaseFirstOption::UNDEFINED) {
215 ASSERT(icuCollator.get() != nullptr);
216 icuCollator.get()->setAttribute(UCOL_CASE_FIRST, OptionToUColAttribute(caseFirst), status);
217 ASSERT(U_SUCCESS(status));
218 } else {
219 auto extensionIter = r.extensions.find("kf");
220 if (extensionIter != r.extensions.end()) {
221 ASSERT(icuCollator.get() != nullptr);
222 auto mapIter = caseFirstMap.find(extensionIter->second);
223 if (mapIter != caseFirstMap.end()) {
224 icuCollator.get()->setAttribute(UCOL_CASE_FIRST, OptionToUColAttribute(mapIter->second), status);
225 collator->SetCaseFirst(mapIter->second);
226 } else {
227 icuCollator.get()->setAttribute(UCOL_CASE_FIRST, OptionToUColAttribute(CaseFirstOption::UNDEFINED),
228 status);
229 }
230 ASSERT(U_SUCCESS(status));
231 }
232 }
233
234 // 24. Let sensitivity be ? GetOption(options, "sensitivity", "string", « "base", "accent", "case", "variant" »,
235 // undefined).
236 SensitivityOption sensitivity = JSLocale::GetOptionOfString<SensitivityOption>(
237 thread, optionsObject, globalConst->GetHandledSensitivityString(),
238 {SensitivityOption::BASE, SensitivityOption::ACCENT, SensitivityOption::CASE, SensitivityOption::VARIANT},
239 {"base", "accent", "case", "variant"}, SensitivityOption::UNDEFINED);
240 RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
241 // 25. If sensitivity is undefined, then
242 // a. If usage is "sort", then
243 // i. Let sensitivity be "variant".
244 if (sensitivity == SensitivityOption::UNDEFINED) {
245 if (usage == UsageOption::SORT) {
246 sensitivity = SensitivityOption::VARIANT;
247 }
248 }
249 collator->SetSensitivity(sensitivity);
250
251 // Trans SensitivityOption to Icu strength option
252 switch (sensitivity) {
253 case SensitivityOption::BASE:
254 icuCollator->setStrength(icu::Collator::PRIMARY);
255 break;
256 case SensitivityOption::ACCENT:
257 icuCollator->setStrength(icu::Collator::SECONDARY);
258 break;
259 case SensitivityOption::CASE:
260 icuCollator->setStrength(icu::Collator::PRIMARY);
261 icuCollator->setAttribute(UCOL_CASE_LEVEL, UCOL_ON, status);
262 break;
263 case SensitivityOption::VARIANT:
264 icuCollator->setStrength(icu::Collator::TERTIARY);
265 break;
266 case SensitivityOption::UNDEFINED:
267 break;
268 case SensitivityOption::EXCEPTION:
269 UNREACHABLE();
270 }
271
272 // 27. Let ignorePunctuation be ? GetOption(options, "ignorePunctuation", "boolean", undefined, false).
273 // 28. Set collator.[[IgnorePunctuation]] to ignorePunctuation.
274 bool ignorePunctuation = false;
275 JSLocale::GetOptionOfBool(thread, optionsObject, globalConst->GetHandledIgnorePunctuationString(), false,
276 &ignorePunctuation);
277 collator->SetIgnorePunctuation(ignorePunctuation);
278 if (ignorePunctuation) {
279 status = U_ZERO_ERROR;
280 icuCollator->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, status);
281 ASSERT(U_SUCCESS(status));
282 }
283
284 if (forIcuCache) {
285 std::string cacheEntry =
286 locales->IsUndefined() ? "" : EcmaStringAccessor(locales.GetTaggedValue()).ToStdString();
287 ecmaVm->SetIcuFormatterToCache(IcuFormatterType::Collator, cacheEntry, icuCollator.release(),
288 JSCollator::FreeIcuCollator);
289 } else {
290 SetIcuCollator(thread, collator, icuCollator.release(), JSCollator::FreeIcuCollator);
291 }
292 collator->SetBoundCompare(thread, JSTaggedValue::Undefined());
293 // 29. Return collator.
294 return collator;
295 }
296
GetCachedIcuCollator(JSThread * thread,const JSHandle<JSTaggedValue> & locales)297 icu::Collator *JSCollator::GetCachedIcuCollator(JSThread *thread, const JSHandle<JSTaggedValue> &locales)
298 {
299 std::string cacheEntry = locales->IsUndefined() ? "" : EcmaStringAccessor(locales.GetTaggedValue()).ToStdString();
300 EcmaVM *ecmaVm = thread->GetEcmaVM();
301 void *cachedCollator = ecmaVm->GetIcuFormatterFromCache(IcuFormatterType::Collator, cacheEntry);
302 if (cachedCollator != nullptr) {
303 return reinterpret_cast<icu::Collator*>(cachedCollator);
304 }
305 return nullptr;
306 }
307
OptionToUColAttribute(CaseFirstOption caseFirstOption)308 UColAttributeValue JSCollator::OptionToUColAttribute(CaseFirstOption caseFirstOption)
309 {
310 auto iter = uColAttributeValueMap.find(caseFirstOption);
311 if (iter != uColAttributeValueMap.end()) {
312 return iter->second;
313 }
314 UNREACHABLE();
315 }
316
OptionsToEcmaString(JSThread * thread,UsageOption usage)317 JSHandle<JSTaggedValue> OptionsToEcmaString(JSThread *thread, UsageOption usage)
318 {
319 JSMutableHandle<JSTaggedValue> result(thread, JSTaggedValue::Undefined());
320 auto globalConst = thread->GlobalConstants();
321 switch (usage) {
322 case UsageOption::SORT:
323 result.Update(globalConst->GetSortString());
324 break;
325 case UsageOption::SEARCH:
326 result.Update(globalConst->GetSearchString());
327 break;
328 default:
329 UNREACHABLE();
330 }
331 return result;
332 }
333
OptionsToEcmaString(JSThread * thread,SensitivityOption sensitivity)334 JSHandle<JSTaggedValue> OptionsToEcmaString(JSThread *thread, SensitivityOption sensitivity)
335 {
336 JSMutableHandle<JSTaggedValue> result(thread, JSTaggedValue::Undefined());
337 auto globalConst = thread->GlobalConstants();
338 switch (sensitivity) {
339 case SensitivityOption::BASE:
340 result.Update(globalConst->GetBaseString());
341 break;
342 case SensitivityOption::ACCENT:
343 result.Update(globalConst->GetAccentString());
344 break;
345 case SensitivityOption::CASE:
346 result.Update(globalConst->GetCaseString());
347 break;
348 case SensitivityOption::VARIANT:
349 result.Update(globalConst->GetVariantString());
350 break;
351 case SensitivityOption::UNDEFINED:
352 break;
353 default:
354 UNREACHABLE();
355 }
356 return result;
357 }
358
OptionsToEcmaString(JSThread * thread,CaseFirstOption caseFirst)359 JSHandle<JSTaggedValue> OptionsToEcmaString(JSThread *thread, CaseFirstOption caseFirst)
360 {
361 JSMutableHandle<JSTaggedValue> result(thread, JSTaggedValue::Undefined());
362 auto globalConst = thread->GlobalConstants();
363 switch (caseFirst) {
364 case CaseFirstOption::UPPER:
365 result.Update(globalConst->GetUpperString());
366 break;
367 case CaseFirstOption::LOWER:
368 result.Update(globalConst->GetLowerString());
369 break;
370 case CaseFirstOption::FALSE_OPTION:
371 result.Update(globalConst->GetFalseString());
372 break;
373 case CaseFirstOption::UNDEFINED:
374 result.Update(globalConst->GetUpperString());
375 break;
376 default:
377 UNREACHABLE();
378 }
379 return result;
380 }
381
382 // 11.3.4 Intl.Collator.prototype.resolvedOptions ()
ResolvedOptions(JSThread * thread,const JSHandle<JSCollator> & collator)383 JSHandle<JSObject> JSCollator::ResolvedOptions(JSThread *thread, const JSHandle<JSCollator> &collator)
384 {
385 auto ecmaVm = thread->GetEcmaVM();
386 auto globalConst = thread->GlobalConstants();
387 ObjectFactory *factory = ecmaVm->GetFactory();
388 JSHandle<GlobalEnv> env = ecmaVm->GetGlobalEnv();
389 JSHandle<JSFunction> funCtor(env->GetObjectFunction());
390 JSHandle<JSObject> options(factory->NewJSObjectByConstructor(funCtor));
391
392 // [[Locale]]
393 JSHandle<JSTaggedValue> property = globalConst->GetHandledLocaleString();
394 JSHandle<JSTaggedValue> locale(thread, collator->GetLocale());
395 JSObject::CreateDataPropertyOrThrow(thread, options, property, locale);
396
397 // [[Usage]]
398 UsageOption usageOption = collator->GetUsage();
399 JSHandle<JSTaggedValue> usageValue = OptionsToEcmaString(thread, usageOption);
400 JSObject::CreateDataProperty(thread, options, globalConst->GetHandledUsageString(), usageValue);
401
402 // [[Sensitivity]]
403 auto sentivityOption = collator->GetSensitivity();
404 JSHandle<JSTaggedValue> sensitivityValue = OptionsToEcmaString(thread, sentivityOption);
405 JSObject::CreateDataProperty(thread, options, globalConst->GetHandledSensitivityString(), sensitivityValue);
406
407 // [[IgnorePunctuation]]
408 JSHandle<JSTaggedValue> ignorePunctuationValue(thread, JSTaggedValue(collator->GetIgnorePunctuation()));
409 JSObject::CreateDataProperty(thread, options, globalConst->GetHandledIgnorePunctuationString(),
410 ignorePunctuationValue);
411
412 // [[Collation]]
413 JSMutableHandle<JSTaggedValue> collationValue(thread, collator->GetCollation());
414 if (collationValue->IsUndefined()) {
415 collationValue.Update(globalConst->GetDefaultString());
416 }
417 JSObject::CreateDataProperty(thread, options, globalConst->GetHandledCollationString(), collationValue);
418
419 // [[Numeric]]
420 JSHandle<JSTaggedValue> numericValue(thread, JSTaggedValue(collator->GetNumeric()));
421 JSObject::CreateDataProperty(thread, options, globalConst->GetHandledNumericString(), numericValue);
422
423 // [[CaseFirst]]
424 CaseFirstOption caseFirstOption = collator->GetCaseFirst();
425 // In Ecma402 spec, caseFirst is an optional property so we set it to Upper when input is undefined
426 // the requirement maybe change in the future
427 JSHandle<JSTaggedValue> caseFirstValue = OptionsToEcmaString(thread, caseFirstOption);
428 JSObject::CreateDataProperty(thread, options, globalConst->GetHandledCaseFirstString(), caseFirstValue);
429 return options;
430 }
431
EcmaStringToUString(const JSHandle<EcmaString> & string)432 icu::UnicodeString EcmaStringToUString(const JSHandle<EcmaString> &string)
433 {
434 std::string stdString(ConvertToString(*string, StringConvertedUsage::LOGICOPERATION));
435 icu::StringPiece sp(stdString);
436 icu::UnicodeString uString = icu::UnicodeString::fromUTF8(sp);
437 return uString;
438 }
439
CompareStrings(const icu::Collator * icuCollator,const JSHandle<EcmaString> & string1,const JSHandle<EcmaString> & string2)440 JSTaggedValue JSCollator::CompareStrings(const icu::Collator *icuCollator, const JSHandle<EcmaString> &string1,
441 const JSHandle<EcmaString> &string2)
442 {
443 icu::UnicodeString uString1 = EcmaStringToUString(string1);
444 icu::UnicodeString uString2 = EcmaStringToUString(string2);
445
446 UCollationResult result;
447 UErrorCode status = U_ZERO_ERROR;
448 result = icuCollator->compare(uString1, uString2, status);
449 ASSERT(U_SUCCESS(status));
450
451 return JSTaggedValue(result);
452 }
453 } // namespace panda::ecmascript
454