1 /*
2 * Copyright (c) 2021 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "ecmascript/js_collator.h"
17
18 #include "unicode/udata.h"
19
20 #include "ecmascript/global_env.h"
21 #include "ecmascript/mem/c_string.h"
22
23 namespace panda::ecmascript {
// ICU data path for collation resources: the ICU data name joined with the "coll" tree.
// GetAvailableLocales() passes it to JSLocale::GetAvailableLocales as the resource path.
// NOLINTNEXTLINE (readability-identifier-naming, fuchsia-statically-constructed-objects)
const CString JSCollator::uIcuDataColl = U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "coll";
// Maps a textual caseFirst value to its CaseFirstOption. InitializeCollator() uses it to translate
// the value of the "kf" locale extension.
const std::map<std::string, CaseFirstOption> JSCollator::caseFirstMap = {
    {"upper", CaseFirstOption::UPPER},
    {"lower", CaseFirstOption::LOWER},
    {"false", CaseFirstOption::FALSE_OPTION}
};
// Maps a CaseFirstOption to the ICU UCOL_CASE_FIRST attribute value; consulted by OptionToUColAttribute().
// UNDEFINED maps to UCOL_OFF, the same value as FALSE_OPTION.
const std::map<CaseFirstOption, UColAttributeValue> JSCollator::uColAttributeValueMap = {
    {CaseFirstOption::UPPER, UCOL_UPPER_FIRST},
    {CaseFirstOption::LOWER, UCOL_LOWER_FIRST},
    {CaseFirstOption::FALSE_OPTION, UCOL_OFF},
    {CaseFirstOption::UNDEFINED, UCOL_OFF}
};
37
GetAvailableLocales(JSThread * thread)38 JSHandle<TaggedArray> JSCollator::GetAvailableLocales(JSThread *thread)
39 {
40 const char *key = nullptr;
41 const char *path = JSCollator::uIcuDataColl.c_str();
42 JSHandle<TaggedArray> availableLocales = JSLocale::GetAvailableLocales(thread, key, path);
43 return availableLocales;
44 }
45
46 /* static */
SetIcuCollator(JSThread * thread,const JSHandle<JSCollator> & collator,icu::Collator * icuCollator,const DeleteEntryPoint & callback)47 void JSCollator::SetIcuCollator(JSThread *thread, const JSHandle<JSCollator> &collator,
48 icu::Collator *icuCollator, const DeleteEntryPoint &callback)
49 {
50 EcmaVM *ecmaVm = thread->GetEcmaVM();
51 ObjectFactory *factory = ecmaVm->GetFactory();
52
53 ASSERT(icuCollator != nullptr);
54 JSTaggedValue data = collator->GetIcuField();
55 if (data.IsJSNativePointer()) {
56 JSNativePointer *native = JSNativePointer::Cast(data.GetTaggedObject());
57 native->ResetExternalPointer(icuCollator);
58 return;
59 }
60 JSHandle<JSNativePointer> pointer = factory->NewJSNativePointer(icuCollator);
61 pointer->SetDeleter(callback);
62 collator->SetIcuField(thread, pointer.GetTaggedValue());
63 ecmaVm->PushToArrayDataList(*pointer);
64 }
65
InitializeCollator(JSThread * thread,const JSHandle<JSCollator> & collator,const JSHandle<JSTaggedValue> & locales,const JSHandle<JSTaggedValue> & options,bool forIcuCache)66 JSHandle<JSCollator> JSCollator::InitializeCollator(JSThread *thread,
67 const JSHandle<JSCollator> &collator,
68 const JSHandle<JSTaggedValue> &locales,
69 const JSHandle<JSTaggedValue> &options,
70 bool forIcuCache)
71 {
72 EcmaVM *ecmaVm = thread->GetEcmaVM();
73 ObjectFactory *factory = ecmaVm->GetFactory();
74 const GlobalEnvConstants *globalConst = thread->GlobalConstants();
75 // 1. Let requestedLocales be ? CanonicalizeLocaleList(locales).
76 JSHandle<TaggedArray> requestedLocales = JSLocale::CanonicalizeLocaleList(thread, locales);
77 RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
78
79 // 2. If options is undefined, then
80 // a. Let options be ObjectCreate(null).
81 // 3. Else,
82 // a. Let options be ? ToObject(options).
83 JSHandle<JSObject> optionsObject;
84 if (options->IsUndefined()) {
85 JSHandle<JSTaggedValue> nullValue = globalConst->GetHandledNull();
86 optionsObject = factory->OrdinaryNewJSObjectCreate(nullValue);
87 } else {
88 optionsObject = JSTaggedValue::ToObject(thread, options);
89 RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
90 }
91 // 4. Let usage be ? GetOption(options, "usage", "string", « "sort", "search" », "sort").
92 auto usage = JSLocale::GetOptionOfString<UsageOption>(thread, optionsObject, globalConst->GetHandledUsageString(),
93 {UsageOption::SORT, UsageOption::SEARCH}, {"sort", "search"},
94 UsageOption::SORT);
95 RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
96 collator->SetUsage(usage);
97
98 // 5. Let matcher be ? GetOption(options, "localeMatcher", "string", « "lookup", "best fit" », "best fit").
99 auto matcher = JSLocale::GetOptionOfString<LocaleMatcherOption>(
100 thread, optionsObject, globalConst->GetHandledLocaleMatcherString(),
101 {LocaleMatcherOption::LOOKUP, LocaleMatcherOption::BEST_FIT}, {"lookup", "best fit"},
102 LocaleMatcherOption::BEST_FIT);
103 RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
104
105 // 6. Let collation be ? GetOption(options, "collation", "string", undefined, undefined).
106 // 7. If collation is not undefined, then
107 // a. If collation does not match the Unicode Locale Identifier type nonterminal, throw a RangeError exception.
108 JSHandle<JSTaggedValue> collation =
109 JSLocale::GetOption(thread, optionsObject, globalConst->GetHandledCollationString(), OptionType::STRING,
110 globalConst->GetHandledUndefined(), globalConst->GetHandledUndefined());
111 RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
112 collator->SetCollation(thread, collation);
113 std::string collationStr;
114 if (!collation->IsUndefined()) {
115 JSHandle<EcmaString> collationEcmaStr = JSHandle<EcmaString>::Cast(collation);
116 collationStr = JSLocale::ConvertToStdString(collationEcmaStr);
117 if (!JSLocale::IsWellAlphaNumList(collationStr)) {
118 THROW_RANGE_ERROR_AND_RETURN(thread, "invalid collation", collator);
119 }
120 }
121
122 // 8. Let numeric be ? GetOption(options, "numeric", "boolean", undefined, undefined).
123 bool numeric = false;
124 bool foundNumeric =
125 JSLocale::GetOptionOfBool(thread, optionsObject, globalConst->GetHandledNumericString(), false, &numeric);
126 RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
127 collator->SetNumeric(numeric);
128
129 // 14. Let caseFirst be ? GetOption(options, "caseFirst", "string", « "upper", "lower", "false" », undefined).
130 CaseFirstOption caseFirst = JSLocale::GetOptionOfString<CaseFirstOption>(
131 thread, optionsObject, globalConst->GetHandledCaseFirstString(),
132 {CaseFirstOption::UPPER, CaseFirstOption::LOWER, CaseFirstOption::FALSE_OPTION}, {"upper", "lower", "false"},
133 CaseFirstOption::UNDEFINED);
134 RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
135 collator->SetCaseFirst(caseFirst);
136
137 // 16. Let relevantExtensionKeys be %Collator%.[[RelevantExtensionKeys]].
138 std::set<std::string> relevantExtensionKeys = {"co", "kn", "kf"};
139
140 // 17. Let r be ResolveLocale(%Collator%.[[AvailableLocales]], requestedLocales, opt,
141 // %Collator%.[[RelevantExtensionKeys]], localeData).
142 JSHandle<TaggedArray> availableLocales;
143 if (requestedLocales->GetLength() == 0) {
144 availableLocales = factory->EmptyArray();
145 } else {
146 availableLocales = GetAvailableLocales(thread);
147 }
148 ResolvedLocale r =
149 JSLocale::ResolveLocale(thread, availableLocales, requestedLocales, matcher, relevantExtensionKeys);
150 icu::Locale icuLocale = r.localeData;
151 JSHandle<EcmaString> localeStr = JSLocale::ToLanguageTag(thread, icuLocale);
152 collator->SetLocale(thread, localeStr.GetTaggedValue());
153 ASSERT_PRINT(!icuLocale.isBogus(), "icuLocale is bogus");
154
155 // If collation is undefined iterate RelevantExtensionKeys to find "co"
156 // if found, set ICU collator UnicodeKeyword to iterator->second
157 UErrorCode status = U_ZERO_ERROR;
158 if (!collation->IsUndefined()) {
159 auto extensionIter = r.extensions.find("co");
160 if (extensionIter != r.extensions.end() && extensionIter->second != collationStr) {
161 icuLocale.setUnicodeKeywordValue("co", nullptr, status);
162 ASSERT_PRINT(U_SUCCESS(status), "icuLocale set co failed");
163 }
164 }
165
166 // If usage is serach set co-serach to icu locale key word value
167 // Eles set collation string to icu locale key word value
168 if (usage == UsageOption::SEARCH) {
169 icuLocale.setUnicodeKeywordValue("co", "search", status);
170 ASSERT(U_SUCCESS(status));
171 } else {
172 if (!collationStr.empty() && JSLocale::IsWellCollation(icuLocale, collationStr)) {
173 icuLocale.setUnicodeKeywordValue("co", collationStr, status);
174 ASSERT(U_SUCCESS(status));
175 }
176 }
177
178 std::unique_ptr<icu::Collator> icuCollator(icu::Collator::createInstance(icuLocale, status));
179 if (U_FAILURE(status) || icuCollator == nullptr) { // NOLINT(readability-implicit-bool-conversion)
180 status = U_ZERO_ERROR;
181 icu::Locale localeName(icuLocale.getBaseName());
182 icuCollator.reset(icu::Collator::createInstance(localeName, status));
183 if (U_FAILURE(status) || icuCollator == nullptr) { // NOLINT(readability-implicit-bool-conversion)
184 THROW_RANGE_ERROR_AND_RETURN(thread, "invalid collation", collator);
185 }
186 }
187 ASSERT(U_SUCCESS(status));
188 icu::Locale collatorLocale(icuCollator->getLocale(ULOC_VALID_LOCALE, status));
189
190 icuCollator->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
191 ASSERT(U_SUCCESS(status));
192
193 // If numeric is found set ICU collator UCOL_NUMERIC_COLLATION to numeric
194 // Else iterate RelevantExtensionKeys to find "kn"
195 // if found, set ICU collator UCOL_NUMERIC_COLLATION to iterator->second
196 status = U_ZERO_ERROR;
197 if (foundNumeric) {
198 ASSERT(icuCollator.get() != nullptr);
199 icuCollator.get()->setAttribute(UCOL_NUMERIC_COLLATION, numeric ? UCOL_ON : UCOL_OFF, status);
200 ASSERT(U_SUCCESS(status));
201 } else {
202 auto extensionIter = r.extensions.find("kn");
203 if (extensionIter != r.extensions.end()) {
204 ASSERT(icuCollator.get() != nullptr);
205 bool found = (extensionIter->second == "true");
206 collator->SetNumeric(found);
207 icuCollator.get()->setAttribute(UCOL_NUMERIC_COLLATION, found ? UCOL_ON : UCOL_OFF, status);
208 ASSERT(U_SUCCESS(status));
209 }
210 }
211
212 // If caseFirst is not undefined set ICU collator UColAttributeValue to caseFirst
213 // Else iterate RelevantExtensionKeys to find "kf"
214 // if found, set ICU collator UColAttributeValue to iterator->second
215 status = U_ZERO_ERROR;
216 if (caseFirst != CaseFirstOption::UNDEFINED) {
217 ASSERT(icuCollator.get() != nullptr);
218 icuCollator.get()->setAttribute(UCOL_CASE_FIRST, OptionToUColAttribute(caseFirst), status);
219 ASSERT(U_SUCCESS(status));
220 } else {
221 auto extensionIter = r.extensions.find("kf");
222 if (extensionIter != r.extensions.end()) {
223 ASSERT(icuCollator.get() != nullptr);
224 auto mapIter = caseFirstMap.find(extensionIter->second);
225 if (mapIter != caseFirstMap.end()) {
226 icuCollator.get()->setAttribute(UCOL_CASE_FIRST, OptionToUColAttribute(mapIter->second), status);
227 collator->SetCaseFirst(mapIter->second);
228 } else {
229 icuCollator.get()->setAttribute(UCOL_CASE_FIRST, OptionToUColAttribute(CaseFirstOption::UNDEFINED),
230 status);
231 }
232 ASSERT(U_SUCCESS(status));
233 }
234 }
235
236 // 24. Let sensitivity be ? GetOption(options, "sensitivity", "string", « "base", "accent", "case", "variant" »,
237 // undefined).
238 SensitivityOption sensitivity = JSLocale::GetOptionOfString<SensitivityOption>(
239 thread, optionsObject, globalConst->GetHandledSensitivityString(),
240 {SensitivityOption::BASE, SensitivityOption::ACCENT, SensitivityOption::CASE, SensitivityOption::VARIANT},
241 {"base", "accent", "case", "variant"}, SensitivityOption::UNDEFINED);
242 RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
243 // 25. If sensitivity is undefined, then
244 // a. If usage is "sort", then
245 // i. Let sensitivity be "variant".
246 if (sensitivity == SensitivityOption::UNDEFINED) {
247 if (usage == UsageOption::SORT) {
248 sensitivity = SensitivityOption::VARIANT;
249 }
250 }
251 collator->SetSensitivity(sensitivity);
252
253 // Trans SensitivityOption to Icu strength option
254 switch (sensitivity) {
255 case SensitivityOption::BASE:
256 icuCollator->setStrength(icu::Collator::PRIMARY);
257 break;
258 case SensitivityOption::ACCENT:
259 icuCollator->setStrength(icu::Collator::SECONDARY);
260 break;
261 case SensitivityOption::CASE:
262 icuCollator->setStrength(icu::Collator::PRIMARY);
263 icuCollator->setAttribute(UCOL_CASE_LEVEL, UCOL_ON, status);
264 break;
265 case SensitivityOption::VARIANT:
266 icuCollator->setStrength(icu::Collator::TERTIARY);
267 break;
268 case SensitivityOption::UNDEFINED:
269 break;
270 case SensitivityOption::EXCEPTION:
271 UNREACHABLE();
272 }
273
274 // 27. Let ignorePunctuation be ? GetOption(options, "ignorePunctuation", "boolean", undefined, false).
275 // 28. Set collator.[[IgnorePunctuation]] to ignorePunctuation.
276 bool ignorePunctuation = false;
277 JSLocale::GetOptionOfBool(thread, optionsObject, globalConst->GetHandledIgnorePunctuationString(), false,
278 &ignorePunctuation);
279 collator->SetIgnorePunctuation(ignorePunctuation);
280 if (ignorePunctuation) {
281 status = U_ZERO_ERROR;
282 icuCollator->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, status);
283 ASSERT(U_SUCCESS(status));
284 }
285
286 if (forIcuCache) {
287 std::string cacheEntry =
288 locales->IsUndefined() ? "" : JSLocale::ConvertToStdString(JSHandle<EcmaString>::Cast(locales));
289 ecmaVm->SetIcuFormatterToCache(IcuFormatterType::Collator, cacheEntry, icuCollator.release(),
290 JSCollator::FreeIcuCollator);
291 } else {
292 SetIcuCollator(thread, collator, icuCollator.release(), JSCollator::FreeIcuCollator);
293 }
294 collator->SetBoundCompare(thread, JSTaggedValue::Undefined());
295 // 29. Return collator.
296 return collator;
297 }
298
GetCachedIcuCollator(JSThread * thread,const JSHandle<JSTaggedValue> & locales)299 icu::Collator *JSCollator::GetCachedIcuCollator(JSThread *thread, const JSHandle<JSTaggedValue> &locales)
300 {
301 std::string cacheEntry =
302 locales->IsUndefined() ? "" : JSLocale::ConvertToStdString(JSHandle<EcmaString>::Cast(locales));
303 EcmaVM *ecmaVm = thread->GetEcmaVM();
304 void *cachedCollator = ecmaVm->GetIcuFormatterFromCache(IcuFormatterType::Collator, cacheEntry);
305 if (cachedCollator != nullptr) {
306 return reinterpret_cast<icu::Collator*>(cachedCollator);
307 }
308 return nullptr;
309 }
310
OptionToUColAttribute(CaseFirstOption caseFirstOption)311 UColAttributeValue JSCollator::OptionToUColAttribute(CaseFirstOption caseFirstOption)
312 {
313 auto iter = uColAttributeValueMap.find(caseFirstOption);
314 if (iter != uColAttributeValueMap.end()) {
315 return iter->second;
316 }
317 UNREACHABLE();
318 }
319
OptionsToEcmaString(JSThread * thread,UsageOption usage)320 JSHandle<JSTaggedValue> OptionsToEcmaString(JSThread *thread, UsageOption usage)
321 {
322 JSMutableHandle<JSTaggedValue> result(thread, JSTaggedValue::Undefined());
323 auto globalConst = thread->GlobalConstants();
324 switch (usage) {
325 case UsageOption::SORT:
326 result.Update(globalConst->GetSortString());
327 break;
328 case UsageOption::SEARCH:
329 result.Update(globalConst->GetSearchString());
330 break;
331 default:
332 UNREACHABLE();
333 }
334 return result;
335 }
336
OptionsToEcmaString(JSThread * thread,SensitivityOption sensitivity)337 JSHandle<JSTaggedValue> OptionsToEcmaString(JSThread *thread, SensitivityOption sensitivity)
338 {
339 JSMutableHandle<JSTaggedValue> result(thread, JSTaggedValue::Undefined());
340 auto globalConst = thread->GlobalConstants();
341 switch (sensitivity) {
342 case SensitivityOption::BASE:
343 result.Update(globalConst->GetBaseString());
344 break;
345 case SensitivityOption::ACCENT:
346 result.Update(globalConst->GetAccentString());
347 break;
348 case SensitivityOption::CASE:
349 result.Update(globalConst->GetCaseString());
350 break;
351 case SensitivityOption::VARIANT:
352 result.Update(globalConst->GetVariantString());
353 break;
354 case SensitivityOption::UNDEFINED:
355 break;
356 default:
357 UNREACHABLE();
358 }
359 return result;
360 }
361
OptionsToEcmaString(JSThread * thread,CaseFirstOption caseFirst)362 JSHandle<JSTaggedValue> OptionsToEcmaString(JSThread *thread, CaseFirstOption caseFirst)
363 {
364 JSMutableHandle<JSTaggedValue> result(thread, JSTaggedValue::Undefined());
365 auto globalConst = thread->GlobalConstants();
366 switch (caseFirst) {
367 case CaseFirstOption::UPPER:
368 result.Update(globalConst->GetUpperString());
369 break;
370 case CaseFirstOption::LOWER:
371 result.Update(globalConst->GetLowerString());
372 break;
373 case CaseFirstOption::FALSE_OPTION:
374 result.Update(globalConst->GetFalseString());
375 break;
376 case CaseFirstOption::UNDEFINED:
377 result.Update(globalConst->GetUpperString());
378 break;
379 default:
380 UNREACHABLE();
381 }
382 return result;
383 }
384
385 // 11.3.4 Intl.Collator.prototype.resolvedOptions ()
ResolvedOptions(JSThread * thread,const JSHandle<JSCollator> & collator)386 JSHandle<JSObject> JSCollator::ResolvedOptions(JSThread *thread, const JSHandle<JSCollator> &collator)
387 {
388 auto ecmaVm = thread->GetEcmaVM();
389 auto globalConst = thread->GlobalConstants();
390 ObjectFactory *factory = ecmaVm->GetFactory();
391 JSHandle<GlobalEnv> env = ecmaVm->GetGlobalEnv();
392 JSHandle<JSTaggedValue> ctor = env->GetObjectFunction();
393 JSHandle<JSFunction> funCtor = JSHandle<JSFunction>::Cast(env->GetObjectFunction());
394 JSHandle<JSObject> options(factory->NewJSObjectByConstructor(funCtor, ctor));
395
396 // [[Locale]]
397 JSHandle<JSTaggedValue> property = globalConst->GetHandledLocaleString();
398 JSHandle<JSTaggedValue> locale(thread, collator->GetLocale());
399 JSObject::CreateDataPropertyOrThrow(thread, options, property, locale);
400
401 // [[Usage]]
402 UsageOption usageOption = collator->GetUsage();
403 JSHandle<JSTaggedValue> usageValue = OptionsToEcmaString(thread, usageOption);
404 JSObject::CreateDataProperty(thread, options, globalConst->GetHandledUsageString(), usageValue);
405
406 // [[Sensitivity]]
407 auto sentivityOption = collator->GetSensitivity();
408 JSHandle<JSTaggedValue> sensitivityValue = OptionsToEcmaString(thread, sentivityOption);
409 JSObject::CreateDataProperty(thread, options, globalConst->GetHandledSensitivityString(), sensitivityValue);
410
411 // [[IgnorePunctuation]]
412 JSHandle<JSTaggedValue> ignorePunctuationValue(thread, JSTaggedValue(collator->GetIgnorePunctuation()));
413 JSObject::CreateDataProperty(thread, options, globalConst->GetHandledIgnorePunctuationString(),
414 ignorePunctuationValue);
415
416 // [[Collation]]
417 JSMutableHandle<JSTaggedValue> collationValue(thread, collator->GetCollation());
418 if (collationValue->IsUndefined()) {
419 collationValue.Update(globalConst->GetDefaultString());
420 }
421 JSObject::CreateDataProperty(thread, options, globalConst->GetHandledCollationString(), collationValue);
422
423 // [[Numeric]]
424 JSHandle<JSTaggedValue> numericValue(thread, JSTaggedValue(collator->GetNumeric()));
425 JSObject::CreateDataProperty(thread, options, globalConst->GetHandledNumericString(), numericValue);
426
427 // [[CaseFirst]]
428 CaseFirstOption caseFirstOption = collator->GetCaseFirst();
429 // In Ecma402 spec, caseFirst is an optional property so we set it to Upper when input is undefined
430 // the requirement maybe change in the future
431 JSHandle<JSTaggedValue> caseFirstValue = OptionsToEcmaString(thread, caseFirstOption);
432 JSObject::CreateDataProperty(thread, options, globalConst->GetHandledCaseFirstString(), caseFirstValue);
433 return options;
434 }
435
EcmaStringToUString(const JSHandle<EcmaString> & string)436 icu::UnicodeString EcmaStringToUString(const JSHandle<EcmaString> &string)
437 {
438 std::string stdString(ConvertToString(*string, StringConvertedUsage::LOGICOPERATION));
439 icu::StringPiece sp(stdString);
440 icu::UnicodeString uString = icu::UnicodeString::fromUTF8(sp);
441 return uString;
442 }
443
CompareStrings(const icu::Collator * icuCollator,const JSHandle<EcmaString> & string1,const JSHandle<EcmaString> & string2)444 JSTaggedValue JSCollator::CompareStrings(const icu::Collator *icuCollator, const JSHandle<EcmaString> &string1,
445 const JSHandle<EcmaString> &string2)
446 {
447 icu::UnicodeString uString1 = EcmaStringToUString(string1);
448 icu::UnicodeString uString2 = EcmaStringToUString(string2);
449
450 UCollationResult result;
451 UErrorCode status = U_ZERO_ERROR;
452 result = icuCollator->compare(uString1, uString2, status);
453 ASSERT(U_SUCCESS(status));
454
455 return JSTaggedValue(result);
456 }
457 } // namespace panda::ecmascript
458