1 /*
2 * Copyright (c) 2021-2024 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "ecmascript/js_collator.h"
17
18 #include "ecmascript/intl/locale_helper.h"
19 #include "ecmascript/global_env.h"
20 #include "ecmascript/ecma_string-inl.h"
21 namespace panda::ecmascript {
// ICU resource path for collation data: U_ICUDATA_NAME and "coll" joined by U_TREE_SEPARATOR_STRING.
// GetAvailableLocales() hands this path to intl::LocaleHelper::GetAvailableLocales().
// NOLINTNEXTLINE (readability-identifier-naming, fuchsia-statically-constructed-objects)
const CString JSCollator::uIcuDataColl = U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "coll";
// Maps the textual value of the "kf" locale extension to a CaseFirstOption
// (consulted by InitializeCollator() when no caseFirst option was supplied).
const std::map<std::string, CaseFirstOption> JSCollator::caseFirstMap = {
    {"upper", CaseFirstOption::UPPER},
    {"lower", CaseFirstOption::LOWER},
    {"false", CaseFirstOption::FALSE_OPTION}
};
// Maps a CaseFirstOption to the ICU value used for the UCOL_CASE_FIRST attribute.
// UNDEFINED is mapped to UCOL_OFF, the same value as FALSE_OPTION.
const std::map<CaseFirstOption, UColAttributeValue> JSCollator::uColAttributeValueMap = {
    {CaseFirstOption::UPPER, UCOL_UPPER_FIRST},
    {CaseFirstOption::LOWER, UCOL_LOWER_FIRST},
    {CaseFirstOption::FALSE_OPTION, UCOL_OFF},
    {CaseFirstOption::UNDEFINED, UCOL_OFF}
};
// Each *_OPTION vector below is paired with a *_OPTION_NAME vector listing the accepted
// option strings in the same order; the pairs are passed together to JSLocale::GetOptionOfString().

// Accepted values of the "localeMatcher" option.
const std::vector<LocaleMatcherOption> JSCollator::LOCALE_MATCHER_OPTION = {
    LocaleMatcherOption::LOOKUP, LocaleMatcherOption::BEST_FIT
};
const std::vector<std::string> JSCollator::LOCALE_MATCHER_OPTION_NAME = {"lookup", "best fit"};

// Accepted values of the "caseFirst" option.
const std::vector<CaseFirstOption> JSCollator::CASE_FIRST_OPTION = {
    CaseFirstOption::UPPER, CaseFirstOption::LOWER, CaseFirstOption::FALSE_OPTION
};
const std::vector<std::string> JSCollator::CASE_FIRST_OPTION_NAME = {"upper", "lower", "false"};

// Unicode extension keys handed to JSLocale::ResolveLocale(): "co" (collation),
// "kn" (numeric) and "kf" (case first), as read back from the resolved extensions in InitializeCollator().
const std::set<std::string> JSCollator::RELEVANT_EXTENSION_KEYS = {"co", "kn", "kf"};

// Accepted values of the "sensitivity" option.
const std::vector<SensitivityOption> JSCollator::SENSITIVITY_OPTION = {
    SensitivityOption::BASE, SensitivityOption::ACCENT,
    SensitivityOption::CASE, SensitivityOption::VARIANT
};
const std::vector<std::string> JSCollator::SENSITIVITY_OPTION_NAME = {"base", "accent", "case", "variant"};

// Accepted values of the "usage" option.
const std::vector<UsageOption> JSCollator::USAGE_OPTION = {UsageOption::SORT, UsageOption::SEARCH};
const std::vector<std::string> JSCollator::USAGE_OPTION_NAME = {"sort", "search"};

// All the available locales that are statically known to fulfill fast path conditions.
// CompareStringsOptionFor() returns TRY_FAST_PATH only for an exact match against one of these tags.
const char* const JSCollator::FAST_LOCALE[] = {
    "en-US", "en", "fr", "es", "de", "pt", "it", "ca",
    "de-AT", "fi", "id", "id-ID", "ms", "nl", "pl", "ro",
    "sl", "sv", "sw", "vi", "en-DE", "en-GB",
};
62
63
GetAvailableLocales(JSThread * thread,bool enableLocaleCache)64 JSHandle<TaggedArray> JSCollator::GetAvailableLocales(JSThread *thread, bool enableLocaleCache)
65 {
66 const char *key = nullptr;
67 const char *path = JSCollator::uIcuDataColl.c_str();
68 // key and path are const, so we can cache the result
69 if (enableLocaleCache) {
70 JSHandle<JSTaggedValue> cachedLocales = thread->GlobalConstants()->GetHandledCachedJSCollatorLocales();
71 if (cachedLocales->IsHeapObject()) {
72 return JSHandle<TaggedArray>(cachedLocales);
73 }
74 }
75 std::vector<std::string> availableStringLocales = intl::LocaleHelper::GetAvailableLocales(thread, key, path);
76 JSHandle<TaggedArray> availableLocales = JSLocale::ConstructLocaleList(thread, availableStringLocales);
77 if (enableLocaleCache) {
78 GlobalEnvConstants *constants = const_cast<GlobalEnvConstants *>(thread->GlobalConstants());
79 constants->SetCachedLocales(availableLocales.GetTaggedValue());
80 }
81 return availableLocales;
82 }
83
84 /* static */
SetIcuCollator(JSThread * thread,const JSHandle<JSCollator> & collator,icu::Collator * icuCollator,const NativePointerCallback & callback)85 void JSCollator::SetIcuCollator(JSThread *thread, const JSHandle<JSCollator> &collator,
86 icu::Collator *icuCollator, const NativePointerCallback &callback)
87 {
88 EcmaVM *ecmaVm = thread->GetEcmaVM();
89 ObjectFactory *factory = ecmaVm->GetFactory();
90
91 ASSERT(icuCollator != nullptr);
92 JSTaggedValue data = collator->GetIcuField(thread);
93 if (data.IsJSNativePointer()) {
94 JSNativePointer *native = JSNativePointer::Cast(data.GetTaggedObject());
95 native->ResetExternalPointer(thread, icuCollator);
96 return;
97 }
98 JSHandle<JSNativePointer> pointer = factory->NewJSNativePointer(icuCollator, callback);
99 collator->SetIcuField(thread, pointer.GetTaggedValue());
100 }
101
InitializeCollator(JSThread * thread,const JSHandle<JSCollator> & collator,const JSHandle<JSTaggedValue> & locales,const JSHandle<JSTaggedValue> & options,bool forIcuCache,bool enableLocaleCache)102 JSHandle<JSCollator> JSCollator::InitializeCollator(JSThread *thread,
103 const JSHandle<JSCollator> &collator,
104 const JSHandle<JSTaggedValue> &locales,
105 const JSHandle<JSTaggedValue> &options,
106 bool forIcuCache,
107 bool enableLocaleCache)
108 {
109 EcmaVM *ecmaVm = thread->GetEcmaVM();
110 ObjectFactory *factory = ecmaVm->GetFactory();
111 const GlobalEnvConstants *globalConst = thread->GlobalConstants();
112 // 1. Let requestedLocales be ? CanonicalizeLocaleList(locales).
113 JSHandle<TaggedArray> requestedLocales = intl::LocaleHelper::CanonicalizeLocaleList(thread, locales);
114 RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
115
116 // 2. If options is undefined, then
117 // a. Let options be ObjectCreate(null).
118 // 3. Else,
119 // a. Let options be ? ToObject(options).
120 JSHandle<JSObject> optionsObject;
121 if (options->IsUndefined()) {
122 optionsObject = factory->CreateNullJSObject();
123 } else {
124 optionsObject = JSTaggedValue::ToObject(thread, options);
125 RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
126 }
127 // 4. Let usage be ? GetOption(options, "usage", "string", « "sort", "search" », "sort").
128 auto usage = JSLocale::GetOptionOfString<UsageOption>(thread, optionsObject, globalConst->GetHandledUsageString(),
129 JSCollator::USAGE_OPTION, JSCollator::USAGE_OPTION_NAME,
130 UsageOption::SORT);
131 RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
132 collator->SetUsage(usage);
133
134 // 5. Let matcher be ? GetOption(options, "localeMatcher", "string", « "lookup", "best fit" », "best fit").
135 auto matcher = JSLocale::GetOptionOfString<LocaleMatcherOption>(
136 thread, optionsObject, globalConst->GetHandledLocaleMatcherString(),
137 JSCollator::LOCALE_MATCHER_OPTION, JSCollator::LOCALE_MATCHER_OPTION_NAME,
138 LocaleMatcherOption::BEST_FIT);
139 RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
140
141 // 6. Let collation be ? GetOption(options, "collation", "string", undefined, undefined).
142 // 7. If collation is not undefined, then
143 // a. If collation does not match the Unicode Locale Identifier type nonterminal, throw a RangeError exception.
144 JSHandle<JSTaggedValue> collation =
145 JSLocale::GetOption(thread, optionsObject, globalConst->GetHandledCollationString(), OptionType::STRING,
146 globalConst->GetHandledUndefined(), globalConst->GetHandledUndefined());
147 RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
148 collator->SetCollation(thread, collation);
149 std::string collationStr;
150 if (!collation->IsUndefined()) {
151 JSHandle<EcmaString> collationEcmaStr = JSHandle<EcmaString>::Cast(collation);
152 collationStr = intl::LocaleHelper::ConvertToStdString(thread, collationEcmaStr);
153 if (!JSLocale::IsWellAlphaNumList(collationStr)) {
154 THROW_RANGE_ERROR_AND_RETURN(thread, "invalid collation", collator);
155 }
156 }
157
158 // 8. Let numeric be ? GetOption(options, "numeric", "boolean", undefined, undefined).
159 bool numeric = false;
160 bool foundNumeric =
161 JSLocale::GetOptionOfBool(thread, optionsObject, globalConst->GetHandledNumericString(), false, &numeric);
162 RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
163 collator->SetNumeric(numeric);
164
165 // 14. Let caseFirst be ? GetOption(options, "caseFirst", "string", « "upper", "lower", "false" », undefined).
166 CaseFirstOption caseFirst = JSLocale::GetOptionOfString<CaseFirstOption>(
167 thread, optionsObject, globalConst->GetHandledCaseFirstString(),
168 JSCollator::CASE_FIRST_OPTION, JSCollator::CASE_FIRST_OPTION_NAME,
169 CaseFirstOption::UNDEFINED);
170 RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
171 collator->SetCaseFirst(caseFirst);
172
173 // 16. Let relevantExtensionKeys be %Collator%.[[RelevantExtensionKeys]].
174
175 // 17. Let r be ResolveLocale(%Collator%.[[AvailableLocales]], requestedLocales, opt,
176 // %Collator%.[[RelevantExtensionKeys]], localeData).
177 JSHandle<TaggedArray> availableLocales;
178 if (requestedLocales->GetLength() == 0) {
179 availableLocales = factory->EmptyArray();
180 } else {
181 availableLocales = GetAvailableLocales(thread, enableLocaleCache);
182 }
183 ResolvedLocale r =
184 JSLocale::ResolveLocale(thread, availableLocales, requestedLocales, matcher, RELEVANT_EXTENSION_KEYS);
185 RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
186 icu::Locale icuLocale = r.localeData;
187 JSHandle<EcmaString> localeStr = intl::LocaleHelper::ToLanguageTag(thread, icuLocale);
188 RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
189 collator->SetLocale(thread, localeStr.GetTaggedValue());
190 ASSERT_PRINT(!icuLocale.isBogus(), "icuLocale is bogus");
191
192 // If collation is undefined iterate RelevantExtensionKeys to find "co"
193 // if found, set ICU collator UnicodeKeyword to iterator->second
194 UErrorCode status = U_ZERO_ERROR;
195 if (!collation->IsUndefined()) {
196 auto extensionIter = r.extensions.find("co");
197 if (extensionIter != r.extensions.end() && extensionIter->second != collationStr) {
198 icuLocale.setUnicodeKeywordValue("co", nullptr, status);
199 ASSERT_PRINT(U_SUCCESS(status), "icuLocale set co failed");
200 }
201 }
202
203 // If usage is serach set co-serach to icu locale key word value
204 // Eles set collation string to icu locale key word value
205 if (usage == UsageOption::SEARCH) {
206 icuLocale.setUnicodeKeywordValue("co", "search", status);
207 ASSERT(U_SUCCESS(status));
208 } else {
209 if (!collationStr.empty() && JSLocale::IsWellCollation(icuLocale, collationStr)) {
210 icuLocale.setUnicodeKeywordValue("co", collationStr, status);
211 ASSERT(U_SUCCESS(status));
212 }
213 }
214
215 std::unique_ptr<icu::Collator> icuCollator(icu::Collator::createInstance(icuLocale, status));
216 if (U_FAILURE(status) || icuCollator == nullptr) { // NOLINT(readability-implicit-bool-conversion)
217 if (status == UErrorCode::U_MISSING_RESOURCE_ERROR) {
218 THROW_REFERENCE_ERROR_AND_RETURN(thread, "can not find icu data resources", collator);
219 }
220 status = U_ZERO_ERROR;
221 icu::Locale localeName(icuLocale.getBaseName());
222 icuCollator.reset(icu::Collator::createInstance(localeName, status));
223 if (U_FAILURE(status) || icuCollator == nullptr) { // NOLINT(readability-implicit-bool-conversion)
224 THROW_RANGE_ERROR_AND_RETURN(thread, "invalid collation", collator);
225 }
226 }
227 ASSERT(U_SUCCESS(status));
228 icu::Locale collatorLocale(icuCollator->getLocale(ULOC_VALID_LOCALE, status));
229
230 icuCollator->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
231 ASSERT(U_SUCCESS(status));
232
233 // If numeric is found set ICU collator UCOL_NUMERIC_COLLATION to numeric
234 // Else iterate RelevantExtensionKeys to find "kn"
235 // if found, set ICU collator UCOL_NUMERIC_COLLATION to iterator->second
236 status = U_ZERO_ERROR;
237 if (foundNumeric) {
238 ASSERT(icuCollator.get() != nullptr);
239 icuCollator.get()->setAttribute(UCOL_NUMERIC_COLLATION, numeric ? UCOL_ON : UCOL_OFF, status);
240 ASSERT(U_SUCCESS(status));
241 } else {
242 auto extensionIter = r.extensions.find("kn");
243 if (extensionIter != r.extensions.end()) {
244 ASSERT(icuCollator.get() != nullptr);
245 bool found = (extensionIter->second == "true");
246 collator->SetNumeric(found);
247 icuCollator.get()->setAttribute(UCOL_NUMERIC_COLLATION, found ? UCOL_ON : UCOL_OFF, status);
248 ASSERT(U_SUCCESS(status));
249 }
250 }
251
252 // If caseFirst is not undefined set ICU collator UColAttributeValue to caseFirst
253 // Else iterate RelevantExtensionKeys to find "kf"
254 // if found, set ICU collator UColAttributeValue to iterator->second
255 status = U_ZERO_ERROR;
256 if (caseFirst != CaseFirstOption::UNDEFINED) {
257 ASSERT(icuCollator.get() != nullptr);
258 icuCollator.get()->setAttribute(UCOL_CASE_FIRST, OptionToUColAttribute(caseFirst), status);
259 ASSERT(U_SUCCESS(status));
260 } else {
261 auto extensionIter = r.extensions.find("kf");
262 if (extensionIter != r.extensions.end()) {
263 ASSERT(icuCollator.get() != nullptr);
264 auto mapIter = caseFirstMap.find(extensionIter->second);
265 if (mapIter != caseFirstMap.end()) {
266 icuCollator.get()->setAttribute(UCOL_CASE_FIRST, OptionToUColAttribute(mapIter->second), status);
267 collator->SetCaseFirst(mapIter->second);
268 } else {
269 icuCollator.get()->setAttribute(UCOL_CASE_FIRST, OptionToUColAttribute(CaseFirstOption::UNDEFINED),
270 status);
271 }
272 ASSERT(U_SUCCESS(status));
273 }
274 }
275
276 // 24. Let sensitivity be ? GetOption(options, "sensitivity", "string", « "base", "accent", "case", "variant" »,
277 // undefined).
278 SensitivityOption sensitivity = JSLocale::GetOptionOfString<SensitivityOption>(
279 thread, optionsObject, globalConst->GetHandledSensitivityString(),
280 JSCollator::SENSITIVITY_OPTION, JSCollator::SENSITIVITY_OPTION_NAME,
281 SensitivityOption::UNDEFINED);
282 RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
283 // 25. If sensitivity is undefined, then
284 // a. If usage is "sort", then
285 // i. Let sensitivity be "variant".
286 if (sensitivity == SensitivityOption::UNDEFINED) {
287 if (usage == UsageOption::SORT) {
288 sensitivity = SensitivityOption::VARIANT;
289 }
290 }
291 collator->SetSensitivity(sensitivity);
292
293 // Trans SensitivityOption to Icu strength option
294 switch (sensitivity) {
295 case SensitivityOption::BASE:
296 icuCollator->setStrength(icu::Collator::PRIMARY);
297 break;
298 case SensitivityOption::ACCENT:
299 icuCollator->setStrength(icu::Collator::SECONDARY);
300 break;
301 case SensitivityOption::CASE:
302 icuCollator->setStrength(icu::Collator::PRIMARY);
303 icuCollator->setAttribute(UCOL_CASE_LEVEL, UCOL_ON, status);
304 break;
305 case SensitivityOption::VARIANT:
306 icuCollator->setStrength(icu::Collator::TERTIARY);
307 break;
308 case SensitivityOption::UNDEFINED:
309 break;
310 case SensitivityOption::EXCEPTION:
311 LOG_ECMA(FATAL) << "this branch is unreachable";
312 UNREACHABLE();
313 }
314
315 // 27. Let ignorePunctuation be ? GetOption(options, "ignorePunctuation", "boolean", undefined, false).
316 // 28. Set collator.[[IgnorePunctuation]] to ignorePunctuation.
317 bool ignorePunctuation = false;
318 bool defaultIgnorePunctuation = false;
319 // If the ignorePunctuation is not defined, which in "th" locale that is true but false on other locales.
320 JSHandle<EcmaString> thKey = factory->NewFromUtf8("th");
321 if (JSTaggedValue::Equal(thread, JSHandle<JSTaggedValue>::Cast(thKey), locales)) {
322 defaultIgnorePunctuation = true;
323 }
324 JSLocale::GetOptionOfBool(thread, optionsObject, globalConst->GetHandledIgnorePunctuationString(),
325 defaultIgnorePunctuation, &ignorePunctuation);
326 collator->SetIgnorePunctuation(ignorePunctuation);
327 if (ignorePunctuation) {
328 status = U_ZERO_ERROR;
329 icuCollator->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, status);
330 ASSERT(U_SUCCESS(status));
331 }
332
333 if (forIcuCache) {
334 std::string cacheEntry =
335 locales->IsUndefined() ? "" : EcmaStringAccessor(locales.GetTaggedValue()).ToStdString(thread);
336 ecmaVm->GetIntlCache().SetIcuFormatterToCache(IcuFormatterType::COLLATOR,
337 cacheEntry, icuCollator.release(), JSCollator::FreeIcuCollator);
338 } else {
339 SetIcuCollator(thread, collator, icuCollator.release(), JSCollator::FreeIcuCollator);
340 }
341 collator->SetBoundCompare(thread, JSTaggedValue::Undefined());
342 // 29. Return collator.
343 return collator;
344 }
345
GetCachedIcuCollator(JSThread * thread,const JSTaggedValue & locales)346 icu::Collator *JSCollator::GetCachedIcuCollator(JSThread *thread, const JSTaggedValue &locales)
347 {
348 std::string cacheEntry = locales.IsUndefined() ? "" : EcmaStringAccessor(locales).ToStdString(thread);
349 void *cachedCollator =
350 thread->GetEcmaVM()->GetIntlCache().GetIcuFormatterFromCache(IcuFormatterType::COLLATOR, cacheEntry);
351 if (cachedCollator != nullptr) {
352 return reinterpret_cast<icu::Collator*>(cachedCollator);
353 }
354 return nullptr;
355 }
356
GetCachedIcuCollator(JSThread * thread,const JSHandle<JSTaggedValue> & locales)357 icu::Collator *JSCollator::GetCachedIcuCollator(JSThread *thread, const JSHandle<JSTaggedValue> &locales)
358 {
359 return GetCachedIcuCollator(thread, locales.GetTaggedValue());
360 }
361
OptionToUColAttribute(CaseFirstOption caseFirstOption)362 UColAttributeValue JSCollator::OptionToUColAttribute(CaseFirstOption caseFirstOption)
363 {
364 auto iter = uColAttributeValueMap.find(caseFirstOption);
365 if (iter != uColAttributeValueMap.end()) {
366 return iter->second;
367 }
368 LOG_ECMA(FATAL) << "this branch is unreachable";
369 UNREACHABLE();
370 }
371
OptionsToEcmaString(JSThread * thread,UsageOption usage)372 JSHandle<JSTaggedValue> OptionsToEcmaString(JSThread *thread, UsageOption usage)
373 {
374 JSMutableHandle<JSTaggedValue> result(thread, JSTaggedValue::Undefined());
375 auto globalConst = thread->GlobalConstants();
376 switch (usage) {
377 case UsageOption::SORT:
378 result.Update(globalConst->GetSortString());
379 break;
380 case UsageOption::SEARCH:
381 result.Update(globalConst->GetSearchString());
382 break;
383 default:
384 LOG_ECMA(FATAL) << "this branch is unreachable";
385 UNREACHABLE();
386 }
387 return result;
388 }
389
OptionsToEcmaString(JSThread * thread,SensitivityOption sensitivity)390 JSHandle<JSTaggedValue> OptionsToEcmaString(JSThread *thread, SensitivityOption sensitivity)
391 {
392 JSMutableHandle<JSTaggedValue> result(thread, JSTaggedValue::Undefined());
393 auto globalConst = thread->GlobalConstants();
394 switch (sensitivity) {
395 case SensitivityOption::BASE:
396 result.Update(globalConst->GetBaseString());
397 break;
398 case SensitivityOption::ACCENT:
399 result.Update(globalConst->GetAccentString());
400 break;
401 case SensitivityOption::CASE:
402 result.Update(globalConst->GetCaseString());
403 break;
404 case SensitivityOption::VARIANT:
405 result.Update(globalConst->GetVariantString());
406 break;
407 case SensitivityOption::UNDEFINED:
408 break;
409 default:
410 LOG_ECMA(FATAL) << "this branch is unreachable";
411 UNREACHABLE();
412 }
413 return result;
414 }
415
OptionsToEcmaString(JSThread * thread,CaseFirstOption caseFirst)416 JSHandle<JSTaggedValue> OptionsToEcmaString(JSThread *thread, CaseFirstOption caseFirst)
417 {
418 JSMutableHandle<JSTaggedValue> result(thread, JSTaggedValue::Undefined());
419 auto globalConst = thread->GlobalConstants();
420 switch (caseFirst) {
421 case CaseFirstOption::UPPER:
422 result.Update(globalConst->GetUpperString());
423 break;
424 case CaseFirstOption::LOWER:
425 result.Update(globalConst->GetLowerString());
426 break;
427 case CaseFirstOption::FALSE_OPTION:
428 result.Update(globalConst->GetFalseString());
429 break;
430 case CaseFirstOption::UNDEFINED:
431 result.Update(globalConst->GetUpperString());
432 break;
433 default:
434 LOG_ECMA(FATAL) << "this branch is unreachable";
435 UNREACHABLE();
436 }
437 return result;
438 }
439
440 // 11.3.4 Intl.Collator.prototype.resolvedOptions ()
ResolvedOptions(JSThread * thread,const JSHandle<JSCollator> & collator)441 JSHandle<JSObject> JSCollator::ResolvedOptions(JSThread *thread, const JSHandle<JSCollator> &collator)
442 {
443 auto ecmaVm = thread->GetEcmaVM();
444 auto globalConst = thread->GlobalConstants();
445 ObjectFactory *factory = ecmaVm->GetFactory();
446 JSHandle<GlobalEnv> env = ecmaVm->GetGlobalEnv();
447 JSHandle<JSFunction> funCtor(env->GetObjectFunction());
448 JSHandle<JSObject> options(factory->NewJSObjectByConstructor(funCtor));
449
450 // [[Locale]]
451 JSHandle<JSTaggedValue> property = globalConst->GetHandledLocaleString();
452 JSHandle<JSTaggedValue> locale(thread, collator->GetLocale(thread));
453 JSObject::CreateDataPropertyOrThrow(thread, options, property, locale);
454 RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSObject, thread);
455
456 // [[Usage]]
457 UsageOption usageOption = collator->GetUsage();
458 JSHandle<JSTaggedValue> usageValue = OptionsToEcmaString(thread, usageOption);
459 JSObject::CreateDataProperty(thread, options, globalConst->GetHandledUsageString(), usageValue);
460
461 // [[Sensitivity]]
462 auto sentivityOption = collator->GetSensitivity();
463 JSHandle<JSTaggedValue> sensitivityValue = OptionsToEcmaString(thread, sentivityOption);
464 JSObject::CreateDataProperty(thread, options, globalConst->GetHandledSensitivityString(), sensitivityValue);
465
466 // [[IgnorePunctuation]]
467 JSHandle<JSTaggedValue> ignorePunctuationValue(thread, JSTaggedValue(collator->GetIgnorePunctuation()));
468 JSObject::CreateDataProperty(thread, options, globalConst->GetHandledIgnorePunctuationString(),
469 ignorePunctuationValue);
470
471 // [[Collation]]
472 JSMutableHandle<JSTaggedValue> collationValue(thread, collator->GetCollation(thread));
473 UErrorCode status = U_ZERO_ERROR;
474 icu::Collator *icuCollator = collator->GetIcuCollator(thread);
475 icu::Locale icu_locale(icuCollator->getLocale(ULOC_VALID_LOCALE, status));
476 std::string collation_value =
477 icu_locale.getUnicodeKeywordValue<std::string>("co", status);
478 if (collationValue->IsUndefined()) {
479 if (collation_value != "search" && collation_value != "") {
480 collationValue.Update(factory->NewFromStdString(collation_value).GetTaggedValue());
481 } else {
482 collationValue.Update(globalConst->GetDefaultString());
483 }
484 }
485 JSObject::CreateDataProperty(thread, options, globalConst->GetHandledCollationString(), collationValue);
486
487 // [[Numeric]]
488 JSHandle<JSTaggedValue> numericValue(thread, JSTaggedValue(collator->GetNumeric()));
489 JSObject::CreateDataProperty(thread, options, globalConst->GetHandledNumericString(), numericValue);
490
491 // [[CaseFirst]]
492 CaseFirstOption caseFirstOption = collator->GetCaseFirst();
493 // In Ecma402 spec, caseFirst is an optional property so we set it to Upper when input is undefined
494 // the requirement maybe change in the future
495 JSHandle<JSTaggedValue> caseFirstValue = OptionsToEcmaString(thread, caseFirstOption);
496 JSObject::CreateDataProperty(thread, options, globalConst->GetHandledCaseFirstString(), caseFirstValue);
497 return options;
498 }
499
CompareStringsOptionFor(JSThread * thread,JSHandle<JSTaggedValue> locales)500 CompareStringsOption JSCollator::CompareStringsOptionFor(JSThread* thread,
501 JSHandle<JSTaggedValue> locales)
502 {
503 if (locales->IsUndefined()) {
504 auto& intlCache = thread->GetEcmaVM()->GetIntlCache();
505 auto defaultCompareOption = intlCache.GetDefaultCompareStringsOption();
506 if (defaultCompareOption.has_value()) {
507 return defaultCompareOption.value();
508 }
509 auto defaultLocale = intl::LocaleHelper::StdStringDefaultLocale(thread);
510 for (const char *fastLocale : FAST_LOCALE) {
511 if (strcmp(fastLocale, defaultLocale.c_str()) == 0) {
512 intlCache.SetDefaultCompareStringsOption(CompareStringsOption::TRY_FAST_PATH);
513 return CompareStringsOption::TRY_FAST_PATH;
514 }
515 }
516 intlCache.SetDefaultCompareStringsOption(CompareStringsOption::NONE);
517 return CompareStringsOption::NONE;
518 }
519
520 if (!locales->IsString()) {
521 return CompareStringsOption::NONE;
522 }
523
524 JSHandle<EcmaString> localesString = JSHandle<EcmaString>::Cast(locales);
525 CString localesStr = ConvertToString(thread, *localesString, StringConvertedUsage::LOGICOPERATION);
526 for (const char *fastLocale : FAST_LOCALE) {
527 if (strcmp(fastLocale, localesStr.c_str()) == 0) {
528 return CompareStringsOption::TRY_FAST_PATH;
529 }
530 }
531
532 return CompareStringsOption::NONE;
533 }
534
CompareStringsOptionFor(JSThread * thread,JSHandle<JSTaggedValue> locales,JSHandle<JSTaggedValue> options)535 CompareStringsOption JSCollator::CompareStringsOptionFor(JSThread* thread,
536 JSHandle<JSTaggedValue> locales,
537 JSHandle<JSTaggedValue> options)
538 {
539 if (!options->IsUndefined()) {
540 return CompareStringsOption::NONE;
541 }
542 return CompareStringsOptionFor(thread, locales);
543 }
544
// Anonymous namespace for CompareStrings
546 namespace {
// Level-1 collation weights, indexed by character code. A weight of 0 marks a character that
// cannot be fast-compared (see CanFastCompare). Only the first 128 entries are spelled out; the
// remaining entries of the 256-element array are zero-initialized. Upper- and lower-case letters
// share the same weight here (e.g. indices 65 and 97 are both 49).
constexpr uint8_t COLLATION_WEIGHT_L1[256] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 12, 16, 28, 38, 29, 27, 15,
    17, 18, 24, 32, 9, 8, 14, 25, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 11, 10,
    33, 34, 35, 13, 23, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
    64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 19, 26, 20, 31, 7, 30, 49, 50, 51,
    52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71,
    72, 73, 74, 21, 36, 22, 37, 0,
};
// Level-3 collation weights, indexed by character code, laid out like COLLATION_WEIGHT_L1.
// Indices 65-90 (upper-case letters) carry weight 2; every other fast-comparable character
// carries weight 1, so the level-3 comparison places lower case before upper case.
constexpr uint8_t COLLATION_WEIGHT_L3[256] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 0,
};
// Number of entries in the weight tables (256); used as the upper bound check in CanFastCompare.
constexpr int COLLATION_WEIGHT_LENGTH = sizeof(COLLATION_WEIGHT_L1) / sizeof(COLLATION_WEIGHT_L1[0]);
566
ToUCollationResult(int delta)567 constexpr UCollationResult ToUCollationResult(int delta)
568 {
569 return delta < 0 ? UCollationResult::UCOL_LESS
570 : (delta > 0 ? UCollationResult::UCOL_GREATER
571 : UCollationResult::UCOL_EQUAL);
572 }
573
574 struct FastCompareStringsData {
575 UCollationResult l1Result = UCollationResult::UCOL_EQUAL;
576 UCollationResult l3Result = UCollationResult::UCOL_EQUAL;
577 int processedUntil = 0;
578 int firstDiffAt = 0; // The first relevant diff (L1 if exists, else L3).
579 bool hasDiff = false;
580
FastCompareFailedpanda::ecmascript::__anon13eda0b80111::FastCompareStringsData581 std::optional<UCollationResult> FastCompareFailed(int& processedUntilOut) const
582 {
583 if (hasDiff) {
584 // Found some difference, continue there to ensure the generic algorithm picks it up.
585 processedUntilOut = firstDiffAt;
586 } else {
587 // No difference found, reprocess the last processed character since it may be
588 // followed by a unicode combining character.
589 processedUntilOut = std::max(processedUntil - 1, 0);
590 }
591 return {};
592 }
593 };
594
595 template <class T>
CanFastCompare(T ch)596 constexpr bool CanFastCompare(T ch)
597 {
598 return ch < COLLATION_WEIGHT_LENGTH && COLLATION_WEIGHT_L1[ch] != 0;
599 }
600
601 // Check canFastCompare, L1 weight, and L3 weight together.
602 // Use FastCompareStringsData to store these results.
603 template <class T1, class T2>
FastCompareFlatString(const T1 * lhs,const T2 * rhs,int length,FastCompareStringsData & fastCompareData)604 bool FastCompareFlatString(const T1* lhs, const T2* rhs, int length, FastCompareStringsData& fastCompareData)
605 {
606 for (int i = 0; i < length; i++) {
607 const T1 l = lhs[i];
608 const T2 r = rhs[i];
609 if (!CanFastCompare(l) || !CanFastCompare(r)) {
610 fastCompareData.processedUntil = i;
611 return false;
612 }
613 auto l1Result = ToUCollationResult(COLLATION_WEIGHT_L1[l] - COLLATION_WEIGHT_L1[r]);
614 if (l1Result != UCollationResult::UCOL_EQUAL) {
615 fastCompareData.hasDiff = true;
616 fastCompareData.firstDiffAt = i;
617 fastCompareData.processedUntil = i;
618 fastCompareData.l1Result = l1Result;
619 return true;
620 }
621 if (l != r && fastCompareData.l3Result == UCollationResult::UCOL_EQUAL) {
622 auto l3Result = ToUCollationResult(COLLATION_WEIGHT_L3[l] - COLLATION_WEIGHT_L3[r]);
623 fastCompareData.l3Result = l3Result;
624 if (!fastCompareData.hasDiff) {
625 fastCompareData.hasDiff = true;
626 fastCompareData.firstDiffAt = i;
627 }
628 }
629 }
630 fastCompareData.processedUntil = length;
631 return true;
632 }
633
FastCompareStringFlatContent(JSThread * thread,EcmaString * string1,EcmaString * string2,int length,FastCompareStringsData & fastCompareData)634 bool FastCompareStringFlatContent(JSThread *thread,
635 EcmaString* string1, EcmaString* string2,
636 int length, FastCompareStringsData& fastCompareData)
637 {
638 EcmaStringAccessor string1Acc(string1);
639 EcmaStringAccessor string2Acc(string2);
640 if (string1Acc.IsUtf8()) {
641 auto l = EcmaStringAccessor::GetNonTreeUtf8Data(thread, string1);
642 if (string2Acc.IsUtf8()) {
643 auto r = EcmaStringAccessor::GetNonTreeUtf8Data(thread, string2);
644 return FastCompareFlatString(l, r, length, fastCompareData);
645 } else {
646 auto r = EcmaStringAccessor::GetNonTreeUtf16Data(thread, string2);
647 return FastCompareFlatString(l, r, length, fastCompareData);
648 }
649 } else {
650 auto l = EcmaStringAccessor::GetNonTreeUtf16Data(thread, string1);
651 if (string2Acc.IsUtf8()) {
652 auto r = EcmaStringAccessor::GetNonTreeUtf8Data(thread, string2);
653 return FastCompareFlatString(l, r, length, fastCompareData);
654 } else {
655 auto r = EcmaStringAccessor::GetNonTreeUtf16Data(thread, string2);
656 return FastCompareFlatString(l, r, length, fastCompareData);
657 }
658 }
659 UNREACHABLE();
660 }
661
CharIsAsciiOrOutOfBounds(JSThread * thread,EcmaString * string,int stringLength,int index)662 bool CharIsAsciiOrOutOfBounds(JSThread *thread, EcmaString* string, int stringLength, int index)
663 {
664 return index >= stringLength ||
665 EcmaStringAccessor::IsASCIICharacter(EcmaStringAccessor(string).Get<false>(thread, index));
666 }
667
CharCanFastCompareOrOutOfBounds(JSThread * thread,EcmaString * string,int stringLength,int index)668 bool CharCanFastCompareOrOutOfBounds(JSThread *thread, EcmaString* string, int stringLength, int index)
669 {
670 return index >= stringLength || CanFastCompare(EcmaStringAccessor(string).Get<false>(thread, index));
671 }
672
673 // Pseudo-code for simplified multi-pass algorithm is:
674 // // Only a certain subset of the ASCII range can be fast-compared.
675 // // In the actual single-pass algorithm below, we tolerate non-ASCII contents.
676 // 1. Check string1 and string2 can fastcompare.
677 // 2. Compare L1 weight for each char, the greater wins.
678 // 3. Is two strings are L1 equal in common length, the longer wins.
679 // 4. Compare L3 weight for each char, the greater wins.
680 // 5. If all equal, return equal.
681 // 6. Once some chars cannot be fastcompared, use icu.
682
TryFastCompareStrings(JSThread * thread,EcmaString * string1,EcmaString * string2,int & processedUntilOut)683 std::optional<UCollationResult> TryFastCompareStrings(JSThread *thread,
684 EcmaString* string1, EcmaString* string2,
685 int& processedUntilOut)
686 {
687 processedUntilOut = 0;
688
689 const auto length1 = static_cast<int>(EcmaStringAccessor(string1).GetLength());
690 const auto length2 = static_cast<int>(EcmaStringAccessor(string2).GetLength());
691 int commonLength = std::min(length1, length2);
692
693 FastCompareStringsData fastCompareData;
694 if (!FastCompareStringFlatContent(thread, string1, string2, commonLength, fastCompareData)) {
695 return fastCompareData.FastCompareFailed(processedUntilOut);
696 }
697 // The result is only valid if the last processed character is not followed
698 // by a unicode combining character.
699 if (!CharIsAsciiOrOutOfBounds(thread, string1, length1, fastCompareData.processedUntil + 1) ||
700 !CharIsAsciiOrOutOfBounds(thread, string2, length2, fastCompareData.processedUntil + 1)) {
701 return fastCompareData.FastCompareFailed(processedUntilOut);
702 }
703 if (fastCompareData.l1Result != UCollationResult::UCOL_EQUAL) {
704 return fastCompareData.l1Result;
705 }
706 // Strings are L1-equal up to their common length, length differences win.
707 UCollationResult lengthResult = ToUCollationResult(length1 - length2);
708 if (lengthResult != UCollationResult::UCOL_EQUAL) {
709 // Strings of different lengths may still compare as equal if the longer
710 // string has a fully ignored suffix, e.g. "a" vs. "a\u{1}".
711 if (!CharCanFastCompareOrOutOfBounds(thread, string1, length1, commonLength) ||
712 !CharCanFastCompareOrOutOfBounds(thread, string2, length2, commonLength)) {
713 return fastCompareData.FastCompareFailed(processedUntilOut);
714 }
715 return lengthResult;
716 }
717 // L1-equal and same length, the L3 result wins.
718 return fastCompareData.l3Result;
719 }
720 } // namespace
721
722 //StringPiece is similar to std::string_view
ToICUStringPiece(JSThread * thread,EcmaString * string,int offset=0)723 icu::StringPiece ToICUStringPiece(JSThread *thread, EcmaString* string, int offset = 0)
724 {
725 EcmaStringAccessor stringAcc(string);
726 ASSERT(stringAcc.IsUtf8());
727 ASSERT(!stringAcc.IsTreeString());
728 return icu::StringPiece(reinterpret_cast<const char *>(EcmaStringAccessor::GetNonTreeUtf8Data(thread, string)) +
729 offset,
730 static_cast<int>(stringAcc.GetLength()) - offset);
731 }
732
733 // Convert to a UTF16 string and partially convert to ICUUnicodeString
ToICUUnicodeString(JSThread * thread,EcmaString * string,int offset=0)734 icu::UnicodeString ToICUUnicodeString(JSThread *thread, EcmaString* string, int offset = 0)
735 {
736 EcmaStringAccessor stringAcc(string);
737 ASSERT(!stringAcc.IsTreeString());
738 int strLength = static_cast<int>(stringAcc.GetLength());
739 int partialLength = strLength - offset;
740 if (stringAcc.IsUtf8()) {
741 constexpr int shortStringLength = 80; // 80: short string length
742 if (partialLength <= shortStringLength) {
743 // short string on stack
744 UChar shortStringBuffer[shortStringLength];
745 // utf8 is within ascii, std::copy_n from utf8 to utf16 is OK
746 std::copy_n(EcmaStringAccessor::GetNonTreeUtf8Data(thread, string) + offset, partialLength,
747 shortStringBuffer);
748 return icu::UnicodeString(shortStringBuffer, partialLength);
749 }
750 CVector<uint16_t> ucharBuffer(partialLength);
751 std::copy_n(EcmaStringAccessor::GetNonTreeUtf8Data(thread, string) + offset, partialLength,
752 ucharBuffer.begin());
753 return icu::UnicodeString(ucharBuffer.data(), partialLength);
754 } else {
755 return icu::UnicodeString(EcmaStringAccessor::GetNonTreeUtf16Data(thread, string) + offset, partialLength);
756 }
757 }
758
SlowCompareStrings(JSThread * thread,const icu::Collator * icuCollator,EcmaString * flatString1,EcmaString * flatString2,int processedUntil)759 JSTaggedValue JSCollator::SlowCompareStrings(JSThread *thread,
760 const icu::Collator *icuCollator,
761 EcmaString* flatString1,
762 EcmaString* flatString2,
763 int processedUntil)
764 {
765 UCollationResult result;
766 UErrorCode status = U_ZERO_ERROR;
767 if (EcmaStringAccessor(flatString1).IsUtf8() && EcmaStringAccessor(flatString2).IsUtf8()) {
768 auto string1Piece = ToICUStringPiece(thread, flatString1, processedUntil);
769 if (!string1Piece.empty()) {
770 auto string2Piece = ToICUStringPiece(thread, flatString2, processedUntil);
771 if (!string2Piece.empty()) {
772 result = icuCollator->compareUTF8(string1Piece, string2Piece, status);
773 return JSTaggedValue(result);
774 }
775 }
776 }
777
778 auto uString1 = ToICUUnicodeString(thread, flatString1, processedUntil);
779 auto uString2 = ToICUUnicodeString(thread, flatString2, processedUntil);
780 result = icuCollator->compare(uString1, uString2, status);
781 ASSERT(U_SUCCESS(status));
782 return JSTaggedValue(result);
783 }
784
CompareStrings(JSThread * thread,const icu::Collator * icuCollator,const JSHandle<EcmaString> & string1,const JSHandle<EcmaString> & string2,CompareStringsOption csOption)785 JSTaggedValue JSCollator::CompareStrings(JSThread *thread, const icu::Collator *icuCollator,
786 const JSHandle<EcmaString> &string1, const JSHandle<EcmaString> &string2,
787 [[maybe_unused]]CompareStringsOption csOption)
788 {
789 if (*string1 == *string2) {
790 return JSTaggedValue(UCollationResult::UCOL_EQUAL);
791 }
792
793 // Since Unicode has ignorable characters,
794 // we cannot return early for 0-length strings.
795 auto flatString1 = JSHandle<EcmaString>(thread, EcmaStringAccessor::Flatten(thread->GetEcmaVM(), string1));
796 auto flatString2 = JSHandle<EcmaString>(thread, EcmaStringAccessor::Flatten(thread->GetEcmaVM(), string2));
797
798 int processedUntil = 0;
799 if (csOption == CompareStringsOption::TRY_FAST_PATH) {
800 auto maybeResult = TryFastCompareStrings(thread, *flatString1, *flatString2, processedUntil);
801 if (maybeResult.has_value()) {
802 return JSTaggedValue(maybeResult.value());
803 }
804 }
805 return SlowCompareStrings(thread, icuCollator, *flatString1, *flatString2, processedUntil);
806 }
807
FastCachedCompareStrings(JSThread * thread,JSHandle<JSTaggedValue> locales,const JSHandle<EcmaString> & string1,const JSHandle<EcmaString> & string2,CompareStringsOption csOption)808 JSTaggedValue JSCollator::FastCachedCompareStrings(JSThread *thread, JSHandle<JSTaggedValue> locales,
809 const JSHandle<EcmaString> &string1,
810 const JSHandle<EcmaString> &string2,
811 CompareStringsOption csOption)
812 {
813 if (*string1 == *string2) {
814 return JSTaggedValue(UCollationResult::UCOL_EQUAL);
815 }
816
817 // Since Unicode has ignorable characters,
818 // we cannot return early for 0-length strings.
819 auto flatString1 = JSHandle<EcmaString>(thread, EcmaStringAccessor::Flatten(thread->GetEcmaVM(), string1));
820 auto flatString2 = JSHandle<EcmaString>(thread, EcmaStringAccessor::Flatten(thread->GetEcmaVM(), string2));
821
822 int processedUntil = 0;
823 if (csOption == CompareStringsOption::TRY_FAST_PATH) {
824 auto maybeResult = TryFastCompareStrings(thread, *flatString1, *flatString2, processedUntil);
825 if (maybeResult.has_value()) {
826 return JSTaggedValue(maybeResult.value());
827 }
828 }
829
830 auto icuCollator = JSCollator::GetCachedIcuCollator(thread, locales);
831 if (icuCollator != nullptr) {
832 return SlowCompareStrings(thread, icuCollator, *flatString1, *flatString2, processedUntil);
833 }
834 return JSTaggedValue::Undefined();
835 }
836 } // namespace panda::ecmascript
837