1 /*
2 * Copyright (c) 2021-2024 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "ecmascript/js_collator.h"
17
18 #include "ecmascript/ecma_context.h"
19 #include "ecmascript/intl/locale_helper.h"
20 #include "ecmascript/global_env.h"
21 #include "ecmascript/ecma_string-inl.h"
22 namespace panda::ecmascript {
23 // NOLINTNEXTLINE (readability-identifier-naming, fuchsia-statically-constructed-objects)
24 const CString JSCollator::uIcuDataColl = U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "coll";
25 const std::map<std::string, CaseFirstOption> JSCollator::caseFirstMap = {
26 {"upper", CaseFirstOption::UPPER},
27 {"lower", CaseFirstOption::LOWER},
28 {"false", CaseFirstOption::FALSE_OPTION}
29 };
30 const std::map<CaseFirstOption, UColAttributeValue> JSCollator::uColAttributeValueMap = {
31 {CaseFirstOption::UPPER, UCOL_UPPER_FIRST},
32 {CaseFirstOption::LOWER, UCOL_LOWER_FIRST},
33 {CaseFirstOption::FALSE_OPTION, UCOL_OFF},
34 {CaseFirstOption::UNDEFINED, UCOL_OFF}
35 };
36 const std::vector<LocaleMatcherOption> JSCollator::LOCALE_MATCHER_OPTION = {
37 LocaleMatcherOption::LOOKUP, LocaleMatcherOption::BEST_FIT
38 };
39 const std::vector<std::string> JSCollator::LOCALE_MATCHER_OPTION_NAME = {"lookup", "best fit"};
40
41 const std::vector<CaseFirstOption> JSCollator::CASE_FIRST_OPTION = {
42 CaseFirstOption::UPPER, CaseFirstOption::LOWER, CaseFirstOption::FALSE_OPTION
43 };
44 const std::vector<std::string> JSCollator::CASE_FIRST_OPTION_NAME = {"upper", "lower", "false"};
45
46 const std::set<std::string> JSCollator::RELEVANT_EXTENSION_KEYS = {"co", "kn", "kf"};
47
48 const std::vector<SensitivityOption> JSCollator::SENSITIVITY_OPTION = {
49 SensitivityOption::BASE, SensitivityOption::ACCENT,
50 SensitivityOption::CASE, SensitivityOption::VARIANT
51 };
52 const std::vector<std::string> JSCollator::SENSITIVITY_OPTION_NAME = {"base", "accent", "case", "variant"};
53
54 const std::vector<UsageOption> JSCollator::USAGE_OPTION = {UsageOption::SORT, UsageOption::SEARCH};
55 const std::vector<std::string> JSCollator::USAGE_OPTION_NAME = {"sort", "search"};
56
57 // All the available locales that are statically known to fulfill fast path conditions.
58 const char* const JSCollator::FAST_LOCALE[] = {
59 "en-US", "en", "fr", "es", "de", "pt", "it", "ca",
60 "de-AT", "fi", "id", "id-ID", "ms", "nl", "pl", "ro",
61 "sl", "sv", "sw", "vi", "en-DE", "en-GB",
62 };
63
64
GetAvailableLocales(JSThread * thread,bool enableLocaleCache)65 JSHandle<TaggedArray> JSCollator::GetAvailableLocales(JSThread *thread, bool enableLocaleCache)
66 {
67 const char *key = nullptr;
68 const char *path = JSCollator::uIcuDataColl.c_str();
69 // key and path are const, so we can cache the result
70 if (enableLocaleCache) {
71 JSHandle<JSTaggedValue> cachedLocales = thread->GlobalConstants()->GetHandledCachedJSCollatorLocales();
72 if (cachedLocales->IsHeapObject()) {
73 return JSHandle<TaggedArray>(cachedLocales);
74 }
75 }
76 std::vector<std::string> availableStringLocales = intl::LocaleHelper::GetAvailableLocales(thread, key, path);
77 JSHandle<TaggedArray> availableLocales = JSLocale::ConstructLocaleList(thread, availableStringLocales);
78 if (enableLocaleCache) {
79 GlobalEnvConstants *constants = const_cast<GlobalEnvConstants *>(thread->GlobalConstants());
80 constants->SetCachedLocales(availableLocales.GetTaggedValue());
81 }
82 return availableLocales;
83 }
84
85 /* static */
SetIcuCollator(JSThread * thread,const JSHandle<JSCollator> & collator,icu::Collator * icuCollator,const NativePointerCallback & callback)86 void JSCollator::SetIcuCollator(JSThread *thread, const JSHandle<JSCollator> &collator,
87 icu::Collator *icuCollator, const NativePointerCallback &callback)
88 {
89 EcmaVM *ecmaVm = thread->GetEcmaVM();
90 ObjectFactory *factory = ecmaVm->GetFactory();
91
92 ASSERT(icuCollator != nullptr);
93 JSTaggedValue data = collator->GetIcuField();
94 if (data.IsJSNativePointer()) {
95 JSNativePointer *native = JSNativePointer::Cast(data.GetTaggedObject());
96 native->ResetExternalPointer(thread, icuCollator);
97 return;
98 }
99 JSHandle<JSNativePointer> pointer = factory->NewJSNativePointer(icuCollator, callback);
100 collator->SetIcuField(thread, pointer.GetTaggedValue());
101 }
102
InitializeCollator(JSThread * thread,const JSHandle<JSCollator> & collator,const JSHandle<JSTaggedValue> & locales,const JSHandle<JSTaggedValue> & options,bool forIcuCache,bool enableLocaleCache)103 JSHandle<JSCollator> JSCollator::InitializeCollator(JSThread *thread,
104 const JSHandle<JSCollator> &collator,
105 const JSHandle<JSTaggedValue> &locales,
106 const JSHandle<JSTaggedValue> &options,
107 bool forIcuCache,
108 bool enableLocaleCache)
109 {
110 EcmaVM *ecmaVm = thread->GetEcmaVM();
111 ObjectFactory *factory = ecmaVm->GetFactory();
112 const GlobalEnvConstants *globalConst = thread->GlobalConstants();
113 // 1. Let requestedLocales be ? CanonicalizeLocaleList(locales).
114 JSHandle<TaggedArray> requestedLocales = intl::LocaleHelper::CanonicalizeLocaleList(thread, locales);
115 RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
116
117 // 2. If options is undefined, then
118 // a. Let options be ObjectCreate(null).
119 // 3. Else,
120 // a. Let options be ? ToObject(options).
121 JSHandle<JSObject> optionsObject;
122 if (options->IsUndefined()) {
123 optionsObject = factory->CreateNullJSObject();
124 } else {
125 optionsObject = JSTaggedValue::ToObject(thread, options);
126 RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
127 }
128 // 4. Let usage be ? GetOption(options, "usage", "string", « "sort", "search" », "sort").
129 auto usage = JSLocale::GetOptionOfString<UsageOption>(thread, optionsObject, globalConst->GetHandledUsageString(),
130 JSCollator::USAGE_OPTION, JSCollator::USAGE_OPTION_NAME,
131 UsageOption::SORT);
132 RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
133 collator->SetUsage(usage);
134
135 // 5. Let matcher be ? GetOption(options, "localeMatcher", "string", « "lookup", "best fit" », "best fit").
136 auto matcher = JSLocale::GetOptionOfString<LocaleMatcherOption>(
137 thread, optionsObject, globalConst->GetHandledLocaleMatcherString(),
138 JSCollator::LOCALE_MATCHER_OPTION, JSCollator::LOCALE_MATCHER_OPTION_NAME,
139 LocaleMatcherOption::BEST_FIT);
140 RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
141
142 // 6. Let collation be ? GetOption(options, "collation", "string", undefined, undefined).
143 // 7. If collation is not undefined, then
144 // a. If collation does not match the Unicode Locale Identifier type nonterminal, throw a RangeError exception.
145 JSHandle<JSTaggedValue> collation =
146 JSLocale::GetOption(thread, optionsObject, globalConst->GetHandledCollationString(), OptionType::STRING,
147 globalConst->GetHandledUndefined(), globalConst->GetHandledUndefined());
148 RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
149 collator->SetCollation(thread, collation);
150 std::string collationStr;
151 if (!collation->IsUndefined()) {
152 JSHandle<EcmaString> collationEcmaStr = JSHandle<EcmaString>::Cast(collation);
153 collationStr = intl::LocaleHelper::ConvertToStdString(collationEcmaStr);
154 if (!JSLocale::IsWellAlphaNumList(collationStr)) {
155 THROW_RANGE_ERROR_AND_RETURN(thread, "invalid collation", collator);
156 }
157 }
158
159 // 8. Let numeric be ? GetOption(options, "numeric", "boolean", undefined, undefined).
160 bool numeric = false;
161 bool foundNumeric =
162 JSLocale::GetOptionOfBool(thread, optionsObject, globalConst->GetHandledNumericString(), false, &numeric);
163 RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
164 collator->SetNumeric(numeric);
165
166 // 14. Let caseFirst be ? GetOption(options, "caseFirst", "string", « "upper", "lower", "false" », undefined).
167 CaseFirstOption caseFirst = JSLocale::GetOptionOfString<CaseFirstOption>(
168 thread, optionsObject, globalConst->GetHandledCaseFirstString(),
169 JSCollator::CASE_FIRST_OPTION, JSCollator::CASE_FIRST_OPTION_NAME,
170 CaseFirstOption::UNDEFINED);
171 RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
172 collator->SetCaseFirst(caseFirst);
173
174 // 16. Let relevantExtensionKeys be %Collator%.[[RelevantExtensionKeys]].
175
176 // 17. Let r be ResolveLocale(%Collator%.[[AvailableLocales]], requestedLocales, opt,
177 // %Collator%.[[RelevantExtensionKeys]], localeData).
178 JSHandle<TaggedArray> availableLocales;
179 if (requestedLocales->GetLength() == 0) {
180 availableLocales = factory->EmptyArray();
181 } else {
182 availableLocales = GetAvailableLocales(thread, enableLocaleCache);
183 }
184 ResolvedLocale r =
185 JSLocale::ResolveLocale(thread, availableLocales, requestedLocales, matcher, RELEVANT_EXTENSION_KEYS);
186 RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
187 icu::Locale icuLocale = r.localeData;
188 JSHandle<EcmaString> localeStr = intl::LocaleHelper::ToLanguageTag(thread, icuLocale);
189 RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
190 collator->SetLocale(thread, localeStr.GetTaggedValue());
191 ASSERT_PRINT(!icuLocale.isBogus(), "icuLocale is bogus");
192
193 // If collation is undefined iterate RelevantExtensionKeys to find "co"
194 // if found, set ICU collator UnicodeKeyword to iterator->second
195 UErrorCode status = U_ZERO_ERROR;
196 if (!collation->IsUndefined()) {
197 auto extensionIter = r.extensions.find("co");
198 if (extensionIter != r.extensions.end() && extensionIter->second != collationStr) {
199 icuLocale.setUnicodeKeywordValue("co", nullptr, status);
200 ASSERT_PRINT(U_SUCCESS(status), "icuLocale set co failed");
201 }
202 }
203
204 // If usage is serach set co-serach to icu locale key word value
205 // Eles set collation string to icu locale key word value
206 if (usage == UsageOption::SEARCH) {
207 icuLocale.setUnicodeKeywordValue("co", "search", status);
208 ASSERT(U_SUCCESS(status));
209 } else {
210 if (!collationStr.empty() && JSLocale::IsWellCollation(icuLocale, collationStr)) {
211 icuLocale.setUnicodeKeywordValue("co", collationStr, status);
212 ASSERT(U_SUCCESS(status));
213 }
214 }
215
216 std::unique_ptr<icu::Collator> icuCollator(icu::Collator::createInstance(icuLocale, status));
217 if (U_FAILURE(status) || icuCollator == nullptr) { // NOLINT(readability-implicit-bool-conversion)
218 if (status == UErrorCode::U_MISSING_RESOURCE_ERROR) {
219 THROW_REFERENCE_ERROR_AND_RETURN(thread, "can not find icu data resources", collator);
220 }
221 status = U_ZERO_ERROR;
222 icu::Locale localeName(icuLocale.getBaseName());
223 icuCollator.reset(icu::Collator::createInstance(localeName, status));
224 if (U_FAILURE(status) || icuCollator == nullptr) { // NOLINT(readability-implicit-bool-conversion)
225 THROW_RANGE_ERROR_AND_RETURN(thread, "invalid collation", collator);
226 }
227 }
228 ASSERT(U_SUCCESS(status));
229 icu::Locale collatorLocale(icuCollator->getLocale(ULOC_VALID_LOCALE, status));
230
231 icuCollator->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
232 ASSERT(U_SUCCESS(status));
233
234 // If numeric is found set ICU collator UCOL_NUMERIC_COLLATION to numeric
235 // Else iterate RelevantExtensionKeys to find "kn"
236 // if found, set ICU collator UCOL_NUMERIC_COLLATION to iterator->second
237 status = U_ZERO_ERROR;
238 if (foundNumeric) {
239 ASSERT(icuCollator.get() != nullptr);
240 icuCollator.get()->setAttribute(UCOL_NUMERIC_COLLATION, numeric ? UCOL_ON : UCOL_OFF, status);
241 ASSERT(U_SUCCESS(status));
242 } else {
243 auto extensionIter = r.extensions.find("kn");
244 if (extensionIter != r.extensions.end()) {
245 ASSERT(icuCollator.get() != nullptr);
246 bool found = (extensionIter->second == "true");
247 collator->SetNumeric(found);
248 icuCollator.get()->setAttribute(UCOL_NUMERIC_COLLATION, found ? UCOL_ON : UCOL_OFF, status);
249 ASSERT(U_SUCCESS(status));
250 }
251 }
252
253 // If caseFirst is not undefined set ICU collator UColAttributeValue to caseFirst
254 // Else iterate RelevantExtensionKeys to find "kf"
255 // if found, set ICU collator UColAttributeValue to iterator->second
256 status = U_ZERO_ERROR;
257 if (caseFirst != CaseFirstOption::UNDEFINED) {
258 ASSERT(icuCollator.get() != nullptr);
259 icuCollator.get()->setAttribute(UCOL_CASE_FIRST, OptionToUColAttribute(caseFirst), status);
260 ASSERT(U_SUCCESS(status));
261 } else {
262 auto extensionIter = r.extensions.find("kf");
263 if (extensionIter != r.extensions.end()) {
264 ASSERT(icuCollator.get() != nullptr);
265 auto mapIter = caseFirstMap.find(extensionIter->second);
266 if (mapIter != caseFirstMap.end()) {
267 icuCollator.get()->setAttribute(UCOL_CASE_FIRST, OptionToUColAttribute(mapIter->second), status);
268 collator->SetCaseFirst(mapIter->second);
269 } else {
270 icuCollator.get()->setAttribute(UCOL_CASE_FIRST, OptionToUColAttribute(CaseFirstOption::UNDEFINED),
271 status);
272 }
273 ASSERT(U_SUCCESS(status));
274 }
275 }
276
277 // 24. Let sensitivity be ? GetOption(options, "sensitivity", "string", « "base", "accent", "case", "variant" »,
278 // undefined).
279 SensitivityOption sensitivity = JSLocale::GetOptionOfString<SensitivityOption>(
280 thread, optionsObject, globalConst->GetHandledSensitivityString(),
281 JSCollator::SENSITIVITY_OPTION, JSCollator::SENSITIVITY_OPTION_NAME,
282 SensitivityOption::UNDEFINED);
283 RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
284 // 25. If sensitivity is undefined, then
285 // a. If usage is "sort", then
286 // i. Let sensitivity be "variant".
287 if (sensitivity == SensitivityOption::UNDEFINED) {
288 if (usage == UsageOption::SORT) {
289 sensitivity = SensitivityOption::VARIANT;
290 }
291 }
292 collator->SetSensitivity(sensitivity);
293
294 // Trans SensitivityOption to Icu strength option
295 switch (sensitivity) {
296 case SensitivityOption::BASE:
297 icuCollator->setStrength(icu::Collator::PRIMARY);
298 break;
299 case SensitivityOption::ACCENT:
300 icuCollator->setStrength(icu::Collator::SECONDARY);
301 break;
302 case SensitivityOption::CASE:
303 icuCollator->setStrength(icu::Collator::PRIMARY);
304 icuCollator->setAttribute(UCOL_CASE_LEVEL, UCOL_ON, status);
305 break;
306 case SensitivityOption::VARIANT:
307 icuCollator->setStrength(icu::Collator::TERTIARY);
308 break;
309 case SensitivityOption::UNDEFINED:
310 break;
311 case SensitivityOption::EXCEPTION:
312 LOG_ECMA(FATAL) << "this branch is unreachable";
313 UNREACHABLE();
314 }
315
316 // 27. Let ignorePunctuation be ? GetOption(options, "ignorePunctuation", "boolean", undefined, false).
317 // 28. Set collator.[[IgnorePunctuation]] to ignorePunctuation.
318 bool ignorePunctuation = false;
319 bool defaultIgnorePunctuation = false;
320 // If the ignorePunctuation is not defined, which in "th" locale that is true but false on other locales.
321 JSHandle<EcmaString> thKey = factory->NewFromUtf8("th");
322 if (JSTaggedValue::Equal(thread, JSHandle<JSTaggedValue>::Cast(thKey), locales)) {
323 defaultIgnorePunctuation = true;
324 }
325 JSLocale::GetOptionOfBool(thread, optionsObject, globalConst->GetHandledIgnorePunctuationString(),
326 defaultIgnorePunctuation, &ignorePunctuation);
327 collator->SetIgnorePunctuation(ignorePunctuation);
328 if (ignorePunctuation) {
329 status = U_ZERO_ERROR;
330 icuCollator->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, status);
331 ASSERT(U_SUCCESS(status));
332 }
333
334 if (forIcuCache) {
335 std::string cacheEntry =
336 locales->IsUndefined() ? "" : EcmaStringAccessor(locales.GetTaggedValue()).ToStdString();
337 ecmaVm->GetIntlCache().SetIcuFormatterToCache(IcuFormatterType::COLLATOR,
338 cacheEntry, icuCollator.release(), JSCollator::FreeIcuCollator);
339 } else {
340 SetIcuCollator(thread, collator, icuCollator.release(), JSCollator::FreeIcuCollator);
341 }
342 collator->SetBoundCompare(thread, JSTaggedValue::Undefined());
343 // 29. Return collator.
344 return collator;
345 }
346
GetCachedIcuCollator(JSThread * thread,const JSTaggedValue & locales)347 icu::Collator *JSCollator::GetCachedIcuCollator(JSThread *thread, const JSTaggedValue &locales)
348 {
349 std::string cacheEntry = locales.IsUndefined() ? "" : EcmaStringAccessor(locales).ToStdString();
350 void *cachedCollator =
351 thread->GetEcmaVM()->GetIntlCache().GetIcuFormatterFromCache(IcuFormatterType::COLLATOR, cacheEntry);
352 if (cachedCollator != nullptr) {
353 return reinterpret_cast<icu::Collator*>(cachedCollator);
354 }
355 return nullptr;
356 }
357
GetCachedIcuCollator(JSThread * thread,const JSHandle<JSTaggedValue> & locales)358 icu::Collator *JSCollator::GetCachedIcuCollator(JSThread *thread, const JSHandle<JSTaggedValue> &locales)
359 {
360 return GetCachedIcuCollator(thread, locales.GetTaggedValue());
361 }
362
OptionToUColAttribute(CaseFirstOption caseFirstOption)363 UColAttributeValue JSCollator::OptionToUColAttribute(CaseFirstOption caseFirstOption)
364 {
365 auto iter = uColAttributeValueMap.find(caseFirstOption);
366 if (iter != uColAttributeValueMap.end()) {
367 return iter->second;
368 }
369 LOG_ECMA(FATAL) << "this branch is unreachable";
370 UNREACHABLE();
371 }
372
OptionsToEcmaString(JSThread * thread,UsageOption usage)373 JSHandle<JSTaggedValue> OptionsToEcmaString(JSThread *thread, UsageOption usage)
374 {
375 JSMutableHandle<JSTaggedValue> result(thread, JSTaggedValue::Undefined());
376 auto globalConst = thread->GlobalConstants();
377 switch (usage) {
378 case UsageOption::SORT:
379 result.Update(globalConst->GetSortString());
380 break;
381 case UsageOption::SEARCH:
382 result.Update(globalConst->GetSearchString());
383 break;
384 default:
385 LOG_ECMA(FATAL) << "this branch is unreachable";
386 UNREACHABLE();
387 }
388 return result;
389 }
390
OptionsToEcmaString(JSThread * thread,SensitivityOption sensitivity)391 JSHandle<JSTaggedValue> OptionsToEcmaString(JSThread *thread, SensitivityOption sensitivity)
392 {
393 JSMutableHandle<JSTaggedValue> result(thread, JSTaggedValue::Undefined());
394 auto globalConst = thread->GlobalConstants();
395 switch (sensitivity) {
396 case SensitivityOption::BASE:
397 result.Update(globalConst->GetBaseString());
398 break;
399 case SensitivityOption::ACCENT:
400 result.Update(globalConst->GetAccentString());
401 break;
402 case SensitivityOption::CASE:
403 result.Update(globalConst->GetCaseString());
404 break;
405 case SensitivityOption::VARIANT:
406 result.Update(globalConst->GetVariantString());
407 break;
408 case SensitivityOption::UNDEFINED:
409 break;
410 default:
411 LOG_ECMA(FATAL) << "this branch is unreachable";
412 UNREACHABLE();
413 }
414 return result;
415 }
416
OptionsToEcmaString(JSThread * thread,CaseFirstOption caseFirst)417 JSHandle<JSTaggedValue> OptionsToEcmaString(JSThread *thread, CaseFirstOption caseFirst)
418 {
419 JSMutableHandle<JSTaggedValue> result(thread, JSTaggedValue::Undefined());
420 auto globalConst = thread->GlobalConstants();
421 switch (caseFirst) {
422 case CaseFirstOption::UPPER:
423 result.Update(globalConst->GetUpperString());
424 break;
425 case CaseFirstOption::LOWER:
426 result.Update(globalConst->GetLowerString());
427 break;
428 case CaseFirstOption::FALSE_OPTION:
429 result.Update(globalConst->GetFalseString());
430 break;
431 case CaseFirstOption::UNDEFINED:
432 result.Update(globalConst->GetUpperString());
433 break;
434 default:
435 LOG_ECMA(FATAL) << "this branch is unreachable";
436 UNREACHABLE();
437 }
438 return result;
439 }
440
441 // 11.3.4 Intl.Collator.prototype.resolvedOptions ()
ResolvedOptions(JSThread * thread,const JSHandle<JSCollator> & collator)442 JSHandle<JSObject> JSCollator::ResolvedOptions(JSThread *thread, const JSHandle<JSCollator> &collator)
443 {
444 auto ecmaVm = thread->GetEcmaVM();
445 auto globalConst = thread->GlobalConstants();
446 ObjectFactory *factory = ecmaVm->GetFactory();
447 JSHandle<GlobalEnv> env = ecmaVm->GetGlobalEnv();
448 JSHandle<JSFunction> funCtor(env->GetObjectFunction());
449 JSHandle<JSObject> options(factory->NewJSObjectByConstructor(funCtor));
450
451 // [[Locale]]
452 JSHandle<JSTaggedValue> property = globalConst->GetHandledLocaleString();
453 JSHandle<JSTaggedValue> locale(thread, collator->GetLocale());
454 JSObject::CreateDataPropertyOrThrow(thread, options, property, locale);
455 RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSObject, thread);
456
457 // [[Usage]]
458 UsageOption usageOption = collator->GetUsage();
459 JSHandle<JSTaggedValue> usageValue = OptionsToEcmaString(thread, usageOption);
460 JSObject::CreateDataProperty(thread, options, globalConst->GetHandledUsageString(), usageValue);
461
462 // [[Sensitivity]]
463 auto sentivityOption = collator->GetSensitivity();
464 JSHandle<JSTaggedValue> sensitivityValue = OptionsToEcmaString(thread, sentivityOption);
465 JSObject::CreateDataProperty(thread, options, globalConst->GetHandledSensitivityString(), sensitivityValue);
466
467 // [[IgnorePunctuation]]
468 JSHandle<JSTaggedValue> ignorePunctuationValue(thread, JSTaggedValue(collator->GetIgnorePunctuation()));
469 JSObject::CreateDataProperty(thread, options, globalConst->GetHandledIgnorePunctuationString(),
470 ignorePunctuationValue);
471
472 // [[Collation]]
473 JSMutableHandle<JSTaggedValue> collationValue(thread, collator->GetCollation());
474 UErrorCode status = U_ZERO_ERROR;
475 icu::Collator *icuCollator = collator->GetIcuCollator();
476 icu::Locale icu_locale(icuCollator->getLocale(ULOC_VALID_LOCALE, status));
477 std::string collation_value =
478 icu_locale.getUnicodeKeywordValue<std::string>("co", status);
479 if (collationValue->IsUndefined()) {
480 if (collation_value != "search" && collation_value != "") {
481 collationValue.Update(factory->NewFromStdString(collation_value).GetTaggedValue());
482 } else {
483 collationValue.Update(globalConst->GetDefaultString());
484 }
485 }
486 JSObject::CreateDataProperty(thread, options, globalConst->GetHandledCollationString(), collationValue);
487
488 // [[Numeric]]
489 JSHandle<JSTaggedValue> numericValue(thread, JSTaggedValue(collator->GetNumeric()));
490 JSObject::CreateDataProperty(thread, options, globalConst->GetHandledNumericString(), numericValue);
491
492 // [[CaseFirst]]
493 CaseFirstOption caseFirstOption = collator->GetCaseFirst();
494 // In Ecma402 spec, caseFirst is an optional property so we set it to Upper when input is undefined
495 // the requirement maybe change in the future
496 JSHandle<JSTaggedValue> caseFirstValue = OptionsToEcmaString(thread, caseFirstOption);
497 JSObject::CreateDataProperty(thread, options, globalConst->GetHandledCaseFirstString(), caseFirstValue);
498 return options;
499 }
500
CompareStringsOptionFor(JSThread * thread,JSHandle<JSTaggedValue> locales)501 CompareStringsOption JSCollator::CompareStringsOptionFor(JSThread* thread,
502 JSHandle<JSTaggedValue> locales)
503 {
504 if (locales->IsUndefined()) {
505 auto& intlCache = thread->GetEcmaVM()->GetIntlCache();
506 auto defaultCompareOption = intlCache.GetDefaultCompareStringsOption();
507 if (defaultCompareOption.has_value()) {
508 return defaultCompareOption.value();
509 }
510 auto defaultLocale = intl::LocaleHelper::StdStringDefaultLocale(thread);
511 for (const char *fastLocale : FAST_LOCALE) {
512 if (strcmp(fastLocale, defaultLocale.c_str()) == 0) {
513 intlCache.SetDefaultCompareStringsOption(CompareStringsOption::TRY_FAST_PATH);
514 return CompareStringsOption::TRY_FAST_PATH;
515 }
516 }
517 intlCache.SetDefaultCompareStringsOption(CompareStringsOption::NONE);
518 return CompareStringsOption::NONE;
519 }
520
521 if (!locales->IsString()) {
522 return CompareStringsOption::NONE;
523 }
524
525 JSHandle<EcmaString> localesString = JSHandle<EcmaString>::Cast(locales);
526 CString localesStr = ConvertToString(*localesString, StringConvertedUsage::LOGICOPERATION);
527 for (const char *fastLocale : FAST_LOCALE) {
528 if (strcmp(fastLocale, localesStr.c_str()) == 0) {
529 return CompareStringsOption::TRY_FAST_PATH;
530 }
531 }
532
533 return CompareStringsOption::NONE;
534 }
535
CompareStringsOptionFor(JSThread * thread,JSHandle<JSTaggedValue> locales,JSHandle<JSTaggedValue> options)536 CompareStringsOption JSCollator::CompareStringsOptionFor(JSThread* thread,
537 JSHandle<JSTaggedValue> locales,
538 JSHandle<JSTaggedValue> options)
539 {
540 if (!options->IsUndefined()) {
541 return CompareStringsOption::NONE;
542 }
543 return CompareStringsOptionFor(thread, locales);
544 }
545
// Anonymous namespace for the CompareStrings fast-path helpers
547 namespace {
// Level-1 collation weights indexed by code unit, laid out 16 entries per row.
// A weight of 0 marks a code unit that cannot be fast-compared; entries 128..255 are not
// listed and are therefore zero-initialized. Upper- and lower-case letters share a weight.
constexpr uint8_t COLLATION_WEIGHT_L1[256] = {
    // 0x00 - 0x0F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 0, 0,
    // 0x10 - 0x1F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 0x20 - 0x2F
    6, 12, 16, 28, 38, 29, 27, 15, 17, 18, 24, 32, 9, 8, 14, 25,
    // 0x30 - 0x3F
    39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 11, 10, 33, 34, 35, 13,
    // 0x40 - 0x4F
    23, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
    // 0x50 - 0x5F
    64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 19, 26, 20, 31, 7,
    // 0x60 - 0x6F
    30, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
    // 0x70 - 0x7F
    64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 21, 36, 22, 37, 0,
};

// Level-3 collation weights indexed by code unit, laid out 16 entries per row.
// Upper-case letters carry weight 2, every other fast-comparable code unit weight 1.
constexpr uint8_t COLLATION_WEIGHT_L3[256] = {
    // 0x00 - 0x0F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
    // 0x10 - 0x1F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 0x20 - 0x2F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    // 0x30 - 0x3F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    // 0x40 - 0x4F
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    // 0x50 - 0x5F
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1,
    // 0x60 - 0x6F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    // 0x70 - 0x7F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
};

// Number of entries in the weight tables (the full declared array size, not just the listed rows).
constexpr int COLLATION_WEIGHT_LENGTH = sizeof(COLLATION_WEIGHT_L1) / sizeof(uint8_t);
567
ToUCollationResult(int delta)568 constexpr UCollationResult ToUCollationResult(int delta)
569 {
570 return delta < 0 ? UCollationResult::UCOL_LESS
571 : (delta > 0 ? UCollationResult::UCOL_GREATER
572 : UCollationResult::UCOL_EQUAL);
573 }
574
575 struct FastCompareStringsData {
576 UCollationResult l1Result = UCollationResult::UCOL_EQUAL;
577 UCollationResult l3Result = UCollationResult::UCOL_EQUAL;
578 int processedUntil = 0;
579 int firstDiffAt = 0; // The first relevant diff (L1 if exists, else L3).
580 bool hasDiff = false;
581
FastCompareFailedpanda::ecmascript::__anonfbbf73560111::FastCompareStringsData582 std::optional<UCollationResult> FastCompareFailed(int& processedUntilOut) const
583 {
584 if (hasDiff) {
585 // Found some difference, continue there to ensure the generic algorithm picks it up.
586 processedUntilOut = firstDiffAt;
587 } else {
588 // No difference found, reprocess the last processed character since it may be
589 // followed by a unicode combining character.
590 processedUntilOut = std::max(processedUntil - 1, 0);
591 }
592 return {};
593 }
594 };
595
596 template <class T>
CanFastCompare(T ch)597 constexpr bool CanFastCompare(T ch)
598 {
599 return ch < COLLATION_WEIGHT_LENGTH && COLLATION_WEIGHT_L1[ch] != 0;
600 }
601
602 // Check canFastCompare, L1 weight, and L3 weight together.
603 // Use FastCompareStringsData to store these results.
604 template <class T1, class T2>
FastCompareFlatString(const T1 * lhs,const T2 * rhs,int length,FastCompareStringsData & fastCompareData)605 bool FastCompareFlatString(const T1* lhs, const T2* rhs, int length, FastCompareStringsData& fastCompareData)
606 {
607 for (int i = 0; i < length; i++) {
608 const T1 l = lhs[i];
609 const T2 r = rhs[i];
610 if (!CanFastCompare(l) || !CanFastCompare(r)) {
611 fastCompareData.processedUntil = i;
612 return false;
613 }
614 auto l1Result = ToUCollationResult(COLLATION_WEIGHT_L1[l] - COLLATION_WEIGHT_L1[r]);
615 if (l1Result != UCollationResult::UCOL_EQUAL) {
616 fastCompareData.hasDiff = true;
617 fastCompareData.firstDiffAt = i;
618 fastCompareData.processedUntil = i;
619 fastCompareData.l1Result = l1Result;
620 return true;
621 }
622 if (l != r && fastCompareData.l3Result == UCollationResult::UCOL_EQUAL) {
623 auto l3Result = ToUCollationResult(COLLATION_WEIGHT_L3[l] - COLLATION_WEIGHT_L3[r]);
624 fastCompareData.l3Result = l3Result;
625 if (!fastCompareData.hasDiff) {
626 fastCompareData.hasDiff = true;
627 fastCompareData.firstDiffAt = i;
628 }
629 }
630 }
631 fastCompareData.processedUntil = length;
632 return true;
633 }
634
FastCompareStringFlatContent(EcmaString * string1,EcmaString * string2,int length,FastCompareStringsData & fastCompareData)635 bool FastCompareStringFlatContent(EcmaString* string1, EcmaString* string2,
636 int length, FastCompareStringsData& fastCompareData)
637 {
638 EcmaStringAccessor string1Acc(string1);
639 EcmaStringAccessor string2Acc(string2);
640 if (string1Acc.IsUtf8()) {
641 auto l = EcmaStringAccessor::GetNonTreeUtf8Data(string1);
642 if (string2Acc.IsUtf8()) {
643 auto r = EcmaStringAccessor::GetNonTreeUtf8Data(string2);
644 return FastCompareFlatString(l, r, length, fastCompareData);
645 } else {
646 auto r = EcmaStringAccessor::GetNonTreeUtf16Data(string2);
647 return FastCompareFlatString(l, r, length, fastCompareData);
648 }
649 } else {
650 auto l = EcmaStringAccessor::GetNonTreeUtf16Data(string1);
651 if (string2Acc.IsUtf8()) {
652 auto r = EcmaStringAccessor::GetNonTreeUtf8Data(string2);
653 return FastCompareFlatString(l, r, length, fastCompareData);
654 } else {
655 auto r = EcmaStringAccessor::GetNonTreeUtf16Data(string2);
656 return FastCompareFlatString(l, r, length, fastCompareData);
657 }
658 }
659 UNREACHABLE();
660 }
661
CharIsAsciiOrOutOfBounds(EcmaString * string,int stringLength,int index)662 bool CharIsAsciiOrOutOfBounds(EcmaString* string, int stringLength, int index)
663 {
664 return index >= stringLength || EcmaStringAccessor::IsASCIICharacter(EcmaStringAccessor(string).Get<false>(index));
665 }
666
CharCanFastCompareOrOutOfBounds(EcmaString * string,int stringLength,int index)667 bool CharCanFastCompareOrOutOfBounds(EcmaString* string, int stringLength, int index)
668 {
669 return index >= stringLength || CanFastCompare(EcmaStringAccessor(string).Get<false>(index));
670 }
671
672 // Pseudo-code for simplified multi-pass algorithm is:
673 // // Only a certain subset of the ASCII range can be fast-compared.
674 // // In the actual single-pass algorithm below, we tolerate non-ASCII contents.
675 // 1. Check string1 and string2 can fastcompare.
676 // 2. Compare L1 weight for each char, the greater wins.
677 // 3. Is two strings are L1 equal in common length, the longer wins.
678 // 4. Compare L3 weight for each char, the greater wins.
679 // 5. If all equal, return equal.
680 // 6. Once some chars cannot be fastcompared, use icu.
681
TryFastCompareStrings(EcmaString * string1,EcmaString * string2,int & processedUntilOut)682 std::optional<UCollationResult> TryFastCompareStrings(EcmaString* string1, EcmaString* string2,
683 int& processedUntilOut)
684 {
685 processedUntilOut = 0;
686
687 const auto length1 = static_cast<int>(EcmaStringAccessor(string1).GetLength());
688 const auto length2 = static_cast<int>(EcmaStringAccessor(string2).GetLength());
689 int commonLength = std::min(length1, length2);
690
691 FastCompareStringsData fastCompareData;
692 if (!FastCompareStringFlatContent(string1, string2, commonLength, fastCompareData)) {
693 return fastCompareData.FastCompareFailed(processedUntilOut);
694 }
695 // The result is only valid if the last processed character is not followed
696 // by a unicode combining character.
697 if (!CharIsAsciiOrOutOfBounds(string1, length1, fastCompareData.processedUntil + 1) ||
698 !CharIsAsciiOrOutOfBounds(string2, length2, fastCompareData.processedUntil + 1)) {
699 return fastCompareData.FastCompareFailed(processedUntilOut);
700 }
701 if (fastCompareData.l1Result != UCollationResult::UCOL_EQUAL) {
702 return fastCompareData.l1Result;
703 }
704 // Strings are L1-equal up to their common length, length differences win.
705 UCollationResult lengthResult = ToUCollationResult(length1 - length2);
706 if (lengthResult != UCollationResult::UCOL_EQUAL) {
707 // Strings of different lengths may still compare as equal if the longer
708 // string has a fully ignored suffix, e.g. "a" vs. "a\u{1}".
709 if (!CharCanFastCompareOrOutOfBounds(string1, length1, commonLength) ||
710 !CharCanFastCompareOrOutOfBounds(string2, length2, commonLength)) {
711 return fastCompareData.FastCompareFailed(processedUntilOut);
712 }
713 return lengthResult;
714 }
715 // L1-equal and same length, the L3 result wins.
716 return fastCompareData.l3Result;
717 }
718 } // namespace
719
720 //StringPiece is similar to std::string_view
ToICUStringPiece(EcmaString * string,int offset=0)721 icu::StringPiece ToICUStringPiece(EcmaString* string, int offset = 0)
722 {
723 EcmaStringAccessor stringAcc(string);
724 ASSERT(stringAcc.IsUtf8());
725 ASSERT(!stringAcc.IsTreeString());
726 return icu::StringPiece(reinterpret_cast<const char*>(EcmaStringAccessor::GetNonTreeUtf8Data(string)) + offset,
727 static_cast<int>(stringAcc.GetLength()) - offset);
728 }
729
730 // Convert to a UTF16 string and partially convert to ICUUnicodeString
ToICUUnicodeString(EcmaString * string,int offset=0)731 icu::UnicodeString ToICUUnicodeString(EcmaString* string, int offset = 0)
732 {
733 EcmaStringAccessor stringAcc(string);
734 ASSERT(!stringAcc.IsTreeString());
735 int strLength = static_cast<int>(stringAcc.GetLength());
736 int partialLength = strLength - offset;
737 if (stringAcc.IsUtf8()) {
738 constexpr int shortStringLength = 80; // 80: short string length
739 if (partialLength <= shortStringLength) {
740 // short string on stack
741 UChar shortStringBuffer[shortStringLength];
742 // utf8 is within ascii, std::copy_n from utf8 to utf16 is OK
743 std::copy_n(EcmaStringAccessor::GetNonTreeUtf8Data(string) + offset, partialLength, shortStringBuffer);
744 return icu::UnicodeString(shortStringBuffer, partialLength);
745 }
746 CVector<uint16_t> ucharBuffer(partialLength);
747 std::copy_n(EcmaStringAccessor::GetNonTreeUtf8Data(string) + offset, partialLength, ucharBuffer.begin());
748 return icu::UnicodeString(ucharBuffer.data(), partialLength);
749 } else {
750 return icu::UnicodeString(EcmaStringAccessor::GetNonTreeUtf16Data(string) + offset, partialLength);
751 }
752 }
753
SlowCompareStrings(const icu::Collator * icuCollator,EcmaString * flatString1,EcmaString * flatString2,int processedUntil)754 JSTaggedValue JSCollator::SlowCompareStrings(const icu::Collator *icuCollator,
755 EcmaString* flatString1,
756 EcmaString* flatString2,
757 int processedUntil)
758 {
759 UCollationResult result;
760 UErrorCode status = U_ZERO_ERROR;
761 if (EcmaStringAccessor(flatString1).IsUtf8() && EcmaStringAccessor(flatString2).IsUtf8()) {
762 auto string1Piece = ToICUStringPiece(flatString1, processedUntil);
763 if (!string1Piece.empty()) {
764 auto string2Piece = ToICUStringPiece(flatString2, processedUntil);
765 if (!string2Piece.empty()) {
766 result = icuCollator->compareUTF8(string1Piece, string2Piece, status);
767 return JSTaggedValue(result);
768 }
769 }
770 }
771
772 auto uString1 = ToICUUnicodeString(flatString1, processedUntil);
773 auto uString2 = ToICUUnicodeString(flatString2, processedUntil);
774 result = icuCollator->compare(uString1, uString2, status);
775 ASSERT(U_SUCCESS(status));
776 return JSTaggedValue(result);
777 }
778
CompareStrings(JSThread * thread,const icu::Collator * icuCollator,const JSHandle<EcmaString> & string1,const JSHandle<EcmaString> & string2,CompareStringsOption csOption)779 JSTaggedValue JSCollator::CompareStrings(JSThread *thread, const icu::Collator *icuCollator,
780 const JSHandle<EcmaString> &string1, const JSHandle<EcmaString> &string2,
781 [[maybe_unused]]CompareStringsOption csOption)
782 {
783 if (*string1 == *string2) {
784 return JSTaggedValue(UCollationResult::UCOL_EQUAL);
785 }
786
787 // Since Unicode has ignorable characters,
788 // we cannot return early for 0-length strings.
789 auto flatString1 = JSHandle<EcmaString>(thread, EcmaStringAccessor::Flatten(thread->GetEcmaVM(), string1));
790 auto flatString2 = JSHandle<EcmaString>(thread, EcmaStringAccessor::Flatten(thread->GetEcmaVM(), string2));
791
792 int processedUntil = 0;
793 if (csOption == CompareStringsOption::TRY_FAST_PATH) {
794 auto maybeResult = TryFastCompareStrings(*flatString1, *flatString2, processedUntil);
795 if (maybeResult.has_value()) {
796 return JSTaggedValue(maybeResult.value());
797 }
798 }
799 return SlowCompareStrings(icuCollator, *flatString1, *flatString2, processedUntil);
800 }
801
FastCachedCompareStrings(JSThread * thread,JSHandle<JSTaggedValue> locales,const JSHandle<EcmaString> & string1,const JSHandle<EcmaString> & string2,CompareStringsOption csOption)802 JSTaggedValue JSCollator::FastCachedCompareStrings(JSThread *thread, JSHandle<JSTaggedValue> locales,
803 const JSHandle<EcmaString> &string1,
804 const JSHandle<EcmaString> &string2,
805 CompareStringsOption csOption)
806 {
807 if (*string1 == *string2) {
808 return JSTaggedValue(UCollationResult::UCOL_EQUAL);
809 }
810
811 // Since Unicode has ignorable characters,
812 // we cannot return early for 0-length strings.
813 auto flatString1 = JSHandle<EcmaString>(thread, EcmaStringAccessor::Flatten(thread->GetEcmaVM(), string1));
814 auto flatString2 = JSHandle<EcmaString>(thread, EcmaStringAccessor::Flatten(thread->GetEcmaVM(), string2));
815
816 int processedUntil = 0;
817 if (csOption == CompareStringsOption::TRY_FAST_PATH) {
818 auto maybeResult = TryFastCompareStrings(*flatString1, *flatString2, processedUntil);
819 if (maybeResult.has_value()) {
820 return JSTaggedValue(maybeResult.value());
821 }
822 }
823
824 auto icuCollator = JSCollator::GetCachedIcuCollator(thread, locales);
825 if (icuCollator != nullptr) {
826 return SlowCompareStrings(icuCollator, *flatString1, *flatString2, processedUntil);
827 }
828 return JSTaggedValue::Undefined();
829 }
830 } // namespace panda::ecmascript
831