1 /*
2 * Copyright (c) 2021-2024 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "ecmascript/js_collator.h"
17
18 #include "ecmascript/ecma_context.h"
19 #include "ecmascript/intl/locale_helper.h"
20 #include "ecmascript/global_env.h"
21 #include "ecmascript/ecma_string-inl.h"
22
23 namespace panda::ecmascript {
// ICU resource path for the collation data tree; GetAvailableLocales passes it to
// intl::LocaleHelper::GetAvailableLocales to enumerate the collator locales.
// NOLINTNEXTLINE (readability-identifier-naming, fuchsia-statically-constructed-objects)
const CString JSCollator::uIcuDataColl = U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "coll";
// Maps the textual value of the "kf" Unicode locale extension to the matching CaseFirstOption.
// InitializeCollator consults it when no explicit caseFirst option was supplied.
const std::map<std::string, CaseFirstOption> JSCollator::caseFirstMap = {
    {"upper", CaseFirstOption::UPPER},
    {"lower", CaseFirstOption::LOWER},
    {"false", CaseFirstOption::FALSE_OPTION}
};
// Maps a CaseFirstOption to the ICU UCOL_CASE_FIRST attribute value (see OptionToUColAttribute).
// Both FALSE_OPTION and UNDEFINED translate to UCOL_OFF.
const std::map<CaseFirstOption, UColAttributeValue> JSCollator::uColAttributeValueMap = {
    {CaseFirstOption::UPPER, UCOL_UPPER_FIRST},
    {CaseFirstOption::LOWER, UCOL_LOWER_FIRST},
    {CaseFirstOption::FALSE_OPTION, UCOL_OFF},
    {CaseFirstOption::UNDEFINED, UCOL_OFF}
};
37
GetAvailableLocales(JSThread * thread,bool enableLocaleCache)38 JSHandle<TaggedArray> JSCollator::GetAvailableLocales(JSThread *thread, bool enableLocaleCache)
39 {
40 const char *key = nullptr;
41 const char *path = JSCollator::uIcuDataColl.c_str();
42 // key and path are const, so we can cache the result
43 if (enableLocaleCache) {
44 JSHandle<JSTaggedValue> cachedLocales = thread->GlobalConstants()->GetHandledCachedJSCollatorLocales();
45 if (cachedLocales->IsHeapObject()) {
46 return JSHandle<TaggedArray>(cachedLocales);
47 }
48 }
49 std::vector<std::string> availableStringLocales = intl::LocaleHelper::GetAvailableLocales(thread, key, path);
50 JSHandle<TaggedArray> availableLocales = JSLocale::ConstructLocaleList(thread, availableStringLocales);
51 if (enableLocaleCache) {
52 GlobalEnvConstants *constants = const_cast<GlobalEnvConstants *>(thread->GlobalConstants());
53 constants->SetCachedLocales(availableLocales.GetTaggedValue());
54 }
55 return availableLocales;
56 }
57
58 /* static */
SetIcuCollator(JSThread * thread,const JSHandle<JSCollator> & collator,icu::Collator * icuCollator,const NativePointerCallback & callback)59 void JSCollator::SetIcuCollator(JSThread *thread, const JSHandle<JSCollator> &collator,
60 icu::Collator *icuCollator, const NativePointerCallback &callback)
61 {
62 EcmaVM *ecmaVm = thread->GetEcmaVM();
63 ObjectFactory *factory = ecmaVm->GetFactory();
64
65 ASSERT(icuCollator != nullptr);
66 JSTaggedValue data = collator->GetIcuField();
67 if (data.IsJSNativePointer()) {
68 JSNativePointer *native = JSNativePointer::Cast(data.GetTaggedObject());
69 native->ResetExternalPointer(thread, icuCollator);
70 return;
71 }
72 JSHandle<JSNativePointer> pointer = factory->NewJSNativePointer(icuCollator, callback);
73 collator->SetIcuField(thread, pointer.GetTaggedValue());
74 }
75
InitializeCollator(JSThread * thread,const JSHandle<JSCollator> & collator,const JSHandle<JSTaggedValue> & locales,const JSHandle<JSTaggedValue> & options,bool forIcuCache,bool enableLocaleCache)76 JSHandle<JSCollator> JSCollator::InitializeCollator(JSThread *thread,
77 const JSHandle<JSCollator> &collator,
78 const JSHandle<JSTaggedValue> &locales,
79 const JSHandle<JSTaggedValue> &options,
80 bool forIcuCache,
81 bool enableLocaleCache)
82 {
83 EcmaVM *ecmaVm = thread->GetEcmaVM();
84 ObjectFactory *factory = ecmaVm->GetFactory();
85 const GlobalEnvConstants *globalConst = thread->GlobalConstants();
86 // 1. Let requestedLocales be ? CanonicalizeLocaleList(locales).
87 JSHandle<TaggedArray> requestedLocales = intl::LocaleHelper::CanonicalizeLocaleList(thread, locales);
88 RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
89
90 // 2. If options is undefined, then
91 // a. Let options be ObjectCreate(null).
92 // 3. Else,
93 // a. Let options be ? ToObject(options).
94 JSHandle<JSObject> optionsObject;
95 if (options->IsUndefined()) {
96 optionsObject = factory->CreateNullJSObject();
97 } else {
98 optionsObject = JSTaggedValue::ToObject(thread, options);
99 RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
100 }
101 // 4. Let usage be ? GetOption(options, "usage", "string", « "sort", "search" », "sort").
102 auto usage = JSLocale::GetOptionOfString<UsageOption>(thread, optionsObject, globalConst->GetHandledUsageString(),
103 {UsageOption::SORT, UsageOption::SEARCH}, {"sort", "search"},
104 UsageOption::SORT);
105 RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
106 collator->SetUsage(usage);
107
108 // 5. Let matcher be ? GetOption(options, "localeMatcher", "string", « "lookup", "best fit" », "best fit").
109 auto matcher = JSLocale::GetOptionOfString<LocaleMatcherOption>(
110 thread, optionsObject, globalConst->GetHandledLocaleMatcherString(),
111 {LocaleMatcherOption::LOOKUP, LocaleMatcherOption::BEST_FIT}, {"lookup", "best fit"},
112 LocaleMatcherOption::BEST_FIT);
113 RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
114
115 // 6. Let collation be ? GetOption(options, "collation", "string", undefined, undefined).
116 // 7. If collation is not undefined, then
117 // a. If collation does not match the Unicode Locale Identifier type nonterminal, throw a RangeError exception.
118 JSHandle<JSTaggedValue> collation =
119 JSLocale::GetOption(thread, optionsObject, globalConst->GetHandledCollationString(), OptionType::STRING,
120 globalConst->GetHandledUndefined(), globalConst->GetHandledUndefined());
121 RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
122 collator->SetCollation(thread, collation);
123 std::string collationStr;
124 if (!collation->IsUndefined()) {
125 JSHandle<EcmaString> collationEcmaStr = JSHandle<EcmaString>::Cast(collation);
126 collationStr = intl::LocaleHelper::ConvertToStdString(collationEcmaStr);
127 if (!JSLocale::IsWellAlphaNumList(collationStr)) {
128 THROW_RANGE_ERROR_AND_RETURN(thread, "invalid collation", collator);
129 }
130 }
131
132 // 8. Let numeric be ? GetOption(options, "numeric", "boolean", undefined, undefined).
133 bool numeric = false;
134 bool foundNumeric =
135 JSLocale::GetOptionOfBool(thread, optionsObject, globalConst->GetHandledNumericString(), false, &numeric);
136 RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
137 collator->SetNumeric(numeric);
138
139 // 14. Let caseFirst be ? GetOption(options, "caseFirst", "string", « "upper", "lower", "false" », undefined).
140 CaseFirstOption caseFirst = JSLocale::GetOptionOfString<CaseFirstOption>(
141 thread, optionsObject, globalConst->GetHandledCaseFirstString(),
142 {CaseFirstOption::UPPER, CaseFirstOption::LOWER, CaseFirstOption::FALSE_OPTION}, {"upper", "lower", "false"},
143 CaseFirstOption::UNDEFINED);
144 RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
145 collator->SetCaseFirst(caseFirst);
146
147 // 16. Let relevantExtensionKeys be %Collator%.[[RelevantExtensionKeys]].
148 std::set<std::string> relevantExtensionKeys = {"co", "kn", "kf"};
149
150 // 17. Let r be ResolveLocale(%Collator%.[[AvailableLocales]], requestedLocales, opt,
151 // %Collator%.[[RelevantExtensionKeys]], localeData).
152 JSHandle<TaggedArray> availableLocales;
153 if (requestedLocales->GetLength() == 0) {
154 availableLocales = factory->EmptyArray();
155 } else {
156 availableLocales = GetAvailableLocales(thread, enableLocaleCache);
157 }
158 ResolvedLocale r =
159 JSLocale::ResolveLocale(thread, availableLocales, requestedLocales, matcher, relevantExtensionKeys);
160 RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
161 icu::Locale icuLocale = r.localeData;
162 JSHandle<EcmaString> localeStr = intl::LocaleHelper::ToLanguageTag(thread, icuLocale);
163 RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
164 collator->SetLocale(thread, localeStr.GetTaggedValue());
165 ASSERT_PRINT(!icuLocale.isBogus(), "icuLocale is bogus");
166
167 // If collation is undefined iterate RelevantExtensionKeys to find "co"
168 // if found, set ICU collator UnicodeKeyword to iterator->second
169 UErrorCode status = U_ZERO_ERROR;
170 if (!collation->IsUndefined()) {
171 auto extensionIter = r.extensions.find("co");
172 if (extensionIter != r.extensions.end() && extensionIter->second != collationStr) {
173 icuLocale.setUnicodeKeywordValue("co", nullptr, status);
174 ASSERT_PRINT(U_SUCCESS(status), "icuLocale set co failed");
175 }
176 }
177
178 // If usage is serach set co-serach to icu locale key word value
179 // Eles set collation string to icu locale key word value
180 if (usage == UsageOption::SEARCH) {
181 icuLocale.setUnicodeKeywordValue("co", "search", status);
182 ASSERT(U_SUCCESS(status));
183 } else {
184 if (!collationStr.empty() && JSLocale::IsWellCollation(icuLocale, collationStr)) {
185 icuLocale.setUnicodeKeywordValue("co", collationStr, status);
186 ASSERT(U_SUCCESS(status));
187 }
188 }
189
190 std::unique_ptr<icu::Collator> icuCollator(icu::Collator::createInstance(icuLocale, status));
191 if (U_FAILURE(status) || icuCollator == nullptr) { // NOLINT(readability-implicit-bool-conversion)
192 if (status == UErrorCode::U_MISSING_RESOURCE_ERROR) {
193 THROW_REFERENCE_ERROR_AND_RETURN(thread, "can not find icu data resources", collator);
194 }
195 status = U_ZERO_ERROR;
196 icu::Locale localeName(icuLocale.getBaseName());
197 icuCollator.reset(icu::Collator::createInstance(localeName, status));
198 if (U_FAILURE(status) || icuCollator == nullptr) { // NOLINT(readability-implicit-bool-conversion)
199 THROW_RANGE_ERROR_AND_RETURN(thread, "invalid collation", collator);
200 }
201 }
202 ASSERT(U_SUCCESS(status));
203 icu::Locale collatorLocale(icuCollator->getLocale(ULOC_VALID_LOCALE, status));
204
205 icuCollator->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
206 ASSERT(U_SUCCESS(status));
207
208 // If numeric is found set ICU collator UCOL_NUMERIC_COLLATION to numeric
209 // Else iterate RelevantExtensionKeys to find "kn"
210 // if found, set ICU collator UCOL_NUMERIC_COLLATION to iterator->second
211 status = U_ZERO_ERROR;
212 if (foundNumeric) {
213 ASSERT(icuCollator.get() != nullptr);
214 icuCollator.get()->setAttribute(UCOL_NUMERIC_COLLATION, numeric ? UCOL_ON : UCOL_OFF, status);
215 ASSERT(U_SUCCESS(status));
216 } else {
217 auto extensionIter = r.extensions.find("kn");
218 if (extensionIter != r.extensions.end()) {
219 ASSERT(icuCollator.get() != nullptr);
220 bool found = (extensionIter->second == "true");
221 collator->SetNumeric(found);
222 icuCollator.get()->setAttribute(UCOL_NUMERIC_COLLATION, found ? UCOL_ON : UCOL_OFF, status);
223 ASSERT(U_SUCCESS(status));
224 }
225 }
226
227 // If caseFirst is not undefined set ICU collator UColAttributeValue to caseFirst
228 // Else iterate RelevantExtensionKeys to find "kf"
229 // if found, set ICU collator UColAttributeValue to iterator->second
230 status = U_ZERO_ERROR;
231 if (caseFirst != CaseFirstOption::UNDEFINED) {
232 ASSERT(icuCollator.get() != nullptr);
233 icuCollator.get()->setAttribute(UCOL_CASE_FIRST, OptionToUColAttribute(caseFirst), status);
234 ASSERT(U_SUCCESS(status));
235 } else {
236 auto extensionIter = r.extensions.find("kf");
237 if (extensionIter != r.extensions.end()) {
238 ASSERT(icuCollator.get() != nullptr);
239 auto mapIter = caseFirstMap.find(extensionIter->second);
240 if (mapIter != caseFirstMap.end()) {
241 icuCollator.get()->setAttribute(UCOL_CASE_FIRST, OptionToUColAttribute(mapIter->second), status);
242 collator->SetCaseFirst(mapIter->second);
243 } else {
244 icuCollator.get()->setAttribute(UCOL_CASE_FIRST, OptionToUColAttribute(CaseFirstOption::UNDEFINED),
245 status);
246 }
247 ASSERT(U_SUCCESS(status));
248 }
249 }
250
251 // 24. Let sensitivity be ? GetOption(options, "sensitivity", "string", « "base", "accent", "case", "variant" »,
252 // undefined).
253 SensitivityOption sensitivity = JSLocale::GetOptionOfString<SensitivityOption>(
254 thread, optionsObject, globalConst->GetHandledSensitivityString(),
255 {SensitivityOption::BASE, SensitivityOption::ACCENT, SensitivityOption::CASE, SensitivityOption::VARIANT},
256 {"base", "accent", "case", "variant"}, SensitivityOption::UNDEFINED);
257 RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
258 // 25. If sensitivity is undefined, then
259 // a. If usage is "sort", then
260 // i. Let sensitivity be "variant".
261 if (sensitivity == SensitivityOption::UNDEFINED) {
262 if (usage == UsageOption::SORT) {
263 sensitivity = SensitivityOption::VARIANT;
264 }
265 }
266 collator->SetSensitivity(sensitivity);
267
268 // Trans SensitivityOption to Icu strength option
269 switch (sensitivity) {
270 case SensitivityOption::BASE:
271 icuCollator->setStrength(icu::Collator::PRIMARY);
272 break;
273 case SensitivityOption::ACCENT:
274 icuCollator->setStrength(icu::Collator::SECONDARY);
275 break;
276 case SensitivityOption::CASE:
277 icuCollator->setStrength(icu::Collator::PRIMARY);
278 icuCollator->setAttribute(UCOL_CASE_LEVEL, UCOL_ON, status);
279 break;
280 case SensitivityOption::VARIANT:
281 icuCollator->setStrength(icu::Collator::TERTIARY);
282 break;
283 case SensitivityOption::UNDEFINED:
284 break;
285 case SensitivityOption::EXCEPTION:
286 LOG_ECMA(FATAL) << "this branch is unreachable";
287 UNREACHABLE();
288 }
289
290 // 27. Let ignorePunctuation be ? GetOption(options, "ignorePunctuation", "boolean", undefined, false).
291 // 28. Set collator.[[IgnorePunctuation]] to ignorePunctuation.
292 bool ignorePunctuation = false;
293 bool defaultIgnorePunctuation = false;
294 // If the ignorePunctuation is not defined, which in "th" locale that is true but false on other locales.
295 JSHandle<EcmaString> thKey = factory->NewFromUtf8("th");
296 if (JSTaggedValue::Equal(thread, JSHandle<JSTaggedValue>::Cast(thKey), locales)) {
297 defaultIgnorePunctuation = true;
298 }
299 JSLocale::GetOptionOfBool(thread, optionsObject, globalConst->GetHandledIgnorePunctuationString(),
300 defaultIgnorePunctuation, &ignorePunctuation);
301 collator->SetIgnorePunctuation(ignorePunctuation);
302 if (ignorePunctuation) {
303 status = U_ZERO_ERROR;
304 icuCollator->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, status);
305 ASSERT(U_SUCCESS(status));
306 }
307
308 if (forIcuCache) {
309 std::string cacheEntry =
310 locales->IsUndefined() ? "" : EcmaStringAccessor(locales.GetTaggedValue()).ToStdString();
311 thread->GetCurrentEcmaContext()->SetIcuFormatterToCache(IcuFormatterType::COLLATOR,
312 cacheEntry, icuCollator.release(), JSCollator::FreeIcuCollator);
313 } else {
314 SetIcuCollator(thread, collator, icuCollator.release(), JSCollator::FreeIcuCollator);
315 }
316 collator->SetBoundCompare(thread, JSTaggedValue::Undefined());
317 // 29. Return collator.
318 return collator;
319 }
320
GetCachedIcuCollator(JSThread * thread,const JSTaggedValue & locales)321 icu::Collator *JSCollator::GetCachedIcuCollator(JSThread *thread, const JSTaggedValue &locales)
322 {
323 std::string cacheEntry = locales.IsUndefined() ? "" : EcmaStringAccessor(locales).ToStdString();
324 void *cachedCollator =
325 thread->GetCurrentEcmaContext()->GetIcuFormatterFromCache(IcuFormatterType::COLLATOR, cacheEntry);
326 if (cachedCollator != nullptr) {
327 return reinterpret_cast<icu::Collator*>(cachedCollator);
328 }
329 return nullptr;
330 }
331
GetCachedIcuCollator(JSThread * thread,const JSHandle<JSTaggedValue> & locales)332 icu::Collator *JSCollator::GetCachedIcuCollator(JSThread *thread, const JSHandle<JSTaggedValue> &locales)
333 {
334 return GetCachedIcuCollator(thread, locales.GetTaggedValue());
335 }
336
OptionToUColAttribute(CaseFirstOption caseFirstOption)337 UColAttributeValue JSCollator::OptionToUColAttribute(CaseFirstOption caseFirstOption)
338 {
339 auto iter = uColAttributeValueMap.find(caseFirstOption);
340 if (iter != uColAttributeValueMap.end()) {
341 return iter->second;
342 }
343 LOG_ECMA(FATAL) << "this branch is unreachable";
344 UNREACHABLE();
345 }
346
OptionsToEcmaString(JSThread * thread,UsageOption usage)347 JSHandle<JSTaggedValue> OptionsToEcmaString(JSThread *thread, UsageOption usage)
348 {
349 JSMutableHandle<JSTaggedValue> result(thread, JSTaggedValue::Undefined());
350 auto globalConst = thread->GlobalConstants();
351 switch (usage) {
352 case UsageOption::SORT:
353 result.Update(globalConst->GetSortString());
354 break;
355 case UsageOption::SEARCH:
356 result.Update(globalConst->GetSearchString());
357 break;
358 default:
359 LOG_ECMA(FATAL) << "this branch is unreachable";
360 UNREACHABLE();
361 }
362 return result;
363 }
364
OptionsToEcmaString(JSThread * thread,SensitivityOption sensitivity)365 JSHandle<JSTaggedValue> OptionsToEcmaString(JSThread *thread, SensitivityOption sensitivity)
366 {
367 JSMutableHandle<JSTaggedValue> result(thread, JSTaggedValue::Undefined());
368 auto globalConst = thread->GlobalConstants();
369 switch (sensitivity) {
370 case SensitivityOption::BASE:
371 result.Update(globalConst->GetBaseString());
372 break;
373 case SensitivityOption::ACCENT:
374 result.Update(globalConst->GetAccentString());
375 break;
376 case SensitivityOption::CASE:
377 result.Update(globalConst->GetCaseString());
378 break;
379 case SensitivityOption::VARIANT:
380 result.Update(globalConst->GetVariantString());
381 break;
382 case SensitivityOption::UNDEFINED:
383 break;
384 default:
385 LOG_ECMA(FATAL) << "this branch is unreachable";
386 UNREACHABLE();
387 }
388 return result;
389 }
390
OptionsToEcmaString(JSThread * thread,CaseFirstOption caseFirst)391 JSHandle<JSTaggedValue> OptionsToEcmaString(JSThread *thread, CaseFirstOption caseFirst)
392 {
393 JSMutableHandle<JSTaggedValue> result(thread, JSTaggedValue::Undefined());
394 auto globalConst = thread->GlobalConstants();
395 switch (caseFirst) {
396 case CaseFirstOption::UPPER:
397 result.Update(globalConst->GetUpperString());
398 break;
399 case CaseFirstOption::LOWER:
400 result.Update(globalConst->GetLowerString());
401 break;
402 case CaseFirstOption::FALSE_OPTION:
403 result.Update(globalConst->GetFalseString());
404 break;
405 case CaseFirstOption::UNDEFINED:
406 result.Update(globalConst->GetUpperString());
407 break;
408 default:
409 LOG_ECMA(FATAL) << "this branch is unreachable";
410 UNREACHABLE();
411 }
412 return result;
413 }
414
415 // 11.3.4 Intl.Collator.prototype.resolvedOptions ()
ResolvedOptions(JSThread * thread,const JSHandle<JSCollator> & collator)416 JSHandle<JSObject> JSCollator::ResolvedOptions(JSThread *thread, const JSHandle<JSCollator> &collator)
417 {
418 auto ecmaVm = thread->GetEcmaVM();
419 auto globalConst = thread->GlobalConstants();
420 ObjectFactory *factory = ecmaVm->GetFactory();
421 JSHandle<GlobalEnv> env = ecmaVm->GetGlobalEnv();
422 JSHandle<JSFunction> funCtor(env->GetObjectFunction());
423 JSHandle<JSObject> options(factory->NewJSObjectByConstructor(funCtor));
424
425 // [[Locale]]
426 JSHandle<JSTaggedValue> property = globalConst->GetHandledLocaleString();
427 JSHandle<JSTaggedValue> locale(thread, collator->GetLocale());
428 JSObject::CreateDataPropertyOrThrow(thread, options, property, locale);
429 RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSObject, thread);
430
431 // [[Usage]]
432 UsageOption usageOption = collator->GetUsage();
433 JSHandle<JSTaggedValue> usageValue = OptionsToEcmaString(thread, usageOption);
434 JSObject::CreateDataProperty(thread, options, globalConst->GetHandledUsageString(), usageValue);
435
436 // [[Sensitivity]]
437 auto sentivityOption = collator->GetSensitivity();
438 JSHandle<JSTaggedValue> sensitivityValue = OptionsToEcmaString(thread, sentivityOption);
439 JSObject::CreateDataProperty(thread, options, globalConst->GetHandledSensitivityString(), sensitivityValue);
440
441 // [[IgnorePunctuation]]
442 JSHandle<JSTaggedValue> ignorePunctuationValue(thread, JSTaggedValue(collator->GetIgnorePunctuation()));
443 JSObject::CreateDataProperty(thread, options, globalConst->GetHandledIgnorePunctuationString(),
444 ignorePunctuationValue);
445
446 // [[Collation]]
447 JSMutableHandle<JSTaggedValue> collationValue(thread, collator->GetCollation());
448 UErrorCode status = U_ZERO_ERROR;
449 icu::Collator *icuCollator = collator->GetIcuCollator();
450 icu::Locale icu_locale(icuCollator->getLocale(ULOC_VALID_LOCALE, status));
451 std::string collation_value =
452 icu_locale.getUnicodeKeywordValue<std::string>("co", status);
453 if (collationValue->IsUndefined()) {
454 if (collation_value != "search" && collation_value != "") {
455 collationValue.Update(factory->NewFromStdString(collation_value).GetTaggedValue());
456 } else {
457 collationValue.Update(globalConst->GetDefaultString());
458 }
459 }
460 JSObject::CreateDataProperty(thread, options, globalConst->GetHandledCollationString(), collationValue);
461
462 // [[Numeric]]
463 JSHandle<JSTaggedValue> numericValue(thread, JSTaggedValue(collator->GetNumeric()));
464 JSObject::CreateDataProperty(thread, options, globalConst->GetHandledNumericString(), numericValue);
465
466 // [[CaseFirst]]
467 CaseFirstOption caseFirstOption = collator->GetCaseFirst();
468 // In Ecma402 spec, caseFirst is an optional property so we set it to Upper when input is undefined
469 // the requirement maybe change in the future
470 JSHandle<JSTaggedValue> caseFirstValue = OptionsToEcmaString(thread, caseFirstOption);
471 JSObject::CreateDataProperty(thread, options, globalConst->GetHandledCaseFirstString(), caseFirstValue);
472 return options;
473 }
474
CompareStringsOptionFor(JSThread * thread,JSHandle<JSTaggedValue> locales)475 CompareStringsOption JSCollator::CompareStringsOptionFor(JSThread* thread,
476 JSHandle<JSTaggedValue> locales)
477 {
478 // All the available locales that are statically known to fulfill fast path conditions.
479 static const char* const FAST_LOCALE[] = {
480 "en-US", "en", "fr", "es", "de", "pt", "it", "ca",
481 "de-AT", "fi", "id", "id-ID", "ms", "nl", "pl", "ro",
482 "sl", "sv", "sw", "vi", "en-DE", "en-GB",
483 };
484 if (locales->IsUndefined()) {
485 auto context = thread->GetCurrentEcmaContext();
486 auto defaultCompareOption = context->GetDefaultCompareStringsOption();
487 if (defaultCompareOption.has_value()) {
488 return defaultCompareOption.value();
489 }
490 auto defaultLocale = intl::LocaleHelper::StdStringDefaultLocale(thread);
491 for (const char *fastLocale : FAST_LOCALE) {
492 if (strcmp(fastLocale, defaultLocale.c_str()) == 0) {
493 context->SetDefaultCompareStringsOption(CompareStringsOption::TRY_FAST_PATH);
494 return CompareStringsOption::TRY_FAST_PATH;
495 }
496 }
497 context->SetDefaultCompareStringsOption(CompareStringsOption::NONE);
498 return CompareStringsOption::NONE;
499 }
500
501 if (!locales->IsString()) {
502 return CompareStringsOption::NONE;
503 }
504
505 JSHandle<EcmaString> localesString = JSHandle<EcmaString>::Cast(locales);
506 CString localesStr = ConvertToString(*localesString, StringConvertedUsage::LOGICOPERATION);
507 for (const char *fastLocale : FAST_LOCALE) {
508 if (strcmp(fastLocale, localesStr.c_str()) == 0) {
509 return CompareStringsOption::TRY_FAST_PATH;
510 }
511 }
512
513 return CompareStringsOption::NONE;
514 }
515
CompareStringsOptionFor(JSThread * thread,JSHandle<JSTaggedValue> locales,JSHandle<JSTaggedValue> options)516 CompareStringsOption JSCollator::CompareStringsOptionFor(JSThread* thread,
517 JSHandle<JSTaggedValue> locales,
518 JSHandle<JSTaggedValue> options)
519 {
520 if (!options->IsUndefined()) {
521 return CompareStringsOption::NONE;
522 }
523 return CompareStringsOptionFor(thread, locales);
524 }
525
// Anonymous namespace for the helpers used by CompareStrings
527 namespace {
// Level-1 collation weights indexed by character code. A weight of 0 marks a character that
// cannot be fast-compared (see CanFastCompare). Upper- and lower-case ASCII letters share the
// same weight (49..74), digits '0'..'9' map to 39..48. Only the first 128 entries are listed;
// the remaining entries of the 256-element array are zero-initialized.
constexpr uint8_t COLLATION_WEIGHT_L1[256] = {
    0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  2,  3,  4,  5,  0,  0,  0,  0,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  6,  12, 16, 28, 38, 29, 27, 15,
    17, 18, 24, 32, 9,  8,  14, 25, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 11, 10,
    33, 34, 35, 13, 23, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
    64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 19, 26, 20, 31, 7,  30, 49, 50, 51,
    52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71,
    72, 73, 74, 21, 36, 22, 37, 0,
};
// Level-3 collation weights indexed by character code: 2 for upper-case ASCII letters, 1 for
// every other character that has a non-zero level-1 weight, 0 otherwise. As above, entries
// 128..255 are zero-initialized.
constexpr uint8_t COLLATION_WEIGHT_L3[256] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 0,
};
// Number of entries in the weight tables (256); used as the upper bound in CanFastCompare.
constexpr int COLLATION_WEIGHT_LENGTH = sizeof(COLLATION_WEIGHT_L1) / sizeof(COLLATION_WEIGHT_L1[0]);
547
ToUCollationResult(int delta)548 constexpr UCollationResult ToUCollationResult(int delta)
549 {
550 return delta < 0 ? UCollationResult::UCOL_LESS
551 : (delta > 0 ? UCollationResult::UCOL_GREATER
552 : UCollationResult::UCOL_EQUAL);
553 }
554
555 struct FastCompareStringsData {
556 UCollationResult l1Result = UCollationResult::UCOL_EQUAL;
557 UCollationResult l3Result = UCollationResult::UCOL_EQUAL;
558 int processedUntil = 0;
559 int firstDiffAt = 0; // The first relevant diff (L1 if exists, else L3).
560 bool hasDiff = false;
561
FastCompareFailedpanda::ecmascript::__anond86117160111::FastCompareStringsData562 std::optional<UCollationResult> FastCompareFailed(int& processedUntilOut) const
563 {
564 if (hasDiff) {
565 // Found some difference, continue there to ensure the generic algorithm picks it up.
566 processedUntilOut = firstDiffAt;
567 } else {
568 // No difference found, reprocess the last processed character since it may be
569 // followed by a unicode combining character.
570 processedUntilOut = std::max(processedUntil - 1, 0);
571 }
572 return {};
573 }
574 };
575
576 template <class T>
CanFastCompare(T ch)577 constexpr bool CanFastCompare(T ch)
578 {
579 return ch < COLLATION_WEIGHT_LENGTH && COLLATION_WEIGHT_L1[ch] != 0;
580 }
581
582 // Check canFastCompare, L1 weight, and L3 weight together.
583 // Use FastCompareStringsData to store these results.
584 template <class T1, class T2>
FastCompareFlatString(const T1 * lhs,const T2 * rhs,int length,FastCompareStringsData & fastCompareData)585 bool FastCompareFlatString(const T1* lhs, const T2* rhs, int length, FastCompareStringsData& fastCompareData)
586 {
587 for (int i = 0; i < length; i++) {
588 const T1 l = lhs[i];
589 const T2 r = rhs[i];
590 if (!CanFastCompare(l) || !CanFastCompare(r)) {
591 fastCompareData.processedUntil = i;
592 return false;
593 }
594 auto l1Result = ToUCollationResult(COLLATION_WEIGHT_L1[l] - COLLATION_WEIGHT_L1[r]);
595 if (l1Result != UCollationResult::UCOL_EQUAL) {
596 fastCompareData.hasDiff = true;
597 fastCompareData.firstDiffAt = i;
598 fastCompareData.processedUntil = i;
599 fastCompareData.l1Result = l1Result;
600 return true;
601 }
602 if (l != r && fastCompareData.l3Result == UCollationResult::UCOL_EQUAL) {
603 auto l3Result = ToUCollationResult(COLLATION_WEIGHT_L3[l] - COLLATION_WEIGHT_L3[r]);
604 fastCompareData.l3Result = l3Result;
605 if (!fastCompareData.hasDiff) {
606 fastCompareData.hasDiff = true;
607 fastCompareData.firstDiffAt = i;
608 }
609 }
610 }
611 fastCompareData.processedUntil = length;
612 return true;
613 }
614
FastCompareStringFlatContent(EcmaString * string1,EcmaString * string2,int length,FastCompareStringsData & fastCompareData)615 bool FastCompareStringFlatContent(EcmaString* string1, EcmaString* string2,
616 int length, FastCompareStringsData& fastCompareData)
617 {
618 EcmaStringAccessor string1Acc(string1);
619 EcmaStringAccessor string2Acc(string2);
620 if (string1Acc.IsUtf8()) {
621 auto l = EcmaStringAccessor::GetNonTreeUtf8Data(string1);
622 if (string2Acc.IsUtf8()) {
623 auto r = EcmaStringAccessor::GetNonTreeUtf8Data(string2);
624 return FastCompareFlatString(l, r, length, fastCompareData);
625 } else {
626 auto r = EcmaStringAccessor::GetNonTreeUtf16Data(string2);
627 return FastCompareFlatString(l, r, length, fastCompareData);
628 }
629 } else {
630 auto l = EcmaStringAccessor::GetNonTreeUtf16Data(string1);
631 if (string2Acc.IsUtf8()) {
632 auto r = EcmaStringAccessor::GetNonTreeUtf8Data(string2);
633 return FastCompareFlatString(l, r, length, fastCompareData);
634 } else {
635 auto r = EcmaStringAccessor::GetNonTreeUtf16Data(string2);
636 return FastCompareFlatString(l, r, length, fastCompareData);
637 }
638 }
639 UNREACHABLE();
640 }
641
CharIsAsciiOrOutOfBounds(EcmaString * string,int stringLength,int index)642 bool CharIsAsciiOrOutOfBounds(EcmaString* string, int stringLength, int index)
643 {
644 return index >= stringLength || EcmaStringAccessor::IsASCIICharacter(EcmaStringAccessor(string).Get<false>(index));
645 }
646
CharCanFastCompareOrOutOfBounds(EcmaString * string,int stringLength,int index)647 bool CharCanFastCompareOrOutOfBounds(EcmaString* string, int stringLength, int index)
648 {
649 return index >= stringLength || CanFastCompare(EcmaStringAccessor(string).Get<false>(index));
650 }
651
652 // Pseudo-code for simplified multi-pass algorithm is:
653 // // Only a certain subset of the ASCII range can be fast-compared.
654 // // In the actual single-pass algorithm below, we tolerate non-ASCII contents.
655 // 1. Check string1 and string2 can fastcompare.
656 // 2. Compare L1 weight for each char, the greater wins.
657 // 3. Is two strings are L1 equal in common length, the longer wins.
658 // 4. Compare L3 weight for each char, the greater wins.
659 // 5. If all equal, return equal.
660 // 6. Once some chars cannot be fastcompared, use icu.
661
TryFastCompareStrings(const icu::Collator * icuCollator,EcmaString * string1,EcmaString * string2,int & processedUntilOut)662 std::optional<UCollationResult> TryFastCompareStrings([[maybe_unused]] const icu::Collator* icuCollator,
663 EcmaString* string1, EcmaString* string2,
664 int& processedUntilOut)
665 {
666 processedUntilOut = 0;
667
668 const auto length1 = static_cast<int>(EcmaStringAccessor(string1).GetLength());
669 const auto length2 = static_cast<int>(EcmaStringAccessor(string2).GetLength());
670 int commonLength = std::min(length1, length2);
671
672 FastCompareStringsData fastCompareData;
673 if (!FastCompareStringFlatContent(string1, string2, commonLength, fastCompareData)) {
674 return fastCompareData.FastCompareFailed(processedUntilOut);
675 }
676 // The result is only valid if the last processed character is not followed
677 // by a unicode combining character.
678 if (!CharIsAsciiOrOutOfBounds(string1, length1, fastCompareData.processedUntil + 1) ||
679 !CharIsAsciiOrOutOfBounds(string2, length2, fastCompareData.processedUntil + 1)) {
680 return fastCompareData.FastCompareFailed(processedUntilOut);
681 }
682 if (fastCompareData.l1Result != UCollationResult::UCOL_EQUAL) {
683 return fastCompareData.l1Result;
684 }
685 // Strings are L1-equal up to their common length, length differences win.
686 UCollationResult lengthResult = ToUCollationResult(length1 - length2);
687 if (lengthResult != UCollationResult::UCOL_EQUAL) {
688 // Strings of different lengths may still compare as equal if the longer
689 // string has a fully ignored suffix, e.g. "a" vs. "a\u{1}".
690 if (!CharCanFastCompareOrOutOfBounds(string1, length1, commonLength) ||
691 !CharCanFastCompareOrOutOfBounds(string2, length2, commonLength)) {
692 return fastCompareData.FastCompareFailed(processedUntilOut);
693 }
694 return lengthResult;
695 }
696 // L1-equal and same length, the L3 result wins.
697 return fastCompareData.l3Result;
698 }
699 } // namespace
700
701 //StringPiece is similar to std::string_view
ToICUStringPiece(const JSHandle<EcmaString> & string,int offset=0)702 icu::StringPiece ToICUStringPiece(const JSHandle<EcmaString>& string, int offset = 0)
703 {
704 EcmaStringAccessor stringAcc(string);
705 ASSERT(stringAcc.IsUtf8());
706 ASSERT(!stringAcc.IsTreeString());
707 return icu::StringPiece(reinterpret_cast<const char*>(EcmaStringAccessor::GetNonTreeUtf8Data(*string)) + offset,
708 static_cast<int>(stringAcc.GetLength()) - offset);
709 }
710
711 // Convert to a UTF16 string and partially convert to ICUUnicodeString
ToICUUnicodeString(const JSHandle<EcmaString> & string,int offset=0)712 icu::UnicodeString ToICUUnicodeString(const JSHandle<EcmaString> &string, int offset = 0)
713 {
714 EcmaStringAccessor stringAcc(string);
715 ASSERT(!stringAcc.IsTreeString());
716 int strLength = static_cast<int>(stringAcc.GetLength());
717 int partialLength = strLength - offset;
718 if (stringAcc.IsUtf8()) {
719 constexpr int shortStringLength = 80; // 80: short string length
720 if (partialLength <= shortStringLength) {
721 // short string on stack
722 UChar shortStringBuffer[shortStringLength];
723 // utf8 is within ascii, std::copy_n from utf8 to utf16 is OK
724 std::copy_n(EcmaStringAccessor::GetNonTreeUtf8Data(*string) + offset, partialLength, shortStringBuffer);
725 return icu::UnicodeString(shortStringBuffer, partialLength);
726 }
727 CVector<uint16_t> ucharBuffer(partialLength);
728 std::copy_n(EcmaStringAccessor::GetNonTreeUtf8Data(*string) + offset, partialLength, ucharBuffer.begin());
729 return icu::UnicodeString(ucharBuffer.data(), partialLength);
730 } else {
731 return icu::UnicodeString(EcmaStringAccessor::GetNonTreeUtf16Data(*string) + offset, partialLength);
732 }
733 }
734
CompareStrings(JSThread * thread,const icu::Collator * icuCollator,const JSHandle<EcmaString> & string1,const JSHandle<EcmaString> & string2,CompareStringsOption csOption)735 JSTaggedValue JSCollator::CompareStrings(JSThread *thread, const icu::Collator *icuCollator,
736 const JSHandle<EcmaString> &string1, const JSHandle<EcmaString> &string2,
737 [[maybe_unused]]CompareStringsOption csOption)
738 {
739 if (*string1 == *string2) {
740 return JSTaggedValue(UCollationResult::UCOL_EQUAL);
741 }
742
743 // Since Unicode has ignorable characters,
744 // we cannot return early for 0-length strings.
745 auto flatString1 = JSHandle<EcmaString>(thread, EcmaStringAccessor::Flatten(thread->GetEcmaVM(), string1));
746 auto flatString2 = JSHandle<EcmaString>(thread, EcmaStringAccessor::Flatten(thread->GetEcmaVM(), string2));
747
748 int processedUntil = 0;
749 if (csOption == CompareStringsOption::TRY_FAST_PATH) {
750 auto maybeResult = TryFastCompareStrings(icuCollator, *flatString1, *flatString2, processedUntil);
751 if (maybeResult.has_value()) {
752 return JSTaggedValue(maybeResult.value());
753 }
754 }
755
756 UCollationResult result;
757 UErrorCode status = U_ZERO_ERROR;
758 if (EcmaStringAccessor(flatString1).IsUtf8() && EcmaStringAccessor(flatString2).IsUtf8()) {
759 auto string1Piece = ToICUStringPiece(flatString1, processedUntil);
760 if (!string1Piece.empty()) {
761 auto string2Piece = ToICUStringPiece(flatString2, processedUntil);
762 if (!string2Piece.empty()) {
763 result = icuCollator->compareUTF8(string1Piece, string2Piece, status);
764 return JSTaggedValue(result);
765 }
766 }
767 }
768
769 auto uString1 = ToICUUnicodeString(flatString1, processedUntil);
770 auto uString2 = ToICUUnicodeString(flatString2, processedUntil);
771 result = icuCollator->compare(uString1, uString2, status);
772 ASSERT(U_SUCCESS(status));
773
774 return JSTaggedValue(result);
775 }
776 } // namespace panda::ecmascript
777