• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021-2024 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "ecmascript/js_collator.h"
17 
18 #include "ecmascript/intl/locale_helper.h"
19 #include "ecmascript/global_env.h"
20 #include "ecmascript/ecma_string-inl.h"
21 namespace panda::ecmascript {
22 // NOLINTNEXTLINE (readability-identifier-naming, fuchsia-statically-constructed-objects)
23 const CString JSCollator::uIcuDataColl = U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "coll";
24 const std::map<std::string, CaseFirstOption> JSCollator::caseFirstMap = {
25     {"upper", CaseFirstOption::UPPER},
26     {"lower", CaseFirstOption::LOWER},
27     {"false", CaseFirstOption::FALSE_OPTION}
28 };
29 const std::map<CaseFirstOption, UColAttributeValue> JSCollator::uColAttributeValueMap = {
30     {CaseFirstOption::UPPER, UCOL_UPPER_FIRST},
31     {CaseFirstOption::LOWER, UCOL_LOWER_FIRST},
32     {CaseFirstOption::FALSE_OPTION, UCOL_OFF},
33     {CaseFirstOption::UNDEFINED, UCOL_OFF}
34 };
35 const std::vector<LocaleMatcherOption> JSCollator::LOCALE_MATCHER_OPTION = {
36     LocaleMatcherOption::LOOKUP, LocaleMatcherOption::BEST_FIT
37 };
38 const std::vector<std::string>  JSCollator::LOCALE_MATCHER_OPTION_NAME = {"lookup", "best fit"};
39 
40 const std::vector<CaseFirstOption>  JSCollator::CASE_FIRST_OPTION = {
41     CaseFirstOption::UPPER, CaseFirstOption::LOWER, CaseFirstOption::FALSE_OPTION
42 };
43 const std::vector<std::string>  JSCollator::CASE_FIRST_OPTION_NAME = {"upper", "lower", "false"};
44 
45 const std::set<std::string>  JSCollator::RELEVANT_EXTENSION_KEYS = {"co", "kn", "kf"};
46 
47 const std::vector<SensitivityOption>  JSCollator::SENSITIVITY_OPTION = {
48     SensitivityOption::BASE, SensitivityOption::ACCENT,
49     SensitivityOption::CASE, SensitivityOption::VARIANT
50 };
51 const std::vector<std::string>  JSCollator::SENSITIVITY_OPTION_NAME = {"base", "accent", "case", "variant"};
52 
53 const std::vector<UsageOption> JSCollator::USAGE_OPTION = {UsageOption::SORT, UsageOption::SEARCH};
54 const std::vector<std::string> JSCollator::USAGE_OPTION_NAME = {"sort", "search"};
55 
56 // All the available locales that are statically known to fulfill fast path conditions.
57 const char* const  JSCollator::FAST_LOCALE[] = {
58     "en-US", "en", "fr", "es", "de", "pt", "it", "ca",
59     "de-AT", "fi", "id", "id-ID", "ms", "nl", "pl", "ro",
60     "sl", "sv", "sw", "vi", "en-DE", "en-GB",
61 };
62 
63 
GetAvailableLocales(JSThread * thread,bool enableLocaleCache)64 JSHandle<TaggedArray> JSCollator::GetAvailableLocales(JSThread *thread, bool enableLocaleCache)
65 {
66     const char *key = nullptr;
67     const char *path = JSCollator::uIcuDataColl.c_str();
68     // key and path are const, so we can cache the result
69     if (enableLocaleCache) {
70         JSHandle<JSTaggedValue> cachedLocales = thread->GlobalConstants()->GetHandledCachedJSCollatorLocales();
71         if (cachedLocales->IsHeapObject()) {
72             return JSHandle<TaggedArray>(cachedLocales);
73         }
74     }
75     std::vector<std::string> availableStringLocales = intl::LocaleHelper::GetAvailableLocales(thread, key, path);
76     JSHandle<TaggedArray> availableLocales = JSLocale::ConstructLocaleList(thread, availableStringLocales);
77     if (enableLocaleCache) {
78         GlobalEnvConstants *constants = const_cast<GlobalEnvConstants *>(thread->GlobalConstants());
79         constants->SetCachedLocales(availableLocales.GetTaggedValue());
80     }
81     return availableLocales;
82 }
83 
84 /* static */
SetIcuCollator(JSThread * thread,const JSHandle<JSCollator> & collator,icu::Collator * icuCollator,const NativePointerCallback & callback)85 void JSCollator::SetIcuCollator(JSThread *thread, const JSHandle<JSCollator> &collator,
86     icu::Collator *icuCollator, const NativePointerCallback &callback)
87 {
88     EcmaVM *ecmaVm = thread->GetEcmaVM();
89     ObjectFactory *factory = ecmaVm->GetFactory();
90 
91     ASSERT(icuCollator != nullptr);
92     JSTaggedValue data = collator->GetIcuField(thread);
93     if (data.IsJSNativePointer()) {
94         JSNativePointer *native = JSNativePointer::Cast(data.GetTaggedObject());
95         native->ResetExternalPointer(thread, icuCollator);
96         return;
97     }
98     JSHandle<JSNativePointer> pointer = factory->NewJSNativePointer(icuCollator, callback);
99     collator->SetIcuField(thread, pointer.GetTaggedValue());
100 }
101 
InitializeCollator(JSThread * thread,const JSHandle<JSCollator> & collator,const JSHandle<JSTaggedValue> & locales,const JSHandle<JSTaggedValue> & options,bool forIcuCache,bool enableLocaleCache)102 JSHandle<JSCollator> JSCollator::InitializeCollator(JSThread *thread,
103                                                     const JSHandle<JSCollator> &collator,
104                                                     const JSHandle<JSTaggedValue> &locales,
105                                                     const JSHandle<JSTaggedValue> &options,
106                                                     bool forIcuCache,
107                                                     bool enableLocaleCache)
108 {
109     EcmaVM *ecmaVm = thread->GetEcmaVM();
110     ObjectFactory *factory = ecmaVm->GetFactory();
111     const GlobalEnvConstants *globalConst = thread->GlobalConstants();
112     // 1. Let requestedLocales be ? CanonicalizeLocaleList(locales).
113     JSHandle<TaggedArray> requestedLocales = intl::LocaleHelper::CanonicalizeLocaleList(thread, locales);
114     RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
115 
116     // 2. If options is undefined, then
117     //      a. Let options be ObjectCreate(null).
118     // 3. Else,
119     //      a. Let options be ? ToObject(options).
120     JSHandle<JSObject> optionsObject;
121     if (options->IsUndefined()) {
122         optionsObject = factory->CreateNullJSObject();
123     } else {
124         optionsObject = JSTaggedValue::ToObject(thread, options);
125         RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
126     }
127     // 4. Let usage be ? GetOption(options, "usage", "string", « "sort", "search" », "sort").
128     auto usage = JSLocale::GetOptionOfString<UsageOption>(thread, optionsObject, globalConst->GetHandledUsageString(),
129                                                           JSCollator::USAGE_OPTION, JSCollator::USAGE_OPTION_NAME,
130                                                           UsageOption::SORT);
131     RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
132     collator->SetUsage(usage);
133 
134     // 5. Let matcher be ? GetOption(options, "localeMatcher", "string", « "lookup", "best fit" », "best fit").
135     auto matcher = JSLocale::GetOptionOfString<LocaleMatcherOption>(
136         thread, optionsObject, globalConst->GetHandledLocaleMatcherString(),
137         JSCollator::LOCALE_MATCHER_OPTION, JSCollator::LOCALE_MATCHER_OPTION_NAME,
138         LocaleMatcherOption::BEST_FIT);
139     RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
140 
141     // 6. Let collation be ? GetOption(options, "collation", "string", undefined, undefined).
142     // 7. If collation is not undefined, then
143     //    a. If collation does not match the Unicode Locale Identifier type nonterminal, throw a RangeError exception.
144     JSHandle<JSTaggedValue> collation =
145         JSLocale::GetOption(thread, optionsObject, globalConst->GetHandledCollationString(), OptionType::STRING,
146                             globalConst->GetHandledUndefined(), globalConst->GetHandledUndefined());
147     RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
148     collator->SetCollation(thread, collation);
149     std::string collationStr;
150     if (!collation->IsUndefined()) {
151         JSHandle<EcmaString> collationEcmaStr = JSHandle<EcmaString>::Cast(collation);
152         collationStr = intl::LocaleHelper::ConvertToStdString(thread, collationEcmaStr);
153         if (!JSLocale::IsWellAlphaNumList(collationStr)) {
154             THROW_RANGE_ERROR_AND_RETURN(thread, "invalid collation", collator);
155         }
156     }
157 
158     // 8. Let numeric be ? GetOption(options, "numeric", "boolean", undefined, undefined).
159     bool numeric = false;
160     bool foundNumeric =
161         JSLocale::GetOptionOfBool(thread, optionsObject, globalConst->GetHandledNumericString(), false, &numeric);
162     RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
163     collator->SetNumeric(numeric);
164 
165     // 14. Let caseFirst be ? GetOption(options, "caseFirst", "string", « "upper", "lower", "false" », undefined).
166     CaseFirstOption caseFirst = JSLocale::GetOptionOfString<CaseFirstOption>(
167         thread, optionsObject, globalConst->GetHandledCaseFirstString(),
168         JSCollator::CASE_FIRST_OPTION, JSCollator::CASE_FIRST_OPTION_NAME,
169         CaseFirstOption::UNDEFINED);
170     RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
171     collator->SetCaseFirst(caseFirst);
172 
173     // 16. Let relevantExtensionKeys be %Collator%.[[RelevantExtensionKeys]].
174 
175     // 17. Let r be ResolveLocale(%Collator%.[[AvailableLocales]], requestedLocales, opt,
176     //     %Collator%.[[RelevantExtensionKeys]], localeData).
177     JSHandle<TaggedArray> availableLocales;
178     if (requestedLocales->GetLength() == 0) {
179         availableLocales = factory->EmptyArray();
180     } else {
181         availableLocales = GetAvailableLocales(thread, enableLocaleCache);
182     }
183     ResolvedLocale r =
184         JSLocale::ResolveLocale(thread, availableLocales, requestedLocales, matcher, RELEVANT_EXTENSION_KEYS);
185     RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
186     icu::Locale icuLocale = r.localeData;
187     JSHandle<EcmaString> localeStr = intl::LocaleHelper::ToLanguageTag(thread, icuLocale);
188     RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
189     collator->SetLocale(thread, localeStr.GetTaggedValue());
190     ASSERT_PRINT(!icuLocale.isBogus(), "icuLocale is bogus");
191 
192     // If collation is undefined iterate RelevantExtensionKeys to find "co"
193     //  if found, set ICU collator UnicodeKeyword to iterator->second
194     UErrorCode status = U_ZERO_ERROR;
195     if (!collation->IsUndefined()) {
196         auto extensionIter = r.extensions.find("co");
197         if (extensionIter != r.extensions.end() && extensionIter->second != collationStr) {
198             icuLocale.setUnicodeKeywordValue("co", nullptr, status);
199             ASSERT_PRINT(U_SUCCESS(status), "icuLocale set co failed");
200         }
201     }
202 
203     // If usage is serach set co-serach to icu locale key word value
204     // Eles set collation string to icu locale key word value
205     if (usage == UsageOption::SEARCH) {
206         icuLocale.setUnicodeKeywordValue("co", "search", status);
207         ASSERT(U_SUCCESS(status));
208     } else {
209         if (!collationStr.empty() && JSLocale::IsWellCollation(icuLocale, collationStr)) {
210             icuLocale.setUnicodeKeywordValue("co", collationStr, status);
211             ASSERT(U_SUCCESS(status));
212         }
213     }
214 
215     std::unique_ptr<icu::Collator> icuCollator(icu::Collator::createInstance(icuLocale, status));
216     if (U_FAILURE(status) || icuCollator == nullptr) {  // NOLINT(readability-implicit-bool-conversion)
217         if (status == UErrorCode::U_MISSING_RESOURCE_ERROR) {
218             THROW_REFERENCE_ERROR_AND_RETURN(thread, "can not find icu data resources", collator);
219         }
220         status = U_ZERO_ERROR;
221         icu::Locale localeName(icuLocale.getBaseName());
222         icuCollator.reset(icu::Collator::createInstance(localeName, status));
223         if (U_FAILURE(status) || icuCollator == nullptr) {  // NOLINT(readability-implicit-bool-conversion)
224             THROW_RANGE_ERROR_AND_RETURN(thread, "invalid collation", collator);
225         }
226     }
227     ASSERT(U_SUCCESS(status));
228     icu::Locale collatorLocale(icuCollator->getLocale(ULOC_VALID_LOCALE, status));
229 
230     icuCollator->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
231     ASSERT(U_SUCCESS(status));
232 
233     // If numeric is found set ICU collator UCOL_NUMERIC_COLLATION to numeric
234     // Else iterate RelevantExtensionKeys to find "kn"
235     //  if found, set ICU collator UCOL_NUMERIC_COLLATION to iterator->second
236     status = U_ZERO_ERROR;
237     if (foundNumeric) {
238         ASSERT(icuCollator.get() != nullptr);
239         icuCollator.get()->setAttribute(UCOL_NUMERIC_COLLATION, numeric ? UCOL_ON : UCOL_OFF, status);
240         ASSERT(U_SUCCESS(status));
241     } else {
242         auto extensionIter = r.extensions.find("kn");
243         if (extensionIter != r.extensions.end()) {
244             ASSERT(icuCollator.get() != nullptr);
245             bool found = (extensionIter->second == "true");
246             collator->SetNumeric(found);
247             icuCollator.get()->setAttribute(UCOL_NUMERIC_COLLATION, found ? UCOL_ON : UCOL_OFF, status);
248             ASSERT(U_SUCCESS(status));
249         }
250     }
251 
252     // If caseFirst is not undefined set ICU collator UColAttributeValue to caseFirst
253     // Else iterate RelevantExtensionKeys to find "kf"
254     //  if found, set ICU collator UColAttributeValue to iterator->second
255     status = U_ZERO_ERROR;
256     if (caseFirst != CaseFirstOption::UNDEFINED) {
257         ASSERT(icuCollator.get() != nullptr);
258         icuCollator.get()->setAttribute(UCOL_CASE_FIRST, OptionToUColAttribute(caseFirst), status);
259         ASSERT(U_SUCCESS(status));
260     } else {
261         auto extensionIter = r.extensions.find("kf");
262         if (extensionIter != r.extensions.end()) {
263             ASSERT(icuCollator.get() != nullptr);
264             auto mapIter = caseFirstMap.find(extensionIter->second);
265             if (mapIter != caseFirstMap.end()) {
266                 icuCollator.get()->setAttribute(UCOL_CASE_FIRST, OptionToUColAttribute(mapIter->second), status);
267                 collator->SetCaseFirst(mapIter->second);
268             } else {
269                 icuCollator.get()->setAttribute(UCOL_CASE_FIRST, OptionToUColAttribute(CaseFirstOption::UNDEFINED),
270                                                 status);
271             }
272             ASSERT(U_SUCCESS(status));
273         }
274     }
275 
276     // 24. Let sensitivity be ? GetOption(options, "sensitivity", "string", « "base", "accent", "case", "variant" »,
277     //     undefined).
278     SensitivityOption sensitivity = JSLocale::GetOptionOfString<SensitivityOption>(
279         thread, optionsObject, globalConst->GetHandledSensitivityString(),
280         JSCollator::SENSITIVITY_OPTION, JSCollator::SENSITIVITY_OPTION_NAME,
281         SensitivityOption::UNDEFINED);
282     RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
283     // 25. If sensitivity is undefined, then
284     //     a. If usage is "sort", then
285     //        i. Let sensitivity be "variant".
286     if (sensitivity == SensitivityOption::UNDEFINED) {
287         if (usage == UsageOption::SORT) {
288             sensitivity = SensitivityOption::VARIANT;
289         }
290     }
291     collator->SetSensitivity(sensitivity);
292 
293     // Trans SensitivityOption to Icu strength option
294     switch (sensitivity) {
295         case SensitivityOption::BASE:
296             icuCollator->setStrength(icu::Collator::PRIMARY);
297             break;
298         case SensitivityOption::ACCENT:
299             icuCollator->setStrength(icu::Collator::SECONDARY);
300             break;
301         case SensitivityOption::CASE:
302             icuCollator->setStrength(icu::Collator::PRIMARY);
303             icuCollator->setAttribute(UCOL_CASE_LEVEL, UCOL_ON, status);
304             break;
305         case SensitivityOption::VARIANT:
306             icuCollator->setStrength(icu::Collator::TERTIARY);
307             break;
308         case SensitivityOption::UNDEFINED:
309             break;
310         case SensitivityOption::EXCEPTION:
311             LOG_ECMA(FATAL) << "this branch is unreachable";
312             UNREACHABLE();
313     }
314 
315     // 27. Let ignorePunctuation be ? GetOption(options, "ignorePunctuation", "boolean", undefined, false).
316     // 28. Set collator.[[IgnorePunctuation]] to ignorePunctuation.
317     bool ignorePunctuation = false;
318     bool defaultIgnorePunctuation = false;
319     // If the ignorePunctuation is not defined, which in "th" locale that is true but false on other locales.
320     JSHandle<EcmaString> thKey = factory->NewFromUtf8("th");
321     if (JSTaggedValue::Equal(thread, JSHandle<JSTaggedValue>::Cast(thKey), locales)) {
322         defaultIgnorePunctuation = true;
323     }
324     JSLocale::GetOptionOfBool(thread, optionsObject, globalConst->GetHandledIgnorePunctuationString(),
325                               defaultIgnorePunctuation, &ignorePunctuation);
326     collator->SetIgnorePunctuation(ignorePunctuation);
327     if (ignorePunctuation) {
328         status = U_ZERO_ERROR;
329         icuCollator->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, status);
330         ASSERT(U_SUCCESS(status));
331     }
332 
333     if (forIcuCache) {
334         std::string cacheEntry =
335             locales->IsUndefined() ? "" : EcmaStringAccessor(locales.GetTaggedValue()).ToStdString(thread);
336         ecmaVm->GetIntlCache().SetIcuFormatterToCache(IcuFormatterType::COLLATOR,
337             cacheEntry, icuCollator.release(), JSCollator::FreeIcuCollator);
338     } else {
339         SetIcuCollator(thread, collator, icuCollator.release(), JSCollator::FreeIcuCollator);
340     }
341     collator->SetBoundCompare(thread, JSTaggedValue::Undefined());
342     // 29. Return collator.
343     return collator;
344 }
345 
GetCachedIcuCollator(JSThread * thread,const JSTaggedValue & locales)346 icu::Collator *JSCollator::GetCachedIcuCollator(JSThread *thread, const JSTaggedValue &locales)
347 {
348     std::string cacheEntry = locales.IsUndefined() ? "" : EcmaStringAccessor(locales).ToStdString(thread);
349     void *cachedCollator =
350         thread->GetEcmaVM()->GetIntlCache().GetIcuFormatterFromCache(IcuFormatterType::COLLATOR, cacheEntry);
351     if (cachedCollator != nullptr) {
352         return reinterpret_cast<icu::Collator*>(cachedCollator);
353     }
354     return nullptr;
355 }
356 
GetCachedIcuCollator(JSThread * thread,const JSHandle<JSTaggedValue> & locales)357 icu::Collator *JSCollator::GetCachedIcuCollator(JSThread *thread, const JSHandle<JSTaggedValue> &locales)
358 {
359     return GetCachedIcuCollator(thread, locales.GetTaggedValue());
360 }
361 
OptionToUColAttribute(CaseFirstOption caseFirstOption)362 UColAttributeValue JSCollator::OptionToUColAttribute(CaseFirstOption caseFirstOption)
363 {
364     auto iter = uColAttributeValueMap.find(caseFirstOption);
365     if (iter != uColAttributeValueMap.end()) {
366         return iter->second;
367     }
368     LOG_ECMA(FATAL) << "this branch is unreachable";
369     UNREACHABLE();
370 }
371 
OptionsToEcmaString(JSThread * thread,UsageOption usage)372 JSHandle<JSTaggedValue> OptionsToEcmaString(JSThread *thread, UsageOption usage)
373 {
374     JSMutableHandle<JSTaggedValue> result(thread, JSTaggedValue::Undefined());
375     auto globalConst = thread->GlobalConstants();
376     switch (usage) {
377         case UsageOption::SORT:
378             result.Update(globalConst->GetSortString());
379             break;
380         case UsageOption::SEARCH:
381             result.Update(globalConst->GetSearchString());
382             break;
383         default:
384             LOG_ECMA(FATAL) << "this branch is unreachable";
385             UNREACHABLE();
386     }
387     return result;
388 }
389 
OptionsToEcmaString(JSThread * thread,SensitivityOption sensitivity)390 JSHandle<JSTaggedValue> OptionsToEcmaString(JSThread *thread, SensitivityOption sensitivity)
391 {
392     JSMutableHandle<JSTaggedValue> result(thread, JSTaggedValue::Undefined());
393     auto globalConst = thread->GlobalConstants();
394     switch (sensitivity) {
395         case SensitivityOption::BASE:
396             result.Update(globalConst->GetBaseString());
397             break;
398         case SensitivityOption::ACCENT:
399             result.Update(globalConst->GetAccentString());
400             break;
401         case SensitivityOption::CASE:
402             result.Update(globalConst->GetCaseString());
403             break;
404         case SensitivityOption::VARIANT:
405             result.Update(globalConst->GetVariantString());
406             break;
407         case SensitivityOption::UNDEFINED:
408             break;
409         default:
410             LOG_ECMA(FATAL) << "this branch is unreachable";
411             UNREACHABLE();
412     }
413     return result;
414 }
415 
OptionsToEcmaString(JSThread * thread,CaseFirstOption caseFirst)416 JSHandle<JSTaggedValue> OptionsToEcmaString(JSThread *thread, CaseFirstOption caseFirst)
417 {
418     JSMutableHandle<JSTaggedValue> result(thread, JSTaggedValue::Undefined());
419     auto globalConst = thread->GlobalConstants();
420     switch (caseFirst) {
421         case CaseFirstOption::UPPER:
422             result.Update(globalConst->GetUpperString());
423             break;
424         case CaseFirstOption::LOWER:
425             result.Update(globalConst->GetLowerString());
426             break;
427         case CaseFirstOption::FALSE_OPTION:
428             result.Update(globalConst->GetFalseString());
429             break;
430         case CaseFirstOption::UNDEFINED:
431             result.Update(globalConst->GetUpperString());
432             break;
433         default:
434             LOG_ECMA(FATAL) << "this branch is unreachable";
435             UNREACHABLE();
436     }
437     return result;
438 }
439 
440 // 11.3.4 Intl.Collator.prototype.resolvedOptions ()
ResolvedOptions(JSThread * thread,const JSHandle<JSCollator> & collator)441 JSHandle<JSObject> JSCollator::ResolvedOptions(JSThread *thread, const JSHandle<JSCollator> &collator)
442 {
443     auto ecmaVm = thread->GetEcmaVM();
444     auto globalConst = thread->GlobalConstants();
445     ObjectFactory *factory = ecmaVm->GetFactory();
446     JSHandle<GlobalEnv> env = ecmaVm->GetGlobalEnv();
447     JSHandle<JSFunction> funCtor(env->GetObjectFunction());
448     JSHandle<JSObject> options(factory->NewJSObjectByConstructor(funCtor));
449 
450     // [[Locale]]
451     JSHandle<JSTaggedValue> property = globalConst->GetHandledLocaleString();
452     JSHandle<JSTaggedValue> locale(thread, collator->GetLocale(thread));
453     JSObject::CreateDataPropertyOrThrow(thread, options, property, locale);
454     RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSObject, thread);
455 
456     // [[Usage]]
457     UsageOption usageOption = collator->GetUsage();
458     JSHandle<JSTaggedValue> usageValue = OptionsToEcmaString(thread, usageOption);
459     JSObject::CreateDataProperty(thread, options, globalConst->GetHandledUsageString(), usageValue);
460 
461     // [[Sensitivity]]
462     auto sentivityOption = collator->GetSensitivity();
463     JSHandle<JSTaggedValue> sensitivityValue = OptionsToEcmaString(thread, sentivityOption);
464     JSObject::CreateDataProperty(thread, options, globalConst->GetHandledSensitivityString(), sensitivityValue);
465 
466     // [[IgnorePunctuation]]
467     JSHandle<JSTaggedValue> ignorePunctuationValue(thread, JSTaggedValue(collator->GetIgnorePunctuation()));
468     JSObject::CreateDataProperty(thread, options, globalConst->GetHandledIgnorePunctuationString(),
469                                  ignorePunctuationValue);
470 
471     // [[Collation]]
472     JSMutableHandle<JSTaggedValue> collationValue(thread, collator->GetCollation(thread));
473     UErrorCode status = U_ZERO_ERROR;
474     icu::Collator *icuCollator = collator->GetIcuCollator(thread);
475     icu::Locale icu_locale(icuCollator->getLocale(ULOC_VALID_LOCALE, status));
476     std::string collation_value =
477         icu_locale.getUnicodeKeywordValue<std::string>("co", status);
478     if (collationValue->IsUndefined()) {
479         if (collation_value != "search" && collation_value != "") {
480             collationValue.Update(factory->NewFromStdString(collation_value).GetTaggedValue());
481         } else {
482             collationValue.Update(globalConst->GetDefaultString());
483         }
484     }
485     JSObject::CreateDataProperty(thread, options, globalConst->GetHandledCollationString(), collationValue);
486 
487     // [[Numeric]]
488     JSHandle<JSTaggedValue> numericValue(thread, JSTaggedValue(collator->GetNumeric()));
489     JSObject::CreateDataProperty(thread, options, globalConst->GetHandledNumericString(), numericValue);
490 
491     // [[CaseFirst]]
492     CaseFirstOption caseFirstOption = collator->GetCaseFirst();
493     // In Ecma402 spec, caseFirst is an optional property so we set it to Upper when input is undefined
494     // the requirement maybe change in the future
495     JSHandle<JSTaggedValue> caseFirstValue = OptionsToEcmaString(thread, caseFirstOption);
496     JSObject::CreateDataProperty(thread, options, globalConst->GetHandledCaseFirstString(), caseFirstValue);
497     return options;
498 }
499 
CompareStringsOptionFor(JSThread * thread,JSHandle<JSTaggedValue> locales)500 CompareStringsOption JSCollator::CompareStringsOptionFor(JSThread* thread,
501                                                          JSHandle<JSTaggedValue> locales)
502 {
503     if (locales->IsUndefined()) {
504         auto& intlCache = thread->GetEcmaVM()->GetIntlCache();
505         auto defaultCompareOption = intlCache.GetDefaultCompareStringsOption();
506         if (defaultCompareOption.has_value()) {
507             return defaultCompareOption.value();
508         }
509         auto defaultLocale = intl::LocaleHelper::StdStringDefaultLocale(thread);
510         for (const char *fastLocale : FAST_LOCALE) {
511             if (strcmp(fastLocale, defaultLocale.c_str()) == 0) {
512                 intlCache.SetDefaultCompareStringsOption(CompareStringsOption::TRY_FAST_PATH);
513                 return CompareStringsOption::TRY_FAST_PATH;
514             }
515         }
516         intlCache.SetDefaultCompareStringsOption(CompareStringsOption::NONE);
517         return CompareStringsOption::NONE;
518     }
519 
520     if (!locales->IsString()) {
521         return CompareStringsOption::NONE;
522     }
523 
524     JSHandle<EcmaString> localesString = JSHandle<EcmaString>::Cast(locales);
525     CString localesStr = ConvertToString(thread, *localesString, StringConvertedUsage::LOGICOPERATION);
526     for (const char *fastLocale : FAST_LOCALE) {
527         if (strcmp(fastLocale, localesStr.c_str()) == 0) {
528             return CompareStringsOption::TRY_FAST_PATH;
529         }
530     }
531 
532     return CompareStringsOption::NONE;
533 }
534 
CompareStringsOptionFor(JSThread * thread,JSHandle<JSTaggedValue> locales,JSHandle<JSTaggedValue> options)535 CompareStringsOption JSCollator::CompareStringsOptionFor(JSThread* thread,
536                                                          JSHandle<JSTaggedValue> locales,
537                                                          JSHandle<JSTaggedValue> options)
538 {
539     if (!options->IsUndefined()) {
540         return CompareStringsOption::NONE;
541     }
542     return CompareStringsOptionFor(thread, locales);
543 }
544 
// Anonymous namespace for CompareStrings
546 namespace {
// Level-1 weights indexed by code unit. Only the first 128 entries are listed; the remaining
// entries of the 256-element array are zero-initialized. A weight of 0 marks a code unit that
// CanFastCompare rejects. Upper- and lower-case ASCII letters share the same weight.
constexpr uint8_t COLLATION_WEIGHT_L1[256] = {
    0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  2,  3,  4,  5,  0,  0,   // 0x00 - 0x0F
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   // 0x10 - 0x1F
    6,  12, 16, 28, 38, 29, 27, 15, 17, 18, 24, 32, 9,  8,  14, 25,  // 0x20 - 0x2F
    39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 11, 10, 33, 34, 35, 13,  // 0x30 - 0x3F
    23, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,  // 0x40 - 0x4F
    64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 19, 26, 20, 31, 7,   // 0x50 - 0x5F
    30, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,  // 0x60 - 0x6F
    64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 21, 36, 22, 37, 0,   // 0x70 - 0x7F
};

// Level-3 weights indexed by code unit, laid out like COLLATION_WEIGHT_L1.
// Upper-case ASCII letters carry weight 2, every other listed non-zero entry carries weight 1.
constexpr uint8_t COLLATION_WEIGHT_L3[256] = {
    0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  1,  1,  1,  1,  0,  0,   // 0x00 - 0x0F
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   // 0x10 - 0x1F
    1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,   // 0x20 - 0x2F
    1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,   // 0x30 - 0x3F
    1,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,   // 0x40 - 0x4F
    2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  1,  1,  1,  1,  1,   // 0x50 - 0x5F
    1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,   // 0x60 - 0x6F
    1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  0,   // 0x70 - 0x7F
};

// Number of entries in each weight table.
constexpr int COLLATION_WEIGHT_LENGTH =
    static_cast<int>(sizeof(COLLATION_WEIGHT_L1) / sizeof(COLLATION_WEIGHT_L1[0]));
566 
ToUCollationResult(int delta)567 constexpr UCollationResult ToUCollationResult(int delta)
568 {
569     return delta < 0 ? UCollationResult::UCOL_LESS
570                      : (delta > 0 ? UCollationResult::UCOL_GREATER
571                                   : UCollationResult::UCOL_EQUAL);
572 }
573 
574 struct FastCompareStringsData {
575     UCollationResult l1Result = UCollationResult::UCOL_EQUAL;
576     UCollationResult l3Result = UCollationResult::UCOL_EQUAL;
577     int processedUntil = 0;
578     int firstDiffAt = 0;  // The first relevant diff (L1 if exists, else L3).
579     bool hasDiff = false;
580 
FastCompareFailedpanda::ecmascript::__anon13eda0b80111::FastCompareStringsData581     std::optional<UCollationResult> FastCompareFailed(int& processedUntilOut) const
582     {
583         if (hasDiff) {
584             // Found some difference, continue there to ensure the generic algorithm picks it up.
585             processedUntilOut = firstDiffAt;
586         } else {
587             // No difference found, reprocess the last processed character since it may be
588             // followed by a unicode combining character.
589             processedUntilOut = std::max(processedUntil - 1, 0);
590         }
591         return {};
592     }
593 };
594 
595 template <class T>
CanFastCompare(T ch)596 constexpr bool CanFastCompare(T ch)
597 {
598     return ch < COLLATION_WEIGHT_LENGTH && COLLATION_WEIGHT_L1[ch] != 0;
599 }
600 
601 // Check canFastCompare, L1 weight, and L3 weight together.
602 // Use FastCompareStringsData to store these results.
603 template <class T1, class T2>
FastCompareFlatString(const T1 * lhs,const T2 * rhs,int length,FastCompareStringsData & fastCompareData)604 bool FastCompareFlatString(const T1* lhs, const T2* rhs, int length, FastCompareStringsData& fastCompareData)
605 {
606     for (int i = 0; i < length; i++) {
607         const T1 l = lhs[i];
608         const T2 r = rhs[i];
609         if (!CanFastCompare(l) || !CanFastCompare(r)) {
610             fastCompareData.processedUntil = i;
611             return false;
612         }
613         auto l1Result = ToUCollationResult(COLLATION_WEIGHT_L1[l] - COLLATION_WEIGHT_L1[r]);
614         if (l1Result != UCollationResult::UCOL_EQUAL) {
615             fastCompareData.hasDiff = true;
616             fastCompareData.firstDiffAt = i;
617             fastCompareData.processedUntil = i;
618             fastCompareData.l1Result = l1Result;
619             return true;
620         }
621         if (l != r && fastCompareData.l3Result == UCollationResult::UCOL_EQUAL) {
622             auto l3Result = ToUCollationResult(COLLATION_WEIGHT_L3[l] - COLLATION_WEIGHT_L3[r]);
623             fastCompareData.l3Result = l3Result;
624             if (!fastCompareData.hasDiff) {
625                 fastCompareData.hasDiff = true;
626                 fastCompareData.firstDiffAt = i;
627             }
628         }
629     }
630     fastCompareData.processedUntil = length;
631     return true;
632 }
633 
FastCompareStringFlatContent(JSThread * thread,EcmaString * string1,EcmaString * string2,int length,FastCompareStringsData & fastCompareData)634 bool FastCompareStringFlatContent(JSThread *thread,
635                                   EcmaString* string1, EcmaString* string2,
636                                   int length, FastCompareStringsData& fastCompareData)
637 {
638     EcmaStringAccessor string1Acc(string1);
639     EcmaStringAccessor string2Acc(string2);
640     if (string1Acc.IsUtf8()) {
641         auto l = EcmaStringAccessor::GetNonTreeUtf8Data(thread, string1);
642         if (string2Acc.IsUtf8()) {
643             auto r = EcmaStringAccessor::GetNonTreeUtf8Data(thread, string2);
644             return FastCompareFlatString(l, r, length, fastCompareData);
645         } else {
646             auto r = EcmaStringAccessor::GetNonTreeUtf16Data(thread, string2);
647             return FastCompareFlatString(l, r, length, fastCompareData);
648         }
649     } else {
650         auto l = EcmaStringAccessor::GetNonTreeUtf16Data(thread, string1);
651         if (string2Acc.IsUtf8()) {
652             auto r = EcmaStringAccessor::GetNonTreeUtf8Data(thread, string2);
653             return FastCompareFlatString(l, r, length, fastCompareData);
654         } else {
655             auto r = EcmaStringAccessor::GetNonTreeUtf16Data(thread, string2);
656             return FastCompareFlatString(l, r, length, fastCompareData);
657         }
658     }
659     UNREACHABLE();
660 }
661 
CharIsAsciiOrOutOfBounds(JSThread * thread,EcmaString * string,int stringLength,int index)662 bool CharIsAsciiOrOutOfBounds(JSThread *thread, EcmaString* string, int stringLength, int index)
663 {
664     return index >= stringLength ||
665            EcmaStringAccessor::IsASCIICharacter(EcmaStringAccessor(string).Get<false>(thread, index));
666 }
667 
CharCanFastCompareOrOutOfBounds(JSThread * thread,EcmaString * string,int stringLength,int index)668 bool CharCanFastCompareOrOutOfBounds(JSThread *thread, EcmaString* string, int stringLength, int index)
669 {
670     return index >= stringLength || CanFastCompare(EcmaStringAccessor(string).Get<false>(thread, index));
671 }
672 
673 // Pseudo-code for simplified multi-pass algorithm is:
674 //     // Only a certain subset of the ASCII range can be fast-compared.
675 //     // In the actual single-pass algorithm below, we tolerate non-ASCII contents.
676 //     1. Check string1 and string2 can fastcompare.
677 //     2. Compare L1 weight for each char, the greater wins.
//     3. If two strings are L1-equal over their common length, the longer wins.
679 //     4. Compare L3 weight for each char, the greater wins.
680 //     5. If all equal, return equal.
681 //     6. Once some chars cannot be fastcompared, use icu.
682 
TryFastCompareStrings(JSThread * thread,EcmaString * string1,EcmaString * string2,int & processedUntilOut)683 std::optional<UCollationResult> TryFastCompareStrings(JSThread *thread,
684                                                       EcmaString* string1, EcmaString* string2,
685                                                       int& processedUntilOut)
686 {
687     processedUntilOut = 0;
688 
689     const auto length1 = static_cast<int>(EcmaStringAccessor(string1).GetLength());
690     const auto length2 = static_cast<int>(EcmaStringAccessor(string2).GetLength());
691     int commonLength = std::min(length1, length2);
692 
693     FastCompareStringsData fastCompareData;
694     if (!FastCompareStringFlatContent(thread, string1, string2, commonLength, fastCompareData)) {
695         return fastCompareData.FastCompareFailed(processedUntilOut);
696     }
697     // The result is only valid if the last processed character is not followed
698     // by a unicode combining character.
699     if (!CharIsAsciiOrOutOfBounds(thread, string1, length1, fastCompareData.processedUntil + 1) ||
700         !CharIsAsciiOrOutOfBounds(thread, string2, length2, fastCompareData.processedUntil + 1)) {
701         return fastCompareData.FastCompareFailed(processedUntilOut);
702     }
703     if (fastCompareData.l1Result != UCollationResult::UCOL_EQUAL) {
704         return fastCompareData.l1Result;
705     }
706     // Strings are L1-equal up to their common length, length differences win.
707     UCollationResult lengthResult = ToUCollationResult(length1 - length2);
708     if (lengthResult != UCollationResult::UCOL_EQUAL) {
709         // Strings of different lengths may still compare as equal if the longer
710         // string has a fully ignored suffix, e.g. "a" vs. "a\u{1}".
711         if (!CharCanFastCompareOrOutOfBounds(thread, string1, length1, commonLength) ||
712             !CharCanFastCompareOrOutOfBounds(thread, string2, length2, commonLength)) {
713             return fastCompareData.FastCompareFailed(processedUntilOut);
714         }
715         return lengthResult;
716     }
717     // L1-equal and same length, the L3 result wins.
718     return fastCompareData.l3Result;
719 }
720 } // namespace
721 
722 //StringPiece is similar to std::string_view
ToICUStringPiece(JSThread * thread,EcmaString * string,int offset=0)723 icu::StringPiece ToICUStringPiece(JSThread *thread, EcmaString* string, int offset = 0)
724 {
725     EcmaStringAccessor stringAcc(string);
726     ASSERT(stringAcc.IsUtf8());
727     ASSERT(!stringAcc.IsTreeString());
728     return icu::StringPiece(reinterpret_cast<const char *>(EcmaStringAccessor::GetNonTreeUtf8Data(thread, string)) +
729                                 offset,
730                             static_cast<int>(stringAcc.GetLength()) - offset);
731 }
732 
733 // Convert to a UTF16 string and partially convert to ICUUnicodeString
ToICUUnicodeString(JSThread * thread,EcmaString * string,int offset=0)734 icu::UnicodeString ToICUUnicodeString(JSThread *thread, EcmaString* string, int offset = 0)
735 {
736     EcmaStringAccessor stringAcc(string);
737     ASSERT(!stringAcc.IsTreeString());
738     int strLength = static_cast<int>(stringAcc.GetLength());
739     int partialLength = strLength - offset;
740     if (stringAcc.IsUtf8()) {
741         constexpr int shortStringLength = 80;  // 80: short string length
742         if (partialLength <= shortStringLength) {
743             // short string on stack
744             UChar shortStringBuffer[shortStringLength];
745             // utf8 is within ascii, std::copy_n from utf8 to utf16 is OK
746             std::copy_n(EcmaStringAccessor::GetNonTreeUtf8Data(thread, string) + offset, partialLength,
747                         shortStringBuffer);
748             return icu::UnicodeString(shortStringBuffer, partialLength);
749         }
750         CVector<uint16_t> ucharBuffer(partialLength);
751         std::copy_n(EcmaStringAccessor::GetNonTreeUtf8Data(thread, string) + offset, partialLength,
752                     ucharBuffer.begin());
753         return icu::UnicodeString(ucharBuffer.data(), partialLength);
754     } else {
755         return icu::UnicodeString(EcmaStringAccessor::GetNonTreeUtf16Data(thread, string) + offset, partialLength);
756     }
757 }
758 
SlowCompareStrings(JSThread * thread,const icu::Collator * icuCollator,EcmaString * flatString1,EcmaString * flatString2,int processedUntil)759 JSTaggedValue JSCollator::SlowCompareStrings(JSThread *thread,
760                                              const icu::Collator *icuCollator,
761                                              EcmaString* flatString1,
762                                              EcmaString* flatString2,
763                                              int processedUntil)
764 {
765     UCollationResult result;
766     UErrorCode status = U_ZERO_ERROR;
767     if (EcmaStringAccessor(flatString1).IsUtf8() && EcmaStringAccessor(flatString2).IsUtf8()) {
768         auto string1Piece = ToICUStringPiece(thread, flatString1, processedUntil);
769         if (!string1Piece.empty()) {
770             auto string2Piece = ToICUStringPiece(thread, flatString2, processedUntil);
771             if (!string2Piece.empty()) {
772                 result = icuCollator->compareUTF8(string1Piece, string2Piece, status);
773                 return JSTaggedValue(result);
774             }
775         }
776     }
777 
778     auto uString1 = ToICUUnicodeString(thread, flatString1, processedUntil);
779     auto uString2 = ToICUUnicodeString(thread, flatString2, processedUntil);
780     result = icuCollator->compare(uString1, uString2, status);
781     ASSERT(U_SUCCESS(status));
782     return JSTaggedValue(result);
783 }
784 
CompareStrings(JSThread * thread,const icu::Collator * icuCollator,const JSHandle<EcmaString> & string1,const JSHandle<EcmaString> & string2,CompareStringsOption csOption)785 JSTaggedValue JSCollator::CompareStrings(JSThread *thread, const icu::Collator *icuCollator,
786                                          const JSHandle<EcmaString> &string1, const JSHandle<EcmaString> &string2,
787                                          [[maybe_unused]]CompareStringsOption csOption)
788 {
789     if (*string1 == *string2) {
790         return JSTaggedValue(UCollationResult::UCOL_EQUAL);
791     }
792 
793     // Since Unicode has ignorable characters,
794     // we cannot return early for 0-length strings.
795     auto flatString1 = JSHandle<EcmaString>(thread, EcmaStringAccessor::Flatten(thread->GetEcmaVM(), string1));
796     auto flatString2 = JSHandle<EcmaString>(thread, EcmaStringAccessor::Flatten(thread->GetEcmaVM(), string2));
797 
798     int processedUntil = 0;
799     if (csOption == CompareStringsOption::TRY_FAST_PATH) {
800         auto maybeResult = TryFastCompareStrings(thread, *flatString1, *flatString2, processedUntil);
801         if (maybeResult.has_value()) {
802             return JSTaggedValue(maybeResult.value());
803         }
804     }
805     return SlowCompareStrings(thread, icuCollator, *flatString1, *flatString2, processedUntil);
806 }
807 
FastCachedCompareStrings(JSThread * thread,JSHandle<JSTaggedValue> locales,const JSHandle<EcmaString> & string1,const JSHandle<EcmaString> & string2,CompareStringsOption csOption)808 JSTaggedValue JSCollator::FastCachedCompareStrings(JSThread *thread, JSHandle<JSTaggedValue> locales,
809                                                    const JSHandle<EcmaString> &string1,
810                                                    const JSHandle<EcmaString> &string2,
811                                                    CompareStringsOption csOption)
812 {
813     if (*string1 == *string2) {
814         return JSTaggedValue(UCollationResult::UCOL_EQUAL);
815     }
816 
817     // Since Unicode has ignorable characters,
818     // we cannot return early for 0-length strings.
819     auto flatString1 = JSHandle<EcmaString>(thread, EcmaStringAccessor::Flatten(thread->GetEcmaVM(), string1));
820     auto flatString2 = JSHandle<EcmaString>(thread, EcmaStringAccessor::Flatten(thread->GetEcmaVM(), string2));
821 
822     int processedUntil = 0;
823     if (csOption == CompareStringsOption::TRY_FAST_PATH) {
824         auto maybeResult = TryFastCompareStrings(thread, *flatString1, *flatString2, processedUntil);
825         if (maybeResult.has_value()) {
826             return JSTaggedValue(maybeResult.value());
827         }
828     }
829 
830     auto icuCollator = JSCollator::GetCachedIcuCollator(thread, locales);
831     if (icuCollator != nullptr) {
832         return SlowCompareStrings(thread, icuCollator, *flatString1, *flatString2, processedUntil);
833     }
834     return JSTaggedValue::Undefined();
835 }
836 }  // namespace panda::ecmascript
837