• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021-2024 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "ecmascript/js_collator.h"
17 
18 #include "ecmascript/ecma_context.h"
19 #include "ecmascript/intl/locale_helper.h"
20 #include "ecmascript/global_env.h"
21 #include "ecmascript/ecma_string-inl.h"
22 namespace panda::ecmascript {
23 // NOLINTNEXTLINE (readability-identifier-naming, fuchsia-statically-constructed-objects)
24 const CString JSCollator::uIcuDataColl = U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "coll";
25 const std::map<std::string, CaseFirstOption> JSCollator::caseFirstMap = {
26     {"upper", CaseFirstOption::UPPER},
27     {"lower", CaseFirstOption::LOWER},
28     {"false", CaseFirstOption::FALSE_OPTION}
29 };
30 const std::map<CaseFirstOption, UColAttributeValue> JSCollator::uColAttributeValueMap = {
31     {CaseFirstOption::UPPER, UCOL_UPPER_FIRST},
32     {CaseFirstOption::LOWER, UCOL_LOWER_FIRST},
33     {CaseFirstOption::FALSE_OPTION, UCOL_OFF},
34     {CaseFirstOption::UNDEFINED, UCOL_OFF}
35 };
36 const std::vector<LocaleMatcherOption> JSCollator::LOCALE_MATCHER_OPTION = {
37     LocaleMatcherOption::LOOKUP, LocaleMatcherOption::BEST_FIT
38 };
39 const std::vector<std::string>  JSCollator::LOCALE_MATCHER_OPTION_NAME = {"lookup", "best fit"};
40 
41 const std::vector<CaseFirstOption>  JSCollator::CASE_FIRST_OPTION = {
42     CaseFirstOption::UPPER, CaseFirstOption::LOWER, CaseFirstOption::FALSE_OPTION
43 };
44 const std::vector<std::string>  JSCollator::CASE_FIRST_OPTION_NAME = {"upper", "lower", "false"};
45 
46 const std::set<std::string>  JSCollator::RELEVANT_EXTENSION_KEYS = {"co", "kn", "kf"};
47 
48 const std::vector<SensitivityOption>  JSCollator::SENSITIVITY_OPTION = {
49     SensitivityOption::BASE, SensitivityOption::ACCENT,
50     SensitivityOption::CASE, SensitivityOption::VARIANT
51 };
52 const std::vector<std::string>  JSCollator::SENSITIVITY_OPTION_NAME = {"base", "accent", "case", "variant"};
53 
54 const std::vector<UsageOption> JSCollator::USAGE_OPTION = {UsageOption::SORT, UsageOption::SEARCH};
55 const std::vector<std::string> JSCollator::USAGE_OPTION_NAME = {"sort", "search"};
56 
57 // All the available locales that are statically known to fulfill fast path conditions.
58 const char* const  JSCollator::FAST_LOCALE[] = {
59     "en-US", "en", "fr", "es", "de", "pt", "it", "ca",
60     "de-AT", "fi", "id", "id-ID", "ms", "nl", "pl", "ro",
61     "sl", "sv", "sw", "vi", "en-DE", "en-GB",
62 };
63 
64 
GetAvailableLocales(JSThread * thread,bool enableLocaleCache)65 JSHandle<TaggedArray> JSCollator::GetAvailableLocales(JSThread *thread, bool enableLocaleCache)
66 {
67     const char *key = nullptr;
68     const char *path = JSCollator::uIcuDataColl.c_str();
69     // key and path are const, so we can cache the result
70     if (enableLocaleCache) {
71         JSHandle<JSTaggedValue> cachedLocales = thread->GlobalConstants()->GetHandledCachedJSCollatorLocales();
72         if (cachedLocales->IsHeapObject()) {
73             return JSHandle<TaggedArray>(cachedLocales);
74         }
75     }
76     std::vector<std::string> availableStringLocales = intl::LocaleHelper::GetAvailableLocales(thread, key, path);
77     JSHandle<TaggedArray> availableLocales = JSLocale::ConstructLocaleList(thread, availableStringLocales);
78     if (enableLocaleCache) {
79         GlobalEnvConstants *constants = const_cast<GlobalEnvConstants *>(thread->GlobalConstants());
80         constants->SetCachedLocales(availableLocales.GetTaggedValue());
81     }
82     return availableLocales;
83 }
84 
85 /* static */
SetIcuCollator(JSThread * thread,const JSHandle<JSCollator> & collator,icu::Collator * icuCollator,const NativePointerCallback & callback)86 void JSCollator::SetIcuCollator(JSThread *thread, const JSHandle<JSCollator> &collator,
87     icu::Collator *icuCollator, const NativePointerCallback &callback)
88 {
89     EcmaVM *ecmaVm = thread->GetEcmaVM();
90     ObjectFactory *factory = ecmaVm->GetFactory();
91 
92     ASSERT(icuCollator != nullptr);
93     JSTaggedValue data = collator->GetIcuField();
94     if (data.IsJSNativePointer()) {
95         JSNativePointer *native = JSNativePointer::Cast(data.GetTaggedObject());
96         native->ResetExternalPointer(thread, icuCollator);
97         return;
98     }
99     JSHandle<JSNativePointer> pointer = factory->NewJSNativePointer(icuCollator, callback);
100     collator->SetIcuField(thread, pointer.GetTaggedValue());
101 }
102 
InitializeCollator(JSThread * thread,const JSHandle<JSCollator> & collator,const JSHandle<JSTaggedValue> & locales,const JSHandle<JSTaggedValue> & options,bool forIcuCache,bool enableLocaleCache)103 JSHandle<JSCollator> JSCollator::InitializeCollator(JSThread *thread,
104                                                     const JSHandle<JSCollator> &collator,
105                                                     const JSHandle<JSTaggedValue> &locales,
106                                                     const JSHandle<JSTaggedValue> &options,
107                                                     bool forIcuCache,
108                                                     bool enableLocaleCache)
109 {
110     EcmaVM *ecmaVm = thread->GetEcmaVM();
111     ObjectFactory *factory = ecmaVm->GetFactory();
112     const GlobalEnvConstants *globalConst = thread->GlobalConstants();
113     // 1. Let requestedLocales be ? CanonicalizeLocaleList(locales).
114     JSHandle<TaggedArray> requestedLocales = intl::LocaleHelper::CanonicalizeLocaleList(thread, locales);
115     RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
116 
117     // 2. If options is undefined, then
118     //      a. Let options be ObjectCreate(null).
119     // 3. Else,
120     //      a. Let options be ? ToObject(options).
121     JSHandle<JSObject> optionsObject;
122     if (options->IsUndefined()) {
123         optionsObject = factory->CreateNullJSObject();
124     } else {
125         optionsObject = JSTaggedValue::ToObject(thread, options);
126         RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
127     }
128     // 4. Let usage be ? GetOption(options, "usage", "string", « "sort", "search" », "sort").
129     auto usage = JSLocale::GetOptionOfString<UsageOption>(thread, optionsObject, globalConst->GetHandledUsageString(),
130                                                           JSCollator::USAGE_OPTION, JSCollator::USAGE_OPTION_NAME,
131                                                           UsageOption::SORT);
132     RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
133     collator->SetUsage(usage);
134 
135     // 5. Let matcher be ? GetOption(options, "localeMatcher", "string", « "lookup", "best fit" », "best fit").
136     auto matcher = JSLocale::GetOptionOfString<LocaleMatcherOption>(
137         thread, optionsObject, globalConst->GetHandledLocaleMatcherString(),
138         JSCollator::LOCALE_MATCHER_OPTION, JSCollator::LOCALE_MATCHER_OPTION_NAME,
139         LocaleMatcherOption::BEST_FIT);
140     RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
141 
142     // 6. Let collation be ? GetOption(options, "collation", "string", undefined, undefined).
143     // 7. If collation is not undefined, then
144     //    a. If collation does not match the Unicode Locale Identifier type nonterminal, throw a RangeError exception.
145     JSHandle<JSTaggedValue> collation =
146         JSLocale::GetOption(thread, optionsObject, globalConst->GetHandledCollationString(), OptionType::STRING,
147                             globalConst->GetHandledUndefined(), globalConst->GetHandledUndefined());
148     RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
149     collator->SetCollation(thread, collation);
150     std::string collationStr;
151     if (!collation->IsUndefined()) {
152         JSHandle<EcmaString> collationEcmaStr = JSHandle<EcmaString>::Cast(collation);
153         collationStr = intl::LocaleHelper::ConvertToStdString(collationEcmaStr);
154         if (!JSLocale::IsWellAlphaNumList(collationStr)) {
155             THROW_RANGE_ERROR_AND_RETURN(thread, "invalid collation", collator);
156         }
157     }
158 
159     // 8. Let numeric be ? GetOption(options, "numeric", "boolean", undefined, undefined).
160     bool numeric = false;
161     bool foundNumeric =
162         JSLocale::GetOptionOfBool(thread, optionsObject, globalConst->GetHandledNumericString(), false, &numeric);
163     RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
164     collator->SetNumeric(numeric);
165 
166     // 14. Let caseFirst be ? GetOption(options, "caseFirst", "string", « "upper", "lower", "false" », undefined).
167     CaseFirstOption caseFirst = JSLocale::GetOptionOfString<CaseFirstOption>(
168         thread, optionsObject, globalConst->GetHandledCaseFirstString(),
169         JSCollator::CASE_FIRST_OPTION, JSCollator::CASE_FIRST_OPTION_NAME,
170         CaseFirstOption::UNDEFINED);
171     RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
172     collator->SetCaseFirst(caseFirst);
173 
174     // 16. Let relevantExtensionKeys be %Collator%.[[RelevantExtensionKeys]].
175 
176     // 17. Let r be ResolveLocale(%Collator%.[[AvailableLocales]], requestedLocales, opt,
177     //     %Collator%.[[RelevantExtensionKeys]], localeData).
178     JSHandle<TaggedArray> availableLocales;
179     if (requestedLocales->GetLength() == 0) {
180         availableLocales = factory->EmptyArray();
181     } else {
182         availableLocales = GetAvailableLocales(thread, enableLocaleCache);
183     }
184     ResolvedLocale r =
185         JSLocale::ResolveLocale(thread, availableLocales, requestedLocales, matcher, RELEVANT_EXTENSION_KEYS);
186     RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
187     icu::Locale icuLocale = r.localeData;
188     JSHandle<EcmaString> localeStr = intl::LocaleHelper::ToLanguageTag(thread, icuLocale);
189     RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
190     collator->SetLocale(thread, localeStr.GetTaggedValue());
191     ASSERT_PRINT(!icuLocale.isBogus(), "icuLocale is bogus");
192 
193     // If collation is undefined iterate RelevantExtensionKeys to find "co"
194     //  if found, set ICU collator UnicodeKeyword to iterator->second
195     UErrorCode status = U_ZERO_ERROR;
196     if (!collation->IsUndefined()) {
197         auto extensionIter = r.extensions.find("co");
198         if (extensionIter != r.extensions.end() && extensionIter->second != collationStr) {
199             icuLocale.setUnicodeKeywordValue("co", nullptr, status);
200             ASSERT_PRINT(U_SUCCESS(status), "icuLocale set co failed");
201         }
202     }
203 
204     // If usage is serach set co-serach to icu locale key word value
205     // Eles set collation string to icu locale key word value
206     if (usage == UsageOption::SEARCH) {
207         icuLocale.setUnicodeKeywordValue("co", "search", status);
208         ASSERT(U_SUCCESS(status));
209     } else {
210         if (!collationStr.empty() && JSLocale::IsWellCollation(icuLocale, collationStr)) {
211             icuLocale.setUnicodeKeywordValue("co", collationStr, status);
212             ASSERT(U_SUCCESS(status));
213         }
214     }
215 
216     std::unique_ptr<icu::Collator> icuCollator(icu::Collator::createInstance(icuLocale, status));
217     if (U_FAILURE(status) || icuCollator == nullptr) {  // NOLINT(readability-implicit-bool-conversion)
218         if (status == UErrorCode::U_MISSING_RESOURCE_ERROR) {
219             THROW_REFERENCE_ERROR_AND_RETURN(thread, "can not find icu data resources", collator);
220         }
221         status = U_ZERO_ERROR;
222         icu::Locale localeName(icuLocale.getBaseName());
223         icuCollator.reset(icu::Collator::createInstance(localeName, status));
224         if (U_FAILURE(status) || icuCollator == nullptr) {  // NOLINT(readability-implicit-bool-conversion)
225             THROW_RANGE_ERROR_AND_RETURN(thread, "invalid collation", collator);
226         }
227     }
228     ASSERT(U_SUCCESS(status));
229     icu::Locale collatorLocale(icuCollator->getLocale(ULOC_VALID_LOCALE, status));
230 
231     icuCollator->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
232     ASSERT(U_SUCCESS(status));
233 
234     // If numeric is found set ICU collator UCOL_NUMERIC_COLLATION to numeric
235     // Else iterate RelevantExtensionKeys to find "kn"
236     //  if found, set ICU collator UCOL_NUMERIC_COLLATION to iterator->second
237     status = U_ZERO_ERROR;
238     if (foundNumeric) {
239         ASSERT(icuCollator.get() != nullptr);
240         icuCollator.get()->setAttribute(UCOL_NUMERIC_COLLATION, numeric ? UCOL_ON : UCOL_OFF, status);
241         ASSERT(U_SUCCESS(status));
242     } else {
243         auto extensionIter = r.extensions.find("kn");
244         if (extensionIter != r.extensions.end()) {
245             ASSERT(icuCollator.get() != nullptr);
246             bool found = (extensionIter->second == "true");
247             collator->SetNumeric(found);
248             icuCollator.get()->setAttribute(UCOL_NUMERIC_COLLATION, found ? UCOL_ON : UCOL_OFF, status);
249             ASSERT(U_SUCCESS(status));
250         }
251     }
252 
253     // If caseFirst is not undefined set ICU collator UColAttributeValue to caseFirst
254     // Else iterate RelevantExtensionKeys to find "kf"
255     //  if found, set ICU collator UColAttributeValue to iterator->second
256     status = U_ZERO_ERROR;
257     if (caseFirst != CaseFirstOption::UNDEFINED) {
258         ASSERT(icuCollator.get() != nullptr);
259         icuCollator.get()->setAttribute(UCOL_CASE_FIRST, OptionToUColAttribute(caseFirst), status);
260         ASSERT(U_SUCCESS(status));
261     } else {
262         auto extensionIter = r.extensions.find("kf");
263         if (extensionIter != r.extensions.end()) {
264             ASSERT(icuCollator.get() != nullptr);
265             auto mapIter = caseFirstMap.find(extensionIter->second);
266             if (mapIter != caseFirstMap.end()) {
267                 icuCollator.get()->setAttribute(UCOL_CASE_FIRST, OptionToUColAttribute(mapIter->second), status);
268                 collator->SetCaseFirst(mapIter->second);
269             } else {
270                 icuCollator.get()->setAttribute(UCOL_CASE_FIRST, OptionToUColAttribute(CaseFirstOption::UNDEFINED),
271                                                 status);
272             }
273             ASSERT(U_SUCCESS(status));
274         }
275     }
276 
277     // 24. Let sensitivity be ? GetOption(options, "sensitivity", "string", « "base", "accent", "case", "variant" »,
278     //     undefined).
279     SensitivityOption sensitivity = JSLocale::GetOptionOfString<SensitivityOption>(
280         thread, optionsObject, globalConst->GetHandledSensitivityString(),
281         JSCollator::SENSITIVITY_OPTION, JSCollator::SENSITIVITY_OPTION_NAME,
282         SensitivityOption::UNDEFINED);
283     RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
284     // 25. If sensitivity is undefined, then
285     //     a. If usage is "sort", then
286     //        i. Let sensitivity be "variant".
287     if (sensitivity == SensitivityOption::UNDEFINED) {
288         if (usage == UsageOption::SORT) {
289             sensitivity = SensitivityOption::VARIANT;
290         }
291     }
292     collator->SetSensitivity(sensitivity);
293 
294     // Trans SensitivityOption to Icu strength option
295     switch (sensitivity) {
296         case SensitivityOption::BASE:
297             icuCollator->setStrength(icu::Collator::PRIMARY);
298             break;
299         case SensitivityOption::ACCENT:
300             icuCollator->setStrength(icu::Collator::SECONDARY);
301             break;
302         case SensitivityOption::CASE:
303             icuCollator->setStrength(icu::Collator::PRIMARY);
304             icuCollator->setAttribute(UCOL_CASE_LEVEL, UCOL_ON, status);
305             break;
306         case SensitivityOption::VARIANT:
307             icuCollator->setStrength(icu::Collator::TERTIARY);
308             break;
309         case SensitivityOption::UNDEFINED:
310             break;
311         case SensitivityOption::EXCEPTION:
312             LOG_ECMA(FATAL) << "this branch is unreachable";
313             UNREACHABLE();
314     }
315 
316     // 27. Let ignorePunctuation be ? GetOption(options, "ignorePunctuation", "boolean", undefined, false).
317     // 28. Set collator.[[IgnorePunctuation]] to ignorePunctuation.
318     bool ignorePunctuation = false;
319     bool defaultIgnorePunctuation = false;
320     // If the ignorePunctuation is not defined, which in "th" locale that is true but false on other locales.
321     JSHandle<EcmaString> thKey = factory->NewFromUtf8("th");
322     if (JSTaggedValue::Equal(thread, JSHandle<JSTaggedValue>::Cast(thKey), locales)) {
323         defaultIgnorePunctuation = true;
324     }
325     JSLocale::GetOptionOfBool(thread, optionsObject, globalConst->GetHandledIgnorePunctuationString(),
326                               defaultIgnorePunctuation, &ignorePunctuation);
327     collator->SetIgnorePunctuation(ignorePunctuation);
328     if (ignorePunctuation) {
329         status = U_ZERO_ERROR;
330         icuCollator->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, status);
331         ASSERT(U_SUCCESS(status));
332     }
333 
334     if (forIcuCache) {
335         std::string cacheEntry =
336             locales->IsUndefined() ? "" : EcmaStringAccessor(locales.GetTaggedValue()).ToStdString();
337         ecmaVm->GetIntlCache().SetIcuFormatterToCache(IcuFormatterType::COLLATOR,
338             cacheEntry, icuCollator.release(), JSCollator::FreeIcuCollator);
339     } else {
340         SetIcuCollator(thread, collator, icuCollator.release(), JSCollator::FreeIcuCollator);
341     }
342     collator->SetBoundCompare(thread, JSTaggedValue::Undefined());
343     // 29. Return collator.
344     return collator;
345 }
346 
GetCachedIcuCollator(JSThread * thread,const JSTaggedValue & locales)347 icu::Collator *JSCollator::GetCachedIcuCollator(JSThread *thread, const JSTaggedValue &locales)
348 {
349     std::string cacheEntry = locales.IsUndefined() ? "" : EcmaStringAccessor(locales).ToStdString();
350     void *cachedCollator =
351         thread->GetEcmaVM()->GetIntlCache().GetIcuFormatterFromCache(IcuFormatterType::COLLATOR, cacheEntry);
352     if (cachedCollator != nullptr) {
353         return reinterpret_cast<icu::Collator*>(cachedCollator);
354     }
355     return nullptr;
356 }
357 
GetCachedIcuCollator(JSThread * thread,const JSHandle<JSTaggedValue> & locales)358 icu::Collator *JSCollator::GetCachedIcuCollator(JSThread *thread, const JSHandle<JSTaggedValue> &locales)
359 {
360     return GetCachedIcuCollator(thread, locales.GetTaggedValue());
361 }
362 
OptionToUColAttribute(CaseFirstOption caseFirstOption)363 UColAttributeValue JSCollator::OptionToUColAttribute(CaseFirstOption caseFirstOption)
364 {
365     auto iter = uColAttributeValueMap.find(caseFirstOption);
366     if (iter != uColAttributeValueMap.end()) {
367         return iter->second;
368     }
369     LOG_ECMA(FATAL) << "this branch is unreachable";
370     UNREACHABLE();
371 }
372 
OptionsToEcmaString(JSThread * thread,UsageOption usage)373 JSHandle<JSTaggedValue> OptionsToEcmaString(JSThread *thread, UsageOption usage)
374 {
375     JSMutableHandle<JSTaggedValue> result(thread, JSTaggedValue::Undefined());
376     auto globalConst = thread->GlobalConstants();
377     switch (usage) {
378         case UsageOption::SORT:
379             result.Update(globalConst->GetSortString());
380             break;
381         case UsageOption::SEARCH:
382             result.Update(globalConst->GetSearchString());
383             break;
384         default:
385             LOG_ECMA(FATAL) << "this branch is unreachable";
386             UNREACHABLE();
387     }
388     return result;
389 }
390 
OptionsToEcmaString(JSThread * thread,SensitivityOption sensitivity)391 JSHandle<JSTaggedValue> OptionsToEcmaString(JSThread *thread, SensitivityOption sensitivity)
392 {
393     JSMutableHandle<JSTaggedValue> result(thread, JSTaggedValue::Undefined());
394     auto globalConst = thread->GlobalConstants();
395     switch (sensitivity) {
396         case SensitivityOption::BASE:
397             result.Update(globalConst->GetBaseString());
398             break;
399         case SensitivityOption::ACCENT:
400             result.Update(globalConst->GetAccentString());
401             break;
402         case SensitivityOption::CASE:
403             result.Update(globalConst->GetCaseString());
404             break;
405         case SensitivityOption::VARIANT:
406             result.Update(globalConst->GetVariantString());
407             break;
408         case SensitivityOption::UNDEFINED:
409             break;
410         default:
411             LOG_ECMA(FATAL) << "this branch is unreachable";
412             UNREACHABLE();
413     }
414     return result;
415 }
416 
OptionsToEcmaString(JSThread * thread,CaseFirstOption caseFirst)417 JSHandle<JSTaggedValue> OptionsToEcmaString(JSThread *thread, CaseFirstOption caseFirst)
418 {
419     JSMutableHandle<JSTaggedValue> result(thread, JSTaggedValue::Undefined());
420     auto globalConst = thread->GlobalConstants();
421     switch (caseFirst) {
422         case CaseFirstOption::UPPER:
423             result.Update(globalConst->GetUpperString());
424             break;
425         case CaseFirstOption::LOWER:
426             result.Update(globalConst->GetLowerString());
427             break;
428         case CaseFirstOption::FALSE_OPTION:
429             result.Update(globalConst->GetFalseString());
430             break;
431         case CaseFirstOption::UNDEFINED:
432             result.Update(globalConst->GetUpperString());
433             break;
434         default:
435             LOG_ECMA(FATAL) << "this branch is unreachable";
436             UNREACHABLE();
437     }
438     return result;
439 }
440 
441 // 11.3.4 Intl.Collator.prototype.resolvedOptions ()
ResolvedOptions(JSThread * thread,const JSHandle<JSCollator> & collator)442 JSHandle<JSObject> JSCollator::ResolvedOptions(JSThread *thread, const JSHandle<JSCollator> &collator)
443 {
444     auto ecmaVm = thread->GetEcmaVM();
445     auto globalConst = thread->GlobalConstants();
446     ObjectFactory *factory = ecmaVm->GetFactory();
447     JSHandle<GlobalEnv> env = ecmaVm->GetGlobalEnv();
448     JSHandle<JSFunction> funCtor(env->GetObjectFunction());
449     JSHandle<JSObject> options(factory->NewJSObjectByConstructor(funCtor));
450 
451     // [[Locale]]
452     JSHandle<JSTaggedValue> property = globalConst->GetHandledLocaleString();
453     JSHandle<JSTaggedValue> locale(thread, collator->GetLocale());
454     JSObject::CreateDataPropertyOrThrow(thread, options, property, locale);
455     RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSObject, thread);
456 
457     // [[Usage]]
458     UsageOption usageOption = collator->GetUsage();
459     JSHandle<JSTaggedValue> usageValue = OptionsToEcmaString(thread, usageOption);
460     JSObject::CreateDataProperty(thread, options, globalConst->GetHandledUsageString(), usageValue);
461 
462     // [[Sensitivity]]
463     auto sentivityOption = collator->GetSensitivity();
464     JSHandle<JSTaggedValue> sensitivityValue = OptionsToEcmaString(thread, sentivityOption);
465     JSObject::CreateDataProperty(thread, options, globalConst->GetHandledSensitivityString(), sensitivityValue);
466 
467     // [[IgnorePunctuation]]
468     JSHandle<JSTaggedValue> ignorePunctuationValue(thread, JSTaggedValue(collator->GetIgnorePunctuation()));
469     JSObject::CreateDataProperty(thread, options, globalConst->GetHandledIgnorePunctuationString(),
470                                  ignorePunctuationValue);
471 
472     // [[Collation]]
473     JSMutableHandle<JSTaggedValue> collationValue(thread, collator->GetCollation());
474     UErrorCode status = U_ZERO_ERROR;
475     icu::Collator *icuCollator = collator->GetIcuCollator();
476     icu::Locale icu_locale(icuCollator->getLocale(ULOC_VALID_LOCALE, status));
477     std::string collation_value =
478         icu_locale.getUnicodeKeywordValue<std::string>("co", status);
479     if (collationValue->IsUndefined()) {
480         if (collation_value != "search" && collation_value != "") {
481             collationValue.Update(factory->NewFromStdString(collation_value).GetTaggedValue());
482         } else {
483             collationValue.Update(globalConst->GetDefaultString());
484         }
485     }
486     JSObject::CreateDataProperty(thread, options, globalConst->GetHandledCollationString(), collationValue);
487 
488     // [[Numeric]]
489     JSHandle<JSTaggedValue> numericValue(thread, JSTaggedValue(collator->GetNumeric()));
490     JSObject::CreateDataProperty(thread, options, globalConst->GetHandledNumericString(), numericValue);
491 
492     // [[CaseFirst]]
493     CaseFirstOption caseFirstOption = collator->GetCaseFirst();
494     // In Ecma402 spec, caseFirst is an optional property so we set it to Upper when input is undefined
495     // the requirement maybe change in the future
496     JSHandle<JSTaggedValue> caseFirstValue = OptionsToEcmaString(thread, caseFirstOption);
497     JSObject::CreateDataProperty(thread, options, globalConst->GetHandledCaseFirstString(), caseFirstValue);
498     return options;
499 }
500 
CompareStringsOptionFor(JSThread * thread,JSHandle<JSTaggedValue> locales)501 CompareStringsOption JSCollator::CompareStringsOptionFor(JSThread* thread,
502                                                          JSHandle<JSTaggedValue> locales)
503 {
504     if (locales->IsUndefined()) {
505         auto& intlCache = thread->GetEcmaVM()->GetIntlCache();
506         auto defaultCompareOption = intlCache.GetDefaultCompareStringsOption();
507         if (defaultCompareOption.has_value()) {
508             return defaultCompareOption.value();
509         }
510         auto defaultLocale = intl::LocaleHelper::StdStringDefaultLocale(thread);
511         for (const char *fastLocale : FAST_LOCALE) {
512             if (strcmp(fastLocale, defaultLocale.c_str()) == 0) {
513                 intlCache.SetDefaultCompareStringsOption(CompareStringsOption::TRY_FAST_PATH);
514                 return CompareStringsOption::TRY_FAST_PATH;
515             }
516         }
517         intlCache.SetDefaultCompareStringsOption(CompareStringsOption::NONE);
518         return CompareStringsOption::NONE;
519     }
520 
521     if (!locales->IsString()) {
522         return CompareStringsOption::NONE;
523     }
524 
525     JSHandle<EcmaString> localesString = JSHandle<EcmaString>::Cast(locales);
526     CString localesStr = ConvertToString(*localesString, StringConvertedUsage::LOGICOPERATION);
527     for (const char *fastLocale : FAST_LOCALE) {
528         if (strcmp(fastLocale, localesStr.c_str()) == 0) {
529             return CompareStringsOption::TRY_FAST_PATH;
530         }
531     }
532 
533     return CompareStringsOption::NONE;
534 }
535 
CompareStringsOptionFor(JSThread * thread,JSHandle<JSTaggedValue> locales,JSHandle<JSTaggedValue> options)536 CompareStringsOption JSCollator::CompareStringsOptionFor(JSThread* thread,
537                                                          JSHandle<JSTaggedValue> locales,
538                                                          JSHandle<JSTaggedValue> options)
539 {
540     if (!options->IsUndefined()) {
541         return CompareStringsOption::NONE;
542     }
543     return CompareStringsOptionFor(thread, locales);
544 }
545 
// Anonymous namespace for CompareStrings
547 namespace {
// Level-1 collation weights indexed by code unit.
// A weight of 0 marks a code unit the fast path does not handle (see CanFastCompare).
// Only the first 128 entries are listed; the rest of the 256-entry array is zero-initialized.
// Upper- and lower-case ASCII letters share the same level-1 weight.
constexpr uint8_t COLLATION_WEIGHT_L1[256] = {
    0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  2,  3,  4,  5,  0,  0,  0,  0,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  6,  12, 16, 28, 38, 29, 27, 15,
    17, 18, 24, 32, 9,  8,  14, 25, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 11, 10,
    33, 34, 35, 13, 23, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
    64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 19, 26, 20, 31, 7,  30, 49, 50, 51,
    52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71,
    72, 73, 74, 21, 36, 22, 37, 0,
};
// Level-3 collation weights indexed by code unit.
// Upper-case ASCII letters carry weight 2; every other code unit with a non-zero level-1 weight carries 1.
constexpr uint8_t COLLATION_WEIGHT_L3[256] = {
    0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  1,  1,  1,  1,  0,  0,  0,  0,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  1,  1,  1,  1,  1,  1,  1,
    1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
    1,  1,  1,  1,  1,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
    2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  1,  1,  1,  1,  1,  1,  1,  1,  1,
    1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
    1,  1,  1,  1,  1,  1,  1,  0,
};
// Number of entries in the level-1 table; used as the bound for table lookups.
constexpr int COLLATION_WEIGHT_LENGTH = sizeof(COLLATION_WEIGHT_L1) / sizeof(COLLATION_WEIGHT_L1[0]);
// The level-3 table is indexed with values that were bounds-checked against the level-1 length only,
// so the two tables must never drift apart in size.
static_assert(sizeof(COLLATION_WEIGHT_L3) == sizeof(COLLATION_WEIGHT_L1),
              "COLLATION_WEIGHT_L1 and COLLATION_WEIGHT_L3 must have the same number of entries");
567 
ToUCollationResult(int delta)568 constexpr UCollationResult ToUCollationResult(int delta)
569 {
570     return delta < 0 ? UCollationResult::UCOL_LESS
571                      : (delta > 0 ? UCollationResult::UCOL_GREATER
572                                   : UCollationResult::UCOL_EQUAL);
573 }
574 
575 struct FastCompareStringsData {
576     UCollationResult l1Result = UCollationResult::UCOL_EQUAL;
577     UCollationResult l3Result = UCollationResult::UCOL_EQUAL;
578     int processedUntil = 0;
579     int firstDiffAt = 0;  // The first relevant diff (L1 if exists, else L3).
580     bool hasDiff = false;
581 
FastCompareFailedpanda::ecmascript::__anonfbbf73560111::FastCompareStringsData582     std::optional<UCollationResult> FastCompareFailed(int& processedUntilOut) const
583     {
584         if (hasDiff) {
585             // Found some difference, continue there to ensure the generic algorithm picks it up.
586             processedUntilOut = firstDiffAt;
587         } else {
588             // No difference found, reprocess the last processed character since it may be
589             // followed by a unicode combining character.
590             processedUntilOut = std::max(processedUntil - 1, 0);
591         }
592         return {};
593     }
594 };
595 
596 template <class T>
CanFastCompare(T ch)597 constexpr bool CanFastCompare(T ch)
598 {
599     return ch < COLLATION_WEIGHT_LENGTH && COLLATION_WEIGHT_L1[ch] != 0;
600 }
601 
602 // Check canFastCompare, L1 weight, and L3 weight together.
603 // Use FastCompareStringsData to store these results.
604 template <class T1, class T2>
FastCompareFlatString(const T1 * lhs,const T2 * rhs,int length,FastCompareStringsData & fastCompareData)605 bool FastCompareFlatString(const T1* lhs, const T2* rhs, int length, FastCompareStringsData& fastCompareData)
606 {
607     for (int i = 0; i < length; i++) {
608         const T1 l = lhs[i];
609         const T2 r = rhs[i];
610         if (!CanFastCompare(l) || !CanFastCompare(r)) {
611             fastCompareData.processedUntil = i;
612             return false;
613         }
614         auto l1Result = ToUCollationResult(COLLATION_WEIGHT_L1[l] - COLLATION_WEIGHT_L1[r]);
615         if (l1Result != UCollationResult::UCOL_EQUAL) {
616             fastCompareData.hasDiff = true;
617             fastCompareData.firstDiffAt = i;
618             fastCompareData.processedUntil = i;
619             fastCompareData.l1Result = l1Result;
620             return true;
621         }
622         if (l != r && fastCompareData.l3Result == UCollationResult::UCOL_EQUAL) {
623             auto l3Result = ToUCollationResult(COLLATION_WEIGHT_L3[l] - COLLATION_WEIGHT_L3[r]);
624             fastCompareData.l3Result = l3Result;
625             if (!fastCompareData.hasDiff) {
626                 fastCompareData.hasDiff = true;
627                 fastCompareData.firstDiffAt = i;
628             }
629         }
630     }
631     fastCompareData.processedUntil = length;
632     return true;
633 }
634 
FastCompareStringFlatContent(EcmaString * string1,EcmaString * string2,int length,FastCompareStringsData & fastCompareData)635 bool FastCompareStringFlatContent(EcmaString* string1, EcmaString* string2,
636                                   int length, FastCompareStringsData& fastCompareData)
637 {
638     EcmaStringAccessor string1Acc(string1);
639     EcmaStringAccessor string2Acc(string2);
640     if (string1Acc.IsUtf8()) {
641         auto l = EcmaStringAccessor::GetNonTreeUtf8Data(string1);
642         if (string2Acc.IsUtf8()) {
643             auto r = EcmaStringAccessor::GetNonTreeUtf8Data(string2);
644             return FastCompareFlatString(l, r, length, fastCompareData);
645         } else {
646             auto r = EcmaStringAccessor::GetNonTreeUtf16Data(string2);
647             return FastCompareFlatString(l, r, length, fastCompareData);
648         }
649     } else {
650         auto l = EcmaStringAccessor::GetNonTreeUtf16Data(string1);
651         if (string2Acc.IsUtf8()) {
652             auto r = EcmaStringAccessor::GetNonTreeUtf8Data(string2);
653             return FastCompareFlatString(l, r, length, fastCompareData);
654         } else {
655             auto r = EcmaStringAccessor::GetNonTreeUtf16Data(string2);
656             return FastCompareFlatString(l, r, length, fastCompareData);
657         }
658     }
659     UNREACHABLE();
660 }
661 
CharIsAsciiOrOutOfBounds(EcmaString * string,int stringLength,int index)662 bool CharIsAsciiOrOutOfBounds(EcmaString* string, int stringLength, int index)
663 {
664     return index >= stringLength || EcmaStringAccessor::IsASCIICharacter(EcmaStringAccessor(string).Get<false>(index));
665 }
666 
CharCanFastCompareOrOutOfBounds(EcmaString * string,int stringLength,int index)667 bool CharCanFastCompareOrOutOfBounds(EcmaString* string, int stringLength, int index)
668 {
669     return index >= stringLength || CanFastCompare(EcmaStringAccessor(string).Get<false>(index));
670 }
671 
672 // Pseudo-code for simplified multi-pass algorithm is:
673 //     // Only a certain subset of the ASCII range can be fast-compared.
674 //     // In the actual single-pass algorithm below, we tolerate non-ASCII contents.
675 //     1. Check string1 and string2 can fastcompare.
676 //     2. Compare L1 weight for each char, the greater wins.
677 //     3. Is two strings are L1 equal in common length, the longer wins.
678 //     4. Compare L3 weight for each char, the greater wins.
679 //     5. If all equal, return equal.
680 //     6. Once some chars cannot be fastcompared, use icu.
681 
TryFastCompareStrings(EcmaString * string1,EcmaString * string2,int & processedUntilOut)682 std::optional<UCollationResult> TryFastCompareStrings(EcmaString* string1, EcmaString* string2,
683                                                       int& processedUntilOut)
684 {
685     processedUntilOut = 0;
686 
687     const auto length1 = static_cast<int>(EcmaStringAccessor(string1).GetLength());
688     const auto length2 = static_cast<int>(EcmaStringAccessor(string2).GetLength());
689     int commonLength = std::min(length1, length2);
690 
691     FastCompareStringsData fastCompareData;
692     if (!FastCompareStringFlatContent(string1, string2, commonLength, fastCompareData)) {
693         return fastCompareData.FastCompareFailed(processedUntilOut);
694     }
695     // The result is only valid if the last processed character is not followed
696     // by a unicode combining character.
697     if (!CharIsAsciiOrOutOfBounds(string1, length1, fastCompareData.processedUntil + 1) ||
698         !CharIsAsciiOrOutOfBounds(string2, length2, fastCompareData.processedUntil + 1)) {
699         return fastCompareData.FastCompareFailed(processedUntilOut);
700     }
701     if (fastCompareData.l1Result != UCollationResult::UCOL_EQUAL) {
702         return fastCompareData.l1Result;
703     }
704     // Strings are L1-equal up to their common length, length differences win.
705     UCollationResult lengthResult = ToUCollationResult(length1 - length2);
706     if (lengthResult != UCollationResult::UCOL_EQUAL) {
707         // Strings of different lengths may still compare as equal if the longer
708         // string has a fully ignored suffix, e.g. "a" vs. "a\u{1}".
709         if (!CharCanFastCompareOrOutOfBounds(string1, length1, commonLength) ||
710             !CharCanFastCompareOrOutOfBounds(string2, length2, commonLength)) {
711             return fastCompareData.FastCompareFailed(processedUntilOut);
712         }
713         return lengthResult;
714     }
715     // L1-equal and same length, the L3 result wins.
716     return fastCompareData.l3Result;
717 }
718 } // namespace
719 
720 //StringPiece is similar to std::string_view
ToICUStringPiece(EcmaString * string,int offset=0)721 icu::StringPiece ToICUStringPiece(EcmaString* string, int offset = 0)
722 {
723     EcmaStringAccessor stringAcc(string);
724     ASSERT(stringAcc.IsUtf8());
725     ASSERT(!stringAcc.IsTreeString());
726     return icu::StringPiece(reinterpret_cast<const char*>(EcmaStringAccessor::GetNonTreeUtf8Data(string)) + offset,
727                             static_cast<int>(stringAcc.GetLength()) - offset);
728 }
729 
730 // Convert to a UTF16 string and partially convert to ICUUnicodeString
ToICUUnicodeString(EcmaString * string,int offset=0)731 icu::UnicodeString ToICUUnicodeString(EcmaString* string, int offset = 0)
732 {
733     EcmaStringAccessor stringAcc(string);
734     ASSERT(!stringAcc.IsTreeString());
735     int strLength = static_cast<int>(stringAcc.GetLength());
736     int partialLength = strLength - offset;
737     if (stringAcc.IsUtf8()) {
738         constexpr int shortStringLength = 80;  // 80: short string length
739         if (partialLength <= shortStringLength) {
740             // short string on stack
741             UChar shortStringBuffer[shortStringLength];
742             // utf8 is within ascii, std::copy_n from utf8 to utf16 is OK
743             std::copy_n(EcmaStringAccessor::GetNonTreeUtf8Data(string) + offset, partialLength, shortStringBuffer);
744             return icu::UnicodeString(shortStringBuffer, partialLength);
745         }
746         CVector<uint16_t> ucharBuffer(partialLength);
747         std::copy_n(EcmaStringAccessor::GetNonTreeUtf8Data(string) + offset, partialLength, ucharBuffer.begin());
748         return icu::UnicodeString(ucharBuffer.data(), partialLength);
749     } else {
750         return icu::UnicodeString(EcmaStringAccessor::GetNonTreeUtf16Data(string) + offset, partialLength);
751     }
752 }
753 
SlowCompareStrings(const icu::Collator * icuCollator,EcmaString * flatString1,EcmaString * flatString2,int processedUntil)754 JSTaggedValue JSCollator::SlowCompareStrings(const icu::Collator *icuCollator,
755                                              EcmaString* flatString1,
756                                              EcmaString* flatString2,
757                                              int processedUntil)
758 {
759     UCollationResult result;
760     UErrorCode status = U_ZERO_ERROR;
761     if (EcmaStringAccessor(flatString1).IsUtf8() && EcmaStringAccessor(flatString2).IsUtf8()) {
762         auto string1Piece = ToICUStringPiece(flatString1, processedUntil);
763         if (!string1Piece.empty()) {
764             auto string2Piece = ToICUStringPiece(flatString2, processedUntil);
765             if (!string2Piece.empty()) {
766                 result = icuCollator->compareUTF8(string1Piece, string2Piece, status);
767                 return JSTaggedValue(result);
768             }
769         }
770     }
771 
772     auto uString1 = ToICUUnicodeString(flatString1, processedUntil);
773     auto uString2 = ToICUUnicodeString(flatString2, processedUntil);
774     result = icuCollator->compare(uString1, uString2, status);
775     ASSERT(U_SUCCESS(status));
776     return JSTaggedValue(result);
777 }
778 
CompareStrings(JSThread * thread,const icu::Collator * icuCollator,const JSHandle<EcmaString> & string1,const JSHandle<EcmaString> & string2,CompareStringsOption csOption)779 JSTaggedValue JSCollator::CompareStrings(JSThread *thread, const icu::Collator *icuCollator,
780                                          const JSHandle<EcmaString> &string1, const JSHandle<EcmaString> &string2,
781                                          [[maybe_unused]]CompareStringsOption csOption)
782 {
783     if (*string1 == *string2) {
784         return JSTaggedValue(UCollationResult::UCOL_EQUAL);
785     }
786 
787     // Since Unicode has ignorable characters,
788     // we cannot return early for 0-length strings.
789     auto flatString1 = JSHandle<EcmaString>(thread, EcmaStringAccessor::Flatten(thread->GetEcmaVM(), string1));
790     auto flatString2 = JSHandle<EcmaString>(thread, EcmaStringAccessor::Flatten(thread->GetEcmaVM(), string2));
791 
792     int processedUntil = 0;
793     if (csOption == CompareStringsOption::TRY_FAST_PATH) {
794         auto maybeResult = TryFastCompareStrings(*flatString1, *flatString2, processedUntil);
795         if (maybeResult.has_value()) {
796             return JSTaggedValue(maybeResult.value());
797         }
798     }
799     return SlowCompareStrings(icuCollator, *flatString1, *flatString2, processedUntil);
800 }
801 
FastCachedCompareStrings(JSThread * thread,JSHandle<JSTaggedValue> locales,const JSHandle<EcmaString> & string1,const JSHandle<EcmaString> & string2,CompareStringsOption csOption)802 JSTaggedValue JSCollator::FastCachedCompareStrings(JSThread *thread, JSHandle<JSTaggedValue> locales,
803                                                    const JSHandle<EcmaString> &string1,
804                                                    const JSHandle<EcmaString> &string2,
805                                                    CompareStringsOption csOption)
806 {
807     if (*string1 == *string2) {
808         return JSTaggedValue(UCollationResult::UCOL_EQUAL);
809     }
810 
811     // Since Unicode has ignorable characters,
812     // we cannot return early for 0-length strings.
813     auto flatString1 = JSHandle<EcmaString>(thread, EcmaStringAccessor::Flatten(thread->GetEcmaVM(), string1));
814     auto flatString2 = JSHandle<EcmaString>(thread, EcmaStringAccessor::Flatten(thread->GetEcmaVM(), string2));
815 
816     int processedUntil = 0;
817     if (csOption == CompareStringsOption::TRY_FAST_PATH) {
818         auto maybeResult = TryFastCompareStrings(*flatString1, *flatString2, processedUntil);
819         if (maybeResult.has_value()) {
820             return JSTaggedValue(maybeResult.value());
821         }
822     }
823 
824     auto icuCollator = JSCollator::GetCachedIcuCollator(thread, locales);
825     if (icuCollator != nullptr) {
826         return SlowCompareStrings(icuCollator, *flatString1, *flatString2, processedUntil);
827     }
828     return JSTaggedValue::Undefined();
829 }
830 }  // namespace panda::ecmascript
831