• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021-2024 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "ecmascript/js_collator.h"
17 
18 #include "ecmascript/ecma_context.h"
19 #include "ecmascript/intl/locale_helper.h"
20 #include "ecmascript/global_env.h"
21 #include "ecmascript/ecma_string-inl.h"
22 
23 namespace panda::ecmascript {
24 // NOLINTNEXTLINE (readability-identifier-naming, fuchsia-statically-constructed-objects)
25 const CString JSCollator::uIcuDataColl = U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "coll";
26 const std::map<std::string, CaseFirstOption> JSCollator::caseFirstMap = {
27     {"upper", CaseFirstOption::UPPER},
28     {"lower", CaseFirstOption::LOWER},
29     {"false", CaseFirstOption::FALSE_OPTION}
30 };
31 const std::map<CaseFirstOption, UColAttributeValue> JSCollator::uColAttributeValueMap = {
32     {CaseFirstOption::UPPER, UCOL_UPPER_FIRST},
33     {CaseFirstOption::LOWER, UCOL_LOWER_FIRST},
34     {CaseFirstOption::FALSE_OPTION, UCOL_OFF},
35     {CaseFirstOption::UNDEFINED, UCOL_OFF}
36 };
37 
GetAvailableLocales(JSThread * thread,bool enableLocaleCache)38 JSHandle<TaggedArray> JSCollator::GetAvailableLocales(JSThread *thread, bool enableLocaleCache)
39 {
40     const char *key = nullptr;
41     const char *path = JSCollator::uIcuDataColl.c_str();
42     // key and path are const, so we can cache the result
43     if (enableLocaleCache) {
44         JSHandle<JSTaggedValue> cachedLocales = thread->GlobalConstants()->GetHandledCachedJSCollatorLocales();
45         if (cachedLocales->IsHeapObject()) {
46             return JSHandle<TaggedArray>(cachedLocales);
47         }
48     }
49     std::vector<std::string> availableStringLocales = intl::LocaleHelper::GetAvailableLocales(thread, key, path);
50     JSHandle<TaggedArray> availableLocales = JSLocale::ConstructLocaleList(thread, availableStringLocales);
51     if (enableLocaleCache) {
52         GlobalEnvConstants *constants = const_cast<GlobalEnvConstants *>(thread->GlobalConstants());
53         constants->SetCachedLocales(availableLocales.GetTaggedValue());
54     }
55     return availableLocales;
56 }
57 
58 /* static */
SetIcuCollator(JSThread * thread,const JSHandle<JSCollator> & collator,icu::Collator * icuCollator,const NativePointerCallback & callback)59 void JSCollator::SetIcuCollator(JSThread *thread, const JSHandle<JSCollator> &collator,
60     icu::Collator *icuCollator, const NativePointerCallback &callback)
61 {
62     EcmaVM *ecmaVm = thread->GetEcmaVM();
63     ObjectFactory *factory = ecmaVm->GetFactory();
64 
65     ASSERT(icuCollator != nullptr);
66     JSTaggedValue data = collator->GetIcuField();
67     if (data.IsJSNativePointer()) {
68         JSNativePointer *native = JSNativePointer::Cast(data.GetTaggedObject());
69         native->ResetExternalPointer(thread, icuCollator);
70         return;
71     }
72     JSHandle<JSNativePointer> pointer = factory->NewJSNativePointer(icuCollator, callback);
73     collator->SetIcuField(thread, pointer.GetTaggedValue());
74 }
75 
InitializeCollator(JSThread * thread,const JSHandle<JSCollator> & collator,const JSHandle<JSTaggedValue> & locales,const JSHandle<JSTaggedValue> & options,bool forIcuCache,bool enableLocaleCache)76 JSHandle<JSCollator> JSCollator::InitializeCollator(JSThread *thread,
77                                                     const JSHandle<JSCollator> &collator,
78                                                     const JSHandle<JSTaggedValue> &locales,
79                                                     const JSHandle<JSTaggedValue> &options,
80                                                     bool forIcuCache,
81                                                     bool enableLocaleCache)
82 {
83     EcmaVM *ecmaVm = thread->GetEcmaVM();
84     ObjectFactory *factory = ecmaVm->GetFactory();
85     const GlobalEnvConstants *globalConst = thread->GlobalConstants();
86     // 1. Let requestedLocales be ? CanonicalizeLocaleList(locales).
87     JSHandle<TaggedArray> requestedLocales = intl::LocaleHelper::CanonicalizeLocaleList(thread, locales);
88     RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
89 
90     // 2. If options is undefined, then
91     //      a. Let options be ObjectCreate(null).
92     // 3. Else,
93     //      a. Let options be ? ToObject(options).
94     JSHandle<JSObject> optionsObject;
95     if (options->IsUndefined()) {
96         optionsObject = factory->CreateNullJSObject();
97     } else {
98         optionsObject = JSTaggedValue::ToObject(thread, options);
99         RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
100     }
101     // 4. Let usage be ? GetOption(options, "usage", "string", « "sort", "search" », "sort").
102     auto usage = JSLocale::GetOptionOfString<UsageOption>(thread, optionsObject, globalConst->GetHandledUsageString(),
103                                                           {UsageOption::SORT, UsageOption::SEARCH}, {"sort", "search"},
104                                                           UsageOption::SORT);
105     RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
106     collator->SetUsage(usage);
107 
108     // 5. Let matcher be ? GetOption(options, "localeMatcher", "string", « "lookup", "best fit" », "best fit").
109     auto matcher = JSLocale::GetOptionOfString<LocaleMatcherOption>(
110         thread, optionsObject, globalConst->GetHandledLocaleMatcherString(),
111         {LocaleMatcherOption::LOOKUP, LocaleMatcherOption::BEST_FIT}, {"lookup", "best fit"},
112         LocaleMatcherOption::BEST_FIT);
113     RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
114 
115     // 6. Let collation be ? GetOption(options, "collation", "string", undefined, undefined).
116     // 7. If collation is not undefined, then
117     //    a. If collation does not match the Unicode Locale Identifier type nonterminal, throw a RangeError exception.
118     JSHandle<JSTaggedValue> collation =
119         JSLocale::GetOption(thread, optionsObject, globalConst->GetHandledCollationString(), OptionType::STRING,
120                             globalConst->GetHandledUndefined(), globalConst->GetHandledUndefined());
121     RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
122     collator->SetCollation(thread, collation);
123     std::string collationStr;
124     if (!collation->IsUndefined()) {
125         JSHandle<EcmaString> collationEcmaStr = JSHandle<EcmaString>::Cast(collation);
126         collationStr = intl::LocaleHelper::ConvertToStdString(collationEcmaStr);
127         if (!JSLocale::IsWellAlphaNumList(collationStr)) {
128             THROW_RANGE_ERROR_AND_RETURN(thread, "invalid collation", collator);
129         }
130     }
131 
132     // 8. Let numeric be ? GetOption(options, "numeric", "boolean", undefined, undefined).
133     bool numeric = false;
134     bool foundNumeric =
135         JSLocale::GetOptionOfBool(thread, optionsObject, globalConst->GetHandledNumericString(), false, &numeric);
136     RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
137     collator->SetNumeric(numeric);
138 
139     // 14. Let caseFirst be ? GetOption(options, "caseFirst", "string", « "upper", "lower", "false" », undefined).
140     CaseFirstOption caseFirst = JSLocale::GetOptionOfString<CaseFirstOption>(
141         thread, optionsObject, globalConst->GetHandledCaseFirstString(),
142         {CaseFirstOption::UPPER, CaseFirstOption::LOWER, CaseFirstOption::FALSE_OPTION}, {"upper", "lower", "false"},
143         CaseFirstOption::UNDEFINED);
144     RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
145     collator->SetCaseFirst(caseFirst);
146 
147     // 16. Let relevantExtensionKeys be %Collator%.[[RelevantExtensionKeys]].
148     std::set<std::string> relevantExtensionKeys = {"co", "kn", "kf"};
149 
150     // 17. Let r be ResolveLocale(%Collator%.[[AvailableLocales]], requestedLocales, opt,
151     //     %Collator%.[[RelevantExtensionKeys]], localeData).
152     JSHandle<TaggedArray> availableLocales;
153     if (requestedLocales->GetLength() == 0) {
154         availableLocales = factory->EmptyArray();
155     } else {
156         availableLocales = GetAvailableLocales(thread, enableLocaleCache);
157     }
158     ResolvedLocale r =
159         JSLocale::ResolveLocale(thread, availableLocales, requestedLocales, matcher, relevantExtensionKeys);
160     RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
161     icu::Locale icuLocale = r.localeData;
162     JSHandle<EcmaString> localeStr = intl::LocaleHelper::ToLanguageTag(thread, icuLocale);
163     RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
164     collator->SetLocale(thread, localeStr.GetTaggedValue());
165     ASSERT_PRINT(!icuLocale.isBogus(), "icuLocale is bogus");
166 
167     // If collation is undefined iterate RelevantExtensionKeys to find "co"
168     //  if found, set ICU collator UnicodeKeyword to iterator->second
169     UErrorCode status = U_ZERO_ERROR;
170     if (!collation->IsUndefined()) {
171         auto extensionIter = r.extensions.find("co");
172         if (extensionIter != r.extensions.end() && extensionIter->second != collationStr) {
173             icuLocale.setUnicodeKeywordValue("co", nullptr, status);
174             ASSERT_PRINT(U_SUCCESS(status), "icuLocale set co failed");
175         }
176     }
177 
178     // If usage is serach set co-serach to icu locale key word value
179     // Eles set collation string to icu locale key word value
180     if (usage == UsageOption::SEARCH) {
181         icuLocale.setUnicodeKeywordValue("co", "search", status);
182         ASSERT(U_SUCCESS(status));
183     } else {
184         if (!collationStr.empty() && JSLocale::IsWellCollation(icuLocale, collationStr)) {
185             icuLocale.setUnicodeKeywordValue("co", collationStr, status);
186             ASSERT(U_SUCCESS(status));
187         }
188     }
189 
190     std::unique_ptr<icu::Collator> icuCollator(icu::Collator::createInstance(icuLocale, status));
191     if (U_FAILURE(status) || icuCollator == nullptr) {  // NOLINT(readability-implicit-bool-conversion)
192         if (status == UErrorCode::U_MISSING_RESOURCE_ERROR) {
193             THROW_REFERENCE_ERROR_AND_RETURN(thread, "can not find icu data resources", collator);
194         }
195         status = U_ZERO_ERROR;
196         icu::Locale localeName(icuLocale.getBaseName());
197         icuCollator.reset(icu::Collator::createInstance(localeName, status));
198         if (U_FAILURE(status) || icuCollator == nullptr) {  // NOLINT(readability-implicit-bool-conversion)
199             THROW_RANGE_ERROR_AND_RETURN(thread, "invalid collation", collator);
200         }
201     }
202     ASSERT(U_SUCCESS(status));
203     icu::Locale collatorLocale(icuCollator->getLocale(ULOC_VALID_LOCALE, status));
204 
205     icuCollator->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
206     ASSERT(U_SUCCESS(status));
207 
208     // If numeric is found set ICU collator UCOL_NUMERIC_COLLATION to numeric
209     // Else iterate RelevantExtensionKeys to find "kn"
210     //  if found, set ICU collator UCOL_NUMERIC_COLLATION to iterator->second
211     status = U_ZERO_ERROR;
212     if (foundNumeric) {
213         ASSERT(icuCollator.get() != nullptr);
214         icuCollator.get()->setAttribute(UCOL_NUMERIC_COLLATION, numeric ? UCOL_ON : UCOL_OFF, status);
215         ASSERT(U_SUCCESS(status));
216     } else {
217         auto extensionIter = r.extensions.find("kn");
218         if (extensionIter != r.extensions.end()) {
219             ASSERT(icuCollator.get() != nullptr);
220             bool found = (extensionIter->second == "true");
221             collator->SetNumeric(found);
222             icuCollator.get()->setAttribute(UCOL_NUMERIC_COLLATION, found ? UCOL_ON : UCOL_OFF, status);
223             ASSERT(U_SUCCESS(status));
224         }
225     }
226 
227     // If caseFirst is not undefined set ICU collator UColAttributeValue to caseFirst
228     // Else iterate RelevantExtensionKeys to find "kf"
229     //  if found, set ICU collator UColAttributeValue to iterator->second
230     status = U_ZERO_ERROR;
231     if (caseFirst != CaseFirstOption::UNDEFINED) {
232         ASSERT(icuCollator.get() != nullptr);
233         icuCollator.get()->setAttribute(UCOL_CASE_FIRST, OptionToUColAttribute(caseFirst), status);
234         ASSERT(U_SUCCESS(status));
235     } else {
236         auto extensionIter = r.extensions.find("kf");
237         if (extensionIter != r.extensions.end()) {
238             ASSERT(icuCollator.get() != nullptr);
239             auto mapIter = caseFirstMap.find(extensionIter->second);
240             if (mapIter != caseFirstMap.end()) {
241                 icuCollator.get()->setAttribute(UCOL_CASE_FIRST, OptionToUColAttribute(mapIter->second), status);
242                 collator->SetCaseFirst(mapIter->second);
243             } else {
244                 icuCollator.get()->setAttribute(UCOL_CASE_FIRST, OptionToUColAttribute(CaseFirstOption::UNDEFINED),
245                                                 status);
246             }
247             ASSERT(U_SUCCESS(status));
248         }
249     }
250 
251     // 24. Let sensitivity be ? GetOption(options, "sensitivity", "string", « "base", "accent", "case", "variant" »,
252     //     undefined).
253     SensitivityOption sensitivity = JSLocale::GetOptionOfString<SensitivityOption>(
254         thread, optionsObject, globalConst->GetHandledSensitivityString(),
255         {SensitivityOption::BASE, SensitivityOption::ACCENT, SensitivityOption::CASE, SensitivityOption::VARIANT},
256         {"base", "accent", "case", "variant"}, SensitivityOption::UNDEFINED);
257     RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
258     // 25. If sensitivity is undefined, then
259     //     a. If usage is "sort", then
260     //        i. Let sensitivity be "variant".
261     if (sensitivity == SensitivityOption::UNDEFINED) {
262         if (usage == UsageOption::SORT) {
263             sensitivity = SensitivityOption::VARIANT;
264         }
265     }
266     collator->SetSensitivity(sensitivity);
267 
268     // Trans SensitivityOption to Icu strength option
269     switch (sensitivity) {
270         case SensitivityOption::BASE:
271             icuCollator->setStrength(icu::Collator::PRIMARY);
272             break;
273         case SensitivityOption::ACCENT:
274             icuCollator->setStrength(icu::Collator::SECONDARY);
275             break;
276         case SensitivityOption::CASE:
277             icuCollator->setStrength(icu::Collator::PRIMARY);
278             icuCollator->setAttribute(UCOL_CASE_LEVEL, UCOL_ON, status);
279             break;
280         case SensitivityOption::VARIANT:
281             icuCollator->setStrength(icu::Collator::TERTIARY);
282             break;
283         case SensitivityOption::UNDEFINED:
284             break;
285         case SensitivityOption::EXCEPTION:
286             LOG_ECMA(FATAL) << "this branch is unreachable";
287             UNREACHABLE();
288     }
289 
290     // 27. Let ignorePunctuation be ? GetOption(options, "ignorePunctuation", "boolean", undefined, false).
291     // 28. Set collator.[[IgnorePunctuation]] to ignorePunctuation.
292     bool ignorePunctuation = false;
293     bool defaultIgnorePunctuation = false;
294     // If the ignorePunctuation is not defined, which in "th" locale that is true but false on other locales.
295     JSHandle<EcmaString> thKey = factory->NewFromUtf8("th");
296     if (JSTaggedValue::Equal(thread, JSHandle<JSTaggedValue>::Cast(thKey), locales)) {
297         defaultIgnorePunctuation = true;
298     }
299     JSLocale::GetOptionOfBool(thread, optionsObject, globalConst->GetHandledIgnorePunctuationString(),
300                               defaultIgnorePunctuation, &ignorePunctuation);
301     collator->SetIgnorePunctuation(ignorePunctuation);
302     if (ignorePunctuation) {
303         status = U_ZERO_ERROR;
304         icuCollator->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, status);
305         ASSERT(U_SUCCESS(status));
306     }
307 
308     if (forIcuCache) {
309         std::string cacheEntry =
310             locales->IsUndefined() ? "" : EcmaStringAccessor(locales.GetTaggedValue()).ToStdString();
311         thread->GetCurrentEcmaContext()->SetIcuFormatterToCache(IcuFormatterType::COLLATOR,
312             cacheEntry, icuCollator.release(), JSCollator::FreeIcuCollator);
313     } else {
314         SetIcuCollator(thread, collator, icuCollator.release(), JSCollator::FreeIcuCollator);
315     }
316     collator->SetBoundCompare(thread, JSTaggedValue::Undefined());
317     // 29. Return collator.
318     return collator;
319 }
320 
GetCachedIcuCollator(JSThread * thread,const JSTaggedValue & locales)321 icu::Collator *JSCollator::GetCachedIcuCollator(JSThread *thread, const JSTaggedValue &locales)
322 {
323     std::string cacheEntry = locales.IsUndefined() ? "" : EcmaStringAccessor(locales).ToStdString();
324     void *cachedCollator =
325         thread->GetCurrentEcmaContext()->GetIcuFormatterFromCache(IcuFormatterType::COLLATOR, cacheEntry);
326     if (cachedCollator != nullptr) {
327         return reinterpret_cast<icu::Collator*>(cachedCollator);
328     }
329     return nullptr;
330 }
331 
GetCachedIcuCollator(JSThread * thread,const JSHandle<JSTaggedValue> & locales)332 icu::Collator *JSCollator::GetCachedIcuCollator(JSThread *thread, const JSHandle<JSTaggedValue> &locales)
333 {
334     return GetCachedIcuCollator(thread, locales.GetTaggedValue());
335 }
336 
OptionToUColAttribute(CaseFirstOption caseFirstOption)337 UColAttributeValue JSCollator::OptionToUColAttribute(CaseFirstOption caseFirstOption)
338 {
339     auto iter = uColAttributeValueMap.find(caseFirstOption);
340     if (iter != uColAttributeValueMap.end()) {
341         return iter->second;
342     }
343     LOG_ECMA(FATAL) << "this branch is unreachable";
344     UNREACHABLE();
345 }
346 
OptionsToEcmaString(JSThread * thread,UsageOption usage)347 JSHandle<JSTaggedValue> OptionsToEcmaString(JSThread *thread, UsageOption usage)
348 {
349     JSMutableHandle<JSTaggedValue> result(thread, JSTaggedValue::Undefined());
350     auto globalConst = thread->GlobalConstants();
351     switch (usage) {
352         case UsageOption::SORT:
353             result.Update(globalConst->GetSortString());
354             break;
355         case UsageOption::SEARCH:
356             result.Update(globalConst->GetSearchString());
357             break;
358         default:
359             LOG_ECMA(FATAL) << "this branch is unreachable";
360             UNREACHABLE();
361     }
362     return result;
363 }
364 
OptionsToEcmaString(JSThread * thread,SensitivityOption sensitivity)365 JSHandle<JSTaggedValue> OptionsToEcmaString(JSThread *thread, SensitivityOption sensitivity)
366 {
367     JSMutableHandle<JSTaggedValue> result(thread, JSTaggedValue::Undefined());
368     auto globalConst = thread->GlobalConstants();
369     switch (sensitivity) {
370         case SensitivityOption::BASE:
371             result.Update(globalConst->GetBaseString());
372             break;
373         case SensitivityOption::ACCENT:
374             result.Update(globalConst->GetAccentString());
375             break;
376         case SensitivityOption::CASE:
377             result.Update(globalConst->GetCaseString());
378             break;
379         case SensitivityOption::VARIANT:
380             result.Update(globalConst->GetVariantString());
381             break;
382         case SensitivityOption::UNDEFINED:
383             break;
384         default:
385             LOG_ECMA(FATAL) << "this branch is unreachable";
386             UNREACHABLE();
387     }
388     return result;
389 }
390 
OptionsToEcmaString(JSThread * thread,CaseFirstOption caseFirst)391 JSHandle<JSTaggedValue> OptionsToEcmaString(JSThread *thread, CaseFirstOption caseFirst)
392 {
393     JSMutableHandle<JSTaggedValue> result(thread, JSTaggedValue::Undefined());
394     auto globalConst = thread->GlobalConstants();
395     switch (caseFirst) {
396         case CaseFirstOption::UPPER:
397             result.Update(globalConst->GetUpperString());
398             break;
399         case CaseFirstOption::LOWER:
400             result.Update(globalConst->GetLowerString());
401             break;
402         case CaseFirstOption::FALSE_OPTION:
403             result.Update(globalConst->GetFalseString());
404             break;
405         case CaseFirstOption::UNDEFINED:
406             result.Update(globalConst->GetUpperString());
407             break;
408         default:
409             LOG_ECMA(FATAL) << "this branch is unreachable";
410             UNREACHABLE();
411     }
412     return result;
413 }
414 
415 // 11.3.4 Intl.Collator.prototype.resolvedOptions ()
ResolvedOptions(JSThread * thread,const JSHandle<JSCollator> & collator)416 JSHandle<JSObject> JSCollator::ResolvedOptions(JSThread *thread, const JSHandle<JSCollator> &collator)
417 {
418     auto ecmaVm = thread->GetEcmaVM();
419     auto globalConst = thread->GlobalConstants();
420     ObjectFactory *factory = ecmaVm->GetFactory();
421     JSHandle<GlobalEnv> env = ecmaVm->GetGlobalEnv();
422     JSHandle<JSFunction> funCtor(env->GetObjectFunction());
423     JSHandle<JSObject> options(factory->NewJSObjectByConstructor(funCtor));
424 
425     // [[Locale]]
426     JSHandle<JSTaggedValue> property = globalConst->GetHandledLocaleString();
427     JSHandle<JSTaggedValue> locale(thread, collator->GetLocale());
428     JSObject::CreateDataPropertyOrThrow(thread, options, property, locale);
429     RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSObject, thread);
430 
431     // [[Usage]]
432     UsageOption usageOption = collator->GetUsage();
433     JSHandle<JSTaggedValue> usageValue = OptionsToEcmaString(thread, usageOption);
434     JSObject::CreateDataProperty(thread, options, globalConst->GetHandledUsageString(), usageValue);
435 
436     // [[Sensitivity]]
437     auto sentivityOption = collator->GetSensitivity();
438     JSHandle<JSTaggedValue> sensitivityValue = OptionsToEcmaString(thread, sentivityOption);
439     JSObject::CreateDataProperty(thread, options, globalConst->GetHandledSensitivityString(), sensitivityValue);
440 
441     // [[IgnorePunctuation]]
442     JSHandle<JSTaggedValue> ignorePunctuationValue(thread, JSTaggedValue(collator->GetIgnorePunctuation()));
443     JSObject::CreateDataProperty(thread, options, globalConst->GetHandledIgnorePunctuationString(),
444                                  ignorePunctuationValue);
445 
446     // [[Collation]]
447     JSMutableHandle<JSTaggedValue> collationValue(thread, collator->GetCollation());
448     UErrorCode status = U_ZERO_ERROR;
449     icu::Collator *icuCollator = collator->GetIcuCollator();
450     icu::Locale icu_locale(icuCollator->getLocale(ULOC_VALID_LOCALE, status));
451     std::string collation_value =
452         icu_locale.getUnicodeKeywordValue<std::string>("co", status);
453     if (collationValue->IsUndefined()) {
454         if (collation_value != "search" && collation_value != "") {
455             collationValue.Update(factory->NewFromStdString(collation_value).GetTaggedValue());
456         } else {
457             collationValue.Update(globalConst->GetDefaultString());
458         }
459     }
460     JSObject::CreateDataProperty(thread, options, globalConst->GetHandledCollationString(), collationValue);
461 
462     // [[Numeric]]
463     JSHandle<JSTaggedValue> numericValue(thread, JSTaggedValue(collator->GetNumeric()));
464     JSObject::CreateDataProperty(thread, options, globalConst->GetHandledNumericString(), numericValue);
465 
466     // [[CaseFirst]]
467     CaseFirstOption caseFirstOption = collator->GetCaseFirst();
468     // In Ecma402 spec, caseFirst is an optional property so we set it to Upper when input is undefined
469     // the requirement maybe change in the future
470     JSHandle<JSTaggedValue> caseFirstValue = OptionsToEcmaString(thread, caseFirstOption);
471     JSObject::CreateDataProperty(thread, options, globalConst->GetHandledCaseFirstString(), caseFirstValue);
472     return options;
473 }
474 
CompareStringsOptionFor(JSThread * thread,JSHandle<JSTaggedValue> locales)475 CompareStringsOption JSCollator::CompareStringsOptionFor(JSThread* thread,
476                                                          JSHandle<JSTaggedValue> locales)
477 {
478     // All the available locales that are statically known to fulfill fast path conditions.
479     static const char* const FAST_LOCALE[] = {
480         "en-US", "en", "fr", "es",    "de",    "pt",    "it", "ca",
481         "de-AT", "fi", "id", "id-ID", "ms",    "nl",    "pl", "ro",
482         "sl",    "sv", "sw", "vi",    "en-DE", "en-GB",
483     };
484     if (locales->IsUndefined()) {
485         auto context = thread->GetCurrentEcmaContext();
486         auto defaultCompareOption = context->GetDefaultCompareStringsOption();
487         if (defaultCompareOption.has_value()) {
488             return defaultCompareOption.value();
489         }
490         auto defaultLocale = intl::LocaleHelper::StdStringDefaultLocale(thread);
491         for (const char *fastLocale : FAST_LOCALE) {
492             if (strcmp(fastLocale, defaultLocale.c_str()) == 0) {
493                 context->SetDefaultCompareStringsOption(CompareStringsOption::TRY_FAST_PATH);
494                 return CompareStringsOption::TRY_FAST_PATH;
495             }
496         }
497         context->SetDefaultCompareStringsOption(CompareStringsOption::NONE);
498         return CompareStringsOption::NONE;
499     }
500 
501     if (!locales->IsString()) {
502         return CompareStringsOption::NONE;
503     }
504 
505     JSHandle<EcmaString> localesString = JSHandle<EcmaString>::Cast(locales);
506     CString localesStr = ConvertToString(*localesString, StringConvertedUsage::LOGICOPERATION);
507     for (const char *fastLocale : FAST_LOCALE) {
508         if (strcmp(fastLocale, localesStr.c_str()) == 0) {
509             return CompareStringsOption::TRY_FAST_PATH;
510         }
511     }
512 
513     return CompareStringsOption::NONE;
514 }
515 
CompareStringsOptionFor(JSThread * thread,JSHandle<JSTaggedValue> locales,JSHandle<JSTaggedValue> options)516 CompareStringsOption JSCollator::CompareStringsOptionFor(JSThread* thread,
517                                                          JSHandle<JSTaggedValue> locales,
518                                                          JSHandle<JSTaggedValue> options)
519 {
520     if (!options->IsUndefined()) {
521         return CompareStringsOption::NONE;
522     }
523     return CompareStringsOptionFor(thread, locales);
524 }
525 
// Anonymous namespace for CompareStrings
527 namespace {
// Level-1 collation weights indexed by code unit. A weight of 0 marks a code unit that cannot
// be fast-compared. Only the first 128 entries are listed; the remaining entries up to 255 are
// zero-initialized. Upper- and lower-case letters share the same level-1 weight.
constexpr uint8_t COLLATION_WEIGHT_L1[256] = {
    0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  2,  3,  4,  5,  0,  0,   // 0x00 - 0x0F
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   // 0x10 - 0x1F
    6,  12, 16, 28, 38, 29, 27, 15, 17, 18, 24, 32, 9,  8,  14, 25,  // 0x20 - 0x2F
    39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 11, 10, 33, 34, 35, 13,  // 0x30 - 0x3F
    23, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,  // 0x40 - 0x4F
    64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 19, 26, 20, 31, 7,   // 0x50 - 0x5F
    30, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,  // 0x60 - 0x6F
    64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 21, 36, 22, 37, 0,   // 0x70 - 0x7F
};
// Level-3 collation weights indexed by code unit: 2 for 'A'-'Z', 1 for every other code unit
// that has a non-zero level-1 weight, 0 otherwise. Entries 128-255 are zero-initialized.
constexpr uint8_t COLLATION_WEIGHT_L3[256] = {
    0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  1,  1,  1,  1,  0,  0,   // 0x00 - 0x0F
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   // 0x10 - 0x1F
    1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,   // 0x20 - 0x2F
    1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,   // 0x30 - 0x3F
    1,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,   // 0x40 - 0x4F
    2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  1,  1,  1,  1,  1,   // 0x50 - 0x5F
    1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,   // 0x60 - 0x6F
    1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  0,   // 0x70 - 0x7F
};
// Number of entries in each weight table (both tables have the same extent).
constexpr int COLLATION_WEIGHT_LENGTH = sizeof(COLLATION_WEIGHT_L1) / sizeof(uint8_t);
547 
ToUCollationResult(int delta)548 constexpr UCollationResult ToUCollationResult(int delta)
549 {
550     return delta < 0 ? UCollationResult::UCOL_LESS
551                      : (delta > 0 ? UCollationResult::UCOL_GREATER
552                                   : UCollationResult::UCOL_EQUAL);
553 }
554 
555 struct FastCompareStringsData {
556     UCollationResult l1Result = UCollationResult::UCOL_EQUAL;
557     UCollationResult l3Result = UCollationResult::UCOL_EQUAL;
558     int processedUntil = 0;
559     int firstDiffAt = 0;  // The first relevant diff (L1 if exists, else L3).
560     bool hasDiff = false;
561 
FastCompareFailedpanda::ecmascript::__anond86117160111::FastCompareStringsData562     std::optional<UCollationResult> FastCompareFailed(int& processedUntilOut) const
563     {
564         if (hasDiff) {
565             // Found some difference, continue there to ensure the generic algorithm picks it up.
566             processedUntilOut = firstDiffAt;
567         } else {
568             // No difference found, reprocess the last processed character since it may be
569             // followed by a unicode combining character.
570             processedUntilOut = std::max(processedUntil - 1, 0);
571         }
572         return {};
573     }
574 };
575 
576 template <class T>
CanFastCompare(T ch)577 constexpr bool CanFastCompare(T ch)
578 {
579     return ch < COLLATION_WEIGHT_LENGTH && COLLATION_WEIGHT_L1[ch] != 0;
580 }
581 
582 // Check canFastCompare, L1 weight, and L3 weight together.
583 // Use FastCompareStringsData to store these results.
584 template <class T1, class T2>
FastCompareFlatString(const T1 * lhs,const T2 * rhs,int length,FastCompareStringsData & fastCompareData)585 bool FastCompareFlatString(const T1* lhs, const T2* rhs, int length, FastCompareStringsData& fastCompareData)
586 {
587     for (int i = 0; i < length; i++) {
588         const T1 l = lhs[i];
589         const T2 r = rhs[i];
590         if (!CanFastCompare(l) || !CanFastCompare(r)) {
591             fastCompareData.processedUntil = i;
592             return false;
593         }
594         auto l1Result = ToUCollationResult(COLLATION_WEIGHT_L1[l] - COLLATION_WEIGHT_L1[r]);
595         if (l1Result != UCollationResult::UCOL_EQUAL) {
596             fastCompareData.hasDiff = true;
597             fastCompareData.firstDiffAt = i;
598             fastCompareData.processedUntil = i;
599             fastCompareData.l1Result = l1Result;
600             return true;
601         }
602         if (l != r && fastCompareData.l3Result == UCollationResult::UCOL_EQUAL) {
603             auto l3Result = ToUCollationResult(COLLATION_WEIGHT_L3[l] - COLLATION_WEIGHT_L3[r]);
604             fastCompareData.l3Result = l3Result;
605             if (!fastCompareData.hasDiff) {
606                 fastCompareData.hasDiff = true;
607                 fastCompareData.firstDiffAt = i;
608             }
609         }
610     }
611     fastCompareData.processedUntil = length;
612     return true;
613 }
614 
FastCompareStringFlatContent(EcmaString * string1,EcmaString * string2,int length,FastCompareStringsData & fastCompareData)615 bool FastCompareStringFlatContent(EcmaString* string1, EcmaString* string2,
616                                   int length, FastCompareStringsData& fastCompareData)
617 {
618     EcmaStringAccessor string1Acc(string1);
619     EcmaStringAccessor string2Acc(string2);
620     if (string1Acc.IsUtf8()) {
621         auto l = EcmaStringAccessor::GetNonTreeUtf8Data(string1);
622         if (string2Acc.IsUtf8()) {
623             auto r = EcmaStringAccessor::GetNonTreeUtf8Data(string2);
624             return FastCompareFlatString(l, r, length, fastCompareData);
625         } else {
626             auto r = EcmaStringAccessor::GetNonTreeUtf16Data(string2);
627             return FastCompareFlatString(l, r, length, fastCompareData);
628         }
629     } else {
630         auto l = EcmaStringAccessor::GetNonTreeUtf16Data(string1);
631         if (string2Acc.IsUtf8()) {
632             auto r = EcmaStringAccessor::GetNonTreeUtf8Data(string2);
633             return FastCompareFlatString(l, r, length, fastCompareData);
634         } else {
635             auto r = EcmaStringAccessor::GetNonTreeUtf16Data(string2);
636             return FastCompareFlatString(l, r, length, fastCompareData);
637         }
638     }
639     UNREACHABLE();
640 }
641 
CharIsAsciiOrOutOfBounds(EcmaString * string,int stringLength,int index)642 bool CharIsAsciiOrOutOfBounds(EcmaString* string, int stringLength, int index)
643 {
644     return index >= stringLength || EcmaStringAccessor::IsASCIICharacter(EcmaStringAccessor(string).Get<false>(index));
645 }
646 
CharCanFastCompareOrOutOfBounds(EcmaString * string,int stringLength,int index)647 bool CharCanFastCompareOrOutOfBounds(EcmaString* string, int stringLength, int index)
648 {
649     return index >= stringLength || CanFastCompare(EcmaStringAccessor(string).Get<false>(index));
650 }
651 
// Pseudo-code for the simplified multi-pass algorithm:
//     // Only a certain subset of the ASCII range can be fast-compared.
//     // In the actual single-pass algorithm below, we tolerate non-ASCII contents.
//     1. Check that string1 and string2 can be fast-compared.
//     2. Compare the L1 weight of each char; the greater wins.
//     3. If the two strings are L1-equal over their common length, the longer wins.
//     4. Compare the L3 weight of each char; the greater wins.
//     5. If everything is equal, return equal.
//     6. As soon as some char cannot be fast-compared, fall back to ICU.
661 
TryFastCompareStrings(const icu::Collator * icuCollator,EcmaString * string1,EcmaString * string2,int & processedUntilOut)662 std::optional<UCollationResult> TryFastCompareStrings([[maybe_unused]] const icu::Collator* icuCollator,
663                                                       EcmaString* string1, EcmaString* string2,
664                                                       int& processedUntilOut)
665 {
666     processedUntilOut = 0;
667 
668     const auto length1 = static_cast<int>(EcmaStringAccessor(string1).GetLength());
669     const auto length2 = static_cast<int>(EcmaStringAccessor(string2).GetLength());
670     int commonLength = std::min(length1, length2);
671 
672     FastCompareStringsData fastCompareData;
673     if (!FastCompareStringFlatContent(string1, string2, commonLength, fastCompareData)) {
674         return fastCompareData.FastCompareFailed(processedUntilOut);
675     }
676     // The result is only valid if the last processed character is not followed
677     // by a unicode combining character.
678     if (!CharIsAsciiOrOutOfBounds(string1, length1, fastCompareData.processedUntil + 1) ||
679         !CharIsAsciiOrOutOfBounds(string2, length2, fastCompareData.processedUntil + 1)) {
680         return fastCompareData.FastCompareFailed(processedUntilOut);
681     }
682     if (fastCompareData.l1Result != UCollationResult::UCOL_EQUAL) {
683         return fastCompareData.l1Result;
684     }
685     // Strings are L1-equal up to their common length, length differences win.
686     UCollationResult lengthResult = ToUCollationResult(length1 - length2);
687     if (lengthResult != UCollationResult::UCOL_EQUAL) {
688         // Strings of different lengths may still compare as equal if the longer
689         // string has a fully ignored suffix, e.g. "a" vs. "a\u{1}".
690         if (!CharCanFastCompareOrOutOfBounds(string1, length1, commonLength) ||
691             !CharCanFastCompareOrOutOfBounds(string2, length2, commonLength)) {
692             return fastCompareData.FastCompareFailed(processedUntilOut);
693         }
694         return lengthResult;
695     }
696     // L1-equal and same length, the L3 result wins.
697     return fastCompareData.l3Result;
698 }
699 } // namespace
700 
701 //StringPiece is similar to std::string_view
ToICUStringPiece(const JSHandle<EcmaString> & string,int offset=0)702 icu::StringPiece ToICUStringPiece(const JSHandle<EcmaString>& string, int offset = 0)
703 {
704     EcmaStringAccessor stringAcc(string);
705     ASSERT(stringAcc.IsUtf8());
706     ASSERT(!stringAcc.IsTreeString());
707     return icu::StringPiece(reinterpret_cast<const char*>(EcmaStringAccessor::GetNonTreeUtf8Data(*string)) + offset,
708                             static_cast<int>(stringAcc.GetLength()) - offset);
709 }
710 
711 // Convert to a UTF16 string and partially convert to ICUUnicodeString
ToICUUnicodeString(const JSHandle<EcmaString> & string,int offset=0)712 icu::UnicodeString ToICUUnicodeString(const JSHandle<EcmaString> &string, int offset = 0)
713 {
714     EcmaStringAccessor stringAcc(string);
715     ASSERT(!stringAcc.IsTreeString());
716     int strLength = static_cast<int>(stringAcc.GetLength());
717     int partialLength = strLength - offset;
718     if (stringAcc.IsUtf8()) {
719         constexpr int shortStringLength = 80;  // 80: short string length
720         if (partialLength <= shortStringLength) {
721             // short string on stack
722             UChar shortStringBuffer[shortStringLength];
723             // utf8 is within ascii, std::copy_n from utf8 to utf16 is OK
724             std::copy_n(EcmaStringAccessor::GetNonTreeUtf8Data(*string) + offset, partialLength, shortStringBuffer);
725             return icu::UnicodeString(shortStringBuffer, partialLength);
726         }
727         CVector<uint16_t> ucharBuffer(partialLength);
728         std::copy_n(EcmaStringAccessor::GetNonTreeUtf8Data(*string) + offset, partialLength, ucharBuffer.begin());
729         return icu::UnicodeString(ucharBuffer.data(), partialLength);
730     } else {
731         return icu::UnicodeString(EcmaStringAccessor::GetNonTreeUtf16Data(*string) + offset, partialLength);
732     }
733 }
734 
CompareStrings(JSThread * thread,const icu::Collator * icuCollator,const JSHandle<EcmaString> & string1,const JSHandle<EcmaString> & string2,CompareStringsOption csOption)735 JSTaggedValue JSCollator::CompareStrings(JSThread *thread, const icu::Collator *icuCollator,
736                                          const JSHandle<EcmaString> &string1, const JSHandle<EcmaString> &string2,
737                                          [[maybe_unused]]CompareStringsOption csOption)
738 {
739     if (*string1 == *string2) {
740         return JSTaggedValue(UCollationResult::UCOL_EQUAL);
741     }
742 
743     // Since Unicode has ignorable characters,
744     // we cannot return early for 0-length strings.
745     auto flatString1 = JSHandle<EcmaString>(thread, EcmaStringAccessor::Flatten(thread->GetEcmaVM(), string1));
746     auto flatString2 = JSHandle<EcmaString>(thread, EcmaStringAccessor::Flatten(thread->GetEcmaVM(), string2));
747 
748     int processedUntil = 0;
749     if (csOption == CompareStringsOption::TRY_FAST_PATH) {
750         auto maybeResult = TryFastCompareStrings(icuCollator, *flatString1, *flatString2, processedUntil);
751         if (maybeResult.has_value()) {
752             return JSTaggedValue(maybeResult.value());
753         }
754     }
755 
756     UCollationResult result;
757     UErrorCode status = U_ZERO_ERROR;
758     if (EcmaStringAccessor(flatString1).IsUtf8() && EcmaStringAccessor(flatString2).IsUtf8()) {
759         auto string1Piece = ToICUStringPiece(flatString1, processedUntil);
760         if (!string1Piece.empty()) {
761             auto string2Piece = ToICUStringPiece(flatString2, processedUntil);
762             if (!string2Piece.empty()) {
763                 result = icuCollator->compareUTF8(string1Piece, string2Piece, status);
764                 return JSTaggedValue(result);
765             }
766         }
767     }
768 
769     auto uString1 = ToICUUnicodeString(flatString1, processedUntil);
770     auto uString2 = ToICUUnicodeString(flatString2, processedUntil);
771     result = icuCollator->compare(uString1, uString2, status);
772     ASSERT(U_SUCCESS(status));
773 
774     return JSTaggedValue(result);
775 }
776 }  // namespace panda::ecmascript
777