• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "ecmascript/js_collator.h"
17 
18 #include "ecmascript/global_env.h"
19 #include "ecmascript/mem/c_string.h"
20 #include "ecmascript/mem/barriers-inl.h"
21 
22 #include "unicode/udata.h"
23 
24 namespace panda::ecmascript {
25 // NOLINTNEXTLINE (readability-identifier-naming, fuchsia-statically-constructed-objects)
26 const CString JSCollator::uIcuDataColl = U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "coll";
27 const std::map<std::string, CaseFirstOption> JSCollator::caseFirstMap = {
28     {"upper", CaseFirstOption::UPPER},
29     {"lower", CaseFirstOption::LOWER},
30     {"false", CaseFirstOption::FALSE_OPTION}
31 };
32 const std::map<CaseFirstOption, UColAttributeValue> JSCollator::uColAttributeValueMap = {
33     {CaseFirstOption::UPPER, UCOL_UPPER_FIRST},
34     {CaseFirstOption::LOWER, UCOL_LOWER_FIRST},
35     {CaseFirstOption::FALSE_OPTION, UCOL_OFF},
36     {CaseFirstOption::UNDEFINED, UCOL_OFF}
37 };
38 
GetAvailableLocales(JSThread * thread)39 JSHandle<TaggedArray> JSCollator::GetAvailableLocales(JSThread *thread)
40 {
41     const char *key = nullptr;
42     const char *path = JSCollator::uIcuDataColl.c_str();
43     JSHandle<TaggedArray> availableLocales = JSLocale::GetAvailableLocales(thread, key, path);
44     return availableLocales;
45 }
46 
47 /* static */
SetIcuCollator(JSThread * thread,const JSHandle<JSCollator> & collator,icu::Collator * icuCollator,const DeleteEntryPoint & callback)48 void JSCollator::SetIcuCollator(JSThread *thread, const JSHandle<JSCollator> &collator,
49     icu::Collator *icuCollator, const DeleteEntryPoint &callback)
50 {
51     EcmaVM *ecmaVm = thread->GetEcmaVM();
52     ObjectFactory *factory = ecmaVm->GetFactory();
53 
54     ASSERT(icuCollator != nullptr);
55     JSTaggedValue data = collator->GetIcuField();
56     if (data.IsJSNativePointer()) {
57         JSNativePointer *native = JSNativePointer::Cast(data.GetTaggedObject());
58         native->ResetExternalPointer(icuCollator);
59         return;
60     }
61     JSHandle<JSNativePointer> pointer = factory->NewJSNativePointer(icuCollator, callback);
62     collator->SetIcuField(thread, pointer.GetTaggedValue());
63 }
64 
InitializeCollator(JSThread * thread,const JSHandle<JSCollator> & collator,const JSHandle<JSTaggedValue> & locales,const JSHandle<JSTaggedValue> & options,bool forIcuCache)65 JSHandle<JSCollator> JSCollator::InitializeCollator(JSThread *thread,
66                                                     const JSHandle<JSCollator> &collator,
67                                                     const JSHandle<JSTaggedValue> &locales,
68                                                     const JSHandle<JSTaggedValue> &options,
69                                                     bool forIcuCache)
70 {
71     EcmaVM *ecmaVm = thread->GetEcmaVM();
72     ObjectFactory *factory = ecmaVm->GetFactory();
73     const GlobalEnvConstants *globalConst = thread->GlobalConstants();
74     // 1. Let requestedLocales be ? CanonicalizeLocaleList(locales).
75     JSHandle<TaggedArray> requestedLocales = JSLocale::CanonicalizeLocaleList(thread, locales);
76     RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
77 
78     // 2. If options is undefined, then
79     //      a. Let options be ObjectCreate(null).
80     // 3. Else,
81     //      a. Let options be ? ToObject(options).
82     JSHandle<JSObject> optionsObject;
83     if (options->IsUndefined()) {
84         optionsObject = factory->CreateNullJSObject();
85     } else {
86         optionsObject = JSTaggedValue::ToObject(thread, options);
87         RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
88     }
89     // 4. Let usage be ? GetOption(options, "usage", "string", « "sort", "search" », "sort").
90     auto usage = JSLocale::GetOptionOfString<UsageOption>(thread, optionsObject, globalConst->GetHandledUsageString(),
91                                                           {UsageOption::SORT, UsageOption::SEARCH}, {"sort", "search"},
92                                                           UsageOption::SORT);
93     RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
94     collator->SetUsage(usage);
95 
96     // 5. Let matcher be ? GetOption(options, "localeMatcher", "string", « "lookup", "best fit" », "best fit").
97     auto matcher = JSLocale::GetOptionOfString<LocaleMatcherOption>(
98         thread, optionsObject, globalConst->GetHandledLocaleMatcherString(),
99         {LocaleMatcherOption::LOOKUP, LocaleMatcherOption::BEST_FIT}, {"lookup", "best fit"},
100         LocaleMatcherOption::BEST_FIT);
101     RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
102 
103     // 6. Let collation be ? GetOption(options, "collation", "string", undefined, undefined).
104     // 7. If collation is not undefined, then
105     //    a. If collation does not match the Unicode Locale Identifier type nonterminal, throw a RangeError exception.
106     JSHandle<JSTaggedValue> collation =
107         JSLocale::GetOption(thread, optionsObject, globalConst->GetHandledCollationString(), OptionType::STRING,
108                             globalConst->GetHandledUndefined(), globalConst->GetHandledUndefined());
109     RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
110     collator->SetCollation(thread, collation);
111     std::string collationStr;
112     if (!collation->IsUndefined()) {
113         JSHandle<EcmaString> collationEcmaStr = JSHandle<EcmaString>::Cast(collation);
114         collationStr = JSLocale::ConvertToStdString(collationEcmaStr);
115         if (!JSLocale::IsWellAlphaNumList(collationStr)) {
116             THROW_RANGE_ERROR_AND_RETURN(thread, "invalid collation", collator);
117         }
118     }
119 
120     // 8. Let numeric be ? GetOption(options, "numeric", "boolean", undefined, undefined).
121     bool numeric = false;
122     bool foundNumeric =
123         JSLocale::GetOptionOfBool(thread, optionsObject, globalConst->GetHandledNumericString(), false, &numeric);
124     RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
125     collator->SetNumeric(numeric);
126 
127     // 14. Let caseFirst be ? GetOption(options, "caseFirst", "string", « "upper", "lower", "false" », undefined).
128     CaseFirstOption caseFirst = JSLocale::GetOptionOfString<CaseFirstOption>(
129         thread, optionsObject, globalConst->GetHandledCaseFirstString(),
130         {CaseFirstOption::UPPER, CaseFirstOption::LOWER, CaseFirstOption::FALSE_OPTION}, {"upper", "lower", "false"},
131         CaseFirstOption::UNDEFINED);
132     RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
133     collator->SetCaseFirst(caseFirst);
134 
135     // 16. Let relevantExtensionKeys be %Collator%.[[RelevantExtensionKeys]].
136     std::set<std::string> relevantExtensionKeys = {"co", "kn", "kf"};
137 
138     // 17. Let r be ResolveLocale(%Collator%.[[AvailableLocales]], requestedLocales, opt,
139     //     %Collator%.[[RelevantExtensionKeys]], localeData).
140     JSHandle<TaggedArray> availableLocales;
141     if (requestedLocales->GetLength() == 0) {
142         availableLocales = factory->EmptyArray();
143     } else {
144         availableLocales = GetAvailableLocales(thread);
145     }
146     ResolvedLocale r =
147         JSLocale::ResolveLocale(thread, availableLocales, requestedLocales, matcher, relevantExtensionKeys);
148     icu::Locale icuLocale = r.localeData;
149     JSHandle<EcmaString> localeStr = JSLocale::ToLanguageTag(thread, icuLocale);
150     collator->SetLocale(thread, localeStr.GetTaggedValue());
151     ASSERT_PRINT(!icuLocale.isBogus(), "icuLocale is bogus");
152 
153     // If collation is undefined iterate RelevantExtensionKeys to find "co"
154     //  if found, set ICU collator UnicodeKeyword to iterator->second
155     UErrorCode status = U_ZERO_ERROR;
156     if (!collation->IsUndefined()) {
157         auto extensionIter = r.extensions.find("co");
158         if (extensionIter != r.extensions.end() && extensionIter->second != collationStr) {
159             icuLocale.setUnicodeKeywordValue("co", nullptr, status);
160             ASSERT_PRINT(U_SUCCESS(status), "icuLocale set co failed");
161         }
162     }
163 
164     // If usage is serach set co-serach to icu locale key word value
165     // Eles set collation string to icu locale key word value
166     if (usage == UsageOption::SEARCH) {
167         icuLocale.setUnicodeKeywordValue("co", "search", status);
168         ASSERT(U_SUCCESS(status));
169     } else {
170         if (!collationStr.empty() && JSLocale::IsWellCollation(icuLocale, collationStr)) {
171             icuLocale.setUnicodeKeywordValue("co", collationStr, status);
172             ASSERT(U_SUCCESS(status));
173         }
174     }
175 
176     std::unique_ptr<icu::Collator> icuCollator(icu::Collator::createInstance(icuLocale, status));
177     if (U_FAILURE(status) || icuCollator == nullptr) {  // NOLINT(readability-implicit-bool-conversion)
178         status = U_ZERO_ERROR;
179         icu::Locale localeName(icuLocale.getBaseName());
180         icuCollator.reset(icu::Collator::createInstance(localeName, status));
181         if (U_FAILURE(status) || icuCollator == nullptr) {  // NOLINT(readability-implicit-bool-conversion)
182             THROW_RANGE_ERROR_AND_RETURN(thread, "invalid collation", collator);
183         }
184     }
185     ASSERT(U_SUCCESS(status));
186     icu::Locale collatorLocale(icuCollator->getLocale(ULOC_VALID_LOCALE, status));
187 
188     icuCollator->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status);
189     ASSERT(U_SUCCESS(status));
190 
191     // If numeric is found set ICU collator UCOL_NUMERIC_COLLATION to numeric
192     // Else iterate RelevantExtensionKeys to find "kn"
193     //  if found, set ICU collator UCOL_NUMERIC_COLLATION to iterator->second
194     status = U_ZERO_ERROR;
195     if (foundNumeric) {
196         ASSERT(icuCollator.get() != nullptr);
197         icuCollator.get()->setAttribute(UCOL_NUMERIC_COLLATION, numeric ? UCOL_ON : UCOL_OFF, status);
198         ASSERT(U_SUCCESS(status));
199     } else {
200         auto extensionIter = r.extensions.find("kn");
201         if (extensionIter != r.extensions.end()) {
202             ASSERT(icuCollator.get() != nullptr);
203             bool found = (extensionIter->second == "true");
204             collator->SetNumeric(found);
205             icuCollator.get()->setAttribute(UCOL_NUMERIC_COLLATION, found ? UCOL_ON : UCOL_OFF, status);
206             ASSERT(U_SUCCESS(status));
207         }
208     }
209 
210     // If caseFirst is not undefined set ICU collator UColAttributeValue to caseFirst
211     // Else iterate RelevantExtensionKeys to find "kf"
212     //  if found, set ICU collator UColAttributeValue to iterator->second
213     status = U_ZERO_ERROR;
214     if (caseFirst != CaseFirstOption::UNDEFINED) {
215         ASSERT(icuCollator.get() != nullptr);
216         icuCollator.get()->setAttribute(UCOL_CASE_FIRST, OptionToUColAttribute(caseFirst), status);
217         ASSERT(U_SUCCESS(status));
218     } else {
219         auto extensionIter = r.extensions.find("kf");
220         if (extensionIter != r.extensions.end()) {
221             ASSERT(icuCollator.get() != nullptr);
222             auto mapIter = caseFirstMap.find(extensionIter->second);
223             if (mapIter != caseFirstMap.end()) {
224                 icuCollator.get()->setAttribute(UCOL_CASE_FIRST, OptionToUColAttribute(mapIter->second), status);
225                 collator->SetCaseFirst(mapIter->second);
226             } else {
227                 icuCollator.get()->setAttribute(UCOL_CASE_FIRST, OptionToUColAttribute(CaseFirstOption::UNDEFINED),
228                                                 status);
229             }
230             ASSERT(U_SUCCESS(status));
231         }
232     }
233 
234     // 24. Let sensitivity be ? GetOption(options, "sensitivity", "string", « "base", "accent", "case", "variant" »,
235     //     undefined).
236     SensitivityOption sensitivity = JSLocale::GetOptionOfString<SensitivityOption>(
237         thread, optionsObject, globalConst->GetHandledSensitivityString(),
238         {SensitivityOption::BASE, SensitivityOption::ACCENT, SensitivityOption::CASE, SensitivityOption::VARIANT},
239         {"base", "accent", "case", "variant"}, SensitivityOption::UNDEFINED);
240     RETURN_HANDLE_IF_ABRUPT_COMPLETION(JSCollator, thread);
241     // 25. If sensitivity is undefined, then
242     //     a. If usage is "sort", then
243     //        i. Let sensitivity be "variant".
244     if (sensitivity == SensitivityOption::UNDEFINED) {
245         if (usage == UsageOption::SORT) {
246             sensitivity = SensitivityOption::VARIANT;
247         }
248     }
249     collator->SetSensitivity(sensitivity);
250 
251     // Trans SensitivityOption to Icu strength option
252     switch (sensitivity) {
253         case SensitivityOption::BASE:
254             icuCollator->setStrength(icu::Collator::PRIMARY);
255             break;
256         case SensitivityOption::ACCENT:
257             icuCollator->setStrength(icu::Collator::SECONDARY);
258             break;
259         case SensitivityOption::CASE:
260             icuCollator->setStrength(icu::Collator::PRIMARY);
261             icuCollator->setAttribute(UCOL_CASE_LEVEL, UCOL_ON, status);
262             break;
263         case SensitivityOption::VARIANT:
264             icuCollator->setStrength(icu::Collator::TERTIARY);
265             break;
266         case SensitivityOption::UNDEFINED:
267             break;
268         case SensitivityOption::EXCEPTION:
269             UNREACHABLE();
270     }
271 
272     // 27. Let ignorePunctuation be ? GetOption(options, "ignorePunctuation", "boolean", undefined, false).
273     // 28. Set collator.[[IgnorePunctuation]] to ignorePunctuation.
274     bool ignorePunctuation = false;
275     JSLocale::GetOptionOfBool(thread, optionsObject, globalConst->GetHandledIgnorePunctuationString(), false,
276                               &ignorePunctuation);
277     collator->SetIgnorePunctuation(ignorePunctuation);
278     if (ignorePunctuation) {
279         status = U_ZERO_ERROR;
280         icuCollator->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, status);
281         ASSERT(U_SUCCESS(status));
282     }
283 
284     if (forIcuCache) {
285         std::string cacheEntry =
286             locales->IsUndefined() ? "" : EcmaStringAccessor(locales.GetTaggedValue()).ToStdString();
287         ecmaVm->SetIcuFormatterToCache(IcuFormatterType::Collator, cacheEntry, icuCollator.release(),
288                                        JSCollator::FreeIcuCollator);
289     } else {
290         SetIcuCollator(thread, collator, icuCollator.release(), JSCollator::FreeIcuCollator);
291     }
292     collator->SetBoundCompare(thread, JSTaggedValue::Undefined());
293     // 29. Return collator.
294     return collator;
295 }
296 
GetCachedIcuCollator(JSThread * thread,const JSHandle<JSTaggedValue> & locales)297 icu::Collator *JSCollator::GetCachedIcuCollator(JSThread *thread, const JSHandle<JSTaggedValue> &locales)
298 {
299     std::string cacheEntry = locales->IsUndefined() ? "" : EcmaStringAccessor(locales.GetTaggedValue()).ToStdString();
300     EcmaVM *ecmaVm = thread->GetEcmaVM();
301     void *cachedCollator = ecmaVm->GetIcuFormatterFromCache(IcuFormatterType::Collator, cacheEntry);
302     if (cachedCollator != nullptr) {
303         return reinterpret_cast<icu::Collator*>(cachedCollator);
304     }
305     return nullptr;
306 }
307 
OptionToUColAttribute(CaseFirstOption caseFirstOption)308 UColAttributeValue JSCollator::OptionToUColAttribute(CaseFirstOption caseFirstOption)
309 {
310     auto iter = uColAttributeValueMap.find(caseFirstOption);
311     if (iter != uColAttributeValueMap.end()) {
312         return iter->second;
313     }
314     UNREACHABLE();
315 }
316 
OptionsToEcmaString(JSThread * thread,UsageOption usage)317 JSHandle<JSTaggedValue> OptionsToEcmaString(JSThread *thread, UsageOption usage)
318 {
319     JSMutableHandle<JSTaggedValue> result(thread, JSTaggedValue::Undefined());
320     auto globalConst = thread->GlobalConstants();
321     switch (usage) {
322         case UsageOption::SORT:
323             result.Update(globalConst->GetSortString());
324             break;
325         case UsageOption::SEARCH:
326             result.Update(globalConst->GetSearchString());
327             break;
328         default:
329             UNREACHABLE();
330     }
331     return result;
332 }
333 
OptionsToEcmaString(JSThread * thread,SensitivityOption sensitivity)334 JSHandle<JSTaggedValue> OptionsToEcmaString(JSThread *thread, SensitivityOption sensitivity)
335 {
336     JSMutableHandle<JSTaggedValue> result(thread, JSTaggedValue::Undefined());
337     auto globalConst = thread->GlobalConstants();
338     switch (sensitivity) {
339         case SensitivityOption::BASE:
340             result.Update(globalConst->GetBaseString());
341             break;
342         case SensitivityOption::ACCENT:
343             result.Update(globalConst->GetAccentString());
344             break;
345         case SensitivityOption::CASE:
346             result.Update(globalConst->GetCaseString());
347             break;
348         case SensitivityOption::VARIANT:
349             result.Update(globalConst->GetVariantString());
350             break;
351         case SensitivityOption::UNDEFINED:
352             break;
353         default:
354             UNREACHABLE();
355     }
356     return result;
357 }
358 
OptionsToEcmaString(JSThread * thread,CaseFirstOption caseFirst)359 JSHandle<JSTaggedValue> OptionsToEcmaString(JSThread *thread, CaseFirstOption caseFirst)
360 {
361     JSMutableHandle<JSTaggedValue> result(thread, JSTaggedValue::Undefined());
362     auto globalConst = thread->GlobalConstants();
363     switch (caseFirst) {
364         case CaseFirstOption::UPPER:
365             result.Update(globalConst->GetUpperString());
366             break;
367         case CaseFirstOption::LOWER:
368             result.Update(globalConst->GetLowerString());
369             break;
370         case CaseFirstOption::FALSE_OPTION:
371             result.Update(globalConst->GetFalseString());
372             break;
373         case CaseFirstOption::UNDEFINED:
374             result.Update(globalConst->GetUpperString());
375             break;
376         default:
377             UNREACHABLE();
378     }
379     return result;
380 }
381 
382 // 11.3.4 Intl.Collator.prototype.resolvedOptions ()
ResolvedOptions(JSThread * thread,const JSHandle<JSCollator> & collator)383 JSHandle<JSObject> JSCollator::ResolvedOptions(JSThread *thread, const JSHandle<JSCollator> &collator)
384 {
385     auto ecmaVm = thread->GetEcmaVM();
386     auto globalConst = thread->GlobalConstants();
387     ObjectFactory *factory = ecmaVm->GetFactory();
388     JSHandle<GlobalEnv> env = ecmaVm->GetGlobalEnv();
389     JSHandle<JSFunction> funCtor(env->GetObjectFunction());
390     JSHandle<JSObject> options(factory->NewJSObjectByConstructor(funCtor));
391 
392     // [[Locale]]
393     JSHandle<JSTaggedValue> property = globalConst->GetHandledLocaleString();
394     JSHandle<JSTaggedValue> locale(thread, collator->GetLocale());
395     JSObject::CreateDataPropertyOrThrow(thread, options, property, locale);
396 
397     // [[Usage]]
398     UsageOption usageOption = collator->GetUsage();
399     JSHandle<JSTaggedValue> usageValue = OptionsToEcmaString(thread, usageOption);
400     JSObject::CreateDataProperty(thread, options, globalConst->GetHandledUsageString(), usageValue);
401 
402     // [[Sensitivity]]
403     auto sentivityOption = collator->GetSensitivity();
404     JSHandle<JSTaggedValue> sensitivityValue = OptionsToEcmaString(thread, sentivityOption);
405     JSObject::CreateDataProperty(thread, options, globalConst->GetHandledSensitivityString(), sensitivityValue);
406 
407     // [[IgnorePunctuation]]
408     JSHandle<JSTaggedValue> ignorePunctuationValue(thread, JSTaggedValue(collator->GetIgnorePunctuation()));
409     JSObject::CreateDataProperty(thread, options, globalConst->GetHandledIgnorePunctuationString(),
410                                  ignorePunctuationValue);
411 
412     // [[Collation]]
413     JSMutableHandle<JSTaggedValue> collationValue(thread, collator->GetCollation());
414     if (collationValue->IsUndefined()) {
415         collationValue.Update(globalConst->GetDefaultString());
416     }
417     JSObject::CreateDataProperty(thread, options, globalConst->GetHandledCollationString(), collationValue);
418 
419     // [[Numeric]]
420     JSHandle<JSTaggedValue> numericValue(thread, JSTaggedValue(collator->GetNumeric()));
421     JSObject::CreateDataProperty(thread, options, globalConst->GetHandledNumericString(), numericValue);
422 
423     // [[CaseFirst]]
424     CaseFirstOption caseFirstOption = collator->GetCaseFirst();
425     // In Ecma402 spec, caseFirst is an optional property so we set it to Upper when input is undefined
426     // the requirement maybe change in the future
427     JSHandle<JSTaggedValue> caseFirstValue = OptionsToEcmaString(thread, caseFirstOption);
428     JSObject::CreateDataProperty(thread, options, globalConst->GetHandledCaseFirstString(), caseFirstValue);
429     return options;
430 }
431 
EcmaStringToUString(const JSHandle<EcmaString> & string)432 icu::UnicodeString EcmaStringToUString(const JSHandle<EcmaString> &string)
433 {
434     std::string stdString(ConvertToString(*string, StringConvertedUsage::LOGICOPERATION));
435     icu::StringPiece sp(stdString);
436     icu::UnicodeString uString = icu::UnicodeString::fromUTF8(sp);
437     return uString;
438 }
439 
CompareStrings(const icu::Collator * icuCollator,const JSHandle<EcmaString> & string1,const JSHandle<EcmaString> & string2)440 JSTaggedValue JSCollator::CompareStrings(const icu::Collator *icuCollator, const JSHandle<EcmaString> &string1,
441                                          const JSHandle<EcmaString> &string2)
442 {
443     icu::UnicodeString uString1 = EcmaStringToUString(string1);
444     icu::UnicodeString uString2 = EcmaStringToUString(string2);
445 
446     UCollationResult result;
447     UErrorCode status = U_ZERO_ERROR;
448     result = icuCollator->compare(uString1, uString2, status);
449     ASSERT(U_SUCCESS(status));
450 
451     return JSTaggedValue(result);
452 }
453 }  // namespace panda::ecmascript
454