• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "ecmascript/intl/locale_helper.h"
17 
18 #include "ecmascript/base/string_helper.h"
19 #include "ecmascript/ecma_macros.h"
20 #include "ecmascript/ecma_vm.h"
21 #include "ecmascript/global_env.h"
22 #include "ecmascript/object_factory.h"
23 
24 #if defined(__clang__)
25 #pragma clang diagnostic push
26 #pragma clang diagnostic ignored "-Wshadow"
27 #elif defined(__GNUC__)
28 #pragma GCC diagnostic push
29 #pragma GCC diagnostic ignored "-Wshadow"
30 #endif
31 #include "unicode/localebuilder.h"
32 #if defined(__clang__)
33 #pragma clang diagnostic pop
34 #elif defined(__GNUC__)
35 #pragma GCC diagnostic pop
36 #endif
37 
38 namespace panda::ecmascript::intl {
UStringToString(JSThread * thread,const icu::UnicodeString & string)39 JSHandle<EcmaString> LocaleHelper::UStringToString(JSThread *thread, const icu::UnicodeString &string)
40 {
41     ObjectFactory *factory = thread->GetEcmaVM()->GetFactory();
42     return factory->NewFromUtf16(reinterpret_cast<const uint16_t *>(string.getBuffer()), string.length());
43 }
44 
UStringToString(JSThread * thread,const icu::UnicodeString & string,int32_t begin,int32_t end)45 JSHandle<EcmaString> LocaleHelper::UStringToString(JSThread *thread, const icu::UnicodeString &string, int32_t begin,
46                                                    int32_t end)
47 {
48     return UStringToString(thread, string.tempSubStringBetween(begin, end));
49 }
50 
51 // 9.2.1 CanonicalizeLocaleList ( locales )
CanonicalizeLocaleList(JSThread * thread,const JSHandle<JSTaggedValue> & locales)52 JSHandle<TaggedArray> LocaleHelper::CanonicalizeLocaleList(JSThread *thread, const JSHandle<JSTaggedValue> &locales)
53 {
54     ObjectFactory *factory = thread->GetEcmaVM()->GetFactory();
55     // 1. If locales is undefined, then
56     //    a. Return a new empty List.
57     if (locales->IsUndefined()) {
58         return factory->EmptyArray();
59     }
60     // 2. Let seen be a new empty List.
61     JSHandle<TaggedArray> localeSeen = factory->NewTaggedArray(1);
62     // 3. If Type(locales) is String or Type(locales) is Object and locales has an [[InitializedLocale]] internal slot,
63     //    then
64     //    a. Let O be CreateArrayFromList(« locales »).
65     // 4. Else,
66     //    a.Let O be ? ToObject(locales).
67     if (locales->IsString()) {
68         JSHandle<EcmaString> tag = JSHandle<EcmaString>::Cast(locales);
69         JSHandle<TaggedArray> temp = factory->NewTaggedArray(1);
70         temp->Set(thread, 0, tag.GetTaggedValue());
71         JSHandle<JSArray> obj = JSArray::CreateArrayFromList(thread, temp);
72         JSHandle<TaggedArray> finalSeen = CanonicalizeHelper<JSArray>(thread, obj, localeSeen);
73         return finalSeen;
74 #ifdef ARK_SUPPORT_INTL
75     } else if (locales->IsJSLocale()) {
76         JSHandle<EcmaString> tag = JSLocale::ToString(thread, JSHandle<JSLocale>::Cast(locales));
77         JSHandle<TaggedArray> temp = factory->NewTaggedArray(1);
78         RETURN_HANDLE_IF_ABRUPT_COMPLETION(TaggedArray, thread);
79         temp->Set(thread, 0, tag.GetTaggedValue());
80         JSHandle<JSArray> obj = JSArray::CreateArrayFromList(thread, temp);
81         JSHandle<TaggedArray> finalSeen = CanonicalizeHelper<JSArray>(thread, obj, localeSeen);
82         return finalSeen;
83 #endif
84     } else {
85         JSHandle<JSObject> obj = JSTaggedValue::ToObject(thread, locales);
86         RETURN_HANDLE_IF_ABRUPT_COMPLETION(TaggedArray, thread);
87         JSHandle<TaggedArray> finalSeen = CanonicalizeHelper<JSObject>(thread, obj, localeSeen);
88         return finalSeen;
89     }
90     return localeSeen;
91 }
92 
93 template<typename T>
CanonicalizeHelper(JSThread * thread,JSHandle<T> & obj,JSHandle<TaggedArray> & seen)94 JSHandle<TaggedArray> LocaleHelper::CanonicalizeHelper(JSThread *thread, JSHandle<T> &obj, JSHandle<TaggedArray> &seen)
95 {
96     OperationResult operationResult = JSTaggedValue::GetProperty(thread, JSHandle<JSTaggedValue>::Cast(obj),
97                                                                  thread->GlobalConstants()->GetHandledLengthString());
98     RETURN_HANDLE_IF_ABRUPT_COMPLETION(TaggedArray, thread);
99     JSTaggedNumber len = JSTaggedValue::ToLength(thread, operationResult.GetValue());
100     RETURN_HANDLE_IF_ABRUPT_COMPLETION(TaggedArray, thread);
101     ObjectFactory *factory = thread->GetEcmaVM()->GetFactory();
102     // 2. Let seen be a new empty List.
103     uint32_t requestedLocalesLen = len.ToUint32();
104     seen = factory->NewTaggedArray(requestedLocalesLen);
105     // 6. Let k be 0.
106     // 7. Repeat, while k < len
107     JSMutableHandle<JSTaggedValue> pk(thread, JSTaggedValue::Undefined());
108     JSMutableHandle<JSTaggedValue> tag(thread, JSTaggedValue::Undefined());
109     uint32_t index = 0;
110     JSHandle<JSTaggedValue> objTagged = JSHandle<JSTaggedValue>::Cast(obj);
111     for (uint32_t k = 0; k < requestedLocalesLen; k++) {
112         // a. Let Pk be ToString(k).
113         JSHandle<JSTaggedValue> kHandle(thread, JSTaggedValue(k));
114         JSHandle<EcmaString> str = JSTaggedValue::ToString(thread, kHandle);
115         RETURN_HANDLE_IF_ABRUPT_COMPLETION(TaggedArray, thread);
116         pk.Update(str.GetTaggedValue());
117         // b. Let kPresent be ? HasProperty(O, Pk).
118         bool kPresent = JSTaggedValue::HasProperty(thread, objTagged, pk);
119         RETURN_HANDLE_IF_ABRUPT_COMPLETION(TaggedArray, thread);
120 
121         // c. If kPresent is true, then
122         if (kPresent) {
123             // i. Let kValue be ? Get(O, Pk).
124             OperationResult result = JSTaggedValue::GetProperty(thread, objTagged, pk);
125             RETURN_HANDLE_IF_ABRUPT_COMPLETION(TaggedArray, thread);
126             JSHandle<JSTaggedValue> kValue = result.GetValue();
127             // ii. If Type(kValue) is not String or Object, throw a TypeError exception.
128             if (!kValue->IsString() && !kValue->IsJSObject()) {
129                 THROW_TYPE_ERROR_AND_RETURN(thread, "kValue is not String or Object.", factory->EmptyArray());
130             }
131             // iii. If Type(kValue) is Object and kValue has an [[InitializedLocale]] internal slot, then
132             //        1. Let tag be kValue.[[Locale]].
133             // iv.  Else,
134             //        1. Let tag be ? ToString(kValue).
135 #ifdef ARK_SUPPORT_INTL
136             if (kValue->IsJSLocale()) {
137                 JSHandle<EcmaString> kValueStr = JSLocale::ToString(thread, JSHandle<JSLocale>::Cast(kValue));
138                 RETURN_HANDLE_IF_ABRUPT_COMPLETION(TaggedArray, thread);
139                 tag.Update(kValueStr.GetTaggedValue());
140             } else {
141                 JSHandle<EcmaString> kValueString = JSTaggedValue::ToString(thread, kValue);
142                 RETURN_HANDLE_IF_ABRUPT_COMPLETION(TaggedArray, thread);
143                 JSHandle<EcmaString> canonicalStr = CanonicalizeUnicodeLocaleId(thread, kValueString);
144                 RETURN_HANDLE_IF_ABRUPT_COMPLETION(TaggedArray, thread);
145                 tag.Update(canonicalStr.GetTaggedValue());
146             }
147 #else
148             JSHandle<EcmaString> kValueString = JSTaggedValue::ToString(thread, kValue);
149             RETURN_HANDLE_IF_ABRUPT_COMPLETION(TaggedArray, thread);
150             JSHandle<EcmaString> canonicalStr = CanonicalizeUnicodeLocaleId(thread, kValueString);
151             RETURN_HANDLE_IF_ABRUPT_COMPLETION(TaggedArray, thread);
152             tag.Update(canonicalStr.GetTaggedValue());
153 #endif
154             // vii. If canonicalizedTag is not an element of seen, append canonicalizedTag as the last element of seen.
155             bool isExist = false;
156             uint32_t seenLen = seen->GetLength();
157             for (uint32_t i = 0; i < seenLen; i++) {
158                 if (JSTaggedValue::SameValue(seen->Get(thread, i), tag.GetTaggedValue())) {
159                     isExist = true;
160                 }
161             }
162             if (!isExist) {
163                 seen->Set(thread, index++, JSHandle<JSTaggedValue>::Cast(tag));
164             }
165         }
166         // d. Increase k by 1.
167     }
168     // set capacity
169     seen = TaggedArray::SetCapacity(thread, seen, index);
170     // 8. Return seen.
171     return seen;
172 }
173 
174 // 6.2.3 CanonicalizeUnicodeLocaleId( locale )
CanonicalizeUnicodeLocaleId(JSThread * thread,const JSHandle<EcmaString> & locale)175 JSHandle<EcmaString> LocaleHelper::CanonicalizeUnicodeLocaleId(JSThread *thread, const JSHandle<EcmaString> &locale)
176 {
177     [[maybe_unused]] ObjectFactory *factory = thread->GetEcmaVM()->GetFactory();
178     if (!IsStructurallyValidLanguageTag(locale)) {
179         THROW_RANGE_ERROR_AND_RETURN(thread, "invalid locale", factory->GetEmptyString());
180     }
181 
182     if (EcmaStringAccessor(locale).GetLength() == 0 || EcmaStringAccessor(locale).IsUtf16()) {
183         THROW_RANGE_ERROR_AND_RETURN(thread, "invalid locale", factory->GetEmptyString());
184     }
185 
186     std::string localeCStr = ConvertToStdString(locale);
187     std::transform(localeCStr.begin(), localeCStr.end(), localeCStr.begin(), AsciiAlphaToLower);
188     UErrorCode status = U_ZERO_ERROR;
189     icu::Locale formalLocale = icu::Locale::forLanguageTag(localeCStr.c_str(), status);
190     if ((U_FAILURE(status) != 0) || (formalLocale.isBogus() != 0)) {
191         THROW_RANGE_ERROR_AND_RETURN(thread, "invalid locale", factory->GetEmptyString());
192     }
193 
194     // Resets the LocaleBuilder to match the locale.
195     // Returns an instance of Locale created from the fields set on this builder.
196     formalLocale = icu::LocaleBuilder().setLocale(formalLocale).build(status);
197     // Canonicalize the locale ID of this object according to CLDR.
198     formalLocale.canonicalize(status);
199     if ((U_FAILURE(status) != 0) || (formalLocale.isBogus() != 0)) {
200         THROW_RANGE_ERROR_AND_RETURN(thread, "invalid locale", factory->GetEmptyString());
201     }
202     JSHandle<EcmaString> languageTag = ToLanguageTag(thread, formalLocale);
203     RETURN_HANDLE_IF_ABRUPT_COMPLETION(EcmaString, thread);
204     return languageTag;
205 }
206 
ToLanguageTag(JSThread * thread,const icu::Locale & locale)207 JSHandle<EcmaString> LocaleHelper::ToLanguageTag(JSThread *thread, const icu::Locale &locale)
208 {
209     UErrorCode status = U_ZERO_ERROR;
210     ObjectFactory *factory = thread->GetEcmaVM()->GetFactory();
211     auto result = locale.toLanguageTag<std::string>(status);
212     if (U_FAILURE(status) != 0) {
213         THROW_RANGE_ERROR_AND_RETURN(thread, "invalid locale", factory->GetEmptyString());
214     }
215     size_t findBeginning = result.find("-u-");
216     std::string finalRes;
217     std::string tempRes;
218     if (findBeginning == std::string::npos) {
219         return factory->NewFromStdString(result);
220     }
221     size_t specialBeginning = findBeginning + INTL_INDEX_THREE;
222     size_t specialCount = 0;
223     while ((specialBeginning < result.size()) && (result[specialBeginning] != '-')) {
224         specialCount++;
225         specialBeginning++;
226     }
227     if (findBeginning != std::string::npos) {
228         // It begin with "-u-xx" or with more elements.
229         tempRes = result.substr(0, findBeginning + INTL_INDEX_THREE + specialCount);
230         if (result.size() <= findBeginning + INTL_INDEX_THREE + specialCount) {
231             return factory->NewFromStdString(result);
232         }
233         std::string leftStr = result.substr(findBeginning + INTL_INDEX_THREE + specialCount + 1);
234         std::istringstream temp(leftStr);
235         std::string buffer;
236         std::vector<std::string> resContainer;
237         while (getline(temp, buffer, '-')) {
238             if (buffer != "true" && buffer != "yes") {
239                 resContainer.push_back(buffer);
240             }
241         }
242         for (auto it = resContainer.begin(); it != resContainer.end(); it++) {
243             std::string tag = "-";
244             tag += *it;
245             finalRes += tag;
246         }
247     }
248     if (!finalRes.empty()) {
249         tempRes += finalRes;
250     }
251     result = tempRes;
252     return factory->NewFromStdString(result);
253 }
254 
255 // 6.2.2 IsStructurallyValidLanguageTag( locale )
IsStructurallyValidLanguageTag(const JSHandle<EcmaString> & tag)256 bool LocaleHelper::IsStructurallyValidLanguageTag(const JSHandle<EcmaString> &tag)
257 {
258     std::string tagCollection = ConvertToStdString(tag);
259     std::vector<std::string> containers;
260     std::string substring;
261     std::set<std::string> uniqueSubtags;
262     size_t address = 1;
263     for (auto it = tagCollection.begin(); it != tagCollection.end(); it++) {
264         if (*it != '-' && it != tagCollection.end() - 1) {
265             substring += *it;
266         } else {
267             if (it == tagCollection.end() - 1) {
268                 substring += *it;
269             }
270             containers.push_back(substring);
271             if (IsVariantSubtag(substring)) {
272                 std::transform(substring.begin(), substring.end(), substring.begin(), AsciiAlphaToLower);
273                 if (!uniqueSubtags.insert(substring).second) {
274                     return false;
275                 }
276             }
277             substring.clear();
278         }
279     }
280     bool result = DealwithLanguageTag(containers, address);
281     return result;
282 }
283 
ConvertToStdString(const JSHandle<EcmaString> & ecmaStr)284 std::string LocaleHelper::ConvertToStdString(const JSHandle<EcmaString> &ecmaStr)
285 {
286     return std::string(ConvertToString(*ecmaStr, StringConvertedUsage::LOGICOPERATION));
287 }
288 
DealwithLanguageTag(const std::vector<std::string> & containers,size_t & address)289 bool LocaleHelper::DealwithLanguageTag(const std::vector<std::string> &containers, size_t &address)
290 {
291     // The abstract operation returns true if locale can be generated from the ABNF grammar in section 2.1 of the RFC,
292     // starting with Language-Tag, and does not contain duplicate variant or singleton subtags
293     // If language tag is empty, return false.
294     if (containers.empty()) {
295         return false;
296     }
297 
298     // a. if the first tag is not language, return false.
299     if (!IsLanguageSubtag(containers[0])) {
300         return false;
301     }
302 
303     // if the tag include language only, like "zh" or "de", return true;
304     if (containers.size() == 1) {
305         return true;
306     }
307 
308     // Else, then
309     // if is unique singleton subtag, script and region tag.
310     if (IsExtensionSingleton(containers[1])) {
311         return true;
312     }
313 
314     if (IsScriptSubtag(containers[address])) {
315         address++;
316         if (containers.size() == address) {
317             return true;
318         }
319     }
320 
321     if (IsRegionSubtag(containers[address])) {
322         address++;
323     }
324 
325     for (size_t i = address; i < containers.size(); i++) {
326         if (IsExtensionSingleton(containers[i])) {
327             return true;
328         }
329         if (!IsVariantSubtag(containers[i])) {
330             return false;
331         }
332     }
333     return true;
334 }
335 
336 // 6.2.4 DefaultLocale ()
DefaultLocale(JSThread * thread)337 JSHandle<EcmaString> LocaleHelper::DefaultLocale(JSThread *thread)
338 {
339     icu::Locale defaultLocale;
340     auto globalConst = thread->GlobalConstants();
341     if (strcmp(defaultLocale.getName(), "en_US_POSIX") == 0 || strcmp(defaultLocale.getName(), "c") == 0) {
342         return JSHandle<EcmaString>::Cast(globalConst->GetHandledEnUsString());
343     }
344     if (defaultLocale.isBogus() != 0) {
345         return JSHandle<EcmaString>::Cast(globalConst->GetHandledUndString());
346     }
347     return ToLanguageTag(thread, defaultLocale);
348 }
349 
HandleLocaleExtension(size_t & start,size_t & extensionEnd,const std::string result,size_t len)350 void LocaleHelper::HandleLocaleExtension(size_t &start, size_t &extensionEnd, const std::string result, size_t len)
351 {
352     while (start < len - INTL_INDEX_TWO) {
353         if (result[start] != '-') {
354             start++;
355             continue;
356         }
357         if (result[start + INTL_INDEX_TWO] == '-') {
358             extensionEnd = start;
359             break;
360         }
361         start += INTL_INDEX_THREE;
362     }
363 }
364 
HandleLocale(const JSHandle<EcmaString> & localeString)365 LocaleHelper::ParsedLocale LocaleHelper::HandleLocale(const JSHandle<EcmaString> &localeString)
366 {
367     std::string result = ConvertToStdString(localeString);
368     size_t len = result.size();
369     ParsedLocale parsedResult;
370 
371     // a. The single-character subtag ’x’ as the primary subtag indicates
372     //    that the language tag consists solely of subtags whose meaning is
373     //    defined by private agreement.
374     // b. Extensions cannot be used in tags that are entirely private use.
375     if (IsPrivateSubTag(result, len)) {
376         parsedResult.base = result;
377         return parsedResult;
378     }
379     // If cannot find "-u-", return the whole string as base.
380     size_t foundExtension = result.find("-u-");
381     if (foundExtension == std::string::npos) {
382         parsedResult.base = result;
383         return parsedResult;
384     }
385     // Let privateIndex be Call(%StringProto_indexOf%, foundLocale, « "-x-" »).
386     size_t privateIndex = result.find("-x-");
387     if (privateIndex != std::string::npos && privateIndex < foundExtension) {
388         parsedResult.base = result;
389         return parsedResult;
390     }
391     const std::string basis = result.substr(0, foundExtension);
392     size_t extensionEnd = len;
393     ASSERT(len > INTL_INDEX_TWO);
394     size_t start = foundExtension + 1;
395     HandleLocaleExtension(start, extensionEnd, result, len);
396     const std::string end = result.substr(extensionEnd);
397     parsedResult.base = basis + end;
398     parsedResult.extension = result.substr(foundExtension, extensionEnd - foundExtension);
399     return parsedResult;
400 }
401 
GetAvailableLocales(JSThread * thread,const char * localeKey,const char * localePath)402 std::vector<std::string> LocaleHelper::GetAvailableLocales(JSThread *thread, const char *localeKey,
403                                                            const char *localePath)
404 {
405     UErrorCode status = U_ZERO_ERROR;
406     auto globalConst = thread->GlobalConstants();
407     JSHandle<EcmaString> specialValue = JSHandle<EcmaString>::Cast(globalConst->GetHandledEnUsPosixString());
408     std::string specialString = ConvertToStdString(specialValue);
409     UEnumeration *uenum = uloc_openAvailableByType(ULOC_AVAILABLE_WITH_LEGACY_ALIASES, &status);
410     std::vector<std::string> allLocales;
411     const char *loc = nullptr;
412     for (loc = uenum_next(uenum, nullptr, &status); loc != nullptr; loc = uenum_next(uenum, nullptr, &status)) {
413         ASSERT(U_SUCCESS(status));
414         std::string locStr(loc);
415         std::replace(locStr.begin(), locStr.end(), '_', '-');
416         if (locStr == specialString) {
417             locStr = "en-US-u-va-posix";
418         }
419 
420         if (localePath != nullptr || localeKey != nullptr) {
421             icu::Locale locale(locStr.c_str());
422             bool res = false;
423             if (!CheckLocales(locale, localeKey, localePath, res)) {
424                 continue;
425             }
426         }
427         allLocales.push_back(locStr);
428         icu::Locale formalLocale = icu::Locale::createCanonical(locStr.c_str());
429         std::string scriptStr = formalLocale.getScript();
430         if (!scriptStr.empty()) {
431             std::string languageStr = formalLocale.getLanguage();
432             std::string countryStr = formalLocale.getCountry();
433             std::string shortLocale = icu::Locale(languageStr.c_str(), countryStr.c_str()).getName();
434             std::replace(shortLocale.begin(), shortLocale.end(), '_', '-');
435             allLocales.push_back(shortLocale);
436         }
437     }
438     uenum_close(uenum);
439     return allLocales;
440 }
441 
442 // 9.2.2 BestAvailableLocale ( availableLocales, locale )
BestAvailableLocale(const std::vector<std::string> & availableLocales,const std::string & locale)443 std::string LocaleHelper::BestAvailableLocale(const std::vector<std::string> &availableLocales,
444                                               const std::string &locale)
445 {
446     // 1. Let candidate be locale.
447     std::string localeCandidate = locale;
448     std::string undefined = std::string();
449     // 2. Repeat,
450     uint32_t length = availableLocales.size();
451     while (true) {
452         // a. If availableLocales contains an element equal to candidate, return candidate.
453         for (uint32_t i = 0; i < length; ++i) {
454             std::string itemStr = availableLocales[i];
455             if (itemStr == localeCandidate) {
456                 return localeCandidate;
457             }
458         }
459         // b. Let pos be the character index of the last occurrence of "-" (U+002D) within candidate.
460         //    If that character does not occur, return undefined.
461         size_t pos = localeCandidate.rfind('-');
462         if (pos == std::string::npos) {
463             return undefined;
464         }
465         // c. If pos ≥ 2 and the character "-" occurs at index pos-2 of candidate, decrease pos by 2.
466         if (pos >= INTL_INDEX_TWO && localeCandidate[pos - INTL_INDEX_TWO] == '-') {
467             pos -= INTL_INDEX_TWO;
468         }
469         // d. Let candidate be the substring of candidate from position 0, inclusive, to position pos, exclusive.
470         localeCandidate.resize(pos);
471     }
472 }
} // namespace panda::ecmascript::intl