1 /*
2 * Copyright (c) 2021 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "ecmascript/intl/locale_helper.h"
17
18 #include "ecmascript/base/string_helper.h"
19 #include "ecmascript/ecma_macros.h"
20 #include "ecmascript/ecma_vm.h"
21 #include "ecmascript/global_env.h"
22 #include "ecmascript/object_factory.h"
23
24 #if defined(__clang__)
25 #pragma clang diagnostic push
26 #pragma clang diagnostic ignored "-Wshadow"
27 #elif defined(__GNUC__)
28 #pragma GCC diagnostic push
29 #pragma GCC diagnostic ignored "-Wshadow"
30 #endif
31 #include "unicode/localebuilder.h"
32 #if defined(__clang__)
33 #pragma clang diagnostic pop
34 #elif defined(__GNUC__)
35 #pragma GCC diagnostic pop
36 #endif
37
38 namespace panda::ecmascript::intl {
UStringToString(JSThread * thread,const icu::UnicodeString & string)39 JSHandle<EcmaString> LocaleHelper::UStringToString(JSThread *thread, const icu::UnicodeString &string)
40 {
41 ObjectFactory *factory = thread->GetEcmaVM()->GetFactory();
42 return factory->NewFromUtf16(reinterpret_cast<const uint16_t *>(string.getBuffer()), string.length());
43 }
44
UStringToString(JSThread * thread,const icu::UnicodeString & string,int32_t begin,int32_t end)45 JSHandle<EcmaString> LocaleHelper::UStringToString(JSThread *thread, const icu::UnicodeString &string, int32_t begin,
46 int32_t end)
47 {
48 return UStringToString(thread, string.tempSubStringBetween(begin, end));
49 }
50
51 // 9.2.1 CanonicalizeLocaleList ( locales )
CanonicalizeLocaleList(JSThread * thread,const JSHandle<JSTaggedValue> & locales)52 JSHandle<TaggedArray> LocaleHelper::CanonicalizeLocaleList(JSThread *thread, const JSHandle<JSTaggedValue> &locales)
53 {
54 ObjectFactory *factory = thread->GetEcmaVM()->GetFactory();
55 // 1. If locales is undefined, then
56 // a. Return a new empty List.
57 if (locales->IsUndefined()) {
58 return factory->EmptyArray();
59 }
60 // 2. Let seen be a new empty List.
61 JSHandle<TaggedArray> localeSeen = factory->NewTaggedArray(1);
62 // 3. If Type(locales) is String or Type(locales) is Object and locales has an [[InitializedLocale]] internal slot,
63 // then
64 // a. Let O be CreateArrayFromList(« locales »).
65 // 4. Else,
66 // a.Let O be ? ToObject(locales).
67 if (locales->IsString()) {
68 JSHandle<EcmaString> tag = JSHandle<EcmaString>::Cast(locales);
69 JSHandle<TaggedArray> temp = factory->NewTaggedArray(1);
70 temp->Set(thread, 0, tag.GetTaggedValue());
71 JSHandle<JSArray> obj = JSArray::CreateArrayFromList(thread, temp);
72 JSHandle<TaggedArray> finalSeen = CanonicalizeHelper<JSArray>(thread, obj, localeSeen);
73 return finalSeen;
74 #ifdef ARK_SUPPORT_INTL
75 } else if (locales->IsJSLocale()) {
76 JSHandle<EcmaString> tag = JSLocale::ToString(thread, JSHandle<JSLocale>::Cast(locales));
77 JSHandle<TaggedArray> temp = factory->NewTaggedArray(1);
78 RETURN_HANDLE_IF_ABRUPT_COMPLETION(TaggedArray, thread);
79 temp->Set(thread, 0, tag.GetTaggedValue());
80 JSHandle<JSArray> obj = JSArray::CreateArrayFromList(thread, temp);
81 JSHandle<TaggedArray> finalSeen = CanonicalizeHelper<JSArray>(thread, obj, localeSeen);
82 return finalSeen;
83 #endif
84 } else {
85 JSHandle<JSObject> obj = JSTaggedValue::ToObject(thread, locales);
86 RETURN_HANDLE_IF_ABRUPT_COMPLETION(TaggedArray, thread);
87 JSHandle<TaggedArray> finalSeen = CanonicalizeHelper<JSObject>(thread, obj, localeSeen);
88 return finalSeen;
89 }
90 return localeSeen;
91 }
92
93 template<typename T>
CanonicalizeHelper(JSThread * thread,JSHandle<T> & obj,JSHandle<TaggedArray> & seen)94 JSHandle<TaggedArray> LocaleHelper::CanonicalizeHelper(JSThread *thread, JSHandle<T> &obj, JSHandle<TaggedArray> &seen)
95 {
96 OperationResult operationResult = JSTaggedValue::GetProperty(thread, JSHandle<JSTaggedValue>::Cast(obj),
97 thread->GlobalConstants()->GetHandledLengthString());
98 RETURN_HANDLE_IF_ABRUPT_COMPLETION(TaggedArray, thread);
99 JSTaggedNumber len = JSTaggedValue::ToLength(thread, operationResult.GetValue());
100 RETURN_HANDLE_IF_ABRUPT_COMPLETION(TaggedArray, thread);
101 ObjectFactory *factory = thread->GetEcmaVM()->GetFactory();
102 // 2. Let seen be a new empty List.
103 uint32_t requestedLocalesLen = len.ToUint32();
104 seen = factory->NewTaggedArray(requestedLocalesLen);
105 // 6. Let k be 0.
106 // 7. Repeat, while k < len
107 JSMutableHandle<JSTaggedValue> pk(thread, JSTaggedValue::Undefined());
108 JSMutableHandle<JSTaggedValue> tag(thread, JSTaggedValue::Undefined());
109 uint32_t index = 0;
110 JSHandle<JSTaggedValue> objTagged = JSHandle<JSTaggedValue>::Cast(obj);
111 for (uint32_t k = 0; k < requestedLocalesLen; k++) {
112 // a. Let Pk be ToString(k).
113 JSHandle<JSTaggedValue> kHandle(thread, JSTaggedValue(k));
114 JSHandle<EcmaString> str = JSTaggedValue::ToString(thread, kHandle);
115 RETURN_HANDLE_IF_ABRUPT_COMPLETION(TaggedArray, thread);
116 pk.Update(str.GetTaggedValue());
117 // b. Let kPresent be ? HasProperty(O, Pk).
118 bool kPresent = JSTaggedValue::HasProperty(thread, objTagged, pk);
119 RETURN_HANDLE_IF_ABRUPT_COMPLETION(TaggedArray, thread);
120
121 // c. If kPresent is true, then
122 if (kPresent) {
123 // i. Let kValue be ? Get(O, Pk).
124 OperationResult result = JSTaggedValue::GetProperty(thread, objTagged, pk);
125 RETURN_HANDLE_IF_ABRUPT_COMPLETION(TaggedArray, thread);
126 JSHandle<JSTaggedValue> kValue = result.GetValue();
127 // ii. If Type(kValue) is not String or Object, throw a TypeError exception.
128 if (!kValue->IsString() && !kValue->IsJSObject()) {
129 THROW_TYPE_ERROR_AND_RETURN(thread, "kValue is not String or Object.", factory->EmptyArray());
130 }
131 // iii. If Type(kValue) is Object and kValue has an [[InitializedLocale]] internal slot, then
132 // 1. Let tag be kValue.[[Locale]].
133 // iv. Else,
134 // 1. Let tag be ? ToString(kValue).
135 #ifdef ARK_SUPPORT_INTL
136 if (kValue->IsJSLocale()) {
137 JSHandle<EcmaString> kValueStr = JSLocale::ToString(thread, JSHandle<JSLocale>::Cast(kValue));
138 RETURN_HANDLE_IF_ABRUPT_COMPLETION(TaggedArray, thread);
139 tag.Update(kValueStr.GetTaggedValue());
140 } else {
141 JSHandle<EcmaString> kValueString = JSTaggedValue::ToString(thread, kValue);
142 RETURN_HANDLE_IF_ABRUPT_COMPLETION(TaggedArray, thread);
143 JSHandle<EcmaString> canonicalStr = CanonicalizeUnicodeLocaleId(thread, kValueString);
144 RETURN_HANDLE_IF_ABRUPT_COMPLETION(TaggedArray, thread);
145 tag.Update(canonicalStr.GetTaggedValue());
146 }
147 #else
148 JSHandle<EcmaString> kValueString = JSTaggedValue::ToString(thread, kValue);
149 RETURN_HANDLE_IF_ABRUPT_COMPLETION(TaggedArray, thread);
150 JSHandle<EcmaString> canonicalStr = CanonicalizeUnicodeLocaleId(thread, kValueString);
151 RETURN_HANDLE_IF_ABRUPT_COMPLETION(TaggedArray, thread);
152 tag.Update(canonicalStr.GetTaggedValue());
153 #endif
154 // vii. If canonicalizedTag is not an element of seen, append canonicalizedTag as the last element of seen.
155 bool isExist = false;
156 uint32_t seenLen = seen->GetLength();
157 for (uint32_t i = 0; i < seenLen; i++) {
158 if (JSTaggedValue::SameValue(seen->Get(thread, i), tag.GetTaggedValue())) {
159 isExist = true;
160 }
161 }
162 if (!isExist) {
163 seen->Set(thread, index++, JSHandle<JSTaggedValue>::Cast(tag));
164 }
165 }
166 // d. Increase k by 1.
167 }
168 // set capacity
169 seen = TaggedArray::SetCapacity(thread, seen, index);
170 // 8. Return seen.
171 return seen;
172 }
173
174 // 6.2.3 CanonicalizeUnicodeLocaleId( locale )
CanonicalizeUnicodeLocaleId(JSThread * thread,const JSHandle<EcmaString> & locale)175 JSHandle<EcmaString> LocaleHelper::CanonicalizeUnicodeLocaleId(JSThread *thread, const JSHandle<EcmaString> &locale)
176 {
177 [[maybe_unused]] ObjectFactory *factory = thread->GetEcmaVM()->GetFactory();
178 if (!IsStructurallyValidLanguageTag(locale)) {
179 THROW_RANGE_ERROR_AND_RETURN(thread, "invalid locale", factory->GetEmptyString());
180 }
181
182 if (EcmaStringAccessor(locale).GetLength() == 0 || EcmaStringAccessor(locale).IsUtf16()) {
183 THROW_RANGE_ERROR_AND_RETURN(thread, "invalid locale", factory->GetEmptyString());
184 }
185
186 std::string localeCStr = ConvertToStdString(locale);
187 std::transform(localeCStr.begin(), localeCStr.end(), localeCStr.begin(), AsciiAlphaToLower);
188 UErrorCode status = U_ZERO_ERROR;
189 icu::Locale formalLocale = icu::Locale::forLanguageTag(localeCStr.c_str(), status);
190 if ((U_FAILURE(status) != 0) || (formalLocale.isBogus() != 0)) {
191 THROW_RANGE_ERROR_AND_RETURN(thread, "invalid locale", factory->GetEmptyString());
192 }
193
194 // Resets the LocaleBuilder to match the locale.
195 // Returns an instance of Locale created from the fields set on this builder.
196 formalLocale = icu::LocaleBuilder().setLocale(formalLocale).build(status);
197 // Canonicalize the locale ID of this object according to CLDR.
198 formalLocale.canonicalize(status);
199 if ((U_FAILURE(status) != 0) || (formalLocale.isBogus() != 0)) {
200 THROW_RANGE_ERROR_AND_RETURN(thread, "invalid locale", factory->GetEmptyString());
201 }
202 JSHandle<EcmaString> languageTag = ToLanguageTag(thread, formalLocale);
203 RETURN_HANDLE_IF_ABRUPT_COMPLETION(EcmaString, thread);
204 return languageTag;
205 }
206
ToLanguageTag(JSThread * thread,const icu::Locale & locale)207 JSHandle<EcmaString> LocaleHelper::ToLanguageTag(JSThread *thread, const icu::Locale &locale)
208 {
209 UErrorCode status = U_ZERO_ERROR;
210 ObjectFactory *factory = thread->GetEcmaVM()->GetFactory();
211 auto result = locale.toLanguageTag<std::string>(status);
212 if (U_FAILURE(status) != 0) {
213 THROW_RANGE_ERROR_AND_RETURN(thread, "invalid locale", factory->GetEmptyString());
214 }
215 size_t findBeginning = result.find("-u-");
216 std::string finalRes;
217 std::string tempRes;
218 if (findBeginning == std::string::npos) {
219 return factory->NewFromStdString(result);
220 }
221 size_t specialBeginning = findBeginning + INTL_INDEX_THREE;
222 size_t specialCount = 0;
223 while ((specialBeginning < result.size()) && (result[specialBeginning] != '-')) {
224 specialCount++;
225 specialBeginning++;
226 }
227 if (findBeginning != std::string::npos) {
228 // It begin with "-u-xx" or with more elements.
229 tempRes = result.substr(0, findBeginning + INTL_INDEX_THREE + specialCount);
230 if (result.size() <= findBeginning + INTL_INDEX_THREE + specialCount) {
231 return factory->NewFromStdString(result);
232 }
233 std::string leftStr = result.substr(findBeginning + INTL_INDEX_THREE + specialCount + 1);
234 std::istringstream temp(leftStr);
235 std::string buffer;
236 std::vector<std::string> resContainer;
237 while (getline(temp, buffer, '-')) {
238 if (buffer != "true" && buffer != "yes") {
239 resContainer.push_back(buffer);
240 }
241 }
242 for (auto it = resContainer.begin(); it != resContainer.end(); it++) {
243 std::string tag = "-";
244 tag += *it;
245 finalRes += tag;
246 }
247 }
248 if (!finalRes.empty()) {
249 tempRes += finalRes;
250 }
251 result = tempRes;
252 return factory->NewFromStdString(result);
253 }
254
255 // 6.2.2 IsStructurallyValidLanguageTag( locale )
IsStructurallyValidLanguageTag(const JSHandle<EcmaString> & tag)256 bool LocaleHelper::IsStructurallyValidLanguageTag(const JSHandle<EcmaString> &tag)
257 {
258 std::string tagCollection = ConvertToStdString(tag);
259 std::vector<std::string> containers;
260 std::string substring;
261 std::set<std::string> uniqueSubtags;
262 size_t address = 1;
263 for (auto it = tagCollection.begin(); it != tagCollection.end(); it++) {
264 if (*it != '-' && it != tagCollection.end() - 1) {
265 substring += *it;
266 } else {
267 if (it == tagCollection.end() - 1) {
268 substring += *it;
269 }
270 containers.push_back(substring);
271 if (IsVariantSubtag(substring)) {
272 std::transform(substring.begin(), substring.end(), substring.begin(), AsciiAlphaToLower);
273 if (!uniqueSubtags.insert(substring).second) {
274 return false;
275 }
276 }
277 substring.clear();
278 }
279 }
280 bool result = DealwithLanguageTag(containers, address);
281 return result;
282 }
283
ConvertToStdString(const JSHandle<EcmaString> & ecmaStr)284 std::string LocaleHelper::ConvertToStdString(const JSHandle<EcmaString> &ecmaStr)
285 {
286 return std::string(ConvertToString(*ecmaStr, StringConvertedUsage::LOGICOPERATION));
287 }
288
DealwithLanguageTag(const std::vector<std::string> & containers,size_t & address)289 bool LocaleHelper::DealwithLanguageTag(const std::vector<std::string> &containers, size_t &address)
290 {
291 // The abstract operation returns true if locale can be generated from the ABNF grammar in section 2.1 of the RFC,
292 // starting with Language-Tag, and does not contain duplicate variant or singleton subtags
293 // If language tag is empty, return false.
294 if (containers.empty()) {
295 return false;
296 }
297
298 // a. if the first tag is not language, return false.
299 if (!IsLanguageSubtag(containers[0])) {
300 return false;
301 }
302
303 // if the tag include language only, like "zh" or "de", return true;
304 if (containers.size() == 1) {
305 return true;
306 }
307
308 // Else, then
309 // if is unique singleton subtag, script and region tag.
310 if (IsExtensionSingleton(containers[1])) {
311 return true;
312 }
313
314 if (IsScriptSubtag(containers[address])) {
315 address++;
316 if (containers.size() == address) {
317 return true;
318 }
319 }
320
321 if (IsRegionSubtag(containers[address])) {
322 address++;
323 }
324
325 for (size_t i = address; i < containers.size(); i++) {
326 if (IsExtensionSingleton(containers[i])) {
327 return true;
328 }
329 if (!IsVariantSubtag(containers[i])) {
330 return false;
331 }
332 }
333 return true;
334 }
335
336 // 6.2.4 DefaultLocale ()
DefaultLocale(JSThread * thread)337 JSHandle<EcmaString> LocaleHelper::DefaultLocale(JSThread *thread)
338 {
339 icu::Locale defaultLocale;
340 auto globalConst = thread->GlobalConstants();
341 if (strcmp(defaultLocale.getName(), "en_US_POSIX") == 0 || strcmp(defaultLocale.getName(), "c") == 0) {
342 return JSHandle<EcmaString>::Cast(globalConst->GetHandledEnUsString());
343 }
344 if (defaultLocale.isBogus() != 0) {
345 return JSHandle<EcmaString>::Cast(globalConst->GetHandledUndString());
346 }
347 return ToLanguageTag(thread, defaultLocale);
348 }
349
HandleLocaleExtension(size_t & start,size_t & extensionEnd,const std::string result,size_t len)350 void LocaleHelper::HandleLocaleExtension(size_t &start, size_t &extensionEnd, const std::string result, size_t len)
351 {
352 while (start < len - INTL_INDEX_TWO) {
353 if (result[start] != '-') {
354 start++;
355 continue;
356 }
357 if (result[start + INTL_INDEX_TWO] == '-') {
358 extensionEnd = start;
359 break;
360 }
361 start += INTL_INDEX_THREE;
362 }
363 }
364
HandleLocale(const JSHandle<EcmaString> & localeString)365 LocaleHelper::ParsedLocale LocaleHelper::HandleLocale(const JSHandle<EcmaString> &localeString)
366 {
367 std::string result = ConvertToStdString(localeString);
368 size_t len = result.size();
369 ParsedLocale parsedResult;
370
371 // a. The single-character subtag ’x’ as the primary subtag indicates
372 // that the language tag consists solely of subtags whose meaning is
373 // defined by private agreement.
374 // b. Extensions cannot be used in tags that are entirely private use.
375 if (IsPrivateSubTag(result, len)) {
376 parsedResult.base = result;
377 return parsedResult;
378 }
379 // If cannot find "-u-", return the whole string as base.
380 size_t foundExtension = result.find("-u-");
381 if (foundExtension == std::string::npos) {
382 parsedResult.base = result;
383 return parsedResult;
384 }
385 // Let privateIndex be Call(%StringProto_indexOf%, foundLocale, « "-x-" »).
386 size_t privateIndex = result.find("-x-");
387 if (privateIndex != std::string::npos && privateIndex < foundExtension) {
388 parsedResult.base = result;
389 return parsedResult;
390 }
391 const std::string basis = result.substr(0, foundExtension);
392 size_t extensionEnd = len;
393 ASSERT(len > INTL_INDEX_TWO);
394 size_t start = foundExtension + 1;
395 HandleLocaleExtension(start, extensionEnd, result, len);
396 const std::string end = result.substr(extensionEnd);
397 parsedResult.base = basis + end;
398 parsedResult.extension = result.substr(foundExtension, extensionEnd - foundExtension);
399 return parsedResult;
400 }
401
GetAvailableLocales(JSThread * thread,const char * localeKey,const char * localePath)402 std::vector<std::string> LocaleHelper::GetAvailableLocales(JSThread *thread, const char *localeKey,
403 const char *localePath)
404 {
405 UErrorCode status = U_ZERO_ERROR;
406 auto globalConst = thread->GlobalConstants();
407 JSHandle<EcmaString> specialValue = JSHandle<EcmaString>::Cast(globalConst->GetHandledEnUsPosixString());
408 std::string specialString = ConvertToStdString(specialValue);
409 UEnumeration *uenum = uloc_openAvailableByType(ULOC_AVAILABLE_WITH_LEGACY_ALIASES, &status);
410 std::vector<std::string> allLocales;
411 const char *loc = nullptr;
412 for (loc = uenum_next(uenum, nullptr, &status); loc != nullptr; loc = uenum_next(uenum, nullptr, &status)) {
413 ASSERT(U_SUCCESS(status));
414 std::string locStr(loc);
415 std::replace(locStr.begin(), locStr.end(), '_', '-');
416 if (locStr == specialString) {
417 locStr = "en-US-u-va-posix";
418 }
419
420 if (localePath != nullptr || localeKey != nullptr) {
421 icu::Locale locale(locStr.c_str());
422 bool res = false;
423 if (!CheckLocales(locale, localeKey, localePath, res)) {
424 continue;
425 }
426 }
427 allLocales.push_back(locStr);
428 icu::Locale formalLocale = icu::Locale::createCanonical(locStr.c_str());
429 std::string scriptStr = formalLocale.getScript();
430 if (!scriptStr.empty()) {
431 std::string languageStr = formalLocale.getLanguage();
432 std::string countryStr = formalLocale.getCountry();
433 std::string shortLocale = icu::Locale(languageStr.c_str(), countryStr.c_str()).getName();
434 std::replace(shortLocale.begin(), shortLocale.end(), '_', '-');
435 allLocales.push_back(shortLocale);
436 }
437 }
438 uenum_close(uenum);
439 return allLocales;
440 }
441
442 // 9.2.2 BestAvailableLocale ( availableLocales, locale )
BestAvailableLocale(const std::vector<std::string> & availableLocales,const std::string & locale)443 std::string LocaleHelper::BestAvailableLocale(const std::vector<std::string> &availableLocales,
444 const std::string &locale)
445 {
446 // 1. Let candidate be locale.
447 std::string localeCandidate = locale;
448 std::string undefined = std::string();
449 // 2. Repeat,
450 uint32_t length = availableLocales.size();
451 while (true) {
452 // a. If availableLocales contains an element equal to candidate, return candidate.
453 for (uint32_t i = 0; i < length; ++i) {
454 std::string itemStr = availableLocales[i];
455 if (itemStr == localeCandidate) {
456 return localeCandidate;
457 }
458 }
459 // b. Let pos be the character index of the last occurrence of "-" (U+002D) within candidate.
460 // If that character does not occur, return undefined.
461 size_t pos = localeCandidate.rfind('-');
462 if (pos == std::string::npos) {
463 return undefined;
464 }
465 // c. If pos ≥ 2 and the character "-" occurs at index pos-2 of candidate, decrease pos by 2.
466 if (pos >= INTL_INDEX_TWO && localeCandidate[pos - INTL_INDEX_TWO] == '-') {
467 pos -= INTL_INDEX_TWO;
468 }
469 // d. Let candidate be the substring of candidate from position 0, inclusive, to position pos, exclusive.
470 localeCandidate.resize(pos);
471 }
472 }
}  // namespace panda::ecmascript::intl