• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2018 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef V8_INTL_SUPPORT
6 #error Internationalization is expected to be enabled.
7 #endif  // V8_INTL_SUPPORT
8 
9 #include "src/objects/js-locale.h"
10 
11 #include <map>
12 #include <memory>
13 #include <string>
14 #include <vector>
15 
16 #include "src/api/api.h"
17 #include "src/execution/isolate.h"
18 #include "src/handles/global-handles.h"
19 #include "src/heap/factory.h"
20 #include "src/objects/intl-objects.h"
21 #include "src/objects/js-locale-inl.h"
22 #include "src/objects/objects-inl.h"
23 #include "unicode/char16ptr.h"
24 #include "unicode/localebuilder.h"
25 #include "unicode/locid.h"
26 #include "unicode/uloc.h"
27 #include "unicode/unistr.h"
28 
29 namespace v8 {
30 namespace internal {
31 
32 namespace {
33 
34 struct OptionData {
35   const char* name;
36   const char* key;
37   const std::vector<const char*>* possible_values;
38   bool is_bool_value;
39 };
40 
41 // Inserts tags from options into locale string.
InsertOptionsIntoLocale(Isolate * isolate,Handle<JSReceiver> options,icu::LocaleBuilder * builder)42 Maybe<bool> InsertOptionsIntoLocale(Isolate* isolate,
43                                     Handle<JSReceiver> options,
44                                     icu::LocaleBuilder* builder) {
45   DCHECK(isolate);
46 
47   const std::vector<const char*> hour_cycle_values = {"h11", "h12", "h23",
48                                                       "h24"};
49   const std::vector<const char*> case_first_values = {"upper", "lower",
50                                                       "false"};
51   const std::vector<const char*> empty_values = {};
52   const std::array<OptionData, 6> kOptionToUnicodeTagMap = {
53       {{"calendar", "ca", &empty_values, false},
54        {"collation", "co", &empty_values, false},
55        {"hourCycle", "hc", &hour_cycle_values, false},
56        {"caseFirst", "kf", &case_first_values, false},
57        {"numeric", "kn", &empty_values, true},
58        {"numberingSystem", "nu", &empty_values, false}}};
59 
60   // TODO(cira): Pass in values as per the spec to make this to be
61   // spec compliant.
62 
63   for (const auto& option_to_bcp47 : kOptionToUnicodeTagMap) {
64     std::unique_ptr<char[]> value_str = nullptr;
65     bool value_bool = false;
66     Maybe<bool> maybe_found =
67         option_to_bcp47.is_bool_value
68             ? Intl::GetBoolOption(isolate, options, option_to_bcp47.name,
69                                   "locale", &value_bool)
70             : Intl::GetStringOption(isolate, options, option_to_bcp47.name,
71                                     *(option_to_bcp47.possible_values),
72                                     "locale", &value_str);
73     MAYBE_RETURN(maybe_found, Nothing<bool>());
74 
75     // TODO(cira): Use fallback value if value is not found to make
76     // this spec compliant.
77     if (!maybe_found.FromJust()) continue;
78 
79     if (option_to_bcp47.is_bool_value) {
80       value_str = value_bool ? isolate->factory()->true_string()->ToCString()
81                              : isolate->factory()->false_string()->ToCString();
82     }
83     DCHECK_NOT_NULL(value_str.get());
84 
85     // Overwrite existing, or insert new key-value to the locale string.
86     if (!uloc_toLegacyType(uloc_toLegacyKey(option_to_bcp47.key),
87                            value_str.get())) {
88       return Just(false);
89     }
90     builder->setUnicodeLocaleKeyword(option_to_bcp47.key, value_str.get());
91   }
92   return Just(true);
93 }
94 
UnicodeKeywordValue(Isolate * isolate,Handle<JSLocale> locale,const char * key)95 Handle<Object> UnicodeKeywordValue(Isolate* isolate, Handle<JSLocale> locale,
96                                    const char* key) {
97   icu::Locale* icu_locale = locale->icu_locale().raw();
98   UErrorCode status = U_ZERO_ERROR;
99   std::string value =
100       icu_locale->getUnicodeKeywordValue<std::string>(key, status);
101   if (status == U_ILLEGAL_ARGUMENT_ERROR || value == "") {
102     return isolate->factory()->undefined_value();
103   }
104   if (value == "yes") {
105     value = "true";
106   }
107   return isolate->factory()->NewStringFromAsciiChecked(value.c_str());
108 }
109 
// True when |value| lies in the closed interval [start, end].
bool InRange(size_t value, size_t start, size_t end) {
  if (value < start) return false;
  return !(end < value);
}
113 
// True when the character |value| lies in the closed interval [start, end].
bool InRange(char value, char start, char end) {
  if (value < start) return false;
  return !(end < value);
}
117 
IsCheckRange(const std::string & str,size_t min,size_t max,bool (range_check_func)(char))118 bool IsCheckRange(const std::string& str, size_t min, size_t max,
119                   bool(range_check_func)(char)) {
120   if (!InRange(str.length(), min, max)) return false;
121   for (size_t i = 0; i < str.length(); i++) {
122     if (!range_check_func(str[i])) return false;
123   }
124   return true;
125 }
IsAlpha(const std::string & str,size_t min,size_t max)126 bool IsAlpha(const std::string& str, size_t min, size_t max) {
127   return IsCheckRange(str, min, max, [](char c) -> bool {
128     return InRange(c, 'a', 'z') || InRange(c, 'A', 'Z');
129   });
130 }
131 
IsDigit(const std::string & str,size_t min,size_t max)132 bool IsDigit(const std::string& str, size_t min, size_t max) {
133   return IsCheckRange(str, min, max,
134                       [](char c) -> bool { return InRange(c, '0', '9'); });
135 }
136 
IsAlphanum(const std::string & str,size_t min,size_t max)137 bool IsAlphanum(const std::string& str, size_t min, size_t max) {
138   return IsCheckRange(str, min, max, [](char c) -> bool {
139     return InRange(c, 'a', 'z') || InRange(c, 'A', 'Z') || InRange(c, '0', '9');
140   });
141 }
142 
IsUnicodeLanguageSubtag(const std::string & value)143 bool IsUnicodeLanguageSubtag(const std::string& value) {
144   // unicode_language_subtag = alpha{2,3} | alpha{5,8};
145   return IsAlpha(value, 2, 3) || IsAlpha(value, 5, 8);
146 }
147 
IsUnicodeScriptSubtag(const std::string & value)148 bool IsUnicodeScriptSubtag(const std::string& value) {
149   // unicode_script_subtag = alpha{4} ;
150   return IsAlpha(value, 4, 4);
151 }
152 
IsUnicodeRegionSubtag(const std::string & value)153 bool IsUnicodeRegionSubtag(const std::string& value) {
154   // unicode_region_subtag = (alpha{2} | digit{3});
155   return IsAlpha(value, 2, 2) || IsDigit(value, 3, 3);
156 }
157 
IsDigitAlphanum3(const std::string & value)158 bool IsDigitAlphanum3(const std::string& value) {
159   return value.length() == 4 && InRange(value[0], '0', '9') &&
160          IsAlphanum(value.substr(1), 3, 3);
161 }
162 
IsUnicodeVariantSubtag(const std::string & value)163 bool IsUnicodeVariantSubtag(const std::string& value) {
164   // unicode_variant_subtag = (alphanum{5,8} | digit alphanum{3}) ;
165   return IsAlphanum(value, 5, 8) || IsDigitAlphanum3(value);
166 }
167 
IsExtensionSingleton(const std::string & value)168 bool IsExtensionSingleton(const std::string& value) {
169   return IsAlphanum(value, 1, 1);
170 }
171 }  // namespace
172 
Is38AlphaNumList(const std::string & value)173 bool JSLocale::Is38AlphaNumList(const std::string& value) {
174   std::size_t found = value.find("-");
175   if (found == std::string::npos) {
176     return IsAlphanum(value, 3, 8);
177   }
178   return IsAlphanum(value.substr(0, found), 3, 8) &&
179          JSLocale::Is38AlphaNumList(value.substr(found + 1));
180 }
181 
Is3Alpha(const std::string & value)182 bool JSLocale::Is3Alpha(const std::string& value) {
183   return IsAlpha(value, 3, 3);
184 }
185 
186 // TODO(ftang) Replace the following check w/ icu::LocaleBuilder
187 // once ICU64 land in March 2019.
StartsWithUnicodeLanguageId(const std::string & value)188 bool JSLocale::StartsWithUnicodeLanguageId(const std::string& value) {
189   // unicode_language_id =
190   // unicode_language_subtag (sep unicode_script_subtag)?
191   //   (sep unicode_region_subtag)? (sep unicode_variant_subtag)* ;
192   std::vector<std::string> tokens;
193   std::string token;
194   std::istringstream token_stream(value);
195   while (std::getline(token_stream, token, '-')) {
196     tokens.push_back(token);
197   }
198   if (tokens.size() == 0) return false;
199 
200   // length >= 1
201   if (!IsUnicodeLanguageSubtag(tokens[0])) return false;
202 
203   if (tokens.size() == 1) return true;
204 
205   // length >= 2
206   if (IsExtensionSingleton(tokens[1])) return true;
207 
208   size_t index = 1;
209   if (IsUnicodeScriptSubtag(tokens[index])) {
210     index++;
211     if (index == tokens.size()) return true;
212   }
213   if (IsUnicodeRegionSubtag(tokens[index])) {
214     index++;
215   }
216   while (index < tokens.size()) {
217     if (IsExtensionSingleton(tokens[index])) return true;
218     if (!IsUnicodeVariantSubtag(tokens[index])) return false;
219     index++;
220   }
221   return true;
222 }
223 
224 namespace {
ApplyOptionsToTag(Isolate * isolate,Handle<String> tag,Handle<JSReceiver> options,icu::LocaleBuilder * builder)225 Maybe<bool> ApplyOptionsToTag(Isolate* isolate, Handle<String> tag,
226                               Handle<JSReceiver> options,
227                               icu::LocaleBuilder* builder) {
228   v8::Isolate* v8_isolate = reinterpret_cast<v8::Isolate*>(isolate);
229   if (tag->length() == 0) {
230     THROW_NEW_ERROR_RETURN_VALUE(
231         isolate, NewRangeError(MessageTemplate::kLocaleNotEmpty),
232         Nothing<bool>());
233   }
234 
235   v8::String::Utf8Value bcp47_tag(v8_isolate, v8::Utils::ToLocal(tag));
236   builder->setLanguageTag({*bcp47_tag, bcp47_tag.length()});
237   DCHECK_LT(0, bcp47_tag.length());
238   DCHECK_NOT_NULL(*bcp47_tag);
239   // 2. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError
240   // exception.
241   if (!JSLocale::StartsWithUnicodeLanguageId(*bcp47_tag)) {
242     return Just(false);
243   }
244   UErrorCode status = U_ZERO_ERROR;
245   builder->build(status);
246   if (U_FAILURE(status)) {
247     return Just(false);
248   }
249 
250   // 3. Let language be ? GetOption(options, "language", "string", undefined,
251   // undefined).
252   const std::vector<const char*> empty_values = {};
253   std::unique_ptr<char[]> language_str = nullptr;
254   Maybe<bool> maybe_language =
255       Intl::GetStringOption(isolate, options, "language", empty_values,
256                             "ApplyOptionsToTag", &language_str);
257   MAYBE_RETURN(maybe_language, Nothing<bool>());
258   // 4. If language is not undefined, then
259   if (maybe_language.FromJust()) {
260     builder->setLanguage(language_str.get());
261     builder->build(status);
262     // a. If language does not match the unicode_language_subtag production,
263     //    throw a RangeError exception.
264     if (U_FAILURE(status) || language_str[0] == '\0' ||
265         IsAlpha(language_str.get(), 4, 4)) {
266       return Just(false);
267     }
268   }
269   // 5. Let script be ? GetOption(options, "script", "string", undefined,
270   // undefined).
271   std::unique_ptr<char[]> script_str = nullptr;
272   Maybe<bool> maybe_script =
273       Intl::GetStringOption(isolate, options, "script", empty_values,
274                             "ApplyOptionsToTag", &script_str);
275   MAYBE_RETURN(maybe_script, Nothing<bool>());
276   // 6. If script is not undefined, then
277   if (maybe_script.FromJust()) {
278     builder->setScript(script_str.get());
279     builder->build(status);
280     // a. If script does not match the unicode_script_subtag production, throw
281     //    a RangeError exception.
282     if (U_FAILURE(status) || script_str[0] == '\0') {
283       return Just(false);
284     }
285   }
286   // 7. Let region be ? GetOption(options, "region", "string", undefined,
287   // undefined).
288   std::unique_ptr<char[]> region_str = nullptr;
289   Maybe<bool> maybe_region =
290       Intl::GetStringOption(isolate, options, "region", empty_values,
291                             "ApplyOptionsToTag", &region_str);
292   MAYBE_RETURN(maybe_region, Nothing<bool>());
293   // 8. If region is not undefined, then
294   if (maybe_region.FromJust()) {
295     // a. If region does not match the region production, throw a RangeError
296     // exception.
297     builder->setRegion(region_str.get());
298     builder->build(status);
299     if (U_FAILURE(status) || region_str[0] == '\0') {
300       return Just(false);
301     }
302   }
303 
304   // 9. Set tag to CanonicalizeLanguageTag(tag).
305   // 10.  If language is not undefined,
306   // a. Assert: tag matches the unicode_locale_id production.
307   // b. Set tag to tag with the substring corresponding to the
308   //    unicode_language_subtag production replaced by the string language.
309   // 11. If script is not undefined, then
310   // a. If tag does not contain a unicode_script_subtag production, then
311   //   i. Set tag to the concatenation of the unicode_language_subtag
312   //      production of tag, "-", script, and the rest of tag.
313   // b. Else,
314   //   i. Set tag to tag with the substring corresponding to the
315   //      unicode_script_subtag production replaced by the string script.
316   // 12. If region is not undefined, then
317   // a. If tag does not contain a unicode_region_subtag production, then
318   //   i. Set tag to the concatenation of the unicode_language_subtag
319   //      production of tag, the substring corresponding to the  "-"
320   //      unicode_script_subtag production if present, "-", region, and
321   //      the rest of tag.
322   // b. Else,
323   // i. Set tag to tag with the substring corresponding to the
324   //    unicode_region_subtag production replaced by the string region.
325   // 13.  Return CanonicalizeLanguageTag(tag).
326   return Just(true);
327 }
328 
329 }  // namespace
330 
New(Isolate * isolate,Handle<Map> map,Handle<String> locale_str,Handle<JSReceiver> options)331 MaybeHandle<JSLocale> JSLocale::New(Isolate* isolate, Handle<Map> map,
332                                     Handle<String> locale_str,
333                                     Handle<JSReceiver> options) {
334   icu::LocaleBuilder builder;
335   Maybe<bool> maybe_apply =
336       ApplyOptionsToTag(isolate, locale_str, options, &builder);
337   MAYBE_RETURN(maybe_apply, MaybeHandle<JSLocale>());
338   if (!maybe_apply.FromJust()) {
339     THROW_NEW_ERROR(isolate,
340                     NewRangeError(MessageTemplate::kLocaleBadParameters),
341                     JSLocale);
342   }
343 
344   Maybe<bool> maybe_insert =
345       InsertOptionsIntoLocale(isolate, options, &builder);
346   MAYBE_RETURN(maybe_insert, MaybeHandle<JSLocale>());
347   UErrorCode status = U_ZERO_ERROR;
348   icu::Locale icu_locale = builder.build(status);
349   if (!maybe_insert.FromJust() || U_FAILURE(status)) {
350     THROW_NEW_ERROR(isolate,
351                     NewRangeError(MessageTemplate::kLocaleBadParameters),
352                     JSLocale);
353   }
354 
355   // 31. Set locale.[[Locale]] to r.[[locale]].
356   Handle<Managed<icu::Locale>> managed_locale =
357       Managed<icu::Locale>::FromRawPtr(isolate, 0, icu_locale.clone());
358 
359   // Now all properties are ready, so we can allocate the result object.
360   Handle<JSLocale> locale = Handle<JSLocale>::cast(
361       isolate->factory()->NewFastOrSlowJSObjectFromMap(map));
362   DisallowHeapAllocation no_gc;
363   locale->set_icu_locale(*managed_locale);
364   return locale;
365 }
366 
367 namespace {
368 
Construct(Isolate * isolate,const icu::Locale & icu_locale)369 MaybeHandle<JSLocale> Construct(Isolate* isolate,
370                                 const icu::Locale& icu_locale) {
371   Handle<Managed<icu::Locale>> managed_locale =
372       Managed<icu::Locale>::FromRawPtr(isolate, 0, icu_locale.clone());
373 
374   Handle<JSFunction> constructor(
375       isolate->native_context()->intl_locale_function(), isolate);
376 
377   Handle<Map> map;
378   ASSIGN_RETURN_ON_EXCEPTION(
379       isolate, map,
380       JSFunction::GetDerivedMap(isolate, constructor, constructor), JSLocale);
381 
382   Handle<JSLocale> locale = Handle<JSLocale>::cast(
383       isolate->factory()->NewFastOrSlowJSObjectFromMap(map));
384   DisallowHeapAllocation no_gc;
385   locale->set_icu_locale(*managed_locale);
386   return locale;
387 }
388 
389 }  // namespace
390 
Maximize(Isolate * isolate,Handle<JSLocale> locale)391 MaybeHandle<JSLocale> JSLocale::Maximize(Isolate* isolate,
392                                          Handle<JSLocale> locale) {
393   icu::Locale icu_locale(*(locale->icu_locale().raw()));
394   UErrorCode status = U_ZERO_ERROR;
395   icu_locale.addLikelySubtags(status);
396   DCHECK(U_SUCCESS(status));
397   DCHECK(!icu_locale.isBogus());
398   return Construct(isolate, icu_locale);
399 }
400 
Minimize(Isolate * isolate,Handle<JSLocale> locale)401 MaybeHandle<JSLocale> JSLocale::Minimize(Isolate* isolate,
402                                          Handle<JSLocale> locale) {
403   icu::Locale icu_locale(*(locale->icu_locale().raw()));
404   UErrorCode status = U_ZERO_ERROR;
405   icu_locale.minimizeSubtags(status);
406   DCHECK(U_SUCCESS(status));
407   DCHECK(!icu_locale.isBogus());
408   return Construct(isolate, icu_locale);
409 }
410 
Language(Isolate * isolate,Handle<JSLocale> locale)411 Handle<Object> JSLocale::Language(Isolate* isolate, Handle<JSLocale> locale) {
412   Factory* factory = isolate->factory();
413   const char* language = locale->icu_locale().raw()->getLanguage();
414   if (strlen(language) == 0) return factory->undefined_value();
415   return factory->NewStringFromAsciiChecked(language);
416 }
417 
Script(Isolate * isolate,Handle<JSLocale> locale)418 Handle<Object> JSLocale::Script(Isolate* isolate, Handle<JSLocale> locale) {
419   Factory* factory = isolate->factory();
420   const char* script = locale->icu_locale().raw()->getScript();
421   if (strlen(script) == 0) return factory->undefined_value();
422   return factory->NewStringFromAsciiChecked(script);
423 }
424 
Region(Isolate * isolate,Handle<JSLocale> locale)425 Handle<Object> JSLocale::Region(Isolate* isolate, Handle<JSLocale> locale) {
426   Factory* factory = isolate->factory();
427   const char* region = locale->icu_locale().raw()->getCountry();
428   if (strlen(region) == 0) return factory->undefined_value();
429   return factory->NewStringFromAsciiChecked(region);
430 }
431 
BaseName(Isolate * isolate,Handle<JSLocale> locale)432 Handle<String> JSLocale::BaseName(Isolate* isolate, Handle<JSLocale> locale) {
433   icu::Locale icu_locale =
434       icu::Locale::createFromName(locale->icu_locale().raw()->getBaseName());
435   std::string base_name = Intl::ToLanguageTag(icu_locale).FromJust();
436   return isolate->factory()->NewStringFromAsciiChecked(base_name.c_str());
437 }
438 
Calendar(Isolate * isolate,Handle<JSLocale> locale)439 Handle<Object> JSLocale::Calendar(Isolate* isolate, Handle<JSLocale> locale) {
440   return UnicodeKeywordValue(isolate, locale, "ca");
441 }
442 
CaseFirst(Isolate * isolate,Handle<JSLocale> locale)443 Handle<Object> JSLocale::CaseFirst(Isolate* isolate, Handle<JSLocale> locale) {
444   return UnicodeKeywordValue(isolate, locale, "kf");
445 }
446 
Collation(Isolate * isolate,Handle<JSLocale> locale)447 Handle<Object> JSLocale::Collation(Isolate* isolate, Handle<JSLocale> locale) {
448   return UnicodeKeywordValue(isolate, locale, "co");
449 }
450 
HourCycle(Isolate * isolate,Handle<JSLocale> locale)451 Handle<Object> JSLocale::HourCycle(Isolate* isolate, Handle<JSLocale> locale) {
452   return UnicodeKeywordValue(isolate, locale, "hc");
453 }
454 
Numeric(Isolate * isolate,Handle<JSLocale> locale)455 Handle<Object> JSLocale::Numeric(Isolate* isolate, Handle<JSLocale> locale) {
456   Factory* factory = isolate->factory();
457   icu::Locale* icu_locale = locale->icu_locale().raw();
458   UErrorCode status = U_ZERO_ERROR;
459   std::string numeric =
460       icu_locale->getUnicodeKeywordValue<std::string>("kn", status);
461   return (numeric == "true") ? factory->true_value() : factory->false_value();
462 }
463 
NumberingSystem(Isolate * isolate,Handle<JSLocale> locale)464 Handle<Object> JSLocale::NumberingSystem(Isolate* isolate,
465                                          Handle<JSLocale> locale) {
466   return UnicodeKeywordValue(isolate, locale, "nu");
467 }
468 
ToString(Handle<JSLocale> locale)469 std::string JSLocale::ToString(Handle<JSLocale> locale) {
470   icu::Locale* icu_locale = locale->icu_locale().raw();
471   return Intl::ToLanguageTag(*icu_locale).FromJust();
472 }
473 
ToString(Isolate * isolate,Handle<JSLocale> locale)474 Handle<String> JSLocale::ToString(Isolate* isolate, Handle<JSLocale> locale) {
475   std::string locale_str = JSLocale::ToString(locale);
476   return isolate->factory()->NewStringFromAsciiChecked(locale_str.c_str());
477 }
478 
479 }  // namespace internal
480 }  // namespace v8
481