• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2018 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef V8_INTL_SUPPORT
6 #error Internationalization is expected to be enabled.
7 #endif  // V8_INTL_SUPPORT
8 
9 #include "src/objects/js-locale.h"
10 
11 #include <map>
12 #include <memory>
13 #include <string>
14 #include <vector>
15 
16 #include "src/api.h"
17 #include "src/global-handles.h"
18 #include "src/heap/factory.h"
19 #include "src/isolate.h"
20 #include "src/objects-inl.h"
21 #include "src/objects/intl-objects.h"
22 #include "src/objects/js-locale-inl.h"
23 #include "unicode/locid.h"
24 #include "unicode/uloc.h"
25 #include "unicode/unistr.h"
26 #include "unicode/uvernum.h"
27 #include "unicode/uversion.h"
28 
29 #if U_ICU_VERSION_MAJOR_NUM >= 59
30 #include "unicode/char16ptr.h"
31 #endif
32 
33 namespace v8 {
34 namespace internal {
35 
36 namespace {
37 
// Table entry tying one Intl.Locale option to the Unicode extension keyword
// that carries it inside a locale string.
struct OptionData {
  // Property name that is looked up on the options object.
  const char* name;
  // BCP47 Unicode extension key; converted with uloc_toLegacyKey before use.
  const char* key;
  // Value list handed to Intl::GetStringOption when the option is read as a
  // string. Not owned by this struct.
  const std::vector<const char*>* possible_values;
  // When true the option is read with Intl::GetBoolOption instead.
  bool is_bool_value;
};
44 
45 // Inserts tags from options into locale string.
InsertOptionsIntoLocale(Isolate * isolate,Handle<JSReceiver> options,char * icu_locale)46 Maybe<bool> InsertOptionsIntoLocale(Isolate* isolate,
47                                     Handle<JSReceiver> options,
48                                     char* icu_locale) {
49   CHECK(isolate);
50   CHECK(icu_locale);
51 
52   static std::vector<const char*> hour_cycle_values = {"h11", "h12", "h23",
53                                                        "h24"};
54   static std::vector<const char*> case_first_values = {"upper", "lower",
55                                                        "false"};
56   static std::vector<const char*> empty_values = {};
57   static const std::array<OptionData, 6> kOptionToUnicodeTagMap = {
58       {{"calendar", "ca", &empty_values, false},
59        {"collation", "co", &empty_values, false},
60        {"hourCycle", "hc", &hour_cycle_values, false},
61        {"caseFirst", "kf", &case_first_values, false},
62        {"numeric", "kn", &empty_values, true},
63        {"numberingSystem", "nu", &empty_values, false}}};
64 
65   // TODO(cira): Pass in values as per the spec to make this to be
66   // spec compliant.
67 
68   for (const auto& option_to_bcp47 : kOptionToUnicodeTagMap) {
69     std::unique_ptr<char[]> value_str = nullptr;
70     bool value_bool = false;
71     Maybe<bool> maybe_found =
72         option_to_bcp47.is_bool_value
73             ? Intl::GetBoolOption(isolate, options, option_to_bcp47.name,
74                                   "locale", &value_bool)
75             : Intl::GetStringOption(isolate, options, option_to_bcp47.name,
76                                     *(option_to_bcp47.possible_values),
77                                     "locale", &value_str);
78     if (maybe_found.IsNothing()) return maybe_found;
79 
80     // TODO(cira): Use fallback value if value is not found to make
81     // this spec compliant.
82     if (!maybe_found.FromJust()) continue;
83 
84     if (option_to_bcp47.is_bool_value) {
85       value_str = value_bool ? isolate->factory()->true_string()->ToCString()
86                              : isolate->factory()->false_string()->ToCString();
87     }
88     DCHECK_NOT_NULL(value_str.get());
89 
90     // Convert bcp47 key and value into legacy ICU format so we can use
91     // uloc_setKeywordValue.
92     const char* key = uloc_toLegacyKey(option_to_bcp47.key);
93     DCHECK_NOT_NULL(key);
94 
95     // Overwrite existing, or insert new key-value to the locale string.
96     const char* value = uloc_toLegacyType(key, value_str.get());
97     UErrorCode status = U_ZERO_ERROR;
98     if (value) {
99       // TODO(cira): ICU puts artificial limit on locale length, while BCP47
100       // doesn't. Switch to C++ API when it's ready.
101       // Related ICU bug - https://ssl.icu-project.org/trac/ticket/13417.
102       uloc_setKeywordValue(key, value, icu_locale, ULOC_FULLNAME_CAPACITY,
103                            &status);
104       if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING) {
105         return Just(false);
106       }
107     } else {
108       return Just(false);
109     }
110   }
111 
112   return Just(true);
113 }
114 
115 // Fills in the JSLocale object slots with Unicode tag/values.
PopulateLocaleWithUnicodeTags(Isolate * isolate,const char * icu_locale,Handle<JSLocale> locale_holder)116 bool PopulateLocaleWithUnicodeTags(Isolate* isolate, const char* icu_locale,
117                                    Handle<JSLocale> locale_holder) {
118   CHECK(isolate);
119   CHECK(icu_locale);
120 
121   Factory* factory = isolate->factory();
122 
123   UErrorCode status = U_ZERO_ERROR;
124   UEnumeration* keywords = uloc_openKeywords(icu_locale, &status);
125   if (!keywords) return true;
126 
127   char value[ULOC_FULLNAME_CAPACITY];
128   while (const char* keyword = uenum_next(keywords, nullptr, &status)) {
129     uloc_getKeywordValue(icu_locale, keyword, value, ULOC_FULLNAME_CAPACITY,
130                          &status);
131     if (U_FAILURE(status)) {
132       status = U_ZERO_ERROR;
133       continue;
134     }
135 
136     // Ignore those we don't recognize - spec allows that.
137     const char* bcp47_key = uloc_toUnicodeLocaleKey(keyword);
138     if (bcp47_key) {
139       const char* bcp47_value = uloc_toUnicodeLocaleType(bcp47_key, value);
140       if (bcp47_value) {
141           Handle<String> bcp47_handle =
142               factory->NewStringFromAsciiChecked(bcp47_value);
143           if (strcmp(bcp47_key, "kn") == 0) {
144             locale_holder->set_numeric(*bcp47_handle);
145           } else if (strcmp(bcp47_key, "ca") == 0) {
146             locale_holder->set_calendar(*bcp47_handle);
147           } else if (strcmp(bcp47_key, "kf") == 0) {
148             locale_holder->set_case_first(*bcp47_handle);
149           } else if (strcmp(bcp47_key, "co") == 0) {
150             locale_holder->set_collation(*bcp47_handle);
151           } else if (strcmp(bcp47_key, "hc") == 0) {
152             locale_holder->set_hour_cycle(*bcp47_handle);
153           } else if (strcmp(bcp47_key, "nu") == 0) {
154             locale_holder->set_numbering_system(*bcp47_handle);
155           }
156       }
157     }
158   }
159 
160   uenum_close(keywords);
161 
162   return true;
163 }
164 }  // namespace
165 
InitializeLocale(Isolate * isolate,Handle<JSLocale> locale_holder,Handle<String> locale,Handle<JSReceiver> options)166 MaybeHandle<JSLocale> JSLocale::InitializeLocale(Isolate* isolate,
167                                                  Handle<JSLocale> locale_holder,
168                                                  Handle<String> locale,
169                                                  Handle<JSReceiver> options) {
170   static const char* const kMethod = "Intl.Locale";
171   v8::Isolate* v8_isolate = reinterpret_cast<v8::Isolate*>(isolate);
172   UErrorCode status = U_ZERO_ERROR;
173 
174   // Get ICU locale format, and canonicalize it.
175   char icu_result[ULOC_FULLNAME_CAPACITY];
176   char icu_canonical[ULOC_FULLNAME_CAPACITY];
177 
178   if (locale->length() == 0) {
179     THROW_NEW_ERROR(isolate, NewRangeError(MessageTemplate::kLocaleNotEmpty),
180                     JSLocale);
181   }
182 
183   v8::String::Utf8Value bcp47_locale(v8_isolate, v8::Utils::ToLocal(locale));
184   CHECK_LT(0, bcp47_locale.length());
185   CHECK_NOT_NULL(*bcp47_locale);
186 
187   int icu_length = uloc_forLanguageTag(
188       *bcp47_locale, icu_result, ULOC_FULLNAME_CAPACITY, nullptr, &status);
189 
190   if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING ||
191       icu_length == 0) {
192     THROW_NEW_ERROR(
193         isolate,
194         NewRangeError(MessageTemplate::kLocaleBadParameters,
195                       isolate->factory()->NewStringFromAsciiChecked(kMethod),
196                       locale_holder),
197         JSLocale);
198     return MaybeHandle<JSLocale>();
199   }
200 
201   Maybe<bool> error = InsertOptionsIntoLocale(isolate, options, icu_result);
202   MAYBE_RETURN(error, MaybeHandle<JSLocale>());
203   if (!error.FromJust()) {
204     THROW_NEW_ERROR(
205         isolate,
206         NewRangeError(MessageTemplate::kLocaleBadParameters,
207                       isolate->factory()->NewStringFromAsciiChecked(kMethod),
208                       locale_holder),
209         JSLocale);
210     return MaybeHandle<JSLocale>();
211   }
212   DCHECK(error.FromJust());
213 
214   uloc_canonicalize(icu_result, icu_canonical, ULOC_FULLNAME_CAPACITY, &status);
215   if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING) {
216     THROW_NEW_ERROR(
217         isolate,
218         NewRangeError(MessageTemplate::kLocaleBadParameters,
219                       isolate->factory()->NewStringFromAsciiChecked(kMethod),
220                       locale_holder),
221         JSLocale);
222     return MaybeHandle<JSLocale>();
223   }
224 
225   if (!PopulateLocaleWithUnicodeTags(isolate, icu_canonical, locale_holder)) {
226     THROW_NEW_ERROR(
227         isolate,
228         NewRangeError(MessageTemplate::kLocaleBadParameters,
229                       isolate->factory()->NewStringFromAsciiChecked(kMethod),
230                       locale_holder),
231         JSLocale);
232     return MaybeHandle<JSLocale>();
233   }
234 
235   // Extract language, script and region parts.
236   char icu_language[ULOC_LANG_CAPACITY];
237   uloc_getLanguage(icu_canonical, icu_language, ULOC_LANG_CAPACITY, &status);
238 
239   char icu_script[ULOC_SCRIPT_CAPACITY];
240   uloc_getScript(icu_canonical, icu_script, ULOC_SCRIPT_CAPACITY, &status);
241 
242   char icu_region[ULOC_COUNTRY_CAPACITY];
243   uloc_getCountry(icu_canonical, icu_region, ULOC_COUNTRY_CAPACITY, &status);
244 
245   if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING) {
246     THROW_NEW_ERROR(
247         isolate,
248         NewRangeError(MessageTemplate::kLocaleBadParameters,
249                       isolate->factory()->NewStringFromAsciiChecked(kMethod),
250                       locale_holder),
251         JSLocale);
252     return MaybeHandle<JSLocale>();
253   }
254 
255   Factory* factory = isolate->factory();
256 
257   // NOTE: One shouldn't use temporary handles, because they can go out of
258   // scope and be garbage collected before properly assigned.
259   // DON'T DO THIS: locale_holder->set_language(*f->NewStringAscii...);
260   Handle<String> language = factory->NewStringFromAsciiChecked(icu_language);
261   locale_holder->set_language(*language);
262 
263   if (strlen(icu_script) != 0) {
264     Handle<String> script = factory->NewStringFromAsciiChecked(icu_script);
265     locale_holder->set_script(*script);
266   }
267 
268   if (strlen(icu_region) != 0) {
269     Handle<String> region = factory->NewStringFromAsciiChecked(icu_region);
270     locale_holder->set_region(*region);
271   }
272 
273   char icu_base_name[ULOC_FULLNAME_CAPACITY];
274   uloc_getBaseName(icu_canonical, icu_base_name, ULOC_FULLNAME_CAPACITY,
275                    &status);
276   // We need to convert it back to BCP47.
277   char bcp47_result[ULOC_FULLNAME_CAPACITY];
278   uloc_toLanguageTag(icu_base_name, bcp47_result, ULOC_FULLNAME_CAPACITY, true,
279                      &status);
280   if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING) {
281     THROW_NEW_ERROR(
282         isolate,
283         NewRangeError(MessageTemplate::kLocaleBadParameters,
284                       isolate->factory()->NewStringFromAsciiChecked(kMethod),
285                       locale_holder),
286         JSLocale);
287     return MaybeHandle<JSLocale>();
288   }
289   Handle<String> base_name = factory->NewStringFromAsciiChecked(bcp47_result);
290   locale_holder->set_base_name(*base_name);
291 
292   // Produce final representation of the locale string, for toString().
293   uloc_toLanguageTag(icu_canonical, bcp47_result, ULOC_FULLNAME_CAPACITY, true,
294                      &status);
295   if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING) {
296     THROW_NEW_ERROR(
297         isolate,
298         NewRangeError(MessageTemplate::kLocaleBadParameters,
299                       isolate->factory()->NewStringFromAsciiChecked(kMethod),
300                       locale_holder),
301         JSLocale);
302     return MaybeHandle<JSLocale>();
303   }
304   Handle<String> locale_handle =
305       factory->NewStringFromAsciiChecked(bcp47_result);
306   locale_holder->set_locale(*locale_handle);
307 
308   return locale_holder;
309 }
310 
311 namespace {
312 
MorphLocale(Isolate * isolate,String * input,int32_t (* morph_func)(const char *,char *,int32_t,UErrorCode *))313 Handle<String> MorphLocale(Isolate* isolate, String* input,
314                            int32_t (*morph_func)(const char*, char*, int32_t,
315                                                  UErrorCode*)) {
316   Factory* factory = isolate->factory();
317   char localeBuffer[ULOC_FULLNAME_CAPACITY];
318   UErrorCode status = U_ZERO_ERROR;
319   DCHECK_NOT_NULL(morph_func);
320   int32_t length = (*morph_func)(input->ToCString().get(), localeBuffer,
321                                  ULOC_FULLNAME_CAPACITY, &status);
322   DCHECK(U_SUCCESS(status));
323   DCHECK_GT(length, 0);
324   std::string locale(localeBuffer, length);
325   std::replace(locale.begin(), locale.end(), '_', '-');
326   return factory->NewStringFromAsciiChecked(locale.c_str());
327 }
328 
329 }  // namespace
330 
Maximize(Isolate * isolate,String * locale)331 Handle<String> JSLocale::Maximize(Isolate* isolate, String* locale) {
332   return MorphLocale(isolate, locale, uloc_addLikelySubtags);
333 }
334 
Minimize(Isolate * isolate,String * locale)335 Handle<String> JSLocale::Minimize(Isolate* isolate, String* locale) {
336   return MorphLocale(isolate, locale, uloc_minimizeSubtags);
337 }
338 
339 }  // namespace internal
340 }  // namespace v8
341