1 // Copyright 2018 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #ifndef V8_INTL_SUPPORT
6 #error Internationalization is expected to be enabled.
7 #endif // V8_INTL_SUPPORT
8
9 #include "src/objects/js-locale.h"
10
11 #include <map>
12 #include <memory>
13 #include <string>
14 #include <vector>
15
16 #include "src/api/api.h"
17 #include "src/execution/isolate.h"
18 #include "src/handles/global-handles.h"
19 #include "src/heap/factory.h"
20 #include "src/objects/intl-objects.h"
21 #include "src/objects/js-locale-inl.h"
22 #include "src/objects/objects-inl.h"
23 #include "unicode/char16ptr.h"
24 #include "unicode/localebuilder.h"
25 #include "unicode/locid.h"
26 #include "unicode/uloc.h"
27 #include "unicode/unistr.h"
28
29 namespace v8 {
30 namespace internal {
31
32 namespace {
33
// Describes one option that InsertOptionsIntoLocale() reads from the options
// object and stores as a Unicode locale keyword.
struct OptionData {
  // Property name that is looked up on the options object (e.g. "calendar").
  const char* name;
  // Unicode extension key the value is stored under (e.g. "ca").
  const char* key;
  // List of values passed to Intl::GetStringOption() for this option. Several
  // options use an empty list here (presumably meaning "no fixed value set";
  // confirm against Intl::GetStringOption).
  const std::vector<const char*>* possible_values;
  // True when the option is read with Intl::GetBoolOption() rather than
  // Intl::GetStringOption().
  bool is_bool_value;
};
40
41 // Inserts tags from options into locale string.
InsertOptionsIntoLocale(Isolate * isolate,Handle<JSReceiver> options,icu::LocaleBuilder * builder)42 Maybe<bool> InsertOptionsIntoLocale(Isolate* isolate,
43 Handle<JSReceiver> options,
44 icu::LocaleBuilder* builder) {
45 DCHECK(isolate);
46
47 const std::vector<const char*> hour_cycle_values = {"h11", "h12", "h23",
48 "h24"};
49 const std::vector<const char*> case_first_values = {"upper", "lower",
50 "false"};
51 const std::vector<const char*> empty_values = {};
52 const std::array<OptionData, 6> kOptionToUnicodeTagMap = {
53 {{"calendar", "ca", &empty_values, false},
54 {"collation", "co", &empty_values, false},
55 {"hourCycle", "hc", &hour_cycle_values, false},
56 {"caseFirst", "kf", &case_first_values, false},
57 {"numeric", "kn", &empty_values, true},
58 {"numberingSystem", "nu", &empty_values, false}}};
59
60 // TODO(cira): Pass in values as per the spec to make this to be
61 // spec compliant.
62
63 for (const auto& option_to_bcp47 : kOptionToUnicodeTagMap) {
64 std::unique_ptr<char[]> value_str = nullptr;
65 bool value_bool = false;
66 Maybe<bool> maybe_found =
67 option_to_bcp47.is_bool_value
68 ? Intl::GetBoolOption(isolate, options, option_to_bcp47.name,
69 "locale", &value_bool)
70 : Intl::GetStringOption(isolate, options, option_to_bcp47.name,
71 *(option_to_bcp47.possible_values),
72 "locale", &value_str);
73 MAYBE_RETURN(maybe_found, Nothing<bool>());
74
75 // TODO(cira): Use fallback value if value is not found to make
76 // this spec compliant.
77 if (!maybe_found.FromJust()) continue;
78
79 if (option_to_bcp47.is_bool_value) {
80 value_str = value_bool ? isolate->factory()->true_string()->ToCString()
81 : isolate->factory()->false_string()->ToCString();
82 }
83 DCHECK_NOT_NULL(value_str.get());
84
85 // Overwrite existing, or insert new key-value to the locale string.
86 if (!uloc_toLegacyType(uloc_toLegacyKey(option_to_bcp47.key),
87 value_str.get())) {
88 return Just(false);
89 }
90 builder->setUnicodeLocaleKeyword(option_to_bcp47.key, value_str.get());
91 }
92 return Just(true);
93 }
94
UnicodeKeywordValue(Isolate * isolate,Handle<JSLocale> locale,const char * key)95 Handle<Object> UnicodeKeywordValue(Isolate* isolate, Handle<JSLocale> locale,
96 const char* key) {
97 icu::Locale* icu_locale = locale->icu_locale().raw();
98 UErrorCode status = U_ZERO_ERROR;
99 std::string value =
100 icu_locale->getUnicodeKeywordValue<std::string>(key, status);
101 if (status == U_ILLEGAL_ARGUMENT_ERROR || value == "") {
102 return isolate->factory()->undefined_value();
103 }
104 if (value == "yes") {
105 value = "true";
106 }
107 return isolate->factory()->NewStringFromAsciiChecked(value.c_str());
108 }
109
// Returns true when |value| lies in the closed interval [start, end].
bool InRange(size_t value, size_t start, size_t end) {
  if (value < start) return false;
  return value <= end;
}
113
// Returns true when the character |value| lies in the closed interval
// [start, end].
bool InRange(char value, char start, char end) {
  return !(value < start || end < value);
}
117
IsCheckRange(const std::string & str,size_t min,size_t max,bool (range_check_func)(char))118 bool IsCheckRange(const std::string& str, size_t min, size_t max,
119 bool(range_check_func)(char)) {
120 if (!InRange(str.length(), min, max)) return false;
121 for (size_t i = 0; i < str.length(); i++) {
122 if (!range_check_func(str[i])) return false;
123 }
124 return true;
125 }
IsAlpha(const std::string & str,size_t min,size_t max)126 bool IsAlpha(const std::string& str, size_t min, size_t max) {
127 return IsCheckRange(str, min, max, [](char c) -> bool {
128 return InRange(c, 'a', 'z') || InRange(c, 'A', 'Z');
129 });
130 }
131
IsDigit(const std::string & str,size_t min,size_t max)132 bool IsDigit(const std::string& str, size_t min, size_t max) {
133 return IsCheckRange(str, min, max,
134 [](char c) -> bool { return InRange(c, '0', '9'); });
135 }
136
IsAlphanum(const std::string & str,size_t min,size_t max)137 bool IsAlphanum(const std::string& str, size_t min, size_t max) {
138 return IsCheckRange(str, min, max, [](char c) -> bool {
139 return InRange(c, 'a', 'z') || InRange(c, 'A', 'Z') || InRange(c, '0', '9');
140 });
141 }
142
IsUnicodeLanguageSubtag(const std::string & value)143 bool IsUnicodeLanguageSubtag(const std::string& value) {
144 // unicode_language_subtag = alpha{2,3} | alpha{5,8};
145 return IsAlpha(value, 2, 3) || IsAlpha(value, 5, 8);
146 }
147
IsUnicodeScriptSubtag(const std::string & value)148 bool IsUnicodeScriptSubtag(const std::string& value) {
149 // unicode_script_subtag = alpha{4} ;
150 return IsAlpha(value, 4, 4);
151 }
152
IsUnicodeRegionSubtag(const std::string & value)153 bool IsUnicodeRegionSubtag(const std::string& value) {
154 // unicode_region_subtag = (alpha{2} | digit{3});
155 return IsAlpha(value, 2, 2) || IsDigit(value, 3, 3);
156 }
157
IsDigitAlphanum3(const std::string & value)158 bool IsDigitAlphanum3(const std::string& value) {
159 return value.length() == 4 && InRange(value[0], '0', '9') &&
160 IsAlphanum(value.substr(1), 3, 3);
161 }
162
IsUnicodeVariantSubtag(const std::string & value)163 bool IsUnicodeVariantSubtag(const std::string& value) {
164 // unicode_variant_subtag = (alphanum{5,8} | digit alphanum{3}) ;
165 return IsAlphanum(value, 5, 8) || IsDigitAlphanum3(value);
166 }
167
IsExtensionSingleton(const std::string & value)168 bool IsExtensionSingleton(const std::string& value) {
169 return IsAlphanum(value, 1, 1);
170 }
171 } // namespace
172
Is38AlphaNumList(const std::string & value)173 bool JSLocale::Is38AlphaNumList(const std::string& value) {
174 std::size_t found = value.find("-");
175 if (found == std::string::npos) {
176 return IsAlphanum(value, 3, 8);
177 }
178 return IsAlphanum(value.substr(0, found), 3, 8) &&
179 JSLocale::Is38AlphaNumList(value.substr(found + 1));
180 }
181
Is3Alpha(const std::string & value)182 bool JSLocale::Is3Alpha(const std::string& value) {
183 return IsAlpha(value, 3, 3);
184 }
185
186 // TODO(ftang) Replace the following check w/ icu::LocaleBuilder
187 // once ICU64 land in March 2019.
StartsWithUnicodeLanguageId(const std::string & value)188 bool JSLocale::StartsWithUnicodeLanguageId(const std::string& value) {
189 // unicode_language_id =
190 // unicode_language_subtag (sep unicode_script_subtag)?
191 // (sep unicode_region_subtag)? (sep unicode_variant_subtag)* ;
192 std::vector<std::string> tokens;
193 std::string token;
194 std::istringstream token_stream(value);
195 while (std::getline(token_stream, token, '-')) {
196 tokens.push_back(token);
197 }
198 if (tokens.size() == 0) return false;
199
200 // length >= 1
201 if (!IsUnicodeLanguageSubtag(tokens[0])) return false;
202
203 if (tokens.size() == 1) return true;
204
205 // length >= 2
206 if (IsExtensionSingleton(tokens[1])) return true;
207
208 size_t index = 1;
209 if (IsUnicodeScriptSubtag(tokens[index])) {
210 index++;
211 if (index == tokens.size()) return true;
212 }
213 if (IsUnicodeRegionSubtag(tokens[index])) {
214 index++;
215 }
216 while (index < tokens.size()) {
217 if (IsExtensionSingleton(tokens[index])) return true;
218 if (!IsUnicodeVariantSubtag(tokens[index])) return false;
219 index++;
220 }
221 return true;
222 }
223
224 namespace {
ApplyOptionsToTag(Isolate * isolate,Handle<String> tag,Handle<JSReceiver> options,icu::LocaleBuilder * builder)225 Maybe<bool> ApplyOptionsToTag(Isolate* isolate, Handle<String> tag,
226 Handle<JSReceiver> options,
227 icu::LocaleBuilder* builder) {
228 v8::Isolate* v8_isolate = reinterpret_cast<v8::Isolate*>(isolate);
229 if (tag->length() == 0) {
230 THROW_NEW_ERROR_RETURN_VALUE(
231 isolate, NewRangeError(MessageTemplate::kLocaleNotEmpty),
232 Nothing<bool>());
233 }
234
235 v8::String::Utf8Value bcp47_tag(v8_isolate, v8::Utils::ToLocal(tag));
236 builder->setLanguageTag({*bcp47_tag, bcp47_tag.length()});
237 DCHECK_LT(0, bcp47_tag.length());
238 DCHECK_NOT_NULL(*bcp47_tag);
239 // 2. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError
240 // exception.
241 if (!JSLocale::StartsWithUnicodeLanguageId(*bcp47_tag)) {
242 return Just(false);
243 }
244 UErrorCode status = U_ZERO_ERROR;
245 builder->build(status);
246 if (U_FAILURE(status)) {
247 return Just(false);
248 }
249
250 // 3. Let language be ? GetOption(options, "language", "string", undefined,
251 // undefined).
252 const std::vector<const char*> empty_values = {};
253 std::unique_ptr<char[]> language_str = nullptr;
254 Maybe<bool> maybe_language =
255 Intl::GetStringOption(isolate, options, "language", empty_values,
256 "ApplyOptionsToTag", &language_str);
257 MAYBE_RETURN(maybe_language, Nothing<bool>());
258 // 4. If language is not undefined, then
259 if (maybe_language.FromJust()) {
260 builder->setLanguage(language_str.get());
261 builder->build(status);
262 // a. If language does not match the unicode_language_subtag production,
263 // throw a RangeError exception.
264 if (U_FAILURE(status) || language_str[0] == '\0' ||
265 IsAlpha(language_str.get(), 4, 4)) {
266 return Just(false);
267 }
268 }
269 // 5. Let script be ? GetOption(options, "script", "string", undefined,
270 // undefined).
271 std::unique_ptr<char[]> script_str = nullptr;
272 Maybe<bool> maybe_script =
273 Intl::GetStringOption(isolate, options, "script", empty_values,
274 "ApplyOptionsToTag", &script_str);
275 MAYBE_RETURN(maybe_script, Nothing<bool>());
276 // 6. If script is not undefined, then
277 if (maybe_script.FromJust()) {
278 builder->setScript(script_str.get());
279 builder->build(status);
280 // a. If script does not match the unicode_script_subtag production, throw
281 // a RangeError exception.
282 if (U_FAILURE(status) || script_str[0] == '\0') {
283 return Just(false);
284 }
285 }
286 // 7. Let region be ? GetOption(options, "region", "string", undefined,
287 // undefined).
288 std::unique_ptr<char[]> region_str = nullptr;
289 Maybe<bool> maybe_region =
290 Intl::GetStringOption(isolate, options, "region", empty_values,
291 "ApplyOptionsToTag", ®ion_str);
292 MAYBE_RETURN(maybe_region, Nothing<bool>());
293 // 8. If region is not undefined, then
294 if (maybe_region.FromJust()) {
295 // a. If region does not match the region production, throw a RangeError
296 // exception.
297 builder->setRegion(region_str.get());
298 builder->build(status);
299 if (U_FAILURE(status) || region_str[0] == '\0') {
300 return Just(false);
301 }
302 }
303
304 // 9. Set tag to CanonicalizeLanguageTag(tag).
305 // 10. If language is not undefined,
306 // a. Assert: tag matches the unicode_locale_id production.
307 // b. Set tag to tag with the substring corresponding to the
308 // unicode_language_subtag production replaced by the string language.
309 // 11. If script is not undefined, then
310 // a. If tag does not contain a unicode_script_subtag production, then
311 // i. Set tag to the concatenation of the unicode_language_subtag
312 // production of tag, "-", script, and the rest of tag.
313 // b. Else,
314 // i. Set tag to tag with the substring corresponding to the
315 // unicode_script_subtag production replaced by the string script.
316 // 12. If region is not undefined, then
317 // a. If tag does not contain a unicode_region_subtag production, then
318 // i. Set tag to the concatenation of the unicode_language_subtag
319 // production of tag, the substring corresponding to the "-"
320 // unicode_script_subtag production if present, "-", region, and
321 // the rest of tag.
322 // b. Else,
323 // i. Set tag to tag with the substring corresponding to the
324 // unicode_region_subtag production replaced by the string region.
325 // 13. Return CanonicalizeLanguageTag(tag).
326 return Just(true);
327 }
328
329 } // namespace
330
New(Isolate * isolate,Handle<Map> map,Handle<String> locale_str,Handle<JSReceiver> options)331 MaybeHandle<JSLocale> JSLocale::New(Isolate* isolate, Handle<Map> map,
332 Handle<String> locale_str,
333 Handle<JSReceiver> options) {
334 icu::LocaleBuilder builder;
335 Maybe<bool> maybe_apply =
336 ApplyOptionsToTag(isolate, locale_str, options, &builder);
337 MAYBE_RETURN(maybe_apply, MaybeHandle<JSLocale>());
338 if (!maybe_apply.FromJust()) {
339 THROW_NEW_ERROR(isolate,
340 NewRangeError(MessageTemplate::kLocaleBadParameters),
341 JSLocale);
342 }
343
344 Maybe<bool> maybe_insert =
345 InsertOptionsIntoLocale(isolate, options, &builder);
346 MAYBE_RETURN(maybe_insert, MaybeHandle<JSLocale>());
347 UErrorCode status = U_ZERO_ERROR;
348 icu::Locale icu_locale = builder.build(status);
349 if (!maybe_insert.FromJust() || U_FAILURE(status)) {
350 THROW_NEW_ERROR(isolate,
351 NewRangeError(MessageTemplate::kLocaleBadParameters),
352 JSLocale);
353 }
354
355 // 31. Set locale.[[Locale]] to r.[[locale]].
356 Handle<Managed<icu::Locale>> managed_locale =
357 Managed<icu::Locale>::FromRawPtr(isolate, 0, icu_locale.clone());
358
359 // Now all properties are ready, so we can allocate the result object.
360 Handle<JSLocale> locale = Handle<JSLocale>::cast(
361 isolate->factory()->NewFastOrSlowJSObjectFromMap(map));
362 DisallowHeapAllocation no_gc;
363 locale->set_icu_locale(*managed_locale);
364 return locale;
365 }
366
367 namespace {
368
Construct(Isolate * isolate,const icu::Locale & icu_locale)369 MaybeHandle<JSLocale> Construct(Isolate* isolate,
370 const icu::Locale& icu_locale) {
371 Handle<Managed<icu::Locale>> managed_locale =
372 Managed<icu::Locale>::FromRawPtr(isolate, 0, icu_locale.clone());
373
374 Handle<JSFunction> constructor(
375 isolate->native_context()->intl_locale_function(), isolate);
376
377 Handle<Map> map;
378 ASSIGN_RETURN_ON_EXCEPTION(
379 isolate, map,
380 JSFunction::GetDerivedMap(isolate, constructor, constructor), JSLocale);
381
382 Handle<JSLocale> locale = Handle<JSLocale>::cast(
383 isolate->factory()->NewFastOrSlowJSObjectFromMap(map));
384 DisallowHeapAllocation no_gc;
385 locale->set_icu_locale(*managed_locale);
386 return locale;
387 }
388
389 } // namespace
390
Maximize(Isolate * isolate,Handle<JSLocale> locale)391 MaybeHandle<JSLocale> JSLocale::Maximize(Isolate* isolate,
392 Handle<JSLocale> locale) {
393 icu::Locale icu_locale(*(locale->icu_locale().raw()));
394 UErrorCode status = U_ZERO_ERROR;
395 icu_locale.addLikelySubtags(status);
396 DCHECK(U_SUCCESS(status));
397 DCHECK(!icu_locale.isBogus());
398 return Construct(isolate, icu_locale);
399 }
400
Minimize(Isolate * isolate,Handle<JSLocale> locale)401 MaybeHandle<JSLocale> JSLocale::Minimize(Isolate* isolate,
402 Handle<JSLocale> locale) {
403 icu::Locale icu_locale(*(locale->icu_locale().raw()));
404 UErrorCode status = U_ZERO_ERROR;
405 icu_locale.minimizeSubtags(status);
406 DCHECK(U_SUCCESS(status));
407 DCHECK(!icu_locale.isBogus());
408 return Construct(isolate, icu_locale);
409 }
410
Language(Isolate * isolate,Handle<JSLocale> locale)411 Handle<Object> JSLocale::Language(Isolate* isolate, Handle<JSLocale> locale) {
412 Factory* factory = isolate->factory();
413 const char* language = locale->icu_locale().raw()->getLanguage();
414 if (strlen(language) == 0) return factory->undefined_value();
415 return factory->NewStringFromAsciiChecked(language);
416 }
417
Script(Isolate * isolate,Handle<JSLocale> locale)418 Handle<Object> JSLocale::Script(Isolate* isolate, Handle<JSLocale> locale) {
419 Factory* factory = isolate->factory();
420 const char* script = locale->icu_locale().raw()->getScript();
421 if (strlen(script) == 0) return factory->undefined_value();
422 return factory->NewStringFromAsciiChecked(script);
423 }
424
Region(Isolate * isolate,Handle<JSLocale> locale)425 Handle<Object> JSLocale::Region(Isolate* isolate, Handle<JSLocale> locale) {
426 Factory* factory = isolate->factory();
427 const char* region = locale->icu_locale().raw()->getCountry();
428 if (strlen(region) == 0) return factory->undefined_value();
429 return factory->NewStringFromAsciiChecked(region);
430 }
431
BaseName(Isolate * isolate,Handle<JSLocale> locale)432 Handle<String> JSLocale::BaseName(Isolate* isolate, Handle<JSLocale> locale) {
433 icu::Locale icu_locale =
434 icu::Locale::createFromName(locale->icu_locale().raw()->getBaseName());
435 std::string base_name = Intl::ToLanguageTag(icu_locale).FromJust();
436 return isolate->factory()->NewStringFromAsciiChecked(base_name.c_str());
437 }
438
Calendar(Isolate * isolate,Handle<JSLocale> locale)439 Handle<Object> JSLocale::Calendar(Isolate* isolate, Handle<JSLocale> locale) {
440 return UnicodeKeywordValue(isolate, locale, "ca");
441 }
442
CaseFirst(Isolate * isolate,Handle<JSLocale> locale)443 Handle<Object> JSLocale::CaseFirst(Isolate* isolate, Handle<JSLocale> locale) {
444 return UnicodeKeywordValue(isolate, locale, "kf");
445 }
446
Collation(Isolate * isolate,Handle<JSLocale> locale)447 Handle<Object> JSLocale::Collation(Isolate* isolate, Handle<JSLocale> locale) {
448 return UnicodeKeywordValue(isolate, locale, "co");
449 }
450
HourCycle(Isolate * isolate,Handle<JSLocale> locale)451 Handle<Object> JSLocale::HourCycle(Isolate* isolate, Handle<JSLocale> locale) {
452 return UnicodeKeywordValue(isolate, locale, "hc");
453 }
454
Numeric(Isolate * isolate,Handle<JSLocale> locale)455 Handle<Object> JSLocale::Numeric(Isolate* isolate, Handle<JSLocale> locale) {
456 Factory* factory = isolate->factory();
457 icu::Locale* icu_locale = locale->icu_locale().raw();
458 UErrorCode status = U_ZERO_ERROR;
459 std::string numeric =
460 icu_locale->getUnicodeKeywordValue<std::string>("kn", status);
461 return (numeric == "true") ? factory->true_value() : factory->false_value();
462 }
463
NumberingSystem(Isolate * isolate,Handle<JSLocale> locale)464 Handle<Object> JSLocale::NumberingSystem(Isolate* isolate,
465 Handle<JSLocale> locale) {
466 return UnicodeKeywordValue(isolate, locale, "nu");
467 }
468
ToString(Handle<JSLocale> locale)469 std::string JSLocale::ToString(Handle<JSLocale> locale) {
470 icu::Locale* icu_locale = locale->icu_locale().raw();
471 return Intl::ToLanguageTag(*icu_locale).FromJust();
472 }
473
ToString(Isolate * isolate,Handle<JSLocale> locale)474 Handle<String> JSLocale::ToString(Isolate* isolate, Handle<JSLocale> locale) {
475 std::string locale_str = JSLocale::ToString(locale);
476 return isolate->factory()->NewStringFromAsciiChecked(locale_str.c_str());
477 }
478
479 } // namespace internal
480 } // namespace v8
481