1 // Copyright 2013 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #ifndef V8_INTL_SUPPORT
6 #error Internationalization is expected to be enabled.
7 #endif // V8_INTL_SUPPORT
8
9 #include "src/objects/intl-objects.h"
10 #include "src/objects/intl-objects-inl.h"
11
12 #include <algorithm>
13 #include <memory>
14 #include <string>
15 #include <vector>
16
17 #include "src/api-inl.h"
18 #include "src/global-handles.h"
19 #include "src/heap/factory.h"
20 #include "src/intl.h"
21 #include "src/isolate.h"
22 #include "src/objects-inl.h"
23 #include "src/objects/js-collator-inl.h"
24 #include "src/objects/managed.h"
25 #include "src/objects/string.h"
26 #include "src/property-descriptor.h"
27 #include "unicode/brkiter.h"
28 #include "unicode/bytestream.h"
29 #include "unicode/calendar.h"
30 #include "unicode/coll.h"
31 #include "unicode/curramt.h"
32 #include "unicode/dcfmtsym.h"
33 #include "unicode/decimfmt.h"
34 #include "unicode/dtfmtsym.h"
35 #include "unicode/dtptngen.h"
36 #include "unicode/gregocal.h"
37 #include "unicode/locid.h"
38 #include "unicode/numfmt.h"
39 #include "unicode/numsys.h"
40 #include "unicode/plurrule.h"
41 #include "unicode/rbbi.h"
42 #include "unicode/regex.h"
43 #include "unicode/smpdtfmt.h"
44 #include "unicode/timezone.h"
45 #include "unicode/uchar.h"
46 #include "unicode/ucol.h"
47 #include "unicode/ucurr.h"
48 #include "unicode/unum.h"
49 #include "unicode/upluralrules.h"
50 #include "unicode/ures.h"
51 #include "unicode/uvernum.h"
52 #include "unicode/uversion.h"
53
54 #if U_ICU_VERSION_MAJOR_NUM >= 59
55 #include "unicode/char16ptr.h"
56 #endif
57
58 namespace v8 {
59 namespace internal {
60
61 namespace {
62
ExtractStringSetting(Isolate * isolate,Handle<JSObject> options,const char * key,icu::UnicodeString * setting)63 bool ExtractStringSetting(Isolate* isolate, Handle<JSObject> options,
64 const char* key, icu::UnicodeString* setting) {
65 v8::Isolate* v8_isolate = reinterpret_cast<v8::Isolate*>(isolate);
66 Handle<String> str = isolate->factory()->NewStringFromAsciiChecked(key);
67 Handle<Object> object =
68 JSReceiver::GetProperty(isolate, options, str).ToHandleChecked();
69 if (object->IsString()) {
70 v8::String::Utf8Value utf8_string(
71 v8_isolate, v8::Utils::ToLocal(Handle<String>::cast(object)));
72 *setting = icu::UnicodeString::fromUTF8(*utf8_string);
73 return true;
74 }
75 return false;
76 }
77
ExtractIntegerSetting(Isolate * isolate,Handle<JSObject> options,const char * key,int32_t * value)78 bool ExtractIntegerSetting(Isolate* isolate, Handle<JSObject> options,
79 const char* key, int32_t* value) {
80 Handle<String> str = isolate->factory()->NewStringFromAsciiChecked(key);
81 Handle<Object> object =
82 JSReceiver::GetProperty(isolate, options, str).ToHandleChecked();
83 if (object->IsNumber()) {
84 return object->ToInt32(value);
85 }
86 return false;
87 }
88
ExtractBooleanSetting(Isolate * isolate,Handle<JSObject> options,const char * key,bool * value)89 bool ExtractBooleanSetting(Isolate* isolate, Handle<JSObject> options,
90 const char* key, bool* value) {
91 Handle<String> str = isolate->factory()->NewStringFromAsciiChecked(key);
92 Handle<Object> object =
93 JSReceiver::GetProperty(isolate, options, str).ToHandleChecked();
94 if (object->IsBoolean()) {
95 *value = object->BooleanValue(isolate);
96 return true;
97 }
98 return false;
99 }
100
CreateICUDateFormat(Isolate * isolate,const icu::Locale & icu_locale,Handle<JSObject> options)101 icu::SimpleDateFormat* CreateICUDateFormat(Isolate* isolate,
102 const icu::Locale& icu_locale,
103 Handle<JSObject> options) {
104 // Create time zone as specified by the user. We have to re-create time zone
105 // since calendar takes ownership.
106 icu::TimeZone* tz = nullptr;
107 icu::UnicodeString timezone;
108 if (ExtractStringSetting(isolate, options, "timeZone", &timezone)) {
109 tz = icu::TimeZone::createTimeZone(timezone);
110 } else {
111 tz = icu::TimeZone::createDefault();
112 }
113
114 // Create a calendar using locale, and apply time zone to it.
115 UErrorCode status = U_ZERO_ERROR;
116 icu::Calendar* calendar =
117 icu::Calendar::createInstance(tz, icu_locale, status);
118
119 if (calendar->getDynamicClassID() ==
120 icu::GregorianCalendar::getStaticClassID()) {
121 icu::GregorianCalendar* gc = (icu::GregorianCalendar*)calendar;
122 UErrorCode status = U_ZERO_ERROR;
123 // The beginning of ECMAScript time, namely -(2**53)
124 const double start_of_time = -9007199254740992;
125 gc->setGregorianChange(start_of_time, status);
126 DCHECK(U_SUCCESS(status));
127 }
128
129 // Make formatter from skeleton. Calendar and numbering system are added
130 // to the locale as Unicode extension (if they were specified at all).
131 icu::SimpleDateFormat* date_format = nullptr;
132 icu::UnicodeString skeleton;
133 if (ExtractStringSetting(isolate, options, "skeleton", &skeleton)) {
134 // See https://github.com/tc39/ecma402/issues/225 . The best pattern
135 // generation needs to be done in the base locale according to the
136 // current spec however odd it may be. See also crbug.com/826549 .
137 // This is a temporary work-around to get v8's external behavior to match
138 // the current spec, but does not follow the spec provisions mentioned
139 // in the above Ecma 402 issue.
140 // TODO(jshin): The spec may need to be revised because using the base
141 // locale for the pattern match is not quite right. Moreover, what to
142 // do with 'related year' part when 'chinese/dangi' calendar is specified
143 // has to be discussed. Revisit once the spec is clarified/revised.
144 icu::Locale no_extension_locale(icu_locale.getBaseName());
145 std::unique_ptr<icu::DateTimePatternGenerator> generator(
146 icu::DateTimePatternGenerator::createInstance(no_extension_locale,
147 status));
148 icu::UnicodeString pattern;
149 if (U_SUCCESS(status))
150 pattern = generator->getBestPattern(skeleton, status);
151
152 date_format = new icu::SimpleDateFormat(pattern, icu_locale, status);
153 if (U_SUCCESS(status)) {
154 date_format->adoptCalendar(calendar);
155 }
156 }
157
158 if (U_FAILURE(status)) {
159 delete calendar;
160 delete date_format;
161 date_format = nullptr;
162 }
163
164 return date_format;
165 }
166
SetResolvedDateSettings(Isolate * isolate,const icu::Locale & icu_locale,icu::SimpleDateFormat * date_format,Handle<JSObject> resolved)167 void SetResolvedDateSettings(Isolate* isolate, const icu::Locale& icu_locale,
168 icu::SimpleDateFormat* date_format,
169 Handle<JSObject> resolved) {
170 Factory* factory = isolate->factory();
171 UErrorCode status = U_ZERO_ERROR;
172 icu::UnicodeString pattern;
173 date_format->toPattern(pattern);
174 JSObject::SetProperty(
175 isolate, resolved, factory->intl_pattern_symbol(),
176 factory
177 ->NewStringFromTwoByte(Vector<const uint16_t>(
178 reinterpret_cast<const uint16_t*>(pattern.getBuffer()),
179 pattern.length()))
180 .ToHandleChecked(),
181 LanguageMode::kSloppy)
182 .Assert();
183
184 // Set time zone and calendar.
185 const icu::Calendar* calendar = date_format->getCalendar();
186 // getType() returns legacy calendar type name instead of LDML/BCP47 calendar
187 // key values. intl.js maps them to BCP47 values for key "ca".
188 // TODO(jshin): Consider doing it here, instead.
189 const char* calendar_name = calendar->getType();
190 JSObject::SetProperty(
191 isolate, resolved, factory->NewStringFromStaticChars("calendar"),
192 factory->NewStringFromAsciiChecked(calendar_name), LanguageMode::kSloppy)
193 .Assert();
194
195 const icu::TimeZone& tz = calendar->getTimeZone();
196 icu::UnicodeString time_zone;
197 tz.getID(time_zone);
198
199 icu::UnicodeString canonical_time_zone;
200 icu::TimeZone::getCanonicalID(time_zone, canonical_time_zone, status);
201 if (U_SUCCESS(status)) {
202 // In CLDR (http://unicode.org/cldr/trac/ticket/9943), Etc/UTC is made
203 // a separate timezone ID from Etc/GMT even though they're still the same
204 // timezone. We have Etc/UTC because 'UTC', 'Etc/Universal',
205 // 'Etc/Zulu' and others are turned to 'Etc/UTC' by ICU. Etc/GMT comes
206 // from Etc/GMT0, Etc/GMT+0, Etc/GMT-0, Etc/Greenwich.
207 // ecma402##sec-canonicalizetimezonename step 3
208 if (canonical_time_zone == UNICODE_STRING_SIMPLE("Etc/UTC") ||
209 canonical_time_zone == UNICODE_STRING_SIMPLE("Etc/GMT")) {
210 JSObject::SetProperty(
211 isolate, resolved, factory->NewStringFromStaticChars("timeZone"),
212 factory->NewStringFromStaticChars("UTC"), LanguageMode::kSloppy)
213 .Assert();
214 } else {
215 JSObject::SetProperty(isolate, resolved,
216 factory->NewStringFromStaticChars("timeZone"),
217 factory
218 ->NewStringFromTwoByte(Vector<const uint16_t>(
219 reinterpret_cast<const uint16_t*>(
220 canonical_time_zone.getBuffer()),
221 canonical_time_zone.length()))
222 .ToHandleChecked(),
223 LanguageMode::kSloppy)
224 .Assert();
225 }
226 }
227
228 // Ugly hack. ICU doesn't expose numbering system in any way, so we have
229 // to assume that for given locale NumberingSystem constructor produces the
230 // same digits as NumberFormat/Calendar would.
231 status = U_ZERO_ERROR;
232 icu::NumberingSystem* numbering_system =
233 icu::NumberingSystem::createInstance(icu_locale, status);
234 if (U_SUCCESS(status)) {
235 const char* ns = numbering_system->getName();
236 JSObject::SetProperty(
237 isolate, resolved, factory->NewStringFromStaticChars("numberingSystem"),
238 factory->NewStringFromAsciiChecked(ns), LanguageMode::kSloppy)
239 .Assert();
240 } else {
241 JSObject::SetProperty(isolate, resolved,
242 factory->NewStringFromStaticChars("numberingSystem"),
243 factory->undefined_value(), LanguageMode::kSloppy)
244 .Assert();
245 }
246 delete numbering_system;
247
248 // Set the locale
249 char result[ULOC_FULLNAME_CAPACITY];
250 status = U_ZERO_ERROR;
251 uloc_toLanguageTag(icu_locale.getName(), result, ULOC_FULLNAME_CAPACITY,
252 FALSE, &status);
253 if (U_SUCCESS(status)) {
254 JSObject::SetProperty(
255 isolate, resolved, factory->NewStringFromStaticChars("locale"),
256 factory->NewStringFromAsciiChecked(result), LanguageMode::kSloppy)
257 .Assert();
258 } else {
259 // This would never happen, since we got the locale from ICU.
260 JSObject::SetProperty(
261 isolate, resolved, factory->NewStringFromStaticChars("locale"),
262 factory->NewStringFromStaticChars("und"), LanguageMode::kSloppy)
263 .Assert();
264 }
265 }
266
SetNumericSettings(Isolate * isolate,icu::DecimalFormat * number_format,Handle<JSObject> options)267 void SetNumericSettings(Isolate* isolate, icu::DecimalFormat* number_format,
268 Handle<JSObject> options) {
269 int32_t digits;
270 if (ExtractIntegerSetting(isolate, options, "minimumIntegerDigits",
271 &digits)) {
272 number_format->setMinimumIntegerDigits(digits);
273 }
274
275 if (ExtractIntegerSetting(isolate, options, "minimumFractionDigits",
276 &digits)) {
277 number_format->setMinimumFractionDigits(digits);
278 }
279
280 if (ExtractIntegerSetting(isolate, options, "maximumFractionDigits",
281 &digits)) {
282 number_format->setMaximumFractionDigits(digits);
283 }
284
285 bool significant_digits_used = false;
286 if (ExtractIntegerSetting(isolate, options, "minimumSignificantDigits",
287 &digits)) {
288 number_format->setMinimumSignificantDigits(digits);
289 significant_digits_used = true;
290 }
291
292 if (ExtractIntegerSetting(isolate, options, "maximumSignificantDigits",
293 &digits)) {
294 number_format->setMaximumSignificantDigits(digits);
295 significant_digits_used = true;
296 }
297
298 number_format->setSignificantDigitsUsed(significant_digits_used);
299
300 number_format->setRoundingMode(icu::DecimalFormat::kRoundHalfUp);
301 }
302
CreateICUNumberFormat(Isolate * isolate,const icu::Locale & icu_locale,Handle<JSObject> options)303 icu::DecimalFormat* CreateICUNumberFormat(Isolate* isolate,
304 const icu::Locale& icu_locale,
305 Handle<JSObject> options) {
306 // Make formatter from options. Numbering system is added
307 // to the locale as Unicode extension (if it was specified at all).
308 UErrorCode status = U_ZERO_ERROR;
309 icu::DecimalFormat* number_format = nullptr;
310 icu::UnicodeString style;
311 icu::UnicodeString currency;
312 if (ExtractStringSetting(isolate, options, "style", &style)) {
313 if (style == UNICODE_STRING_SIMPLE("currency")) {
314 icu::UnicodeString display;
315 ExtractStringSetting(isolate, options, "currency", ¤cy);
316 ExtractStringSetting(isolate, options, "currencyDisplay", &display);
317
318 #if (U_ICU_VERSION_MAJOR_NUM == 4) && (U_ICU_VERSION_MINOR_NUM <= 6)
319 icu::NumberFormat::EStyles format_style;
320 if (display == UNICODE_STRING_SIMPLE("code")) {
321 format_style = icu::NumberFormat::kIsoCurrencyStyle;
322 } else if (display == UNICODE_STRING_SIMPLE("name")) {
323 format_style = icu::NumberFormat::kPluralCurrencyStyle;
324 } else {
325 format_style = icu::NumberFormat::kCurrencyStyle;
326 }
327 #else // ICU version is 4.8 or above (we ignore versions below 4.0).
328 UNumberFormatStyle format_style;
329 if (display == UNICODE_STRING_SIMPLE("code")) {
330 format_style = UNUM_CURRENCY_ISO;
331 } else if (display == UNICODE_STRING_SIMPLE("name")) {
332 format_style = UNUM_CURRENCY_PLURAL;
333 } else {
334 format_style = UNUM_CURRENCY;
335 }
336 #endif
337
338 number_format = static_cast<icu::DecimalFormat*>(
339 icu::NumberFormat::createInstance(icu_locale, format_style, status));
340
341 if (U_FAILURE(status)) {
342 delete number_format;
343 return nullptr;
344 }
345 } else if (style == UNICODE_STRING_SIMPLE("percent")) {
346 number_format = static_cast<icu::DecimalFormat*>(
347 icu::NumberFormat::createPercentInstance(icu_locale, status));
348 if (U_FAILURE(status)) {
349 delete number_format;
350 return nullptr;
351 }
352 // Make sure 1.1% doesn't go into 2%.
353 number_format->setMinimumFractionDigits(1);
354 } else {
355 // Make a decimal instance by default.
356 number_format = static_cast<icu::DecimalFormat*>(
357 icu::NumberFormat::createInstance(icu_locale, status));
358 }
359 }
360
361 if (U_FAILURE(status)) {
362 delete number_format;
363 return nullptr;
364 }
365
366 // Set all options.
367 if (!currency.isEmpty()) {
368 number_format->setCurrency(currency.getBuffer(), status);
369 }
370
371 SetNumericSettings(isolate, number_format, options);
372
373 bool grouping;
374 if (ExtractBooleanSetting(isolate, options, "useGrouping", &grouping)) {
375 number_format->setGroupingUsed(grouping);
376 }
377
378 return number_format;
379 }
380
SetResolvedNumericSettings(Isolate * isolate,const icu::Locale & icu_locale,icu::DecimalFormat * number_format,Handle<JSObject> resolved)381 void SetResolvedNumericSettings(Isolate* isolate, const icu::Locale& icu_locale,
382 icu::DecimalFormat* number_format,
383 Handle<JSObject> resolved) {
384 Factory* factory = isolate->factory();
385
386 JSObject::SetProperty(
387 isolate, resolved,
388 factory->NewStringFromStaticChars("minimumIntegerDigits"),
389 factory->NewNumberFromInt(number_format->getMinimumIntegerDigits()),
390 LanguageMode::kSloppy)
391 .Assert();
392
393 JSObject::SetProperty(
394 isolate, resolved,
395 factory->NewStringFromStaticChars("minimumFractionDigits"),
396 factory->NewNumberFromInt(number_format->getMinimumFractionDigits()),
397 LanguageMode::kSloppy)
398 .Assert();
399
400 JSObject::SetProperty(
401 isolate, resolved,
402 factory->NewStringFromStaticChars("maximumFractionDigits"),
403 factory->NewNumberFromInt(number_format->getMaximumFractionDigits()),
404 LanguageMode::kSloppy)
405 .Assert();
406
407 Handle<String> key =
408 factory->NewStringFromStaticChars("minimumSignificantDigits");
409 Maybe<bool> maybe = JSReceiver::HasOwnProperty(resolved, key);
410 CHECK(maybe.IsJust());
411 if (maybe.FromJust()) {
412 JSObject::SetProperty(
413 isolate, resolved,
414 factory->NewStringFromStaticChars("minimumSignificantDigits"),
415 factory->NewNumberFromInt(number_format->getMinimumSignificantDigits()),
416 LanguageMode::kSloppy)
417 .Assert();
418 }
419
420 key = factory->NewStringFromStaticChars("maximumSignificantDigits");
421 maybe = JSReceiver::HasOwnProperty(resolved, key);
422 CHECK(maybe.IsJust());
423 if (maybe.FromJust()) {
424 JSObject::SetProperty(
425 isolate, resolved,
426 factory->NewStringFromStaticChars("maximumSignificantDigits"),
427 factory->NewNumberFromInt(number_format->getMaximumSignificantDigits()),
428 LanguageMode::kSloppy)
429 .Assert();
430 }
431
432 // Set the locale
433 char result[ULOC_FULLNAME_CAPACITY];
434 UErrorCode status = U_ZERO_ERROR;
435 uloc_toLanguageTag(icu_locale.getName(), result, ULOC_FULLNAME_CAPACITY,
436 FALSE, &status);
437 if (U_SUCCESS(status)) {
438 JSObject::SetProperty(
439 isolate, resolved, factory->NewStringFromStaticChars("locale"),
440 factory->NewStringFromAsciiChecked(result), LanguageMode::kSloppy)
441 .Assert();
442 } else {
443 // This would never happen, since we got the locale from ICU.
444 JSObject::SetProperty(
445 isolate, resolved, factory->NewStringFromStaticChars("locale"),
446 factory->NewStringFromStaticChars("und"), LanguageMode::kSloppy)
447 .Assert();
448 }
449 }
450
SetResolvedNumberSettings(Isolate * isolate,const icu::Locale & icu_locale,icu::DecimalFormat * number_format,Handle<JSObject> resolved)451 void SetResolvedNumberSettings(Isolate* isolate, const icu::Locale& icu_locale,
452 icu::DecimalFormat* number_format,
453 Handle<JSObject> resolved) {
454 Factory* factory = isolate->factory();
455
456 // Set resolved currency code in options.currency if not empty.
457 icu::UnicodeString currency(number_format->getCurrency());
458 if (!currency.isEmpty()) {
459 JSObject::SetProperty(
460 isolate, resolved, factory->NewStringFromStaticChars("currency"),
461 factory
462 ->NewStringFromTwoByte(Vector<const uint16_t>(
463 reinterpret_cast<const uint16_t*>(currency.getBuffer()),
464 currency.length()))
465 .ToHandleChecked(),
466 LanguageMode::kSloppy)
467 .Assert();
468 }
469
470 // Ugly hack. ICU doesn't expose numbering system in any way, so we have
471 // to assume that for given locale NumberingSystem constructor produces the
472 // same digits as NumberFormat/Calendar would.
473 UErrorCode status = U_ZERO_ERROR;
474 icu::NumberingSystem* numbering_system =
475 icu::NumberingSystem::createInstance(icu_locale, status);
476 if (U_SUCCESS(status)) {
477 const char* ns = numbering_system->getName();
478 JSObject::SetProperty(
479 isolate, resolved, factory->NewStringFromStaticChars("numberingSystem"),
480 factory->NewStringFromAsciiChecked(ns), LanguageMode::kSloppy)
481 .Assert();
482 } else {
483 JSObject::SetProperty(isolate, resolved,
484 factory->NewStringFromStaticChars("numberingSystem"),
485 factory->undefined_value(), LanguageMode::kSloppy)
486 .Assert();
487 }
488 delete numbering_system;
489
490 JSObject::SetProperty(isolate, resolved,
491 factory->NewStringFromStaticChars("useGrouping"),
492 factory->ToBoolean(number_format->isGroupingUsed()),
493 LanguageMode::kSloppy)
494 .Assert();
495
496 SetResolvedNumericSettings(isolate, icu_locale, number_format, resolved);
497 }
498
CreateICUBreakIterator(Isolate * isolate,const icu::Locale & icu_locale,Handle<JSObject> options)499 icu::BreakIterator* CreateICUBreakIterator(Isolate* isolate,
500 const icu::Locale& icu_locale,
501 Handle<JSObject> options) {
502 UErrorCode status = U_ZERO_ERROR;
503 icu::BreakIterator* break_iterator = nullptr;
504 icu::UnicodeString type;
505 if (!ExtractStringSetting(isolate, options, "type", &type)) return nullptr;
506
507 if (type == UNICODE_STRING_SIMPLE("character")) {
508 break_iterator =
509 icu::BreakIterator::createCharacterInstance(icu_locale, status);
510 } else if (type == UNICODE_STRING_SIMPLE("sentence")) {
511 break_iterator =
512 icu::BreakIterator::createSentenceInstance(icu_locale, status);
513 } else if (type == UNICODE_STRING_SIMPLE("line")) {
514 break_iterator = icu::BreakIterator::createLineInstance(icu_locale, status);
515 } else {
516 // Defualt is word iterator.
517 break_iterator = icu::BreakIterator::createWordInstance(icu_locale, status);
518 }
519
520 if (U_FAILURE(status)) {
521 delete break_iterator;
522 return nullptr;
523 }
524
525 isolate->CountUsage(v8::Isolate::UseCounterFeature::kBreakIterator);
526
527 return break_iterator;
528 }
529
SetResolvedBreakIteratorSettings(Isolate * isolate,const icu::Locale & icu_locale,icu::BreakIterator * break_iterator,Handle<JSObject> resolved)530 void SetResolvedBreakIteratorSettings(Isolate* isolate,
531 const icu::Locale& icu_locale,
532 icu::BreakIterator* break_iterator,
533 Handle<JSObject> resolved) {
534 Factory* factory = isolate->factory();
535 UErrorCode status = U_ZERO_ERROR;
536
537 // Set the locale
538 char result[ULOC_FULLNAME_CAPACITY];
539 status = U_ZERO_ERROR;
540 uloc_toLanguageTag(icu_locale.getName(), result, ULOC_FULLNAME_CAPACITY,
541 FALSE, &status);
542 if (U_SUCCESS(status)) {
543 JSObject::SetProperty(
544 isolate, resolved, factory->NewStringFromStaticChars("locale"),
545 factory->NewStringFromAsciiChecked(result), LanguageMode::kSloppy)
546 .Assert();
547 } else {
548 // This would never happen, since we got the locale from ICU.
549 JSObject::SetProperty(
550 isolate, resolved, factory->NewStringFromStaticChars("locale"),
551 factory->NewStringFromStaticChars("und"), LanguageMode::kSloppy)
552 .Assert();
553 }
554 }
555
CachedOrNewService(Isolate * isolate,Handle<String> service,Handle<Object> locales,Handle<Object> options,Handle<Object> internal_options)556 MaybeHandle<JSObject> CachedOrNewService(Isolate* isolate,
557 Handle<String> service,
558 Handle<Object> locales,
559 Handle<Object> options,
560 Handle<Object> internal_options) {
561 Handle<Object> result;
562 Handle<Object> undefined_value(ReadOnlyRoots(isolate).undefined_value(),
563 isolate);
564 Handle<Object> args[] = {service, locales, options, internal_options};
565 ASSIGN_RETURN_ON_EXCEPTION(
566 isolate, result,
567 Execution::Call(isolate, isolate->cached_or_new_service(),
568 undefined_value, arraysize(args), args),
569 JSArray);
570 return Handle<JSObject>::cast(result);
571 }
572 } // namespace
573
CreateICULocale(Isolate * isolate,Handle<String> bcp47_locale_str)574 icu::Locale Intl::CreateICULocale(Isolate* isolate,
575 Handle<String> bcp47_locale_str) {
576 v8::Isolate* v8_isolate = reinterpret_cast<v8::Isolate*>(isolate);
577 v8::String::Utf8Value bcp47_locale(v8_isolate,
578 v8::Utils::ToLocal(bcp47_locale_str));
579 CHECK_NOT_NULL(*bcp47_locale);
580
581 DisallowHeapAllocation no_gc;
582
583 // Convert BCP47 into ICU locale format.
584 UErrorCode status = U_ZERO_ERROR;
585 char icu_result[ULOC_FULLNAME_CAPACITY];
586 int icu_length = 0;
587
588 // bcp47_locale_str should be a canonicalized language tag, which
589 // means this shouldn't fail.
590 uloc_forLanguageTag(*bcp47_locale, icu_result, ULOC_FULLNAME_CAPACITY,
591 &icu_length, &status);
592 CHECK(U_SUCCESS(status));
593 CHECK_LT(0, icu_length);
594
595 icu::Locale icu_locale(icu_result);
596 if (icu_locale.isBogus()) {
597 FATAL("Failed to create ICU locale, are ICU data files missing?");
598 }
599
600 return icu_locale;
601 }
602
603 // static
InitializeDateTimeFormat(Isolate * isolate,Handle<String> locale,Handle<JSObject> options,Handle<JSObject> resolved)604 icu::SimpleDateFormat* DateFormat::InitializeDateTimeFormat(
605 Isolate* isolate, Handle<String> locale, Handle<JSObject> options,
606 Handle<JSObject> resolved) {
607 icu::Locale icu_locale = Intl::CreateICULocale(isolate, locale);
608 DCHECK(!icu_locale.isBogus());
609
610 icu::SimpleDateFormat* date_format =
611 CreateICUDateFormat(isolate, icu_locale, options);
612 if (!date_format) {
613 // Remove extensions and try again.
614 icu::Locale no_extension_locale(icu_locale.getBaseName());
615 date_format = CreateICUDateFormat(isolate, no_extension_locale, options);
616
617 if (!date_format) {
618 FATAL("Failed to create ICU date format, are ICU data files missing?");
619 }
620
621 // Set resolved settings (pattern, numbering system, calendar).
622 SetResolvedDateSettings(isolate, no_extension_locale, date_format,
623 resolved);
624 } else {
625 SetResolvedDateSettings(isolate, icu_locale, date_format, resolved);
626 }
627
628 CHECK_NOT_NULL(date_format);
629 return date_format;
630 }
631
UnpackDateFormat(Handle<JSObject> obj)632 icu::SimpleDateFormat* DateFormat::UnpackDateFormat(Handle<JSObject> obj) {
633 return reinterpret_cast<icu::SimpleDateFormat*>(
634 obj->GetEmbedderField(DateFormat::kSimpleDateFormatIndex));
635 }
636
DeleteDateFormat(const v8::WeakCallbackInfo<void> & data)637 void DateFormat::DeleteDateFormat(const v8::WeakCallbackInfo<void>& data) {
638 delete reinterpret_cast<icu::SimpleDateFormat*>(data.GetInternalField(0));
639 GlobalHandles::Destroy(reinterpret_cast<Object**>(data.GetParameter()));
640 }
641
Unwrap(Isolate * isolate,Handle<JSReceiver> receiver,const char * method_name)642 MaybeHandle<JSObject> DateFormat::Unwrap(Isolate* isolate,
643 Handle<JSReceiver> receiver,
644 const char* method_name) {
645 Handle<Context> native_context =
646 Handle<Context>(isolate->context()->native_context(), isolate);
647 Handle<JSFunction> constructor = Handle<JSFunction>(
648 JSFunction::cast(native_context->intl_date_time_format_function()),
649 isolate);
650 Handle<String> method_name_str =
651 isolate->factory()->NewStringFromAsciiChecked(method_name);
652
653 return Intl::UnwrapReceiver(isolate, receiver, constructor,
654 Intl::Type::kDateTimeFormat, method_name_str,
655 true);
656 }
657
658 // ecma402/#sec-formatdatetime
659 // FormatDateTime( dateTimeFormat, x )
FormatDateTime(Isolate * isolate,Handle<JSObject> date_time_format_holder,double x)660 MaybeHandle<String> DateFormat::FormatDateTime(
661 Isolate* isolate, Handle<JSObject> date_time_format_holder, double x) {
662 double date_value = DateCache::TimeClip(x);
663 if (std::isnan(date_value)) {
664 THROW_NEW_ERROR(isolate, NewRangeError(MessageTemplate::kInvalidTimeValue),
665 String);
666 }
667
668 CHECK(Intl::IsObjectOfType(isolate, date_time_format_holder,
669 Intl::Type::kDateTimeFormat));
670 icu::SimpleDateFormat* date_format =
671 DateFormat::UnpackDateFormat(date_time_format_holder);
672 CHECK_NOT_NULL(date_format);
673
674 icu::UnicodeString result;
675 date_format->format(date_value, result);
676
677 return isolate->factory()->NewStringFromTwoByte(Vector<const uint16_t>(
678 reinterpret_cast<const uint16_t*>(result.getBuffer()), result.length()));
679 }
680
681 // ecma402/#sec-datetime-format-functions
682 // DateTime Format Functions
DateTimeFormat(Isolate * isolate,Handle<JSObject> date_time_format_holder,Handle<Object> date)683 MaybeHandle<String> DateFormat::DateTimeFormat(
684 Isolate* isolate, Handle<JSObject> date_time_format_holder,
685 Handle<Object> date) {
686 // 2. Assert: Type(dtf) is Object and dtf has an [[InitializedDateTimeFormat]]
687 // internal slot.
688 DCHECK(Intl::IsObjectOfType(isolate, date_time_format_holder,
689 Intl::Type::kDateTimeFormat));
690
691 // 3. If date is not provided or is undefined, then
692 double x;
693 if (date->IsUndefined()) {
694 // 3.a Let x be Call(%Date_now%, undefined).
695 x = JSDate::CurrentTimeValue(isolate);
696 } else {
697 // 4. Else,
698 // a. Let x be ? ToNumber(date).
699 ASSIGN_RETURN_ON_EXCEPTION(isolate, date, Object::ToNumber(isolate, date),
700 String);
701 CHECK(date->IsNumber());
702 x = date->Number();
703 }
704 // 5. Return FormatDateTime(dtf, x).
705 return DateFormat::FormatDateTime(isolate, date_time_format_holder, x);
706 }
707
ToLocaleDateTime(Isolate * isolate,Handle<Object> date,Handle<Object> locales,Handle<Object> options,const char * required,const char * defaults,const char * service)708 MaybeHandle<String> DateFormat::ToLocaleDateTime(
709 Isolate* isolate, Handle<Object> date, Handle<Object> locales,
710 Handle<Object> options, const char* required, const char* defaults,
711 const char* service) {
712 Factory* factory = isolate->factory();
713 // 1. Let x be ? thisTimeValue(this value);
714 if (!date->IsJSDate()) {
715 THROW_NEW_ERROR(isolate,
716 NewTypeError(MessageTemplate::kMethodInvokedOnWrongType,
717 factory->NewStringFromStaticChars("Date")),
718 String);
719 }
720
721 double const x = Handle<JSDate>::cast(date)->value()->Number();
722 // 2. If x is NaN, return "Invalid Date"
723 if (std::isnan(x)) {
724 return factory->NewStringFromStaticChars("Invalid Date");
725 }
726
727 // 3. Let options be ? ToDateTimeOptions(options, required, defaults).
728 Handle<JSObject> internal_options;
729 ASSIGN_RETURN_ON_EXCEPTION(
730 isolate, internal_options,
731 DateFormat::ToDateTimeOptions(isolate, options, required, defaults),
732 String);
733
734 // 4. Let dateFormat be ? Construct(%DateTimeFormat%, « locales, options »).
735 Handle<JSObject> date_format;
736 ASSIGN_RETURN_ON_EXCEPTION(
737 isolate, date_format,
738 CachedOrNewService(isolate, factory->NewStringFromAsciiChecked(service),
739 locales, options, internal_options),
740 String);
741
742 // 5. Return FormatDateTime(dateFormat, x).
743 return DateFormat::FormatDateTime(isolate, date_format, x);
744 }
745
InitializeNumberFormat(Isolate * isolate,Handle<String> locale,Handle<JSObject> options,Handle<JSObject> resolved)746 icu::DecimalFormat* NumberFormat::InitializeNumberFormat(
747 Isolate* isolate, Handle<String> locale, Handle<JSObject> options,
748 Handle<JSObject> resolved) {
749 icu::Locale icu_locale = Intl::CreateICULocale(isolate, locale);
750 DCHECK(!icu_locale.isBogus());
751
752 icu::DecimalFormat* number_format =
753 CreateICUNumberFormat(isolate, icu_locale, options);
754 if (!number_format) {
755 // Remove extensions and try again.
756 icu::Locale no_extension_locale(icu_locale.getBaseName());
757 number_format =
758 CreateICUNumberFormat(isolate, no_extension_locale, options);
759
760 if (!number_format) {
761 FATAL("Failed to create ICU number format, are ICU data files missing?");
762 }
763
764 // Set resolved settings (pattern, numbering system).
765 SetResolvedNumberSettings(isolate, no_extension_locale, number_format,
766 resolved);
767 } else {
768 SetResolvedNumberSettings(isolate, icu_locale, number_format, resolved);
769 }
770
771 CHECK_NOT_NULL(number_format);
772 return number_format;
773 }
774
UnpackNumberFormat(Handle<JSObject> obj)775 icu::DecimalFormat* NumberFormat::UnpackNumberFormat(Handle<JSObject> obj) {
776 return reinterpret_cast<icu::DecimalFormat*>(
777 obj->GetEmbedderField(NumberFormat::kDecimalFormatIndex));
778 }
779
DeleteNumberFormat(const v8::WeakCallbackInfo<void> & data)780 void NumberFormat::DeleteNumberFormat(const v8::WeakCallbackInfo<void>& data) {
781 delete reinterpret_cast<icu::DecimalFormat*>(data.GetInternalField(0));
782 GlobalHandles::Destroy(reinterpret_cast<Object**>(data.GetParameter()));
783 }
784
InitializeBreakIterator(Isolate * isolate,Handle<String> locale,Handle<JSObject> options,Handle<JSObject> resolved)785 icu::BreakIterator* V8BreakIterator::InitializeBreakIterator(
786 Isolate* isolate, Handle<String> locale, Handle<JSObject> options,
787 Handle<JSObject> resolved) {
788 icu::Locale icu_locale = Intl::CreateICULocale(isolate, locale);
789 DCHECK(!icu_locale.isBogus());
790
791 icu::BreakIterator* break_iterator =
792 CreateICUBreakIterator(isolate, icu_locale, options);
793 if (!break_iterator) {
794 // Remove extensions and try again.
795 icu::Locale no_extension_locale(icu_locale.getBaseName());
796 break_iterator =
797 CreateICUBreakIterator(isolate, no_extension_locale, options);
798
799 if (!break_iterator) {
800 FATAL("Failed to create ICU break iterator, are ICU data files missing?");
801 }
802
803 // Set resolved settings (locale).
804 SetResolvedBreakIteratorSettings(isolate, no_extension_locale,
805 break_iterator, resolved);
806 } else {
807 SetResolvedBreakIteratorSettings(isolate, icu_locale, break_iterator,
808 resolved);
809 }
810
811 CHECK_NOT_NULL(break_iterator);
812 return break_iterator;
813 }
814
UnpackBreakIterator(Handle<JSObject> obj)815 icu::BreakIterator* V8BreakIterator::UnpackBreakIterator(Handle<JSObject> obj) {
816 return reinterpret_cast<icu::BreakIterator*>(
817 obj->GetEmbedderField(V8BreakIterator::kBreakIteratorIndex));
818 }
819
DeleteBreakIterator(const v8::WeakCallbackInfo<void> & data)820 void V8BreakIterator::DeleteBreakIterator(
821 const v8::WeakCallbackInfo<void>& data) {
822 delete reinterpret_cast<icu::BreakIterator*>(data.GetInternalField(0));
823 delete reinterpret_cast<icu::UnicodeString*>(data.GetInternalField(1));
824 GlobalHandles::Destroy(reinterpret_cast<Object**>(data.GetParameter()));
825 }
826
AdoptText(Isolate * isolate,Handle<JSObject> break_iterator_holder,Handle<String> text)827 void V8BreakIterator::AdoptText(Isolate* isolate,
828 Handle<JSObject> break_iterator_holder,
829 Handle<String> text) {
830 icu::BreakIterator* break_iterator =
831 V8BreakIterator::UnpackBreakIterator(break_iterator_holder);
832 CHECK_NOT_NULL(break_iterator);
833
834 icu::UnicodeString* u_text = reinterpret_cast<icu::UnicodeString*>(
835 break_iterator_holder->GetEmbedderField(
836 V8BreakIterator::kUnicodeStringIndex));
837 delete u_text;
838
839 int length = text->length();
840 text = String::Flatten(isolate, text);
841 DisallowHeapAllocation no_gc;
842 String::FlatContent flat = text->GetFlatContent();
843 std::unique_ptr<uc16[]> sap;
844 const UChar* text_value = GetUCharBufferFromFlat(flat, &sap, length);
845 u_text = new icu::UnicodeString(text_value, length);
846 break_iterator_holder->SetEmbedderField(V8BreakIterator::kUnicodeStringIndex,
847 reinterpret_cast<Smi*>(u_text));
848
849 break_iterator->setText(*u_text);
850 }
851
ToString(Isolate * isolate,const icu::UnicodeString & string)852 MaybeHandle<String> Intl::ToString(Isolate* isolate,
853 const icu::UnicodeString& string) {
854 return isolate->factory()->NewStringFromTwoByte(Vector<const uint16_t>(
855 reinterpret_cast<const uint16_t*>(string.getBuffer()), string.length()));
856 }
857
ToString(Isolate * isolate,const icu::UnicodeString & string,int32_t begin,int32_t end)858 MaybeHandle<String> Intl::ToString(Isolate* isolate,
859 const icu::UnicodeString& string,
860 int32_t begin, int32_t end) {
861 return Intl::ToString(isolate, string.tempSubStringBetween(begin, end));
862 }
863
864 namespace {
865
InnerAddElement(Isolate * isolate,Handle<JSArray> array,int index,Handle<String> field_type_string,Handle<String> value)866 Handle<JSObject> InnerAddElement(Isolate* isolate, Handle<JSArray> array,
867 int index, Handle<String> field_type_string,
868 Handle<String> value) {
869 // let element = $array[$index] = {
870 // type: $field_type_string,
871 // value: $value
872 // }
873 // return element;
874 Factory* factory = isolate->factory();
875 Handle<JSObject> element = factory->NewJSObject(isolate->object_function());
876 JSObject::AddProperty(isolate, element, factory->type_string(),
877 field_type_string, NONE);
878
879 JSObject::AddProperty(isolate, element, factory->value_string(), value, NONE);
880 JSObject::AddDataElement(array, index, element, NONE);
881 return element;
882 }
883
884 } // namespace
885
AddElement(Isolate * isolate,Handle<JSArray> array,int index,Handle<String> field_type_string,Handle<String> value)886 void Intl::AddElement(Isolate* isolate, Handle<JSArray> array, int index,
887 Handle<String> field_type_string, Handle<String> value) {
888 // Same as $array[$index] = {type: $field_type_string, value: $value};
889 InnerAddElement(isolate, array, index, field_type_string, value);
890 }
891
AddElement(Isolate * isolate,Handle<JSArray> array,int index,Handle<String> field_type_string,Handle<String> value,Handle<String> additional_property_name,Handle<String> additional_property_value)892 void Intl::AddElement(Isolate* isolate, Handle<JSArray> array, int index,
893 Handle<String> field_type_string, Handle<String> value,
894 Handle<String> additional_property_name,
895 Handle<String> additional_property_value) {
896 // Same as $array[$index] = {
897 // type: $field_type_string, value: $value,
898 // $additional_property_name: $additional_property_value
899 // }
900 Handle<JSObject> element =
901 InnerAddElement(isolate, array, index, field_type_string, value);
902 JSObject::AddProperty(isolate, element, additional_property_name,
903 additional_property_value, NONE);
904 }
905 // Build the shortened locale; eg, convert xx_Yyyy_ZZ to xx_ZZ.
RemoveLocaleScriptTag(const std::string & icu_locale,std::string * locale_less_script)906 bool Intl::RemoveLocaleScriptTag(const std::string& icu_locale,
907 std::string* locale_less_script) {
908 icu::Locale new_locale = icu::Locale::createCanonical(icu_locale.c_str());
909 const char* icu_script = new_locale.getScript();
910 if (icu_script == NULL || strlen(icu_script) == 0) {
911 *locale_less_script = std::string();
912 return false;
913 }
914
915 const char* icu_language = new_locale.getLanguage();
916 const char* icu_country = new_locale.getCountry();
917 icu::Locale short_locale = icu::Locale(icu_language, icu_country);
918 const char* icu_name = short_locale.getName();
919 *locale_less_script = std::string(icu_name);
920 return true;
921 }
922
923 namespace {
924
IsPropertyUndefined(Isolate * isolate,Handle<JSObject> options,const char * property)925 Maybe<bool> IsPropertyUndefined(Isolate* isolate, Handle<JSObject> options,
926 const char* property) {
927 Factory* factory = isolate->factory();
928 // i. Let prop be the property name.
929 // ii. Let value be ? Get(options, prop).
930 Handle<Object> value;
931 ASSIGN_RETURN_ON_EXCEPTION_VALUE(
932 isolate, value,
933 Object::GetPropertyOrElement(
934 isolate, options, factory->NewStringFromAsciiChecked(property)),
935 Nothing<bool>());
936 return Just(value->IsUndefined(isolate));
937 }
938
939 } // namespace
940
941 // ecma-402/#sec-todatetimeoptions
ToDateTimeOptions(Isolate * isolate,Handle<Object> input_options,const char * required,const char * defaults)942 MaybeHandle<JSObject> DateFormat::ToDateTimeOptions(
943 Isolate* isolate, Handle<Object> input_options, const char* required,
944 const char* defaults) {
945 Factory* factory = isolate->factory();
946 // 1. If options is undefined, let options be null; otherwise let options be ?
947 // ToObject(options).
948 Handle<JSObject> options;
949 if (input_options->IsUndefined(isolate)) {
950 options = factory->NewJSObjectWithNullProto();
951 } else {
952 Handle<JSReceiver> options_obj;
953 ASSIGN_RETURN_ON_EXCEPTION(isolate, options_obj,
954 Object::ToObject(isolate, input_options),
955 JSObject);
956 // 2. Let options be ObjectCreate(options).
957 ASSIGN_RETURN_ON_EXCEPTION(isolate, options,
958 JSObject::ObjectCreate(isolate, options_obj),
959 JSObject);
960 }
961
962 // 3. Let needDefaults be true.
963 bool needs_default = true;
964
965 bool required_is_any = strcmp(required, "any") == 0;
966 // 4. If required is "date" or "any", then
967 if (required_is_any || (strcmp(required, "date") == 0)) {
968 // a. For each of the property names "weekday", "year", "month", "day", do
969 for (auto& prop : {"weekday", "year", "month", "day"}) {
970 // i. Let prop be the property name.
971 // ii. Let value be ? Get(options, prop)
972 Maybe<bool> maybe_undefined = IsPropertyUndefined(isolate, options, prop);
973 MAYBE_RETURN(maybe_undefined, Handle<JSObject>());
974 // iii. If value is not undefined, let needDefaults be false.
975 if (!maybe_undefined.FromJust()) {
976 needs_default = false;
977 }
978 }
979 }
980
981 // 5. If required is "time" or "any", then
982 if (required_is_any || (strcmp(required, "time") == 0)) {
983 // a. For each of the property names "hour", "minute", "second", do
984 for (auto& prop : {"hour", "minute", "second"}) {
985 // i. Let prop be the property name.
986 // ii. Let value be ? Get(options, prop)
987 Maybe<bool> maybe_undefined = IsPropertyUndefined(isolate, options, prop);
988 MAYBE_RETURN(maybe_undefined, Handle<JSObject>());
989 // iii. If value is not undefined, let needDefaults be false.
990 if (!maybe_undefined.FromJust()) {
991 needs_default = false;
992 }
993 }
994 }
995
996 // 6. If needDefaults is true and defaults is either "date" or "all", then
997 if (needs_default) {
998 bool default_is_all = strcmp(defaults, "all") == 0;
999 if (default_is_all || (strcmp(defaults, "date") == 0)) {
1000 // a. For each of the property names "year", "month", "day", do
1001 // i. Perform ? CreateDataPropertyOrThrow(options, prop, "numeric").
1002 for (auto& prop : {"year", "month", "day"}) {
1003 MAYBE_RETURN(
1004 JSReceiver::CreateDataProperty(
1005 isolate, options, factory->NewStringFromAsciiChecked(prop),
1006 factory->numeric_string(), kThrowOnError),
1007 Handle<JSObject>());
1008 }
1009 }
1010 // 7. If needDefaults is true and defaults is either "time" or "all", then
1011 if (default_is_all || (strcmp(defaults, "time") == 0)) {
1012 // a. For each of the property names "hour", "minute", "second", do
1013 // i. Perform ? CreateDataPropertyOrThrow(options, prop, "numeric").
1014 for (auto& prop : {"hour", "minute", "second"}) {
1015 MAYBE_RETURN(
1016 JSReceiver::CreateDataProperty(
1017 isolate, options, factory->NewStringFromAsciiChecked(prop),
1018 factory->numeric_string(), kThrowOnError),
1019 Handle<JSObject>());
1020 }
1021 }
1022 }
1023 // 8. Return options.
1024 return options;
1025 }
1026
GetAvailableLocales(const IcuService & service)1027 std::set<std::string> Intl::GetAvailableLocales(const IcuService& service) {
1028 const icu::Locale* icu_available_locales = nullptr;
1029 int32_t count = 0;
1030 std::set<std::string> locales;
1031
1032 switch (service) {
1033 case IcuService::kBreakIterator:
1034 icu_available_locales = icu::BreakIterator::getAvailableLocales(count);
1035 break;
1036 case IcuService::kCollator:
1037 icu_available_locales = icu::Collator::getAvailableLocales(count);
1038 break;
1039 case IcuService::kDateFormat:
1040 icu_available_locales = icu::DateFormat::getAvailableLocales(count);
1041 break;
1042 case IcuService::kNumberFormat:
1043 icu_available_locales = icu::NumberFormat::getAvailableLocales(count);
1044 break;
1045 case IcuService::kPluralRules:
1046 // TODO(littledan): For PluralRules, filter out locales that
1047 // don't support PluralRules.
1048 // PluralRules is missing an appropriate getAvailableLocales method,
1049 // so we should filter from all locales, but it's not clear how; see
1050 // https://ssl.icu-project.org/trac/ticket/12756
1051 icu_available_locales = icu::Locale::getAvailableLocales(count);
1052 break;
1053 case IcuService::kResourceBundle: {
1054 UErrorCode status = U_ZERO_ERROR;
1055 UEnumeration* en = ures_openAvailableLocales(nullptr, &status);
1056 int32_t length = 0;
1057 const char* locale_str = uenum_next(en, &length, &status);
1058 while (U_SUCCESS(status) && (locale_str != nullptr)) {
1059 std::string locale(locale_str, length);
1060 std::replace(locale.begin(), locale.end(), '_', '-');
1061 locales.insert(locale);
1062 std::string shortened_locale;
1063 if (Intl::RemoveLocaleScriptTag(locale_str, &shortened_locale)) {
1064 std::replace(shortened_locale.begin(), shortened_locale.end(), '_',
1065 '-');
1066 locales.insert(shortened_locale);
1067 }
1068 locale_str = uenum_next(en, &length, &status);
1069 }
1070 uenum_close(en);
1071 return locales;
1072 }
1073 case IcuService::kRelativeDateTimeFormatter: {
1074 // ICU RelativeDateTimeFormatter does not provide a getAvailableLocales()
1075 // interface, because RelativeDateTimeFormatter depends on
1076 // 1. NumberFormat and 2. ResourceBundle, return the
1077 // intersection of these two set.
1078 // ICU FR at https://unicode-org.atlassian.net/browse/ICU-20009
1079 // TODO(ftang): change to call ICU's getAvailableLocales() after it is
1080 // added.
1081 std::set<std::string> number_format_set(
1082 Intl::GetAvailableLocales(IcuService::kNumberFormat));
1083 std::set<std::string> resource_bundle_set(
1084 Intl::GetAvailableLocales(IcuService::kResourceBundle));
1085 set_intersection(resource_bundle_set.begin(), resource_bundle_set.end(),
1086 number_format_set.begin(), number_format_set.end(),
1087 std::inserter(locales, locales.begin()));
1088 return locales;
1089 }
1090 case IcuService::kListFormatter: {
1091 // TODO(ftang): for now just use
1092 // icu::Locale::getAvailableLocales(count) until we migrate to
1093 // Intl::GetAvailableLocales().
1094 // ICU FR at https://unicode-org.atlassian.net/browse/ICU-20015
1095 icu_available_locales = icu::Locale::getAvailableLocales(count);
1096 break;
1097 }
1098 }
1099
1100 UErrorCode error = U_ZERO_ERROR;
1101 char result[ULOC_FULLNAME_CAPACITY];
1102
1103 for (int32_t i = 0; i < count; ++i) {
1104 const char* icu_name = icu_available_locales[i].getName();
1105
1106 error = U_ZERO_ERROR;
1107 // No need to force strict BCP47 rules.
1108 uloc_toLanguageTag(icu_name, result, ULOC_FULLNAME_CAPACITY, FALSE, &error);
1109 if (U_FAILURE(error) || error == U_STRING_NOT_TERMINATED_WARNING) {
1110 // This shouldn't happen, but lets not break the user.
1111 continue;
1112 }
1113 std::string locale(result);
1114 locales.insert(locale);
1115
1116 std::string shortened_locale;
1117 if (Intl::RemoveLocaleScriptTag(icu_name, &shortened_locale)) {
1118 std::replace(shortened_locale.begin(), shortened_locale.end(), '_', '-');
1119 locales.insert(shortened_locale);
1120 }
1121 }
1122
1123 return locales;
1124 }
1125
StringToIcuService(Handle<String> service)1126 IcuService Intl::StringToIcuService(Handle<String> service) {
1127 if (service->IsUtf8EqualTo(CStrVector("collator"))) {
1128 return IcuService::kCollator;
1129 } else if (service->IsUtf8EqualTo(CStrVector("numberformat"))) {
1130 return IcuService::kNumberFormat;
1131 } else if (service->IsUtf8EqualTo(CStrVector("dateformat"))) {
1132 return IcuService::kDateFormat;
1133 } else if (service->IsUtf8EqualTo(CStrVector("breakiterator"))) {
1134 return IcuService::kBreakIterator;
1135 } else if (service->IsUtf8EqualTo(CStrVector("pluralrules"))) {
1136 return IcuService::kPluralRules;
1137 } else if (service->IsUtf8EqualTo(CStrVector("relativetimeformat"))) {
1138 return IcuService::kRelativeDateTimeFormatter;
1139 } else if (service->IsUtf8EqualTo(CStrVector("listformat"))) {
1140 return IcuService::kListFormatter;
1141 }
1142 UNREACHABLE();
1143 }
1144
AvailableLocalesOf(Isolate * isolate,Handle<String> service)1145 V8_WARN_UNUSED_RESULT MaybeHandle<JSObject> Intl::AvailableLocalesOf(
1146 Isolate* isolate, Handle<String> service) {
1147 Factory* factory = isolate->factory();
1148 std::set<std::string> results =
1149 Intl::GetAvailableLocales(StringToIcuService(service));
1150 Handle<JSObject> locales = factory->NewJSObjectWithNullProto();
1151
1152 int32_t i = 0;
1153 for (auto iter = results.begin(); iter != results.end(); ++iter) {
1154 RETURN_ON_EXCEPTION(
1155 isolate,
1156 JSObject::SetOwnPropertyIgnoreAttributes(
1157 locales, factory->NewStringFromAsciiChecked(iter->c_str()),
1158 factory->NewNumber(i++), NONE),
1159 JSObject);
1160 }
1161 return locales;
1162 }
1163
DefaultLocale(Isolate * isolate)1164 std::string Intl::DefaultLocale(Isolate* isolate) {
1165 if (isolate->default_locale().empty()) {
1166 icu::Locale default_locale;
1167 // Translate ICU's fallback locale to a well-known locale.
1168 if (strcmp(default_locale.getName(), "en_US_POSIX") == 0) {
1169 isolate->set_default_locale("en-US");
1170 } else {
1171 // Set the locale
1172 char result[ULOC_FULLNAME_CAPACITY];
1173 UErrorCode status = U_ZERO_ERROR;
1174 int32_t length =
1175 uloc_toLanguageTag(default_locale.getName(), result,
1176 ULOC_FULLNAME_CAPACITY, FALSE, &status);
1177 isolate->set_default_locale(
1178 U_SUCCESS(status) ? std::string(result, length) : "und");
1179 }
1180 DCHECK(!isolate->default_locale().empty());
1181 }
1182 return isolate->default_locale();
1183 }
1184
IsObjectOfType(Isolate * isolate,Handle<Object> input,Intl::Type expected_type)1185 bool Intl::IsObjectOfType(Isolate* isolate, Handle<Object> input,
1186 Intl::Type expected_type) {
1187 if (!input->IsJSObject()) return false;
1188 Handle<JSObject> obj = Handle<JSObject>::cast(input);
1189
1190 Handle<Symbol> marker = isolate->factory()->intl_initialized_marker_symbol();
1191 Handle<Object> tag = JSReceiver::GetDataProperty(obj, marker);
1192
1193 if (!tag->IsSmi()) return false;
1194
1195 Intl::Type type = Intl::TypeFromSmi(Smi::cast(*tag));
1196 return type == expected_type;
1197 }
1198
1199 namespace {
1200
1201 // In ECMA 402 v1, Intl constructors supported a mode of operation
1202 // where calling them with an existing object as a receiver would
1203 // transform the receiver into the relevant Intl instance with all
1204 // internal slots. In ECMA 402 v2, this capability was removed, to
1205 // avoid adding internal slots on existing objects. In ECMA 402 v3,
1206 // the capability was re-added as "normative optional" in a mode
1207 // which chains the underlying Intl instance on any object, when the
1208 // constructor is called
1209 //
1210 // See ecma402/#legacy-constructor.
LegacyUnwrapReceiver(Isolate * isolate,Handle<JSReceiver> receiver,Handle<JSFunction> constructor,Intl::Type type)1211 MaybeHandle<Object> LegacyUnwrapReceiver(Isolate* isolate,
1212 Handle<JSReceiver> receiver,
1213 Handle<JSFunction> constructor,
1214 Intl::Type type) {
1215 bool has_initialized_slot = Intl::IsObjectOfType(isolate, receiver, type);
1216
1217 Handle<Object> obj_is_instance_of;
1218 ASSIGN_RETURN_ON_EXCEPTION(isolate, obj_is_instance_of,
1219 Object::InstanceOf(isolate, receiver, constructor),
1220 Object);
1221 bool is_instance_of = obj_is_instance_of->BooleanValue(isolate);
1222
1223 // 2. If receiver does not have an [[Initialized...]] internal slot
1224 // and ? InstanceofOperator(receiver, constructor) is true, then
1225 if (!has_initialized_slot && is_instance_of) {
1226 // 2. a. Let new_receiver be ? Get(receiver, %Intl%.[[FallbackSymbol]]).
1227 Handle<Object> new_receiver;
1228 ASSIGN_RETURN_ON_EXCEPTION(
1229 isolate, new_receiver,
1230 JSReceiver::GetProperty(isolate, receiver,
1231 isolate->factory()->intl_fallback_symbol()),
1232 Object);
1233 return new_receiver;
1234 }
1235
1236 return receiver;
1237 }
1238
1239 } // namespace
1240
UnwrapReceiver(Isolate * isolate,Handle<JSReceiver> receiver,Handle<JSFunction> constructor,Intl::Type type,Handle<String> method_name,bool check_legacy_constructor)1241 MaybeHandle<JSObject> Intl::UnwrapReceiver(Isolate* isolate,
1242 Handle<JSReceiver> receiver,
1243 Handle<JSFunction> constructor,
1244 Intl::Type type,
1245 Handle<String> method_name,
1246 bool check_legacy_constructor) {
1247 DCHECK(type == Intl::Type::kCollator || type == Intl::Type::kNumberFormat ||
1248 type == Intl::Type::kDateTimeFormat ||
1249 type == Intl::Type::kBreakIterator);
1250 Handle<Object> new_receiver = receiver;
1251 if (check_legacy_constructor) {
1252 ASSIGN_RETURN_ON_EXCEPTION(
1253 isolate, new_receiver,
1254 LegacyUnwrapReceiver(isolate, receiver, constructor, type), JSObject);
1255 }
1256
1257 // Collator has been ported to use regular instance types. We
1258 // shouldn't be using Intl::IsObjectOfType anymore.
1259 if (type == Intl::Type::kCollator) {
1260 if (!receiver->IsJSCollator()) {
1261 // 3. a. Throw a TypeError exception.
1262 THROW_NEW_ERROR(isolate,
1263 NewTypeError(MessageTemplate::kIncompatibleMethodReceiver,
1264 method_name, receiver),
1265 JSObject);
1266 }
1267 return Handle<JSCollator>::cast(receiver);
1268 }
1269
1270 DCHECK_NE(type, Intl::Type::kCollator);
1271 // 3. If Type(new_receiver) is not Object or nf does not have an
1272 // [[Initialized...]] internal slot, then
1273 if (!Intl::IsObjectOfType(isolate, new_receiver, type)) {
1274 // 3. a. Throw a TypeError exception.
1275 THROW_NEW_ERROR(isolate,
1276 NewTypeError(MessageTemplate::kIncompatibleMethodReceiver,
1277 method_name, receiver),
1278 JSObject);
1279 }
1280
1281 // The above IsObjectOfType returns true only for JSObjects, which
1282 // makes this cast safe.
1283 return Handle<JSObject>::cast(new_receiver);
1284 }
1285
Unwrap(Isolate * isolate,Handle<JSReceiver> receiver,const char * method_name)1286 MaybeHandle<JSObject> NumberFormat::Unwrap(Isolate* isolate,
1287 Handle<JSReceiver> receiver,
1288 const char* method_name) {
1289 Handle<Context> native_context =
1290 Handle<Context>(isolate->context()->native_context(), isolate);
1291 Handle<JSFunction> constructor = Handle<JSFunction>(
1292 JSFunction::cast(native_context->intl_number_format_function()), isolate);
1293 Handle<String> method_name_str =
1294 isolate->factory()->NewStringFromAsciiChecked(method_name);
1295
1296 return Intl::UnwrapReceiver(isolate, receiver, constructor,
1297 Intl::Type::kNumberFormat, method_name_str, true);
1298 }
1299
FormatNumber(Isolate * isolate,Handle<JSObject> number_format_holder,double value)1300 MaybeHandle<String> NumberFormat::FormatNumber(
1301 Isolate* isolate, Handle<JSObject> number_format_holder, double value) {
1302 icu::DecimalFormat* number_format =
1303 NumberFormat::UnpackNumberFormat(number_format_holder);
1304 CHECK_NOT_NULL(number_format);
1305
1306 icu::UnicodeString result;
1307 number_format->format(value, result);
1308
1309 return isolate->factory()->NewStringFromTwoByte(Vector<const uint16_t>(
1310 reinterpret_cast<const uint16_t*>(result.getBuffer()), result.length()));
1311 }
1312
DefineWEProperty(Isolate * isolate,Handle<JSObject> target,Handle<Name> key,Handle<Object> value)1313 void Intl::DefineWEProperty(Isolate* isolate, Handle<JSObject> target,
1314 Handle<Name> key, Handle<Object> value) {
1315 PropertyDescriptor desc;
1316 desc.set_writable(true);
1317 desc.set_enumerable(true);
1318 desc.set_value(value);
1319 Maybe<bool> success =
1320 JSReceiver::DefineOwnProperty(isolate, target, key, &desc, kDontThrow);
1321 DCHECK(success.IsJust() && success.FromJust());
1322 USE(success);
1323 }
1324
1325 namespace {
1326
1327 // Define general regexp macros.
1328 // Note "(?:" means the regexp group a non-capture group.
1329 #define REGEX_ALPHA "[a-z]"
1330 #define REGEX_DIGIT "[0-9]"
1331 #define REGEX_ALPHANUM "(?:" REGEX_ALPHA "|" REGEX_DIGIT ")"
1332
BuildLanguageTagRegexps(Isolate * isolate)1333 void BuildLanguageTagRegexps(Isolate* isolate) {
1334 // Define the language tag regexp macros.
1335 // For info on BCP 47 see https://tools.ietf.org/html/bcp47 .
1336 // Because language tags are case insensitive per BCP 47 2.1.1 and regexp's
1337 // defined below will always be used after lowercasing the input, uppercase
1338 // ranges in BCP 47 2.1 are dropped and grandfathered tags are all lowercased.
1339 // clang-format off
1340 #define BCP47_REGULAR \
1341 "(?:art-lojban|cel-gaulish|no-bok|no-nyn|zh-guoyu|zh-hakka|" \
1342 "zh-min|zh-min-nan|zh-xiang)"
1343 #define BCP47_IRREGULAR \
1344 "(?:en-gb-oed|i-ami|i-bnn|i-default|i-enochian|i-hak|" \
1345 "i-klingon|i-lux|i-mingo|i-navajo|i-pwn|i-tao|i-tay|" \
1346 "i-tsu|sgn-be-fr|sgn-be-nl|sgn-ch-de)"
1347 #define BCP47_GRANDFATHERED "(?:" BCP47_IRREGULAR "|" BCP47_REGULAR ")"
1348 #define BCP47_PRIVATE_USE "(?:x(?:-" REGEX_ALPHANUM "{1,8})+)"
1349
1350 #define BCP47_SINGLETON "(?:" REGEX_DIGIT "|" "[a-wy-z])"
1351
1352 #define BCP47_EXTENSION "(?:" BCP47_SINGLETON "(?:-" REGEX_ALPHANUM "{2,8})+)"
1353 #define BCP47_VARIANT \
1354 "(?:" REGEX_ALPHANUM "{5,8}" "|" "(?:" REGEX_DIGIT REGEX_ALPHANUM "{3}))"
1355
1356 #define BCP47_REGION "(?:" REGEX_ALPHA "{2}" "|" REGEX_DIGIT "{3})"
1357 #define BCP47_SCRIPT "(?:" REGEX_ALPHA "{4})"
1358 #define BCP47_EXT_LANG "(?:" REGEX_ALPHA "{3}(?:-" REGEX_ALPHA "{3}){0,2})"
1359 #define BCP47_LANGUAGE "(?:" REGEX_ALPHA "{2,3}(?:-" BCP47_EXT_LANG ")?" \
1360 "|" REGEX_ALPHA "{4}" "|" REGEX_ALPHA "{5,8})"
1361 #define BCP47_LANG_TAG \
1362 BCP47_LANGUAGE \
1363 "(?:-" BCP47_SCRIPT ")?" \
1364 "(?:-" BCP47_REGION ")?" \
1365 "(?:-" BCP47_VARIANT ")*" \
1366 "(?:-" BCP47_EXTENSION ")*" \
1367 "(?:-" BCP47_PRIVATE_USE ")?"
1368 // clang-format on
1369
1370 constexpr char kLanguageTagSingletonRegexp[] = "^" BCP47_SINGLETON "$";
1371 constexpr char kLanguageTagVariantRegexp[] = "^" BCP47_VARIANT "$";
1372 constexpr char kLanguageTagRegexp[] =
1373 "^(?:" BCP47_LANG_TAG "|" BCP47_PRIVATE_USE "|" BCP47_GRANDFATHERED ")$";
1374
1375 UErrorCode status = U_ZERO_ERROR;
1376 icu::RegexMatcher* language_singleton_regexp_matcher = new icu::RegexMatcher(
1377 icu::UnicodeString(kLanguageTagSingletonRegexp, -1, US_INV), 0, status);
1378 icu::RegexMatcher* language_tag_regexp_matcher = new icu::RegexMatcher(
1379 icu::UnicodeString(kLanguageTagRegexp, -1, US_INV), 0, status);
1380 icu::RegexMatcher* language_variant_regexp_matcher = new icu::RegexMatcher(
1381 icu::UnicodeString(kLanguageTagVariantRegexp, -1, US_INV), 0, status);
1382 CHECK(U_SUCCESS(status));
1383
1384 isolate->set_language_tag_regexp_matchers(language_singleton_regexp_matcher,
1385 language_tag_regexp_matcher,
1386 language_variant_regexp_matcher);
1387 // Undefine the language tag regexp macros.
1388 #undef BCP47_EXTENSION
1389 #undef BCP47_EXT_LANG
1390 #undef BCP47_GRANDFATHERED
1391 #undef BCP47_IRREGULAR
1392 #undef BCP47_LANG_TAG
1393 #undef BCP47_LANGUAGE
1394 #undef BCP47_PRIVATE_USE
1395 #undef BCP47_REGION
1396 #undef BCP47_REGULAR
1397 #undef BCP47_SCRIPT
1398 #undef BCP47_SINGLETON
1399 #undef BCP47_VARIANT
1400 }
1401
1402 // Undefine the general regexp macros.
1403 #undef REGEX_ALPHA
1404 #undef REGEX_DIGIT
1405 #undef REGEX_ALPHANUM
1406
GetLanguageSingletonRegexMatcher(Isolate * isolate)1407 icu::RegexMatcher* GetLanguageSingletonRegexMatcher(Isolate* isolate) {
1408 icu::RegexMatcher* language_singleton_regexp_matcher =
1409 isolate->language_singleton_regexp_matcher();
1410 if (language_singleton_regexp_matcher == nullptr) {
1411 BuildLanguageTagRegexps(isolate);
1412 language_singleton_regexp_matcher =
1413 isolate->language_singleton_regexp_matcher();
1414 }
1415 return language_singleton_regexp_matcher;
1416 }
1417
GetLanguageTagRegexMatcher(Isolate * isolate)1418 icu::RegexMatcher* GetLanguageTagRegexMatcher(Isolate* isolate) {
1419 icu::RegexMatcher* language_tag_regexp_matcher =
1420 isolate->language_tag_regexp_matcher();
1421 if (language_tag_regexp_matcher == nullptr) {
1422 BuildLanguageTagRegexps(isolate);
1423 language_tag_regexp_matcher = isolate->language_tag_regexp_matcher();
1424 }
1425 return language_tag_regexp_matcher;
1426 }
1427
GetLanguageVariantRegexMatcher(Isolate * isolate)1428 icu::RegexMatcher* GetLanguageVariantRegexMatcher(Isolate* isolate) {
1429 icu::RegexMatcher* language_variant_regexp_matcher =
1430 isolate->language_variant_regexp_matcher();
1431 if (language_variant_regexp_matcher == nullptr) {
1432 BuildLanguageTagRegexps(isolate);
1433 language_variant_regexp_matcher =
1434 isolate->language_variant_regexp_matcher();
1435 }
1436 return language_variant_regexp_matcher;
1437 }
1438
1439 } // anonymous namespace
1440
ResolveLocale(Isolate * isolate,const char * service,Handle<Object> requestedLocales,Handle<Object> options)1441 MaybeHandle<JSObject> Intl::ResolveLocale(Isolate* isolate, const char* service,
1442 Handle<Object> requestedLocales,
1443 Handle<Object> options) {
1444 Handle<String> service_str =
1445 isolate->factory()->NewStringFromAsciiChecked(service);
1446
1447 Handle<JSFunction> resolve_locale_function = isolate->resolve_locale();
1448
1449 Handle<Object> result;
1450 Handle<Object> undefined_value = isolate->factory()->undefined_value();
1451 Handle<Object> args[] = {service_str, requestedLocales, options};
1452 ASSIGN_RETURN_ON_EXCEPTION(
1453 isolate, result,
1454 Execution::Call(isolate, resolve_locale_function, undefined_value,
1455 arraysize(args), args),
1456 JSObject);
1457
1458 return Handle<JSObject>::cast(result);
1459 }
1460
CanonicalizeLocaleListJS(Isolate * isolate,Handle<Object> locales)1461 MaybeHandle<JSObject> Intl::CanonicalizeLocaleListJS(Isolate* isolate,
1462 Handle<Object> locales) {
1463 Handle<JSFunction> canonicalize_locale_list_function =
1464 isolate->canonicalize_locale_list();
1465
1466 Handle<Object> result;
1467 Handle<Object> undefined_value = isolate->factory()->undefined_value();
1468 Handle<Object> args[] = {locales};
1469 ASSIGN_RETURN_ON_EXCEPTION(
1470 isolate, result,
1471 Execution::Call(isolate, canonicalize_locale_list_function,
1472 undefined_value, arraysize(args), args),
1473 JSObject);
1474
1475 return Handle<JSObject>::cast(result);
1476 }
1477
GetStringOption(Isolate * isolate,Handle<JSReceiver> options,const char * property,std::vector<const char * > values,const char * service,std::unique_ptr<char[]> * result)1478 Maybe<bool> Intl::GetStringOption(Isolate* isolate, Handle<JSReceiver> options,
1479 const char* property,
1480 std::vector<const char*> values,
1481 const char* service,
1482 std::unique_ptr<char[]>* result) {
1483 Handle<String> property_str =
1484 isolate->factory()->NewStringFromAsciiChecked(property);
1485
1486 // 1. Let value be ? Get(options, property).
1487 Handle<Object> value;
1488 ASSIGN_RETURN_ON_EXCEPTION_VALUE(
1489 isolate, value,
1490 Object::GetPropertyOrElement(isolate, options, property_str),
1491 Nothing<bool>());
1492
1493 if (value->IsUndefined(isolate)) {
1494 return Just(false);
1495 }
1496
1497 // 2. c. Let value be ? ToString(value).
1498 Handle<String> value_str;
1499 ASSIGN_RETURN_ON_EXCEPTION_VALUE(
1500 isolate, value_str, Object::ToString(isolate, value), Nothing<bool>());
1501 std::unique_ptr<char[]> value_cstr = value_str->ToCString();
1502
1503 // 2. d. if values is not undefined, then
1504 if (values.size() > 0) {
1505 // 2. d. i. If values does not contain an element equal to value,
1506 // throw a RangeError exception.
1507 for (size_t i = 0; i < values.size(); i++) {
1508 if (strcmp(values.at(i), value_cstr.get()) == 0) {
1509 // 2. e. return value
1510 *result = std::move(value_cstr);
1511 return Just(true);
1512 }
1513 }
1514
1515 Handle<String> service_str =
1516 isolate->factory()->NewStringFromAsciiChecked(service);
1517 THROW_NEW_ERROR_RETURN_VALUE(
1518 isolate,
1519 NewRangeError(MessageTemplate::kValueOutOfRange, value, service_str,
1520 property_str),
1521 Nothing<bool>());
1522 }
1523
1524 // 2. e. return value
1525 *result = std::move(value_cstr);
1526 return Just(true);
1527 }
1528
GetBoolOption(Isolate * isolate,Handle<JSReceiver> options,const char * property,const char * service,bool * result)1529 V8_WARN_UNUSED_RESULT Maybe<bool> Intl::GetBoolOption(
1530 Isolate* isolate, Handle<JSReceiver> options, const char* property,
1531 const char* service, bool* result) {
1532 Handle<String> property_str =
1533 isolate->factory()->NewStringFromAsciiChecked(property);
1534
1535 // 1. Let value be ? Get(options, property).
1536 Handle<Object> value;
1537 ASSIGN_RETURN_ON_EXCEPTION_VALUE(
1538 isolate, value,
1539 Object::GetPropertyOrElement(isolate, options, property_str),
1540 Nothing<bool>());
1541
1542 // 2. If value is not undefined, then
1543 if (!value->IsUndefined(isolate)) {
1544 // 2. b. i. Let value be ToBoolean(value).
1545 *result = value->BooleanValue(isolate);
1546
1547 // 2. e. return value
1548 return Just(true);
1549 }
1550
1551 return Just(false);
1552 }
1553
1554 namespace {
1555
// Lowercases |c| if it is an ASCII uppercase letter ('A'..'Z'); every other
// character is returned unchanged.
char AsciiToLower(char c) {
  const bool is_ascii_upper = c >= 'A' && c <= 'Z';
  // Setting bit 5 (0x20) turns an ASCII uppercase letter into its lowercase
  // counterpart.
  return is_ascii_upper ? static_cast<char>(c | (1 << 5)) : c;
}
1562
1563 /**
1564 * Check the structural Validity of the language tag per ECMA 402 6.2.2:
1565 * - Well-formed per RFC 5646 2.1
1566 * - There are no duplicate variant subtags
1567 * - There are no duplicate singleton (extension) subtags
1568 *
1569 * One extra-check is done (from RFC 5646 2.2.9): the tag is compared
1570 * against the list of grandfathered tags. However, subtags for
1571 * primary/extended language, script, region, variant are not checked
1572 * against the IANA language subtag registry.
1573 *
 * ICU is too permissive and lets invalid tags, like
1575 * hant-cmn-cn, through.
1576 *
1577 * Returns false if the language tag is invalid.
1578 */
IsStructurallyValidLanguageTag(Isolate * isolate,const std::string & locale_in)1579 bool IsStructurallyValidLanguageTag(Isolate* isolate,
1580 const std::string& locale_in) {
1581 if (!String::IsAscii(locale_in.c_str(),
1582 static_cast<int>(locale_in.length()))) {
1583 return false;
1584 }
1585 std::string locale(locale_in);
1586 icu::RegexMatcher* language_tag_regexp_matcher =
1587 GetLanguageTagRegexMatcher(isolate);
1588
1589 // Check if it's well-formed, including grandfathered tags.
1590 icu::UnicodeString locale_uni(locale.c_str(), -1, US_INV);
1591 // Note: icu::RegexMatcher::reset does not make a copy of the input string
1592 // so cannot use a temp value; ie: cannot create it as a call parameter.
1593 language_tag_regexp_matcher->reset(locale_uni);
1594 UErrorCode status = U_ZERO_ERROR;
1595 bool is_valid_lang_tag = language_tag_regexp_matcher->matches(status);
1596 if (!is_valid_lang_tag || V8_UNLIKELY(U_FAILURE(status))) {
1597 return false;
1598 }
1599
1600 // Just return if it's a x- form. It's all private.
1601 if (locale.find("x-") == 0) {
1602 return true;
1603 }
1604
1605 // Check if there are any duplicate variants or singletons (extensions).
1606
1607 // Remove private use section.
1608 locale = locale.substr(0, locale.find("-x-"));
1609
1610 // Skip language since it can match variant regex, so we start from 1.
1611 // We are matching i-klingon here, but that's ok, since i-klingon-klingon
1612 // is not valid and would fail LANGUAGE_TAG_RE test.
1613 size_t pos = 0;
1614 std::vector<std::string> parts;
1615 while ((pos = locale.find("-")) != std::string::npos) {
1616 std::string token = locale.substr(0, pos);
1617 parts.push_back(token);
1618 locale = locale.substr(pos + 1);
1619 }
1620 if (locale.length() != 0) {
1621 parts.push_back(locale);
1622 }
1623
1624 icu::RegexMatcher* language_variant_regexp_matcher =
1625 GetLanguageVariantRegexMatcher(isolate);
1626
1627 icu::RegexMatcher* language_singleton_regexp_matcher =
1628 GetLanguageSingletonRegexMatcher(isolate);
1629
1630 std::vector<std::string> variants;
1631 std::vector<std::string> extensions;
1632 for (auto it = parts.begin() + 1; it != parts.end(); it++) {
1633 icu::UnicodeString part(it->data(), -1, US_INV);
1634 language_variant_regexp_matcher->reset(part);
1635 bool is_language_variant = language_variant_regexp_matcher->matches(status);
1636 if (V8_UNLIKELY(U_FAILURE(status))) {
1637 return false;
1638 }
1639 if (is_language_variant && extensions.size() == 0) {
1640 if (std::find(variants.begin(), variants.end(), *it) == variants.end()) {
1641 variants.push_back(*it);
1642 } else {
1643 return false;
1644 }
1645 }
1646
1647 language_singleton_regexp_matcher->reset(part);
1648 bool is_language_singleton =
1649 language_singleton_regexp_matcher->matches(status);
1650 if (V8_UNLIKELY(U_FAILURE(status))) {
1651 return false;
1652 }
1653 if (is_language_singleton) {
1654 if (std::find(extensions.begin(), extensions.end(), *it) ==
1655 extensions.end()) {
1656 extensions.push_back(*it);
1657 } else {
1658 return false;
1659 }
1660 }
1661 }
1662
1663 return true;
1664 }
1665
// Returns true iff |c| is an ASCII lowercase letter, 'a' through 'z'
// inclusive. (The upper bound must be inclusive; otherwise two-letter codes
// containing 'z', such as "zh" or "az", are not recognized.)
bool IsLowerAscii(char c) { return c >= 'a' && c <= 'z'; }
1667
IsTwoLetterLanguage(const std::string & locale)1668 bool IsTwoLetterLanguage(const std::string& locale) {
1669 // Two letters, both in range 'a'-'z'...
1670 return locale.length() == 2 && IsLowerAscii(locale[0]) &&
1671 IsLowerAscii(locale[1]);
1672 }
1673
// Returns true if |locale| is exactly one of the deprecated two-letter
// language codes: "in", "iw", "ji" or "jw".
bool IsDeprecatedLanguage(const std::string& locale) {
  static constexpr const char* kDeprecatedCodes[] = {"in", "iw", "ji", "jw"};
  for (const char* code : kDeprecatedCodes) {
    if (locale == code) return true;
  }
  return false;
}
1678
1679 // Reference:
1680 // https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry
IsGrandfatheredTagWithoutPreferredVaule(const std::string & locale)1681 bool IsGrandfatheredTagWithoutPreferredVaule(const std::string& locale) {
1682 if (V8_UNLIKELY(locale == "zh-min" || locale == "cel-gaulish")) return true;
1683 if (locale.length() > 6 /* i-mingo is 7 chars long */ &&
1684 V8_UNLIKELY(locale[0] == 'i' && locale[1] == '-')) {
1685 return locale.substr(2) == "default" || locale.substr(2) == "enochian" ||
1686 locale.substr(2) == "mingo";
1687 }
1688 return false;
1689 }
1690
1691 } // anonymous namespace
1692
CanonicalizeLanguageTag(Isolate * isolate,Handle<Object> locale_in)1693 Maybe<std::string> Intl::CanonicalizeLanguageTag(Isolate* isolate,
1694 Handle<Object> locale_in) {
1695 Handle<String> locale_str;
1696 // This does part of the validity checking spec'ed in CanonicalizeLocaleList:
1697 // 7c ii. If Type(kValue) is not String or Object, throw a TypeError
1698 // exception.
1699 // 7c iii. Let tag be ? ToString(kValue).
1700 // 7c iv. If IsStructurallyValidLanguageTag(tag) is false, throw a
1701 // RangeError exception.
1702
1703 if (locale_in->IsString()) {
1704 locale_str = Handle<String>::cast(locale_in);
1705 } else if (locale_in->IsJSReceiver()) {
1706 ASSIGN_RETURN_ON_EXCEPTION_VALUE(isolate, locale_str,
1707 Object::ToString(isolate, locale_in),
1708 Nothing<std::string>());
1709 } else {
1710 THROW_NEW_ERROR_RETURN_VALUE(isolate,
1711 NewTypeError(MessageTemplate::kLanguageID),
1712 Nothing<std::string>());
1713 }
1714 std::string locale(locale_str->ToCString().get());
1715
1716 // Optimize for the most common case: a 2-letter language code in the
1717 // canonical form/lowercase that is not one of the deprecated codes
1718 // (in, iw, ji, jw). Don't check for ~70 of 3-letter deprecated language
1719 // codes. Instead, let them be handled by ICU in the slow path. However,
1720 // fast-track 'fil' (3-letter canonical code).
1721 if ((IsTwoLetterLanguage(locale) && !IsDeprecatedLanguage(locale)) ||
1722 locale == "fil") {
1723 return Just(locale);
1724 }
1725
1726 // Because per BCP 47 2.1.1 language tags are case-insensitive, lowercase
1727 // the input before any more check.
1728 std::transform(locale.begin(), locale.end(), locale.begin(), AsciiToLower);
1729 if (!IsStructurallyValidLanguageTag(isolate, locale)) {
1730 THROW_NEW_ERROR_RETURN_VALUE(
1731 isolate,
1732 NewRangeError(MessageTemplate::kInvalidLanguageTag, locale_str),
1733 Nothing<std::string>());
1734 }
1735
1736 // ICU maps a few grandfathered tags to what looks like a regular language
1737 // tag even though IANA language tag registry does not have a preferred
1738 // entry map for them. Return them as they're with lowercasing.
1739 if (IsGrandfatheredTagWithoutPreferredVaule(locale)) {
1740 return Just(locale);
1741 }
1742
1743 // // ECMA 402 6.2.3
1744 // TODO(jshin): uloc_{for,to}TanguageTag can fail even for a structually valid
1745 // language tag if it's too long (much longer than 100 chars). Even if we
1746 // allocate a longer buffer, ICU will still fail if it's too long. Either
1747 // propose to Ecma 402 to put a limit on the locale length or change ICU to
1748 // handle long locale names better. See
1749 // https://unicode-org.atlassian.net/browse/ICU-13417
1750 UErrorCode error = U_ZERO_ERROR;
1751 char icu_result[ULOC_FULLNAME_CAPACITY];
1752 uloc_forLanguageTag(locale.c_str(), icu_result, ULOC_FULLNAME_CAPACITY,
1753 nullptr, &error);
1754 if (U_FAILURE(error) || error == U_STRING_NOT_TERMINATED_WARNING) {
1755 // TODO(jshin): This should not happen because the structural validity
1756 // is already checked. If that's the case, remove this.
1757 THROW_NEW_ERROR_RETURN_VALUE(
1758 isolate,
1759 NewRangeError(MessageTemplate::kInvalidLanguageTag, locale_str),
1760 Nothing<std::string>());
1761 }
1762
1763 // Force strict BCP47 rules.
1764 char result[ULOC_FULLNAME_CAPACITY];
1765 int32_t result_len = uloc_toLanguageTag(icu_result, result,
1766 ULOC_FULLNAME_CAPACITY, TRUE, &error);
1767
1768 if (U_FAILURE(error)) {
1769 THROW_NEW_ERROR_RETURN_VALUE(
1770 isolate,
1771 NewRangeError(MessageTemplate::kInvalidLanguageTag, locale_str),
1772 Nothing<std::string>());
1773 }
1774
1775 return Just(std::string(result, result_len));
1776 }
1777
CanonicalizeLocaleList(Isolate * isolate,Handle<Object> locales,bool only_return_one_result)1778 Maybe<std::vector<std::string>> Intl::CanonicalizeLocaleList(
1779 Isolate* isolate, Handle<Object> locales, bool only_return_one_result) {
1780 // 1. If locales is undefined, then
1781 if (locales->IsUndefined(isolate)) {
1782 // 1a. Return a new empty List.
1783 return Just(std::vector<std::string>());
1784 }
1785 // 2. Let seen be a new empty List.
1786 std::vector<std::string> seen;
1787 // 3. If Type(locales) is String, then
1788 if (locales->IsString()) {
1789 // 3a. Let O be CreateArrayFromList(« locales »).
1790 // Instead of creating a one-element array and then iterating over it,
1791 // we inline the body of the iteration:
1792 std::string canonicalized_tag;
1793 if (!CanonicalizeLanguageTag(isolate, locales).To(&canonicalized_tag)) {
1794 return Nothing<std::vector<std::string>>();
1795 }
1796 seen.push_back(canonicalized_tag);
1797 return Just(seen);
1798 }
1799 // 4. Else,
1800 // 4a. Let O be ? ToObject(locales).
1801 Handle<JSReceiver> o;
1802 ASSIGN_RETURN_ON_EXCEPTION_VALUE(isolate, o,
1803 Object::ToObject(isolate, locales),
1804 Nothing<std::vector<std::string>>());
1805 // 5. Let len be ? ToLength(? Get(O, "length")).
1806 Handle<Object> length_obj;
1807 ASSIGN_RETURN_ON_EXCEPTION_VALUE(isolate, length_obj,
1808 Object::GetLengthFromArrayLike(isolate, o),
1809 Nothing<std::vector<std::string>>());
1810 // TODO(jkummerow): Spec violation: strictly speaking, we have to iterate
1811 // up to 2^53-1 if {length_obj} says so. Since cases above 2^32 probably
1812 // don't happen in practice (and would be very slow if they do), we'll keep
1813 // the code simple for now by using a saturating to-uint32 conversion.
1814 double raw_length = length_obj->Number();
1815 uint32_t len =
1816 raw_length >= kMaxUInt32 ? kMaxUInt32 : static_cast<uint32_t>(raw_length);
1817 // 6. Let k be 0.
1818 // 7. Repeat, while k < len
1819 for (uint32_t k = 0; k < len; k++) {
1820 // 7a. Let Pk be ToString(k).
1821 // 7b. Let kPresent be ? HasProperty(O, Pk).
1822 LookupIterator it(isolate, o, k);
1823 // 7c. If kPresent is true, then
1824 if (!it.IsFound()) continue;
1825 // 7c i. Let kValue be ? Get(O, Pk).
1826 Handle<Object> k_value;
1827 ASSIGN_RETURN_ON_EXCEPTION_VALUE(isolate, k_value, Object::GetProperty(&it),
1828 Nothing<std::vector<std::string>>());
1829 // 7c ii. If Type(kValue) is not String or Object, throw a TypeError
1830 // exception.
1831 // 7c iii. Let tag be ? ToString(kValue).
1832 // 7c iv. If IsStructurallyValidLanguageTag(tag) is false, throw a
1833 // RangeError exception.
1834 // 7c v. Let canonicalizedTag be CanonicalizeLanguageTag(tag).
1835 std::string canonicalized_tag;
1836 if (!CanonicalizeLanguageTag(isolate, k_value).To(&canonicalized_tag)) {
1837 return Nothing<std::vector<std::string>>();
1838 }
1839 // 7c vi. If canonicalizedTag is not an element of seen, append
1840 // canonicalizedTag as the last element of seen.
1841 if (std::find(seen.begin(), seen.end(), canonicalized_tag) == seen.end()) {
1842 seen.push_back(canonicalized_tag);
1843 }
1844 // 7d. Increase k by 1. (See loop header.)
1845 // Optimization: some callers only need one result.
1846 if (only_return_one_result) return Just(seen);
1847 }
1848 // 8. Return seen.
1849 return Just(seen);
1850 }
1851
1852 // ecma-402/#sec-currencydigits
CurrencyDigits(Isolate * isolate,Handle<String> currency)1853 Handle<Smi> Intl::CurrencyDigits(Isolate* isolate, Handle<String> currency) {
1854 v8::Isolate* v8_isolate = reinterpret_cast<v8::Isolate*>(isolate);
1855 v8::String::Value currency_string(v8_isolate, v8::Utils::ToLocal(currency));
1856 CHECK_NOT_NULL(*currency_string);
1857
1858 DisallowHeapAllocation no_gc;
1859 UErrorCode status = U_ZERO_ERROR;
1860 uint32_t fraction_digits = ucurr_getDefaultFractionDigits(
1861 reinterpret_cast<const UChar*>(*currency_string), &status);
1862 // For missing currency codes, default to the most common, 2
1863 if (U_FAILURE(status)) fraction_digits = 2;
1864 return Handle<Smi>(Smi::FromInt(fraction_digits), isolate);
1865 }
1866
// Creates a new JS object using the native context's
// intl_number_format_function as constructor and attaches an
// icu::DecimalFormat, built from |locale|, |options| and |resolved|, to it.
//
// Returns an empty MaybeHandle if object construction throws. A null
// formatter from NumberFormat::InitializeNumberFormat is treated as fatal
// (CHECK).
MaybeHandle<JSObject> Intl::CreateNumberFormat(Isolate* isolate,
                                               Handle<String> locale,
                                               Handle<JSObject> options,
                                               Handle<JSObject> resolved) {
  Handle<JSFunction> constructor(
      isolate->native_context()->intl_number_format_function(), isolate);

  Handle<JSObject> local_object;
  ASSIGN_RETURN_ON_EXCEPTION(isolate, local_object,
                             JSObject::New(constructor, constructor), JSObject);

  // Set number formatter as embedder field of the resulting JS object.
  icu::DecimalFormat* number_format =
      NumberFormat::InitializeNumberFormat(isolate, locale, options, resolved);

  CHECK_NOT_NULL(number_format);

  // The raw ICU pointer is stored in the embedder field, cast to Smi* to fit
  // the field's type.
  local_object->SetEmbedderField(NumberFormat::kDecimalFormatIndex,
                                 reinterpret_cast<Smi*>(number_format));

  // Register a weak global handle whose callback is
  // NumberFormat::DeleteNumberFormat.
  // NOTE(review): presumably that callback frees the ICU formatter once the
  // JS object is collected - confirm against its definition.
  Handle<Object> wrapper = isolate->global_handles()->Create(*local_object);
  GlobalHandles::MakeWeak(wrapper.location(), wrapper.location(),
                          NumberFormat::DeleteNumberFormat,
                          WeakCallbackType::kInternalFields);
  return local_object;
}
1893
1894 /**
1895 * Parses Unicode extension into key - value map.
1896 * Returns empty object if the extension string is invalid.
1897 * We are not concerned with the validity of the values at this point.
1898 * 'attribute' in RFC 6047 is not supported. Keys without explicit
1899 * values are assigned UNDEFINED.
1900 * TODO(jshin): Fix the handling of 'attribute' (in RFC 6047, but none
1901 * has been defined so that it's not used) and boolean keys without
1902 * an explicit value.
1903 */
ParseExtension(Isolate * isolate,const std::string & extension,std::map<std::string,std::string> & out)1904 void Intl::ParseExtension(Isolate* isolate, const std::string& extension,
1905 std::map<std::string, std::string>& out) {
1906 if (extension.compare(0, 3, "-u-") != 0) return;
1907
1908 // Key is {2}alphanum, value is {3,8}alphanum.
1909 // Some keys may not have explicit values (booleans).
1910 std::string key;
1911 std::string value;
1912 // Skip the "-u-".
1913 size_t start = 3;
1914 size_t end;
1915 do {
1916 end = extension.find("-", start);
1917 size_t length =
1918 (end == std::string::npos) ? extension.length() - start : end - start;
1919 std::string element = extension.substr(start, length);
1920 // Key is {2}alphanum
1921 if (length == 2) {
1922 if (!key.empty()) {
1923 out.insert(std::pair<std::string, std::string>(key, value));
1924 value.clear();
1925 }
1926 key = element;
1927 // value is {3,8}alphanum.
1928 } else if (length >= 3 && length <= 8 && !key.empty()) {
1929 value = value.empty() ? element : (value + "-" + element);
1930 } else {
1931 return;
1932 }
1933 start = end + 1;
1934 } while (end != std::string::npos);
1935 if (!key.empty()) out.insert(std::pair<std::string, std::string>(key, value));
1936 }
1937
1938 namespace {
1939
IsAToZ(char ch)1940 bool IsAToZ(char ch) {
1941 return IsInRange(AsciiAlphaToLower(ch), 'a', 'z');
1942 }
1943
1944 } // namespace
1945
1946 // Verifies that the input is a well-formed ISO 4217 currency code.
1947 // ecma402/#sec-currency-codes
IsWellFormedCurrencyCode(Isolate * isolate,Handle<String> currency)1948 bool Intl::IsWellFormedCurrencyCode(Isolate* isolate, Handle<String> currency) {
1949 // 2. If the number of elements in normalized is not 3, return false.
1950 if (currency->length() != 3) return false;
1951
1952 currency = String::Flatten(isolate, currency);
1953 {
1954 DisallowHeapAllocation no_gc;
1955 String::FlatContent flat = currency->GetFlatContent();
1956
1957 // 1. Let normalized be the result of mapping currency to upper case as
1958 // described in 6.1. 3. If normalized contains any character that is not in
1959 // the range "A" to "Z" (U+0041 to U+005A), return false. 4. Return true.
1960 // Don't uppercase to test. It could convert invalid code into a valid one.
1961 // For example \u00DFP (Eszett+P) becomes SSP.
1962 return (IsAToZ(flat.Get(0)) && IsAToZ(flat.Get(1)) && IsAToZ(flat.Get(2)));
1963 }
1964 }
1965
1966 // ecma402 #sup-string.prototype.tolocalelowercase
1967 // ecma402 #sup-string.prototype.tolocaleuppercase
StringLocaleConvertCase(Isolate * isolate,Handle<String> s,bool to_upper,Handle<Object> locales)1968 MaybeHandle<String> Intl::StringLocaleConvertCase(Isolate* isolate,
1969 Handle<String> s,
1970 bool to_upper,
1971 Handle<Object> locales) {
1972 std::vector<std::string> requested_locales;
1973 if (!CanonicalizeLocaleList(isolate, locales, true).To(&requested_locales)) {
1974 return MaybeHandle<String>();
1975 }
1976 std::string requested_locale = requested_locales.size() == 0
1977 ? Intl::DefaultLocale(isolate)
1978 : requested_locales[0];
1979 size_t dash = requested_locale.find("-");
1980 if (dash != std::string::npos) {
1981 requested_locale = requested_locale.substr(0, dash);
1982 }
1983
1984 // Primary language tag can be up to 8 characters long in theory.
1985 // https://tools.ietf.org/html/bcp47#section-2.2.1
1986 DCHECK_LE(requested_locale.length(), 8);
1987 s = String::Flatten(isolate, s);
1988
1989 // All the languages requiring special-handling have two-letter codes.
1990 // Note that we have to check for '!= 2' here because private-use language
1991 // tags (x-foo) or grandfathered irregular tags (e.g. i-enochian) would have
1992 // only 'x' or 'i' when they get here.
1993 if (V8_UNLIKELY(requested_locale.length() != 2)) {
1994 return ConvertCase(s, to_upper, isolate);
1995 }
1996 // TODO(jshin): Consider adding a fast path for ASCII or Latin-1. The fastpath
1997 // in the root locale needs to be adjusted for az, lt and tr because even case
1998 // mapping of ASCII range characters are different in those locales.
1999 // Greek (el) does not require any adjustment.
2000 if (V8_UNLIKELY((requested_locale == "tr") || (requested_locale == "el") ||
2001 (requested_locale == "lt") || (requested_locale == "az"))) {
2002 return LocaleConvertCase(s, isolate, to_upper, requested_locale.c_str());
2003 } else {
2004 return ConvertCase(s, to_upper, isolate);
2005 }
2006 }
2007
StringLocaleCompare(Isolate * isolate,Handle<String> string1,Handle<String> string2,Handle<Object> locales,Handle<Object> options)2008 MaybeHandle<Object> Intl::StringLocaleCompare(Isolate* isolate,
2009 Handle<String> string1,
2010 Handle<String> string2,
2011 Handle<Object> locales,
2012 Handle<Object> options) {
2013 Factory* factory = isolate->factory();
2014 Handle<JSObject> collator;
2015 ASSIGN_RETURN_ON_EXCEPTION(
2016 isolate, collator,
2017 CachedOrNewService(isolate, factory->NewStringFromStaticChars("collator"),
2018 locales, options, factory->undefined_value()),
2019 Object);
2020 CHECK(collator->IsJSCollator());
2021 return Intl::CompareStrings(isolate, Handle<JSCollator>::cast(collator),
2022 string1, string2);
2023 }
2024
// ecma402/#sec-collator-comparestrings
//
// Compares |string1| and |string2| with the ICU collator held by |collator|
// and returns the resulting UCollationResult as a JS number.
Handle<Object> Intl::CompareStrings(Isolate* isolate,
                                    Handle<JSCollator> collator,
                                    Handle<String> string1,
                                    Handle<String> string2) {
  Factory* factory = isolate->factory();
  icu::Collator* icu_collator = collator->icu_collator()->raw();
  CHECK_NOT_NULL(icu_collator);

  // Flatten before entering the no-allocation scope below, where the flat
  // contents are read.
  string1 = String::Flatten(isolate, string1);
  string2 = String::Flatten(isolate, string2);

  UCollationResult result;
  UErrorCode status = U_ZERO_ERROR;
  {
    // Heap allocation is disallowed while the flat contents are in use.
    DisallowHeapAllocation no_gc;
    int32_t length1 = string1->length();
    int32_t length2 = string2->length();
    String::FlatContent flat1 = string1->GetFlatContent();
    String::FlatContent flat2 = string2->GetFlatContent();
    // |sap1| / |sap2| are handed to GetUCharBufferFromFlat.
    // NOTE(review): presumably they own a temporary UTF-16 copy when the flat
    // content is one-byte - confirm against the helper.
    std::unique_ptr<uc16[]> sap1;
    std::unique_ptr<uc16[]> sap2;
    // NOTE(review): the (FALSE, buffer, length) UnicodeString constructor is
    // understood to alias the buffer rather than copy it, so the buffers must
    // outlive the compare() call - confirm against the ICU documentation.
    icu::UnicodeString string_val1(
        FALSE, GetUCharBufferFromFlat(flat1, &sap1, length1), length1);
    icu::UnicodeString string_val2(
        FALSE, GetUCharBufferFromFlat(flat2, &sap2, length2), length2);
    result = icu_collator->compare(string_val1, string_val2, status);
  }
  // A failed comparison is only detected in debug builds.
  DCHECK(U_SUCCESS(status));

  return factory->NewNumberFromInt(result);
}
2057
2058 // ecma402/#sup-properties-of-the-number-prototype-object
NumberToLocaleString(Isolate * isolate,Handle<Object> num,Handle<Object> locales,Handle<Object> options)2059 MaybeHandle<String> Intl::NumberToLocaleString(Isolate* isolate,
2060 Handle<Object> num,
2061 Handle<Object> locales,
2062 Handle<Object> options) {
2063 Factory* factory = isolate->factory();
2064 Handle<JSObject> number_format_holder;
2065 // 2. Let numberFormat be ? Construct(%NumberFormat%, « locales, options »).
2066 ASSIGN_RETURN_ON_EXCEPTION(
2067 isolate, number_format_holder,
2068 CachedOrNewService(isolate,
2069 factory->NewStringFromStaticChars("numberformat"),
2070 locales, options, factory->undefined_value()),
2071 String);
2072 DCHECK(
2073 Intl::IsObjectOfType(isolate, number_format_holder, Intl::kNumberFormat));
2074 Handle<Object> number_obj;
2075 ASSIGN_RETURN_ON_EXCEPTION(isolate, number_obj,
2076 Object::ToNumber(isolate, num), String);
2077
2078 // Spec treats -0 and +0 as 0.
2079 double number = number_obj->Number() + 0;
2080 // Return FormatNumber(numberFormat, x).
2081 return NumberFormat::FormatNumber(isolate, number_format_holder, number);
2082 }
2083
2084 // ecma402/#sec-defaultnumberoption
DefaultNumberOption(Isolate * isolate,Handle<Object> value,int min,int max,int fallback,Handle<String> property)2085 Maybe<int> Intl::DefaultNumberOption(Isolate* isolate, Handle<Object> value,
2086 int min, int max, int fallback,
2087 Handle<String> property) {
2088 // 2. Else, return fallback.
2089 if (value->IsUndefined()) return Just(fallback);
2090
2091 // 1. If value is not undefined, then
2092 // a. Let value be ? ToNumber(value).
2093 Handle<Object> value_num;
2094 ASSIGN_RETURN_ON_EXCEPTION_VALUE(
2095 isolate, value_num, Object::ToNumber(isolate, value), Nothing<int>());
2096 DCHECK(value_num->IsNumber());
2097
2098 // b. If value is NaN or less than minimum or greater than maximum, throw a
2099 // RangeError exception.
2100 if (value_num->IsNaN() || value_num->Number() < min ||
2101 value_num->Number() > max) {
2102 THROW_NEW_ERROR_RETURN_VALUE(
2103 isolate,
2104 NewRangeError(MessageTemplate::kPropertyValueOutOfRange, property),
2105 Nothing<int>());
2106 }
2107
2108 // The max and min arguments are integers and the above check makes
2109 // sure that we are within the integer range making this double to
2110 // int conversion safe.
2111 //
2112 // c. Return floor(value).
2113 return Just(FastD2I(floor(value_num->Number())));
2114 }
2115
2116 // ecma402/#sec-getnumberoption
GetNumberOption(Isolate * isolate,Handle<JSReceiver> options,Handle<String> property,int min,int max,int fallback)2117 Maybe<int> Intl::GetNumberOption(Isolate* isolate, Handle<JSReceiver> options,
2118 Handle<String> property, int min, int max,
2119 int fallback) {
2120 // 1. Let value be ? Get(options, property).
2121 Handle<Object> value;
2122 ASSIGN_RETURN_ON_EXCEPTION_VALUE(
2123 isolate, value, JSReceiver::GetProperty(isolate, options, property),
2124 Nothing<int>());
2125
2126 // Return ? DefaultNumberOption(value, minimum, maximum, fallback).
2127 return DefaultNumberOption(isolate, value, min, max, fallback, property);
2128 }
2129
GetNumberOption(Isolate * isolate,Handle<JSReceiver> options,const char * property,int min,int max,int fallback)2130 Maybe<int> Intl::GetNumberOption(Isolate* isolate, Handle<JSReceiver> options,
2131 const char* property, int min, int max,
2132 int fallback) {
2133 Handle<String> property_str =
2134 isolate->factory()->NewStringFromAsciiChecked(property);
2135 return GetNumberOption(isolate, options, property_str, min, max, fallback);
2136 }
2137
// Reads the digit-related options (minimumIntegerDigits,
// minimumFractionDigits, maximumFractionDigits, minimumSignificantDigits,
// maximumSignificantDigits) from |options| and applies them to
// |number_format|. The step numbers in the comments below follow the
// corresponding ECMA 402 abstract operation.
//
// |mnfd_default| / |mxfd_default| are the fallbacks for the minimum / maximum
// fraction digits. All option reads happen before |number_format| is
// modified for steps 11-13; the significant-digit values are validated
// afterwards, so a RangeError there leaves those settings applied.
//
// Returns Just(true) on success and Nothing<bool>() if reading or validating
// any option throws.
Maybe<bool> Intl::SetNumberFormatDigitOptions(Isolate* isolate,
                                              icu::DecimalFormat* number_format,
                                              Handle<JSReceiver> options,
                                              int mnfd_default,
                                              int mxfd_default) {
  CHECK_NOT_NULL(number_format);

  // 5. Let mnid be ? GetNumberOption(options, "minimumIntegerDigits", 1, 21,
  // 1).
  int mnid;
  if (!GetNumberOption(isolate, options, "minimumIntegerDigits", 1, 21, 1)
           .To(&mnid)) {
    return Nothing<bool>();
  }

  // 6. Let mnfd be ? GetNumberOption(options, "minimumFractionDigits", 0, 20,
  // mnfdDefault).
  int mnfd;
  if (!GetNumberOption(isolate, options, "minimumFractionDigits", 0, 20,
                       mnfd_default)
           .To(&mnfd)) {
    return Nothing<bool>();
  }

  // 7. Let mxfdActualDefault be max( mnfd, mxfdDefault ).
  int mxfd_actual_default = std::max(mnfd, mxfd_default);

  // 8. Let mxfd be ? GetNumberOption(options,
  // "maximumFractionDigits", mnfd, 20, mxfdActualDefault).
  int mxfd;
  if (!GetNumberOption(isolate, options, "maximumFractionDigits", mnfd, 20,
                       mxfd_actual_default)
           .To(&mxfd)) {
    return Nothing<bool>();
  }

  // 9. Let mnsd be ? Get(options, "minimumSignificantDigits").
  // Only read here; validation is deferred to step 14.
  Handle<Object> mnsd_obj;
  Handle<String> mnsd_str =
      isolate->factory()->NewStringFromStaticChars("minimumSignificantDigits");
  ASSIGN_RETURN_ON_EXCEPTION_VALUE(
      isolate, mnsd_obj, JSReceiver::GetProperty(isolate, options, mnsd_str),
      Nothing<bool>());

  // 10. Let mxsd be ? Get(options, "maximumSignificantDigits").
  Handle<Object> mxsd_obj;
  Handle<String> mxsd_str =
      isolate->factory()->NewStringFromStaticChars("maximumSignificantDigits");
  ASSIGN_RETURN_ON_EXCEPTION_VALUE(
      isolate, mxsd_obj, JSReceiver::GetProperty(isolate, options, mxsd_str),
      Nothing<bool>());

  // 11. Set intlObj.[[MinimumIntegerDigits]] to mnid.
  number_format->setMinimumIntegerDigits(mnid);

  // 12. Set intlObj.[[MinimumFractionDigits]] to mnfd.
  number_format->setMinimumFractionDigits(mnfd);

  // 13. Set intlObj.[[MaximumFractionDigits]] to mxfd.
  number_format->setMaximumFractionDigits(mxfd);

  bool significant_digits_used = false;
  // 14. If mnsd is not undefined or mxsd is not undefined, then
  if (!mnsd_obj->IsUndefined(isolate) || !mxsd_obj->IsUndefined(isolate)) {
    // 14. a. Let mnsd be ? DefaultNumberOption(mnsd, 1, 21, 1).
    int mnsd;
    if (!DefaultNumberOption(isolate, mnsd_obj, 1, 21, 1, mnsd_str).To(&mnsd)) {
      return Nothing<bool>();
    }

    // 14. b. Let mxsd be ? DefaultNumberOption(mxsd, mnsd, 21, 21).
    int mxsd;
    if (!DefaultNumberOption(isolate, mxsd_obj, mnsd, 21, 21, mxsd_str)
             .To(&mxsd)) {
      return Nothing<bool>();
    }

    significant_digits_used = true;

    // 14. c. Set intlObj.[[MinimumSignificantDigits]] to mnsd.
    number_format->setMinimumSignificantDigits(mnsd);

    // 14. d. Set intlObj.[[MaximumSignificantDigits]] to mxsd.
    number_format->setMaximumSignificantDigits(mxsd);
  }

  // Significant-digit mode is switched on only when at least one of the two
  // significant-digit options was supplied; rounding is always half-up.
  number_format->setSignificantDigitsUsed(significant_digits_used);
  number_format->setRoundingMode(icu::DecimalFormat::kRoundHalfUp);
  return Just(true);
}
2228
2229 namespace {
2230
// ECMA 402 9.2.2 BestAvailableLocale(availableLocales, locale)
// https://tc39.github.io/ecma402/#sec-bestavailablelocale
//
// Returns the longest prefix of |locale| (cut at "-" boundaries) that is an
// element of |available_locales|, or the empty string if there is none (the
// spec's "undefined").
//
// |available_locales| is taken by const reference so that the set is not
// copied on every call; |locale| is taken by value because it is shortened
// in place while searching.
std::string BestAvailableLocale(const std::set<std::string>& available_locales,
                                std::string locale) {
  const char separator = '-';

  // 1. Let candidate be locale.
  // 2. Repeat,
  while (true) {
    // 2.a. If availableLocales contains an element equal to candidate, return
    //      candidate.
    if (available_locales.count(locale) != 0) {
      return locale;
    }
    // 2.b. Let pos be the character index of the last occurrence of "-"
    //      (U+002D) within candidate. If that character does not occur, return
    //      undefined.
    size_t pos = locale.rfind(separator);
    if (pos == std::string::npos) {
      return "";
    }
    // 2.c. If pos ≥ 2 and the character "-" occurs at index pos-2 of candidate,
    //      decrease pos by 2. (This also drops a trailing singleton subtag.)
    if (pos >= 2 && locale[pos - 2] == separator) {
      pos -= 2;
    }
    // 2.d. Let candidate be the substring of candidate from position 0,
    //      inclusive, to position pos, exclusive.
    locale.resize(pos);
  }
}
2262
2263 #define ANY_EXTENSION_REGEXP "-[a-z0-9]{1}-.*"
2264
GetAnyExtensionRegexpMatcher()2265 std::unique_ptr<icu::RegexMatcher> GetAnyExtensionRegexpMatcher() {
2266 UErrorCode status = U_ZERO_ERROR;
2267 std::unique_ptr<icu::RegexMatcher> matcher(new icu::RegexMatcher(
2268 icu::UnicodeString(ANY_EXTENSION_REGEXP, -1, US_INV), 0, status));
2269 DCHECK(U_SUCCESS(status));
2270 return matcher;
2271 }
2272
2273 #undef ANY_EXTENSION_REGEXP
2274
2275 // ECMA 402 9.2.7 LookupSupportedLocales(availableLocales, requestedLocales)
2276 // https://tc39.github.io/ecma402/#sec-lookupsupportedlocales
LookupSupportedLocales(std::set<std::string> available_locales,std::vector<std::string> requested_locales)2277 std::vector<std::string> LookupSupportedLocales(
2278 std::set<std::string> available_locales,
2279 std::vector<std::string> requested_locales) {
2280 std::unique_ptr<icu::RegexMatcher> matcher = GetAnyExtensionRegexpMatcher();
2281
2282 // 1. Let subset be a new empty List.
2283 std::vector<std::string> subset;
2284
2285 // 2. For each element locale of requestedLocales in List order, do
2286 for (auto locale : requested_locales) {
2287 // 2.a. Let noExtensionsLocale be the String value that is locale with all
2288 // Unicode locale extension sequences removed.
2289 icu::UnicodeString locale_uni(locale.c_str(), -1, US_INV);
2290 // TODO(bstell): look at using uloc_forLanguageTag to convert the language
2291 // tag to locale id
2292 // TODO(bstell): look at using uloc_getBaseName to just get the name without
2293 // all the keywords
2294 matcher->reset(locale_uni);
2295 UErrorCode status = U_ZERO_ERROR;
2296 // TODO(bstell): need to determine if this is the correct behavior.
2297 // This matches the JS implementation but might not match the spec.
2298 // According to
2299 // https://tc39.github.io/ecma402/#sec-unicode-locale-extension-sequences:
2300 //
2301 // This standard uses the term "Unicode locale extension sequence" for
2302 // any substring of a language tag that is not part of a private use
2303 // subtag sequence, starts with a separator "-" and the singleton "u",
2304 // and includes the maximum sequence of following non-singleton subtags
2305 // and their preceding "-" separators.
2306 //
2307 // According to the spec a locale "en-t-aaa-u-bbb-v-ccc-x-u-ddd", should
2308 // remove only the "-u-bbb" part, and keep everything else, whereas this
2309 // regexp matcher would leave only the "en".
2310 icu::UnicodeString no_extensions_locale_uni =
2311 matcher->replaceAll("", status);
2312 DCHECK(U_SUCCESS(status));
2313 std::string no_extensions_locale;
2314 no_extensions_locale_uni.toUTF8String(no_extensions_locale);
2315 // 2.b. Let availableLocale be BestAvailableLocale(availableLocales,
2316 // noExtensionsLocale).
2317 std::string available_locale =
2318 BestAvailableLocale(available_locales, no_extensions_locale);
2319 // 2.c. If availableLocale is not undefined, append locale to the end of
2320 // subset.
2321 if (!available_locale.empty()) {
2322 subset.push_back(locale);
2323 }
2324 }
2325
2326 // 3. Return subset.
2327 return subset;
2328 }
2329
2330 // ECMA 402 9.2.8 BestFitSupportedLocales(availableLocales, requestedLocales)
2331 // https://tc39.github.io/ecma402/#sec-bestfitsupportedlocales
BestFitSupportedLocales(std::set<std::string> available_locales,std::vector<std::string> requested_locales)2332 std::vector<std::string> BestFitSupportedLocales(
2333 std::set<std::string> available_locales,
2334 std::vector<std::string> requested_locales) {
2335 return LookupSupportedLocales(available_locales, requested_locales);
2336 }
2337
// Locale matching algorithm selected through the "localeMatcher" option.
enum MatcherOption {
  kBestFit = 0,  // "best fit"; also used when no option is supplied.
  kLookup = 1    // "lookup".
};
2339
2340 // TODO(bstell): should this be moved somewhere where it is reusable?
2341 // Implement steps 5, 6, 7 for ECMA 402 9.2.9 SupportedLocales
2342 // https://tc39.github.io/ecma402/#sec-supportedlocales
CreateReadOnlyArray(Isolate * isolate,std::vector<std::string> elements)2343 MaybeHandle<JSObject> CreateReadOnlyArray(Isolate* isolate,
2344 std::vector<std::string> elements) {
2345 Factory* factory = isolate->factory();
2346 if (elements.size() >= kMaxUInt32) {
2347 THROW_NEW_ERROR(
2348 isolate, NewRangeError(MessageTemplate::kInvalidArrayLength), JSObject);
2349 }
2350
2351 PropertyAttributes attr =
2352 static_cast<PropertyAttributes>(READ_ONLY | DONT_DELETE);
2353
2354 // 5. Let subset be CreateArrayFromList(elements).
2355 // 6. Let keys be subset.[[OwnPropertyKeys]]().
2356 Handle<JSArray> subset = factory->NewJSArray(0);
2357
2358 // 7. For each element P of keys in List order, do
2359 uint32_t length = static_cast<uint32_t>(elements.size());
2360 for (uint32_t i = 0; i < length; i++) {
2361 const std::string& part = elements[i];
2362 Handle<String> value =
2363 factory->NewStringFromUtf8(CStrVector(part.c_str())).ToHandleChecked();
2364 JSObject::AddDataElement(subset, i, value, attr);
2365 }
2366
2367 // 7.a. Let desc be PropertyDescriptor { [[Configurable]]: false,
2368 // [[Writable]]: false }.
2369 PropertyDescriptor desc;
2370 desc.set_writable(false);
2371 desc.set_configurable(false);
2372
2373 // 7.b. Perform ! DefinePropertyOrThrow(subset, P, desc).
2374 JSArray::ArraySetLength(isolate, subset, &desc, kThrowOnError).ToChecked();
2375 return subset;
2376 }
2377
2378 // ECMA 402 9.2.9 SupportedLocales(availableLocales, requestedLocales, options)
2379 // https://tc39.github.io/ecma402/#sec-supportedlocales
SupportedLocales(Isolate * isolate,std::string service,std::set<std::string> available_locales,std::vector<std::string> requested_locales,Handle<Object> options)2380 MaybeHandle<JSObject> SupportedLocales(
2381 Isolate* isolate, std::string service,
2382 std::set<std::string> available_locales,
2383 std::vector<std::string> requested_locales, Handle<Object> options) {
2384 std::vector<std::string> supported_locales;
2385
2386 // 1. If options is not undefined, then
2387 // a. Let options be ? ToObject(options).
2388 // b. Let matcher be ? GetOption(options, "localeMatcher", "string",
2389 // « "lookup", "best fit" », "best fit").
2390 // 2. Else, let matcher be "best fit".
2391 MatcherOption matcher = kBestFit;
2392 if (!options->IsUndefined(isolate)) {
2393 Handle<JSReceiver> options_obj;
2394 ASSIGN_RETURN_ON_EXCEPTION(isolate, options_obj,
2395 Object::ToObject(isolate, options), JSObject);
2396 std::unique_ptr<char[]> matcher_str = nullptr;
2397 std::vector<const char*> matcher_values = {"lookup", "best fit"};
2398 Maybe<bool> maybe_found_matcher =
2399 Intl::GetStringOption(isolate, options_obj, "localeMatcher",
2400 matcher_values, service.c_str(), &matcher_str);
2401 MAYBE_RETURN(maybe_found_matcher, MaybeHandle<JSObject>());
2402 if (maybe_found_matcher.FromJust()) {
2403 DCHECK_NOT_NULL(matcher_str.get());
2404 if (strcmp(matcher_str.get(), "lookup") == 0) {
2405 matcher = kLookup;
2406 }
2407 }
2408 }
2409
2410 // 3. If matcher is "best fit", then
2411 // a. Let supportedLocales be BestFitSupportedLocales(availableLocales,
2412 // requestedLocales).
2413 if (matcher == kBestFit) {
2414 supported_locales =
2415 BestFitSupportedLocales(available_locales, requested_locales);
2416 } else {
2417 // 4. Else,
2418 // a. Let supportedLocales be LookupSupportedLocales(availableLocales,
2419 // requestedLocales).
2420 DCHECK_EQ(matcher, kLookup);
2421 supported_locales =
2422 LookupSupportedLocales(available_locales, requested_locales);
2423 }
2424
2425 // TODO(jkummerow): Possibly revisit why the spec has the individual entries
2426 // readonly but the array is not frozen.
2427 // https://github.com/tc39/ecma402/issues/258
2428
2429 // 5. Let subset be CreateArrayFromList(supportedLocales).
2430 // 6. Let keys be subset.[[OwnPropertyKeys]]().
2431 // 7. For each element P of keys in List order, do
2432 // a. Let desc be PropertyDescriptor { [[Configurable]]: false,
2433 // [[Writable]]: false }.
2434 // b. Perform ! DefinePropertyOrThrow(subset, P, desc).
2435 MaybeHandle<JSObject> subset =
2436 CreateReadOnlyArray(isolate, supported_locales);
2437
2438 // 8. Return subset.
2439 return subset;
2440 }
2441 } // namespace
2442
2443 // ECMA 402 10.2.2 Intl.Collator.supportedLocalesOf
2444 // https://tc39.github.io/ecma402/#sec-intl.collator.supportedlocalesof
2445 // of Intl::SupportedLocalesOf thru JS
SupportedLocalesOf(Isolate * isolate,Handle<String> service,Handle<Object> locales_in,Handle<Object> options_in)2446 MaybeHandle<JSObject> Intl::SupportedLocalesOf(Isolate* isolate,
2447 Handle<String> service,
2448 Handle<Object> locales_in,
2449 Handle<Object> options_in) {
2450 // Let availableLocales be %Collator%.[[AvailableLocales]].
2451 IcuService icu_service = Intl::StringToIcuService(service);
2452 std::set<std::string> available_locales = GetAvailableLocales(icu_service);
2453 std::vector<std::string> requested_locales;
2454 // Let requestedLocales be ? CanonicalizeLocaleList(locales).
2455 bool got_requested_locales =
2456 CanonicalizeLocaleList(isolate, locales_in, false).To(&requested_locales);
2457 if (!got_requested_locales) {
2458 return MaybeHandle<JSObject>();
2459 }
2460
2461 // Return ? SupportedLocales(availableLocales, requestedLocales, options).
2462 std::string service_str(service->ToCString().get());
2463 return SupportedLocales(isolate, service_str, available_locales,
2464 requested_locales, options_in);
2465 }
2466
2467 } // namespace internal
2468 } // namespace v8
2469