1 // Copyright 2013 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #ifndef V8_INTL_SUPPORT
6 #error Internationalization is expected to be enabled.
7 #endif // V8_INTL_SUPPORT
8
9 #include "src/objects/intl-objects.h"
10
11 #include <algorithm>
12 #include <memory>
13 #include <string>
14 #include <vector>
15
16 #include "src/api/api-inl.h"
17 #include "src/execution/isolate.h"
18 #include "src/handles/global-handles.h"
19 #include "src/heap/factory.h"
20 #include "src/objects/js-collator-inl.h"
21 #include "src/objects/js-date-time-format-inl.h"
22 #include "src/objects/js-locale-inl.h"
23 #include "src/objects/js-locale.h"
24 #include "src/objects/js-number-format-inl.h"
25 #include "src/objects/objects-inl.h"
26 #include "src/objects/property-descriptor.h"
27 #include "src/objects/smi.h"
28 #include "src/objects/string.h"
29 #include "src/strings/string-case.h"
30 #include "unicode/basictz.h"
31 #include "unicode/brkiter.h"
32 #include "unicode/calendar.h"
33 #include "unicode/coll.h"
34 #include "unicode/datefmt.h"
35 #include "unicode/decimfmt.h"
36 #include "unicode/formattedvalue.h"
37 #include "unicode/localebuilder.h"
38 #include "unicode/localematcher.h"
39 #include "unicode/locid.h"
40 #include "unicode/normalizer2.h"
41 #include "unicode/numberformatter.h"
42 #include "unicode/numfmt.h"
43 #include "unicode/numsys.h"
44 #include "unicode/timezone.h"
45 #include "unicode/ures.h"
46 #include "unicode/ustring.h"
47 #include "unicode/uvernum.h" // U_ICU_VERSION_MAJOR_NUM
48
// Compile-time guard: the build fails when the major version of the ICU
// headers is lower than V8_MINIMUM_ICU_VERSION. XSTR/STR stringify the
// expanded value of V8_MINIMUM_ICU_VERSION for the diagnostic message and are
// undefined again right after use.
#define XSTR(s) STR(s)
#define STR(s) #s
static_assert(
    V8_MINIMUM_ICU_VERSION <= U_ICU_VERSION_MAJOR_NUM,
    "v8 is required to build with ICU " XSTR(V8_MINIMUM_ICU_VERSION) " and up");
#undef STR
#undef XSTR
56
57 namespace v8 {
58 namespace internal {
59
60 namespace {
61
// Lowercase mapping for all 256 Latin-1 code units, indexed by the code unit.
// Entries 0x41-0x5A ('A'-'Z') map to 0x61-0x7A ('a'-'z') and entries
// 0xC0-0xDE map to 0xE0-0xFE, except 0xD7 which maps to itself. Every other
// entry maps to itself.
constexpr uint8_t kToLower[256] = {
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B,
    0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
    0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23,
    0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B,
    0x3C, 0x3D, 0x3E, 0x3F, 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73,
    0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B,
    0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, 0x80, 0x81, 0x82, 0x83,
    0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B,
    0x9C, 0x9D, 0x9E, 0x9F, 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7,
    0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, 0xB0, 0xB1, 0xB2, 0xB3,
    0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB,
    0xEC, 0xED, 0xEE, 0xEF, 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xD7,
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF, 0xE0, 0xE1, 0xE2, 0xE3,
    0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB,
    0xFC, 0xFD, 0xFE, 0xFF,
};
86
ToLatin1Lower(uint16_t ch)87 inline constexpr uint16_t ToLatin1Lower(uint16_t ch) {
88 return static_cast<uint16_t>(kToLower[ch]);
89 }
90
91 // Does not work for U+00DF (sharp-s), U+00B5 (micron), U+00FF.
ToLatin1Upper(uint16_t ch)92 inline constexpr uint16_t ToLatin1Upper(uint16_t ch) {
93 CONSTEXPR_DCHECK(ch != 0xDF && ch != 0xB5 && ch != 0xFF);
94 return ch &
95 ~((IsAsciiLower(ch) || (((ch & 0xE0) == 0xE0) && ch != 0xF7)) << 5);
96 }
97
98 template <typename Char>
ToUpperFastASCII(const Vector<const Char> & src,Handle<SeqOneByteString> result)99 bool ToUpperFastASCII(const Vector<const Char>& src,
100 Handle<SeqOneByteString> result) {
101 // Do a faster loop for the case where all the characters are ASCII.
102 uint16_t ored = 0;
103 int32_t index = 0;
104 for (auto it = src.begin(); it != src.end(); ++it) {
105 uint16_t ch = static_cast<uint16_t>(*it);
106 ored |= ch;
107 result->SeqOneByteStringSet(index++, ToAsciiUpper(ch));
108 }
109 return !(ored & ~0x7F);
110 }
111
// Code unit of the lowercase sharp-s (U+00DF).
const uint16_t sharp_s = 0xDF;
113
114 template <typename Char>
ToUpperOneByte(const Vector<const Char> & src,uint8_t * dest,int * sharp_s_count)115 bool ToUpperOneByte(const Vector<const Char>& src, uint8_t* dest,
116 int* sharp_s_count) {
117 // Still pretty-fast path for the input with non-ASCII Latin-1 characters.
118
119 // There are two special cases.
120 // 1. U+00B5 and U+00FF are mapped to a character beyond U+00FF.
121 // 2. Lower case sharp-S converts to "SS" (two characters)
122 *sharp_s_count = 0;
123 for (auto it = src.begin(); it != src.end(); ++it) {
124 uint16_t ch = static_cast<uint16_t>(*it);
125 if (V8_UNLIKELY(ch == sharp_s)) {
126 ++(*sharp_s_count);
127 continue;
128 }
129 if (V8_UNLIKELY(ch == 0xB5 || ch == 0xFF)) {
130 // Since this upper-cased character does not fit in an 8-bit string, we
131 // need to take the 16-bit path.
132 return false;
133 }
134 *dest++ = ToLatin1Upper(ch);
135 }
136
137 return true;
138 }
139
140 template <typename Char>
ToUpperWithSharpS(const Vector<const Char> & src,Handle<SeqOneByteString> result)141 void ToUpperWithSharpS(const Vector<const Char>& src,
142 Handle<SeqOneByteString> result) {
143 int32_t dest_index = 0;
144 for (auto it = src.begin(); it != src.end(); ++it) {
145 uint16_t ch = static_cast<uint16_t>(*it);
146 if (ch == sharp_s) {
147 result->SeqOneByteStringSet(dest_index++, 'S');
148 result->SeqOneByteStringSet(dest_index++, 'S');
149 } else {
150 result->SeqOneByteStringSet(dest_index++, ToLatin1Upper(ch));
151 }
152 }
153 }
154
FindFirstUpperOrNonAscii(String s,int length)155 inline int FindFirstUpperOrNonAscii(String s, int length) {
156 for (int index = 0; index < length; ++index) {
157 uint16_t ch = s.Get(index);
158 if (V8_UNLIKELY(IsAsciiUpper(ch) || ch & ~0x7F)) {
159 return index;
160 }
161 }
162 return length;
163 }
164
GetUCharBufferFromFlat(const String::FlatContent & flat,std::unique_ptr<uc16[]> * dest,int32_t length)165 const UChar* GetUCharBufferFromFlat(const String::FlatContent& flat,
166 std::unique_ptr<uc16[]>* dest,
167 int32_t length) {
168 DCHECK(flat.IsFlat());
169 if (flat.IsOneByte()) {
170 if (!*dest) {
171 dest->reset(NewArray<uc16>(length));
172 CopyChars(dest->get(), flat.ToOneByteVector().begin(), length);
173 }
174 return reinterpret_cast<const UChar*>(dest->get());
175 } else {
176 return reinterpret_cast<const UChar*>(flat.ToUC16Vector().begin());
177 }
178 }
179
180 template <typename T>
New(Isolate * isolate,Handle<JSFunction> constructor,Handle<Object> locales,Handle<Object> options,const char * method)181 MaybeHandle<T> New(Isolate* isolate, Handle<JSFunction> constructor,
182 Handle<Object> locales, Handle<Object> options,
183 const char* method) {
184 Handle<Map> map;
185 ASSIGN_RETURN_ON_EXCEPTION(
186 isolate, map,
187 JSFunction::GetDerivedMap(isolate, constructor, constructor), T);
188 return T::New(isolate, map, locales, options, method);
189 }
190 } // namespace
191
ToLatin1LowerTable()192 const uint8_t* Intl::ToLatin1LowerTable() { return &kToLower[0]; }
193
ToICUUnicodeString(Isolate * isolate,Handle<String> string)194 icu::UnicodeString Intl::ToICUUnicodeString(Isolate* isolate,
195 Handle<String> string) {
196 DCHECK(string->IsFlat());
197 DisallowHeapAllocation no_gc;
198 std::unique_ptr<uc16[]> sap;
199 // Short one-byte strings can be expanded on the stack to avoid allocating a
200 // temporary buffer.
201 constexpr int kShortStringSize = 80;
202 UChar short_string_buffer[kShortStringSize];
203 const UChar* uchar_buffer = nullptr;
204 const String::FlatContent& flat = string->GetFlatContent(no_gc);
205 int32_t length = string->length();
206 if (flat.IsOneByte() && length <= kShortStringSize) {
207 CopyChars(short_string_buffer, flat.ToOneByteVector().begin(), length);
208 uchar_buffer = short_string_buffer;
209 } else {
210 uchar_buffer = GetUCharBufferFromFlat(flat, &sap, length);
211 }
212 return icu::UnicodeString(uchar_buffer, length);
213 }
214
215 namespace {
ToICUStringPiece(Isolate * isolate,Handle<String> string)216 icu::StringPiece ToICUStringPiece(Isolate* isolate, Handle<String> string) {
217 DCHECK(string->IsFlat());
218 DisallowHeapAllocation no_gc;
219
220 const String::FlatContent& flat = string->GetFlatContent(no_gc);
221 if (!flat.IsOneByte()) return icu::StringPiece(nullptr, 0);
222
223 int32_t length = string->length();
224 const char* char_buffer =
225 reinterpret_cast<const char*>(flat.ToOneByteVector().begin());
226 if (!String::IsAscii(char_buffer, length)) {
227 return icu::StringPiece(nullptr, 0);
228 }
229
230 return icu::StringPiece(char_buffer, length);
231 }
232
// Converts the case of the flat string |s| with ICU's u_strToUpper (when
// |is_to_upper| is true) or u_strToLower, passing |lang| through as the
// locale argument. The result is always a two-byte string, except for empty
// input, which yields the canonical empty string. Returns an empty
// MaybeHandle when allocating the result string throws.
MaybeHandle<String> LocaleConvertCase(Isolate* isolate, Handle<String> s,
                                      bool is_to_upper, const char* lang) {
  auto case_converter = is_to_upper ? u_strToUpper : u_strToLower;
  int32_t src_length = s->length();
  // First guess: the output is as long as the input.
  int32_t dest_length = src_length;
  UErrorCode status;
  Handle<SeqTwoByteString> result;
  // Holds the widened copy of a one-byte source; filled on first use by
  // GetUCharBufferFromFlat and reused on the second pass.
  std::unique_ptr<uc16[]> sap;

  if (dest_length == 0) return ReadOnlyRoots(isolate).empty_string_handle();

  // This is not a real loop. It'll be executed only once (no overflow) or
  // twice (overflow).
  for (int i = 0; i < 2; ++i) {
    // Case conversion can increase the string length (e.g. sharp-S => SS) so
    // that we have to handle RangeError exceptions here.
    ASSIGN_RETURN_ON_EXCEPTION(
        isolate, result, isolate->factory()->NewRawTwoByteString(dest_length),
        String);
    DisallowHeapAllocation no_gc;
    DCHECK(s->IsFlat());
    String::FlatContent flat = s->GetFlatContent(no_gc);
    const UChar* src = GetUCharBufferFromFlat(flat, &sap, src_length);
    status = U_ZERO_ERROR;
    // The converter's return value is stored back into |dest_length|, so a
    // second pass allocates a string of that size.
    dest_length =
        case_converter(reinterpret_cast<UChar*>(result->GetChars(no_gc)),
                       dest_length, src, src_length, lang, &status);
    if (status != U_BUFFER_OVERFLOW_ERROR) break;
  }

  // In most cases, the output will fill the destination buffer completely
  // leading to an unterminated string (U_STRING_NOT_TERMINATED_WARNING).
  // Only in rare cases, it'll be shorter than the destination buffer and
  // |result| has to be truncated.
  DCHECK(U_SUCCESS(status));
  if (V8_LIKELY(status == U_STRING_NOT_TERMINATED_WARNING)) {
    DCHECK(dest_length == result->length());
    return result;
  }
  DCHECK(dest_length < result->length());
  return SeqString::Truncate(result, dest_length);
}
275
276 } // namespace
277
278 // A stripped-down version of ConvertToLower that can only handle flat one-byte
279 // strings and does not allocate. Note that {src} could still be, e.g., a
280 // one-byte sliced string with a two-byte parent string.
281 // Called from TF builtins.
ConvertOneByteToLower(String src,String dst)282 String Intl::ConvertOneByteToLower(String src, String dst) {
283 DCHECK_EQ(src.length(), dst.length());
284 DCHECK(src.IsOneByteRepresentation());
285 DCHECK(src.IsFlat());
286 DCHECK(dst.IsSeqOneByteString());
287
288 DisallowHeapAllocation no_gc;
289
290 const int length = src.length();
291 String::FlatContent src_flat = src.GetFlatContent(no_gc);
292 uint8_t* dst_data = SeqOneByteString::cast(dst).GetChars(no_gc);
293
294 if (src_flat.IsOneByte()) {
295 const uint8_t* src_data = src_flat.ToOneByteVector().begin();
296
297 bool has_changed_character = false;
298 int index_to_first_unprocessed =
299 FastAsciiConvert<true>(reinterpret_cast<char*>(dst_data),
300 reinterpret_cast<const char*>(src_data), length,
301 &has_changed_character);
302
303 if (index_to_first_unprocessed == length) {
304 return has_changed_character ? dst : src;
305 }
306
307 // If not ASCII, we keep the result up to index_to_first_unprocessed and
308 // process the rest.
309 for (int index = index_to_first_unprocessed; index < length; ++index) {
310 dst_data[index] = ToLatin1Lower(static_cast<uint16_t>(src_data[index]));
311 }
312 } else {
313 DCHECK(src_flat.IsTwoByte());
314 int index_to_first_unprocessed = FindFirstUpperOrNonAscii(src, length);
315 if (index_to_first_unprocessed == length) return src;
316
317 const uint16_t* src_data = src_flat.ToUC16Vector().begin();
318 CopyChars(dst_data, src_data, index_to_first_unprocessed);
319 for (int index = index_to_first_unprocessed; index < length; ++index) {
320 dst_data[index] = ToLatin1Lower(static_cast<uint16_t>(src_data[index]));
321 }
322 }
323
324 return dst;
325 }
326
ConvertToLower(Isolate * isolate,Handle<String> s)327 MaybeHandle<String> Intl::ConvertToLower(Isolate* isolate, Handle<String> s) {
328 if (!s->IsOneByteRepresentation()) {
329 // Use a slower implementation for strings with characters beyond U+00FF.
330 return LocaleConvertCase(isolate, s, false, "");
331 }
332
333 int length = s->length();
334
335 // We depend here on the invariant that the length of a Latin1
336 // string is invariant under ToLowerCase, and the result always
337 // fits in the Latin1 range in the *root locale*. It does not hold
338 // for ToUpperCase even in the root locale.
339
340 // Scan the string for uppercase and non-ASCII characters for strings
341 // shorter than a machine-word without any memory allocation overhead.
342 // TODO(jshin): Apply this to a longer input by breaking FastAsciiConvert()
343 // to two parts, one for scanning the prefix with no change and the other for
344 // handling ASCII-only characters.
345
346 bool is_short = length < static_cast<int>(sizeof(uintptr_t));
347 if (is_short) {
348 bool is_lower_ascii = FindFirstUpperOrNonAscii(*s, length) == length;
349 if (is_lower_ascii) return s;
350 }
351
352 Handle<SeqOneByteString> result =
353 isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
354
355 return Handle<String>(Intl::ConvertOneByteToLower(*s, *result), isolate);
356 }
357
// Uppercases |s| in the root locale.
// Non-empty strings in one-byte representation are tried on progressively
// slower paths: pure ASCII, Latin-1 without special characters, Latin-1 with
// sharp-s (needs a longer result string), and finally the generic ICU path
// (LocaleConvertCase) when a character uppercases beyond the Latin-1 range.
// All other strings go straight to LocaleConvertCase.
MaybeHandle<String> Intl::ConvertToUpper(Isolate* isolate, Handle<String> s) {
  int32_t length = s->length();
  if (s->IsOneByteRepresentation() && length > 0) {
    Handle<SeqOneByteString> result =
        isolate->factory()->NewRawOneByteString(length).ToHandleChecked();

    DCHECK(s->IsFlat());
    int sharp_s_count;
    bool is_result_single_byte;
    {
      DisallowHeapAllocation no_gc;
      String::FlatContent flat = s->GetFlatContent(no_gc);
      uint8_t* dest = result->GetChars(no_gc);
      if (flat.IsOneByte()) {
        Vector<const uint8_t> src = flat.ToOneByteVector();
        bool has_changed_character = false;
        int index_to_first_unprocessed = FastAsciiConvert<false>(
            reinterpret_cast<char*>(result->GetChars(no_gc)),
            reinterpret_cast<const char*>(src.begin()), length,
            &has_changed_character);
        // Everything was processed by the ASCII fast path: hand back the
        // input itself when nothing changed.
        if (index_to_first_unprocessed == length) {
          return has_changed_character ? result : s;
        }
        // If not ASCII, we keep the result up to index_to_first_unprocessed and
        // process the rest.
        is_result_single_byte =
            ToUpperOneByte(src.SubVector(index_to_first_unprocessed, length),
                           dest + index_to_first_unprocessed, &sharp_s_count);
      } else {
        DCHECK(flat.IsTwoByte());
        Vector<const uint16_t> src = flat.ToUC16Vector();
        if (ToUpperFastASCII(src, result)) return result;
        is_result_single_byte = ToUpperOneByte(src, dest, &sharp_s_count);
      }
    }

    // Go to the full Unicode path if there are characters whose uppercase
    // is beyond the Latin-1 range (cannot be represented in OneByteString).
    if (V8_UNLIKELY(!is_result_single_byte)) {
      return LocaleConvertCase(isolate, s, true, "");
    }

    if (sharp_s_count == 0) return result;

    // We have sharp_s_count sharp-s characters, but the result is still
    // in the Latin-1 range.
    // The string filled above is dropped here: a longer one with one extra
    // slot per sharp-s is allocated and filled from scratch.
    ASSIGN_RETURN_ON_EXCEPTION(
        isolate, result,
        isolate->factory()->NewRawOneByteString(length + sharp_s_count),
        String);
    DisallowHeapAllocation no_gc;
    String::FlatContent flat = s->GetFlatContent(no_gc);
    if (flat.IsOneByte()) {
      ToUpperWithSharpS(flat.ToOneByteVector(), result);
    } else {
      ToUpperWithSharpS(flat.ToUC16Vector(), result);
    }

    return result;
  }

  return LocaleConvertCase(isolate, s, true, "");
}
421
GetNumberingSystem(const icu::Locale & icu_locale)422 std::string Intl::GetNumberingSystem(const icu::Locale& icu_locale) {
423 // Ugly hack. ICU doesn't expose numbering system in any way, so we have
424 // to assume that for given locale NumberingSystem constructor produces the
425 // same digits as NumberFormat/Calendar would.
426 UErrorCode status = U_ZERO_ERROR;
427 std::unique_ptr<icu::NumberingSystem> numbering_system(
428 icu::NumberingSystem::createInstance(icu_locale, status));
429 if (U_SUCCESS(status)) return numbering_system->getName();
430 return "latn";
431 }
432
433 namespace {
434
CreateICULocale(const std::string & bcp47_locale)435 Maybe<icu::Locale> CreateICULocale(const std::string& bcp47_locale) {
436 DisallowHeapAllocation no_gc;
437
438 // Convert BCP47 into ICU locale format.
439 UErrorCode status = U_ZERO_ERROR;
440
441 icu::Locale icu_locale = icu::Locale::forLanguageTag(bcp47_locale, status);
442 DCHECK(U_SUCCESS(status));
443 if (icu_locale.isBogus()) {
444 return Nothing<icu::Locale>();
445 }
446
447 return Just(icu_locale);
448 }
449
450 } // anonymous namespace
451
452 // static
453
ToString(Isolate * isolate,const icu::UnicodeString & string)454 MaybeHandle<String> Intl::ToString(Isolate* isolate,
455 const icu::UnicodeString& string) {
456 return isolate->factory()->NewStringFromTwoByte(Vector<const uint16_t>(
457 reinterpret_cast<const uint16_t*>(string.getBuffer()), string.length()));
458 }
459
ToString(Isolate * isolate,const icu::UnicodeString & string,int32_t begin,int32_t end)460 MaybeHandle<String> Intl::ToString(Isolate* isolate,
461 const icu::UnicodeString& string,
462 int32_t begin, int32_t end) {
463 return Intl::ToString(isolate, string.tempSubStringBetween(begin, end));
464 }
465
466 namespace {
467
InnerAddElement(Isolate * isolate,Handle<JSArray> array,int index,Handle<String> field_type_string,Handle<String> value)468 Handle<JSObject> InnerAddElement(Isolate* isolate, Handle<JSArray> array,
469 int index, Handle<String> field_type_string,
470 Handle<String> value) {
471 // let element = $array[$index] = {
472 // type: $field_type_string,
473 // value: $value
474 // }
475 // return element;
476 Factory* factory = isolate->factory();
477 Handle<JSObject> element = factory->NewJSObject(isolate->object_function());
478 JSObject::AddProperty(isolate, element, factory->type_string(),
479 field_type_string, NONE);
480
481 JSObject::AddProperty(isolate, element, factory->value_string(), value, NONE);
482 JSObject::AddDataElement(array, index, element, NONE);
483 return element;
484 }
485
486 } // namespace
487
// Stores a new {type, value} object in |array| at |index|. The object that
// InnerAddElement returns is not needed here and is dropped.
void Intl::AddElement(Isolate* isolate, Handle<JSArray> array, int index,
                      Handle<String> field_type_string, Handle<String> value) {
  // Same as $array[$index] = {type: $field_type_string, value: $value};
  InnerAddElement(isolate, array, index, field_type_string, value);
}
493
AddElement(Isolate * isolate,Handle<JSArray> array,int index,Handle<String> field_type_string,Handle<String> value,Handle<String> additional_property_name,Handle<String> additional_property_value)494 void Intl::AddElement(Isolate* isolate, Handle<JSArray> array, int index,
495 Handle<String> field_type_string, Handle<String> value,
496 Handle<String> additional_property_name,
497 Handle<String> additional_property_value) {
498 // Same as $array[$index] = {
499 // type: $field_type_string, value: $value,
500 // $additional_property_name: $additional_property_value
501 // }
502 Handle<JSObject> element =
503 InnerAddElement(isolate, array, index, field_type_string, value);
504 JSObject::AddProperty(isolate, element, additional_property_name,
505 additional_property_value, NONE);
506 }
507
508 namespace {
509
510 // Build the shortened locale; eg, convert xx_Yyyy_ZZ to xx_ZZ.
511 //
512 // If locale has a script tag then return true and the locale without the
513 // script else return false and an empty string.
RemoveLocaleScriptTag(const std::string & icu_locale,std::string * locale_less_script)514 bool RemoveLocaleScriptTag(const std::string& icu_locale,
515 std::string* locale_less_script) {
516 icu::Locale new_locale = icu::Locale::createCanonical(icu_locale.c_str());
517 const char* icu_script = new_locale.getScript();
518 if (icu_script == nullptr || strlen(icu_script) == 0) {
519 *locale_less_script = std::string();
520 return false;
521 }
522
523 const char* icu_language = new_locale.getLanguage();
524 const char* icu_country = new_locale.getCountry();
525 icu::Locale short_locale = icu::Locale(icu_language, icu_country);
526 *locale_less_script = short_locale.getName();
527 return true;
528 }
529
ValidateResource(const icu::Locale locale,const char * path,const char * key)530 bool ValidateResource(const icu::Locale locale, const char* path,
531 const char* key) {
532 bool result = false;
533 UErrorCode status = U_ZERO_ERROR;
534 UResourceBundle* bundle = ures_open(path, locale.getName(), &status);
535 if (bundle != nullptr && status == U_ZERO_ERROR) {
536 if (key == nullptr) {
537 result = true;
538 } else {
539 UResourceBundle* key_bundle =
540 ures_getByKey(bundle, key, nullptr, &status);
541 result = key_bundle != nullptr && (status == U_ZERO_ERROR);
542 ures_close(key_bundle);
543 }
544 }
545 ures_close(bundle);
546 if (!result) {
547 if ((locale.getCountry()[0] != '\0') && (locale.getScript()[0] != '\0')) {
548 // Fallback to try without country.
549 std::string without_country(locale.getLanguage());
550 without_country = without_country.append("-").append(locale.getScript());
551 return ValidateResource(without_country.c_str(), path, key);
552 } else if ((locale.getCountry()[0] != '\0') ||
553 (locale.getScript()[0] != '\0')) {
554 // Fallback to try with only language.
555 std::string language(locale.getLanguage());
556 return ValidateResource(language.c_str(), path, key);
557 }
558 }
559 return result;
560 }
561
562 } // namespace
563
BuildLocaleSet(const std::vector<std::string> & icu_available_locales,const char * path,const char * validate_key)564 std::set<std::string> Intl::BuildLocaleSet(
565 const std::vector<std::string>& icu_available_locales, const char* path,
566 const char* validate_key) {
567 std::set<std::string> locales;
568 for (const std::string& locale : icu_available_locales) {
569 if (path != nullptr || validate_key != nullptr) {
570 if (!ValidateResource(icu::Locale(locale.c_str()), path, validate_key)) {
571 continue;
572 }
573 }
574 locales.insert(locale);
575 std::string shortened_locale;
576 if (RemoveLocaleScriptTag(locale, &shortened_locale)) {
577 std::replace(shortened_locale.begin(), shortened_locale.end(), '_', '-');
578 locales.insert(shortened_locale);
579 }
580 }
581 return locales;
582 }
583
ToLanguageTag(const icu::Locale & locale)584 Maybe<std::string> Intl::ToLanguageTag(const icu::Locale& locale) {
585 UErrorCode status = U_ZERO_ERROR;
586 std::string res = locale.toLanguageTag<std::string>(status);
587 if (U_FAILURE(status)) {
588 return Nothing<std::string>();
589 }
590 DCHECK(U_SUCCESS(status));
591
592 // Hack to remove -true and -yes from unicode extensions
593 // Address https://crbug.com/v8/8565
594 // TODO(ftang): Move the following "remove true" logic into ICU toLanguageTag
595 // by fixing ICU-20310.
596 size_t u_ext_start = res.find("-u-");
597 if (u_ext_start != std::string::npos) {
598 // remove "-true" and "-yes" after -u-
599 const std::vector<std::string> remove_items({"-true", "-yes"});
600 for (auto item = remove_items.begin(); item != remove_items.end(); item++) {
601 for (size_t sep_remove =
602 res.find(*item, u_ext_start + 5 /* strlen("-u-xx") == 5 */);
603 sep_remove != std::string::npos; sep_remove = res.find(*item)) {
604 size_t end_of_sep_remove = sep_remove + item->length();
605 if (res.length() == end_of_sep_remove ||
606 res.at(end_of_sep_remove) == '-') {
607 res.erase(sep_remove, item->length());
608 }
609 }
610 }
611 }
612 return Just(res);
613 }
614
615 namespace {
DefaultLocale(Isolate * isolate)616 std::string DefaultLocale(Isolate* isolate) {
617 if (isolate->default_locale().empty()) {
618 icu::Locale default_locale;
619 // Translate ICU's fallback locale to a well-known locale.
620 if (strcmp(default_locale.getName(), "en_US_POSIX") == 0 ||
621 strcmp(default_locale.getName(), "c") == 0) {
622 isolate->set_default_locale("en-US");
623 } else {
624 // Set the locale
625 isolate->set_default_locale(
626 default_locale.isBogus()
627 ? "und"
628 : Intl::ToLanguageTag(default_locale).FromJust());
629 }
630 DCHECK(!isolate->default_locale().empty());
631 }
632 return isolate->default_locale();
633 }
634 } // namespace
635
636 // See ecma402/#legacy-constructor.
LegacyUnwrapReceiver(Isolate * isolate,Handle<JSReceiver> receiver,Handle<JSFunction> constructor,bool has_initialized_slot)637 MaybeHandle<Object> Intl::LegacyUnwrapReceiver(Isolate* isolate,
638 Handle<JSReceiver> receiver,
639 Handle<JSFunction> constructor,
640 bool has_initialized_slot) {
641 Handle<Object> obj_is_instance_of;
642 ASSIGN_RETURN_ON_EXCEPTION(isolate, obj_is_instance_of,
643 Object::InstanceOf(isolate, receiver, constructor),
644 Object);
645 bool is_instance_of = obj_is_instance_of->BooleanValue(isolate);
646
647 // 2. If receiver does not have an [[Initialized...]] internal slot
648 // and ? InstanceofOperator(receiver, constructor) is true, then
649 if (!has_initialized_slot && is_instance_of) {
650 // 2. a. Let new_receiver be ? Get(receiver, %Intl%.[[FallbackSymbol]]).
651 Handle<Object> new_receiver;
652 ASSIGN_RETURN_ON_EXCEPTION(
653 isolate, new_receiver,
654 JSReceiver::GetProperty(isolate, receiver,
655 isolate->factory()->intl_fallback_symbol()),
656 Object);
657 return new_receiver;
658 }
659
660 return receiver;
661 }
662
GetStringOption(Isolate * isolate,Handle<JSReceiver> options,const char * property,std::vector<const char * > values,const char * service,std::unique_ptr<char[]> * result)663 Maybe<bool> Intl::GetStringOption(Isolate* isolate, Handle<JSReceiver> options,
664 const char* property,
665 std::vector<const char*> values,
666 const char* service,
667 std::unique_ptr<char[]>* result) {
668 Handle<String> property_str =
669 isolate->factory()->NewStringFromAsciiChecked(property);
670
671 // 1. Let value be ? Get(options, property).
672 Handle<Object> value;
673 ASSIGN_RETURN_ON_EXCEPTION_VALUE(
674 isolate, value,
675 Object::GetPropertyOrElement(isolate, options, property_str),
676 Nothing<bool>());
677
678 if (value->IsUndefined(isolate)) {
679 return Just(false);
680 }
681
682 // 2. c. Let value be ? ToString(value).
683 Handle<String> value_str;
684 ASSIGN_RETURN_ON_EXCEPTION_VALUE(
685 isolate, value_str, Object::ToString(isolate, value), Nothing<bool>());
686 std::unique_ptr<char[]> value_cstr = value_str->ToCString();
687
688 // 2. d. if values is not undefined, then
689 if (values.size() > 0) {
690 // 2. d. i. If values does not contain an element equal to value,
691 // throw a RangeError exception.
692 for (size_t i = 0; i < values.size(); i++) {
693 if (strcmp(values.at(i), value_cstr.get()) == 0) {
694 // 2. e. return value
695 *result = std::move(value_cstr);
696 return Just(true);
697 }
698 }
699
700 Handle<String> service_str =
701 isolate->factory()->NewStringFromAsciiChecked(service);
702 THROW_NEW_ERROR_RETURN_VALUE(
703 isolate,
704 NewRangeError(MessageTemplate::kValueOutOfRange, value, service_str,
705 property_str),
706 Nothing<bool>());
707 }
708
709 // 2. e. return value
710 *result = std::move(value_cstr);
711 return Just(true);
712 }
713
GetBoolOption(Isolate * isolate,Handle<JSReceiver> options,const char * property,const char * service,bool * result)714 V8_WARN_UNUSED_RESULT Maybe<bool> Intl::GetBoolOption(
715 Isolate* isolate, Handle<JSReceiver> options, const char* property,
716 const char* service, bool* result) {
717 Handle<String> property_str =
718 isolate->factory()->NewStringFromAsciiChecked(property);
719
720 // 1. Let value be ? Get(options, property).
721 Handle<Object> value;
722 ASSIGN_RETURN_ON_EXCEPTION_VALUE(
723 isolate, value,
724 Object::GetPropertyOrElement(isolate, options, property_str),
725 Nothing<bool>());
726
727 // 2. If value is not undefined, then
728 if (!value->IsUndefined(isolate)) {
729 // 2. b. i. Let value be ToBoolean(value).
730 *result = value->BooleanValue(isolate);
731
732 // 2. e. return value
733 return Just(true);
734 }
735
736 return Just(false);
737 }
738
739 namespace {
740
IsTwoLetterLanguage(const std::string & locale)741 bool IsTwoLetterLanguage(const std::string& locale) {
742 // Two letters, both in range 'a'-'z'...
743 return locale.length() == 2 && IsAsciiLower(locale[0]) &&
744 IsAsciiLower(locale[1]);
745 }
746
// Returns true iff |locale| is exactly one of the deprecated language tags
// "in", "iw", "ji", "jw" or "mo". The comparison is case-sensitive.
bool IsDeprecatedLanguage(const std::string& locale) {
  static constexpr const char* kDeprecatedTags[] = {"in", "iw", "ji", "jw",
                                                    "mo"};
  for (const char* tag : kDeprecatedTags) {
    if (locale == tag) return true;
  }
  return false;
}
752
753 // Reference:
754 // https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry
IsGrandfatheredTagWithoutPreferredVaule(const std::string & locale)755 bool IsGrandfatheredTagWithoutPreferredVaule(const std::string& locale) {
756 if (V8_UNLIKELY(locale == "zh-min" || locale == "cel-gaulish")) return true;
757 if (locale.length() > 6 /* i-mingo is 7 chars long */ &&
758 V8_UNLIKELY(locale[0] == 'i' && locale[1] == '-')) {
759 return locale.substr(2) == "default" || locale.substr(2) == "enochian" ||
760 locale.substr(2) == "mingo";
761 }
762 return false;
763 }
764
// Structural validity check for a language tag; the decision is delegated
// entirely to JSLocale::StartsWithUnicodeLanguageId.
bool IsStructurallyValidLanguageTag(const std::string& tag) {
  return JSLocale::StartsWithUnicodeLanguageId(tag);
}
768
// Canonicalize the locale.
// https://tc39.github.io/ecma402/#sec-canonicalizelanguagetag,
// including type check and structural validity check.
//
// Returns the canonical tag, or throws a RangeError (kInvalidLanguageTag) and
// returns Nothing when |locale_in| is empty, contains non-ASCII characters,
// is rejected by ICU, or cannot be converted back into a language tag.
Maybe<std::string> CanonicalizeLanguageTag(Isolate* isolate,
                                           const std::string& locale_in) {
  std::string locale = locale_in;

  if (locale.length() == 0 ||
      !String::IsAscii(locale.data(), static_cast<int>(locale.length()))) {
    THROW_NEW_ERROR_RETURN_VALUE(
        isolate,
        NewRangeError(
            MessageTemplate::kInvalidLanguageTag,
            isolate->factory()->NewStringFromAsciiChecked(locale.c_str())),
        Nothing<std::string>());
  }

  // Optimize for the most common case: a 2-letter language code in the
  // canonical form/lowercase that is not one of the deprecated codes
  // (in, iw, ji, jw, mo). Don't check for ~70 of 3-letter deprecated language
  // codes. Instead, let them be handled by ICU in the slow path. However,
  // fast-track 'fil' (3-letter canonical code).
  if ((IsTwoLetterLanguage(locale) && !IsDeprecatedLanguage(locale)) ||
      locale == "fil") {
    return Just(locale);
  }

  // Because per BCP 47 2.1.1 language tags are case-insensitive, lowercase
  // the input before any more check.
  std::transform(locale.begin(), locale.end(), locale.begin(), ToAsciiLower);

  // ICU maps a few grandfathered tags to what looks like a regular language
  // tag even though IANA language tag registry does not have a preferred
  // entry map for them. Return them as they are, only lowercased.
  if (IsGrandfatheredTagWithoutPreferredVaule(locale)) {
    return Just(locale);
  }

  // ECMA 402 6.2.3
  // TODO(jshin): uloc_{for,to}LanguageTag can fail even for a structurally
  // valid language tag if it's too long (much longer than 100 chars). Even if
  // we allocate a longer buffer, ICU will still fail if it's too long. Either
  // propose to Ecma 402 to put a limit on the locale length or change ICU to
  // handle long locale names better. See
  // https://unicode-org.atlassian.net/browse/ICU-13417
  UErrorCode error = U_ZERO_ERROR;
  // uloc_forLanguageTag checks the structural validity. If the input BCP47
  // language tag is parsed all the way to the end, it indicates that the input
  // is structurally valid. Due to a couple of bugs, we can't use it
  // without Chromium patches or ICU 62 or earlier.
  icu::Locale icu_locale = icu::Locale::forLanguageTag(locale.c_str(), error);
  if (U_FAILURE(error) || icu_locale.isBogus()) {
    THROW_NEW_ERROR_RETURN_VALUE(
        isolate,
        NewRangeError(
            MessageTemplate::kInvalidLanguageTag,
            isolate->factory()->NewStringFromAsciiChecked(locale.c_str())),
        Nothing<std::string>());
  }
  // Convert back to BCP47; a failure here is reported as an invalid tag too.
  Maybe<std::string> maybe_to_language_tag = Intl::ToLanguageTag(icu_locale);
  if (maybe_to_language_tag.IsNothing()) {
    THROW_NEW_ERROR_RETURN_VALUE(
        isolate,
        NewRangeError(
            MessageTemplate::kInvalidLanguageTag,
            isolate->factory()->NewStringFromAsciiChecked(locale.c_str())),
        Nothing<std::string>());
  }

  return maybe_to_language_tag;
}
840
CanonicalizeLanguageTag(Isolate * isolate,Handle<Object> locale_in)841 Maybe<std::string> CanonicalizeLanguageTag(Isolate* isolate,
842 Handle<Object> locale_in) {
843 Handle<String> locale_str;
844 // This does part of the validity checking spec'ed in CanonicalizeLocaleList:
845 // 7c ii. If Type(kValue) is not String or Object, throw a TypeError
846 // exception.
847 // 7c iii. Let tag be ? ToString(kValue).
848 // 7c iv. If IsStructurallyValidLanguageTag(tag) is false, throw a
849 // RangeError exception.
850
851 if (locale_in->IsString()) {
852 locale_str = Handle<String>::cast(locale_in);
853 } else if (locale_in->IsJSReceiver()) {
854 ASSIGN_RETURN_ON_EXCEPTION_VALUE(isolate, locale_str,
855 Object::ToString(isolate, locale_in),
856 Nothing<std::string>());
857 } else {
858 THROW_NEW_ERROR_RETURN_VALUE(isolate,
859 NewTypeError(MessageTemplate::kLanguageID),
860 Nothing<std::string>());
861 }
862 std::string locale(locale_str->ToCString().get());
863
864 if (!IsStructurallyValidLanguageTag(locale)) {
865 THROW_NEW_ERROR_RETURN_VALUE(
866 isolate, NewRangeError(MessageTemplate::kLocaleBadParameters),
867 Nothing<std::string>());
868 }
869 return CanonicalizeLanguageTag(isolate, locale);
870 }
871
872 } // anonymous namespace
873
CanonicalizeLocaleList(Isolate * isolate,Handle<Object> locales,bool only_return_one_result)874 Maybe<std::vector<std::string>> Intl::CanonicalizeLocaleList(
875 Isolate* isolate, Handle<Object> locales, bool only_return_one_result) {
876 // 1. If locales is undefined, then
877 if (locales->IsUndefined(isolate)) {
878 // 1a. Return a new empty List.
879 return Just(std::vector<std::string>());
880 }
881 // 2. Let seen be a new empty List.
882 std::vector<std::string> seen;
883 // 3. If Type(locales) is String or locales has an [[InitializedLocale]]
884 // internal slot, then
885 if (locales->IsJSLocale()) {
886 // Since this value came from JSLocale, which is already went though the
887 // CanonializeLanguageTag process once, therefore there are no need to
888 // call CanonializeLanguageTag again.
889 seen.push_back(JSLocale::ToString(Handle<JSLocale>::cast(locales)));
890 return Just(seen);
891 }
892 if (locales->IsString()) {
893 // 3a. Let O be CreateArrayFromList(« locales »).
894 // Instead of creating a one-element array and then iterating over it,
895 // we inline the body of the iteration:
896 std::string canonicalized_tag;
897 if (!CanonicalizeLanguageTag(isolate, locales).To(&canonicalized_tag)) {
898 return Nothing<std::vector<std::string>>();
899 }
900 seen.push_back(canonicalized_tag);
901 return Just(seen);
902 }
903 // 4. Else,
904 // 4a. Let O be ? ToObject(locales).
905 Handle<JSReceiver> o;
906 ASSIGN_RETURN_ON_EXCEPTION_VALUE(isolate, o,
907 Object::ToObject(isolate, locales),
908 Nothing<std::vector<std::string>>());
909 // 5. Let len be ? ToLength(? Get(O, "length")).
910 Handle<Object> length_obj;
911 ASSIGN_RETURN_ON_EXCEPTION_VALUE(isolate, length_obj,
912 Object::GetLengthFromArrayLike(isolate, o),
913 Nothing<std::vector<std::string>>());
914 // TODO(jkummerow): Spec violation: strictly speaking, we have to iterate
915 // up to 2^53-1 if {length_obj} says so. Since cases above 2^32 probably
916 // don't happen in practice (and would be very slow if they do), we'll keep
917 // the code simple for now by using a saturating to-uint32 conversion.
918 double raw_length = length_obj->Number();
919 uint32_t len =
920 raw_length >= kMaxUInt32 ? kMaxUInt32 : static_cast<uint32_t>(raw_length);
921 // 6. Let k be 0.
922 // 7. Repeat, while k < len
923 for (uint32_t k = 0; k < len; k++) {
924 // 7a. Let Pk be ToString(k).
925 // 7b. Let kPresent be ? HasProperty(O, Pk).
926 LookupIterator it(isolate, o, k);
927 Maybe<bool> maybe_found = JSReceiver::HasProperty(&it);
928 MAYBE_RETURN(maybe_found, Nothing<std::vector<std::string>>());
929 // 7c. If kPresent is true, then
930 if (!maybe_found.FromJust()) continue;
931 // 7c i. Let kValue be ? Get(O, Pk).
932 Handle<Object> k_value;
933 ASSIGN_RETURN_ON_EXCEPTION_VALUE(isolate, k_value, Object::GetProperty(&it),
934 Nothing<std::vector<std::string>>());
935 // 7c ii. If Type(kValue) is not String or Object, throw a TypeError
936 // exception.
937 // 7c iii. If Type(kValue) is Object and kValue has an [[InitializedLocale]]
938 // internal slot, then
939 std::string canonicalized_tag;
940 if (k_value->IsJSLocale()) {
941 // 7c iii. 1. Let tag be kValue.[[Locale]].
942 canonicalized_tag = JSLocale::ToString(Handle<JSLocale>::cast(k_value));
943 // 7c iv. Else,
944 } else {
945 // 7c iv 1. Let tag be ? ToString(kValue).
946 // 7c v. If IsStructurallyValidLanguageTag(tag) is false, throw a
947 // RangeError exception.
948 // 7c vi. Let canonicalizedTag be CanonicalizeLanguageTag(tag).
949 if (!CanonicalizeLanguageTag(isolate, k_value).To(&canonicalized_tag)) {
950 return Nothing<std::vector<std::string>>();
951 }
952 }
953 // 7c vi. If canonicalizedTag is not an element of seen, append
954 // canonicalizedTag as the last element of seen.
955 if (std::find(seen.begin(), seen.end(), canonicalized_tag) == seen.end()) {
956 seen.push_back(canonicalized_tag);
957 }
958 // 7d. Increase k by 1. (See loop header.)
959 // Optimization: some callers only need one result.
960 if (only_return_one_result) return Just(seen);
961 }
962 // 8. Return seen.
963 return Just(seen);
964 }
965
966 // ecma402 #sup-string.prototype.tolocalelowercase
967 // ecma402 #sup-string.prototype.tolocaleuppercase
StringLocaleConvertCase(Isolate * isolate,Handle<String> s,bool to_upper,Handle<Object> locales)968 MaybeHandle<String> Intl::StringLocaleConvertCase(Isolate* isolate,
969 Handle<String> s,
970 bool to_upper,
971 Handle<Object> locales) {
972 std::vector<std::string> requested_locales;
973 if (!CanonicalizeLocaleList(isolate, locales, true).To(&requested_locales)) {
974 return MaybeHandle<String>();
975 }
976 std::string requested_locale = requested_locales.size() == 0
977 ? DefaultLocale(isolate)
978 : requested_locales[0];
979 size_t dash = requested_locale.find('-');
980 if (dash != std::string::npos) {
981 requested_locale = requested_locale.substr(0, dash);
982 }
983
984 // Primary language tag can be up to 8 characters long in theory.
985 // https://tools.ietf.org/html/bcp47#section-2.2.1
986 DCHECK_LE(requested_locale.length(), 8);
987 s = String::Flatten(isolate, s);
988
989 // All the languages requiring special-handling have two-letter codes.
990 // Note that we have to check for '!= 2' here because private-use language
991 // tags (x-foo) or grandfathered irregular tags (e.g. i-enochian) would have
992 // only 'x' or 'i' when they get here.
993 if (V8_UNLIKELY(requested_locale.length() != 2)) {
994 if (to_upper) {
995 return ConvertToUpper(isolate, s);
996 }
997 return ConvertToLower(isolate, s);
998 }
999 // TODO(jshin): Consider adding a fast path for ASCII or Latin-1. The fastpath
1000 // in the root locale needs to be adjusted for az, lt and tr because even case
1001 // mapping of ASCII range characters are different in those locales.
1002 // Greek (el) does not require any adjustment.
1003 if (V8_UNLIKELY((requested_locale == "tr") || (requested_locale == "el") ||
1004 (requested_locale == "lt") || (requested_locale == "az"))) {
1005 return LocaleConvertCase(isolate, s, to_upper, requested_locale.c_str());
1006 } else {
1007 if (to_upper) {
1008 return ConvertToUpper(isolate, s);
1009 }
1010 return ConvertToLower(isolate, s);
1011 }
1012 }
1013
StringLocaleCompare(Isolate * isolate,Handle<String> string1,Handle<String> string2,Handle<Object> locales,Handle<Object> options,const char * method)1014 MaybeHandle<Object> Intl::StringLocaleCompare(
1015 Isolate* isolate, Handle<String> string1, Handle<String> string2,
1016 Handle<Object> locales, Handle<Object> options, const char* method) {
1017 // We only cache the instance when both locales and options are undefined,
1018 // as that is the only case when the specified side-effects of examining
1019 // those arguments are unobservable.
1020 bool can_cache =
1021 locales->IsUndefined(isolate) && options->IsUndefined(isolate);
1022 if (can_cache) {
1023 // Both locales and options are undefined, check the cache.
1024 icu::Collator* cached_icu_collator =
1025 static_cast<icu::Collator*>(isolate->get_cached_icu_object(
1026 Isolate::ICUObjectCacheType::kDefaultCollator));
1027 // We may use the cached icu::Collator for a fast path.
1028 if (cached_icu_collator != nullptr) {
1029 return Intl::CompareStrings(isolate, *cached_icu_collator, string1,
1030 string2);
1031 }
1032 }
1033
1034 Handle<JSFunction> constructor = Handle<JSFunction>(
1035 JSFunction::cast(
1036 isolate->context().native_context().intl_collator_function()),
1037 isolate);
1038
1039 Handle<JSCollator> collator;
1040 ASSIGN_RETURN_ON_EXCEPTION(
1041 isolate, collator,
1042 New<JSCollator>(isolate, constructor, locales, options, method), Object);
1043 if (can_cache) {
1044 isolate->set_icu_object_in_cache(
1045 Isolate::ICUObjectCacheType::kDefaultCollator,
1046 std::static_pointer_cast<icu::UMemory>(collator->icu_collator().get()));
1047 }
1048 icu::Collator* icu_collator = collator->icu_collator().raw();
1049 return Intl::CompareStrings(isolate, *icu_collator, string1, string2);
1050 }
1051
1052 // ecma402/#sec-collator-comparestrings
CompareStrings(Isolate * isolate,const icu::Collator & icu_collator,Handle<String> string1,Handle<String> string2)1053 Handle<Object> Intl::CompareStrings(Isolate* isolate,
1054 const icu::Collator& icu_collator,
1055 Handle<String> string1,
1056 Handle<String> string2) {
1057 Factory* factory = isolate->factory();
1058
1059 // Early return for identical strings.
1060 if (string1.is_identical_to(string2)) {
1061 return factory->NewNumberFromInt(UCollationResult::UCOL_EQUAL);
1062 }
1063
1064 // Early return for empty strings.
1065 if (string1->length() == 0) {
1066 return factory->NewNumberFromInt(string2->length() == 0
1067 ? UCollationResult::UCOL_EQUAL
1068 : UCollationResult::UCOL_LESS);
1069 }
1070 if (string2->length() == 0) {
1071 return factory->NewNumberFromInt(UCollationResult::UCOL_GREATER);
1072 }
1073
1074 string1 = String::Flatten(isolate, string1);
1075 string2 = String::Flatten(isolate, string2);
1076
1077 UCollationResult result;
1078 UErrorCode status = U_ZERO_ERROR;
1079 icu::StringPiece string_piece1 = ToICUStringPiece(isolate, string1);
1080 if (!string_piece1.empty()) {
1081 icu::StringPiece string_piece2 = ToICUStringPiece(isolate, string2);
1082 if (!string_piece2.empty()) {
1083 result = icu_collator.compareUTF8(string_piece1, string_piece2, status);
1084 DCHECK(U_SUCCESS(status));
1085 return factory->NewNumberFromInt(result);
1086 }
1087 }
1088
1089 icu::UnicodeString string_val1 = Intl::ToICUUnicodeString(isolate, string1);
1090 icu::UnicodeString string_val2 = Intl::ToICUUnicodeString(isolate, string2);
1091 result = icu_collator.compare(string_val1, string_val2, status);
1092 DCHECK(U_SUCCESS(status));
1093
1094 return factory->NewNumberFromInt(result);
1095 }
1096
1097 // ecma402/#sup-properties-of-the-number-prototype-object
NumberToLocaleString(Isolate * isolate,Handle<Object> num,Handle<Object> locales,Handle<Object> options,const char * method)1098 MaybeHandle<String> Intl::NumberToLocaleString(Isolate* isolate,
1099 Handle<Object> num,
1100 Handle<Object> locales,
1101 Handle<Object> options,
1102 const char* method) {
1103 Handle<Object> numeric_obj;
1104 ASSIGN_RETURN_ON_EXCEPTION(isolate, numeric_obj,
1105 Object::ToNumeric(isolate, num), String);
1106
1107 // We only cache the instance when both locales and options are undefined,
1108 // as that is the only case when the specified side-effects of examining
1109 // those arguments are unobservable.
1110 bool can_cache =
1111 locales->IsUndefined(isolate) && options->IsUndefined(isolate);
1112 if (can_cache) {
1113 icu::number::LocalizedNumberFormatter* cached_number_format =
1114 static_cast<icu::number::LocalizedNumberFormatter*>(
1115 isolate->get_cached_icu_object(
1116 Isolate::ICUObjectCacheType::kDefaultNumberFormat));
1117 // We may use the cached icu::NumberFormat for a fast path.
1118 if (cached_number_format != nullptr) {
1119 return JSNumberFormat::FormatNumeric(isolate, *cached_number_format,
1120 numeric_obj);
1121 }
1122 }
1123
1124 Handle<JSFunction> constructor = Handle<JSFunction>(
1125 JSFunction::cast(
1126 isolate->context().native_context().intl_number_format_function()),
1127 isolate);
1128 Handle<JSNumberFormat> number_format;
1129 // 2. Let numberFormat be ? Construct(%NumberFormat%, « locales, options »).
1130 ASSIGN_RETURN_ON_EXCEPTION(
1131 isolate, number_format,
1132 New<JSNumberFormat>(isolate, constructor, locales, options, method),
1133 String);
1134
1135 if (can_cache) {
1136 isolate->set_icu_object_in_cache(
1137 Isolate::ICUObjectCacheType::kDefaultNumberFormat,
1138 std::static_pointer_cast<icu::UMemory>(
1139 number_format->icu_number_formatter().get()));
1140 }
1141
1142 // Return FormatNumber(numberFormat, x).
1143 icu::number::LocalizedNumberFormatter* icu_number_format =
1144 number_format->icu_number_formatter().raw();
1145 return JSNumberFormat::FormatNumeric(isolate, *icu_number_format,
1146 numeric_obj);
1147 }
1148
1149 namespace {
1150
1151 // ecma402/#sec-defaultnumberoption
DefaultNumberOption(Isolate * isolate,Handle<Object> value,int min,int max,int fallback,Handle<String> property)1152 Maybe<int> DefaultNumberOption(Isolate* isolate, Handle<Object> value, int min,
1153 int max, int fallback, Handle<String> property) {
1154 // 2. Else, return fallback.
1155 if (value->IsUndefined()) return Just(fallback);
1156
1157 // 1. If value is not undefined, then
1158 // a. Let value be ? ToNumber(value).
1159 Handle<Object> value_num;
1160 ASSIGN_RETURN_ON_EXCEPTION_VALUE(
1161 isolate, value_num, Object::ToNumber(isolate, value), Nothing<int>());
1162 DCHECK(value_num->IsNumber());
1163
1164 // b. If value is NaN or less than minimum or greater than maximum, throw a
1165 // RangeError exception.
1166 if (value_num->IsNaN() || value_num->Number() < min ||
1167 value_num->Number() > max) {
1168 THROW_NEW_ERROR_RETURN_VALUE(
1169 isolate,
1170 NewRangeError(MessageTemplate::kPropertyValueOutOfRange, property),
1171 Nothing<int>());
1172 }
1173
1174 // The max and min arguments are integers and the above check makes
1175 // sure that we are within the integer range making this double to
1176 // int conversion safe.
1177 //
1178 // c. Return floor(value).
1179 return Just(FastD2I(floor(value_num->Number())));
1180 }
1181
1182 } // namespace
1183
1184 // ecma402/#sec-getnumberoption
GetNumberOption(Isolate * isolate,Handle<JSReceiver> options,Handle<String> property,int min,int max,int fallback)1185 Maybe<int> Intl::GetNumberOption(Isolate* isolate, Handle<JSReceiver> options,
1186 Handle<String> property, int min, int max,
1187 int fallback) {
1188 // 1. Let value be ? Get(options, property).
1189 Handle<Object> value;
1190 ASSIGN_RETURN_ON_EXCEPTION_VALUE(
1191 isolate, value, JSReceiver::GetProperty(isolate, options, property),
1192 Nothing<int>());
1193
1194 // Return ? DefaultNumberOption(value, minimum, maximum, fallback).
1195 return DefaultNumberOption(isolate, value, min, max, fallback, property);
1196 }
1197
SetNumberFormatDigitOptions(Isolate * isolate,Handle<JSReceiver> options,int mnfd_default,int mxfd_default,bool notation_is_compact)1198 Maybe<Intl::NumberFormatDigitOptions> Intl::SetNumberFormatDigitOptions(
1199 Isolate* isolate, Handle<JSReceiver> options, int mnfd_default,
1200 int mxfd_default, bool notation_is_compact) {
1201 Factory* factory = isolate->factory();
1202 Intl::NumberFormatDigitOptions digit_options;
1203
1204 // 5. Let mnid be ? GetNumberOption(options, "minimumIntegerDigits,", 1, 21,
1205 // 1).
1206 int mnid = 1;
1207 if (!Intl::GetNumberOption(isolate, options,
1208 factory->minimumIntegerDigits_string(), 1, 21, 1)
1209 .To(&mnid)) {
1210 return Nothing<NumberFormatDigitOptions>();
1211 }
1212
1213 int mnfd = 0;
1214 int mxfd = 0;
1215 Handle<Object> mnfd_obj;
1216 Handle<Object> mxfd_obj;
1217
1218 // 6. Let mnfd be ? Get(options, "minimumFractionDigits").
1219 Handle<String> mnfd_str = factory->minimumFractionDigits_string();
1220 ASSIGN_RETURN_ON_EXCEPTION_VALUE(
1221 isolate, mnfd_obj, JSReceiver::GetProperty(isolate, options, mnfd_str),
1222 Nothing<NumberFormatDigitOptions>());
1223
1224 // 8. Let mxfd be ? Get(options, "maximumFractionDigits").
1225 Handle<String> mxfd_str = factory->maximumFractionDigits_string();
1226 ASSIGN_RETURN_ON_EXCEPTION_VALUE(
1227 isolate, mxfd_obj, JSReceiver::GetProperty(isolate, options, mxfd_str),
1228 Nothing<NumberFormatDigitOptions>());
1229
1230 // 9. Let mnsd be ? Get(options, "minimumSignificantDigits").
1231 Handle<Object> mnsd_obj;
1232 Handle<String> mnsd_str = factory->minimumSignificantDigits_string();
1233 ASSIGN_RETURN_ON_EXCEPTION_VALUE(
1234 isolate, mnsd_obj, JSReceiver::GetProperty(isolate, options, mnsd_str),
1235 Nothing<NumberFormatDigitOptions>());
1236
1237 // 10. Let mxsd be ? Get(options, "maximumSignificantDigits").
1238 Handle<Object> mxsd_obj;
1239 Handle<String> mxsd_str = factory->maximumSignificantDigits_string();
1240 ASSIGN_RETURN_ON_EXCEPTION_VALUE(
1241 isolate, mxsd_obj, JSReceiver::GetProperty(isolate, options, mxsd_str),
1242 Nothing<NumberFormatDigitOptions>());
1243
1244 // 11. Set intlObj.[[MinimumIntegerDigits]] to mnid.
1245 digit_options.minimum_integer_digits = mnid;
1246
1247 // 12. Set intlObj.[[MinimumFractionDigits]] to mnfd.
1248 digit_options.minimum_fraction_digits = mnfd;
1249
1250 // 13. Set intlObj.[[MaximumFractionDigits]] to mxfd.
1251 digit_options.maximum_fraction_digits = mxfd;
1252
1253 // 14. If mnsd is not undefined or mxsd is not undefined, then
1254 if (!mnsd_obj->IsUndefined(isolate) || !mxsd_obj->IsUndefined(isolate)) {
1255 // 14. a. Let mnsd be ? DefaultNumberOption(mnsd, 1, 21, 1).
1256 int mnsd;
1257 if (!DefaultNumberOption(isolate, mnsd_obj, 1, 21, 1, mnsd_str).To(&mnsd)) {
1258 return Nothing<NumberFormatDigitOptions>();
1259 }
1260
1261 // 14. b. Let mxsd be ? DefaultNumberOption(mxsd, mnsd, 21, 21).
1262 int mxsd;
1263 if (!DefaultNumberOption(isolate, mxsd_obj, mnsd, 21, 21, mxsd_str)
1264 .To(&mxsd)) {
1265 return Nothing<NumberFormatDigitOptions>();
1266 }
1267
1268 // 14. c. Set intlObj.[[MinimumSignificantDigits]] to mnsd.
1269 digit_options.minimum_significant_digits = mnsd;
1270
1271 // 14. d. Set intlObj.[[MaximumSignificantDigits]] to mxsd.
1272 digit_options.maximum_significant_digits = mxsd;
1273 } else {
1274 digit_options.minimum_significant_digits = 0;
1275 digit_options.maximum_significant_digits = 0;
1276
1277 // 15. Else If mnfd is not undefined or mxfd is not undefined, then
1278 if (!mnfd_obj->IsUndefined(isolate) || !mxfd_obj->IsUndefined(isolate)) {
1279 Handle<String> mxfd_str = factory->maximumFractionDigits_string();
1280 Handle<String> mnfd_str = factory->minimumFractionDigits_string();
1281
1282 int specified_mnfd;
1283 int specified_mxfd;
1284
1285 // a. Let _specifiedMnfd_ be ? DefaultNumberOption(_mnfd_, 0, 20,
1286 // *undefined*).
1287 if (!DefaultNumberOption(isolate, mnfd_obj, 0, 20, -1, mnfd_str)
1288 .To(&specified_mnfd)) {
1289 return Nothing<NumberFormatDigitOptions>();
1290 }
1291 Handle<Object> specifiedMnfd_obj;
1292 if (specified_mnfd < 0) {
1293 specifiedMnfd_obj = factory->undefined_value();
1294 } else {
1295 specifiedMnfd_obj = handle(Smi::FromInt(specified_mnfd), isolate);
1296 }
1297
1298 // b. Let _specifiedMxfd_ be ? DefaultNumberOption(_mxfd_, 0, 20,
1299 // *undefined*).
1300 if (!DefaultNumberOption(isolate, mxfd_obj, 0, 20, -1, mxfd_str)
1301 .To(&specified_mxfd)) {
1302 return Nothing<NumberFormatDigitOptions>();
1303 }
1304 Handle<Object> specifiedMxfd_obj;
1305 if (specified_mxfd < 0) {
1306 specifiedMxfd_obj = factory->undefined_value();
1307 } else {
1308 specifiedMxfd_obj = handle(Smi::FromInt(specified_mxfd), isolate);
1309 }
1310
1311 // c. If _specifiedMxfd_ is not *undefined*, set _mnfdDefault_ to
1312 // min(_mnfdDefault_, _specifiedMxfd_).
1313 if (specified_mxfd >= 0) {
1314 mnfd_default = std::min(mnfd_default, specified_mxfd);
1315 }
1316
1317 // d. Set _mnfd_ to ! DefaultNumberOption(_specifiedMnfd_, 0, 20,
1318 // _mnfdDefault_).
1319 if (!DefaultNumberOption(isolate, specifiedMnfd_obj, 0, 20, mnfd_default,
1320 mnfd_str)
1321 .To(&mnfd)) {
1322 return Nothing<NumberFormatDigitOptions>();
1323 }
1324
1325 // e. Set _mxfd_ to ! DefaultNumberOption(_specifiedMxfd_, 0, 20,
1326 // max(_mxfdDefault_, _mnfd_)).
1327 if (!DefaultNumberOption(isolate, specifiedMxfd_obj, 0, 20,
1328 std::max(mxfd_default, mnfd), mxfd_str)
1329 .To(&mxfd)) {
1330 return Nothing<NumberFormatDigitOptions>();
1331 }
1332
1333 // f. If _mnfd_ is greater than _mxfd_, throw a *RangeError* exception.
1334 if (mnfd > mxfd) {
1335 THROW_NEW_ERROR_RETURN_VALUE(
1336 isolate,
1337 NewRangeError(MessageTemplate::kPropertyValueOutOfRange, mxfd_str),
1338 Nothing<NumberFormatDigitOptions>());
1339 }
1340
1341 // g. Set intlObj.[[MinimumFractionDigits]] to mnfd.
1342 digit_options.minimum_fraction_digits = mnfd;
1343
1344 // h. Set intlObj.[[MaximumFractionDigits]] to mxfd.
1345 digit_options.maximum_fraction_digits = mxfd;
1346 // Else If intlObj.[[Notation]] is "compact", then
1347 } else if (notation_is_compact) {
1348 // a. Set intlObj.[[RoundingType]] to "compact-rounding".
1349 // Set minimum_significant_digits to -1 to represent roundingtype is
1350 // "compact-rounding".
1351 digit_options.minimum_significant_digits = -1;
1352 // 17. Else,
1353 } else {
1354 // 17. b. Set intlObj.[[MinimumFractionDigits]] to mnfdDefault.
1355 digit_options.minimum_fraction_digits = mnfd_default;
1356
1357 // 17. c. Set intlObj.[[MaximumFractionDigits]] to mxfdDefault.
1358 digit_options.maximum_fraction_digits = mxfd_default;
1359 }
1360 }
1361 return Just(digit_options);
1362 }
1363
1364 namespace {
1365
// ecma402/#sec-bestavailablelocale
//
// Returns the longest prefix of |locale|, cut at subtag boundaries, that is
// an element of |available_locales|. An empty string stands for the spec's
// "undefined", i.e. no prefix is available.
std::string BestAvailableLocale(const std::set<std::string>& available_locales,
                                const std::string& locale) {
  // 1. Let candidate be locale.
  // 2. Repeat,
  for (std::string candidate = locale;;) {
    // 2.a. If availableLocales contains an element equal to candidate, return
    //      candidate.
    if (available_locales.count(candidate) != 0) {
      return candidate;
    }

    // 2.b. Let pos be the character index of the last occurrence of "-"
    //      (U+002D) within candidate. If that character does not occur,
    //      return undefined.
    size_t cut = candidate.rfind('-');
    if (cut == std::string::npos) {
      return std::string();
    }

    // 2.c. If pos ≥ 2 and the character "-" occurs at index pos-2 of
    //      candidate, decrease pos by 2. This drops a single-character
    //      subtag together with the subtag that followed it.
    if (cut >= 2 && candidate[cut - 2] == '-') {
      cut -= 2;
    }

    // 2.d. Let candidate be the substring of candidate from position 0,
    //      inclusive, to position pos, exclusive.
    candidate.erase(cut);
  }
}
1399
1400 struct ParsedLocale {
1401 std::string no_extensions_locale;
1402 std::string extension;
1403 };
1404
1405 // Returns a struct containing a bcp47 tag without unicode extensions
1406 // and the removed unicode extensions.
1407 //
1408 // For example, given 'en-US-u-co-emoji' returns 'en-US' and
1409 // 'u-co-emoji'.
ParseBCP47Locale(const std::string & locale)1410 ParsedLocale ParseBCP47Locale(const std::string& locale) {
1411 size_t length = locale.length();
1412 ParsedLocale parsed_locale;
1413
1414 // Privateuse or grandfathered locales have no extension sequences.
1415 if ((length > 1) && (locale[1] == '-')) {
1416 // Check to make sure that this really is a grandfathered or
1417 // privateuse extension. ICU can sometimes mess up the
1418 // canonicalization.
1419 DCHECK(locale[0] == 'x' || locale[0] == 'i');
1420 parsed_locale.no_extensions_locale = locale;
1421 return parsed_locale;
1422 }
1423
1424 size_t unicode_extension_start = locale.find("-u-");
1425
1426 // No unicode extensions found.
1427 if (unicode_extension_start == std::string::npos) {
1428 parsed_locale.no_extensions_locale = locale;
1429 return parsed_locale;
1430 }
1431
1432 size_t private_extension_start = locale.find("-x-");
1433
1434 // Unicode extensions found within privateuse subtags don't count.
1435 if (private_extension_start != std::string::npos &&
1436 private_extension_start < unicode_extension_start) {
1437 parsed_locale.no_extensions_locale = locale;
1438 return parsed_locale;
1439 }
1440
1441 const std::string beginning = locale.substr(0, unicode_extension_start);
1442 size_t unicode_extension_end = length;
1443 DCHECK_GT(length, 2);
1444
1445 // Find the end of the extension production as per the bcp47 grammar
1446 // by looking for '-' followed by 2 chars and then another '-'.
1447 for (size_t i = unicode_extension_start + 1; i < length - 2; i++) {
1448 if (locale[i] != '-') continue;
1449
1450 if (locale[i + 2] == '-') {
1451 unicode_extension_end = i;
1452 break;
1453 }
1454
1455 i += 2;
1456 }
1457
1458 const std::string end = locale.substr(unicode_extension_end);
1459 parsed_locale.no_extensions_locale = beginning + end;
1460 parsed_locale.extension = locale.substr(
1461 unicode_extension_start, unicode_extension_end - unicode_extension_start);
1462 return parsed_locale;
1463 }
1464
1465 // ecma402/#sec-lookupsupportedlocales
LookupSupportedLocales(const std::set<std::string> & available_locales,const std::vector<std::string> & requested_locales)1466 std::vector<std::string> LookupSupportedLocales(
1467 const std::set<std::string>& available_locales,
1468 const std::vector<std::string>& requested_locales) {
1469 // 1. Let subset be a new empty List.
1470 std::vector<std::string> subset;
1471
1472 // 2. For each element locale of requestedLocales in List order, do
1473 for (const std::string& locale : requested_locales) {
1474 // 2. a. Let noExtensionsLocale be the String value that is locale
1475 // with all Unicode locale extension sequences removed.
1476 std::string no_extension_locale =
1477 ParseBCP47Locale(locale).no_extensions_locale;
1478
1479 // 2. b. Let availableLocale be
1480 // BestAvailableLocale(availableLocales, noExtensionsLocale).
1481 std::string available_locale =
1482 BestAvailableLocale(available_locales, no_extension_locale);
1483
1484 // 2. c. If availableLocale is not undefined, append locale to the
1485 // end of subset.
1486 if (!available_locale.empty()) {
1487 subset.push_back(locale);
1488 }
1489 }
1490
1491 // 3. Return subset.
1492 return subset;
1493 }
1494
BuildLocaleMatcher(Isolate * isolate,const std::set<std::string> & available_locales,UErrorCode * status)1495 icu::LocaleMatcher BuildLocaleMatcher(
1496 Isolate* isolate, const std::set<std::string>& available_locales,
1497 UErrorCode* status) {
1498 icu::Locale default_locale =
1499 icu::Locale::forLanguageTag(DefaultLocale(isolate), *status);
1500 DCHECK(U_SUCCESS(*status));
1501 icu::LocaleMatcher::Builder builder;
1502 builder.setDefaultLocale(&default_locale);
1503 for (auto it = available_locales.begin(); it != available_locales.end();
1504 ++it) {
1505 builder.addSupportedLocale(
1506 icu::Locale::forLanguageTag(it->c_str(), *status));
1507 }
1508
1509 return builder.build(*status);
1510 }
1511
1512 class Iterator : public icu::Locale::Iterator {
1513 public:
Iterator(std::vector<std::string>::const_iterator begin,std::vector<std::string>::const_iterator end)1514 Iterator(std::vector<std::string>::const_iterator begin,
1515 std::vector<std::string>::const_iterator end)
1516 : iter_(begin), end_(end) {}
1517 ~Iterator() override = default;
1518
hasNext() const1519 UBool hasNext() const override { return iter_ != end_; }
1520
next()1521 const icu::Locale& next() override {
1522 UErrorCode status = U_ZERO_ERROR;
1523 locale_ = icu::Locale::forLanguageTag(iter_->c_str(), status);
1524 DCHECK(U_SUCCESS(status));
1525 ++iter_;
1526 return locale_;
1527 }
1528
1529 private:
1530 std::vector<std::string>::const_iterator iter_;
1531 std::vector<std::string>::const_iterator end_;
1532 icu::Locale locale_;
1533 };
1534
1535 // ecma402/#sec-bestfitmatcher
1536 // The BestFitMatcher abstract operation compares requestedLocales, which must
1537 // be a List as returned by CanonicalizeLocaleList, against the locales in
1538 // availableLocales and determines the best available language to meet the
1539 // request. The algorithm is implementation dependent, but should produce
1540 // results that a typical user of the requested locales would perceive
1541 // as at least as good as those produced by the LookupMatcher abstract
1542 // operation. Options specified through Unicode locale extension sequences must
1543 // be ignored by the algorithm. Information about such subsequences is returned
1544 // separately. The abstract operation returns a record with a [[locale]] field,
1545 // whose value is the language tag of the selected locale, which must be an
1546 // element of availableLocales. If the language tag of the request locale that
1547 // led to the selected locale contained a Unicode locale extension sequence,
1548 // then the returned record also contains an [[extension]] field whose value is
1549 // the first Unicode locale extension sequence within the request locale
1550 // language tag.
BestFitMatcher(Isolate * isolate,const std::set<std::string> & available_locales,const std::vector<std::string> & requested_locales)1551 std::string BestFitMatcher(Isolate* isolate,
1552 const std::set<std::string>& available_locales,
1553 const std::vector<std::string>& requested_locales) {
1554 UErrorCode status = U_ZERO_ERROR;
1555 icu::LocaleMatcher matcher =
1556 BuildLocaleMatcher(isolate, available_locales, &status);
1557 DCHECK(U_SUCCESS(status));
1558
1559 Iterator iter(requested_locales.cbegin(), requested_locales.cend());
1560 std::string bestfit =
1561 matcher.getBestMatch(iter, status)->toLanguageTag<std::string>(status);
1562 if (U_FAILURE(status)) {
1563 return DefaultLocale(isolate);
1564 }
1565 // We need to return the extensions with it.
1566 for (auto it = requested_locales.begin(); it != requested_locales.end();
1567 ++it) {
1568 if (it->find(bestfit) == 0) {
1569 return *it;
1570 }
1571 }
1572 return bestfit;
1573 }
1574
1575 // ECMA 402 9.2.8 BestFitSupportedLocales(availableLocales, requestedLocales)
1576 // https://tc39.github.io/ecma402/#sec-bestfitsupportedlocales
BestFitSupportedLocales(Isolate * isolate,const std::set<std::string> & available_locales,const std::vector<std::string> & requested_locales)1577 std::vector<std::string> BestFitSupportedLocales(
1578 Isolate* isolate, const std::set<std::string>& available_locales,
1579 const std::vector<std::string>& requested_locales) {
1580 UErrorCode status = U_ZERO_ERROR;
1581 icu::LocaleMatcher matcher =
1582 BuildLocaleMatcher(isolate, available_locales, &status);
1583 DCHECK(U_SUCCESS(status));
1584
1585 std::string default_locale = DefaultLocale(isolate);
1586 std::vector<std::string> result;
1587 for (auto it = requested_locales.cbegin(); it != requested_locales.cend();
1588 it++) {
1589 if (*it == default_locale) {
1590 result.push_back(*it);
1591 } else {
1592 status = U_ZERO_ERROR;
1593 icu::Locale desired = icu::Locale::forLanguageTag(it->c_str(), status);
1594 std::string bestfit = matcher.getBestMatch(desired, status)
1595 ->toLanguageTag<std::string>(status);
1596 // We need to return the extensions with it.
1597 if (U_SUCCESS(status) && it->find(bestfit) == 0) {
1598 result.push_back(*it);
1599 }
1600 }
1601 }
1602 return result;
1603 }
1604
1605 // ecma262 #sec-createarrayfromlist
CreateArrayFromList(Isolate * isolate,std::vector<std::string> elements,PropertyAttributes attr)1606 Handle<JSArray> CreateArrayFromList(Isolate* isolate,
1607 std::vector<std::string> elements,
1608 PropertyAttributes attr) {
1609 Factory* factory = isolate->factory();
1610 // Let array be ! ArrayCreate(0).
1611 Handle<JSArray> array = factory->NewJSArray(0);
1612
1613 uint32_t length = static_cast<uint32_t>(elements.size());
1614 // 3. Let n be 0.
1615 // 4. For each element e of elements, do
1616 for (uint32_t i = 0; i < length; i++) {
1617 // a. Let status be CreateDataProperty(array, ! ToString(n), e).
1618 const std::string& part = elements[i];
1619 Handle<String> value =
1620 factory->NewStringFromUtf8(CStrVector(part.c_str())).ToHandleChecked();
1621 JSObject::AddDataElement(array, i, value, attr);
1622 }
1623 // 5. Return array.
1624 return array;
1625 }
1626
// To mitigate the risk of the best-fit locale matcher, it is first checked in
// without being turned on. While this flag is false, code in this file that
// is asked for the "best fit" matcher takes the lookup path instead.
static bool implement_bestfit = false;
1630
1631 // ECMA 402 9.2.9 SupportedLocales(availableLocales, requestedLocales, options)
1632 // https://tc39.github.io/ecma402/#sec-supportedlocales
SupportedLocales(Isolate * isolate,const char * method,const std::set<std::string> & available_locales,const std::vector<std::string> & requested_locales,Handle<Object> options)1633 MaybeHandle<JSObject> SupportedLocales(
1634 Isolate* isolate, const char* method,
1635 const std::set<std::string>& available_locales,
1636 const std::vector<std::string>& requested_locales, Handle<Object> options) {
1637 std::vector<std::string> supported_locales;
1638
1639 // 2. Else, let matcher be "best fit".
1640 Intl::MatcherOption matcher = Intl::MatcherOption::kBestFit;
1641
1642 // 1. If options is not undefined, then
1643 if (!options->IsUndefined(isolate)) {
1644 // 1. a. Let options be ? ToObject(options).
1645 Handle<JSReceiver> options_obj;
1646 ASSIGN_RETURN_ON_EXCEPTION(isolate, options_obj,
1647 Object::ToObject(isolate, options), JSObject);
1648
1649 // 1. b. Let matcher be ? GetOption(options, "localeMatcher", "string",
1650 // « "lookup", "best fit" », "best fit").
1651 Maybe<Intl::MatcherOption> maybe_locale_matcher =
1652 Intl::GetLocaleMatcher(isolate, options_obj, method);
1653 MAYBE_RETURN(maybe_locale_matcher, MaybeHandle<JSObject>());
1654 matcher = maybe_locale_matcher.FromJust();
1655 }
1656
1657 // 3. If matcher is "best fit", then
1658 // a. Let supportedLocales be BestFitSupportedLocales(availableLocales,
1659 // requestedLocales).
1660 if (matcher == Intl::MatcherOption::kBestFit && implement_bestfit) {
1661 supported_locales =
1662 BestFitSupportedLocales(isolate, available_locales, requested_locales);
1663 } else {
1664 // 4. Else,
1665 // a. Let supportedLocales be LookupSupportedLocales(availableLocales,
1666 // requestedLocales).
1667 supported_locales =
1668 LookupSupportedLocales(available_locales, requested_locales);
1669 }
1670
1671 // 5. Return CreateArrayFromList(supportedLocales).
1672 PropertyAttributes attr = static_cast<PropertyAttributes>(NONE);
1673 return CreateArrayFromList(isolate, supported_locales, attr);
1674 }
1675
1676 } // namespace
1677
1678 // ecma-402 #sec-intl.getcanonicallocales
GetCanonicalLocales(Isolate * isolate,Handle<Object> locales)1679 MaybeHandle<JSArray> Intl::GetCanonicalLocales(Isolate* isolate,
1680 Handle<Object> locales) {
1681 // 1. Let ll be ? CanonicalizeLocaleList(locales).
1682 Maybe<std::vector<std::string>> maybe_ll =
1683 CanonicalizeLocaleList(isolate, locales, false);
1684 MAYBE_RETURN(maybe_ll, MaybeHandle<JSArray>());
1685
1686 // 2. Return CreateArrayFromList(ll).
1687 PropertyAttributes attr = static_cast<PropertyAttributes>(NONE);
1688 return CreateArrayFromList(isolate, maybe_ll.FromJust(), attr);
1689 }
1690
1691 // ECMA 402 Intl.*.supportedLocalesOf
SupportedLocalesOf(Isolate * isolate,const char * method,const std::set<std::string> & available_locales,Handle<Object> locales,Handle<Object> options)1692 MaybeHandle<JSObject> Intl::SupportedLocalesOf(
1693 Isolate* isolate, const char* method,
1694 const std::set<std::string>& available_locales, Handle<Object> locales,
1695 Handle<Object> options) {
1696 // Let availableLocales be %Collator%.[[AvailableLocales]].
1697
1698 // Let requestedLocales be ? CanonicalizeLocaleList(locales).
1699 Maybe<std::vector<std::string>> requested_locales =
1700 CanonicalizeLocaleList(isolate, locales, false);
1701 MAYBE_RETURN(requested_locales, MaybeHandle<JSObject>());
1702
1703 // Return ? SupportedLocales(availableLocales, requestedLocales, options).
1704 return SupportedLocales(isolate, method, available_locales,
1705 requested_locales.FromJust(), options);
1706 }
1707
1708 namespace {
1709
1710 template <typename T>
IsValidExtension(const icu::Locale & locale,const char * key,const std::string & value)1711 bool IsValidExtension(const icu::Locale& locale, const char* key,
1712 const std::string& value) {
1713 const char* legacy_type = uloc_toLegacyType(key, value.c_str());
1714 if (legacy_type == nullptr) {
1715 return false;
1716 }
1717 UErrorCode status = U_ZERO_ERROR;
1718 std::unique_ptr<icu::StringEnumeration> enumeration(
1719 T::getKeywordValuesForLocale(key, icu::Locale(locale.getBaseName()),
1720 false, status));
1721 if (U_FAILURE(status)) {
1722 return false;
1723 }
1724 int32_t length;
1725 for (const char* item = enumeration->next(&length, status);
1726 U_SUCCESS(status) && item != nullptr;
1727 item = enumeration->next(&length, status)) {
1728 if (strcmp(legacy_type, item) == 0) {
1729 return true;
1730 }
1731 }
1732 return false;
1733 }
1734
1735 } // namespace
1736
IsValidCollation(const icu::Locale & locale,const std::string & value)1737 bool Intl::IsValidCollation(const icu::Locale& locale,
1738 const std::string& value) {
1739 std::set<std::string> invalid_values = {"standard", "search"};
1740 if (invalid_values.find(value) != invalid_values.end()) return false;
1741 return IsValidExtension<icu::Collator>(locale, "collation", value);
1742 }
1743
IsWellFormedCalendar(const std::string & value)1744 bool Intl::IsWellFormedCalendar(const std::string& value) {
1745 return JSLocale::Is38AlphaNumList(value);
1746 }
1747
1748 // ecma402/#sec-iswellformedcurrencycode
IsWellFormedCurrency(const std::string & currency)1749 bool Intl::IsWellFormedCurrency(const std::string& currency) {
1750 return JSLocale::Is3Alpha(currency);
1751 }
1752
IsValidCalendar(const icu::Locale & locale,const std::string & value)1753 bool Intl::IsValidCalendar(const icu::Locale& locale,
1754 const std::string& value) {
1755 return IsValidExtension<icu::Calendar>(locale, "calendar", value);
1756 }
1757
IsValidNumberingSystem(const std::string & value)1758 bool Intl::IsValidNumberingSystem(const std::string& value) {
1759 std::set<std::string> invalid_values = {"native", "traditio", "finance"};
1760 if (invalid_values.find(value) != invalid_values.end()) return false;
1761 UErrorCode status = U_ZERO_ERROR;
1762 std::unique_ptr<icu::NumberingSystem> numbering_system(
1763 icu::NumberingSystem::createInstanceByName(value.c_str(), status));
1764 return U_SUCCESS(status) && numbering_system.get() != nullptr;
1765 }
1766
1767 namespace {
1768
IsWellFormedNumberingSystem(const std::string & value)1769 bool IsWellFormedNumberingSystem(const std::string& value) {
1770 return JSLocale::Is38AlphaNumList(value);
1771 }
1772
LookupAndValidateUnicodeExtensions(icu::Locale * icu_locale,const std::set<std::string> & relevant_keys)1773 std::map<std::string, std::string> LookupAndValidateUnicodeExtensions(
1774 icu::Locale* icu_locale, const std::set<std::string>& relevant_keys) {
1775 std::map<std::string, std::string> extensions;
1776
1777 UErrorCode status = U_ZERO_ERROR;
1778 icu::LocaleBuilder builder;
1779 builder.setLocale(*icu_locale).clearExtensions();
1780 std::unique_ptr<icu::StringEnumeration> keywords(
1781 icu_locale->createKeywords(status));
1782 if (U_FAILURE(status)) return extensions;
1783
1784 if (!keywords) return extensions;
1785 char value[ULOC_FULLNAME_CAPACITY];
1786
1787 int32_t length;
1788 status = U_ZERO_ERROR;
1789 for (const char* keyword = keywords->next(&length, status);
1790 keyword != nullptr; keyword = keywords->next(&length, status)) {
1791 // Ignore failures in ICU and skip to the next keyword.
1792 //
1793 // This is fine.™
1794 if (U_FAILURE(status)) {
1795 status = U_ZERO_ERROR;
1796 continue;
1797 }
1798
1799 icu_locale->getKeywordValue(keyword, value, ULOC_FULLNAME_CAPACITY, status);
1800
1801 // Ignore failures in ICU and skip to the next keyword.
1802 //
1803 // This is fine.™
1804 if (U_FAILURE(status)) {
1805 status = U_ZERO_ERROR;
1806 continue;
1807 }
1808
1809 const char* bcp47_key = uloc_toUnicodeLocaleKey(keyword);
1810
1811 if (bcp47_key && (relevant_keys.find(bcp47_key) != relevant_keys.end())) {
1812 const char* bcp47_value = uloc_toUnicodeLocaleType(bcp47_key, value);
1813 bool is_valid_value = false;
1814 // 8.h.ii.1.a If keyLocaleData contains requestedValue, then
1815 if (strcmp("ca", bcp47_key) == 0) {
1816 is_valid_value = Intl::IsValidCalendar(*icu_locale, bcp47_value);
1817 } else if (strcmp("co", bcp47_key) == 0) {
1818 is_valid_value = Intl::IsValidCollation(*icu_locale, bcp47_value);
1819 } else if (strcmp("hc", bcp47_key) == 0) {
1820 // https://www.unicode.org/repos/cldr/tags/latest/common/bcp47/calendar.xml
1821 std::set<std::string> valid_values = {"h11", "h12", "h23", "h24"};
1822 is_valid_value = valid_values.find(bcp47_value) != valid_values.end();
1823 } else if (strcmp("lb", bcp47_key) == 0) {
1824 // https://www.unicode.org/repos/cldr/tags/latest/common/bcp47/segmentation.xml
1825 std::set<std::string> valid_values = {"strict", "normal", "loose"};
1826 is_valid_value = valid_values.find(bcp47_value) != valid_values.end();
1827 } else if (strcmp("kn", bcp47_key) == 0) {
1828 // https://www.unicode.org/repos/cldr/tags/latest/common/bcp47/collation.xml
1829 std::set<std::string> valid_values = {"true", "false"};
1830 is_valid_value = valid_values.find(bcp47_value) != valid_values.end();
1831 } else if (strcmp("kf", bcp47_key) == 0) {
1832 // https://www.unicode.org/repos/cldr/tags/latest/common/bcp47/collation.xml
1833 std::set<std::string> valid_values = {"upper", "lower", "false"};
1834 is_valid_value = valid_values.find(bcp47_value) != valid_values.end();
1835 } else if (strcmp("nu", bcp47_key) == 0) {
1836 is_valid_value = Intl::IsValidNumberingSystem(bcp47_value);
1837 }
1838 if (is_valid_value) {
1839 extensions.insert(
1840 std::pair<std::string, std::string>(bcp47_key, bcp47_value));
1841 builder.setUnicodeLocaleKeyword(bcp47_key, bcp47_value);
1842 }
1843 }
1844 }
1845
1846 status = U_ZERO_ERROR;
1847 *icu_locale = builder.build(status);
1848
1849 return extensions;
1850 }
1851
1852 // ecma402/#sec-lookupmatcher
LookupMatcher(Isolate * isolate,const std::set<std::string> & available_locales,const std::vector<std::string> & requested_locales)1853 std::string LookupMatcher(Isolate* isolate,
1854 const std::set<std::string>& available_locales,
1855 const std::vector<std::string>& requested_locales) {
1856 // 1. Let result be a new Record.
1857 std::string result;
1858
1859 // 2. For each element locale of requestedLocales in List order, do
1860 for (const std::string& locale : requested_locales) {
1861 // 2. a. Let noExtensionsLocale be the String value that is locale
1862 // with all Unicode locale extension sequences removed.
1863 ParsedLocale parsed_locale = ParseBCP47Locale(locale);
1864 std::string no_extensions_locale = parsed_locale.no_extensions_locale;
1865
1866 // 2. b. Let availableLocale be
1867 // BestAvailableLocale(availableLocales, noExtensionsLocale).
1868 std::string available_locale =
1869 BestAvailableLocale(available_locales, no_extensions_locale);
1870
1871 // 2. c. If availableLocale is not undefined, append locale to the
1872 // end of subset.
1873 if (!available_locale.empty()) {
1874 // Note: The following steps are not performed here because we
1875 // can use ICU to parse the unicode locale extension sequence
1876 // as part of Intl::ResolveLocale.
1877 //
1878 // There's no need to separate the unicode locale extensions
1879 // right here. Instead just return the available locale with the
1880 // extensions.
1881 //
1882 // 2. c. i. Set result.[[locale]] to availableLocale.
1883 // 2. c. ii. If locale and noExtensionsLocale are not the same
1884 // String value, then
1885 // 2. c. ii. 1. Let extension be the String value consisting of
1886 // the first substring of locale that is a Unicode locale
1887 // extension sequence.
1888 // 2. c. ii. 2. Set result.[[extension]] to extension.
1889 // 2. c. iii. Return result.
1890 return available_locale + parsed_locale.extension;
1891 }
1892 }
1893
1894 // 3. Let defLocale be DefaultLocale();
1895 // 4. Set result.[[locale]] to defLocale.
1896 // 5. Return result.
1897 return DefaultLocale(isolate);
1898 }
1899
1900 } // namespace
1901
1902 // This function doesn't correspond exactly with the spec. Instead
1903 // we use ICU to do all the string manipulations that the spec
1904 // peforms.
1905 //
1906 // The spec uses this function to normalize values for various
1907 // relevant extension keys (such as disallowing "search" for
1908 // collation). Instead of doing this here, we let the callers of
1909 // this method perform such normalization.
1910 //
1911 // ecma402/#sec-resolvelocale
ResolveLocale(Isolate * isolate,const std::set<std::string> & available_locales,const std::vector<std::string> & requested_locales,MatcherOption matcher,const std::set<std::string> & relevant_extension_keys)1912 Maybe<Intl::ResolvedLocale> Intl::ResolveLocale(
1913 Isolate* isolate, const std::set<std::string>& available_locales,
1914 const std::vector<std::string>& requested_locales, MatcherOption matcher,
1915 const std::set<std::string>& relevant_extension_keys) {
1916 std::string locale;
1917 if (matcher == Intl::MatcherOption::kBestFit && implement_bestfit) {
1918 locale = BestFitMatcher(isolate, available_locales, requested_locales);
1919 } else {
1920 locale = LookupMatcher(isolate, available_locales, requested_locales);
1921 }
1922
1923 Maybe<icu::Locale> maybe_icu_locale = CreateICULocale(locale);
1924 MAYBE_RETURN(maybe_icu_locale, Nothing<Intl::ResolvedLocale>());
1925 icu::Locale icu_locale = maybe_icu_locale.FromJust();
1926 std::map<std::string, std::string> extensions =
1927 LookupAndValidateUnicodeExtensions(&icu_locale, relevant_extension_keys);
1928
1929 std::string canonicalized_locale = Intl::ToLanguageTag(icu_locale).FromJust();
1930
1931 // TODO(gsathya): Remove privateuse subtags from extensions.
1932
1933 return Just(
1934 Intl::ResolvedLocale{canonicalized_locale, icu_locale, extensions});
1935 }
1936
SetTextToBreakIterator(Isolate * isolate,Handle<String> text,icu::BreakIterator * break_iterator)1937 Handle<Managed<icu::UnicodeString>> Intl::SetTextToBreakIterator(
1938 Isolate* isolate, Handle<String> text, icu::BreakIterator* break_iterator) {
1939 text = String::Flatten(isolate, text);
1940 icu::UnicodeString* u_text = static_cast<icu::UnicodeString*>(
1941 Intl::ToICUUnicodeString(isolate, text).clone());
1942
1943 Handle<Managed<icu::UnicodeString>> new_u_text =
1944 Managed<icu::UnicodeString>::FromRawPtr(isolate, 0, u_text);
1945
1946 break_iterator->setText(*u_text);
1947 return new_u_text;
1948 }
1949
1950 // ecma262 #sec-string.prototype.normalize
Normalize(Isolate * isolate,Handle<String> string,Handle<Object> form_input)1951 MaybeHandle<String> Intl::Normalize(Isolate* isolate, Handle<String> string,
1952 Handle<Object> form_input) {
1953 const char* form_name;
1954 UNormalization2Mode form_mode;
1955 if (form_input->IsUndefined(isolate)) {
1956 // default is FNC
1957 form_name = "nfc";
1958 form_mode = UNORM2_COMPOSE;
1959 } else {
1960 Handle<String> form;
1961 ASSIGN_RETURN_ON_EXCEPTION(isolate, form,
1962 Object::ToString(isolate, form_input), String);
1963
1964 if (String::Equals(isolate, form, isolate->factory()->NFC_string())) {
1965 form_name = "nfc";
1966 form_mode = UNORM2_COMPOSE;
1967 } else if (String::Equals(isolate, form,
1968 isolate->factory()->NFD_string())) {
1969 form_name = "nfc";
1970 form_mode = UNORM2_DECOMPOSE;
1971 } else if (String::Equals(isolate, form,
1972 isolate->factory()->NFKC_string())) {
1973 form_name = "nfkc";
1974 form_mode = UNORM2_COMPOSE;
1975 } else if (String::Equals(isolate, form,
1976 isolate->factory()->NFKD_string())) {
1977 form_name = "nfkc";
1978 form_mode = UNORM2_DECOMPOSE;
1979 } else {
1980 Handle<String> valid_forms =
1981 isolate->factory()->NewStringFromStaticChars("NFC, NFD, NFKC, NFKD");
1982 THROW_NEW_ERROR(
1983 isolate,
1984 NewRangeError(MessageTemplate::kNormalizationForm, valid_forms),
1985 String);
1986 }
1987 }
1988
1989 int length = string->length();
1990 string = String::Flatten(isolate, string);
1991 icu::UnicodeString result;
1992 std::unique_ptr<uc16[]> sap;
1993 UErrorCode status = U_ZERO_ERROR;
1994 icu::UnicodeString input = ToICUUnicodeString(isolate, string);
1995 // Getting a singleton. Should not free it.
1996 const icu::Normalizer2* normalizer =
1997 icu::Normalizer2::getInstance(nullptr, form_name, form_mode, status);
1998 DCHECK(U_SUCCESS(status));
1999 DCHECK_NOT_NULL(normalizer);
2000 int32_t normalized_prefix_length =
2001 normalizer->spanQuickCheckYes(input, status);
2002 // Quick return if the input is already normalized.
2003 if (length == normalized_prefix_length) return string;
2004 icu::UnicodeString unnormalized =
2005 input.tempSubString(normalized_prefix_length);
2006 // Read-only alias of the normalized prefix.
2007 result.setTo(false, input.getBuffer(), normalized_prefix_length);
2008 // copy-on-write; normalize the suffix and append to |result|.
2009 normalizer->normalizeSecondAndAppend(result, unnormalized, status);
2010
2011 if (U_FAILURE(status)) {
2012 THROW_NEW_ERROR(isolate, NewTypeError(MessageTemplate::kIcuError), String);
2013 }
2014
2015 return Intl::ToString(isolate, result);
2016 }
2017
2018 // ICUTimezoneCache calls out to ICU for TimezoneCache
2019 // functionality in a straightforward way.
2020 class ICUTimezoneCache : public base::TimezoneCache {
2021 public:
ICUTimezoneCache()2022 ICUTimezoneCache() : timezone_(nullptr) { Clear(TimeZoneDetection::kSkip); }
2023
~ICUTimezoneCache()2024 ~ICUTimezoneCache() override { Clear(TimeZoneDetection::kSkip); }
2025
2026 const char* LocalTimezone(double time_ms) override;
2027
2028 double DaylightSavingsOffset(double time_ms) override;
2029
2030 double LocalTimeOffset(double time_ms, bool is_utc) override;
2031
2032 void Clear(TimeZoneDetection time_zone_detection) override;
2033
2034 private:
2035 icu::TimeZone* GetTimeZone();
2036
2037 bool GetOffsets(double time_ms, bool is_utc, int32_t* raw_offset,
2038 int32_t* dst_offset);
2039
2040 icu::TimeZone* timezone_;
2041
2042 std::string timezone_name_;
2043 std::string dst_timezone_name_;
2044 };
2045
LocalTimezone(double time_ms)2046 const char* ICUTimezoneCache::LocalTimezone(double time_ms) {
2047 bool is_dst = DaylightSavingsOffset(time_ms) != 0;
2048 std::string* name = is_dst ? &dst_timezone_name_ : &timezone_name_;
2049 if (name->empty()) {
2050 icu::UnicodeString result;
2051 GetTimeZone()->getDisplayName(is_dst, icu::TimeZone::LONG, result);
2052 result += '\0';
2053
2054 icu::StringByteSink<std::string> byte_sink(name);
2055 result.toUTF8(byte_sink);
2056 }
2057 DCHECK(!name->empty());
2058 return name->c_str();
2059 }
2060
GetTimeZone()2061 icu::TimeZone* ICUTimezoneCache::GetTimeZone() {
2062 if (timezone_ == nullptr) {
2063 timezone_ = icu::TimeZone::createDefault();
2064 }
2065 return timezone_;
2066 }
2067
GetOffsets(double time_ms,bool is_utc,int32_t * raw_offset,int32_t * dst_offset)2068 bool ICUTimezoneCache::GetOffsets(double time_ms, bool is_utc,
2069 int32_t* raw_offset, int32_t* dst_offset) {
2070 UErrorCode status = U_ZERO_ERROR;
2071 // TODO(jshin): ICU TimeZone class handles skipped time differently from
2072 // Ecma 262 (https://github.com/tc39/ecma262/pull/778) and icu::TimeZone
2073 // class does not expose the necessary API. Fixing
2074 // http://bugs.icu-project.org/trac/ticket/13268 would make it easy to
2075 // implement the proposed spec change. A proposed fix for ICU is
2076 // https://chromium-review.googlesource.com/851265 .
2077 // In the meantime, use an internal (still public) API of icu::BasicTimeZone.
2078 // Once it's accepted by the upstream, get rid of cast. Note that casting
2079 // TimeZone to BasicTimeZone is safe because we know that icu::TimeZone used
2080 // here is a BasicTimeZone.
2081 if (is_utc) {
2082 GetTimeZone()->getOffset(time_ms, false, *raw_offset, *dst_offset, status);
2083 } else {
2084 static_cast<const icu::BasicTimeZone*>(GetTimeZone())
2085 ->getOffsetFromLocal(time_ms, icu::BasicTimeZone::kFormer,
2086 icu::BasicTimeZone::kFormer, *raw_offset,
2087 *dst_offset, status);
2088 }
2089
2090 return U_SUCCESS(status);
2091 }
2092
DaylightSavingsOffset(double time_ms)2093 double ICUTimezoneCache::DaylightSavingsOffset(double time_ms) {
2094 int32_t raw_offset, dst_offset;
2095 if (!GetOffsets(time_ms, true, &raw_offset, &dst_offset)) return 0;
2096 return dst_offset;
2097 }
2098
LocalTimeOffset(double time_ms,bool is_utc)2099 double ICUTimezoneCache::LocalTimeOffset(double time_ms, bool is_utc) {
2100 int32_t raw_offset, dst_offset;
2101 if (!GetOffsets(time_ms, is_utc, &raw_offset, &dst_offset)) return 0;
2102 return raw_offset + dst_offset;
2103 }
2104
Clear(TimeZoneDetection time_zone_detection)2105 void ICUTimezoneCache::Clear(TimeZoneDetection time_zone_detection) {
2106 delete timezone_;
2107 timezone_ = nullptr;
2108 timezone_name_.clear();
2109 dst_timezone_name_.clear();
2110 if (time_zone_detection == TimeZoneDetection::kRedetect) {
2111 icu::TimeZone::adoptDefault(icu::TimeZone::detectHostTimeZone());
2112 }
2113 }
2114
CreateTimeZoneCache()2115 base::TimezoneCache* Intl::CreateTimeZoneCache() {
2116 return FLAG_icu_timezone_data ? new ICUTimezoneCache()
2117 : base::OS::CreateTimezoneCache();
2118 }
2119
// Reads the "localeMatcher" option from |options| via GetStringOption. The
// accepted strings "best fit" and "lookup" are paired with
// MatcherOption::kBestFit and MatcherOption::kLookup respectively, and
// kBestFit is passed as the fallback value. |method| is forwarded to
// GetStringOption.
Maybe<Intl::MatcherOption> Intl::GetLocaleMatcher(Isolate* isolate,
                                                  Handle<JSReceiver> options,
                                                  const char* method) {
  return Intl::GetStringOption<Intl::MatcherOption>(
      isolate, options, "localeMatcher", method, {"best fit", "lookup"},
      {Intl::MatcherOption::kBestFit, Intl::MatcherOption::kLookup},
      Intl::MatcherOption::kBestFit);
}
2128
GetNumberingSystem(Isolate * isolate,Handle<JSReceiver> options,const char * method,std::unique_ptr<char[]> * result)2129 Maybe<bool> Intl::GetNumberingSystem(Isolate* isolate,
2130 Handle<JSReceiver> options,
2131 const char* method,
2132 std::unique_ptr<char[]>* result) {
2133 const std::vector<const char*> empty_values = {};
2134 Maybe<bool> maybe = Intl::GetStringOption(isolate, options, "numberingSystem",
2135 empty_values, method, result);
2136 MAYBE_RETURN(maybe, Nothing<bool>());
2137 if (maybe.FromJust() && *result != nullptr) {
2138 if (!IsWellFormedNumberingSystem(result->get())) {
2139 THROW_NEW_ERROR_RETURN_VALUE(
2140 isolate,
2141 NewRangeError(
2142 MessageTemplate::kInvalid,
2143 isolate->factory()->numberingSystem_string(),
2144 isolate->factory()->NewStringFromAsciiChecked(result->get())),
2145 Nothing<bool>());
2146 }
2147 return Just(true);
2148 }
2149 return Just(false);
2150 }
2151
GetAvailableLocales()2152 const std::set<std::string>& Intl::GetAvailableLocales() {
2153 static base::LazyInstance<Intl::AvailableLocales<>>::type available_locales =
2154 LAZY_INSTANCE_INITIALIZER;
2155 return available_locales.Pointer()->Get();
2156 }
2157
2158 namespace {
2159
// Policy type passed to Intl::AvailableLocales<> by
// Intl::GetAvailableLocalesForDateFormat(). It supplies the resource key
// "calendar" and no resource path.
// NOTE(review): how AvailableLocales<> uses key()/path() is not visible in
// this part of the file -- confirm against its definition.
struct CheckCalendar {
  static const char* key() { return "calendar"; }
  static const char* path() { return nullptr; }
};
2164
2165 } // namespace
2166
GetAvailableLocalesForDateFormat()2167 const std::set<std::string>& Intl::GetAvailableLocalesForDateFormat() {
2168 static base::LazyInstance<Intl::AvailableLocales<CheckCalendar>>::type
2169 available_locales = LAZY_INSTANCE_INITIALIZER;
2170 return available_locales.Pointer()->Get();
2171 }
2172
NumberFieldToType(Isolate * isolate,Handle<Object> numeric_obj,int32_t field_id)2173 Handle<String> Intl::NumberFieldToType(Isolate* isolate,
2174 Handle<Object> numeric_obj,
2175 int32_t field_id) {
2176 DCHECK(numeric_obj->IsNumeric());
2177 switch (static_cast<UNumberFormatFields>(field_id)) {
2178 case UNUM_INTEGER_FIELD:
2179 if (numeric_obj->IsBigInt()) {
2180 // Neither NaN nor Infinite could be stored into BigInt
2181 // so just return integer.
2182 return isolate->factory()->integer_string();
2183 } else {
2184 double number = numeric_obj->Number();
2185 if (std::isfinite(number)) return isolate->factory()->integer_string();
2186 if (std::isnan(number)) return isolate->factory()->nan_string();
2187 return isolate->factory()->infinity_string();
2188 }
2189 case UNUM_FRACTION_FIELD:
2190 return isolate->factory()->fraction_string();
2191 case UNUM_DECIMAL_SEPARATOR_FIELD:
2192 return isolate->factory()->decimal_string();
2193 case UNUM_GROUPING_SEPARATOR_FIELD:
2194 return isolate->factory()->group_string();
2195 case UNUM_CURRENCY_FIELD:
2196 return isolate->factory()->currency_string();
2197 case UNUM_PERCENT_FIELD:
2198 return isolate->factory()->percentSign_string();
2199 case UNUM_SIGN_FIELD:
2200 if (numeric_obj->IsBigInt()) {
2201 Handle<BigInt> big_int = Handle<BigInt>::cast(numeric_obj);
2202 return big_int->IsNegative() ? isolate->factory()->minusSign_string()
2203 : isolate->factory()->plusSign_string();
2204 } else {
2205 double number = numeric_obj->Number();
2206 return std::signbit(number) ? isolate->factory()->minusSign_string()
2207 : isolate->factory()->plusSign_string();
2208 }
2209 case UNUM_EXPONENT_SYMBOL_FIELD:
2210 return isolate->factory()->exponentSeparator_string();
2211
2212 case UNUM_EXPONENT_SIGN_FIELD:
2213 return isolate->factory()->exponentMinusSign_string();
2214
2215 case UNUM_EXPONENT_FIELD:
2216 return isolate->factory()->exponentInteger_string();
2217
2218 case UNUM_PERMILL_FIELD:
2219 // We're not creating any permill formatter, and it's not even clear how
2220 // that would be possible with the ICU API.
2221 UNREACHABLE();
2222 return Handle<String>();
2223
2224 case UNUM_COMPACT_FIELD:
2225 return isolate->factory()->compact_string();
2226 case UNUM_MEASURE_UNIT_FIELD:
2227 return isolate->factory()->unit_string();
2228
2229 default:
2230 UNREACHABLE();
2231 return Handle<String>();
2232 }
2233 }
2234
2235 // A helper function to convert the FormattedValue for several Intl objects.
FormattedToString(Isolate * isolate,const icu::FormattedValue & formatted)2236 MaybeHandle<String> Intl::FormattedToString(
2237 Isolate* isolate, const icu::FormattedValue& formatted) {
2238 UErrorCode status = U_ZERO_ERROR;
2239 icu::UnicodeString result = formatted.toString(status);
2240 if (U_FAILURE(status)) {
2241 THROW_NEW_ERROR(isolate, NewTypeError(MessageTemplate::kIcuError), String);
2242 }
2243 return Intl::ToString(isolate, result);
2244 }
2245
2246
2247 } // namespace internal
2248 } // namespace v8
2249