• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2013 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef V8_INTL_SUPPORT
6 #error Internationalization is expected to be enabled.
7 #endif  // V8_INTL_SUPPORT
8 
9 #include "src/objects/intl-objects.h"
10 
11 #include <algorithm>
12 #include <memory>
13 #include <string>
14 #include <vector>
15 
16 #include "src/api/api-inl.h"
17 #include "src/execution/isolate.h"
18 #include "src/handles/global-handles.h"
19 #include "src/heap/factory.h"
20 #include "src/objects/js-collator-inl.h"
21 #include "src/objects/js-date-time-format-inl.h"
22 #include "src/objects/js-locale-inl.h"
23 #include "src/objects/js-locale.h"
24 #include "src/objects/js-number-format-inl.h"
25 #include "src/objects/objects-inl.h"
26 #include "src/objects/property-descriptor.h"
27 #include "src/objects/smi.h"
28 #include "src/objects/string.h"
29 #include "src/strings/string-case.h"
30 #include "unicode/basictz.h"
31 #include "unicode/brkiter.h"
32 #include "unicode/calendar.h"
33 #include "unicode/coll.h"
34 #include "unicode/datefmt.h"
35 #include "unicode/decimfmt.h"
36 #include "unicode/formattedvalue.h"
37 #include "unicode/localebuilder.h"
38 #include "unicode/localematcher.h"
39 #include "unicode/locid.h"
40 #include "unicode/normalizer2.h"
41 #include "unicode/numberformatter.h"
42 #include "unicode/numfmt.h"
43 #include "unicode/numsys.h"
44 #include "unicode/timezone.h"
45 #include "unicode/ures.h"
46 #include "unicode/ustring.h"
47 #include "unicode/uvernum.h"  // U_ICU_VERSION_MAJOR_NUM
48 
// Two-level stringification: XSTR first expands its macro argument and STR
// then turns the expansion into a string literal, so the message below shows
// the value of V8_MINIMUM_ICU_VERSION rather than its name.
#define XSTR(s) STR(s)
#define STR(s) #s
// Fail the build when the ICU headers are older than V8_MINIMUM_ICU_VERSION.
static_assert(
    V8_MINIMUM_ICU_VERSION <= U_ICU_VERSION_MAJOR_NUM,
    "v8 is required to build with ICU " XSTR(V8_MINIMUM_ICU_VERSION) " and up");
// The helper macros are only needed for the message above.
#undef STR
#undef XSTR
56 
57 namespace v8 {
58 namespace internal {
59 
60 namespace {
61 
// Lower-casing table for Latin-1, indexed by code unit (0x00-0xFF).
// 0x41-0x5A map to 0x61-0x7A and 0xC0-0xDE map to 0xE0-0xFE, except 0xD7
// which maps to itself. Every other entry maps to itself.
constexpr uint8_t kToLower[256] = {
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B,
    0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
    0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23,
    0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B,
    0x3C, 0x3D, 0x3E, 0x3F, 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73,
    0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B,
    0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, 0x80, 0x81, 0x82, 0x83,
    0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B,
    0x9C, 0x9D, 0x9E, 0x9F, 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7,
    0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, 0xB0, 0xB1, 0xB2, 0xB3,
    0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB,
    0xEC, 0xED, 0xEE, 0xEF, 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xD7,
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF, 0xE0, 0xE1, 0xE2, 0xE3,
    0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB,
    0xFC, 0xFD, 0xFE, 0xFF,
};
86 
ToLatin1Lower(uint16_t ch)87 inline constexpr uint16_t ToLatin1Lower(uint16_t ch) {
88   return static_cast<uint16_t>(kToLower[ch]);
89 }
90 
91 // Does not work for U+00DF (sharp-s), U+00B5 (micron), U+00FF.
ToLatin1Upper(uint16_t ch)92 inline constexpr uint16_t ToLatin1Upper(uint16_t ch) {
93   CONSTEXPR_DCHECK(ch != 0xDF && ch != 0xB5 && ch != 0xFF);
94   return ch &
95          ~((IsAsciiLower(ch) || (((ch & 0xE0) == 0xE0) && ch != 0xF7)) << 5);
96 }
97 
98 template <typename Char>
ToUpperFastASCII(const Vector<const Char> & src,Handle<SeqOneByteString> result)99 bool ToUpperFastASCII(const Vector<const Char>& src,
100                       Handle<SeqOneByteString> result) {
101   // Do a faster loop for the case where all the characters are ASCII.
102   uint16_t ored = 0;
103   int32_t index = 0;
104   for (auto it = src.begin(); it != src.end(); ++it) {
105     uint16_t ch = static_cast<uint16_t>(*it);
106     ored |= ch;
107     result->SeqOneByteStringSet(index++, ToAsciiUpper(ch));
108   }
109   return !(ored & ~0x7F);
110 }
111 
// U+00DF LATIN SMALL LETTER SHARP S.
constexpr uint16_t sharp_s = 0x00DF;
113 
114 template <typename Char>
ToUpperOneByte(const Vector<const Char> & src,uint8_t * dest,int * sharp_s_count)115 bool ToUpperOneByte(const Vector<const Char>& src, uint8_t* dest,
116                     int* sharp_s_count) {
117   // Still pretty-fast path for the input with non-ASCII Latin-1 characters.
118 
119   // There are two special cases.
120   //  1. U+00B5 and U+00FF are mapped to a character beyond U+00FF.
121   //  2. Lower case sharp-S converts to "SS" (two characters)
122   *sharp_s_count = 0;
123   for (auto it = src.begin(); it != src.end(); ++it) {
124     uint16_t ch = static_cast<uint16_t>(*it);
125     if (V8_UNLIKELY(ch == sharp_s)) {
126       ++(*sharp_s_count);
127       continue;
128     }
129     if (V8_UNLIKELY(ch == 0xB5 || ch == 0xFF)) {
130       // Since this upper-cased character does not fit in an 8-bit string, we
131       // need to take the 16-bit path.
132       return false;
133     }
134     *dest++ = ToLatin1Upper(ch);
135   }
136 
137   return true;
138 }
139 
140 template <typename Char>
ToUpperWithSharpS(const Vector<const Char> & src,Handle<SeqOneByteString> result)141 void ToUpperWithSharpS(const Vector<const Char>& src,
142                        Handle<SeqOneByteString> result) {
143   int32_t dest_index = 0;
144   for (auto it = src.begin(); it != src.end(); ++it) {
145     uint16_t ch = static_cast<uint16_t>(*it);
146     if (ch == sharp_s) {
147       result->SeqOneByteStringSet(dest_index++, 'S');
148       result->SeqOneByteStringSet(dest_index++, 'S');
149     } else {
150       result->SeqOneByteStringSet(dest_index++, ToLatin1Upper(ch));
151     }
152   }
153 }
154 
FindFirstUpperOrNonAscii(String s,int length)155 inline int FindFirstUpperOrNonAscii(String s, int length) {
156   for (int index = 0; index < length; ++index) {
157     uint16_t ch = s.Get(index);
158     if (V8_UNLIKELY(IsAsciiUpper(ch) || ch & ~0x7F)) {
159       return index;
160     }
161   }
162   return length;
163 }
164 
GetUCharBufferFromFlat(const String::FlatContent & flat,std::unique_ptr<uc16[]> * dest,int32_t length)165 const UChar* GetUCharBufferFromFlat(const String::FlatContent& flat,
166                                     std::unique_ptr<uc16[]>* dest,
167                                     int32_t length) {
168   DCHECK(flat.IsFlat());
169   if (flat.IsOneByte()) {
170     if (!*dest) {
171       dest->reset(NewArray<uc16>(length));
172       CopyChars(dest->get(), flat.ToOneByteVector().begin(), length);
173     }
174     return reinterpret_cast<const UChar*>(dest->get());
175   } else {
176     return reinterpret_cast<const UChar*>(flat.ToUC16Vector().begin());
177   }
178 }
179 
180 template <typename T>
New(Isolate * isolate,Handle<JSFunction> constructor,Handle<Object> locales,Handle<Object> options,const char * method)181 MaybeHandle<T> New(Isolate* isolate, Handle<JSFunction> constructor,
182                    Handle<Object> locales, Handle<Object> options,
183                    const char* method) {
184   Handle<Map> map;
185   ASSIGN_RETURN_ON_EXCEPTION(
186       isolate, map,
187       JSFunction::GetDerivedMap(isolate, constructor, constructor), T);
188   return T::New(isolate, map, locales, options, method);
189 }
190 }  // namespace
191 
ToLatin1LowerTable()192 const uint8_t* Intl::ToLatin1LowerTable() { return &kToLower[0]; }
193 
ToICUUnicodeString(Isolate * isolate,Handle<String> string)194 icu::UnicodeString Intl::ToICUUnicodeString(Isolate* isolate,
195                                             Handle<String> string) {
196   DCHECK(string->IsFlat());
197   DisallowHeapAllocation no_gc;
198   std::unique_ptr<uc16[]> sap;
199   // Short one-byte strings can be expanded on the stack to avoid allocating a
200   // temporary buffer.
201   constexpr int kShortStringSize = 80;
202   UChar short_string_buffer[kShortStringSize];
203   const UChar* uchar_buffer = nullptr;
204   const String::FlatContent& flat = string->GetFlatContent(no_gc);
205   int32_t length = string->length();
206   if (flat.IsOneByte() && length <= kShortStringSize) {
207     CopyChars(short_string_buffer, flat.ToOneByteVector().begin(), length);
208     uchar_buffer = short_string_buffer;
209   } else {
210     uchar_buffer = GetUCharBufferFromFlat(flat, &sap, length);
211   }
212   return icu::UnicodeString(uchar_buffer, length);
213 }
214 
215 namespace {
ToICUStringPiece(Isolate * isolate,Handle<String> string)216 icu::StringPiece ToICUStringPiece(Isolate* isolate, Handle<String> string) {
217   DCHECK(string->IsFlat());
218   DisallowHeapAllocation no_gc;
219 
220   const String::FlatContent& flat = string->GetFlatContent(no_gc);
221   if (!flat.IsOneByte()) return icu::StringPiece(nullptr, 0);
222 
223   int32_t length = string->length();
224   const char* char_buffer =
225       reinterpret_cast<const char*>(flat.ToOneByteVector().begin());
226   if (!String::IsAscii(char_buffer, length)) {
227     return icu::StringPiece(nullptr, 0);
228   }
229 
230   return icu::StringPiece(char_buffer, length);
231 }
232 
LocaleConvertCase(Isolate * isolate,Handle<String> s,bool is_to_upper,const char * lang)233 MaybeHandle<String> LocaleConvertCase(Isolate* isolate, Handle<String> s,
234                                       bool is_to_upper, const char* lang) {
235   auto case_converter = is_to_upper ? u_strToUpper : u_strToLower;
236   int32_t src_length = s->length();
237   int32_t dest_length = src_length;
238   UErrorCode status;
239   Handle<SeqTwoByteString> result;
240   std::unique_ptr<uc16[]> sap;
241 
242   if (dest_length == 0) return ReadOnlyRoots(isolate).empty_string_handle();
243 
244   // This is not a real loop. It'll be executed only once (no overflow) or
245   // twice (overflow).
246   for (int i = 0; i < 2; ++i) {
247     // Case conversion can increase the string length (e.g. sharp-S => SS) so
248     // that we have to handle RangeError exceptions here.
249     ASSIGN_RETURN_ON_EXCEPTION(
250         isolate, result, isolate->factory()->NewRawTwoByteString(dest_length),
251         String);
252     DisallowHeapAllocation no_gc;
253     DCHECK(s->IsFlat());
254     String::FlatContent flat = s->GetFlatContent(no_gc);
255     const UChar* src = GetUCharBufferFromFlat(flat, &sap, src_length);
256     status = U_ZERO_ERROR;
257     dest_length =
258         case_converter(reinterpret_cast<UChar*>(result->GetChars(no_gc)),
259                        dest_length, src, src_length, lang, &status);
260     if (status != U_BUFFER_OVERFLOW_ERROR) break;
261   }
262 
263   // In most cases, the output will fill the destination buffer completely
264   // leading to an unterminated string (U_STRING_NOT_TERMINATED_WARNING).
265   // Only in rare cases, it'll be shorter than the destination buffer and
266   // |result| has to be truncated.
267   DCHECK(U_SUCCESS(status));
268   if (V8_LIKELY(status == U_STRING_NOT_TERMINATED_WARNING)) {
269     DCHECK(dest_length == result->length());
270     return result;
271   }
272   DCHECK(dest_length < result->length());
273   return SeqString::Truncate(result, dest_length);
274 }
275 
276 }  // namespace
277 
278 // A stripped-down version of ConvertToLower that can only handle flat one-byte
279 // strings and does not allocate. Note that {src} could still be, e.g., a
280 // one-byte sliced string with a two-byte parent string.
281 // Called from TF builtins.
ConvertOneByteToLower(String src,String dst)282 String Intl::ConvertOneByteToLower(String src, String dst) {
283   DCHECK_EQ(src.length(), dst.length());
284   DCHECK(src.IsOneByteRepresentation());
285   DCHECK(src.IsFlat());
286   DCHECK(dst.IsSeqOneByteString());
287 
288   DisallowHeapAllocation no_gc;
289 
290   const int length = src.length();
291   String::FlatContent src_flat = src.GetFlatContent(no_gc);
292   uint8_t* dst_data = SeqOneByteString::cast(dst).GetChars(no_gc);
293 
294   if (src_flat.IsOneByte()) {
295     const uint8_t* src_data = src_flat.ToOneByteVector().begin();
296 
297     bool has_changed_character = false;
298     int index_to_first_unprocessed =
299         FastAsciiConvert<true>(reinterpret_cast<char*>(dst_data),
300                                reinterpret_cast<const char*>(src_data), length,
301                                &has_changed_character);
302 
303     if (index_to_first_unprocessed == length) {
304       return has_changed_character ? dst : src;
305     }
306 
307     // If not ASCII, we keep the result up to index_to_first_unprocessed and
308     // process the rest.
309     for (int index = index_to_first_unprocessed; index < length; ++index) {
310       dst_data[index] = ToLatin1Lower(static_cast<uint16_t>(src_data[index]));
311     }
312   } else {
313     DCHECK(src_flat.IsTwoByte());
314     int index_to_first_unprocessed = FindFirstUpperOrNonAscii(src, length);
315     if (index_to_first_unprocessed == length) return src;
316 
317     const uint16_t* src_data = src_flat.ToUC16Vector().begin();
318     CopyChars(dst_data, src_data, index_to_first_unprocessed);
319     for (int index = index_to_first_unprocessed; index < length; ++index) {
320       dst_data[index] = ToLatin1Lower(static_cast<uint16_t>(src_data[index]));
321     }
322   }
323 
324   return dst;
325 }
326 
ConvertToLower(Isolate * isolate,Handle<String> s)327 MaybeHandle<String> Intl::ConvertToLower(Isolate* isolate, Handle<String> s) {
328   if (!s->IsOneByteRepresentation()) {
329     // Use a slower implementation for strings with characters beyond U+00FF.
330     return LocaleConvertCase(isolate, s, false, "");
331   }
332 
333   int length = s->length();
334 
335   // We depend here on the invariant that the length of a Latin1
336   // string is invariant under ToLowerCase, and the result always
337   // fits in the Latin1 range in the *root locale*. It does not hold
338   // for ToUpperCase even in the root locale.
339 
340   // Scan the string for uppercase and non-ASCII characters for strings
341   // shorter than a machine-word without any memory allocation overhead.
342   // TODO(jshin): Apply this to a longer input by breaking FastAsciiConvert()
343   // to two parts, one for scanning the prefix with no change and the other for
344   // handling ASCII-only characters.
345 
346   bool is_short = length < static_cast<int>(sizeof(uintptr_t));
347   if (is_short) {
348     bool is_lower_ascii = FindFirstUpperOrNonAscii(*s, length) == length;
349     if (is_lower_ascii) return s;
350   }
351 
352   Handle<SeqOneByteString> result =
353       isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
354 
355   return Handle<String>(Intl::ConvertOneByteToLower(*s, *result), isolate);
356 }
357 
ConvertToUpper(Isolate * isolate,Handle<String> s)358 MaybeHandle<String> Intl::ConvertToUpper(Isolate* isolate, Handle<String> s) {
359   int32_t length = s->length();
360   if (s->IsOneByteRepresentation() && length > 0) {
361     Handle<SeqOneByteString> result =
362         isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
363 
364     DCHECK(s->IsFlat());
365     int sharp_s_count;
366     bool is_result_single_byte;
367     {
368       DisallowHeapAllocation no_gc;
369       String::FlatContent flat = s->GetFlatContent(no_gc);
370       uint8_t* dest = result->GetChars(no_gc);
371       if (flat.IsOneByte()) {
372         Vector<const uint8_t> src = flat.ToOneByteVector();
373         bool has_changed_character = false;
374         int index_to_first_unprocessed = FastAsciiConvert<false>(
375             reinterpret_cast<char*>(result->GetChars(no_gc)),
376             reinterpret_cast<const char*>(src.begin()), length,
377             &has_changed_character);
378         if (index_to_first_unprocessed == length) {
379           return has_changed_character ? result : s;
380         }
381         // If not ASCII, we keep the result up to index_to_first_unprocessed and
382         // process the rest.
383         is_result_single_byte =
384             ToUpperOneByte(src.SubVector(index_to_first_unprocessed, length),
385                            dest + index_to_first_unprocessed, &sharp_s_count);
386       } else {
387         DCHECK(flat.IsTwoByte());
388         Vector<const uint16_t> src = flat.ToUC16Vector();
389         if (ToUpperFastASCII(src, result)) return result;
390         is_result_single_byte = ToUpperOneByte(src, dest, &sharp_s_count);
391       }
392     }
393 
394     // Go to the full Unicode path if there are characters whose uppercase
395     // is beyond the Latin-1 range (cannot be represented in OneByteString).
396     if (V8_UNLIKELY(!is_result_single_byte)) {
397       return LocaleConvertCase(isolate, s, true, "");
398     }
399 
400     if (sharp_s_count == 0) return result;
401 
402     // We have sharp_s_count sharp-s characters, but the result is still
403     // in the Latin-1 range.
404     ASSIGN_RETURN_ON_EXCEPTION(
405         isolate, result,
406         isolate->factory()->NewRawOneByteString(length + sharp_s_count),
407         String);
408     DisallowHeapAllocation no_gc;
409     String::FlatContent flat = s->GetFlatContent(no_gc);
410     if (flat.IsOneByte()) {
411       ToUpperWithSharpS(flat.ToOneByteVector(), result);
412     } else {
413       ToUpperWithSharpS(flat.ToUC16Vector(), result);
414     }
415 
416     return result;
417   }
418 
419   return LocaleConvertCase(isolate, s, true, "");
420 }
421 
GetNumberingSystem(const icu::Locale & icu_locale)422 std::string Intl::GetNumberingSystem(const icu::Locale& icu_locale) {
423   // Ugly hack. ICU doesn't expose numbering system in any way, so we have
424   // to assume that for given locale NumberingSystem constructor produces the
425   // same digits as NumberFormat/Calendar would.
426   UErrorCode status = U_ZERO_ERROR;
427   std::unique_ptr<icu::NumberingSystem> numbering_system(
428       icu::NumberingSystem::createInstance(icu_locale, status));
429   if (U_SUCCESS(status)) return numbering_system->getName();
430   return "latn";
431 }
432 
433 namespace {
434 
CreateICULocale(const std::string & bcp47_locale)435 Maybe<icu::Locale> CreateICULocale(const std::string& bcp47_locale) {
436   DisallowHeapAllocation no_gc;
437 
438   // Convert BCP47 into ICU locale format.
439   UErrorCode status = U_ZERO_ERROR;
440 
441   icu::Locale icu_locale = icu::Locale::forLanguageTag(bcp47_locale, status);
442   DCHECK(U_SUCCESS(status));
443   if (icu_locale.isBogus()) {
444     return Nothing<icu::Locale>();
445   }
446 
447   return Just(icu_locale);
448 }
449 
450 }  // anonymous namespace
451 
452 // static
453 
ToString(Isolate * isolate,const icu::UnicodeString & string)454 MaybeHandle<String> Intl::ToString(Isolate* isolate,
455                                    const icu::UnicodeString& string) {
456   return isolate->factory()->NewStringFromTwoByte(Vector<const uint16_t>(
457       reinterpret_cast<const uint16_t*>(string.getBuffer()), string.length()));
458 }
459 
ToString(Isolate * isolate,const icu::UnicodeString & string,int32_t begin,int32_t end)460 MaybeHandle<String> Intl::ToString(Isolate* isolate,
461                                    const icu::UnicodeString& string,
462                                    int32_t begin, int32_t end) {
463   return Intl::ToString(isolate, string.tempSubStringBetween(begin, end));
464 }
465 
466 namespace {
467 
InnerAddElement(Isolate * isolate,Handle<JSArray> array,int index,Handle<String> field_type_string,Handle<String> value)468 Handle<JSObject> InnerAddElement(Isolate* isolate, Handle<JSArray> array,
469                                  int index, Handle<String> field_type_string,
470                                  Handle<String> value) {
471   // let element = $array[$index] = {
472   //   type: $field_type_string,
473   //   value: $value
474   // }
475   // return element;
476   Factory* factory = isolate->factory();
477   Handle<JSObject> element = factory->NewJSObject(isolate->object_function());
478   JSObject::AddProperty(isolate, element, factory->type_string(),
479                         field_type_string, NONE);
480 
481   JSObject::AddProperty(isolate, element, factory->value_string(), value, NONE);
482   JSObject::AddDataElement(array, index, element, NONE);
483   return element;
484 }
485 
486 }  // namespace
487 
// Stores a new {type, value} object in |array| at |index|; the created object
// itself is not returned to the caller.
void Intl::AddElement(Isolate* isolate, Handle<JSArray> array, int index,
                      Handle<String> field_type_string, Handle<String> value) {
  // Same as $array[$index] = {type: $field_type_string, value: $value};
  InnerAddElement(isolate, array, index, field_type_string, value);
}
493 
AddElement(Isolate * isolate,Handle<JSArray> array,int index,Handle<String> field_type_string,Handle<String> value,Handle<String> additional_property_name,Handle<String> additional_property_value)494 void Intl::AddElement(Isolate* isolate, Handle<JSArray> array, int index,
495                       Handle<String> field_type_string, Handle<String> value,
496                       Handle<String> additional_property_name,
497                       Handle<String> additional_property_value) {
498   // Same as $array[$index] = {
499   //   type: $field_type_string, value: $value,
500   //   $additional_property_name: $additional_property_value
501   // }
502   Handle<JSObject> element =
503       InnerAddElement(isolate, array, index, field_type_string, value);
504   JSObject::AddProperty(isolate, element, additional_property_name,
505                         additional_property_value, NONE);
506 }
507 
508 namespace {
509 
510 // Build the shortened locale; eg, convert xx_Yyyy_ZZ  to xx_ZZ.
511 //
512 // If locale has a script tag then return true and the locale without the
513 // script else return false and an empty string.
RemoveLocaleScriptTag(const std::string & icu_locale,std::string * locale_less_script)514 bool RemoveLocaleScriptTag(const std::string& icu_locale,
515                            std::string* locale_less_script) {
516   icu::Locale new_locale = icu::Locale::createCanonical(icu_locale.c_str());
517   const char* icu_script = new_locale.getScript();
518   if (icu_script == nullptr || strlen(icu_script) == 0) {
519     *locale_less_script = std::string();
520     return false;
521   }
522 
523   const char* icu_language = new_locale.getLanguage();
524   const char* icu_country = new_locale.getCountry();
525   icu::Locale short_locale = icu::Locale(icu_language, icu_country);
526   *locale_less_script = short_locale.getName();
527   return true;
528 }
529 
ValidateResource(const icu::Locale locale,const char * path,const char * key)530 bool ValidateResource(const icu::Locale locale, const char* path,
531                       const char* key) {
532   bool result = false;
533   UErrorCode status = U_ZERO_ERROR;
534   UResourceBundle* bundle = ures_open(path, locale.getName(), &status);
535   if (bundle != nullptr && status == U_ZERO_ERROR) {
536     if (key == nullptr) {
537       result = true;
538     } else {
539       UResourceBundle* key_bundle =
540           ures_getByKey(bundle, key, nullptr, &status);
541       result = key_bundle != nullptr && (status == U_ZERO_ERROR);
542       ures_close(key_bundle);
543     }
544   }
545   ures_close(bundle);
546   if (!result) {
547     if ((locale.getCountry()[0] != '\0') && (locale.getScript()[0] != '\0')) {
548       // Fallback to try without country.
549       std::string without_country(locale.getLanguage());
550       without_country = without_country.append("-").append(locale.getScript());
551       return ValidateResource(without_country.c_str(), path, key);
552     } else if ((locale.getCountry()[0] != '\0') ||
553                (locale.getScript()[0] != '\0')) {
554       // Fallback to try with only language.
555       std::string language(locale.getLanguage());
556       return ValidateResource(language.c_str(), path, key);
557     }
558   }
559   return result;
560 }
561 
562 }  // namespace
563 
BuildLocaleSet(const std::vector<std::string> & icu_available_locales,const char * path,const char * validate_key)564 std::set<std::string> Intl::BuildLocaleSet(
565     const std::vector<std::string>& icu_available_locales, const char* path,
566     const char* validate_key) {
567   std::set<std::string> locales;
568   for (const std::string& locale : icu_available_locales) {
569     if (path != nullptr || validate_key != nullptr) {
570       if (!ValidateResource(icu::Locale(locale.c_str()), path, validate_key)) {
571         continue;
572       }
573     }
574     locales.insert(locale);
575     std::string shortened_locale;
576     if (RemoveLocaleScriptTag(locale, &shortened_locale)) {
577       std::replace(shortened_locale.begin(), shortened_locale.end(), '_', '-');
578       locales.insert(shortened_locale);
579     }
580   }
581   return locales;
582 }
583 
ToLanguageTag(const icu::Locale & locale)584 Maybe<std::string> Intl::ToLanguageTag(const icu::Locale& locale) {
585   UErrorCode status = U_ZERO_ERROR;
586   std::string res = locale.toLanguageTag<std::string>(status);
587   if (U_FAILURE(status)) {
588     return Nothing<std::string>();
589   }
590   DCHECK(U_SUCCESS(status));
591 
592   // Hack to remove -true and -yes from unicode extensions
593   // Address https://crbug.com/v8/8565
594   // TODO(ftang): Move the following "remove true" logic into ICU toLanguageTag
595   // by fixing ICU-20310.
596   size_t u_ext_start = res.find("-u-");
597   if (u_ext_start != std::string::npos) {
598     // remove "-true" and "-yes" after -u-
599     const std::vector<std::string> remove_items({"-true", "-yes"});
600     for (auto item = remove_items.begin(); item != remove_items.end(); item++) {
601       for (size_t sep_remove =
602                res.find(*item, u_ext_start + 5 /* strlen("-u-xx") == 5 */);
603            sep_remove != std::string::npos; sep_remove = res.find(*item)) {
604         size_t end_of_sep_remove = sep_remove + item->length();
605         if (res.length() == end_of_sep_remove ||
606             res.at(end_of_sep_remove) == '-') {
607           res.erase(sep_remove, item->length());
608         }
609       }
610     }
611   }
612   return Just(res);
613 }
614 
615 namespace {
DefaultLocale(Isolate * isolate)616 std::string DefaultLocale(Isolate* isolate) {
617   if (isolate->default_locale().empty()) {
618     icu::Locale default_locale;
619     // Translate ICU's fallback locale to a well-known locale.
620     if (strcmp(default_locale.getName(), "en_US_POSIX") == 0 ||
621         strcmp(default_locale.getName(), "c") == 0) {
622       isolate->set_default_locale("en-US");
623     } else {
624       // Set the locale
625       isolate->set_default_locale(
626           default_locale.isBogus()
627               ? "und"
628               : Intl::ToLanguageTag(default_locale).FromJust());
629     }
630     DCHECK(!isolate->default_locale().empty());
631   }
632   return isolate->default_locale();
633 }
634 }  // namespace
635 
636 // See ecma402/#legacy-constructor.
LegacyUnwrapReceiver(Isolate * isolate,Handle<JSReceiver> receiver,Handle<JSFunction> constructor,bool has_initialized_slot)637 MaybeHandle<Object> Intl::LegacyUnwrapReceiver(Isolate* isolate,
638                                                Handle<JSReceiver> receiver,
639                                                Handle<JSFunction> constructor,
640                                                bool has_initialized_slot) {
641   Handle<Object> obj_is_instance_of;
642   ASSIGN_RETURN_ON_EXCEPTION(isolate, obj_is_instance_of,
643                              Object::InstanceOf(isolate, receiver, constructor),
644                              Object);
645   bool is_instance_of = obj_is_instance_of->BooleanValue(isolate);
646 
647   // 2. If receiver does not have an [[Initialized...]] internal slot
648   //    and ? InstanceofOperator(receiver, constructor) is true, then
649   if (!has_initialized_slot && is_instance_of) {
650     // 2. a. Let new_receiver be ? Get(receiver, %Intl%.[[FallbackSymbol]]).
651     Handle<Object> new_receiver;
652     ASSIGN_RETURN_ON_EXCEPTION(
653         isolate, new_receiver,
654         JSReceiver::GetProperty(isolate, receiver,
655                                 isolate->factory()->intl_fallback_symbol()),
656         Object);
657     return new_receiver;
658   }
659 
660   return receiver;
661 }
662 
GetStringOption(Isolate * isolate,Handle<JSReceiver> options,const char * property,std::vector<const char * > values,const char * service,std::unique_ptr<char[]> * result)663 Maybe<bool> Intl::GetStringOption(Isolate* isolate, Handle<JSReceiver> options,
664                                   const char* property,
665                                   std::vector<const char*> values,
666                                   const char* service,
667                                   std::unique_ptr<char[]>* result) {
668   Handle<String> property_str =
669       isolate->factory()->NewStringFromAsciiChecked(property);
670 
671   // 1. Let value be ? Get(options, property).
672   Handle<Object> value;
673   ASSIGN_RETURN_ON_EXCEPTION_VALUE(
674       isolate, value,
675       Object::GetPropertyOrElement(isolate, options, property_str),
676       Nothing<bool>());
677 
678   if (value->IsUndefined(isolate)) {
679     return Just(false);
680   }
681 
682   // 2. c. Let value be ? ToString(value).
683   Handle<String> value_str;
684   ASSIGN_RETURN_ON_EXCEPTION_VALUE(
685       isolate, value_str, Object::ToString(isolate, value), Nothing<bool>());
686   std::unique_ptr<char[]> value_cstr = value_str->ToCString();
687 
688   // 2. d. if values is not undefined, then
689   if (values.size() > 0) {
690     // 2. d. i. If values does not contain an element equal to value,
691     // throw a RangeError exception.
692     for (size_t i = 0; i < values.size(); i++) {
693       if (strcmp(values.at(i), value_cstr.get()) == 0) {
694         // 2. e. return value
695         *result = std::move(value_cstr);
696         return Just(true);
697       }
698     }
699 
700     Handle<String> service_str =
701         isolate->factory()->NewStringFromAsciiChecked(service);
702     THROW_NEW_ERROR_RETURN_VALUE(
703         isolate,
704         NewRangeError(MessageTemplate::kValueOutOfRange, value, service_str,
705                       property_str),
706         Nothing<bool>());
707   }
708 
709   // 2. e. return value
710   *result = std::move(value_cstr);
711   return Just(true);
712 }
713 
GetBoolOption(Isolate * isolate,Handle<JSReceiver> options,const char * property,const char * service,bool * result)714 V8_WARN_UNUSED_RESULT Maybe<bool> Intl::GetBoolOption(
715     Isolate* isolate, Handle<JSReceiver> options, const char* property,
716     const char* service, bool* result) {
717   Handle<String> property_str =
718       isolate->factory()->NewStringFromAsciiChecked(property);
719 
720   // 1. Let value be ? Get(options, property).
721   Handle<Object> value;
722   ASSIGN_RETURN_ON_EXCEPTION_VALUE(
723       isolate, value,
724       Object::GetPropertyOrElement(isolate, options, property_str),
725       Nothing<bool>());
726 
727   // 2. If value is not undefined, then
728   if (!value->IsUndefined(isolate)) {
729     // 2. b. i. Let value be ToBoolean(value).
730     *result = value->BooleanValue(isolate);
731 
732     // 2. e. return value
733     return Just(true);
734   }
735 
736   return Just(false);
737 }
738 
739 namespace {
740 
IsTwoLetterLanguage(const std::string & locale)741 bool IsTwoLetterLanguage(const std::string& locale) {
742   // Two letters, both in range 'a'-'z'...
743   return locale.length() == 2 && IsAsciiLower(locale[0]) &&
744          IsAsciiLower(locale[1]);
745 }
746 
// Returns true iff |locale| is exactly one of the deprecated language tags
// "in", "iw", "ji", "jw" or "mo". The comparison is case-sensitive.
bool IsDeprecatedLanguage(const std::string& locale) {
  constexpr const char* kDeprecatedLanguages[] = {"in", "iw", "ji", "jw",
                                                  "mo"};
  for (const char* deprecated : kDeprecatedLanguages) {
    if (locale == deprecated) return true;
  }
  return false;
}
752 
753 // Reference:
754 // https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry
IsGrandfatheredTagWithoutPreferredVaule(const std::string & locale)755 bool IsGrandfatheredTagWithoutPreferredVaule(const std::string& locale) {
756   if (V8_UNLIKELY(locale == "zh-min" || locale == "cel-gaulish")) return true;
757   if (locale.length() > 6 /* i-mingo is 7 chars long */ &&
758       V8_UNLIKELY(locale[0] == 'i' && locale[1] == '-')) {
759     return locale.substr(2) == "default" || locale.substr(2) == "enochian" ||
760            locale.substr(2) == "mingo";
761   }
762   return false;
763 }
764 
IsStructurallyValidLanguageTag(const std::string & tag)765 bool IsStructurallyValidLanguageTag(const std::string& tag) {
766   return JSLocale::StartsWithUnicodeLanguageId(tag);
767 }
768 
769 // Canonicalize the locale.
770 // https://tc39.github.io/ecma402/#sec-canonicalizelanguagetag,
771 // including type check and structural validity check.
CanonicalizeLanguageTag(Isolate * isolate,const std::string & locale_in)772 Maybe<std::string> CanonicalizeLanguageTag(Isolate* isolate,
773                                            const std::string& locale_in) {
774   std::string locale = locale_in;
775 
776   if (locale.length() == 0 ||
777       !String::IsAscii(locale.data(), static_cast<int>(locale.length()))) {
778     THROW_NEW_ERROR_RETURN_VALUE(
779         isolate,
780         NewRangeError(
781             MessageTemplate::kInvalidLanguageTag,
782             isolate->factory()->NewStringFromAsciiChecked(locale.c_str())),
783         Nothing<std::string>());
784   }
785 
786   // Optimize for the most common case: a 2-letter language code in the
787   // canonical form/lowercase that is not one of the deprecated codes
788   // (in, iw, ji, jw). Don't check for ~70 of 3-letter deprecated language
789   // codes. Instead, let them be handled by ICU in the slow path. However,
790   // fast-track 'fil' (3-letter canonical code).
791   if ((IsTwoLetterLanguage(locale) && !IsDeprecatedLanguage(locale)) ||
792       locale == "fil") {
793     return Just(locale);
794   }
795 
796   // Because per BCP 47 2.1.1 language tags are case-insensitive, lowercase
797   // the input before any more check.
798   std::transform(locale.begin(), locale.end(), locale.begin(), ToAsciiLower);
799 
800   // ICU maps a few grandfathered tags to what looks like a regular language
801   // tag even though IANA language tag registry does not have a preferred
802   // entry map for them. Return them as they're with lowercasing.
803   if (IsGrandfatheredTagWithoutPreferredVaule(locale)) {
804     return Just(locale);
805   }
806 
807   // // ECMA 402 6.2.3
808   // TODO(jshin): uloc_{for,to}TanguageTag can fail even for a structually valid
809   // language tag if it's too long (much longer than 100 chars). Even if we
810   // allocate a longer buffer, ICU will still fail if it's too long. Either
811   // propose to Ecma 402 to put a limit on the locale length or change ICU to
812   // handle long locale names better. See
813   // https://unicode-org.atlassian.net/browse/ICU-13417
814   UErrorCode error = U_ZERO_ERROR;
815   // uloc_forLanguageTag checks the structrual validity. If the input BCP47
816   // language tag is parsed all the way to the end, it indicates that the input
817   // is structurally valid. Due to a couple of bugs, we can't use it
818   // without Chromium patches or ICU 62 or earlier.
819   icu::Locale icu_locale = icu::Locale::forLanguageTag(locale.c_str(), error);
820   if (U_FAILURE(error) || icu_locale.isBogus()) {
821     THROW_NEW_ERROR_RETURN_VALUE(
822         isolate,
823         NewRangeError(
824             MessageTemplate::kInvalidLanguageTag,
825             isolate->factory()->NewStringFromAsciiChecked(locale.c_str())),
826         Nothing<std::string>());
827   }
828   Maybe<std::string> maybe_to_language_tag = Intl::ToLanguageTag(icu_locale);
829   if (maybe_to_language_tag.IsNothing()) {
830     THROW_NEW_ERROR_RETURN_VALUE(
831         isolate,
832         NewRangeError(
833             MessageTemplate::kInvalidLanguageTag,
834             isolate->factory()->NewStringFromAsciiChecked(locale.c_str())),
835         Nothing<std::string>());
836   }
837 
838   return maybe_to_language_tag;
839 }
840 
CanonicalizeLanguageTag(Isolate * isolate,Handle<Object> locale_in)841 Maybe<std::string> CanonicalizeLanguageTag(Isolate* isolate,
842                                            Handle<Object> locale_in) {
843   Handle<String> locale_str;
844   // This does part of the validity checking spec'ed in CanonicalizeLocaleList:
845   // 7c ii. If Type(kValue) is not String or Object, throw a TypeError
846   // exception.
847   // 7c iii. Let tag be ? ToString(kValue).
848   // 7c iv. If IsStructurallyValidLanguageTag(tag) is false, throw a
849   // RangeError exception.
850 
851   if (locale_in->IsString()) {
852     locale_str = Handle<String>::cast(locale_in);
853   } else if (locale_in->IsJSReceiver()) {
854     ASSIGN_RETURN_ON_EXCEPTION_VALUE(isolate, locale_str,
855                                      Object::ToString(isolate, locale_in),
856                                      Nothing<std::string>());
857   } else {
858     THROW_NEW_ERROR_RETURN_VALUE(isolate,
859                                  NewTypeError(MessageTemplate::kLanguageID),
860                                  Nothing<std::string>());
861   }
862   std::string locale(locale_str->ToCString().get());
863 
864   if (!IsStructurallyValidLanguageTag(locale)) {
865     THROW_NEW_ERROR_RETURN_VALUE(
866         isolate, NewRangeError(MessageTemplate::kLocaleBadParameters),
867         Nothing<std::string>());
868   }
869   return CanonicalizeLanguageTag(isolate, locale);
870 }
871 
872 }  // anonymous namespace
873 
CanonicalizeLocaleList(Isolate * isolate,Handle<Object> locales,bool only_return_one_result)874 Maybe<std::vector<std::string>> Intl::CanonicalizeLocaleList(
875     Isolate* isolate, Handle<Object> locales, bool only_return_one_result) {
876   // 1. If locales is undefined, then
877   if (locales->IsUndefined(isolate)) {
878     // 1a. Return a new empty List.
879     return Just(std::vector<std::string>());
880   }
881   // 2. Let seen be a new empty List.
882   std::vector<std::string> seen;
883   // 3. If Type(locales) is String or locales has an [[InitializedLocale]]
884   // internal slot,  then
885   if (locales->IsJSLocale()) {
886     // Since this value came from JSLocale, which is already went though the
887     // CanonializeLanguageTag process once, therefore there are no need to
888     // call CanonializeLanguageTag again.
889     seen.push_back(JSLocale::ToString(Handle<JSLocale>::cast(locales)));
890     return Just(seen);
891   }
892   if (locales->IsString()) {
893     // 3a. Let O be CreateArrayFromList(« locales »).
894     // Instead of creating a one-element array and then iterating over it,
895     // we inline the body of the iteration:
896     std::string canonicalized_tag;
897     if (!CanonicalizeLanguageTag(isolate, locales).To(&canonicalized_tag)) {
898       return Nothing<std::vector<std::string>>();
899     }
900     seen.push_back(canonicalized_tag);
901     return Just(seen);
902   }
903   // 4. Else,
904   // 4a. Let O be ? ToObject(locales).
905   Handle<JSReceiver> o;
906   ASSIGN_RETURN_ON_EXCEPTION_VALUE(isolate, o,
907                                    Object::ToObject(isolate, locales),
908                                    Nothing<std::vector<std::string>>());
909   // 5. Let len be ? ToLength(? Get(O, "length")).
910   Handle<Object> length_obj;
911   ASSIGN_RETURN_ON_EXCEPTION_VALUE(isolate, length_obj,
912                                    Object::GetLengthFromArrayLike(isolate, o),
913                                    Nothing<std::vector<std::string>>());
914   // TODO(jkummerow): Spec violation: strictly speaking, we have to iterate
915   // up to 2^53-1 if {length_obj} says so. Since cases above 2^32 probably
916   // don't happen in practice (and would be very slow if they do), we'll keep
917   // the code simple for now by using a saturating to-uint32 conversion.
918   double raw_length = length_obj->Number();
919   uint32_t len =
920       raw_length >= kMaxUInt32 ? kMaxUInt32 : static_cast<uint32_t>(raw_length);
921   // 6. Let k be 0.
922   // 7. Repeat, while k < len
923   for (uint32_t k = 0; k < len; k++) {
924     // 7a. Let Pk be ToString(k).
925     // 7b. Let kPresent be ? HasProperty(O, Pk).
926     LookupIterator it(isolate, o, k);
927     Maybe<bool> maybe_found = JSReceiver::HasProperty(&it);
928     MAYBE_RETURN(maybe_found, Nothing<std::vector<std::string>>());
929     // 7c. If kPresent is true, then
930     if (!maybe_found.FromJust()) continue;
931     // 7c i. Let kValue be ? Get(O, Pk).
932     Handle<Object> k_value;
933     ASSIGN_RETURN_ON_EXCEPTION_VALUE(isolate, k_value, Object::GetProperty(&it),
934                                      Nothing<std::vector<std::string>>());
935     // 7c ii. If Type(kValue) is not String or Object, throw a TypeError
936     // exception.
937     // 7c iii. If Type(kValue) is Object and kValue has an [[InitializedLocale]]
938     // internal slot, then
939     std::string canonicalized_tag;
940     if (k_value->IsJSLocale()) {
941       // 7c iii. 1. Let tag be kValue.[[Locale]].
942       canonicalized_tag = JSLocale::ToString(Handle<JSLocale>::cast(k_value));
943       // 7c iv. Else,
944     } else {
945       // 7c iv 1. Let tag be ? ToString(kValue).
946       // 7c v. If IsStructurallyValidLanguageTag(tag) is false, throw a
947       // RangeError exception.
948       // 7c vi. Let canonicalizedTag be CanonicalizeLanguageTag(tag).
949       if (!CanonicalizeLanguageTag(isolate, k_value).To(&canonicalized_tag)) {
950         return Nothing<std::vector<std::string>>();
951       }
952     }
953     // 7c vi. If canonicalizedTag is not an element of seen, append
954     // canonicalizedTag as the last element of seen.
955     if (std::find(seen.begin(), seen.end(), canonicalized_tag) == seen.end()) {
956       seen.push_back(canonicalized_tag);
957     }
958     // 7d. Increase k by 1. (See loop header.)
959     // Optimization: some callers only need one result.
960     if (only_return_one_result) return Just(seen);
961   }
962   // 8. Return seen.
963   return Just(seen);
964 }
965 
966 // ecma402 #sup-string.prototype.tolocalelowercase
967 // ecma402 #sup-string.prototype.tolocaleuppercase
StringLocaleConvertCase(Isolate * isolate,Handle<String> s,bool to_upper,Handle<Object> locales)968 MaybeHandle<String> Intl::StringLocaleConvertCase(Isolate* isolate,
969                                                   Handle<String> s,
970                                                   bool to_upper,
971                                                   Handle<Object> locales) {
972   std::vector<std::string> requested_locales;
973   if (!CanonicalizeLocaleList(isolate, locales, true).To(&requested_locales)) {
974     return MaybeHandle<String>();
975   }
976   std::string requested_locale = requested_locales.size() == 0
977                                      ? DefaultLocale(isolate)
978                                      : requested_locales[0];
979   size_t dash = requested_locale.find('-');
980   if (dash != std::string::npos) {
981     requested_locale = requested_locale.substr(0, dash);
982   }
983 
984   // Primary language tag can be up to 8 characters long in theory.
985   // https://tools.ietf.org/html/bcp47#section-2.2.1
986   DCHECK_LE(requested_locale.length(), 8);
987   s = String::Flatten(isolate, s);
988 
989   // All the languages requiring special-handling have two-letter codes.
990   // Note that we have to check for '!= 2' here because private-use language
991   // tags (x-foo) or grandfathered irregular tags (e.g. i-enochian) would have
992   // only 'x' or 'i' when they get here.
993   if (V8_UNLIKELY(requested_locale.length() != 2)) {
994     if (to_upper) {
995       return ConvertToUpper(isolate, s);
996     }
997     return ConvertToLower(isolate, s);
998   }
999   // TODO(jshin): Consider adding a fast path for ASCII or Latin-1. The fastpath
1000   // in the root locale needs to be adjusted for az, lt and tr because even case
1001   // mapping of ASCII range characters are different in those locales.
1002   // Greek (el) does not require any adjustment.
1003   if (V8_UNLIKELY((requested_locale == "tr") || (requested_locale == "el") ||
1004                   (requested_locale == "lt") || (requested_locale == "az"))) {
1005     return LocaleConvertCase(isolate, s, to_upper, requested_locale.c_str());
1006   } else {
1007     if (to_upper) {
1008       return ConvertToUpper(isolate, s);
1009     }
1010     return ConvertToLower(isolate, s);
1011   }
1012 }
1013 
StringLocaleCompare(Isolate * isolate,Handle<String> string1,Handle<String> string2,Handle<Object> locales,Handle<Object> options,const char * method)1014 MaybeHandle<Object> Intl::StringLocaleCompare(
1015     Isolate* isolate, Handle<String> string1, Handle<String> string2,
1016     Handle<Object> locales, Handle<Object> options, const char* method) {
1017   // We only cache the instance when both locales and options are undefined,
1018   // as that is the only case when the specified side-effects of examining
1019   // those arguments are unobservable.
1020   bool can_cache =
1021       locales->IsUndefined(isolate) && options->IsUndefined(isolate);
1022   if (can_cache) {
1023     // Both locales and options are undefined, check the cache.
1024     icu::Collator* cached_icu_collator =
1025         static_cast<icu::Collator*>(isolate->get_cached_icu_object(
1026             Isolate::ICUObjectCacheType::kDefaultCollator));
1027     // We may use the cached icu::Collator for a fast path.
1028     if (cached_icu_collator != nullptr) {
1029       return Intl::CompareStrings(isolate, *cached_icu_collator, string1,
1030                                   string2);
1031     }
1032   }
1033 
1034   Handle<JSFunction> constructor = Handle<JSFunction>(
1035       JSFunction::cast(
1036           isolate->context().native_context().intl_collator_function()),
1037       isolate);
1038 
1039   Handle<JSCollator> collator;
1040   ASSIGN_RETURN_ON_EXCEPTION(
1041       isolate, collator,
1042       New<JSCollator>(isolate, constructor, locales, options, method), Object);
1043   if (can_cache) {
1044     isolate->set_icu_object_in_cache(
1045         Isolate::ICUObjectCacheType::kDefaultCollator,
1046         std::static_pointer_cast<icu::UMemory>(collator->icu_collator().get()));
1047   }
1048   icu::Collator* icu_collator = collator->icu_collator().raw();
1049   return Intl::CompareStrings(isolate, *icu_collator, string1, string2);
1050 }
1051 
1052 // ecma402/#sec-collator-comparestrings
CompareStrings(Isolate * isolate,const icu::Collator & icu_collator,Handle<String> string1,Handle<String> string2)1053 Handle<Object> Intl::CompareStrings(Isolate* isolate,
1054                                     const icu::Collator& icu_collator,
1055                                     Handle<String> string1,
1056                                     Handle<String> string2) {
1057   Factory* factory = isolate->factory();
1058 
1059   // Early return for identical strings.
1060   if (string1.is_identical_to(string2)) {
1061     return factory->NewNumberFromInt(UCollationResult::UCOL_EQUAL);
1062   }
1063 
1064   // Early return for empty strings.
1065   if (string1->length() == 0) {
1066     return factory->NewNumberFromInt(string2->length() == 0
1067                                          ? UCollationResult::UCOL_EQUAL
1068                                          : UCollationResult::UCOL_LESS);
1069   }
1070   if (string2->length() == 0) {
1071     return factory->NewNumberFromInt(UCollationResult::UCOL_GREATER);
1072   }
1073 
1074   string1 = String::Flatten(isolate, string1);
1075   string2 = String::Flatten(isolate, string2);
1076 
1077   UCollationResult result;
1078   UErrorCode status = U_ZERO_ERROR;
1079   icu::StringPiece string_piece1 = ToICUStringPiece(isolate, string1);
1080   if (!string_piece1.empty()) {
1081     icu::StringPiece string_piece2 = ToICUStringPiece(isolate, string2);
1082     if (!string_piece2.empty()) {
1083       result = icu_collator.compareUTF8(string_piece1, string_piece2, status);
1084       DCHECK(U_SUCCESS(status));
1085       return factory->NewNumberFromInt(result);
1086     }
1087   }
1088 
1089   icu::UnicodeString string_val1 = Intl::ToICUUnicodeString(isolate, string1);
1090   icu::UnicodeString string_val2 = Intl::ToICUUnicodeString(isolate, string2);
1091   result = icu_collator.compare(string_val1, string_val2, status);
1092   DCHECK(U_SUCCESS(status));
1093 
1094   return factory->NewNumberFromInt(result);
1095 }
1096 
1097 // ecma402/#sup-properties-of-the-number-prototype-object
NumberToLocaleString(Isolate * isolate,Handle<Object> num,Handle<Object> locales,Handle<Object> options,const char * method)1098 MaybeHandle<String> Intl::NumberToLocaleString(Isolate* isolate,
1099                                                Handle<Object> num,
1100                                                Handle<Object> locales,
1101                                                Handle<Object> options,
1102                                                const char* method) {
1103   Handle<Object> numeric_obj;
1104   ASSIGN_RETURN_ON_EXCEPTION(isolate, numeric_obj,
1105                              Object::ToNumeric(isolate, num), String);
1106 
1107   // We only cache the instance when both locales and options are undefined,
1108   // as that is the only case when the specified side-effects of examining
1109   // those arguments are unobservable.
1110   bool can_cache =
1111       locales->IsUndefined(isolate) && options->IsUndefined(isolate);
1112   if (can_cache) {
1113     icu::number::LocalizedNumberFormatter* cached_number_format =
1114         static_cast<icu::number::LocalizedNumberFormatter*>(
1115             isolate->get_cached_icu_object(
1116                 Isolate::ICUObjectCacheType::kDefaultNumberFormat));
1117     // We may use the cached icu::NumberFormat for a fast path.
1118     if (cached_number_format != nullptr) {
1119       return JSNumberFormat::FormatNumeric(isolate, *cached_number_format,
1120                                            numeric_obj);
1121     }
1122   }
1123 
1124   Handle<JSFunction> constructor = Handle<JSFunction>(
1125       JSFunction::cast(
1126           isolate->context().native_context().intl_number_format_function()),
1127       isolate);
1128   Handle<JSNumberFormat> number_format;
1129   // 2. Let numberFormat be ? Construct(%NumberFormat%, « locales, options »).
1130   ASSIGN_RETURN_ON_EXCEPTION(
1131       isolate, number_format,
1132       New<JSNumberFormat>(isolate, constructor, locales, options, method),
1133       String);
1134 
1135   if (can_cache) {
1136     isolate->set_icu_object_in_cache(
1137         Isolate::ICUObjectCacheType::kDefaultNumberFormat,
1138         std::static_pointer_cast<icu::UMemory>(
1139             number_format->icu_number_formatter().get()));
1140   }
1141 
1142   // Return FormatNumber(numberFormat, x).
1143   icu::number::LocalizedNumberFormatter* icu_number_format =
1144       number_format->icu_number_formatter().raw();
1145   return JSNumberFormat::FormatNumeric(isolate, *icu_number_format,
1146                                        numeric_obj);
1147 }
1148 
1149 namespace {
1150 
1151 // ecma402/#sec-defaultnumberoption
DefaultNumberOption(Isolate * isolate,Handle<Object> value,int min,int max,int fallback,Handle<String> property)1152 Maybe<int> DefaultNumberOption(Isolate* isolate, Handle<Object> value, int min,
1153                                int max, int fallback, Handle<String> property) {
1154   // 2. Else, return fallback.
1155   if (value->IsUndefined()) return Just(fallback);
1156 
1157   // 1. If value is not undefined, then
1158   // a. Let value be ? ToNumber(value).
1159   Handle<Object> value_num;
1160   ASSIGN_RETURN_ON_EXCEPTION_VALUE(
1161       isolate, value_num, Object::ToNumber(isolate, value), Nothing<int>());
1162   DCHECK(value_num->IsNumber());
1163 
1164   // b. If value is NaN or less than minimum or greater than maximum, throw a
1165   // RangeError exception.
1166   if (value_num->IsNaN() || value_num->Number() < min ||
1167       value_num->Number() > max) {
1168     THROW_NEW_ERROR_RETURN_VALUE(
1169         isolate,
1170         NewRangeError(MessageTemplate::kPropertyValueOutOfRange, property),
1171         Nothing<int>());
1172   }
1173 
1174   // The max and min arguments are integers and the above check makes
1175   // sure that we are within the integer range making this double to
1176   // int conversion safe.
1177   //
1178   // c. Return floor(value).
1179   return Just(FastD2I(floor(value_num->Number())));
1180 }
1181 
1182 }  // namespace
1183 
1184 // ecma402/#sec-getnumberoption
GetNumberOption(Isolate * isolate,Handle<JSReceiver> options,Handle<String> property,int min,int max,int fallback)1185 Maybe<int> Intl::GetNumberOption(Isolate* isolate, Handle<JSReceiver> options,
1186                                  Handle<String> property, int min, int max,
1187                                  int fallback) {
1188   // 1. Let value be ? Get(options, property).
1189   Handle<Object> value;
1190   ASSIGN_RETURN_ON_EXCEPTION_VALUE(
1191       isolate, value, JSReceiver::GetProperty(isolate, options, property),
1192       Nothing<int>());
1193 
1194   // Return ? DefaultNumberOption(value, minimum, maximum, fallback).
1195   return DefaultNumberOption(isolate, value, min, max, fallback, property);
1196 }
1197 
SetNumberFormatDigitOptions(Isolate * isolate,Handle<JSReceiver> options,int mnfd_default,int mxfd_default,bool notation_is_compact)1198 Maybe<Intl::NumberFormatDigitOptions> Intl::SetNumberFormatDigitOptions(
1199     Isolate* isolate, Handle<JSReceiver> options, int mnfd_default,
1200     int mxfd_default, bool notation_is_compact) {
1201   Factory* factory = isolate->factory();
1202   Intl::NumberFormatDigitOptions digit_options;
1203 
1204   // 5. Let mnid be ? GetNumberOption(options, "minimumIntegerDigits,", 1, 21,
1205   // 1).
1206   int mnid = 1;
1207   if (!Intl::GetNumberOption(isolate, options,
1208                              factory->minimumIntegerDigits_string(), 1, 21, 1)
1209            .To(&mnid)) {
1210     return Nothing<NumberFormatDigitOptions>();
1211   }
1212 
1213   int mnfd = 0;
1214   int mxfd = 0;
1215   Handle<Object> mnfd_obj;
1216   Handle<Object> mxfd_obj;
1217 
1218   // 6. Let mnfd be ? Get(options, "minimumFractionDigits").
1219   Handle<String> mnfd_str = factory->minimumFractionDigits_string();
1220   ASSIGN_RETURN_ON_EXCEPTION_VALUE(
1221       isolate, mnfd_obj, JSReceiver::GetProperty(isolate, options, mnfd_str),
1222       Nothing<NumberFormatDigitOptions>());
1223 
1224   // 8. Let mxfd be ? Get(options, "maximumFractionDigits").
1225   Handle<String> mxfd_str = factory->maximumFractionDigits_string();
1226   ASSIGN_RETURN_ON_EXCEPTION_VALUE(
1227       isolate, mxfd_obj, JSReceiver::GetProperty(isolate, options, mxfd_str),
1228       Nothing<NumberFormatDigitOptions>());
1229 
1230   // 9.  Let mnsd be ? Get(options, "minimumSignificantDigits").
1231   Handle<Object> mnsd_obj;
1232   Handle<String> mnsd_str = factory->minimumSignificantDigits_string();
1233   ASSIGN_RETURN_ON_EXCEPTION_VALUE(
1234       isolate, mnsd_obj, JSReceiver::GetProperty(isolate, options, mnsd_str),
1235       Nothing<NumberFormatDigitOptions>());
1236 
1237   // 10. Let mxsd be ? Get(options, "maximumSignificantDigits").
1238   Handle<Object> mxsd_obj;
1239   Handle<String> mxsd_str = factory->maximumSignificantDigits_string();
1240   ASSIGN_RETURN_ON_EXCEPTION_VALUE(
1241       isolate, mxsd_obj, JSReceiver::GetProperty(isolate, options, mxsd_str),
1242       Nothing<NumberFormatDigitOptions>());
1243 
1244   // 11. Set intlObj.[[MinimumIntegerDigits]] to mnid.
1245   digit_options.minimum_integer_digits = mnid;
1246 
1247   // 12. Set intlObj.[[MinimumFractionDigits]] to mnfd.
1248   digit_options.minimum_fraction_digits = mnfd;
1249 
1250   // 13. Set intlObj.[[MaximumFractionDigits]] to mxfd.
1251   digit_options.maximum_fraction_digits = mxfd;
1252 
1253   // 14. If mnsd is not undefined or mxsd is not undefined, then
1254   if (!mnsd_obj->IsUndefined(isolate) || !mxsd_obj->IsUndefined(isolate)) {
1255     // 14. a. Let mnsd be ? DefaultNumberOption(mnsd, 1, 21, 1).
1256     int mnsd;
1257     if (!DefaultNumberOption(isolate, mnsd_obj, 1, 21, 1, mnsd_str).To(&mnsd)) {
1258       return Nothing<NumberFormatDigitOptions>();
1259     }
1260 
1261     // 14. b. Let mxsd be ? DefaultNumberOption(mxsd, mnsd, 21, 21).
1262     int mxsd;
1263     if (!DefaultNumberOption(isolate, mxsd_obj, mnsd, 21, 21, mxsd_str)
1264              .To(&mxsd)) {
1265       return Nothing<NumberFormatDigitOptions>();
1266     }
1267 
1268     // 14. c. Set intlObj.[[MinimumSignificantDigits]] to mnsd.
1269     digit_options.minimum_significant_digits = mnsd;
1270 
1271     // 14. d. Set intlObj.[[MaximumSignificantDigits]] to mxsd.
1272     digit_options.maximum_significant_digits = mxsd;
1273   } else {
1274     digit_options.minimum_significant_digits = 0;
1275     digit_options.maximum_significant_digits = 0;
1276 
1277     // 15. Else If mnfd is not undefined or mxfd is not undefined, then
1278     if (!mnfd_obj->IsUndefined(isolate) || !mxfd_obj->IsUndefined(isolate)) {
1279       Handle<String> mxfd_str = factory->maximumFractionDigits_string();
1280       Handle<String> mnfd_str = factory->minimumFractionDigits_string();
1281 
1282       int specified_mnfd;
1283       int specified_mxfd;
1284 
1285       // a. Let _specifiedMnfd_ be ? DefaultNumberOption(_mnfd_, 0, 20,
1286       // *undefined*).
1287       if (!DefaultNumberOption(isolate, mnfd_obj, 0, 20, -1, mnfd_str)
1288                .To(&specified_mnfd)) {
1289         return Nothing<NumberFormatDigitOptions>();
1290       }
1291       Handle<Object> specifiedMnfd_obj;
1292       if (specified_mnfd < 0) {
1293         specifiedMnfd_obj = factory->undefined_value();
1294       } else {
1295         specifiedMnfd_obj = handle(Smi::FromInt(specified_mnfd), isolate);
1296       }
1297 
1298       // b.  Let _specifiedMxfd_ be ? DefaultNumberOption(_mxfd_, 0, 20,
1299       // *undefined*).
1300       if (!DefaultNumberOption(isolate, mxfd_obj, 0, 20, -1, mxfd_str)
1301                .To(&specified_mxfd)) {
1302         return Nothing<NumberFormatDigitOptions>();
1303       }
1304       Handle<Object> specifiedMxfd_obj;
1305       if (specified_mxfd < 0) {
1306         specifiedMxfd_obj = factory->undefined_value();
1307       } else {
1308         specifiedMxfd_obj = handle(Smi::FromInt(specified_mxfd), isolate);
1309       }
1310 
1311       // c. If _specifiedMxfd_ is not *undefined*, set _mnfdDefault_ to
1312       // min(_mnfdDefault_, _specifiedMxfd_).
1313       if (specified_mxfd >= 0) {
1314         mnfd_default = std::min(mnfd_default, specified_mxfd);
1315       }
1316 
1317       // d. Set _mnfd_ to ! DefaultNumberOption(_specifiedMnfd_, 0, 20,
1318       // _mnfdDefault_).
1319       if (!DefaultNumberOption(isolate, specifiedMnfd_obj, 0, 20, mnfd_default,
1320                                mnfd_str)
1321                .To(&mnfd)) {
1322         return Nothing<NumberFormatDigitOptions>();
1323       }
1324 
1325       // e. Set _mxfd_ to ! DefaultNumberOption(_specifiedMxfd_, 0, 20,
1326       // max(_mxfdDefault_, _mnfd_)).
1327       if (!DefaultNumberOption(isolate, specifiedMxfd_obj, 0, 20,
1328                                std::max(mxfd_default, mnfd), mxfd_str)
1329                .To(&mxfd)) {
1330         return Nothing<NumberFormatDigitOptions>();
1331       }
1332 
1333       // f. If _mnfd_ is greater than _mxfd_, throw a *RangeError* exception.
1334       if (mnfd > mxfd) {
1335         THROW_NEW_ERROR_RETURN_VALUE(
1336             isolate,
1337             NewRangeError(MessageTemplate::kPropertyValueOutOfRange, mxfd_str),
1338             Nothing<NumberFormatDigitOptions>());
1339       }
1340 
1341       // g. Set intlObj.[[MinimumFractionDigits]] to mnfd.
1342       digit_options.minimum_fraction_digits = mnfd;
1343 
1344       // h. Set intlObj.[[MaximumFractionDigits]] to mxfd.
1345       digit_options.maximum_fraction_digits = mxfd;
1346       // Else If intlObj.[[Notation]] is "compact", then
1347     } else if (notation_is_compact) {
1348       // a. Set intlObj.[[RoundingType]] to "compact-rounding".
1349       // Set minimum_significant_digits to -1 to represent roundingtype is
1350       // "compact-rounding".
1351       digit_options.minimum_significant_digits = -1;
1352       // 17. Else,
1353     } else {
1354       // 17. b. Set intlObj.[[MinimumFractionDigits]] to mnfdDefault.
1355       digit_options.minimum_fraction_digits = mnfd_default;
1356 
1357       // 17. c. Set intlObj.[[MaximumFractionDigits]] to mxfdDefault.
1358       digit_options.maximum_fraction_digits = mxfd_default;
1359     }
1360   }
1361   return Just(digit_options);
1362 }
1363 
1364 namespace {
1365 
// ecma402/#sec-bestavailablelocale
//
// Returns the longest prefix of |locale|, cut at subtag boundaries, that is
// contained in |available_locales|. An empty string stands for the spec's
// "undefined" result when no such prefix exists.
std::string BestAvailableLocale(const std::set<std::string>& available_locales,
                                const std::string& locale) {
  // 1. Let candidate be locale.
  std::string candidate = locale;

  // 2. Repeat,
  for (;;) {
    // 2.a. If availableLocales contains an element equal to candidate, return
    //      candidate.
    if (available_locales.count(candidate) != 0) return candidate;

    // 2.b. Let pos be the character index of the last occurrence of "-"
    //      (U+002D) within candidate. If that character does not occur, return
    //      undefined.
    const size_t last_dash = candidate.rfind('-');
    if (last_dash == std::string::npos) return std::string();

    // 2.c. If pos ≥ 2 and the character "-" occurs at index pos-2 of candidate,
    //      decrease pos by 2. (This drops a single-character subtag together
    //      with the subtag being removed.)
    const bool follows_single_char_subtag =
        last_dash >= 2 && candidate[last_dash - 2] == '-';
    const size_t cut = follows_single_char_subtag ? last_dash - 2 : last_dash;

    // 2.d. Let candidate be the substring of candidate from position 0,
    //      inclusive, to position pos, exclusive.
    candidate.resize(cut);
  }
}
1399 
// Result of ParseBCP47Locale: a BCP 47 tag split into the part without the
// Unicode extension sequence and the extension sequence that was removed.
struct ParsedLocale {
  // The locale with the Unicode extension sequence (if any) taken out.
  std::string no_extensions_locale;
  // The removed Unicode extension sequence; stays empty when nothing was
  // removed.
  std::string extension;
};
1404 
1405 // Returns a struct containing a bcp47 tag without unicode extensions
1406 // and the removed unicode extensions.
1407 //
1408 // For example, given 'en-US-u-co-emoji' returns 'en-US' and
1409 // '-u-co-emoji' (the extension keeps its leading '-').
ParseBCP47Locale(const std::string & locale)1410 ParsedLocale ParseBCP47Locale(const std::string& locale) {
1411   size_t length = locale.length();
1412   ParsedLocale parsed_locale;
1413 
1414   // Privateuse or grandfathered locales have no extension sequences.
1415   if ((length > 1) && (locale[1] == '-')) {
1416     // Check to make sure that this really is a grandfathered or
1417     // privateuse extension. ICU can sometimes mess up the
1418     // canonicalization.
1419     DCHECK(locale[0] == 'x' || locale[0] == 'i');
1420     parsed_locale.no_extensions_locale = locale;
1421     return parsed_locale;
1422   }
1423 
1424   size_t unicode_extension_start = locale.find("-u-");
1425 
1426   // No unicode extensions found.
1427   if (unicode_extension_start == std::string::npos) {
1428     parsed_locale.no_extensions_locale = locale;
1429     return parsed_locale;
1430   }
1431 
1432   size_t private_extension_start = locale.find("-x-");
1433 
1434   // Unicode extensions found within privateuse subtags don't count.
1435   if (private_extension_start != std::string::npos &&
1436       private_extension_start < unicode_extension_start) {
1437     parsed_locale.no_extensions_locale = locale;
1438     return parsed_locale;
1439   }
1440 
1441   const std::string beginning = locale.substr(0, unicode_extension_start);
1442   size_t unicode_extension_end = length;
1443   DCHECK_GT(length, 2);
1444 
1445   // Find the end of the extension production as per the bcp47 grammar
1446   // by looking for '-' followed by 2 chars and then another '-'.
1447   for (size_t i = unicode_extension_start + 1; i < length - 2; i++) {
1448     if (locale[i] != '-') continue;
1449 
1450     if (locale[i + 2] == '-') {
1451       unicode_extension_end = i;
1452       break;
1453     }
1454 
1455     i += 2;
1456   }
1457 
1458   const std::string end = locale.substr(unicode_extension_end);
1459   parsed_locale.no_extensions_locale = beginning + end;
1460   parsed_locale.extension = locale.substr(
1461       unicode_extension_start, unicode_extension_end - unicode_extension_start);
1462   return parsed_locale;
1463 }
1464 
1465 // ecma402/#sec-lookupsupportedlocales
LookupSupportedLocales(const std::set<std::string> & available_locales,const std::vector<std::string> & requested_locales)1466 std::vector<std::string> LookupSupportedLocales(
1467     const std::set<std::string>& available_locales,
1468     const std::vector<std::string>& requested_locales) {
1469   // 1. Let subset be a new empty List.
1470   std::vector<std::string> subset;
1471 
1472   // 2. For each element locale of requestedLocales in List order, do
1473   for (const std::string& locale : requested_locales) {
1474     // 2. a. Let noExtensionsLocale be the String value that is locale
1475     //       with all Unicode locale extension sequences removed.
1476     std::string no_extension_locale =
1477         ParseBCP47Locale(locale).no_extensions_locale;
1478 
1479     // 2. b. Let availableLocale be
1480     //       BestAvailableLocale(availableLocales, noExtensionsLocale).
1481     std::string available_locale =
1482         BestAvailableLocale(available_locales, no_extension_locale);
1483 
1484     // 2. c. If availableLocale is not undefined, append locale to the
1485     //       end of subset.
1486     if (!available_locale.empty()) {
1487       subset.push_back(locale);
1488     }
1489   }
1490 
1491   // 3. Return subset.
1492   return subset;
1493 }
1494 
BuildLocaleMatcher(Isolate * isolate,const std::set<std::string> & available_locales,UErrorCode * status)1495 icu::LocaleMatcher BuildLocaleMatcher(
1496     Isolate* isolate, const std::set<std::string>& available_locales,
1497     UErrorCode* status) {
1498   icu::Locale default_locale =
1499       icu::Locale::forLanguageTag(DefaultLocale(isolate), *status);
1500   DCHECK(U_SUCCESS(*status));
1501   icu::LocaleMatcher::Builder builder;
1502   builder.setDefaultLocale(&default_locale);
1503   for (auto it = available_locales.begin(); it != available_locales.end();
1504        ++it) {
1505     builder.addSupportedLocale(
1506         icu::Locale::forLanguageTag(it->c_str(), *status));
1507   }
1508 
1509   return builder.build(*status);
1510 }
1511 
1512 class Iterator : public icu::Locale::Iterator {
1513  public:
Iterator(std::vector<std::string>::const_iterator begin,std::vector<std::string>::const_iterator end)1514   Iterator(std::vector<std::string>::const_iterator begin,
1515            std::vector<std::string>::const_iterator end)
1516       : iter_(begin), end_(end) {}
1517   ~Iterator() override = default;
1518 
hasNext() const1519   UBool hasNext() const override { return iter_ != end_; }
1520 
next()1521   const icu::Locale& next() override {
1522     UErrorCode status = U_ZERO_ERROR;
1523     locale_ = icu::Locale::forLanguageTag(iter_->c_str(), status);
1524     DCHECK(U_SUCCESS(status));
1525     ++iter_;
1526     return locale_;
1527   }
1528 
1529  private:
1530   std::vector<std::string>::const_iterator iter_;
1531   std::vector<std::string>::const_iterator end_;
1532   icu::Locale locale_;
1533 };
1534 
1535 // ecma402/#sec-bestfitmatcher
1536 // The BestFitMatcher abstract operation compares requestedLocales, which must
1537 // be a List as returned by CanonicalizeLocaleList, against the locales in
1538 // availableLocales and determines the best available language to meet the
1539 // request. The algorithm is implementation dependent, but should produce
1540 // results that a typical user of the requested locales would perceive
1541 // as at least as good as those produced by the LookupMatcher abstract
1542 // operation. Options specified through Unicode locale extension sequences must
1543 // be ignored by the algorithm. Information about such subsequences is returned
1544 // separately. The abstract operation returns a record with a [[locale]] field,
1545 // whose value is the language tag of the selected locale, which must be an
1546 // element of availableLocales. If the language tag of the request locale that
1547 // led to the selected locale contained a Unicode locale extension sequence,
1548 // then the returned record also contains an [[extension]] field whose value is
1549 // the first Unicode locale extension sequence within the request locale
1550 // language tag.
BestFitMatcher(Isolate * isolate,const std::set<std::string> & available_locales,const std::vector<std::string> & requested_locales)1551 std::string BestFitMatcher(Isolate* isolate,
1552                            const std::set<std::string>& available_locales,
1553                            const std::vector<std::string>& requested_locales) {
1554   UErrorCode status = U_ZERO_ERROR;
1555   icu::LocaleMatcher matcher =
1556       BuildLocaleMatcher(isolate, available_locales, &status);
1557   DCHECK(U_SUCCESS(status));
1558 
1559   Iterator iter(requested_locales.cbegin(), requested_locales.cend());
1560   std::string bestfit =
1561       matcher.getBestMatch(iter, status)->toLanguageTag<std::string>(status);
1562   if (U_FAILURE(status)) {
1563     return DefaultLocale(isolate);
1564   }
1565   // We need to return the extensions with it.
1566   for (auto it = requested_locales.begin(); it != requested_locales.end();
1567        ++it) {
1568     if (it->find(bestfit) == 0) {
1569       return *it;
1570     }
1571   }
1572   return bestfit;
1573 }
1574 
1575 // ECMA 402 9.2.8 BestFitSupportedLocales(availableLocales, requestedLocales)
1576 // https://tc39.github.io/ecma402/#sec-bestfitsupportedlocales
BestFitSupportedLocales(Isolate * isolate,const std::set<std::string> & available_locales,const std::vector<std::string> & requested_locales)1577 std::vector<std::string> BestFitSupportedLocales(
1578     Isolate* isolate, const std::set<std::string>& available_locales,
1579     const std::vector<std::string>& requested_locales) {
1580   UErrorCode status = U_ZERO_ERROR;
1581   icu::LocaleMatcher matcher =
1582       BuildLocaleMatcher(isolate, available_locales, &status);
1583   DCHECK(U_SUCCESS(status));
1584 
1585   std::string default_locale = DefaultLocale(isolate);
1586   std::vector<std::string> result;
1587   for (auto it = requested_locales.cbegin(); it != requested_locales.cend();
1588        it++) {
1589     if (*it == default_locale) {
1590       result.push_back(*it);
1591     } else {
1592       status = U_ZERO_ERROR;
1593       icu::Locale desired = icu::Locale::forLanguageTag(it->c_str(), status);
1594       std::string bestfit = matcher.getBestMatch(desired, status)
1595                                 ->toLanguageTag<std::string>(status);
1596       // We need to return the extensions with it.
1597       if (U_SUCCESS(status) && it->find(bestfit) == 0) {
1598         result.push_back(*it);
1599       }
1600     }
1601   }
1602   return result;
1603 }
1604 
1605 // ecma262 #sec-createarrayfromlist
CreateArrayFromList(Isolate * isolate,std::vector<std::string> elements,PropertyAttributes attr)1606 Handle<JSArray> CreateArrayFromList(Isolate* isolate,
1607                                     std::vector<std::string> elements,
1608                                     PropertyAttributes attr) {
1609   Factory* factory = isolate->factory();
1610   // Let array be ! ArrayCreate(0).
1611   Handle<JSArray> array = factory->NewJSArray(0);
1612 
1613   uint32_t length = static_cast<uint32_t>(elements.size());
1614   // 3. Let n be 0.
1615   // 4. For each element e of elements, do
1616   for (uint32_t i = 0; i < length; i++) {
1617     // a. Let status be CreateDataProperty(array, ! ToString(n), e).
1618     const std::string& part = elements[i];
1619     Handle<String> value =
1620         factory->NewStringFromUtf8(CStrVector(part.c_str())).ToHandleChecked();
1621     JSObject::AddDataElement(array, i, value, attr);
1622   }
1623   // 5. Return array.
1624   return array;
1625 }
1626 
1627 // To mitigate the risk of the best-fit locale matcher, it is first checked
1628 // in without being turned on.
1629 static bool implement_bestfit = false;
1630 
1631 // ECMA 402 9.2.9 SupportedLocales(availableLocales, requestedLocales, options)
1632 // https://tc39.github.io/ecma402/#sec-supportedlocales
SupportedLocales(Isolate * isolate,const char * method,const std::set<std::string> & available_locales,const std::vector<std::string> & requested_locales,Handle<Object> options)1633 MaybeHandle<JSObject> SupportedLocales(
1634     Isolate* isolate, const char* method,
1635     const std::set<std::string>& available_locales,
1636     const std::vector<std::string>& requested_locales, Handle<Object> options) {
1637   std::vector<std::string> supported_locales;
1638 
1639   // 2. Else, let matcher be "best fit".
1640   Intl::MatcherOption matcher = Intl::MatcherOption::kBestFit;
1641 
1642   // 1. If options is not undefined, then
1643   if (!options->IsUndefined(isolate)) {
1644     // 1. a. Let options be ? ToObject(options).
1645     Handle<JSReceiver> options_obj;
1646     ASSIGN_RETURN_ON_EXCEPTION(isolate, options_obj,
1647                                Object::ToObject(isolate, options), JSObject);
1648 
1649     // 1. b. Let matcher be ? GetOption(options, "localeMatcher", "string",
1650     //       « "lookup", "best fit" », "best fit").
1651     Maybe<Intl::MatcherOption> maybe_locale_matcher =
1652         Intl::GetLocaleMatcher(isolate, options_obj, method);
1653     MAYBE_RETURN(maybe_locale_matcher, MaybeHandle<JSObject>());
1654     matcher = maybe_locale_matcher.FromJust();
1655   }
1656 
1657   // 3. If matcher is "best fit", then
1658   //    a. Let supportedLocales be BestFitSupportedLocales(availableLocales,
1659   //       requestedLocales).
1660   if (matcher == Intl::MatcherOption::kBestFit && implement_bestfit) {
1661     supported_locales =
1662         BestFitSupportedLocales(isolate, available_locales, requested_locales);
1663   } else {
1664     // 4. Else,
1665     //    a. Let supportedLocales be LookupSupportedLocales(availableLocales,
1666     //       requestedLocales).
1667     supported_locales =
1668         LookupSupportedLocales(available_locales, requested_locales);
1669   }
1670 
1671   // 5. Return CreateArrayFromList(supportedLocales).
1672   PropertyAttributes attr = static_cast<PropertyAttributes>(NONE);
1673   return CreateArrayFromList(isolate, supported_locales, attr);
1674 }
1675 
1676 }  // namespace
1677 
1678 // ecma-402 #sec-intl.getcanonicallocales
GetCanonicalLocales(Isolate * isolate,Handle<Object> locales)1679 MaybeHandle<JSArray> Intl::GetCanonicalLocales(Isolate* isolate,
1680                                                Handle<Object> locales) {
1681   // 1. Let ll be ? CanonicalizeLocaleList(locales).
1682   Maybe<std::vector<std::string>> maybe_ll =
1683       CanonicalizeLocaleList(isolate, locales, false);
1684   MAYBE_RETURN(maybe_ll, MaybeHandle<JSArray>());
1685 
1686   // 2. Return CreateArrayFromList(ll).
1687   PropertyAttributes attr = static_cast<PropertyAttributes>(NONE);
1688   return CreateArrayFromList(isolate, maybe_ll.FromJust(), attr);
1689 }
1690 
1691 // ECMA 402 Intl.*.supportedLocalesOf
SupportedLocalesOf(Isolate * isolate,const char * method,const std::set<std::string> & available_locales,Handle<Object> locales,Handle<Object> options)1692 MaybeHandle<JSObject> Intl::SupportedLocalesOf(
1693     Isolate* isolate, const char* method,
1694     const std::set<std::string>& available_locales, Handle<Object> locales,
1695     Handle<Object> options) {
1696   // Let availableLocales be %Collator%.[[AvailableLocales]].
1697 
1698   // Let requestedLocales be ? CanonicalizeLocaleList(locales).
1699   Maybe<std::vector<std::string>> requested_locales =
1700       CanonicalizeLocaleList(isolate, locales, false);
1701   MAYBE_RETURN(requested_locales, MaybeHandle<JSObject>());
1702 
1703   // Return ? SupportedLocales(availableLocales, requestedLocales, options).
1704   return SupportedLocales(isolate, method, available_locales,
1705                           requested_locales.FromJust(), options);
1706 }
1707 
1708 namespace {
1709 
1710 template <typename T>
IsValidExtension(const icu::Locale & locale,const char * key,const std::string & value)1711 bool IsValidExtension(const icu::Locale& locale, const char* key,
1712                       const std::string& value) {
1713   const char* legacy_type = uloc_toLegacyType(key, value.c_str());
1714   if (legacy_type == nullptr) {
1715     return false;
1716   }
1717   UErrorCode status = U_ZERO_ERROR;
1718   std::unique_ptr<icu::StringEnumeration> enumeration(
1719       T::getKeywordValuesForLocale(key, icu::Locale(locale.getBaseName()),
1720                                    false, status));
1721   if (U_FAILURE(status)) {
1722     return false;
1723   }
1724   int32_t length;
1725   for (const char* item = enumeration->next(&length, status);
1726        U_SUCCESS(status) && item != nullptr;
1727        item = enumeration->next(&length, status)) {
1728     if (strcmp(legacy_type, item) == 0) {
1729       return true;
1730     }
1731   }
1732   return false;
1733 }
1734 
1735 }  // namespace
1736 
IsValidCollation(const icu::Locale & locale,const std::string & value)1737 bool Intl::IsValidCollation(const icu::Locale& locale,
1738                             const std::string& value) {
1739   std::set<std::string> invalid_values = {"standard", "search"};
1740   if (invalid_values.find(value) != invalid_values.end()) return false;
1741   return IsValidExtension<icu::Collator>(locale, "collation", value);
1742 }
1743 
IsWellFormedCalendar(const std::string & value)1744 bool Intl::IsWellFormedCalendar(const std::string& value) {
1745   return JSLocale::Is38AlphaNumList(value);
1746 }
1747 
1748 // ecma402/#sec-iswellformedcurrencycode
IsWellFormedCurrency(const std::string & currency)1749 bool Intl::IsWellFormedCurrency(const std::string& currency) {
1750   return JSLocale::Is3Alpha(currency);
1751 }
1752 
IsValidCalendar(const icu::Locale & locale,const std::string & value)1753 bool Intl::IsValidCalendar(const icu::Locale& locale,
1754                            const std::string& value) {
1755   return IsValidExtension<icu::Calendar>(locale, "calendar", value);
1756 }
1757 
IsValidNumberingSystem(const std::string & value)1758 bool Intl::IsValidNumberingSystem(const std::string& value) {
1759   std::set<std::string> invalid_values = {"native", "traditio", "finance"};
1760   if (invalid_values.find(value) != invalid_values.end()) return false;
1761   UErrorCode status = U_ZERO_ERROR;
1762   std::unique_ptr<icu::NumberingSystem> numbering_system(
1763       icu::NumberingSystem::createInstanceByName(value.c_str(), status));
1764   return U_SUCCESS(status) && numbering_system.get() != nullptr;
1765 }
1766 
1767 namespace {
1768 
IsWellFormedNumberingSystem(const std::string & value)1769 bool IsWellFormedNumberingSystem(const std::string& value) {
1770   return JSLocale::Is38AlphaNumList(value);
1771 }
1772 
LookupAndValidateUnicodeExtensions(icu::Locale * icu_locale,const std::set<std::string> & relevant_keys)1773 std::map<std::string, std::string> LookupAndValidateUnicodeExtensions(
1774     icu::Locale* icu_locale, const std::set<std::string>& relevant_keys) {
1775   std::map<std::string, std::string> extensions;
1776 
1777   UErrorCode status = U_ZERO_ERROR;
1778   icu::LocaleBuilder builder;
1779   builder.setLocale(*icu_locale).clearExtensions();
1780   std::unique_ptr<icu::StringEnumeration> keywords(
1781       icu_locale->createKeywords(status));
1782   if (U_FAILURE(status)) return extensions;
1783 
1784   if (!keywords) return extensions;
1785   char value[ULOC_FULLNAME_CAPACITY];
1786 
1787   int32_t length;
1788   status = U_ZERO_ERROR;
1789   for (const char* keyword = keywords->next(&length, status);
1790        keyword != nullptr; keyword = keywords->next(&length, status)) {
1791     // Ignore failures in ICU and skip to the next keyword.
1792     //
1793     // This is fine.™
1794     if (U_FAILURE(status)) {
1795       status = U_ZERO_ERROR;
1796       continue;
1797     }
1798 
1799     icu_locale->getKeywordValue(keyword, value, ULOC_FULLNAME_CAPACITY, status);
1800 
1801     // Ignore failures in ICU and skip to the next keyword.
1802     //
1803     // This is fine.™
1804     if (U_FAILURE(status)) {
1805       status = U_ZERO_ERROR;
1806       continue;
1807     }
1808 
1809     const char* bcp47_key = uloc_toUnicodeLocaleKey(keyword);
1810 
1811     if (bcp47_key && (relevant_keys.find(bcp47_key) != relevant_keys.end())) {
1812       const char* bcp47_value = uloc_toUnicodeLocaleType(bcp47_key, value);
1813       bool is_valid_value = false;
1814       // 8.h.ii.1.a If keyLocaleData contains requestedValue, then
1815       if (strcmp("ca", bcp47_key) == 0) {
1816         is_valid_value = Intl::IsValidCalendar(*icu_locale, bcp47_value);
1817       } else if (strcmp("co", bcp47_key) == 0) {
1818         is_valid_value = Intl::IsValidCollation(*icu_locale, bcp47_value);
1819       } else if (strcmp("hc", bcp47_key) == 0) {
1820         // https://www.unicode.org/repos/cldr/tags/latest/common/bcp47/calendar.xml
1821         std::set<std::string> valid_values = {"h11", "h12", "h23", "h24"};
1822         is_valid_value = valid_values.find(bcp47_value) != valid_values.end();
1823       } else if (strcmp("lb", bcp47_key) == 0) {
1824         // https://www.unicode.org/repos/cldr/tags/latest/common/bcp47/segmentation.xml
1825         std::set<std::string> valid_values = {"strict", "normal", "loose"};
1826         is_valid_value = valid_values.find(bcp47_value) != valid_values.end();
1827       } else if (strcmp("kn", bcp47_key) == 0) {
1828         // https://www.unicode.org/repos/cldr/tags/latest/common/bcp47/collation.xml
1829         std::set<std::string> valid_values = {"true", "false"};
1830         is_valid_value = valid_values.find(bcp47_value) != valid_values.end();
1831       } else if (strcmp("kf", bcp47_key) == 0) {
1832         // https://www.unicode.org/repos/cldr/tags/latest/common/bcp47/collation.xml
1833         std::set<std::string> valid_values = {"upper", "lower", "false"};
1834         is_valid_value = valid_values.find(bcp47_value) != valid_values.end();
1835       } else if (strcmp("nu", bcp47_key) == 0) {
1836         is_valid_value = Intl::IsValidNumberingSystem(bcp47_value);
1837       }
1838       if (is_valid_value) {
1839         extensions.insert(
1840             std::pair<std::string, std::string>(bcp47_key, bcp47_value));
1841         builder.setUnicodeLocaleKeyword(bcp47_key, bcp47_value);
1842       }
1843     }
1844   }
1845 
1846   status = U_ZERO_ERROR;
1847   *icu_locale = builder.build(status);
1848 
1849   return extensions;
1850 }
1851 
1852 // ecma402/#sec-lookupmatcher
LookupMatcher(Isolate * isolate,const std::set<std::string> & available_locales,const std::vector<std::string> & requested_locales)1853 std::string LookupMatcher(Isolate* isolate,
1854                           const std::set<std::string>& available_locales,
1855                           const std::vector<std::string>& requested_locales) {
1856   // 1. Let result be a new Record.
1857   std::string result;
1858 
1859   // 2. For each element locale of requestedLocales in List order, do
1860   for (const std::string& locale : requested_locales) {
1861     // 2. a. Let noExtensionsLocale be the String value that is locale
1862     //       with all Unicode locale extension sequences removed.
1863     ParsedLocale parsed_locale = ParseBCP47Locale(locale);
1864     std::string no_extensions_locale = parsed_locale.no_extensions_locale;
1865 
1866     // 2. b. Let availableLocale be
1867     //       BestAvailableLocale(availableLocales, noExtensionsLocale).
1868     std::string available_locale =
1869         BestAvailableLocale(available_locales, no_extensions_locale);
1870 
1871     // 2. c. If availableLocale is not undefined, append locale to the
1872     //       end of subset.
1873     if (!available_locale.empty()) {
1874       // Note: The following steps are not performed here because we
1875       // can use ICU to parse the unicode locale extension sequence
1876       // as part of Intl::ResolveLocale.
1877       //
1878       // There's no need to separate the unicode locale extensions
1879       // right here. Instead just return the available locale with the
1880       // extensions.
1881       //
1882       // 2. c. i. Set result.[[locale]] to availableLocale.
1883       // 2. c. ii. If locale and noExtensionsLocale are not the same
1884       // String value, then
1885       // 2. c. ii. 1. Let extension be the String value consisting of
1886       // the first substring of locale that is a Unicode locale
1887       // extension sequence.
1888       // 2. c. ii. 2. Set result.[[extension]] to extension.
1889       // 2. c. iii. Return result.
1890       return available_locale + parsed_locale.extension;
1891     }
1892   }
1893 
1894   // 3. Let defLocale be DefaultLocale();
1895   // 4. Set result.[[locale]] to defLocale.
1896   // 5. Return result.
1897   return DefaultLocale(isolate);
1898 }
1899 
1900 }  // namespace
1901 
1902 // This function doesn't correspond exactly with the spec. Instead
1903 // we use ICU to do all the string manipulations that the spec
1904 // peforms.
1905 //
1906 // The spec uses this function to normalize values for various
1907 // relevant extension keys (such as disallowing "search" for
1908 // collation). Instead of doing this here, we let the callers of
1909 // this method perform such normalization.
1910 //
1911 // ecma402/#sec-resolvelocale
ResolveLocale(Isolate * isolate,const std::set<std::string> & available_locales,const std::vector<std::string> & requested_locales,MatcherOption matcher,const std::set<std::string> & relevant_extension_keys)1912 Maybe<Intl::ResolvedLocale> Intl::ResolveLocale(
1913     Isolate* isolate, const std::set<std::string>& available_locales,
1914     const std::vector<std::string>& requested_locales, MatcherOption matcher,
1915     const std::set<std::string>& relevant_extension_keys) {
1916   std::string locale;
1917   if (matcher == Intl::MatcherOption::kBestFit && implement_bestfit) {
1918     locale = BestFitMatcher(isolate, available_locales, requested_locales);
1919   } else {
1920     locale = LookupMatcher(isolate, available_locales, requested_locales);
1921   }
1922 
1923   Maybe<icu::Locale> maybe_icu_locale = CreateICULocale(locale);
1924   MAYBE_RETURN(maybe_icu_locale, Nothing<Intl::ResolvedLocale>());
1925   icu::Locale icu_locale = maybe_icu_locale.FromJust();
1926   std::map<std::string, std::string> extensions =
1927       LookupAndValidateUnicodeExtensions(&icu_locale, relevant_extension_keys);
1928 
1929   std::string canonicalized_locale = Intl::ToLanguageTag(icu_locale).FromJust();
1930 
1931   // TODO(gsathya): Remove privateuse subtags from extensions.
1932 
1933   return Just(
1934       Intl::ResolvedLocale{canonicalized_locale, icu_locale, extensions});
1935 }
1936 
SetTextToBreakIterator(Isolate * isolate,Handle<String> text,icu::BreakIterator * break_iterator)1937 Handle<Managed<icu::UnicodeString>> Intl::SetTextToBreakIterator(
1938     Isolate* isolate, Handle<String> text, icu::BreakIterator* break_iterator) {
1939   text = String::Flatten(isolate, text);
1940   icu::UnicodeString* u_text = static_cast<icu::UnicodeString*>(
1941       Intl::ToICUUnicodeString(isolate, text).clone());
1942 
1943   Handle<Managed<icu::UnicodeString>> new_u_text =
1944       Managed<icu::UnicodeString>::FromRawPtr(isolate, 0, u_text);
1945 
1946   break_iterator->setText(*u_text);
1947   return new_u_text;
1948 }
1949 
1950 // ecma262 #sec-string.prototype.normalize
Normalize(Isolate * isolate,Handle<String> string,Handle<Object> form_input)1951 MaybeHandle<String> Intl::Normalize(Isolate* isolate, Handle<String> string,
1952                                     Handle<Object> form_input) {
1953   const char* form_name;
1954   UNormalization2Mode form_mode;
1955   if (form_input->IsUndefined(isolate)) {
1956     // default is FNC
1957     form_name = "nfc";
1958     form_mode = UNORM2_COMPOSE;
1959   } else {
1960     Handle<String> form;
1961     ASSIGN_RETURN_ON_EXCEPTION(isolate, form,
1962                                Object::ToString(isolate, form_input), String);
1963 
1964     if (String::Equals(isolate, form, isolate->factory()->NFC_string())) {
1965       form_name = "nfc";
1966       form_mode = UNORM2_COMPOSE;
1967     } else if (String::Equals(isolate, form,
1968                               isolate->factory()->NFD_string())) {
1969       form_name = "nfc";
1970       form_mode = UNORM2_DECOMPOSE;
1971     } else if (String::Equals(isolate, form,
1972                               isolate->factory()->NFKC_string())) {
1973       form_name = "nfkc";
1974       form_mode = UNORM2_COMPOSE;
1975     } else if (String::Equals(isolate, form,
1976                               isolate->factory()->NFKD_string())) {
1977       form_name = "nfkc";
1978       form_mode = UNORM2_DECOMPOSE;
1979     } else {
1980       Handle<String> valid_forms =
1981           isolate->factory()->NewStringFromStaticChars("NFC, NFD, NFKC, NFKD");
1982       THROW_NEW_ERROR(
1983           isolate,
1984           NewRangeError(MessageTemplate::kNormalizationForm, valid_forms),
1985           String);
1986     }
1987   }
1988 
1989   int length = string->length();
1990   string = String::Flatten(isolate, string);
1991   icu::UnicodeString result;
1992   std::unique_ptr<uc16[]> sap;
1993   UErrorCode status = U_ZERO_ERROR;
1994   icu::UnicodeString input = ToICUUnicodeString(isolate, string);
1995   // Getting a singleton. Should not free it.
1996   const icu::Normalizer2* normalizer =
1997       icu::Normalizer2::getInstance(nullptr, form_name, form_mode, status);
1998   DCHECK(U_SUCCESS(status));
1999   DCHECK_NOT_NULL(normalizer);
2000   int32_t normalized_prefix_length =
2001       normalizer->spanQuickCheckYes(input, status);
2002   // Quick return if the input is already normalized.
2003   if (length == normalized_prefix_length) return string;
2004   icu::UnicodeString unnormalized =
2005       input.tempSubString(normalized_prefix_length);
2006   // Read-only alias of the normalized prefix.
2007   result.setTo(false, input.getBuffer(), normalized_prefix_length);
2008   // copy-on-write; normalize the suffix and append to |result|.
2009   normalizer->normalizeSecondAndAppend(result, unnormalized, status);
2010 
2011   if (U_FAILURE(status)) {
2012     THROW_NEW_ERROR(isolate, NewTypeError(MessageTemplate::kIcuError), String);
2013   }
2014 
2015   return Intl::ToString(isolate, result);
2016 }
2017 
2018 // ICUTimezoneCache calls out to ICU for TimezoneCache
2019 // functionality in a straightforward way.
2020 class ICUTimezoneCache : public base::TimezoneCache {
2021  public:
ICUTimezoneCache()2022   ICUTimezoneCache() : timezone_(nullptr) { Clear(TimeZoneDetection::kSkip); }
2023 
~ICUTimezoneCache()2024   ~ICUTimezoneCache() override { Clear(TimeZoneDetection::kSkip); }
2025 
2026   const char* LocalTimezone(double time_ms) override;
2027 
2028   double DaylightSavingsOffset(double time_ms) override;
2029 
2030   double LocalTimeOffset(double time_ms, bool is_utc) override;
2031 
2032   void Clear(TimeZoneDetection time_zone_detection) override;
2033 
2034  private:
2035   icu::TimeZone* GetTimeZone();
2036 
2037   bool GetOffsets(double time_ms, bool is_utc, int32_t* raw_offset,
2038                   int32_t* dst_offset);
2039 
2040   icu::TimeZone* timezone_;
2041 
2042   std::string timezone_name_;
2043   std::string dst_timezone_name_;
2044 };
2045 
LocalTimezone(double time_ms)2046 const char* ICUTimezoneCache::LocalTimezone(double time_ms) {
2047   bool is_dst = DaylightSavingsOffset(time_ms) != 0;
2048   std::string* name = is_dst ? &dst_timezone_name_ : &timezone_name_;
2049   if (name->empty()) {
2050     icu::UnicodeString result;
2051     GetTimeZone()->getDisplayName(is_dst, icu::TimeZone::LONG, result);
2052     result += '\0';
2053 
2054     icu::StringByteSink<std::string> byte_sink(name);
2055     result.toUTF8(byte_sink);
2056   }
2057   DCHECK(!name->empty());
2058   return name->c_str();
2059 }
2060 
GetTimeZone()2061 icu::TimeZone* ICUTimezoneCache::GetTimeZone() {
2062   if (timezone_ == nullptr) {
2063     timezone_ = icu::TimeZone::createDefault();
2064   }
2065   return timezone_;
2066 }
2067 
GetOffsets(double time_ms,bool is_utc,int32_t * raw_offset,int32_t * dst_offset)2068 bool ICUTimezoneCache::GetOffsets(double time_ms, bool is_utc,
2069                                   int32_t* raw_offset, int32_t* dst_offset) {
2070   UErrorCode status = U_ZERO_ERROR;
2071   // TODO(jshin): ICU TimeZone class handles skipped time differently from
2072   // Ecma 262 (https://github.com/tc39/ecma262/pull/778) and icu::TimeZone
2073   // class does not expose the necessary API. Fixing
2074   // http://bugs.icu-project.org/trac/ticket/13268 would make it easy to
2075   // implement the proposed spec change. A proposed fix for ICU is
2076   //    https://chromium-review.googlesource.com/851265 .
2077   // In the meantime, use an internal (still public) API of icu::BasicTimeZone.
2078   // Once it's accepted by the upstream, get rid of cast. Note that casting
2079   // TimeZone to BasicTimeZone is safe because we know that icu::TimeZone used
2080   // here is a BasicTimeZone.
2081   if (is_utc) {
2082     GetTimeZone()->getOffset(time_ms, false, *raw_offset, *dst_offset, status);
2083   } else {
2084     static_cast<const icu::BasicTimeZone*>(GetTimeZone())
2085         ->getOffsetFromLocal(time_ms, icu::BasicTimeZone::kFormer,
2086                              icu::BasicTimeZone::kFormer, *raw_offset,
2087                              *dst_offset, status);
2088   }
2089 
2090   return U_SUCCESS(status);
2091 }
2092 
DaylightSavingsOffset(double time_ms)2093 double ICUTimezoneCache::DaylightSavingsOffset(double time_ms) {
2094   int32_t raw_offset, dst_offset;
2095   if (!GetOffsets(time_ms, true, &raw_offset, &dst_offset)) return 0;
2096   return dst_offset;
2097 }
2098 
LocalTimeOffset(double time_ms,bool is_utc)2099 double ICUTimezoneCache::LocalTimeOffset(double time_ms, bool is_utc) {
2100   int32_t raw_offset, dst_offset;
2101   if (!GetOffsets(time_ms, is_utc, &raw_offset, &dst_offset)) return 0;
2102   return raw_offset + dst_offset;
2103 }
2104 
Clear(TimeZoneDetection time_zone_detection)2105 void ICUTimezoneCache::Clear(TimeZoneDetection time_zone_detection) {
2106   delete timezone_;
2107   timezone_ = nullptr;
2108   timezone_name_.clear();
2109   dst_timezone_name_.clear();
2110   if (time_zone_detection == TimeZoneDetection::kRedetect) {
2111     icu::TimeZone::adoptDefault(icu::TimeZone::detectHostTimeZone());
2112   }
2113 }
2114 
CreateTimeZoneCache()2115 base::TimezoneCache* Intl::CreateTimeZoneCache() {
2116   return FLAG_icu_timezone_data ? new ICUTimezoneCache()
2117                                 : base::OS::CreateTimezoneCache();
2118 }
2119 
GetLocaleMatcher(Isolate * isolate,Handle<JSReceiver> options,const char * method)2120 Maybe<Intl::MatcherOption> Intl::GetLocaleMatcher(Isolate* isolate,
2121                                                   Handle<JSReceiver> options,
2122                                                   const char* method) {
2123   return Intl::GetStringOption<Intl::MatcherOption>(
2124       isolate, options, "localeMatcher", method, {"best fit", "lookup"},
2125       {Intl::MatcherOption::kBestFit, Intl::MatcherOption::kLookup},
2126       Intl::MatcherOption::kBestFit);
2127 }
2128 
GetNumberingSystem(Isolate * isolate,Handle<JSReceiver> options,const char * method,std::unique_ptr<char[]> * result)2129 Maybe<bool> Intl::GetNumberingSystem(Isolate* isolate,
2130                                      Handle<JSReceiver> options,
2131                                      const char* method,
2132                                      std::unique_ptr<char[]>* result) {
2133   const std::vector<const char*> empty_values = {};
2134   Maybe<bool> maybe = Intl::GetStringOption(isolate, options, "numberingSystem",
2135                                             empty_values, method, result);
2136   MAYBE_RETURN(maybe, Nothing<bool>());
2137   if (maybe.FromJust() && *result != nullptr) {
2138     if (!IsWellFormedNumberingSystem(result->get())) {
2139       THROW_NEW_ERROR_RETURN_VALUE(
2140           isolate,
2141           NewRangeError(
2142               MessageTemplate::kInvalid,
2143               isolate->factory()->numberingSystem_string(),
2144               isolate->factory()->NewStringFromAsciiChecked(result->get())),
2145           Nothing<bool>());
2146     }
2147     return Just(true);
2148   }
2149   return Just(false);
2150 }
2151 
GetAvailableLocales()2152 const std::set<std::string>& Intl::GetAvailableLocales() {
2153   static base::LazyInstance<Intl::AvailableLocales<>>::type available_locales =
2154       LAZY_INSTANCE_INITIALIZER;
2155   return available_locales.Pointer()->Get();
2156 }
2157 
namespace {

// Policy type used as the template argument of Intl::AvailableLocales by
// Intl::GetAvailableLocalesForDateFormat(). It supplies a key ("calendar")
// and a null path.
// NOTE(review): presumably these name the ICU resource that must exist for a
// locale to be counted as available -- confirm against AvailableLocales.
struct CheckCalendar {
  static const char* path() {
    return nullptr;
  }
  static const char* key() {
    return "calendar";
  }
};

}  // namespace
2166 
GetAvailableLocalesForDateFormat()2167 const std::set<std::string>& Intl::GetAvailableLocalesForDateFormat() {
2168   static base::LazyInstance<Intl::AvailableLocales<CheckCalendar>>::type
2169       available_locales = LAZY_INSTANCE_INITIALIZER;
2170   return available_locales.Pointer()->Get();
2171 }
2172 
NumberFieldToType(Isolate * isolate,Handle<Object> numeric_obj,int32_t field_id)2173 Handle<String> Intl::NumberFieldToType(Isolate* isolate,
2174                                        Handle<Object> numeric_obj,
2175                                        int32_t field_id) {
2176   DCHECK(numeric_obj->IsNumeric());
2177   switch (static_cast<UNumberFormatFields>(field_id)) {
2178     case UNUM_INTEGER_FIELD:
2179       if (numeric_obj->IsBigInt()) {
2180         // Neither NaN nor Infinite could be stored into BigInt
2181         // so just return integer.
2182         return isolate->factory()->integer_string();
2183       } else {
2184         double number = numeric_obj->Number();
2185         if (std::isfinite(number)) return isolate->factory()->integer_string();
2186         if (std::isnan(number)) return isolate->factory()->nan_string();
2187         return isolate->factory()->infinity_string();
2188       }
2189     case UNUM_FRACTION_FIELD:
2190       return isolate->factory()->fraction_string();
2191     case UNUM_DECIMAL_SEPARATOR_FIELD:
2192       return isolate->factory()->decimal_string();
2193     case UNUM_GROUPING_SEPARATOR_FIELD:
2194       return isolate->factory()->group_string();
2195     case UNUM_CURRENCY_FIELD:
2196       return isolate->factory()->currency_string();
2197     case UNUM_PERCENT_FIELD:
2198       return isolate->factory()->percentSign_string();
2199     case UNUM_SIGN_FIELD:
2200       if (numeric_obj->IsBigInt()) {
2201         Handle<BigInt> big_int = Handle<BigInt>::cast(numeric_obj);
2202         return big_int->IsNegative() ? isolate->factory()->minusSign_string()
2203                                      : isolate->factory()->plusSign_string();
2204       } else {
2205         double number = numeric_obj->Number();
2206         return std::signbit(number) ? isolate->factory()->minusSign_string()
2207                                     : isolate->factory()->plusSign_string();
2208       }
2209     case UNUM_EXPONENT_SYMBOL_FIELD:
2210       return isolate->factory()->exponentSeparator_string();
2211 
2212     case UNUM_EXPONENT_SIGN_FIELD:
2213       return isolate->factory()->exponentMinusSign_string();
2214 
2215     case UNUM_EXPONENT_FIELD:
2216       return isolate->factory()->exponentInteger_string();
2217 
2218     case UNUM_PERMILL_FIELD:
2219       // We're not creating any permill formatter, and it's not even clear how
2220       // that would be possible with the ICU API.
2221       UNREACHABLE();
2222       return Handle<String>();
2223 
2224     case UNUM_COMPACT_FIELD:
2225       return isolate->factory()->compact_string();
2226     case UNUM_MEASURE_UNIT_FIELD:
2227       return isolate->factory()->unit_string();
2228 
2229     default:
2230       UNREACHABLE();
2231       return Handle<String>();
2232   }
2233 }
2234 
2235 // A helper function to convert the FormattedValue for several Intl objects.
FormattedToString(Isolate * isolate,const icu::FormattedValue & formatted)2236 MaybeHandle<String> Intl::FormattedToString(
2237     Isolate* isolate, const icu::FormattedValue& formatted) {
2238   UErrorCode status = U_ZERO_ERROR;
2239   icu::UnicodeString result = formatted.toString(status);
2240   if (U_FAILURE(status)) {
2241     THROW_NEW_ERROR(isolate, NewTypeError(MessageTemplate::kIcuError), String);
2242   }
2243   return Intl::ToString(isolate, result);
2244 }
2245 
2246 
2247 }  // namespace internal
2248 }  // namespace v8
2249