• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2013 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef V8_INTL_SUPPORT
6 #error Internationalization is expected to be enabled.
7 #endif  // V8_INTL_SUPPORT
8 
9 #include "src/objects/intl-objects.h"
10 
11 #include <algorithm>
12 #include <memory>
13 #include <string>
14 #include <vector>
15 
16 #include "src/api/api-inl.h"
17 #include "src/base/strings.h"
18 #include "src/date/date.h"
19 #include "src/execution/isolate.h"
20 #include "src/execution/local-isolate.h"
21 #include "src/handles/global-handles.h"
22 #include "src/heap/factory.h"
23 #include "src/objects/js-collator-inl.h"
24 #include "src/objects/js-date-time-format-inl.h"
25 #include "src/objects/js-locale-inl.h"
26 #include "src/objects/js-locale.h"
27 #include "src/objects/js-number-format-inl.h"
28 #include "src/objects/managed-inl.h"
29 #include "src/objects/objects-inl.h"
30 #include "src/objects/option-utils.h"
31 #include "src/objects/property-descriptor.h"
32 #include "src/objects/smi.h"
33 #include "src/objects/string.h"
34 #include "src/strings/string-case.h"
35 #include "unicode/basictz.h"
36 #include "unicode/brkiter.h"
37 #include "unicode/calendar.h"
38 #include "unicode/coll.h"
39 #include "unicode/datefmt.h"
40 #include "unicode/decimfmt.h"
41 #include "unicode/formattedvalue.h"
42 #include "unicode/localebuilder.h"
43 #include "unicode/localematcher.h"
44 #include "unicode/locid.h"
45 #include "unicode/normalizer2.h"
46 #include "unicode/numberformatter.h"
47 #include "unicode/numfmt.h"
48 #include "unicode/numsys.h"
49 #include "unicode/timezone.h"
50 #include "unicode/ures.h"
51 #include "unicode/ustring.h"
52 #include "unicode/uvernum.h"  // U_ICU_VERSION_MAJOR_NUM
53 
54 #define XSTR(s) STR(s)
55 #define STR(s) #s
56 static_assert(
57     V8_MINIMUM_ICU_VERSION <= U_ICU_VERSION_MAJOR_NUM,
58     "v8 is required to build with ICU " XSTR(V8_MINIMUM_ICU_VERSION) " and up");
59 #undef STR
60 #undef XSTR
61 
62 namespace v8 {
63 namespace internal {
64 
65 namespace {
66 
// Lowercase mapping for code units 0x00-0xFF, indexed by the code unit itself
// and laid out eight entries per row. 'A'-'Z' (0x41-0x5A) and 0xC0-0xDE other
// than 0xD7 map to the value 0x20 higher; every other entry maps to itself.
constexpr uint8_t kToLower[256] = {
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
    0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
    0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
    0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
    0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F,
    0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7,
    0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
    0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7,
    0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xD7,
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF,
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
};
91 
ToLatin1Lower(uint16_t ch)92 inline constexpr uint16_t ToLatin1Lower(uint16_t ch) {
93   return static_cast<uint16_t>(kToLower[ch]);
94 }
95 
96 // Does not work for U+00DF (sharp-s), U+00B5 (micron), U+00FF.
ToLatin1Upper(uint16_t ch)97 inline constexpr uint16_t ToLatin1Upper(uint16_t ch) {
98   DCHECK(ch != 0xDF && ch != 0xB5 && ch != 0xFF);
99   return ch &
100          ~((IsAsciiLower(ch) || (((ch & 0xE0) == 0xE0) && ch != 0xF7)) << 5);
101 }
102 
103 template <typename Char>
ToUpperFastASCII(const base::Vector<const Char> & src,Handle<SeqOneByteString> result)104 bool ToUpperFastASCII(const base::Vector<const Char>& src,
105                       Handle<SeqOneByteString> result) {
106   // Do a faster loop for the case where all the characters are ASCII.
107   uint16_t ored = 0;
108   int32_t index = 0;
109   for (auto it = src.begin(); it != src.end(); ++it) {
110     uint16_t ch = static_cast<uint16_t>(*it);
111     ored |= ch;
112     result->SeqOneByteStringSet(index++, ToAsciiUpper(ch));
113   }
114   return !(ored & ~0x7F);
115 }
116 
// Code unit of U+00DF (lowercase sharp-s).
constexpr uint16_t sharp_s = 0xDF;
118 
119 template <typename Char>
ToUpperOneByte(const base::Vector<const Char> & src,uint8_t * dest,int * sharp_s_count)120 bool ToUpperOneByte(const base::Vector<const Char>& src, uint8_t* dest,
121                     int* sharp_s_count) {
122   // Still pretty-fast path for the input with non-ASCII Latin-1 characters.
123 
124   // There are two special cases.
125   //  1. U+00B5 and U+00FF are mapped to a character beyond U+00FF.
126   //  2. Lower case sharp-S converts to "SS" (two characters)
127   *sharp_s_count = 0;
128   for (auto it = src.begin(); it != src.end(); ++it) {
129     uint16_t ch = static_cast<uint16_t>(*it);
130     if (V8_UNLIKELY(ch == sharp_s)) {
131       ++(*sharp_s_count);
132       continue;
133     }
134     if (V8_UNLIKELY(ch == 0xB5 || ch == 0xFF)) {
135       // Since this upper-cased character does not fit in an 8-bit string, we
136       // need to take the 16-bit path.
137       return false;
138     }
139     *dest++ = ToLatin1Upper(ch);
140   }
141 
142   return true;
143 }
144 
145 template <typename Char>
ToUpperWithSharpS(const base::Vector<const Char> & src,Handle<SeqOneByteString> result)146 void ToUpperWithSharpS(const base::Vector<const Char>& src,
147                        Handle<SeqOneByteString> result) {
148   int32_t dest_index = 0;
149   for (auto it = src.begin(); it != src.end(); ++it) {
150     uint16_t ch = static_cast<uint16_t>(*it);
151     if (ch == sharp_s) {
152       result->SeqOneByteStringSet(dest_index++, 'S');
153       result->SeqOneByteStringSet(dest_index++, 'S');
154     } else {
155       result->SeqOneByteStringSet(dest_index++, ToLatin1Upper(ch));
156     }
157   }
158 }
159 
FindFirstUpperOrNonAscii(String s,int length)160 inline int FindFirstUpperOrNonAscii(String s, int length) {
161   for (int index = 0; index < length; ++index) {
162     uint16_t ch = s.Get(index);
163     if (V8_UNLIKELY(IsAsciiUpper(ch) || ch & ~0x7F)) {
164       return index;
165     }
166   }
167   return length;
168 }
169 
GetUCharBufferFromFlat(const String::FlatContent & flat,std::unique_ptr<base::uc16[]> * dest,int32_t length)170 const UChar* GetUCharBufferFromFlat(const String::FlatContent& flat,
171                                     std::unique_ptr<base::uc16[]>* dest,
172                                     int32_t length) {
173   DCHECK(flat.IsFlat());
174   if (flat.IsOneByte()) {
175     if (!*dest) {
176       dest->reset(NewArray<base::uc16>(length));
177       CopyChars(dest->get(), flat.ToOneByteVector().begin(), length);
178     }
179     return reinterpret_cast<const UChar*>(dest->get());
180   } else {
181     return reinterpret_cast<const UChar*>(flat.ToUC16Vector().begin());
182   }
183 }
184 
185 template <typename T>
New(Isolate * isolate,Handle<JSFunction> constructor,Handle<Object> locales,Handle<Object> options,const char * method_name)186 MaybeHandle<T> New(Isolate* isolate, Handle<JSFunction> constructor,
187                    Handle<Object> locales, Handle<Object> options,
188                    const char* method_name) {
189   Handle<Map> map;
190   ASSIGN_RETURN_ON_EXCEPTION(
191       isolate, map,
192       JSFunction::GetDerivedMap(isolate, constructor, constructor), T);
193   return T::New(isolate, map, locales, options, method_name);
194 }
195 }  // namespace
196 
ToLatin1LowerTable()197 const uint8_t* Intl::ToLatin1LowerTable() { return &kToLower[0]; }
198 
ToICUUnicodeString(Isolate * isolate,Handle<String> string,int offset)199 icu::UnicodeString Intl::ToICUUnicodeString(Isolate* isolate,
200                                             Handle<String> string, int offset) {
201   DCHECK(string->IsFlat());
202   DisallowGarbageCollection no_gc;
203   std::unique_ptr<base::uc16[]> sap;
204   // Short one-byte strings can be expanded on the stack to avoid allocating a
205   // temporary buffer.
206   constexpr int kShortStringSize = 80;
207   UChar short_string_buffer[kShortStringSize];
208   const UChar* uchar_buffer = nullptr;
209   const String::FlatContent& flat = string->GetFlatContent(no_gc);
210   int32_t length = string->length();
211   DCHECK_LE(offset, length);
212   if (flat.IsOneByte() && length <= kShortStringSize) {
213     CopyChars(short_string_buffer, flat.ToOneByteVector().begin(), length);
214     uchar_buffer = short_string_buffer;
215   } else {
216     uchar_buffer = GetUCharBufferFromFlat(flat, &sap, length);
217   }
218   return icu::UnicodeString(uchar_buffer + offset, length - offset);
219 }
220 
221 namespace {
222 
ToICUStringPiece(Isolate * isolate,Handle<String> string,int offset=0)223 icu::StringPiece ToICUStringPiece(Isolate* isolate, Handle<String> string,
224                                   int offset = 0) {
225   DCHECK(string->IsFlat());
226   DisallowGarbageCollection no_gc;
227 
228   const String::FlatContent& flat = string->GetFlatContent(no_gc);
229   if (!flat.IsOneByte()) return icu::StringPiece();
230 
231   int32_t length = string->length();
232   DCHECK_LT(offset, length);
233   const char* char_buffer =
234       reinterpret_cast<const char*>(flat.ToOneByteVector().begin());
235   if (!String::IsAscii(char_buffer, length)) {
236     return icu::StringPiece();
237   }
238 
239   return icu::StringPiece(char_buffer + offset, length - offset);
240 }
241 
LocaleConvertCase(Isolate * isolate,Handle<String> s,bool is_to_upper,const char * lang)242 MaybeHandle<String> LocaleConvertCase(Isolate* isolate, Handle<String> s,
243                                       bool is_to_upper, const char* lang) {
244   auto case_converter = is_to_upper ? u_strToUpper : u_strToLower;
245   int32_t src_length = s->length();
246   int32_t dest_length = src_length;
247   UErrorCode status;
248   Handle<SeqTwoByteString> result;
249   std::unique_ptr<base::uc16[]> sap;
250 
251   if (dest_length == 0) return ReadOnlyRoots(isolate).empty_string_handle();
252 
253   // This is not a real loop. It'll be executed only once (no overflow) or
254   // twice (overflow).
255   for (int i = 0; i < 2; ++i) {
256     // Case conversion can increase the string length (e.g. sharp-S => SS) so
257     // that we have to handle RangeError exceptions here.
258     ASSIGN_RETURN_ON_EXCEPTION(
259         isolate, result, isolate->factory()->NewRawTwoByteString(dest_length),
260         String);
261     DisallowGarbageCollection no_gc;
262     DCHECK(s->IsFlat());
263     String::FlatContent flat = s->GetFlatContent(no_gc);
264     const UChar* src = GetUCharBufferFromFlat(flat, &sap, src_length);
265     status = U_ZERO_ERROR;
266     dest_length =
267         case_converter(reinterpret_cast<UChar*>(result->GetChars(no_gc)),
268                        dest_length, src, src_length, lang, &status);
269     if (status != U_BUFFER_OVERFLOW_ERROR) break;
270   }
271 
272   // In most cases, the output will fill the destination buffer completely
273   // leading to an unterminated string (U_STRING_NOT_TERMINATED_WARNING).
274   // Only in rare cases, it'll be shorter than the destination buffer and
275   // |result| has to be truncated.
276   DCHECK(U_SUCCESS(status));
277   if (V8_LIKELY(status == U_STRING_NOT_TERMINATED_WARNING)) {
278     DCHECK(dest_length == result->length());
279     return result;
280   }
281   DCHECK(dest_length < result->length());
282   return SeqString::Truncate(result, dest_length);
283 }
284 
285 }  // namespace
286 
287 // A stripped-down version of ConvertToLower that can only handle flat one-byte
288 // strings and does not allocate. Note that {src} could still be, e.g., a
289 // one-byte sliced string with a two-byte parent string.
290 // Called from TF builtins.
ConvertOneByteToLower(String src,String dst)291 String Intl::ConvertOneByteToLower(String src, String dst) {
292   DCHECK_EQ(src.length(), dst.length());
293   DCHECK(src.IsOneByteRepresentation());
294   DCHECK(src.IsFlat());
295   DCHECK(dst.IsSeqOneByteString());
296 
297   DisallowGarbageCollection no_gc;
298 
299   const int length = src.length();
300   String::FlatContent src_flat = src.GetFlatContent(no_gc);
301   uint8_t* dst_data = SeqOneByteString::cast(dst).GetChars(no_gc);
302 
303   if (src_flat.IsOneByte()) {
304     const uint8_t* src_data = src_flat.ToOneByteVector().begin();
305 
306     bool has_changed_character = false;
307     int index_to_first_unprocessed =
308         FastAsciiConvert<true>(reinterpret_cast<char*>(dst_data),
309                                reinterpret_cast<const char*>(src_data), length,
310                                &has_changed_character);
311 
312     if (index_to_first_unprocessed == length) {
313       return has_changed_character ? dst : src;
314     }
315 
316     // If not ASCII, we keep the result up to index_to_first_unprocessed and
317     // process the rest.
318     for (int index = index_to_first_unprocessed; index < length; ++index) {
319       dst_data[index] = ToLatin1Lower(static_cast<uint16_t>(src_data[index]));
320     }
321   } else {
322     DCHECK(src_flat.IsTwoByte());
323     int index_to_first_unprocessed = FindFirstUpperOrNonAscii(src, length);
324     if (index_to_first_unprocessed == length) return src;
325 
326     const uint16_t* src_data = src_flat.ToUC16Vector().begin();
327     CopyChars(dst_data, src_data, index_to_first_unprocessed);
328     for (int index = index_to_first_unprocessed; index < length; ++index) {
329       dst_data[index] = ToLatin1Lower(static_cast<uint16_t>(src_data[index]));
330     }
331   }
332 
333   return dst;
334 }
335 
ConvertToLower(Isolate * isolate,Handle<String> s)336 MaybeHandle<String> Intl::ConvertToLower(Isolate* isolate, Handle<String> s) {
337   if (!s->IsOneByteRepresentation()) {
338     // Use a slower implementation for strings with characters beyond U+00FF.
339     return LocaleConvertCase(isolate, s, false, "");
340   }
341 
342   int length = s->length();
343 
344   // We depend here on the invariant that the length of a Latin1
345   // string is invariant under ToLowerCase, and the result always
346   // fits in the Latin1 range in the *root locale*. It does not hold
347   // for ToUpperCase even in the root locale.
348 
349   // Scan the string for uppercase and non-ASCII characters for strings
350   // shorter than a machine-word without any memory allocation overhead.
351   // TODO(jshin): Apply this to a longer input by breaking FastAsciiConvert()
352   // to two parts, one for scanning the prefix with no change and the other for
353   // handling ASCII-only characters.
354 
355   bool is_short = length < static_cast<int>(sizeof(uintptr_t));
356   if (is_short) {
357     bool is_lower_ascii = FindFirstUpperOrNonAscii(*s, length) == length;
358     if (is_lower_ascii) return s;
359   }
360 
361   Handle<SeqOneByteString> result =
362       isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
363 
364   return Handle<String>(Intl::ConvertOneByteToLower(*s, *result), isolate);
365 }
366 
ConvertToUpper(Isolate * isolate,Handle<String> s)367 MaybeHandle<String> Intl::ConvertToUpper(Isolate* isolate, Handle<String> s) {
368   int32_t length = s->length();
369   if (s->IsOneByteRepresentation() && length > 0) {
370     Handle<SeqOneByteString> result =
371         isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
372 
373     DCHECK(s->IsFlat());
374     int sharp_s_count;
375     bool is_result_single_byte;
376     {
377       DisallowGarbageCollection no_gc;
378       String::FlatContent flat = s->GetFlatContent(no_gc);
379       uint8_t* dest = result->GetChars(no_gc);
380       if (flat.IsOneByte()) {
381         base::Vector<const uint8_t> src = flat.ToOneByteVector();
382         bool has_changed_character = false;
383         int index_to_first_unprocessed = FastAsciiConvert<false>(
384             reinterpret_cast<char*>(result->GetChars(no_gc)),
385             reinterpret_cast<const char*>(src.begin()), length,
386             &has_changed_character);
387         if (index_to_first_unprocessed == length) {
388           return has_changed_character ? result : s;
389         }
390         // If not ASCII, we keep the result up to index_to_first_unprocessed and
391         // process the rest.
392         is_result_single_byte =
393             ToUpperOneByte(src.SubVector(index_to_first_unprocessed, length),
394                            dest + index_to_first_unprocessed, &sharp_s_count);
395       } else {
396         DCHECK(flat.IsTwoByte());
397         base::Vector<const uint16_t> src = flat.ToUC16Vector();
398         if (ToUpperFastASCII(src, result)) return result;
399         is_result_single_byte = ToUpperOneByte(src, dest, &sharp_s_count);
400       }
401     }
402 
403     // Go to the full Unicode path if there are characters whose uppercase
404     // is beyond the Latin-1 range (cannot be represented in OneByteString).
405     if (V8_UNLIKELY(!is_result_single_byte)) {
406       return LocaleConvertCase(isolate, s, true, "");
407     }
408 
409     if (sharp_s_count == 0) return result;
410 
411     // We have sharp_s_count sharp-s characters, but the result is still
412     // in the Latin-1 range.
413     ASSIGN_RETURN_ON_EXCEPTION(
414         isolate, result,
415         isolate->factory()->NewRawOneByteString(length + sharp_s_count),
416         String);
417     DisallowGarbageCollection no_gc;
418     String::FlatContent flat = s->GetFlatContent(no_gc);
419     if (flat.IsOneByte()) {
420       ToUpperWithSharpS(flat.ToOneByteVector(), result);
421     } else {
422       ToUpperWithSharpS(flat.ToUC16Vector(), result);
423     }
424 
425     return result;
426   }
427 
428   return LocaleConvertCase(isolate, s, true, "");
429 }
430 
GetNumberingSystem(const icu::Locale & icu_locale)431 std::string Intl::GetNumberingSystem(const icu::Locale& icu_locale) {
432   // Ugly hack. ICU doesn't expose numbering system in any way, so we have
433   // to assume that for given locale NumberingSystem constructor produces the
434   // same digits as NumberFormat/Calendar would.
435   UErrorCode status = U_ZERO_ERROR;
436   std::unique_ptr<icu::NumberingSystem> numbering_system(
437       icu::NumberingSystem::createInstance(icu_locale, status));
438   if (U_SUCCESS(status) && !numbering_system->isAlgorithmic()) {
439     return numbering_system->getName();
440   }
441   return "latn";
442 }
443 
444 namespace {
445 
CreateICULocale(const std::string & bcp47_locale)446 Maybe<icu::Locale> CreateICULocale(const std::string& bcp47_locale) {
447   DisallowGarbageCollection no_gc;
448 
449   // Convert BCP47 into ICU locale format.
450   UErrorCode status = U_ZERO_ERROR;
451 
452   icu::Locale icu_locale = icu::Locale::forLanguageTag(bcp47_locale, status);
453   DCHECK(U_SUCCESS(status));
454   if (icu_locale.isBogus()) {
455     return Nothing<icu::Locale>();
456   }
457 
458   return Just(icu_locale);
459 }
460 
461 }  // anonymous namespace
462 
463 // static
464 
ToString(Isolate * isolate,const icu::UnicodeString & string)465 MaybeHandle<String> Intl::ToString(Isolate* isolate,
466                                    const icu::UnicodeString& string) {
467   return isolate->factory()->NewStringFromTwoByte(base::Vector<const uint16_t>(
468       reinterpret_cast<const uint16_t*>(string.getBuffer()), string.length()));
469 }
470 
ToString(Isolate * isolate,const icu::UnicodeString & string,int32_t begin,int32_t end)471 MaybeHandle<String> Intl::ToString(Isolate* isolate,
472                                    const icu::UnicodeString& string,
473                                    int32_t begin, int32_t end) {
474   return Intl::ToString(isolate, string.tempSubStringBetween(begin, end));
475 }
476 
477 namespace {
478 
InnerAddElement(Isolate * isolate,Handle<JSArray> array,int index,Handle<String> field_type_string,Handle<String> value)479 Handle<JSObject> InnerAddElement(Isolate* isolate, Handle<JSArray> array,
480                                  int index, Handle<String> field_type_string,
481                                  Handle<String> value) {
482   // let element = $array[$index] = {
483   //   type: $field_type_string,
484   //   value: $value
485   // }
486   // return element;
487   Factory* factory = isolate->factory();
488   Handle<JSObject> element = factory->NewJSObject(isolate->object_function());
489   JSObject::AddProperty(isolate, element, factory->type_string(),
490                         field_type_string, NONE);
491 
492   JSObject::AddProperty(isolate, element, factory->value_string(), value, NONE);
493   // TODO(victorgomes): Temporarily forcing a fatal error here in case of
494   // overflow, until Intl::AddElement can handle exceptions.
495   if (JSObject::AddDataElement(array, index, element, NONE).IsNothing()) {
496     FATAL("Fatal JavaScript invalid size error when adding element");
497     UNREACHABLE();
498   }
499   return element;
500 }
501 
502 }  // namespace
503 
AddElement(Isolate * isolate,Handle<JSArray> array,int index,Handle<String> field_type_string,Handle<String> value)504 void Intl::AddElement(Isolate* isolate, Handle<JSArray> array, int index,
505                       Handle<String> field_type_string, Handle<String> value) {
506   // Same as $array[$index] = {type: $field_type_string, value: $value};
507   InnerAddElement(isolate, array, index, field_type_string, value);
508 }
509 
AddElement(Isolate * isolate,Handle<JSArray> array,int index,Handle<String> field_type_string,Handle<String> value,Handle<String> additional_property_name,Handle<String> additional_property_value)510 void Intl::AddElement(Isolate* isolate, Handle<JSArray> array, int index,
511                       Handle<String> field_type_string, Handle<String> value,
512                       Handle<String> additional_property_name,
513                       Handle<String> additional_property_value) {
514   // Same as $array[$index] = {
515   //   type: $field_type_string, value: $value,
516   //   $additional_property_name: $additional_property_value
517   // }
518   Handle<JSObject> element =
519       InnerAddElement(isolate, array, index, field_type_string, value);
520   JSObject::AddProperty(isolate, element, additional_property_name,
521                         additional_property_value, NONE);
522 }
523 
524 namespace {
525 
526 // Build the shortened locale; eg, convert xx_Yyyy_ZZ  to xx_ZZ.
527 //
528 // If locale has a script tag then return true and the locale without the
529 // script else return false and an empty string.
RemoveLocaleScriptTag(const std::string & icu_locale,std::string * locale_less_script)530 bool RemoveLocaleScriptTag(const std::string& icu_locale,
531                            std::string* locale_less_script) {
532   icu::Locale new_locale = icu::Locale::createCanonical(icu_locale.c_str());
533   const char* icu_script = new_locale.getScript();
534   if (icu_script == nullptr || strlen(icu_script) == 0) {
535     *locale_less_script = std::string();
536     return false;
537   }
538 
539   const char* icu_language = new_locale.getLanguage();
540   const char* icu_country = new_locale.getCountry();
541   icu::Locale short_locale = icu::Locale(icu_language, icu_country);
542   *locale_less_script = short_locale.getName();
543   return true;
544 }
545 
ValidateResource(const icu::Locale locale,const char * path,const char * key)546 bool ValidateResource(const icu::Locale locale, const char* path,
547                       const char* key) {
548   bool result = false;
549   UErrorCode status = U_ZERO_ERROR;
550   UResourceBundle* bundle = ures_open(path, locale.getName(), &status);
551   if (bundle != nullptr && status == U_ZERO_ERROR) {
552     if (key == nullptr) {
553       result = true;
554     } else {
555       UResourceBundle* key_bundle =
556           ures_getByKey(bundle, key, nullptr, &status);
557       result = key_bundle != nullptr && (status == U_ZERO_ERROR);
558       ures_close(key_bundle);
559     }
560   }
561   ures_close(bundle);
562   if (!result) {
563     if ((locale.getCountry()[0] != '\0') && (locale.getScript()[0] != '\0')) {
564       // Fallback to try without country.
565       std::string without_country(locale.getLanguage());
566       without_country = without_country.append("-").append(locale.getScript());
567       return ValidateResource(without_country.c_str(), path, key);
568     } else if ((locale.getCountry()[0] != '\0') ||
569                (locale.getScript()[0] != '\0')) {
570       // Fallback to try with only language.
571       std::string language(locale.getLanguage());
572       return ValidateResource(language.c_str(), path, key);
573     }
574   }
575   return result;
576 }
577 
578 }  // namespace
579 
BuildLocaleSet(const std::vector<std::string> & icu_available_locales,const char * path,const char * validate_key)580 std::set<std::string> Intl::BuildLocaleSet(
581     const std::vector<std::string>& icu_available_locales, const char* path,
582     const char* validate_key) {
583   std::set<std::string> locales;
584   for (const std::string& locale : icu_available_locales) {
585     if (path != nullptr || validate_key != nullptr) {
586       if (!ValidateResource(icu::Locale(locale.c_str()), path, validate_key)) {
587         // FIXME(chromium:1215606) Find a beter fix for nb->no fallback
588         if (locale != "nb") {
589           continue;
590         }
591         // Try no for nb
592         if (!ValidateResource(icu::Locale("no"), path, validate_key)) {
593           continue;
594         }
595       }
596     }
597     locales.insert(locale);
598     std::string shortened_locale;
599     if (RemoveLocaleScriptTag(locale, &shortened_locale)) {
600       std::replace(shortened_locale.begin(), shortened_locale.end(), '_', '-');
601       locales.insert(shortened_locale);
602     }
603   }
604   return locales;
605 }
606 
ToLanguageTag(const icu::Locale & locale)607 Maybe<std::string> Intl::ToLanguageTag(const icu::Locale& locale) {
608   UErrorCode status = U_ZERO_ERROR;
609   std::string res = locale.toLanguageTag<std::string>(status);
610   if (U_FAILURE(status)) {
611     return Nothing<std::string>();
612   }
613   DCHECK(U_SUCCESS(status));
614   return Just(res);
615 }
616 
617 // See ecma402/#legacy-constructor.
LegacyUnwrapReceiver(Isolate * isolate,Handle<JSReceiver> receiver,Handle<JSFunction> constructor,bool has_initialized_slot)618 MaybeHandle<Object> Intl::LegacyUnwrapReceiver(Isolate* isolate,
619                                                Handle<JSReceiver> receiver,
620                                                Handle<JSFunction> constructor,
621                                                bool has_initialized_slot) {
622   Handle<Object> obj_ordinary_has_instance;
623   ASSIGN_RETURN_ON_EXCEPTION(
624       isolate, obj_ordinary_has_instance,
625       Object::OrdinaryHasInstance(isolate, constructor, receiver), Object);
626   bool ordinary_has_instance = obj_ordinary_has_instance->BooleanValue(isolate);
627 
628   // 2. If receiver does not have an [[Initialized...]] internal slot
629   //    and ? OrdinaryHasInstance(constructor, receiver) is true, then
630   if (!has_initialized_slot && ordinary_has_instance) {
631     // 2. a. Let new_receiver be ? Get(receiver, %Intl%.[[FallbackSymbol]]).
632     Handle<Object> new_receiver;
633     ASSIGN_RETURN_ON_EXCEPTION(
634         isolate, new_receiver,
635         JSReceiver::GetProperty(isolate, receiver,
636                                 isolate->factory()->intl_fallback_symbol()),
637         Object);
638     return new_receiver;
639   }
640 
641   return receiver;
642 }
643 
644 namespace {
645 
IsTwoLetterLanguage(const std::string & locale)646 bool IsTwoLetterLanguage(const std::string& locale) {
647   // Two letters, both in range 'a'-'z'...
648   return locale.length() == 2 && IsAsciiLower(locale[0]) &&
649          IsAsciiLower(locale[1]);
650 }
651 
// True iff |locale| is exactly one of the deprecated language tags
// (in, iw, ji, jw, mo) or one of the legacy language tags (sh, tl, no).
bool IsDeprecatedOrLegacyLanguage(const std::string& locale) {
  static constexpr const char* kDeprecatedOrLegacy[] = {
      // Deprecated language tags:
      "in", "iw", "ji", "jw", "mo",
      // Legacy language tags:
      "sh", "tl", "no",
  };
  for (const char* code : kDeprecatedOrLegacy) {
    if (locale == code) return true;
  }
  return false;
}
659 
IsStructurallyValidLanguageTag(const std::string & tag)660 bool IsStructurallyValidLanguageTag(const std::string& tag) {
661   return JSLocale::StartsWithUnicodeLanguageId(tag);
662 }
663 
664 // Canonicalize the locale.
665 // https://tc39.github.io/ecma402/#sec-canonicalizelanguagetag,
666 // including type check and structural validity check.
CanonicalizeLanguageTag(Isolate * isolate,const std::string & locale_in)667 Maybe<std::string> CanonicalizeLanguageTag(Isolate* isolate,
668                                            const std::string& locale_in) {
669   std::string locale = locale_in;
670 
671   if (locale.length() == 0 ||
672       !String::IsAscii(locale.data(), static_cast<int>(locale.length()))) {
673     THROW_NEW_ERROR_RETURN_VALUE(
674         isolate,
675         NewRangeError(
676             MessageTemplate::kInvalidLanguageTag,
677             isolate->factory()->NewStringFromAsciiChecked(locale.c_str())),
678         Nothing<std::string>());
679   }
680 
681   // Optimize for the most common case: a 2-letter language code in the
682   // canonical form/lowercase that is not one of the deprecated codes
683   // (in, iw, ji, jw). Don't check for ~70 of 3-letter deprecated language
684   // codes. Instead, let them be handled by ICU in the slow path. However,
685   // fast-track 'fil' (3-letter canonical code).
686   if ((IsTwoLetterLanguage(locale) && !IsDeprecatedOrLegacyLanguage(locale)) ||
687       locale == "fil") {
688     return Just(locale);
689   }
690 
691   // Because per BCP 47 2.1.1 language tags are case-insensitive, lowercase
692   // the input before any more check.
693   std::transform(locale.begin(), locale.end(), locale.begin(), ToAsciiLower);
694 
695   // // ECMA 402 6.2.3
696   // TODO(jshin): uloc_{for,to}TanguageTag can fail even for a structually valid
697   // language tag if it's too long (much longer than 100 chars). Even if we
698   // allocate a longer buffer, ICU will still fail if it's too long. Either
699   // propose to Ecma 402 to put a limit on the locale length or change ICU to
700   // handle long locale names better. See
701   // https://unicode-org.atlassian.net/browse/ICU-13417
702   UErrorCode error = U_ZERO_ERROR;
703   // uloc_forLanguageTag checks the structrual validity. If the input BCP47
704   // language tag is parsed all the way to the end, it indicates that the input
705   // is structurally valid. Due to a couple of bugs, we can't use it
706   // without Chromium patches or ICU 62 or earlier.
707   icu::Locale icu_locale = icu::Locale::forLanguageTag(locale.c_str(), error);
708 
709   if (U_FAILURE(error) || icu_locale.isBogus()) {
710     THROW_NEW_ERROR_RETURN_VALUE(
711         isolate,
712         NewRangeError(
713             MessageTemplate::kInvalidLanguageTag,
714             isolate->factory()->NewStringFromAsciiChecked(locale.c_str())),
715         Nothing<std::string>());
716   }
717 
718   // Use LocaleBuilder to validate locale.
719   icu_locale = icu::LocaleBuilder().setLocale(icu_locale).build(error);
720   icu_locale.canonicalize(error);
721   if (U_FAILURE(error) || icu_locale.isBogus()) {
722     THROW_NEW_ERROR_RETURN_VALUE(
723         isolate,
724         NewRangeError(
725             MessageTemplate::kInvalidLanguageTag,
726             isolate->factory()->NewStringFromAsciiChecked(locale.c_str())),
727         Nothing<std::string>());
728   }
729   Maybe<std::string> maybe_to_language_tag = Intl::ToLanguageTag(icu_locale);
730   if (maybe_to_language_tag.IsNothing()) {
731     THROW_NEW_ERROR_RETURN_VALUE(
732         isolate,
733         NewRangeError(
734             MessageTemplate::kInvalidLanguageTag,
735             isolate->factory()->NewStringFromAsciiChecked(locale.c_str())),
736         Nothing<std::string>());
737   }
738 
739   return maybe_to_language_tag;
740 }
741 
CanonicalizeLanguageTag(Isolate * isolate,Handle<Object> locale_in)742 Maybe<std::string> CanonicalizeLanguageTag(Isolate* isolate,
743                                            Handle<Object> locale_in) {
744   Handle<String> locale_str;
745   // This does part of the validity checking spec'ed in CanonicalizeLocaleList:
746   // 7c ii. If Type(kValue) is not String or Object, throw a TypeError
747   // exception.
748   // 7c iii. Let tag be ? ToString(kValue).
749   // 7c iv. If IsStructurallyValidLanguageTag(tag) is false, throw a
750   // RangeError exception.
751 
752   if (locale_in->IsString()) {
753     locale_str = Handle<String>::cast(locale_in);
754   } else if (locale_in->IsJSReceiver()) {
755     ASSIGN_RETURN_ON_EXCEPTION_VALUE(isolate, locale_str,
756                                      Object::ToString(isolate, locale_in),
757                                      Nothing<std::string>());
758   } else {
759     THROW_NEW_ERROR_RETURN_VALUE(isolate,
760                                  NewTypeError(MessageTemplate::kLanguageID),
761                                  Nothing<std::string>());
762   }
763   std::string locale(locale_str->ToCString().get());
764 
765   if (!IsStructurallyValidLanguageTag(locale)) {
766     THROW_NEW_ERROR_RETURN_VALUE(
767         isolate, NewRangeError(MessageTemplate::kLocaleBadParameters),
768         Nothing<std::string>());
769   }
770   return CanonicalizeLanguageTag(isolate, locale);
771 }
772 
773 }  // anonymous namespace
774 
CanonicalizeLocaleList(Isolate * isolate,Handle<Object> locales,bool only_return_one_result)775 Maybe<std::vector<std::string>> Intl::CanonicalizeLocaleList(
776     Isolate* isolate, Handle<Object> locales, bool only_return_one_result) {
777   // 1. If locales is undefined, then
778   if (locales->IsUndefined(isolate)) {
779     // 1a. Return a new empty List.
780     return Just(std::vector<std::string>());
781   }
782   // 2. Let seen be a new empty List.
783   std::vector<std::string> seen;
784   // 3. If Type(locales) is String or locales has an [[InitializedLocale]]
785   // internal slot,  then
786   if (locales->IsJSLocale()) {
787     // Since this value came from JSLocale, which is already went though the
788     // CanonializeLanguageTag process once, therefore there are no need to
789     // call CanonializeLanguageTag again.
790     seen.push_back(JSLocale::ToString(Handle<JSLocale>::cast(locales)));
791     return Just(seen);
792   }
793   if (locales->IsString()) {
794     // 3a. Let O be CreateArrayFromList(« locales »).
795     // Instead of creating a one-element array and then iterating over it,
796     // we inline the body of the iteration:
797     std::string canonicalized_tag;
798     if (!CanonicalizeLanguageTag(isolate, locales).To(&canonicalized_tag)) {
799       return Nothing<std::vector<std::string>>();
800     }
801     seen.push_back(canonicalized_tag);
802     return Just(seen);
803   }
804   // 4. Else,
805   // 4a. Let O be ? ToObject(locales).
806   Handle<JSReceiver> o;
807   ASSIGN_RETURN_ON_EXCEPTION_VALUE(isolate, o,
808                                    Object::ToObject(isolate, locales),
809                                    Nothing<std::vector<std::string>>());
810   // 5. Let len be ? ToLength(? Get(O, "length")).
811   Handle<Object> length_obj;
812   ASSIGN_RETURN_ON_EXCEPTION_VALUE(isolate, length_obj,
813                                    Object::GetLengthFromArrayLike(isolate, o),
814                                    Nothing<std::vector<std::string>>());
815   // TODO(jkummerow): Spec violation: strictly speaking, we have to iterate
816   // up to 2^53-1 if {length_obj} says so. Since cases above 2^32 probably
817   // don't happen in practice (and would be very slow if they do), we'll keep
818   // the code simple for now by using a saturating to-uint32 conversion.
819   double raw_length = length_obj->Number();
820   uint32_t len =
821       raw_length >= kMaxUInt32 ? kMaxUInt32 : static_cast<uint32_t>(raw_length);
822   // 6. Let k be 0.
823   // 7. Repeat, while k < len
824   for (uint32_t k = 0; k < len; k++) {
825     // 7a. Let Pk be ToString(k).
826     // 7b. Let kPresent be ? HasProperty(O, Pk).
827     LookupIterator it(isolate, o, k);
828     Maybe<bool> maybe_found = JSReceiver::HasProperty(&it);
829     MAYBE_RETURN(maybe_found, Nothing<std::vector<std::string>>());
830     // 7c. If kPresent is true, then
831     if (!maybe_found.FromJust()) continue;
832     // 7c i. Let kValue be ? Get(O, Pk).
833     Handle<Object> k_value;
834     ASSIGN_RETURN_ON_EXCEPTION_VALUE(isolate, k_value, Object::GetProperty(&it),
835                                      Nothing<std::vector<std::string>>());
836     // 7c ii. If Type(kValue) is not String or Object, throw a TypeError
837     // exception.
838     // 7c iii. If Type(kValue) is Object and kValue has an [[InitializedLocale]]
839     // internal slot, then
840     std::string canonicalized_tag;
841     if (k_value->IsJSLocale()) {
842       // 7c iii. 1. Let tag be kValue.[[Locale]].
843       canonicalized_tag = JSLocale::ToString(Handle<JSLocale>::cast(k_value));
844       // 7c iv. Else,
845     } else {
846       // 7c iv 1. Let tag be ? ToString(kValue).
847       // 7c v. If IsStructurallyValidLanguageTag(tag) is false, throw a
848       // RangeError exception.
849       // 7c vi. Let canonicalizedTag be CanonicalizeLanguageTag(tag).
850       if (!CanonicalizeLanguageTag(isolate, k_value).To(&canonicalized_tag)) {
851         return Nothing<std::vector<std::string>>();
852       }
853     }
854     // 7c vi. If canonicalizedTag is not an element of seen, append
855     // canonicalizedTag as the last element of seen.
856     if (std::find(seen.begin(), seen.end(), canonicalized_tag) == seen.end()) {
857       seen.push_back(canonicalized_tag);
858     }
859     // 7d. Increase k by 1. (See loop header.)
860     // Optimization: some callers only need one result.
861     if (only_return_one_result) return Just(seen);
862   }
863   // 8. Return seen.
864   return Just(seen);
865 }
866 
867 // ecma402 #sup-string.prototype.tolocalelowercase
868 // ecma402 #sup-string.prototype.tolocaleuppercase
StringLocaleConvertCase(Isolate * isolate,Handle<String> s,bool to_upper,Handle<Object> locales)869 MaybeHandle<String> Intl::StringLocaleConvertCase(Isolate* isolate,
870                                                   Handle<String> s,
871                                                   bool to_upper,
872                                                   Handle<Object> locales) {
873   std::vector<std::string> requested_locales;
874   if (!CanonicalizeLocaleList(isolate, locales, true).To(&requested_locales)) {
875     return MaybeHandle<String>();
876   }
877   std::string requested_locale = requested_locales.size() == 0
878                                      ? isolate->DefaultLocale()
879                                      : requested_locales[0];
880   size_t dash = requested_locale.find('-');
881   if (dash != std::string::npos) {
882     requested_locale = requested_locale.substr(0, dash);
883   }
884 
885   // Primary language tag can be up to 8 characters long in theory.
886   // https://tools.ietf.org/html/bcp47#section-2.2.1
887   DCHECK_LE(requested_locale.length(), 8);
888   s = String::Flatten(isolate, s);
889 
890   // All the languages requiring special-handling have two-letter codes.
891   // Note that we have to check for '!= 2' here because private-use language
892   // tags (x-foo) or grandfathered irregular tags (e.g. i-enochian) would have
893   // only 'x' or 'i' when they get here.
894   if (V8_UNLIKELY(requested_locale.length() != 2)) {
895     if (to_upper) {
896       return ConvertToUpper(isolate, s);
897     }
898     return ConvertToLower(isolate, s);
899   }
900   // TODO(jshin): Consider adding a fast path for ASCII or Latin-1. The fastpath
901   // in the root locale needs to be adjusted for az, lt and tr because even case
902   // mapping of ASCII range characters are different in those locales.
903   // Greek (el) does not require any adjustment.
904   if (V8_UNLIKELY((requested_locale == "tr") || (requested_locale == "el") ||
905                   (requested_locale == "lt") || (requested_locale == "az"))) {
906     return LocaleConvertCase(isolate, s, to_upper, requested_locale.c_str());
907   } else {
908     if (to_upper) {
909       return ConvertToUpper(isolate, s);
910     }
911     return ConvertToLower(isolate, s);
912   }
913 }
914 
915 // static
916 template <class IsolateT>
CompareStringsOptionsFor(IsolateT * isolate,Handle<Object> locales,Handle<Object> options)917 Intl::CompareStringsOptions Intl::CompareStringsOptionsFor(
918     IsolateT* isolate, Handle<Object> locales, Handle<Object> options) {
919   if (!options->IsUndefined(isolate)) {
920     return CompareStringsOptions::kNone;
921   }
922 
923   // Lists all of the available locales that are statically known to fulfill
924   // fast path conditions. See the StringLocaleCompareFastPath test as a
925   // starting point to update this list.
926   //
927   // Locale entries are roughly sorted s.t. common locales come first.
928   //
929   // The actual conditions are verified in debug builds in
930   // CollatorAllowsFastComparison.
931   static const char* const kFastLocales[] = {
932       "en-US", "en", "fr", "es",    "de",    "pt",    "it", "ca",
933       "de-AT", "fi", "id", "id-ID", "ms",    "nl",    "pl", "ro",
934       "sl",    "sv", "sw", "vi",    "en-DE", "en-GB",
935   };
936 
937   if (locales->IsUndefined(isolate)) {
938     const std::string& default_locale = isolate->DefaultLocale();
939     for (const char* fast_locale : kFastLocales) {
940       if (strcmp(fast_locale, default_locale.c_str()) == 0) {
941         return CompareStringsOptions::kTryFastPath;
942       }
943     }
944 
945     return CompareStringsOptions::kNone;
946   }
947 
948   if (!locales->IsString()) return CompareStringsOptions::kNone;
949 
950   Handle<String> locales_string = Handle<String>::cast(locales);
951   for (const char* fast_locale : kFastLocales) {
952     if (locales_string->IsEqualTo(base::CStrVector(fast_locale), isolate)) {
953       return CompareStringsOptions::kTryFastPath;
954     }
955   }
956 
957   return CompareStringsOptions::kNone;
958 }
959 
960 // Instantiations.
961 template Intl::CompareStringsOptions Intl::CompareStringsOptionsFor(
962     Isolate*, Handle<Object>, Handle<Object>);
963 template Intl::CompareStringsOptions Intl::CompareStringsOptionsFor(
964     LocalIsolate*, Handle<Object>, Handle<Object>);
965 
StringLocaleCompare(Isolate * isolate,Handle<String> string1,Handle<String> string2,Handle<Object> locales,Handle<Object> options,const char * method_name)966 base::Optional<int> Intl::StringLocaleCompare(
967     Isolate* isolate, Handle<String> string1, Handle<String> string2,
968     Handle<Object> locales, Handle<Object> options, const char* method_name) {
969   // We only cache the instance when locales is a string/undefined and
970   // options is undefined, as that is the only case when the specified
971   // side-effects of examining those arguments are unobservable.
972   const bool can_cache =
973       (locales->IsString() || locales->IsUndefined(isolate)) &&
974       options->IsUndefined(isolate);
975   // We may be able to take the fast path, depending on the `locales` and
976   // `options` arguments.
977   const CompareStringsOptions compare_strings_options =
978       CompareStringsOptionsFor(isolate, locales, options);
979   if (can_cache) {
980     // Both locales and options are undefined, check the cache.
981     icu::Collator* cached_icu_collator =
982         static_cast<icu::Collator*>(isolate->get_cached_icu_object(
983             Isolate::ICUObjectCacheType::kDefaultCollator, locales));
984     // We may use the cached icu::Collator for a fast path.
985     if (cached_icu_collator != nullptr) {
986       return Intl::CompareStrings(isolate, *cached_icu_collator, string1,
987                                   string2, compare_strings_options);
988     }
989   }
990 
991   Handle<JSFunction> constructor = Handle<JSFunction>(
992       JSFunction::cast(
993           isolate->context().native_context().intl_collator_function()),
994       isolate);
995 
996   Handle<JSCollator> collator;
997   MaybeHandle<JSCollator> maybe_collator =
998       New<JSCollator>(isolate, constructor, locales, options, method_name);
999   if (!maybe_collator.ToHandle(&collator)) return {};
1000   if (can_cache) {
1001     isolate->set_icu_object_in_cache(
1002         Isolate::ICUObjectCacheType::kDefaultCollator, locales,
1003         std::static_pointer_cast<icu::UMemory>(collator->icu_collator().get()));
1004   }
1005   icu::Collator* icu_collator = collator->icu_collator().raw();
1006   return Intl::CompareStrings(isolate, *icu_collator, string1, string2,
1007                               compare_strings_options);
1008 }
1009 
1010 namespace {
1011 
1012 // Weights for the Unicode Collation Algorithm for charcodes [0x00,0x7F].
1013 // https://unicode.org/reports/tr10/.
1014 //
1015 // Generated from:
1016 //
1017 // $ wget http://www.unicode.org/Public/UCA/latest/allkeys.txt
1018 // $ cat ~/allkeys.txt | grep '^00[0-7].  ;' | sort | sed 's/[*.]/ /g' |\
1019 //   sed 's/.*\[ \(.*\)\].*/\1/' | python ~/gen_weights.py
1020 //
1021 // Where gen_weights.py does an ordinal rank s.t. weights fit in a uint8_t:
1022 //
1023 //   import sys
1024 //
1025 //   def to_ordinal(ws):
1026 //       weight_map = {}
1027 //       weights_uniq_sorted = sorted(set(ws))
1028 //       for i in range(0, len(weights_uniq_sorted)):
1029 //           weight_map[weights_uniq_sorted[i]] = i
1030 //       return [weight_map[x] for x in ws]
1031 //
1032 //   def print_weight_list(array_name, ws):
1033 //       print("constexpr uint8_t %s[256] = {" % array_name, end = "")
1034 //       i = 0
1035 //       for w in ws:
1036 //           if (i % 16) == 0:
1037 //               print("\n  ", end = "")
1038 //           print("%3d," % w, end = "")
1039 //           i += 1
1040 //       print("\n};\n")
1041 //
1042 //   if __name__ == "__main__":
1043 //       l1s = []
1044 //       l3s = []
1045 //       for line in sys.stdin:
1046 //           weights = line.split()
1047 //           l1s.append(int(weights[0], 16))
1048 //           l3s.append(int(weights[2], 16))
1049 //       print_weight_list("kCollationWeightsL1", to_ordinal(l1s))
1050 //       print_weight_list("kCollationWeightsL3", to_ordinal(l3s))
1051 
1052 // clang-format off
1053 constexpr uint8_t kCollationWeightsL1[256] = {
1054     0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  2,  3,  4,  5,  0,  0,
1055     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
1056     6, 12, 16, 28, 38, 29, 27, 15, 17, 18, 24, 32,  9,  8, 14, 25,
1057    39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 11, 10, 33, 34, 35, 13,
1058    23, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
1059    64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 19, 26, 20, 31,  7,
1060    30, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
1061    64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 21, 36, 22, 37,  0,
1062 };
1063 constexpr uint8_t kCollationWeightsL3[256] = {
1064     0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  1,  1,  1,  1,  0,  0,
1065     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
1066     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
1067     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
1068     1,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
1069     2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  1,  1,  1,  1,  1,
1070     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
1071     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  0,
1072 };
1073 constexpr int kCollationWeightsLength = arraysize(kCollationWeightsL1);
1074 STATIC_ASSERT(kCollationWeightsLength == arraysize(kCollationWeightsL3));
1075 // clang-format on
1076 
1077 // Normalize a comparison delta (usually `lhs - rhs`) to UCollationResult
1078 // values.
ToUCollationResult(int delta)1079 constexpr UCollationResult ToUCollationResult(int delta) {
1080   return delta < 0 ? UCollationResult::UCOL_LESS
1081                    : (delta > 0 ? UCollationResult::UCOL_GREATER
1082                                 : UCollationResult::UCOL_EQUAL);
1083 }
1084 
1085 struct FastCompareStringsData {
1086   UCollationResult l1_result = UCollationResult::UCOL_EQUAL;
1087   UCollationResult l3_result = UCollationResult::UCOL_EQUAL;
1088   int processed_until = 0;
1089   int first_diff_at = 0;  // The first relevant diff (L1 if exists, else L3).
1090   bool has_diff = false;
1091 
FastCompareFailedv8::internal::__anon0bda2b820711::FastCompareStringsData1092   base::Optional<UCollationResult> FastCompareFailed(
1093       int* processed_until_out) const {
1094     if (has_diff) {
1095       // Found some difference, continue there to ensure the generic algorithm
1096       // picks it up.
1097       *processed_until_out = first_diff_at;
1098     } else {
1099       // No difference found, reprocess the last processed character since it
1100       // may be followed by a unicode combining character (which alters it's
1101       // meaning).
1102       *processed_until_out = std::max(processed_until - 1, 0);
1103     }
1104     return {};
1105   }
1106 };
1107 
1108 template <class CharT>
CanFastCompare(CharT c)1109 constexpr bool CanFastCompare(CharT c) {
1110   return c < kCollationWeightsLength && kCollationWeightsL1[c] != 0;
1111 }
1112 
1113 template <class Char1T, class Char2T>
FastCompareFlatString(const Char1T * lhs,const Char2T * rhs,int length,FastCompareStringsData * d)1114 bool FastCompareFlatString(const Char1T* lhs, const Char2T* rhs, int length,
1115                            FastCompareStringsData* d) {
1116   for (int i = 0; i < length; i++) {
1117     const Char1T l = lhs[i];
1118     const Char2T r = rhs[i];
1119     if (!CanFastCompare(l) || !CanFastCompare(r)) {
1120       d->processed_until = i;
1121       return false;
1122     }
1123     UCollationResult l1_result =
1124         ToUCollationResult(kCollationWeightsL1[l] - kCollationWeightsL1[r]);
1125     if (l1_result != UCollationResult::UCOL_EQUAL) {
1126       d->has_diff = true;
1127       d->first_diff_at = i;
1128       d->processed_until = i;
1129       d->l1_result = l1_result;
1130       return true;
1131     }
1132     if (l != r && d->l3_result == UCollationResult::UCOL_EQUAL) {
1133       // Collapse the two-pass algorithm into one: if we find a difference in
1134       // L1 weights, that is our result. If not, use the first L3 weight
1135       // difference.
1136       UCollationResult l3_result =
1137           ToUCollationResult(kCollationWeightsL3[l] - kCollationWeightsL3[r]);
1138       d->l3_result = l3_result;
1139       if (!d->has_diff) {
1140         d->has_diff = true;
1141         d->first_diff_at = i;
1142       }
1143     }
1144   }
1145   d->processed_until = length;
1146   return true;
1147 }
1148 
FastCompareStringFlatContent(const String::FlatContent & lhs,const String::FlatContent & rhs,int length,FastCompareStringsData * d)1149 bool FastCompareStringFlatContent(const String::FlatContent& lhs,
1150                                   const String::FlatContent& rhs, int length,
1151                                   FastCompareStringsData* d) {
1152   if (lhs.IsOneByte()) {
1153     base::Vector<const uint8_t> l = lhs.ToOneByteVector();
1154     if (rhs.IsOneByte()) {
1155       base::Vector<const uint8_t> r = rhs.ToOneByteVector();
1156       return FastCompareFlatString(l.data(), r.data(), length, d);
1157     } else {
1158       base::Vector<const uint16_t> r = rhs.ToUC16Vector();
1159       return FastCompareFlatString(l.data(), r.data(), length, d);
1160     }
1161   } else {
1162     base::Vector<const uint16_t> l = lhs.ToUC16Vector();
1163     if (rhs.IsOneByte()) {
1164       base::Vector<const uint8_t> r = rhs.ToOneByteVector();
1165       return FastCompareFlatString(l.data(), r.data(), length, d);
1166     } else {
1167       base::Vector<const uint16_t> r = rhs.ToUC16Vector();
1168       return FastCompareFlatString(l.data(), r.data(), length, d);
1169     }
1170   }
1171   UNREACHABLE();
1172 }
1173 
CharIsAsciiOrOutOfBounds(const String::FlatContent & string,int string_length,int index)1174 bool CharIsAsciiOrOutOfBounds(const String::FlatContent& string,
1175                               int string_length, int index) {
1176   DCHECK_EQ(string.length(), string_length);
1177   return index >= string_length || isascii(string.Get(index));
1178 }
1179 
CharCanFastCompareOrOutOfBounds(const String::FlatContent & string,int string_length,int index)1180 bool CharCanFastCompareOrOutOfBounds(const String::FlatContent& string,
1181                                      int string_length, int index) {
1182   DCHECK_EQ(string.length(), string_length);
1183   return index >= string_length || CanFastCompare(string.Get(index));
1184 }
1185 
1186 #ifdef DEBUG
USetContainsAllAsciiItem(USet * set)1187 bool USetContainsAllAsciiItem(USet* set) {
1188   static constexpr int kBufferSize = 64;
1189   UChar buffer[kBufferSize];
1190 
1191   const int length = uset_getItemCount(set);
1192   for (int i = 0; i < length; i++) {
1193     UChar32 start, end;
1194     UErrorCode status = U_ZERO_ERROR;
1195     const int item_length =
1196         uset_getItem(set, i, &start, &end, buffer, kBufferSize, &status);
1197     CHECK(U_SUCCESS(status));
1198     DCHECK_GE(item_length, 0);
1199 
1200     if (item_length == 0) {
1201       // Empty string or a range.
1202       if (isascii(start)) return true;
1203     } else {
1204       // A non-empty string.
1205       bool all_ascii = true;
1206       for (int j = 0; j < item_length; j++) {
1207         if (!isascii(buffer[j])) {
1208           all_ascii = false;
1209           break;
1210         }
1211       }
1212 
1213       if (all_ascii) return true;
1214     }
1215   }
1216 
1217   return false;
1218 }
1219 
CollatorAllowsFastComparison(const icu::Collator & icu_collator)1220 bool CollatorAllowsFastComparison(const icu::Collator& icu_collator) {
1221   UErrorCode status = U_ZERO_ERROR;
1222 
1223   icu::Locale icu_locale(icu_collator.getLocale(ULOC_VALID_LOCALE, status));
1224   DCHECK(U_SUCCESS(status));
1225 
1226   static constexpr int kBufferSize = 64;
1227   char buffer[kBufferSize];
1228   const int collation_keyword_length =
1229       icu_locale.getKeywordValue("collation", buffer, kBufferSize, status);
1230   DCHECK(U_SUCCESS(status));
1231   if (collation_keyword_length != 0) return false;
1232 
1233   // These attributes must be set to the expected value for fast comparisons.
1234   static constexpr struct {
1235     UColAttribute attribute;
1236     UColAttributeValue legal_value;
1237   } kAttributeChecks[] = {
1238       {UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE},
1239       {UCOL_CASE_FIRST, UCOL_OFF},
1240       {UCOL_CASE_LEVEL, UCOL_OFF},
1241       {UCOL_FRENCH_COLLATION, UCOL_OFF},
1242       {UCOL_NUMERIC_COLLATION, UCOL_OFF},
1243       {UCOL_STRENGTH, UCOL_TERTIARY},
1244   };
1245 
1246   for (const auto& check : kAttributeChecks) {
1247     if (icu_collator.getAttribute(check.attribute, status) !=
1248         check.legal_value) {
1249       return false;
1250     }
1251     DCHECK(U_SUCCESS(status));
1252   }
1253 
1254   // No reordering codes are allowed.
1255   int num_reorder_codes =
1256       ucol_getReorderCodes(icu_collator.toUCollator(), nullptr, 0, &status);
1257   if (num_reorder_codes != 0) return false;
1258   DCHECK(U_SUCCESS(status));  // Must check *after* num_reorder_codes != 0.
1259 
1260   // No tailored rules are allowed.
1261   int32_t rules_length = 0;
1262   ucol_getRules(icu_collator.toUCollator(), &rules_length);
1263   if (rules_length != 0) return false;
1264 
1265   USet* tailored_set = ucol_getTailoredSet(icu_collator.toUCollator(), &status);
1266   DCHECK(U_SUCCESS(status));
1267   if (USetContainsAllAsciiItem(tailored_set)) return false;
1268   uset_close(tailored_set);
1269 
1270   // No ASCII contractions or expansions are allowed.
1271   USet* contractions = uset_openEmpty();
1272   USet* expansions = uset_openEmpty();
1273   ucol_getContractionsAndExpansions(icu_collator.toUCollator(), contractions,
1274                                     expansions, true, &status);
1275   if (USetContainsAllAsciiItem(contractions)) return false;
1276   if (USetContainsAllAsciiItem(expansions)) return false;
1277   DCHECK(U_SUCCESS(status));
1278   uset_close(contractions);
1279   uset_close(expansions);
1280 
1281   return true;
1282 }
1283 #endif  // DEBUG
1284 
1285 // Fast comparison is implemented for charcodes for which the L1 collation
1286 // weight (see kCollationWeightsL1 above) is not 0.
1287 //
1288 // Note it's possible to partially process strings as long as their leading
1289 // characters all satisfy the above criteria. In that case, and if the L3
1290 // result is EQUAL, we set `processed_until_out` to the first non-processed
1291 // index - future processing can begin at that offset.
1292 //
1293 // This fast path looks somewhat complex; mostly because it combines multiple
1294 // passes into one. The pseudo-code for simplified multi-pass algorithm is:
1295 //
1296 // {
1297 //   // We can only fast-compare a certain subset of the ASCII range.
1298 //   // Additionally, unicode characters can change the meaning of preceding
1299 //   // characters, for example: "o\u0308" is treated like "ö".
1300 //   //
1301 //   // Note, in the actual single-pass algorithm below, we tolerate non-ASCII
1302 //   // contents outside the relevant range.
1303 //   for (int i = 0; i < string1.length; i++) {
1304 //     if (!CanFastCompare(string1[i])) return {};
1305 //   }
1306 //   for (int i = 0; i < string2.length; i++) {
1307 //     if (!CanFastCompare(string2[i])) return {};
1308 //   }
1309 //
1310 //   // Apply L1 weights.
1311 //   for (int i = 0; i < common_length; i++) {
1312 //     Char1T c1 = string1[i];
1313 //     Char2T c2 = string2[i];
1314 //     if (L1Weight[c1] != L1Weight[c2]) {
1315 //       return L1Weight[c1] - L1Weight[c2];
1316 //     }
1317 //   }
1318 //
1319 //   // Strings are L1-equal up to the common length; if lengths differ, the
1320 //   // longer string is treated as 'greater'.
1321 //   if (string1.length != string2.length) return string1.length - string2.length;
1322 //
1323 //   // Apply L3 weights.
1324 //   for (int i = 0; i < common_length; i++) {
1325 //     Char1T c1 = string1[i];
1326 //     Char2T c2 = string2[i];
1327 //     if (L3Weight[c1] != L3Weight[c2]) {
1328 //       return L3Weight[c1] - L3Weight[c2];
1329 //     }
1330 //   }
1331 //
1332 //   return UCOL_EQUAL;
1333 // }
TryFastCompareStrings(Isolate * isolate,const icu::Collator & icu_collator,Handle<String> string1,Handle<String> string2,int * processed_until_out)1334 base::Optional<UCollationResult> TryFastCompareStrings(
1335     Isolate* isolate, const icu::Collator& icu_collator, Handle<String> string1,
1336     Handle<String> string2, int* processed_until_out) {
1337   // TODO(jgruber): We could avoid the flattening (done by the caller) as well
1338   // by implementing comparison through string iteration. This has visible
1339   // performance benefits (e.g. 7% on CDJS) but complicates the code. Consider
1340   // doing this in the future.
1341   DCHECK(string1->IsFlat());
1342   DCHECK(string2->IsFlat());
1343 
1344   *processed_until_out = 0;
1345 
1346 #ifdef DEBUG
1347   // Checked by the caller, see CompareStringsOptionsFor.
1348   SLOW_DCHECK(CollatorAllowsFastComparison(icu_collator));
1349   USE(CollatorAllowsFastComparison);
1350 #endif  // DEBUG
1351 
1352   DCHECK(!SharedStringAccessGuardIfNeeded::IsNeeded(*string1));
1353   DCHECK(!SharedStringAccessGuardIfNeeded::IsNeeded(*string2));
1354 
1355   const int length1 = string1->length();
1356   const int length2 = string2->length();
1357   int common_length = std::min(length1, length2);
1358 
1359   FastCompareStringsData d;
1360   DisallowGarbageCollection no_gc;
1361   const String::FlatContent& flat1 = string1->GetFlatContent(no_gc);
1362   const String::FlatContent& flat2 = string2->GetFlatContent(no_gc);
1363   if (!FastCompareStringFlatContent(flat1, flat2, common_length, &d)) {
1364     DCHECK_EQ(d.l1_result, UCollationResult::UCOL_EQUAL);
1365     return d.FastCompareFailed(processed_until_out);
1366   }
1367 
1368   // The result is only valid if the last processed character is not followed
1369   // by a unicode combining character (we are overly strict and restrict to
1370   // ASCII).
1371   if (!CharIsAsciiOrOutOfBounds(flat1, length1, d.processed_until + 1) ||
1372       !CharIsAsciiOrOutOfBounds(flat2, length2, d.processed_until + 1)) {
1373     return d.FastCompareFailed(processed_until_out);
1374   }
1375 
1376   if (d.l1_result != UCollationResult::UCOL_EQUAL) {
1377     return d.l1_result;
1378   }
1379 
1380   // Strings are L1-equal up to their common length, length differences win.
1381   UCollationResult length_result = ToUCollationResult(length1 - length2);
1382   if (length_result != UCollationResult::UCOL_EQUAL) {
1383     // Strings of different lengths may still compare as equal if the longer
1384     // string has a fully ignored suffix, e.g. "a" vs. "a\u{1}".
1385     if (!CharCanFastCompareOrOutOfBounds(flat1, length1, common_length) ||
1386         !CharCanFastCompareOrOutOfBounds(flat2, length2, common_length)) {
1387       return d.FastCompareFailed(processed_until_out);
1388     }
1389     return length_result;
1390   }
1391 
1392   // L1-equal and same length, the L3 result wins.
1393   return d.l3_result;
1394 }
1395 
1396 }  // namespace
1397 
1398 // static
AsciiCollationWeightsL1()1399 const uint8_t* Intl::AsciiCollationWeightsL1() {
1400   return &kCollationWeightsL1[0];
1401 }
1402 
1403 // static
AsciiCollationWeightsL3()1404 const uint8_t* Intl::AsciiCollationWeightsL3() {
1405   return &kCollationWeightsL3[0];
1406 }
1407 
1408 // static
1409 const int Intl::kAsciiCollationWeightsLength = kCollationWeightsLength;
1410 
1411 // ecma402/#sec-collator-comparestrings
CompareStrings(Isolate * isolate,const icu::Collator & icu_collator,Handle<String> string1,Handle<String> string2,CompareStringsOptions compare_strings_options)1412 int Intl::CompareStrings(Isolate* isolate, const icu::Collator& icu_collator,
1413                          Handle<String> string1, Handle<String> string2,
1414                          CompareStringsOptions compare_strings_options) {
1415   // Early return for identical strings.
1416   if (string1.is_identical_to(string2)) {
1417     return UCollationResult::UCOL_EQUAL;
1418   }
1419 
1420   // Early return for empty strings.
1421   if (string1->length() == 0 || string2->length() == 0) {
1422     return ToUCollationResult(string1->length() - string2->length());
1423   }
1424 
1425   string1 = String::Flatten(isolate, string1);
1426   string2 = String::Flatten(isolate, string2);
1427 
1428   int processed_until = 0;
1429   if (compare_strings_options == CompareStringsOptions::kTryFastPath) {
1430     base::Optional<int> maybe_result = TryFastCompareStrings(
1431         isolate, icu_collator, string1, string2, &processed_until);
1432     if (maybe_result.has_value()) return maybe_result.value();
1433   }
1434 
1435   UCollationResult result;
1436   UErrorCode status = U_ZERO_ERROR;
1437   icu::StringPiece string_piece1 =
1438       ToICUStringPiece(isolate, string1, processed_until);
1439   if (!string_piece1.empty()) {
1440     icu::StringPiece string_piece2 =
1441         ToICUStringPiece(isolate, string2, processed_until);
1442     if (!string_piece2.empty()) {
1443       result = icu_collator.compareUTF8(string_piece1, string_piece2, status);
1444       DCHECK(U_SUCCESS(status));
1445       return result;
1446     }
1447   }
1448 
1449   icu::UnicodeString string_val1 =
1450       Intl::ToICUUnicodeString(isolate, string1, processed_until);
1451   icu::UnicodeString string_val2 =
1452       Intl::ToICUUnicodeString(isolate, string2, processed_until);
1453   result = icu_collator.compare(string_val1, string_val2, status);
1454   DCHECK(U_SUCCESS(status));
1455   return result;
1456 }
1457 
1458 // ecma402/#sup-properties-of-the-number-prototype-object
NumberToLocaleString(Isolate * isolate,Handle<Object> num,Handle<Object> locales,Handle<Object> options,const char * method_name)1459 MaybeHandle<String> Intl::NumberToLocaleString(Isolate* isolate,
1460                                                Handle<Object> num,
1461                                                Handle<Object> locales,
1462                                                Handle<Object> options,
1463                                                const char* method_name) {
1464   Handle<Object> numeric_obj;
1465   ASSIGN_RETURN_ON_EXCEPTION(isolate, numeric_obj,
1466                              Object::ToNumeric(isolate, num), String);
1467 
1468   // We only cache the instance when locales is a string/undefined and
1469   // options is undefined, as that is the only case when the specified
1470   // side-effects of examining those arguments are unobservable.
1471   bool can_cache = (locales->IsString() || locales->IsUndefined(isolate)) &&
1472                    options->IsUndefined(isolate);
1473   if (can_cache) {
1474     icu::number::LocalizedNumberFormatter* cached_number_format =
1475         static_cast<icu::number::LocalizedNumberFormatter*>(
1476             isolate->get_cached_icu_object(
1477                 Isolate::ICUObjectCacheType::kDefaultNumberFormat, locales));
1478     // We may use the cached icu::NumberFormat for a fast path.
1479     if (cached_number_format != nullptr) {
1480       return JSNumberFormat::FormatNumeric(isolate, *cached_number_format,
1481                                            numeric_obj);
1482     }
1483   }
1484 
1485   Handle<JSFunction> constructor = Handle<JSFunction>(
1486       JSFunction::cast(
1487           isolate->context().native_context().intl_number_format_function()),
1488       isolate);
1489   Handle<JSNumberFormat> number_format;
1490   // 2. Let numberFormat be ? Construct(%NumberFormat%, « locales, options »).
1491   ASSIGN_RETURN_ON_EXCEPTION(
1492       isolate, number_format,
1493       New<JSNumberFormat>(isolate, constructor, locales, options, method_name),
1494       String);
1495 
1496   if (can_cache) {
1497     isolate->set_icu_object_in_cache(
1498         Isolate::ICUObjectCacheType::kDefaultNumberFormat, locales,
1499         std::static_pointer_cast<icu::UMemory>(
1500             number_format->icu_number_formatter().get()));
1501   }
1502 
1503   // Return FormatNumber(numberFormat, x).
1504   icu::number::LocalizedNumberFormatter* icu_number_format =
1505       number_format->icu_number_formatter().raw();
1506   return JSNumberFormat::FormatNumeric(isolate, *icu_number_format,
1507                                        numeric_obj);
1508 }
1509 
SetNumberFormatDigitOptions(Isolate * isolate,Handle<JSReceiver> options,int mnfd_default,int mxfd_default,bool notation_is_compact)1510 Maybe<Intl::NumberFormatDigitOptions> Intl::SetNumberFormatDigitOptions(
1511     Isolate* isolate, Handle<JSReceiver> options, int mnfd_default,
1512     int mxfd_default, bool notation_is_compact) {
1513   Factory* factory = isolate->factory();
1514   Intl::NumberFormatDigitOptions digit_options;
1515 
1516   // 5. Let mnid be ? GetNumberOption(options, "minimumIntegerDigits,", 1, 21,
1517   // 1).
1518   int mnid = 1;
1519   if (!GetNumberOption(isolate, options, factory->minimumIntegerDigits_string(),
1520                        1, 21, 1)
1521            .To(&mnid)) {
1522     return Nothing<NumberFormatDigitOptions>();
1523   }
1524 
1525   // 6. Let mnfd be ? Get(options, "minimumFractionDigits").
1526   Handle<Object> mnfd_obj;
1527   ASSIGN_RETURN_ON_EXCEPTION_VALUE(
1528       isolate, mnfd_obj,
1529       JSReceiver::GetProperty(isolate, options,
1530                               factory->minimumFractionDigits_string()),
1531       Nothing<NumberFormatDigitOptions>());
1532 
1533   // 7. Let mxfd be ? Get(options, "maximumFractionDigits").
1534   Handle<Object> mxfd_obj;
1535   ASSIGN_RETURN_ON_EXCEPTION_VALUE(
1536       isolate, mxfd_obj,
1537       JSReceiver::GetProperty(isolate, options,
1538                               factory->maximumFractionDigits_string()),
1539       Nothing<NumberFormatDigitOptions>());
1540 
1541   // 8.  Let mnsd be ? Get(options, "minimumSignificantDigits").
1542   Handle<Object> mnsd_obj;
1543   ASSIGN_RETURN_ON_EXCEPTION_VALUE(
1544       isolate, mnsd_obj,
1545       JSReceiver::GetProperty(isolate, options,
1546                               factory->minimumSignificantDigits_string()),
1547       Nothing<NumberFormatDigitOptions>());
1548 
1549   // 9. Let mxsd be ? Get(options, "maximumSignificantDigits").
1550   Handle<Object> mxsd_obj;
1551   ASSIGN_RETURN_ON_EXCEPTION_VALUE(
1552       isolate, mxsd_obj,
1553       JSReceiver::GetProperty(isolate, options,
1554                               factory->maximumSignificantDigits_string()),
1555       Nothing<NumberFormatDigitOptions>());
1556 
1557   digit_options.rounding_priority = RoundingPriority::kAuto;
1558   digit_options.minimum_significant_digits = 0;
1559   digit_options.maximum_significant_digits = 0;
1560 
1561   // 10. Set intlObj.[[MinimumIntegerDigits]] to mnid.
1562   digit_options.minimum_integer_digits = mnid;
1563 
1564   if (FLAG_harmony_intl_number_format_v3) {
1565     // 11. Let roundingPriority be ? GetOption(options, "roundingPriority",
1566     // "string", « "auto", "morePrecision", "lessPrecision" », "auto").
1567 
1568     Maybe<RoundingPriority> maybe_rounding_priority =
1569         GetStringOption<RoundingPriority>(
1570             isolate, options, "roundingPriority", "SetNumberFormatDigitOptions",
1571             {"auto", "morePrecision", "lessPrecision"},
1572             {RoundingPriority::kAuto, RoundingPriority::kMorePrecision,
1573              RoundingPriority::kLessPrecision},
1574             RoundingPriority::kAuto);
1575     MAYBE_RETURN(maybe_rounding_priority, Nothing<NumberFormatDigitOptions>());
1576     digit_options.rounding_priority = maybe_rounding_priority.FromJust();
1577   }
1578 
1579   // 12. If mnsd is not undefined or mxsd is not undefined, then
1580   // a. Set hasSd to true.
1581   // 13. Else,
1582   // a. Set hasSd to false.
1583   bool has_sd =
1584       (!mnsd_obj->IsUndefined(isolate)) || (!mxsd_obj->IsUndefined(isolate));
1585 
1586   // 14. If mnfd is not undefined or mxfd is not undefined, then
1587   // a. Set hasFd to true.
1588   // 15. Else,
1589   // a. Set hasFd to false.
1590   bool has_fd =
1591       (!mnfd_obj->IsUndefined(isolate)) || (!mxfd_obj->IsUndefined(isolate));
1592 
1593   // 17. If hasSd or roundingPriority is not "auto", set needSd to true; else,
1594   // set needSd to false.
1595   bool need_sd =
1596       has_sd || (RoundingPriority::kAuto != digit_options.rounding_priority);
1597 
1598   // 18. If ( not hasSd and (hasFd or notation is not "compact") ) or
1599   // roundingPriority is not "auto", then a. Set needFd to true.
1600   // 19. Else,
1601   // a. Set needFd to false.
1602   bool need_fd = ((!has_sd) && (has_fd || !notation_is_compact)) ||
1603                  (RoundingPriority::kAuto != digit_options.rounding_priority);
1604 
1605   // 20. If needSd, then
1606   if (need_sd) {
1607     // 20.b If hasSd, then
1608     if (has_sd) {
1609       // 20.b.i Let mnsd be ? DefaultNumberOption(mnsd, 1, 21, 1).
1610       int mnsd;
1611       if (!DefaultNumberOption(isolate, mnsd_obj, 1, 21, 1,
1612                                factory->minimumSignificantDigits_string())
1613                .To(&mnsd)) {
1614         return Nothing<NumberFormatDigitOptions>();
1615       }
1616       // 20.b.ii Let mxsd be ? DefaultNumberOption(mxsd, mnsd, 21, 21).
1617       int mxsd;
1618       if (!DefaultNumberOption(isolate, mxsd_obj, mnsd, 21, 21,
1619                                factory->maximumSignificantDigits_string())
1620                .To(&mxsd)) {
1621         return Nothing<NumberFormatDigitOptions>();
1622       }
1623       // 20.b.iii Set intlObj.[[MinimumSignificantDigits]] to mnsd.
1624       digit_options.minimum_significant_digits = mnsd;
1625       // 20.b.iv Set intlObj.[[MaximumSignificantDigits]] to mxsd.
1626       digit_options.maximum_significant_digits = mxsd;
1627     } else {
1628       // 20.c Else
1629       // 20.c.i Set intlObj.[[MinimumSignificantDigits]] to 1.
1630       digit_options.minimum_significant_digits = 1;
1631       // 20.c.ii Set intlObj.[[MaximumSignificantDigits]] to 21.
1632       digit_options.maximum_significant_digits = 21;
1633     }
1634   }
1635 
1636   // 21. If needFd, then
1637   if (need_fd) {
1638     // 21.a If hasFd, then
1639     if (has_fd) {
1640       Handle<String> mnfd_str = factory->minimumFractionDigits_string();
1641       Handle<String> mxfd_str = factory->maximumFractionDigits_string();
1642       // 21.a.i Let mnfd be ? DefaultNumberOption(mnfd, 0, 20, undefined).
1643       int mnfd;
1644       if (!DefaultNumberOption(isolate, mnfd_obj, 0, 20, -1, mnfd_str)
1645                .To(&mnfd)) {
1646         return Nothing<NumberFormatDigitOptions>();
1647       }
1648       // 21.a.ii Let mxfd be ? DefaultNumberOption(mxfd, 0, 20, undefined).
1649       int mxfd;
1650       if (!DefaultNumberOption(isolate, mxfd_obj, 0, 20, -1, mxfd_str)
1651                .To(&mxfd)) {
1652         return Nothing<NumberFormatDigitOptions>();
1653       }
1654       // 21.a.iii If mnfd is undefined, set mnfd to min(mnfdDefault, mxfd).
1655       if (mnfd_obj->IsUndefined(isolate)) {
1656         mnfd = std::min(mnfd_default, mxfd);
1657       } else if (mxfd_obj->IsUndefined(isolate)) {
1658         // 21.a.iv Else if mxfd is undefined, set mxfd to max(mxfdDefault,
1659         // mnfd).
1660         mxfd = std::max(mxfd_default, mnfd);
1661       } else if (mnfd > mxfd) {
1662         // 21.a.v Else if mnfd is greater than mxfd, throw a RangeError
1663         // exception.
1664         THROW_NEW_ERROR_RETURN_VALUE(
1665             isolate,
1666             NewRangeError(MessageTemplate::kPropertyValueOutOfRange, mxfd_str),
1667             Nothing<NumberFormatDigitOptions>());
1668       }
1669       // 21.a.vi Set intlObj.[[MinimumFractionDigits]] to mnfd.
1670       digit_options.minimum_fraction_digits = mnfd;
1671       // 21.a.vii Set intlObj.[[MaximumFractionDigits]] to mxfd.
1672       digit_options.maximum_fraction_digits = mxfd;
1673     } else {  // 17.b Else
1674       // 21.b.i Set intlObj.[[MinimumFractionDigits]] to mnfdDefault.
1675       digit_options.minimum_fraction_digits = mnfd_default;
1676       // 21.b.ii Set intlObj.[[MaximumFractionDigits]] to mxfdDefault.
1677       digit_options.maximum_fraction_digits = mxfd_default;
1678     }
1679   }
1680 
1681   // 22. If needSd or needFd, then
1682   if (need_sd || need_fd) {
1683     // a. If roundingPriority is "morePrecision", then
1684     if (digit_options.rounding_priority == RoundingPriority::kMorePrecision) {
1685       // i. Set intlObj.[[RoundingType]] to morePrecision.
1686       digit_options.rounding_type = RoundingType::kMorePrecision;
1687       // b. Else if roundingPriority is "lessPrecision", then
1688     } else if (digit_options.rounding_priority ==
1689                RoundingPriority::kLessPrecision) {
1690       // i. Set intlObj.[[RoundingType]] to lessPrecision.
1691       digit_options.rounding_type = RoundingType::kLessPrecision;
1692       // c. Else if hasSd, then
1693     } else if (has_sd) {
1694       // i. Set intlObj.[[RoundingType]] to significantDigits.
1695       digit_options.rounding_type = RoundingType::kSignificantDigits;
1696       // d. Else,
1697     } else {
1698       // i.Set intlObj.[[RoundingType]] to fractionDigits.
1699       digit_options.rounding_type = RoundingType::kFractionDigits;
1700     }
1701     // 23. Else
1702   } else {
1703     // a. Set intlObj.[[RoundingType]] to morePrecision.
1704     digit_options.rounding_type = RoundingType::kMorePrecision;
1705     // b. Set intlObj.[[MinimumFractionDigits]] to 0.
1706     digit_options.minimum_fraction_digits = 0;
1707     // c. Set intlObj.[[MaximumFractionDigits]] to 0.
1708     digit_options.maximum_fraction_digits = 0;
1709     // d. Set intlObj.[[MinimumSignificantDigits]] to 1.
1710     digit_options.minimum_significant_digits = 1;
1711     // e. Set intlObj.[[MaximumSignificantDigits]] to 2.
1712     digit_options.maximum_significant_digits = 2;
1713   }
1714   return Just(digit_options);
1715 }
1716 
1717 namespace {
1718 
// ecma402/#sec-bestavailablelocale
// Returns the longest prefix of |locale|, cut at subtag boundaries, that is
// an element of |available_locales|, or the empty string (the spec's
// "undefined") when no prefix is available.
std::string BestAvailableLocale(const std::set<std::string>& available_locales,
                                const std::string& locale) {
  // 1. Let candidate be locale.
  std::string candidate = locale;

  // 2. Repeat,
  // 2.a. If availableLocales contains an element equal to candidate, return
  //      candidate.
  while (available_locales.count(candidate) == 0) {
    // 2.b. Let pos be the character index of the last occurrence of "-"
    //      (U+002D) within candidate. If that character does not occur, return
    //      undefined.
    size_t pos = candidate.rfind('-');
    if (pos == std::string::npos) {
      return std::string();
    }

    // 2.c. If pos ≥ 2 and the character "-" occurs at index pos-2 of candidate,
    //      decrease pos by 2. This drops a single-character subtag together
    //      with the subtag that followed it.
    if (pos >= 2 && candidate[pos - 2] == '-') {
      pos -= 2;
    }

    // 2.d. Let candidate be the substring of candidate from position 0,
    //      inclusive, to position pos, exclusive. Truncating in place avoids
    //      allocating a new string on every fallback step.
    candidate.resize(pos);
  }
  return candidate;
}
1752 
// A BCP 47 language tag split around its Unicode extension sequence.
struct ParsedLocale {
  // The tag with the Unicode extension sequence taken out.
  std::string no_extensions_locale;
  // The Unicode extension sequence that was taken out; empty if there was
  // none.
  std::string extension;
};
1757 
1758 // Returns a struct containing a bcp47 tag without unicode extensions
1759 // and the removed unicode extensions.
1760 //
1761 // For example, given 'en-US-u-co-emoji' returns 'en-US' and
1762 // 'u-co-emoji'.
ParseBCP47Locale(const std::string & locale)1763 ParsedLocale ParseBCP47Locale(const std::string& locale) {
1764   size_t length = locale.length();
1765   ParsedLocale parsed_locale;
1766 
1767   // Privateuse or grandfathered locales have no extension sequences.
1768   if ((length > 1) && (locale[1] == '-')) {
1769     // Check to make sure that this really is a grandfathered or
1770     // privateuse extension. ICU can sometimes mess up the
1771     // canonicalization.
1772     DCHECK(locale[0] == 'x' || locale[0] == 'i');
1773     parsed_locale.no_extensions_locale = locale;
1774     return parsed_locale;
1775   }
1776 
1777   size_t unicode_extension_start = locale.find("-u-");
1778 
1779   // No unicode extensions found.
1780   if (unicode_extension_start == std::string::npos) {
1781     parsed_locale.no_extensions_locale = locale;
1782     return parsed_locale;
1783   }
1784 
1785   size_t private_extension_start = locale.find("-x-");
1786 
1787   // Unicode extensions found within privateuse subtags don't count.
1788   if (private_extension_start != std::string::npos &&
1789       private_extension_start < unicode_extension_start) {
1790     parsed_locale.no_extensions_locale = locale;
1791     return parsed_locale;
1792   }
1793 
1794   const std::string beginning = locale.substr(0, unicode_extension_start);
1795   size_t unicode_extension_end = length;
1796   DCHECK_GT(length, 2);
1797 
1798   // Find the end of the extension production as per the bcp47 grammar
1799   // by looking for '-' followed by 2 chars and then another '-'.
1800   for (size_t i = unicode_extension_start + 1; i < length - 2; i++) {
1801     if (locale[i] != '-') continue;
1802 
1803     if (locale[i + 2] == '-') {
1804       unicode_extension_end = i;
1805       break;
1806     }
1807 
1808     i += 2;
1809   }
1810 
1811   const std::string end = locale.substr(unicode_extension_end);
1812   parsed_locale.no_extensions_locale = beginning + end;
1813   parsed_locale.extension = locale.substr(
1814       unicode_extension_start, unicode_extension_end - unicode_extension_start);
1815   return parsed_locale;
1816 }
1817 
1818 // ecma402/#sec-lookupsupportedlocales
LookupSupportedLocales(const std::set<std::string> & available_locales,const std::vector<std::string> & requested_locales)1819 std::vector<std::string> LookupSupportedLocales(
1820     const std::set<std::string>& available_locales,
1821     const std::vector<std::string>& requested_locales) {
1822   // 1. Let subset be a new empty List.
1823   std::vector<std::string> subset;
1824 
1825   // 2. For each element locale of requestedLocales in List order, do
1826   for (const std::string& locale : requested_locales) {
1827     // 2. a. Let noExtensionsLocale be the String value that is locale
1828     //       with all Unicode locale extension sequences removed.
1829     std::string no_extension_locale =
1830         ParseBCP47Locale(locale).no_extensions_locale;
1831 
1832     // 2. b. Let availableLocale be
1833     //       BestAvailableLocale(availableLocales, noExtensionsLocale).
1834     std::string available_locale =
1835         BestAvailableLocale(available_locales, no_extension_locale);
1836 
1837     // 2. c. If availableLocale is not undefined, append locale to the
1838     //       end of subset.
1839     if (!available_locale.empty()) {
1840       subset.push_back(locale);
1841     }
1842   }
1843 
1844   // 3. Return subset.
1845   return subset;
1846 }
1847 
BuildLocaleMatcher(Isolate * isolate,const std::set<std::string> & available_locales,UErrorCode * status)1848 icu::LocaleMatcher BuildLocaleMatcher(
1849     Isolate* isolate, const std::set<std::string>& available_locales,
1850     UErrorCode* status) {
1851   icu::Locale default_locale =
1852       icu::Locale::forLanguageTag(isolate->DefaultLocale(), *status);
1853   icu::LocaleMatcher::Builder builder;
1854   if (U_FAILURE(*status)) {
1855     return builder.build(*status);
1856   }
1857   builder.setDefaultLocale(&default_locale);
1858   for (auto it = available_locales.begin(); it != available_locales.end();
1859        ++it) {
1860     *status = U_ZERO_ERROR;
1861     icu::Locale l = icu::Locale::forLanguageTag(it->c_str(), *status);
1862     // skip invalid locale such as no-NO-NY
1863     if (U_SUCCESS(*status)) {
1864       builder.addSupportedLocale(l);
1865     }
1866   }
1867   return builder.build(*status);
1868 }
1869 
1870 class Iterator : public icu::Locale::Iterator {
1871  public:
Iterator(std::vector<std::string>::const_iterator begin,std::vector<std::string>::const_iterator end)1872   Iterator(std::vector<std::string>::const_iterator begin,
1873            std::vector<std::string>::const_iterator end)
1874       : iter_(begin), end_(end) {}
1875   ~Iterator() override = default;
1876 
hasNext() const1877   UBool hasNext() const override { return iter_ != end_; }
1878 
next()1879   const icu::Locale& next() override {
1880     UErrorCode status = U_ZERO_ERROR;
1881     locale_ = icu::Locale::forLanguageTag(iter_->c_str(), status);
1882     DCHECK(U_SUCCESS(status));
1883     ++iter_;
1884     return locale_;
1885   }
1886 
1887  private:
1888   std::vector<std::string>::const_iterator iter_;
1889   std::vector<std::string>::const_iterator end_;
1890   icu::Locale locale_;
1891 };
1892 
1893 // ecma402/#sec-bestfitmatcher
1894 // The BestFitMatcher abstract operation compares requestedLocales, which must
1895 // be a List as returned by CanonicalizeLocaleList, against the locales in
1896 // availableLocales and determines the best available language to meet the
1897 // request. The algorithm is implementation dependent, but should produce
1898 // results that a typical user of the requested locales would perceive
1899 // as at least as good as those produced by the LookupMatcher abstract
1900 // operation. Options specified through Unicode locale extension sequences must
1901 // be ignored by the algorithm. Information about such subsequences is returned
1902 // separately. The abstract operation returns a record with a [[locale]] field,
1903 // whose value is the language tag of the selected locale, which must be an
1904 // element of availableLocales. If the language tag of the request locale that
1905 // led to the selected locale contained a Unicode locale extension sequence,
1906 // then the returned record also contains an [[extension]] field whose value is
1907 // the first Unicode locale extension sequence within the request locale
1908 // language tag.
BestFitMatcher(Isolate * isolate,const std::set<std::string> & available_locales,const std::vector<std::string> & requested_locales)1909 std::string BestFitMatcher(Isolate* isolate,
1910                            const std::set<std::string>& available_locales,
1911                            const std::vector<std::string>& requested_locales) {
1912   UErrorCode status = U_ZERO_ERROR;
1913   Iterator iter(requested_locales.cbegin(), requested_locales.cend());
1914   std::string bestfit = BuildLocaleMatcher(isolate, available_locales, &status)
1915                             .getBestMatchResult(iter, status)
1916                             .makeResolvedLocale(status)
1917                             .toLanguageTag<std::string>(status);
1918   DCHECK(U_SUCCESS(status));
1919   return bestfit;
1920 }
1921 
1922 // ECMA 402 9.2.8 BestFitSupportedLocales(availableLocales, requestedLocales)
1923 // https://tc39.github.io/ecma402/#sec-bestfitsupportedlocales
BestFitSupportedLocales(Isolate * isolate,const std::set<std::string> & available_locales,const std::vector<std::string> & requested_locales)1924 std::vector<std::string> BestFitSupportedLocales(
1925     Isolate* isolate, const std::set<std::string>& available_locales,
1926     const std::vector<std::string>& requested_locales) {
1927   UErrorCode status = U_ZERO_ERROR;
1928   icu::LocaleMatcher matcher =
1929       BuildLocaleMatcher(isolate, available_locales, &status);
1930   std::vector<std::string> result;
1931   if (U_SUCCESS(status)) {
1932     for (auto it = requested_locales.cbegin(); it != requested_locales.cend();
1933          it++) {
1934       status = U_ZERO_ERROR;
1935       icu::Locale desired = icu::Locale::forLanguageTag(it->c_str(), status);
1936       icu::LocaleMatcher::Result matched =
1937           matcher.getBestMatchResult(desired, status);
1938       if (U_FAILURE(status)) continue;
1939       if (matched.getSupportedIndex() < 0) continue;
1940 
1941       // The BestFitSupportedLocales abstract operation returns the *SUBSET* of
1942       // the provided BCP 47 language priority list requestedLocales for which
1943       // availableLocales has a matching locale when using the Best Fit Matcher
1944       // algorithm. Locales appear in the same order in the returned list as in
1945       // requestedLocales. The steps taken are implementation dependent.
1946       std::string bestfit = desired.toLanguageTag<std::string>(status);
1947       if (U_FAILURE(status)) continue;
1948       result.push_back(bestfit);
1949     }
1950   }
1951   return result;
1952 }
1953 
1954 // ecma262 #sec-createarrayfromlist
CreateArrayFromList(Isolate * isolate,std::vector<std::string> elements,PropertyAttributes attr)1955 MaybeHandle<JSArray> CreateArrayFromList(Isolate* isolate,
1956                                          std::vector<std::string> elements,
1957                                          PropertyAttributes attr) {
1958   Factory* factory = isolate->factory();
1959   // Let array be ! ArrayCreate(0).
1960   Handle<JSArray> array = factory->NewJSArray(0);
1961 
1962   uint32_t length = static_cast<uint32_t>(elements.size());
1963   // 3. Let n be 0.
1964   // 4. For each element e of elements, do
1965   for (uint32_t i = 0; i < length; i++) {
1966     // a. Let status be CreateDataProperty(array, ! ToString(n), e).
1967     const std::string& part = elements[i];
1968     Handle<String> value =
1969         factory->NewStringFromUtf8(base::CStrVector(part.c_str()))
1970             .ToHandleChecked();
1971     MAYBE_RETURN(JSObject::AddDataElement(array, i, value, attr),
1972                  MaybeHandle<JSArray>());
1973   }
1974   // 5. Return array.
1975   return MaybeHandle<JSArray>(array);
1976 }
1977 
1978 // ECMA 402 9.2.9 SupportedLocales(availableLocales, requestedLocales, options)
1979 // https://tc39.github.io/ecma402/#sec-supportedlocales
SupportedLocales(Isolate * isolate,const char * method_name,const std::set<std::string> & available_locales,const std::vector<std::string> & requested_locales,Handle<Object> options)1980 MaybeHandle<JSObject> SupportedLocales(
1981     Isolate* isolate, const char* method_name,
1982     const std::set<std::string>& available_locales,
1983     const std::vector<std::string>& requested_locales, Handle<Object> options) {
1984   std::vector<std::string> supported_locales;
1985 
1986   // 1. Set options to ? CoerceOptionsToObject(options).
1987   Handle<JSReceiver> options_obj;
1988   ASSIGN_RETURN_ON_EXCEPTION(
1989       isolate, options_obj,
1990       CoerceOptionsToObject(isolate, options, method_name), JSObject);
1991 
1992   // 2. Let matcher be ? GetOption(options, "localeMatcher", "string",
1993   //       « "lookup", "best fit" », "best fit").
1994   Maybe<Intl::MatcherOption> maybe_locale_matcher =
1995       Intl::GetLocaleMatcher(isolate, options_obj, method_name);
1996   MAYBE_RETURN(maybe_locale_matcher, MaybeHandle<JSObject>());
1997   Intl::MatcherOption matcher = maybe_locale_matcher.FromJust();
1998 
1999   // 3. If matcher is "best fit", then
2000   //    a. Let supportedLocales be BestFitSupportedLocales(availableLocales,
2001   //       requestedLocales).
2002   if (matcher == Intl::MatcherOption::kBestFit &&
2003       FLAG_harmony_intl_best_fit_matcher) {
2004     supported_locales =
2005         BestFitSupportedLocales(isolate, available_locales, requested_locales);
2006   } else {
2007     // 4. Else,
2008     //    a. Let supportedLocales be LookupSupportedLocales(availableLocales,
2009     //       requestedLocales).
2010     supported_locales =
2011         LookupSupportedLocales(available_locales, requested_locales);
2012   }
2013 
2014   // 5. Return CreateArrayFromList(supportedLocales).
2015   return CreateArrayFromList(isolate, supported_locales,
2016                              PropertyAttributes::NONE);
2017 }
2018 
2019 }  // namespace
2020 
2021 // ecma-402 #sec-intl.getcanonicallocales
GetCanonicalLocales(Isolate * isolate,Handle<Object> locales)2022 MaybeHandle<JSArray> Intl::GetCanonicalLocales(Isolate* isolate,
2023                                                Handle<Object> locales) {
2024   // 1. Let ll be ? CanonicalizeLocaleList(locales).
2025   Maybe<std::vector<std::string>> maybe_ll =
2026       CanonicalizeLocaleList(isolate, locales, false);
2027   MAYBE_RETURN(maybe_ll, MaybeHandle<JSArray>());
2028 
2029   // 2. Return CreateArrayFromList(ll).
2030   return CreateArrayFromList(isolate, maybe_ll.FromJust(),
2031                              PropertyAttributes::NONE);
2032 }
2033 
2034 namespace {
2035 
AvailableCollations(Isolate * isolate)2036 MaybeHandle<JSArray> AvailableCollations(Isolate* isolate) {
2037   UErrorCode status = U_ZERO_ERROR;
2038   std::unique_ptr<icu::StringEnumeration> enumeration(
2039       icu::Collator::getKeywordValues("collation", status));
2040   if (U_FAILURE(status)) {
2041     THROW_NEW_ERROR(isolate, NewRangeError(MessageTemplate::kIcuError),
2042                     JSArray);
2043   }
2044   return Intl::ToJSArray(isolate, "co", enumeration.get(),
2045                          Intl::RemoveCollation, true);
2046 }
2047 
VectorToJSArray(Isolate * isolate,const std::vector<std::string> & array)2048 MaybeHandle<JSArray> VectorToJSArray(Isolate* isolate,
2049                                      const std::vector<std::string>& array) {
2050   Factory* factory = isolate->factory();
2051   Handle<FixedArray> fixed_array =
2052       factory->NewFixedArray(static_cast<int32_t>(array.size()));
2053   int32_t index = 0;
2054   for (std::string item : array) {
2055     Handle<String> str = factory->NewStringFromAsciiChecked(item.c_str());
2056     fixed_array->set(index++, *str);
2057   }
2058   return factory->NewJSArrayWithElements(fixed_array);
2059 }
2060 
2061 namespace {
2062 
2063 class ResourceAvailableCurrencies {
2064  public:
ResourceAvailableCurrencies()2065   ResourceAvailableCurrencies() {
2066     UErrorCode status = U_ZERO_ERROR;
2067     UEnumeration* uenum =
2068         ucurr_openISOCurrencies(UCURR_COMMON | UCURR_NON_DEPRECATED, &status);
2069     DCHECK(U_SUCCESS(status));
2070     const char* next = nullptr;
2071     while (U_SUCCESS(status) &&
2072            (next = uenum_next(uenum, nullptr, &status)) != nullptr) {
2073       // Work around the issue that we do not support VEF currency code
2074       // in DisplayNames by not reporting it.
2075       if (strcmp(next, "VEF") == 0) continue;
2076       AddIfAvailable(next);
2077     }
2078     // Work around the issue that we do support the following currency codes
2079     // in DisplayNames but the ICU API is not reporting it.
2080     AddIfAvailable("SVC");
2081     AddIfAvailable("XDR");
2082     AddIfAvailable("XSU");
2083     AddIfAvailable("ZWL");
2084     std::sort(list_.begin(), list_.end());
2085     uenum_close(uenum);
2086   }
2087 
Get() const2088   const std::vector<std::string>& Get() const { return list_; }
2089 
AddIfAvailable(const char * currency)2090   void AddIfAvailable(const char* currency) {
2091     icu::UnicodeString code(currency, -1, US_INV);
2092     UErrorCode status = U_ZERO_ERROR;
2093     int32_t len = 0;
2094     const UChar* result =
2095         ucurr_getName(code.getTerminatedBuffer(), "en", UCURR_LONG_NAME,
2096                       nullptr, &len, &status);
2097     if (U_SUCCESS(status) &&
2098         u_strcmp(result, code.getTerminatedBuffer()) != 0) {
2099       list_.push_back(currency);
2100     }
2101   }
2102 
2103  private:
2104   std::vector<std::string> list_;
2105 };
2106 
GetAvailableCurrencies()2107 const std::vector<std::string>& GetAvailableCurrencies() {
2108   static base::LazyInstance<ResourceAvailableCurrencies>::type
2109       available_currencies = LAZY_INSTANCE_INITIALIZER;
2110   return available_currencies.Pointer()->Get();
2111 }
2112 }  // namespace
2113 
AvailableCurrencies(Isolate * isolate)2114 MaybeHandle<JSArray> AvailableCurrencies(Isolate* isolate) {
2115   return VectorToJSArray(isolate, GetAvailableCurrencies());
2116 }
2117 
AvailableNumberingSystems(Isolate * isolate)2118 MaybeHandle<JSArray> AvailableNumberingSystems(Isolate* isolate) {
2119   UErrorCode status = U_ZERO_ERROR;
2120   std::unique_ptr<icu::StringEnumeration> enumeration(
2121       icu::NumberingSystem::getAvailableNames(status));
2122   if (U_FAILURE(status)) {
2123     THROW_NEW_ERROR(isolate, NewRangeError(MessageTemplate::kIcuError),
2124                     JSArray);
2125   }
2126   // Need to filter out isAlgorithmic
2127   return Intl::ToJSArray(
2128       isolate, "nu", enumeration.get(),
2129       [](const char* value) {
2130         UErrorCode status = U_ZERO_ERROR;
2131         std::unique_ptr<icu::NumberingSystem> numbering_system(
2132             icu::NumberingSystem::createInstanceByName(value, status));
2133         // Skip algorithmic one since chrome filter out the resource.
2134         return U_FAILURE(status) || numbering_system->isAlgorithmic();
2135       },
2136       true);
2137 }
2138 
AvailableTimeZones(Isolate * isolate)2139 MaybeHandle<JSArray> AvailableTimeZones(Isolate* isolate) {
2140   UErrorCode status = U_ZERO_ERROR;
2141   std::unique_ptr<icu::StringEnumeration> enumeration(
2142       icu::TimeZone::createTimeZoneIDEnumeration(
2143           UCAL_ZONE_TYPE_CANONICAL_LOCATION, nullptr, nullptr, status));
2144   if (U_FAILURE(status)) {
2145     THROW_NEW_ERROR(isolate, NewRangeError(MessageTemplate::kIcuError),
2146                     JSArray);
2147   }
2148   return Intl::ToJSArray(isolate, nullptr, enumeration.get(), nullptr, true);
2149 }
2150 
AvailableUnits(Isolate * isolate)2151 MaybeHandle<JSArray> AvailableUnits(Isolate* isolate) {
2152   Factory* factory = isolate->factory();
2153   std::set<std::string> sanctioned(Intl::SanctionedSimpleUnits());
2154   Handle<FixedArray> fixed_array =
2155       factory->NewFixedArray(static_cast<int32_t>(sanctioned.size()));
2156   int32_t index = 0;
2157   for (std::string item : sanctioned) {
2158     Handle<String> str = factory->NewStringFromAsciiChecked(item.c_str());
2159     fixed_array->set(index++, *str);
2160   }
2161   return factory->NewJSArrayWithElements(fixed_array);
2162 }
2163 
2164 }  // namespace
2165 
2166 // ecma-402 #sec-intl.supportedvaluesof
SupportedValuesOf(Isolate * isolate,Handle<Object> key_obj)2167 MaybeHandle<JSArray> Intl::SupportedValuesOf(Isolate* isolate,
2168                                              Handle<Object> key_obj) {
2169   Factory* factory = isolate->factory();
2170   // 1. 1. Let key be ? ToString(key).
2171   Handle<String> key_str;
2172   ASSIGN_RETURN_ON_EXCEPTION(isolate, key_str,
2173                              Object::ToString(isolate, key_obj), JSArray);
2174   // 2. If key is "calendar", then
2175   if (factory->calendar_string()->Equals(*key_str)) {
2176     // a. Let list be ! AvailableCalendars( ).
2177     return Intl::AvailableCalendars(isolate);
2178   }
2179   // 3. Else if key is "collation", then
2180   if (factory->collation_string()->Equals(*key_str)) {
2181     // a. Let list be ! AvailableCollations( ).
2182     return AvailableCollations(isolate);
2183   }
2184   // 4. Else if key is "currency", then
2185   if (factory->currency_string()->Equals(*key_str)) {
2186     // a. Let list be ! AvailableCurrencies( ).
2187     return AvailableCurrencies(isolate);
2188   }
2189   // 5. Else if key is "numberingSystem", then
2190   if (factory->numberingSystem_string()->Equals(*key_str)) {
2191     // a. Let list be ! AvailableNumberingSystems( ).
2192     return AvailableNumberingSystems(isolate);
2193   }
2194   // 6. Else if key is "timeZone", then
2195   if (factory->timeZone_string()->Equals(*key_str)) {
2196     // a. Let list be ! AvailableTimeZones( ).
2197     return AvailableTimeZones(isolate);
2198   }
2199   // 7. Else if key is "unit", then
2200   if (factory->unit_string()->Equals(*key_str)) {
2201     // a. Let list be ! AvailableUnits( ).
2202     return AvailableUnits(isolate);
2203   }
2204   // 8. Else,
2205   // a. Throw a RangeError exception.
2206   // 9. Return ! CreateArrayFromList( list ).
2207 
2208   THROW_NEW_ERROR(
2209       isolate,
2210       NewRangeError(MessageTemplate::kInvalid,
2211                     factory->NewStringFromStaticChars("key"), key_str),
2212       JSArray);
2213 }
2214 
2215 // ECMA 402 Intl.*.supportedLocalesOf
SupportedLocalesOf(Isolate * isolate,const char * method_name,const std::set<std::string> & available_locales,Handle<Object> locales,Handle<Object> options)2216 MaybeHandle<JSObject> Intl::SupportedLocalesOf(
2217     Isolate* isolate, const char* method_name,
2218     const std::set<std::string>& available_locales, Handle<Object> locales,
2219     Handle<Object> options) {
2220   // Let availableLocales be %Collator%.[[AvailableLocales]].
2221 
2222   // Let requestedLocales be ? CanonicalizeLocaleList(locales).
2223   Maybe<std::vector<std::string>> requested_locales =
2224       CanonicalizeLocaleList(isolate, locales, false);
2225   MAYBE_RETURN(requested_locales, MaybeHandle<JSObject>());
2226 
2227   // Return ? SupportedLocales(availableLocales, requestedLocales, options).
2228   return SupportedLocales(isolate, method_name, available_locales,
2229                           requested_locales.FromJust(), options);
2230 }
2231 
2232 namespace {
2233 
2234 template <typename T>
IsValidExtension(const icu::Locale & locale,const char * key,const std::string & value)2235 bool IsValidExtension(const icu::Locale& locale, const char* key,
2236                       const std::string& value) {
2237   const char* legacy_type = uloc_toLegacyType(key, value.c_str());
2238   if (legacy_type == nullptr) {
2239     return false;
2240   }
2241   UErrorCode status = U_ZERO_ERROR;
2242   std::unique_ptr<icu::StringEnumeration> enumeration(
2243       T::getKeywordValuesForLocale(key, icu::Locale(locale.getBaseName()),
2244                                    false, status));
2245   if (U_FAILURE(status)) {
2246     return false;
2247   }
2248   int32_t length;
2249   for (const char* item = enumeration->next(&length, status);
2250        U_SUCCESS(status) && item != nullptr;
2251        item = enumeration->next(&length, status)) {
2252     if (strcmp(legacy_type, item) == 0) {
2253       return true;
2254     }
2255   }
2256   return false;
2257 }
2258 
2259 }  // namespace
2260 
IsValidCollation(const icu::Locale & locale,const std::string & value)2261 bool Intl::IsValidCollation(const icu::Locale& locale,
2262                             const std::string& value) {
2263   std::set<std::string> invalid_values = {"standard", "search"};
2264   if (invalid_values.find(value) != invalid_values.end()) return false;
2265   return IsValidExtension<icu::Collator>(locale, "collation", value);
2266 }
2267 
IsWellFormedCalendar(const std::string & value)2268 bool Intl::IsWellFormedCalendar(const std::string& value) {
2269   return JSLocale::Is38AlphaNumList(value);
2270 }
2271 
2272 // ecma402/#sec-iswellformedcurrencycode
IsWellFormedCurrency(const std::string & currency)2273 bool Intl::IsWellFormedCurrency(const std::string& currency) {
2274   return JSLocale::Is3Alpha(currency);
2275 }
2276 
IsValidCalendar(const icu::Locale & locale,const std::string & value)2277 bool Intl::IsValidCalendar(const icu::Locale& locale,
2278                            const std::string& value) {
2279   return IsValidExtension<icu::Calendar>(locale, "calendar", value);
2280 }
2281 
IsValidNumberingSystem(const std::string & value)2282 bool Intl::IsValidNumberingSystem(const std::string& value) {
2283   std::set<std::string> invalid_values = {"native", "traditio", "finance"};
2284   if (invalid_values.find(value) != invalid_values.end()) return false;
2285   UErrorCode status = U_ZERO_ERROR;
2286   std::unique_ptr<icu::NumberingSystem> numbering_system(
2287       icu::NumberingSystem::createInstanceByName(value.c_str(), status));
2288   return U_SUCCESS(status) && numbering_system.get() != nullptr &&
2289          !numbering_system->isAlgorithmic();
2290 }
2291 
2292 namespace {
2293 
IsWellFormedNumberingSystem(const std::string & value)2294 bool IsWellFormedNumberingSystem(const std::string& value) {
2295   return JSLocale::Is38AlphaNumList(value);
2296 }
2297 
LookupAndValidateUnicodeExtensions(icu::Locale * icu_locale,const std::set<std::string> & relevant_keys)2298 std::map<std::string, std::string> LookupAndValidateUnicodeExtensions(
2299     icu::Locale* icu_locale, const std::set<std::string>& relevant_keys) {
2300   std::map<std::string, std::string> extensions;
2301 
2302   UErrorCode status = U_ZERO_ERROR;
2303   icu::LocaleBuilder builder;
2304   builder.setLocale(*icu_locale).clearExtensions();
2305   std::unique_ptr<icu::StringEnumeration> keywords(
2306       icu_locale->createKeywords(status));
2307   if (U_FAILURE(status)) return extensions;
2308 
2309   if (!keywords) return extensions;
2310   char value[ULOC_FULLNAME_CAPACITY];
2311 
2312   int32_t length;
2313   status = U_ZERO_ERROR;
2314   for (const char* keyword = keywords->next(&length, status);
2315        keyword != nullptr; keyword = keywords->next(&length, status)) {
2316     // Ignore failures in ICU and skip to the next keyword.
2317     //
2318     // This is fine.™
2319     if (U_FAILURE(status)) {
2320       status = U_ZERO_ERROR;
2321       continue;
2322     }
2323 
2324     icu_locale->getKeywordValue(keyword, value, ULOC_FULLNAME_CAPACITY, status);
2325 
2326     // Ignore failures in ICU and skip to the next keyword.
2327     //
2328     // This is fine.™
2329     if (U_FAILURE(status)) {
2330       status = U_ZERO_ERROR;
2331       continue;
2332     }
2333 
2334     const char* bcp47_key = uloc_toUnicodeLocaleKey(keyword);
2335 
2336     if (bcp47_key && (relevant_keys.find(bcp47_key) != relevant_keys.end())) {
2337       const char* bcp47_value = uloc_toUnicodeLocaleType(bcp47_key, value);
2338       bool is_valid_value = false;
2339       // 8.h.ii.1.a If keyLocaleData contains requestedValue, then
2340       if (strcmp("ca", bcp47_key) == 0) {
2341         is_valid_value = Intl::IsValidCalendar(*icu_locale, bcp47_value);
2342       } else if (strcmp("co", bcp47_key) == 0) {
2343         is_valid_value = Intl::IsValidCollation(*icu_locale, bcp47_value);
2344       } else if (strcmp("hc", bcp47_key) == 0) {
2345         // https://www.unicode.org/repos/cldr/tags/latest/common/bcp47/calendar.xml
2346         std::set<std::string> valid_values = {"h11", "h12", "h23", "h24"};
2347         is_valid_value = valid_values.find(bcp47_value) != valid_values.end();
2348       } else if (strcmp("lb", bcp47_key) == 0) {
2349         // https://www.unicode.org/repos/cldr/tags/latest/common/bcp47/segmentation.xml
2350         std::set<std::string> valid_values = {"strict", "normal", "loose"};
2351         is_valid_value = valid_values.find(bcp47_value) != valid_values.end();
2352       } else if (strcmp("kn", bcp47_key) == 0) {
2353         // https://www.unicode.org/repos/cldr/tags/latest/common/bcp47/collation.xml
2354         std::set<std::string> valid_values = {"true", "false"};
2355         is_valid_value = valid_values.find(bcp47_value) != valid_values.end();
2356       } else if (strcmp("kf", bcp47_key) == 0) {
2357         // https://www.unicode.org/repos/cldr/tags/latest/common/bcp47/collation.xml
2358         std::set<std::string> valid_values = {"upper", "lower", "false"};
2359         is_valid_value = valid_values.find(bcp47_value) != valid_values.end();
2360       } else if (strcmp("nu", bcp47_key) == 0) {
2361         is_valid_value = Intl::IsValidNumberingSystem(bcp47_value);
2362       }
2363       if (is_valid_value) {
2364         extensions.insert(
2365             std::pair<std::string, std::string>(bcp47_key, bcp47_value));
2366         builder.setUnicodeLocaleKeyword(bcp47_key, bcp47_value);
2367       }
2368     }
2369   }
2370 
2371   status = U_ZERO_ERROR;
2372   *icu_locale = builder.build(status);
2373 
2374   return extensions;
2375 }
2376 
2377 // ecma402/#sec-lookupmatcher
LookupMatcher(Isolate * isolate,const std::set<std::string> & available_locales,const std::vector<std::string> & requested_locales)2378 std::string LookupMatcher(Isolate* isolate,
2379                           const std::set<std::string>& available_locales,
2380                           const std::vector<std::string>& requested_locales) {
2381   // 1. Let result be a new Record.
2382   std::string result;
2383 
2384   // 2. For each element locale of requestedLocales in List order, do
2385   for (const std::string& locale : requested_locales) {
2386     // 2. a. Let noExtensionsLocale be the String value that is locale
2387     //       with all Unicode locale extension sequences removed.
2388     ParsedLocale parsed_locale = ParseBCP47Locale(locale);
2389     std::string no_extensions_locale = parsed_locale.no_extensions_locale;
2390 
2391     // 2. b. Let availableLocale be
2392     //       BestAvailableLocale(availableLocales, noExtensionsLocale).
2393     std::string available_locale =
2394         BestAvailableLocale(available_locales, no_extensions_locale);
2395 
2396     // 2. c. If availableLocale is not undefined, append locale to the
2397     //       end of subset.
2398     if (!available_locale.empty()) {
2399       // Note: The following steps are not performed here because we
2400       // can use ICU to parse the unicode locale extension sequence
2401       // as part of Intl::ResolveLocale.
2402       //
2403       // There's no need to separate the unicode locale extensions
2404       // right here. Instead just return the available locale with the
2405       // extensions.
2406       //
2407       // 2. c. i. Set result.[[locale]] to availableLocale.
2408       // 2. c. ii. If locale and noExtensionsLocale are not the same
2409       // String value, then
2410       // 2. c. ii. 1. Let extension be the String value consisting of
2411       // the first substring of locale that is a Unicode locale
2412       // extension sequence.
2413       // 2. c. ii. 2. Set result.[[extension]] to extension.
2414       // 2. c. iii. Return result.
2415       return available_locale + parsed_locale.extension;
2416     }
2417   }
2418 
2419   // 3. Let defLocale be DefaultLocale();
2420   // 4. Set result.[[locale]] to defLocale.
2421   // 5. Return result.
2422   return isolate->DefaultLocale();
2423 }
2424 
2425 }  // namespace
2426 
2427 // This function doesn't correspond exactly with the spec. Instead
2428 // we use ICU to do all the string manipulations that the spec
2429 // peforms.
2430 //
2431 // The spec uses this function to normalize values for various
2432 // relevant extension keys (such as disallowing "search" for
2433 // collation). Instead of doing this here, we let the callers of
2434 // this method perform such normalization.
2435 //
2436 // ecma402/#sec-resolvelocale
ResolveLocale(Isolate * isolate,const std::set<std::string> & available_locales,const std::vector<std::string> & requested_locales,MatcherOption matcher,const std::set<std::string> & relevant_extension_keys)2437 Maybe<Intl::ResolvedLocale> Intl::ResolveLocale(
2438     Isolate* isolate, const std::set<std::string>& available_locales,
2439     const std::vector<std::string>& requested_locales, MatcherOption matcher,
2440     const std::set<std::string>& relevant_extension_keys) {
2441   std::string locale;
2442   if (matcher == Intl::MatcherOption::kBestFit &&
2443       FLAG_harmony_intl_best_fit_matcher) {
2444     locale = BestFitMatcher(isolate, available_locales, requested_locales);
2445   } else {
2446     locale = LookupMatcher(isolate, available_locales, requested_locales);
2447   }
2448 
2449   Maybe<icu::Locale> maybe_icu_locale = CreateICULocale(locale);
2450   MAYBE_RETURN(maybe_icu_locale, Nothing<Intl::ResolvedLocale>());
2451   icu::Locale icu_locale = maybe_icu_locale.FromJust();
2452   std::map<std::string, std::string> extensions =
2453       LookupAndValidateUnicodeExtensions(&icu_locale, relevant_extension_keys);
2454 
2455   std::string canonicalized_locale = Intl::ToLanguageTag(icu_locale).FromJust();
2456 
2457   // TODO(gsathya): Remove privateuse subtags from extensions.
2458 
2459   return Just(
2460       Intl::ResolvedLocale{canonicalized_locale, icu_locale, extensions});
2461 }
2462 
SetTextToBreakIterator(Isolate * isolate,Handle<String> text,icu::BreakIterator * break_iterator)2463 Handle<Managed<icu::UnicodeString>> Intl::SetTextToBreakIterator(
2464     Isolate* isolate, Handle<String> text, icu::BreakIterator* break_iterator) {
2465   text = String::Flatten(isolate, text);
2466   icu::UnicodeString* u_text = static_cast<icu::UnicodeString*>(
2467       Intl::ToICUUnicodeString(isolate, text).clone());
2468 
2469   Handle<Managed<icu::UnicodeString>> new_u_text =
2470       Managed<icu::UnicodeString>::FromRawPtr(isolate, 0, u_text);
2471 
2472   break_iterator->setText(*u_text);
2473   return new_u_text;
2474 }
2475 
2476 // ecma262 #sec-string.prototype.normalize
Normalize(Isolate * isolate,Handle<String> string,Handle<Object> form_input)2477 MaybeHandle<String> Intl::Normalize(Isolate* isolate, Handle<String> string,
2478                                     Handle<Object> form_input) {
2479   const char* form_name;
2480   UNormalization2Mode form_mode;
2481   if (form_input->IsUndefined(isolate)) {
2482     // default is FNC
2483     form_name = "nfc";
2484     form_mode = UNORM2_COMPOSE;
2485   } else {
2486     Handle<String> form;
2487     ASSIGN_RETURN_ON_EXCEPTION(isolate, form,
2488                                Object::ToString(isolate, form_input), String);
2489 
2490     if (String::Equals(isolate, form, isolate->factory()->NFC_string())) {
2491       form_name = "nfc";
2492       form_mode = UNORM2_COMPOSE;
2493     } else if (String::Equals(isolate, form,
2494                               isolate->factory()->NFD_string())) {
2495       form_name = "nfc";
2496       form_mode = UNORM2_DECOMPOSE;
2497     } else if (String::Equals(isolate, form,
2498                               isolate->factory()->NFKC_string())) {
2499       form_name = "nfkc";
2500       form_mode = UNORM2_COMPOSE;
2501     } else if (String::Equals(isolate, form,
2502                               isolate->factory()->NFKD_string())) {
2503       form_name = "nfkc";
2504       form_mode = UNORM2_DECOMPOSE;
2505     } else {
2506       Handle<String> valid_forms =
2507           isolate->factory()->NewStringFromStaticChars("NFC, NFD, NFKC, NFKD");
2508       THROW_NEW_ERROR(
2509           isolate,
2510           NewRangeError(MessageTemplate::kNormalizationForm, valid_forms),
2511           String);
2512     }
2513   }
2514 
2515   int length = string->length();
2516   string = String::Flatten(isolate, string);
2517   icu::UnicodeString result;
2518   std::unique_ptr<base::uc16[]> sap;
2519   UErrorCode status = U_ZERO_ERROR;
2520   icu::UnicodeString input = ToICUUnicodeString(isolate, string);
2521   // Getting a singleton. Should not free it.
2522   const icu::Normalizer2* normalizer =
2523       icu::Normalizer2::getInstance(nullptr, form_name, form_mode, status);
2524   DCHECK(U_SUCCESS(status));
2525   DCHECK_NOT_NULL(normalizer);
2526   int32_t normalized_prefix_length =
2527       normalizer->spanQuickCheckYes(input, status);
2528   // Quick return if the input is already normalized.
2529   if (length == normalized_prefix_length) return string;
2530   icu::UnicodeString unnormalized =
2531       input.tempSubString(normalized_prefix_length);
2532   // Read-only alias of the normalized prefix.
2533   result.setTo(false, input.getBuffer(), normalized_prefix_length);
2534   // copy-on-write; normalize the suffix and append to |result|.
2535   normalizer->normalizeSecondAndAppend(result, unnormalized, status);
2536 
2537   if (U_FAILURE(status)) {
2538     THROW_NEW_ERROR(isolate, NewTypeError(MessageTemplate::kIcuError), String);
2539   }
2540 
2541   return Intl::ToString(isolate, result);
2542 }
2543 
2544 // ICUTimezoneCache calls out to ICU for TimezoneCache
2545 // functionality in a straightforward way.
2546 class ICUTimezoneCache : public base::TimezoneCache {
2547  public:
ICUTimezoneCache()2548   ICUTimezoneCache() : timezone_(nullptr) { Clear(TimeZoneDetection::kSkip); }
2549 
~ICUTimezoneCache()2550   ~ICUTimezoneCache() override { Clear(TimeZoneDetection::kSkip); }
2551 
2552   const char* LocalTimezone(double time_ms) override;
2553 
2554   double DaylightSavingsOffset(double time_ms) override;
2555 
2556   double LocalTimeOffset(double time_ms, bool is_utc) override;
2557 
2558   void Clear(TimeZoneDetection time_zone_detection) override;
2559 
2560  private:
2561   icu::TimeZone* GetTimeZone();
2562 
2563   bool GetOffsets(double time_ms, bool is_utc, int32_t* raw_offset,
2564                   int32_t* dst_offset);
2565 
2566   icu::TimeZone* timezone_;
2567 
2568   std::string timezone_name_;
2569   std::string dst_timezone_name_;
2570 };
2571 
LocalTimezone(double time_ms)2572 const char* ICUTimezoneCache::LocalTimezone(double time_ms) {
2573   bool is_dst = DaylightSavingsOffset(time_ms) != 0;
2574   std::string* name = is_dst ? &dst_timezone_name_ : &timezone_name_;
2575   if (name->empty()) {
2576     icu::UnicodeString result;
2577     GetTimeZone()->getDisplayName(is_dst, icu::TimeZone::LONG, result);
2578     result += '\0';
2579 
2580     icu::StringByteSink<std::string> byte_sink(name);
2581     result.toUTF8(byte_sink);
2582   }
2583   DCHECK(!name->empty());
2584   return name->c_str();
2585 }
2586 
GetTimeZone()2587 icu::TimeZone* ICUTimezoneCache::GetTimeZone() {
2588   if (timezone_ == nullptr) {
2589     timezone_ = icu::TimeZone::createDefault();
2590   }
2591   return timezone_;
2592 }
2593 
GetOffsets(double time_ms,bool is_utc,int32_t * raw_offset,int32_t * dst_offset)2594 bool ICUTimezoneCache::GetOffsets(double time_ms, bool is_utc,
2595                                   int32_t* raw_offset, int32_t* dst_offset) {
2596   UErrorCode status = U_ZERO_ERROR;
2597   if (is_utc) {
2598     GetTimeZone()->getOffset(time_ms, false, *raw_offset, *dst_offset, status);
2599   } else {
2600     // Note that casting TimeZone to BasicTimeZone is safe because we know that
2601     // icu::TimeZone used here is a BasicTimeZone.
2602     static_cast<const icu::BasicTimeZone*>(GetTimeZone())
2603         ->getOffsetFromLocal(time_ms, UCAL_TZ_LOCAL_FORMER,
2604                              UCAL_TZ_LOCAL_FORMER, *raw_offset, *dst_offset,
2605                              status);
2606   }
2607 
2608   return U_SUCCESS(status);
2609 }
2610 
DaylightSavingsOffset(double time_ms)2611 double ICUTimezoneCache::DaylightSavingsOffset(double time_ms) {
2612   int32_t raw_offset, dst_offset;
2613   if (!GetOffsets(time_ms, true, &raw_offset, &dst_offset)) return 0;
2614   return dst_offset;
2615 }
2616 
LocalTimeOffset(double time_ms,bool is_utc)2617 double ICUTimezoneCache::LocalTimeOffset(double time_ms, bool is_utc) {
2618   int32_t raw_offset, dst_offset;
2619   if (!GetOffsets(time_ms, is_utc, &raw_offset, &dst_offset)) return 0;
2620   return raw_offset + dst_offset;
2621 }
2622 
Clear(TimeZoneDetection time_zone_detection)2623 void ICUTimezoneCache::Clear(TimeZoneDetection time_zone_detection) {
2624   delete timezone_;
2625   timezone_ = nullptr;
2626   timezone_name_.clear();
2627   dst_timezone_name_.clear();
2628   if (time_zone_detection == TimeZoneDetection::kRedetect) {
2629     icu::TimeZone::adoptDefault(icu::TimeZone::detectHostTimeZone());
2630   }
2631 }
2632 
CreateTimeZoneCache()2633 base::TimezoneCache* Intl::CreateTimeZoneCache() {
2634   return FLAG_icu_timezone_data ? new ICUTimezoneCache()
2635                                 : base::OS::CreateTimezoneCache();
2636 }
2637 
GetLocaleMatcher(Isolate * isolate,Handle<JSReceiver> options,const char * method_name)2638 Maybe<Intl::MatcherOption> Intl::GetLocaleMatcher(Isolate* isolate,
2639                                                   Handle<JSReceiver> options,
2640                                                   const char* method_name) {
2641   return GetStringOption<Intl::MatcherOption>(
2642       isolate, options, "localeMatcher", method_name, {"best fit", "lookup"},
2643       {Intl::MatcherOption::kBestFit, Intl::MatcherOption::kLookup},
2644       Intl::MatcherOption::kBestFit);
2645 }
2646 
GetNumberingSystem(Isolate * isolate,Handle<JSReceiver> options,const char * method_name,std::unique_ptr<char[]> * result)2647 Maybe<bool> Intl::GetNumberingSystem(Isolate* isolate,
2648                                      Handle<JSReceiver> options,
2649                                      const char* method_name,
2650                                      std::unique_ptr<char[]>* result) {
2651   const std::vector<const char*> empty_values = {};
2652   Maybe<bool> maybe = GetStringOption(isolate, options, "numberingSystem",
2653                                       empty_values, method_name, result);
2654   MAYBE_RETURN(maybe, Nothing<bool>());
2655   if (maybe.FromJust() && *result != nullptr) {
2656     if (!IsWellFormedNumberingSystem(result->get())) {
2657       THROW_NEW_ERROR_RETURN_VALUE(
2658           isolate,
2659           NewRangeError(
2660               MessageTemplate::kInvalid,
2661               isolate->factory()->numberingSystem_string(),
2662               isolate->factory()->NewStringFromAsciiChecked(result->get())),
2663           Nothing<bool>());
2664     }
2665     return Just(true);
2666   }
2667   return Just(false);
2668 }
2669 
GetAvailableLocales()2670 const std::set<std::string>& Intl::GetAvailableLocales() {
2671   static base::LazyInstance<Intl::AvailableLocales<>>::type available_locales =
2672       LAZY_INSTANCE_INITIALIZER;
2673   return available_locales.Pointer()->Get();
2674 }
2675 
2676 namespace {
2677 
// Template argument for Intl::AvailableLocales (see
// Intl::GetAvailableLocalesForDateFormat): supplies the "calendar" key and
// no path.
struct CheckCalendar {
  static const char* key() {
    return "calendar";
  }
  static const char* path() {
    return nullptr;
  }
};
2682 
2683 }  // namespace
2684 
GetAvailableLocalesForDateFormat()2685 const std::set<std::string>& Intl::GetAvailableLocalesForDateFormat() {
2686   static base::LazyInstance<Intl::AvailableLocales<CheckCalendar>>::type
2687       available_locales = LAZY_INSTANCE_INITIALIZER;
2688   return available_locales.Pointer()->Get();
2689 }
2690 
2691 constexpr uint16_t kInfinityChar = 0x221e;
2692 
NumberFieldToType(Isolate * isolate,const NumberFormatSpan & part,const icu::UnicodeString & text,bool is_nan)2693 Handle<String> Intl::NumberFieldToType(Isolate* isolate,
2694                                        const NumberFormatSpan& part,
2695                                        const icu::UnicodeString& text,
2696                                        bool is_nan) {
2697   switch (static_cast<UNumberFormatFields>(part.field_id)) {
2698     case UNUM_INTEGER_FIELD:
2699       if (is_nan) return isolate->factory()->nan_string();
2700       if (text.charAt(part.begin_pos) == kInfinityChar ||
2701           // en-US-POSIX output "INF" for Infinity
2702           (part.end_pos - part.begin_pos == 3 &&
2703            text.tempSubString(part.begin_pos, 3) == "INF")) {
2704         return isolate->factory()->infinity_string();
2705       }
2706       return isolate->factory()->integer_string();
2707     case UNUM_FRACTION_FIELD:
2708       return isolate->factory()->fraction_string();
2709     case UNUM_DECIMAL_SEPARATOR_FIELD:
2710       return isolate->factory()->decimal_string();
2711     case UNUM_GROUPING_SEPARATOR_FIELD:
2712       return isolate->factory()->group_string();
2713     case UNUM_CURRENCY_FIELD:
2714       return isolate->factory()->currency_string();
2715     case UNUM_PERCENT_FIELD:
2716       return isolate->factory()->percentSign_string();
2717     case UNUM_SIGN_FIELD:
2718       return (text.charAt(part.begin_pos) == '+')
2719                  ? isolate->factory()->plusSign_string()
2720                  : isolate->factory()->minusSign_string();
2721     case UNUM_EXPONENT_SYMBOL_FIELD:
2722       return isolate->factory()->exponentSeparator_string();
2723 
2724     case UNUM_EXPONENT_SIGN_FIELD:
2725       return isolate->factory()->exponentMinusSign_string();
2726 
2727     case UNUM_EXPONENT_FIELD:
2728       return isolate->factory()->exponentInteger_string();
2729 
2730     case UNUM_PERMILL_FIELD:
2731       // We're not creating any permill formatter, and it's not even clear how
2732       // that would be possible with the ICU API.
2733       UNREACHABLE();
2734 
2735     case UNUM_COMPACT_FIELD:
2736       return isolate->factory()->compact_string();
2737     case UNUM_MEASURE_UNIT_FIELD:
2738       return isolate->factory()->unit_string();
2739 
2740     default:
2741       UNREACHABLE();
2742   }
2743 }
2744 
2745 // A helper function to convert the FormattedValue for several Intl objects.
FormattedToString(Isolate * isolate,const icu::FormattedValue & formatted)2746 MaybeHandle<String> Intl::FormattedToString(
2747     Isolate* isolate, const icu::FormattedValue& formatted) {
2748   UErrorCode status = U_ZERO_ERROR;
2749   icu::UnicodeString result = formatted.toString(status);
2750   if (U_FAILURE(status)) {
2751     THROW_NEW_ERROR(isolate, NewTypeError(MessageTemplate::kIcuError), String);
2752   }
2753   return Intl::ToString(isolate, result);
2754 }
2755 
ToJSArray(Isolate * isolate,const char * unicode_key,icu::StringEnumeration * enumeration,const std::function<bool (const char *)> & removes,bool sort)2756 MaybeHandle<JSArray> Intl::ToJSArray(
2757     Isolate* isolate, const char* unicode_key,
2758     icu::StringEnumeration* enumeration,
2759     const std::function<bool(const char*)>& removes, bool sort) {
2760   UErrorCode status = U_ZERO_ERROR;
2761   std::vector<std::string> array;
2762   for (const char* item = enumeration->next(nullptr, status);
2763        U_SUCCESS(status) && item != nullptr;
2764        item = enumeration->next(nullptr, status)) {
2765     if (unicode_key != nullptr) {
2766       item = uloc_toUnicodeLocaleType(unicode_key, item);
2767     }
2768     if (removes == nullptr || !(removes)(item)) {
2769       array.push_back(item);
2770     }
2771   }
2772 
2773   if (sort) {
2774     std::sort(array.begin(), array.end());
2775   }
2776   return VectorToJSArray(isolate, array);
2777 }
2778 
RemoveCollation(const char * collation)2779 bool Intl::RemoveCollation(const char* collation) {
2780   return strcmp("standard", collation) == 0 || strcmp("search", collation) == 0;
2781 }
2782 
2783 // See the list in ecma402 #sec-issanctionedsimpleunitidentifier
SanctionedSimpleUnits()2784 std::set<std::string> Intl::SanctionedSimpleUnits() {
2785   return std::set<std::string>({"acre",       "bit",        "byte",
2786                                 "celsius",    "centimeter", "day",
2787                                 "degree",     "fahrenheit", "fluid-ounce",
2788                                 "foot",       "gallon",     "gigabit",
2789                                 "gigabyte",   "gram",       "hectare",
2790                                 "hour",       "inch",       "kilobit",
2791                                 "kilobyte",   "kilogram",   "kilometer",
2792                                 "liter",      "megabit",    "megabyte",
2793                                 "meter",      "mile",       "mile-scandinavian",
2794                                 "millimeter", "milliliter", "millisecond",
2795                                 "minute",     "month",      "ounce",
2796                                 "percent",    "petabyte",   "pound",
2797                                 "second",     "stone",      "terabit",
2798                                 "terabyte",   "week",       "yard",
2799                                 "year"});
2800 }
2801 
2802 // ecma-402/#sec-isvalidtimezonename
2803 
2804 namespace {
IsUnicodeStringValidTimeZoneName(const icu::UnicodeString & id)2805 bool IsUnicodeStringValidTimeZoneName(const icu::UnicodeString& id) {
2806   UErrorCode status = U_ZERO_ERROR;
2807   icu::UnicodeString canonical;
2808   icu::TimeZone::getCanonicalID(id, canonical, status);
2809   return U_SUCCESS(status) &&
2810          canonical != icu::UnicodeString("Etc/Unknown", -1, US_INV);
2811 }
2812 }  // namespace
2813 
CanonicalizeTimeZoneName(Isolate * isolate,Handle<String> identifier)2814 MaybeHandle<String> Intl::CanonicalizeTimeZoneName(Isolate* isolate,
2815                                                    Handle<String> identifier) {
2816   UErrorCode status = U_ZERO_ERROR;
2817   std::string time_zone =
2818       JSDateTimeFormat::CanonicalizeTimeZoneID(identifier->ToCString().get());
2819   icu::UnicodeString time_zone_ustring =
2820       icu::UnicodeString(time_zone.c_str(), -1, US_INV);
2821   icu::UnicodeString canonical;
2822   icu::TimeZone::getCanonicalID(time_zone_ustring, canonical, status);
2823   CHECK(U_SUCCESS(status));
2824   if (canonical == UNICODE_STRING_SIMPLE("Etc/UTC") ||
2825       canonical == UNICODE_STRING_SIMPLE("Etc/GMT")) {
2826     return isolate->factory()->UTC_string();
2827   }
2828   return Intl::ToString(isolate, canonical);
2829 }
2830 
IsValidTimeZoneName(Isolate * isolate,Handle<String> id)2831 bool Intl::IsValidTimeZoneName(Isolate* isolate, Handle<String> id) {
2832   std::string time_zone =
2833       JSDateTimeFormat::CanonicalizeTimeZoneID(id->ToCString().get());
2834   icu::UnicodeString time_zone_ustring =
2835       icu::UnicodeString(time_zone.c_str(), -1, US_INV);
2836   return IsUnicodeStringValidTimeZoneName(time_zone_ustring);
2837 }
2838 
IsValidTimeZoneName(const icu::TimeZone & tz)2839 bool Intl::IsValidTimeZoneName(const icu::TimeZone& tz) {
2840   icu::UnicodeString id;
2841   tz.getID(id);
2842   return IsUnicodeStringValidTimeZoneName(id);
2843 }
2844 
2845 // Function to support Temporal
TimeZoneIdFromIndex(int32_t index)2846 std::string Intl::TimeZoneIdFromIndex(int32_t index) {
2847   if (index == 0) return "UTC";
2848   std::unique_ptr<icu::StringEnumeration> enumeration(
2849       icu::TimeZone::createEnumeration());
2850   int32_t curr = 0;
2851   const char* id;
2852 
2853   UErrorCode status = U_ZERO_ERROR;
2854   while (U_SUCCESS(status) && curr < index &&
2855          ((id = enumeration->next(nullptr, status)) != nullptr)) {
2856     CHECK(U_SUCCESS(status));
2857     curr++;
2858   }
2859   CHECK(U_SUCCESS(status));
2860   CHECK(id != nullptr);
2861   return id;
2862 }
2863 
GetTimeZoneIndex(Isolate * isolate,Handle<String> identifier,int32_t * index)2864 Maybe<bool> Intl::GetTimeZoneIndex(Isolate* isolate, Handle<String> identifier,
2865                                    int32_t* index) {
2866   if (identifier->Equals(*isolate->factory()->UTC_string())) {
2867     *index = 0;
2868     return Just(true);
2869   }
2870 
2871   std::string identifier_str(identifier->ToCString().get());
2872   std::unique_ptr<icu::TimeZone> tz(
2873       icu::TimeZone::createTimeZone(identifier_str.c_str()));
2874   if (!IsValidTimeZoneName(*tz)) {
2875     return Just(false);
2876   }
2877 
2878   std::unique_ptr<icu::StringEnumeration> enumeration(
2879       icu::TimeZone::createEnumeration());
2880   int32_t curr = 0;
2881   const char* id;
2882 
2883   UErrorCode status = U_ZERO_ERROR;
2884   while (U_SUCCESS(status) &&
2885          (id = enumeration->next(nullptr, status)) != nullptr) {
2886     if (identifier_str == id) {
2887       *index = curr + 1;
2888       return Just(true);
2889     }
2890     curr++;
2891   }
2892   CHECK(U_SUCCESS(status));
2893   // We should not reach here, the !IsValidTimeZoneName should return earlier
2894   UNREACHABLE();
2895 }
2896 
2897 // #sec-tointlmathematicalvalue
ToIntlMathematicalValueAsNumberBigIntOrString(Isolate * isolate,Handle<Object> input)2898 MaybeHandle<Object> Intl::ToIntlMathematicalValueAsNumberBigIntOrString(
2899     Isolate* isolate, Handle<Object> input) {
2900   if (input->IsNumber() || input->IsBigInt()) return input;  // Shortcut.
2901   // TODO(ftang) revisit the following after the resolution of
2902   // https://github.com/tc39/proposal-intl-numberformat-v3/pull/82
2903   if (input->IsOddball()) {
2904     return Oddball::ToNumber(isolate, Handle<Oddball>::cast(input));
2905   }
2906   if (input->IsSymbol()) {
2907     THROW_NEW_ERROR(isolate, NewTypeError(MessageTemplate::kSymbolToNumber),
2908                     Object);
2909   }
2910   ASSIGN_RETURN_ON_EXCEPTION(
2911       isolate, input,
2912       JSReceiver::ToPrimitive(isolate, Handle<JSReceiver>::cast(input),
2913                               ToPrimitiveHint::kNumber),
2914       Object);
2915   if (input->IsString()) UNIMPLEMENTED();
2916   return input;
2917 }
2918 
FormatRangeSourceTracker()2919 Intl::FormatRangeSourceTracker::FormatRangeSourceTracker() {
2920   start_[0] = start_[1] = limit_[0] = limit_[1] = 0;
2921 }
2922 
Add(int32_t field,int32_t start,int32_t limit)2923 void Intl::FormatRangeSourceTracker::Add(int32_t field, int32_t start,
2924                                          int32_t limit) {
2925   DCHECK_LT(field, 2);
2926   start_[field] = start;
2927   limit_[field] = limit;
2928 }
2929 
GetSource(int32_t start,int32_t limit) const2930 Intl::FormatRangeSource Intl::FormatRangeSourceTracker::GetSource(
2931     int32_t start, int32_t limit) const {
2932   FormatRangeSource source = FormatRangeSource::kShared;
2933   if (FieldContains(0, start, limit)) {
2934     source = FormatRangeSource::kStartRange;
2935   } else if (FieldContains(1, start, limit)) {
2936     source = FormatRangeSource::kEndRange;
2937   }
2938   return source;
2939 }
2940 
FieldContains(int32_t field,int32_t start,int32_t limit) const2941 bool Intl::FormatRangeSourceTracker::FieldContains(int32_t field, int32_t start,
2942                                                    int32_t limit) const {
2943   DCHECK_LT(field, 2);
2944   return (start_[field] <= start) && (start <= limit_[field]) &&
2945          (start_[field] <= limit) && (limit <= limit_[field]);
2946 }
2947 
SourceString(Isolate * isolate,FormatRangeSource source)2948 Handle<String> Intl::SourceString(Isolate* isolate, FormatRangeSource source) {
2949   switch (source) {
2950     case FormatRangeSource::kShared:
2951       return ReadOnlyRoots(isolate).shared_string_handle();
2952     case FormatRangeSource::kStartRange:
2953       return ReadOnlyRoots(isolate).startRange_string_handle();
2954     case FormatRangeSource::kEndRange:
2955       return ReadOnlyRoots(isolate).endRange_string_handle();
2956   }
2957 }
2958 
2959 }  // namespace internal
2960 }  // namespace v8
2961