1 // Copyright 2013 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #ifndef V8_INTL_SUPPORT
6 #error Internationalization is expected to be enabled.
7 #endif // V8_INTL_SUPPORT
8
9 #include "src/objects/intl-objects.h"
10
11 #include <algorithm>
12 #include <memory>
13 #include <string>
14 #include <vector>
15
16 #include "src/api/api-inl.h"
17 #include "src/base/strings.h"
18 #include "src/date/date.h"
19 #include "src/execution/isolate.h"
20 #include "src/execution/local-isolate.h"
21 #include "src/handles/global-handles.h"
22 #include "src/heap/factory.h"
23 #include "src/objects/js-collator-inl.h"
24 #include "src/objects/js-date-time-format-inl.h"
25 #include "src/objects/js-locale-inl.h"
26 #include "src/objects/js-locale.h"
27 #include "src/objects/js-number-format-inl.h"
28 #include "src/objects/managed-inl.h"
29 #include "src/objects/objects-inl.h"
30 #include "src/objects/option-utils.h"
31 #include "src/objects/property-descriptor.h"
32 #include "src/objects/smi.h"
33 #include "src/objects/string.h"
34 #include "src/strings/string-case.h"
35 #include "unicode/basictz.h"
36 #include "unicode/brkiter.h"
37 #include "unicode/calendar.h"
38 #include "unicode/coll.h"
39 #include "unicode/datefmt.h"
40 #include "unicode/decimfmt.h"
41 #include "unicode/formattedvalue.h"
42 #include "unicode/localebuilder.h"
43 #include "unicode/localematcher.h"
44 #include "unicode/locid.h"
45 #include "unicode/normalizer2.h"
46 #include "unicode/numberformatter.h"
47 #include "unicode/numfmt.h"
48 #include "unicode/numsys.h"
49 #include "unicode/timezone.h"
50 #include "unicode/ures.h"
51 #include "unicode/ustring.h"
52 #include "unicode/uvernum.h" // U_ICU_VERSION_MAJOR_NUM
53
// Two-level stringification helpers: XSTR() lets its argument be macro-expanded
// before STR() turns it into a string literal, so the assertion message below
// shows the value of V8_MINIMUM_ICU_VERSION rather than the macro's name.
#define XSTR(s) STR(s)
#define STR(s) #s
// Refuse to build against ICU headers whose major version is lower than the
// minimum version V8 is configured to require.
static_assert(
    V8_MINIMUM_ICU_VERSION <= U_ICU_VERSION_MAJOR_NUM,
    "v8 is required to build with ICU " XSTR(V8_MINIMUM_ICU_VERSION) " and up");
// The helpers are only needed for the assertion above.
#undef STR
#undef XSTR
61
62 namespace v8 {
63 namespace internal {
64
65 namespace {
66
// Lowercase mapping for every one-byte (Latin-1) code unit, indexed by the
// code unit itself:
//   - 0x41-0x5A ('A'-'Z') map to 0x61-0x7A ('a'-'z');
//   - 0xC0-0xDE map to 0xE0-0xFE, except 0xD7 which maps to itself;
//   - every other entry maps to itself.
constexpr uint8_t kToLower[256] = {
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B,
    0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
    0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23,
    0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B,
    0x3C, 0x3D, 0x3E, 0x3F, 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73,
    0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B,
    0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, 0x80, 0x81, 0x82, 0x83,
    0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B,
    0x9C, 0x9D, 0x9E, 0x9F, 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7,
    0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, 0xB0, 0xB1, 0xB2, 0xB3,
    0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB,
    0xEC, 0xED, 0xEE, 0xEF, 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xD7,
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF, 0xE0, 0xE1, 0xE2, 0xE3,
    0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB,
    0xFC, 0xFD, 0xFE, 0xFF,
};
91
ToLatin1Lower(uint16_t ch)92 inline constexpr uint16_t ToLatin1Lower(uint16_t ch) {
93 return static_cast<uint16_t>(kToLower[ch]);
94 }
95
96 // Does not work for U+00DF (sharp-s), U+00B5 (micron), U+00FF.
ToLatin1Upper(uint16_t ch)97 inline constexpr uint16_t ToLatin1Upper(uint16_t ch) {
98 DCHECK(ch != 0xDF && ch != 0xB5 && ch != 0xFF);
99 return ch &
100 ~((IsAsciiLower(ch) || (((ch & 0xE0) == 0xE0) && ch != 0xF7)) << 5);
101 }
102
103 template <typename Char>
ToUpperFastASCII(const base::Vector<const Char> & src,Handle<SeqOneByteString> result)104 bool ToUpperFastASCII(const base::Vector<const Char>& src,
105 Handle<SeqOneByteString> result) {
106 // Do a faster loop for the case where all the characters are ASCII.
107 uint16_t ored = 0;
108 int32_t index = 0;
109 for (auto it = src.begin(); it != src.end(); ++it) {
110 uint16_t ch = static_cast<uint16_t>(*it);
111 ored |= ch;
112 result->SeqOneByteStringSet(index++, ToAsciiUpper(ch));
113 }
114 return !(ored & ~0x7F);
115 }
116
117 const uint16_t sharp_s = 0xDF;
118
119 template <typename Char>
ToUpperOneByte(const base::Vector<const Char> & src,uint8_t * dest,int * sharp_s_count)120 bool ToUpperOneByte(const base::Vector<const Char>& src, uint8_t* dest,
121 int* sharp_s_count) {
122 // Still pretty-fast path for the input with non-ASCII Latin-1 characters.
123
124 // There are two special cases.
125 // 1. U+00B5 and U+00FF are mapped to a character beyond U+00FF.
126 // 2. Lower case sharp-S converts to "SS" (two characters)
127 *sharp_s_count = 0;
128 for (auto it = src.begin(); it != src.end(); ++it) {
129 uint16_t ch = static_cast<uint16_t>(*it);
130 if (V8_UNLIKELY(ch == sharp_s)) {
131 ++(*sharp_s_count);
132 continue;
133 }
134 if (V8_UNLIKELY(ch == 0xB5 || ch == 0xFF)) {
135 // Since this upper-cased character does not fit in an 8-bit string, we
136 // need to take the 16-bit path.
137 return false;
138 }
139 *dest++ = ToLatin1Upper(ch);
140 }
141
142 return true;
143 }
144
145 template <typename Char>
ToUpperWithSharpS(const base::Vector<const Char> & src,Handle<SeqOneByteString> result)146 void ToUpperWithSharpS(const base::Vector<const Char>& src,
147 Handle<SeqOneByteString> result) {
148 int32_t dest_index = 0;
149 for (auto it = src.begin(); it != src.end(); ++it) {
150 uint16_t ch = static_cast<uint16_t>(*it);
151 if (ch == sharp_s) {
152 result->SeqOneByteStringSet(dest_index++, 'S');
153 result->SeqOneByteStringSet(dest_index++, 'S');
154 } else {
155 result->SeqOneByteStringSet(dest_index++, ToLatin1Upper(ch));
156 }
157 }
158 }
159
FindFirstUpperOrNonAscii(String s,int length)160 inline int FindFirstUpperOrNonAscii(String s, int length) {
161 for (int index = 0; index < length; ++index) {
162 uint16_t ch = s.Get(index);
163 if (V8_UNLIKELY(IsAsciiUpper(ch) || ch & ~0x7F)) {
164 return index;
165 }
166 }
167 return length;
168 }
169
GetUCharBufferFromFlat(const String::FlatContent & flat,std::unique_ptr<base::uc16[]> * dest,int32_t length)170 const UChar* GetUCharBufferFromFlat(const String::FlatContent& flat,
171 std::unique_ptr<base::uc16[]>* dest,
172 int32_t length) {
173 DCHECK(flat.IsFlat());
174 if (flat.IsOneByte()) {
175 if (!*dest) {
176 dest->reset(NewArray<base::uc16>(length));
177 CopyChars(dest->get(), flat.ToOneByteVector().begin(), length);
178 }
179 return reinterpret_cast<const UChar*>(dest->get());
180 } else {
181 return reinterpret_cast<const UChar*>(flat.ToUC16Vector().begin());
182 }
183 }
184
185 template <typename T>
New(Isolate * isolate,Handle<JSFunction> constructor,Handle<Object> locales,Handle<Object> options,const char * method_name)186 MaybeHandle<T> New(Isolate* isolate, Handle<JSFunction> constructor,
187 Handle<Object> locales, Handle<Object> options,
188 const char* method_name) {
189 Handle<Map> map;
190 ASSIGN_RETURN_ON_EXCEPTION(
191 isolate, map,
192 JSFunction::GetDerivedMap(isolate, constructor, constructor), T);
193 return T::New(isolate, map, locales, options, method_name);
194 }
195 } // namespace
196
// Exposes the file-local Latin-1 lowercase table (kToLower, 256 entries) by
// returning a pointer to its first element.
const uint8_t* Intl::ToLatin1LowerTable() { return &kToLower[0]; }
198
ToICUUnicodeString(Isolate * isolate,Handle<String> string,int offset)199 icu::UnicodeString Intl::ToICUUnicodeString(Isolate* isolate,
200 Handle<String> string, int offset) {
201 DCHECK(string->IsFlat());
202 DisallowGarbageCollection no_gc;
203 std::unique_ptr<base::uc16[]> sap;
204 // Short one-byte strings can be expanded on the stack to avoid allocating a
205 // temporary buffer.
206 constexpr int kShortStringSize = 80;
207 UChar short_string_buffer[kShortStringSize];
208 const UChar* uchar_buffer = nullptr;
209 const String::FlatContent& flat = string->GetFlatContent(no_gc);
210 int32_t length = string->length();
211 DCHECK_LE(offset, length);
212 if (flat.IsOneByte() && length <= kShortStringSize) {
213 CopyChars(short_string_buffer, flat.ToOneByteVector().begin(), length);
214 uchar_buffer = short_string_buffer;
215 } else {
216 uchar_buffer = GetUCharBufferFromFlat(flat, &sap, length);
217 }
218 return icu::UnicodeString(uchar_buffer + offset, length - offset);
219 }
220
221 namespace {
222
ToICUStringPiece(Isolate * isolate,Handle<String> string,int offset=0)223 icu::StringPiece ToICUStringPiece(Isolate* isolate, Handle<String> string,
224 int offset = 0) {
225 DCHECK(string->IsFlat());
226 DisallowGarbageCollection no_gc;
227
228 const String::FlatContent& flat = string->GetFlatContent(no_gc);
229 if (!flat.IsOneByte()) return icu::StringPiece();
230
231 int32_t length = string->length();
232 DCHECK_LT(offset, length);
233 const char* char_buffer =
234 reinterpret_cast<const char*>(flat.ToOneByteVector().begin());
235 if (!String::IsAscii(char_buffer, length)) {
236 return icu::StringPiece();
237 }
238
239 return icu::StringPiece(char_buffer + offset, length - offset);
240 }
241
// Converts the flat string |s| to upper case (|is_to_upper| true) or lower
// case using ICU's u_strToUpper / u_strToLower with the locale |lang|, and
// returns the result as a two-byte string. The empty string is returned as
// the canonical empty string without calling ICU. Returns an empty handle if
// allocating the result string left a pending exception.
MaybeHandle<String> LocaleConvertCase(Isolate* isolate, Handle<String> s,
                                      bool is_to_upper, const char* lang) {
  auto case_converter = is_to_upper ? u_strToUpper : u_strToLower;
  int32_t src_length = s->length();
  // First guess: the converted string is as long as the source.
  int32_t dest_length = src_length;
  UErrorCode status;
  Handle<SeqTwoByteString> result;
  // Widened copy of a one-byte source; filled on the first pass and reused on
  // the second (see GetUCharBufferFromFlat).
  std::unique_ptr<base::uc16[]> sap;

  if (dest_length == 0) return ReadOnlyRoots(isolate).empty_string_handle();

  // This is not a real loop. It'll be executed only once (no overflow) or
  // twice (overflow).
  for (int i = 0; i < 2; ++i) {
    // Case conversion can increase the string length (e.g. sharp-S => SS) so
    // that we have to handle RangeError exceptions here.
    ASSIGN_RETURN_ON_EXCEPTION(
        isolate, result, isolate->factory()->NewRawTwoByteString(dest_length),
        String);
    DisallowGarbageCollection no_gc;
    DCHECK(s->IsFlat());
    String::FlatContent flat = s->GetFlatContent(no_gc);
    const UChar* src = GetUCharBufferFromFlat(flat, &sap, src_length);
    status = U_ZERO_ERROR;
    // The converter's return value becomes the destination length; after a
    // buffer overflow it is the size allocated on the second pass.
    dest_length =
        case_converter(reinterpret_cast<UChar*>(result->GetChars(no_gc)),
                       dest_length, src, src_length, lang, &status);
    if (status != U_BUFFER_OVERFLOW_ERROR) break;
  }

  // In most cases, the output will fill the destination buffer completely
  // leading to an unterminated string (U_STRING_NOT_TERMINATED_WARNING).
  // Only in rare cases, it'll be shorter than the destination buffer and
  // |result| has to be truncated.
  DCHECK(U_SUCCESS(status));
  if (V8_LIKELY(status == U_STRING_NOT_TERMINATED_WARNING)) {
    DCHECK(dest_length == result->length());
    return result;
  }
  DCHECK(dest_length < result->length());
  return SeqString::Truncate(result, dest_length);
}
284
285 } // namespace
286
287 // A stripped-down version of ConvertToLower that can only handle flat one-byte
288 // strings and does not allocate. Note that {src} could still be, e.g., a
289 // one-byte sliced string with a two-byte parent string.
290 // Called from TF builtins.
ConvertOneByteToLower(String src,String dst)291 String Intl::ConvertOneByteToLower(String src, String dst) {
292 DCHECK_EQ(src.length(), dst.length());
293 DCHECK(src.IsOneByteRepresentation());
294 DCHECK(src.IsFlat());
295 DCHECK(dst.IsSeqOneByteString());
296
297 DisallowGarbageCollection no_gc;
298
299 const int length = src.length();
300 String::FlatContent src_flat = src.GetFlatContent(no_gc);
301 uint8_t* dst_data = SeqOneByteString::cast(dst).GetChars(no_gc);
302
303 if (src_flat.IsOneByte()) {
304 const uint8_t* src_data = src_flat.ToOneByteVector().begin();
305
306 bool has_changed_character = false;
307 int index_to_first_unprocessed =
308 FastAsciiConvert<true>(reinterpret_cast<char*>(dst_data),
309 reinterpret_cast<const char*>(src_data), length,
310 &has_changed_character);
311
312 if (index_to_first_unprocessed == length) {
313 return has_changed_character ? dst : src;
314 }
315
316 // If not ASCII, we keep the result up to index_to_first_unprocessed and
317 // process the rest.
318 for (int index = index_to_first_unprocessed; index < length; ++index) {
319 dst_data[index] = ToLatin1Lower(static_cast<uint16_t>(src_data[index]));
320 }
321 } else {
322 DCHECK(src_flat.IsTwoByte());
323 int index_to_first_unprocessed = FindFirstUpperOrNonAscii(src, length);
324 if (index_to_first_unprocessed == length) return src;
325
326 const uint16_t* src_data = src_flat.ToUC16Vector().begin();
327 CopyChars(dst_data, src_data, index_to_first_unprocessed);
328 for (int index = index_to_first_unprocessed; index < length; ++index) {
329 dst_data[index] = ToLatin1Lower(static_cast<uint16_t>(src_data[index]));
330 }
331 }
332
333 return dst;
334 }
335
ConvertToLower(Isolate * isolate,Handle<String> s)336 MaybeHandle<String> Intl::ConvertToLower(Isolate* isolate, Handle<String> s) {
337 if (!s->IsOneByteRepresentation()) {
338 // Use a slower implementation for strings with characters beyond U+00FF.
339 return LocaleConvertCase(isolate, s, false, "");
340 }
341
342 int length = s->length();
343
344 // We depend here on the invariant that the length of a Latin1
345 // string is invariant under ToLowerCase, and the result always
346 // fits in the Latin1 range in the *root locale*. It does not hold
347 // for ToUpperCase even in the root locale.
348
349 // Scan the string for uppercase and non-ASCII characters for strings
350 // shorter than a machine-word without any memory allocation overhead.
351 // TODO(jshin): Apply this to a longer input by breaking FastAsciiConvert()
352 // to two parts, one for scanning the prefix with no change and the other for
353 // handling ASCII-only characters.
354
355 bool is_short = length < static_cast<int>(sizeof(uintptr_t));
356 if (is_short) {
357 bool is_lower_ascii = FindFirstUpperOrNonAscii(*s, length) == length;
358 if (is_lower_ascii) return s;
359 }
360
361 Handle<SeqOneByteString> result =
362 isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
363
364 return Handle<String>(Intl::ConvertOneByteToLower(*s, *result), isolate);
365 }
366
// Returns the uppercase form of |s| in the root locale.
//
// Non-empty strings with a one-byte representation are handled in stages:
//   1. an ASCII-only fast path (FastAsciiConvert / ToUpperFastASCII);
//   2. a Latin-1 path (ToUpperOneByte) that counts sharp-s characters and
//      gives up on characters whose uppercase form is beyond U+00FF;
//   3. if sharp-s characters were seen, a second pass into a longer string
//      (ToUpperWithSharpS).
// Everything else, including the give-up case of stage 2, goes through ICU via
// LocaleConvertCase(). Returns an empty handle if allocating the longer string
// in stage 3 left a pending exception.
MaybeHandle<String> Intl::ConvertToUpper(Isolate* isolate, Handle<String> s) {
  int32_t length = s->length();
  if (s->IsOneByteRepresentation() && length > 0) {
    Handle<SeqOneByteString> result =
        isolate->factory()->NewRawOneByteString(length).ToHandleChecked();

    DCHECK(s->IsFlat());
    // Both are assigned on every path that leaves the scope below without
    // returning.
    int sharp_s_count;
    bool is_result_single_byte;
    {
      DisallowGarbageCollection no_gc;
      String::FlatContent flat = s->GetFlatContent(no_gc);
      uint8_t* dest = result->GetChars(no_gc);
      if (flat.IsOneByte()) {
        base::Vector<const uint8_t> src = flat.ToOneByteVector();
        bool has_changed_character = false;
        int index_to_first_unprocessed = FastAsciiConvert<false>(
            reinterpret_cast<char*>(result->GetChars(no_gc)),
            reinterpret_cast<const char*>(src.begin()), length,
            &has_changed_character);
        if (index_to_first_unprocessed == length) {
          // All ASCII; hand back the input itself if it was already uppercase.
          return has_changed_character ? result : s;
        }
        // If not ASCII, we keep the result up to index_to_first_unprocessed and
        // process the rest.
        is_result_single_byte =
            ToUpperOneByte(src.SubVector(index_to_first_unprocessed, length),
                           dest + index_to_first_unprocessed, &sharp_s_count);
      } else {
        DCHECK(flat.IsTwoByte());
        base::Vector<const uint16_t> src = flat.ToUC16Vector();
        if (ToUpperFastASCII(src, result)) return result;
        is_result_single_byte = ToUpperOneByte(src, dest, &sharp_s_count);
      }
    }

    // Go to the full Unicode path if there are characters whose uppercase
    // is beyond the Latin-1 range (cannot be represented in OneByteString).
    if (V8_UNLIKELY(!is_result_single_byte)) {
      return LocaleConvertCase(isolate, s, true, "");
    }

    if (sharp_s_count == 0) return result;

    // We have sharp_s_count sharp-s characters, but the result is still
    // in the Latin-1 range.
    // Each sharp-s becomes "SS", so the result needs one extra character per
    // sharp-s; the whole input is converted again into the new string.
    ASSIGN_RETURN_ON_EXCEPTION(
        isolate, result,
        isolate->factory()->NewRawOneByteString(length + sharp_s_count),
        String);
    DisallowGarbageCollection no_gc;
    String::FlatContent flat = s->GetFlatContent(no_gc);
    if (flat.IsOneByte()) {
      ToUpperWithSharpS(flat.ToOneByteVector(), result);
    } else {
      ToUpperWithSharpS(flat.ToUC16Vector(), result);
    }

    return result;
  }

  // Empty strings and strings without a one-byte representation.
  return LocaleConvertCase(isolate, s, true, "");
}
430
GetNumberingSystem(const icu::Locale & icu_locale)431 std::string Intl::GetNumberingSystem(const icu::Locale& icu_locale) {
432 // Ugly hack. ICU doesn't expose numbering system in any way, so we have
433 // to assume that for given locale NumberingSystem constructor produces the
434 // same digits as NumberFormat/Calendar would.
435 UErrorCode status = U_ZERO_ERROR;
436 std::unique_ptr<icu::NumberingSystem> numbering_system(
437 icu::NumberingSystem::createInstance(icu_locale, status));
438 if (U_SUCCESS(status) && !numbering_system->isAlgorithmic()) {
439 return numbering_system->getName();
440 }
441 return "latn";
442 }
443
444 namespace {
445
CreateICULocale(const std::string & bcp47_locale)446 Maybe<icu::Locale> CreateICULocale(const std::string& bcp47_locale) {
447 DisallowGarbageCollection no_gc;
448
449 // Convert BCP47 into ICU locale format.
450 UErrorCode status = U_ZERO_ERROR;
451
452 icu::Locale icu_locale = icu::Locale::forLanguageTag(bcp47_locale, status);
453 DCHECK(U_SUCCESS(status));
454 if (icu_locale.isBogus()) {
455 return Nothing<icu::Locale>();
456 }
457
458 return Just(icu_locale);
459 }
460
461 } // anonymous namespace
462
463 // static
464
ToString(Isolate * isolate,const icu::UnicodeString & string)465 MaybeHandle<String> Intl::ToString(Isolate* isolate,
466 const icu::UnicodeString& string) {
467 return isolate->factory()->NewStringFromTwoByte(base::Vector<const uint16_t>(
468 reinterpret_cast<const uint16_t*>(string.getBuffer()), string.length()));
469 }
470
ToString(Isolate * isolate,const icu::UnicodeString & string,int32_t begin,int32_t end)471 MaybeHandle<String> Intl::ToString(Isolate* isolate,
472 const icu::UnicodeString& string,
473 int32_t begin, int32_t end) {
474 return Intl::ToString(isolate, string.tempSubStringBetween(begin, end));
475 }
476
477 namespace {
478
InnerAddElement(Isolate * isolate,Handle<JSArray> array,int index,Handle<String> field_type_string,Handle<String> value)479 Handle<JSObject> InnerAddElement(Isolate* isolate, Handle<JSArray> array,
480 int index, Handle<String> field_type_string,
481 Handle<String> value) {
482 // let element = $array[$index] = {
483 // type: $field_type_string,
484 // value: $value
485 // }
486 // return element;
487 Factory* factory = isolate->factory();
488 Handle<JSObject> element = factory->NewJSObject(isolate->object_function());
489 JSObject::AddProperty(isolate, element, factory->type_string(),
490 field_type_string, NONE);
491
492 JSObject::AddProperty(isolate, element, factory->value_string(), value, NONE);
493 // TODO(victorgomes): Temporarily forcing a fatal error here in case of
494 // overflow, until Intl::AddElement can handle exceptions.
495 if (JSObject::AddDataElement(array, index, element, NONE).IsNothing()) {
496 FATAL("Fatal JavaScript invalid size error when adding element");
497 UNREACHABLE();
498 }
499 return element;
500 }
501
502 } // namespace
503
// Stores a new {type, value} object at |array|[|index|] by delegating to
// InnerAddElement(); the created element is not returned to the caller.
void Intl::AddElement(Isolate* isolate, Handle<JSArray> array, int index,
                      Handle<String> field_type_string, Handle<String> value) {
  // Same as $array[$index] = {type: $field_type_string, value: $value};
  InnerAddElement(isolate, array, index, field_type_string, value);
}
509
AddElement(Isolate * isolate,Handle<JSArray> array,int index,Handle<String> field_type_string,Handle<String> value,Handle<String> additional_property_name,Handle<String> additional_property_value)510 void Intl::AddElement(Isolate* isolate, Handle<JSArray> array, int index,
511 Handle<String> field_type_string, Handle<String> value,
512 Handle<String> additional_property_name,
513 Handle<String> additional_property_value) {
514 // Same as $array[$index] = {
515 // type: $field_type_string, value: $value,
516 // $additional_property_name: $additional_property_value
517 // }
518 Handle<JSObject> element =
519 InnerAddElement(isolate, array, index, field_type_string, value);
520 JSObject::AddProperty(isolate, element, additional_property_name,
521 additional_property_value, NONE);
522 }
523
524 namespace {
525
526 // Build the shortened locale; eg, convert xx_Yyyy_ZZ to xx_ZZ.
527 //
528 // If locale has a script tag then return true and the locale without the
529 // script else return false and an empty string.
RemoveLocaleScriptTag(const std::string & icu_locale,std::string * locale_less_script)530 bool RemoveLocaleScriptTag(const std::string& icu_locale,
531 std::string* locale_less_script) {
532 icu::Locale new_locale = icu::Locale::createCanonical(icu_locale.c_str());
533 const char* icu_script = new_locale.getScript();
534 if (icu_script == nullptr || strlen(icu_script) == 0) {
535 *locale_less_script = std::string();
536 return false;
537 }
538
539 const char* icu_language = new_locale.getLanguage();
540 const char* icu_country = new_locale.getCountry();
541 icu::Locale short_locale = icu::Locale(icu_language, icu_country);
542 *locale_less_script = short_locale.getName();
543 return true;
544 }
545
ValidateResource(const icu::Locale locale,const char * path,const char * key)546 bool ValidateResource(const icu::Locale locale, const char* path,
547 const char* key) {
548 bool result = false;
549 UErrorCode status = U_ZERO_ERROR;
550 UResourceBundle* bundle = ures_open(path, locale.getName(), &status);
551 if (bundle != nullptr && status == U_ZERO_ERROR) {
552 if (key == nullptr) {
553 result = true;
554 } else {
555 UResourceBundle* key_bundle =
556 ures_getByKey(bundle, key, nullptr, &status);
557 result = key_bundle != nullptr && (status == U_ZERO_ERROR);
558 ures_close(key_bundle);
559 }
560 }
561 ures_close(bundle);
562 if (!result) {
563 if ((locale.getCountry()[0] != '\0') && (locale.getScript()[0] != '\0')) {
564 // Fallback to try without country.
565 std::string without_country(locale.getLanguage());
566 without_country = without_country.append("-").append(locale.getScript());
567 return ValidateResource(without_country.c_str(), path, key);
568 } else if ((locale.getCountry()[0] != '\0') ||
569 (locale.getScript()[0] != '\0')) {
570 // Fallback to try with only language.
571 std::string language(locale.getLanguage());
572 return ValidateResource(language.c_str(), path, key);
573 }
574 }
575 return result;
576 }
577
578 } // namespace
579
BuildLocaleSet(const std::vector<std::string> & icu_available_locales,const char * path,const char * validate_key)580 std::set<std::string> Intl::BuildLocaleSet(
581 const std::vector<std::string>& icu_available_locales, const char* path,
582 const char* validate_key) {
583 std::set<std::string> locales;
584 for (const std::string& locale : icu_available_locales) {
585 if (path != nullptr || validate_key != nullptr) {
586 if (!ValidateResource(icu::Locale(locale.c_str()), path, validate_key)) {
587 // FIXME(chromium:1215606) Find a beter fix for nb->no fallback
588 if (locale != "nb") {
589 continue;
590 }
591 // Try no for nb
592 if (!ValidateResource(icu::Locale("no"), path, validate_key)) {
593 continue;
594 }
595 }
596 }
597 locales.insert(locale);
598 std::string shortened_locale;
599 if (RemoveLocaleScriptTag(locale, &shortened_locale)) {
600 std::replace(shortened_locale.begin(), shortened_locale.end(), '_', '-');
601 locales.insert(shortened_locale);
602 }
603 }
604 return locales;
605 }
606
ToLanguageTag(const icu::Locale & locale)607 Maybe<std::string> Intl::ToLanguageTag(const icu::Locale& locale) {
608 UErrorCode status = U_ZERO_ERROR;
609 std::string res = locale.toLanguageTag<std::string>(status);
610 if (U_FAILURE(status)) {
611 return Nothing<std::string>();
612 }
613 DCHECK(U_SUCCESS(status));
614 return Just(res);
615 }
616
617 // See ecma402/#legacy-constructor.
LegacyUnwrapReceiver(Isolate * isolate,Handle<JSReceiver> receiver,Handle<JSFunction> constructor,bool has_initialized_slot)618 MaybeHandle<Object> Intl::LegacyUnwrapReceiver(Isolate* isolate,
619 Handle<JSReceiver> receiver,
620 Handle<JSFunction> constructor,
621 bool has_initialized_slot) {
622 Handle<Object> obj_ordinary_has_instance;
623 ASSIGN_RETURN_ON_EXCEPTION(
624 isolate, obj_ordinary_has_instance,
625 Object::OrdinaryHasInstance(isolate, constructor, receiver), Object);
626 bool ordinary_has_instance = obj_ordinary_has_instance->BooleanValue(isolate);
627
628 // 2. If receiver does not have an [[Initialized...]] internal slot
629 // and ? OrdinaryHasInstance(constructor, receiver) is true, then
630 if (!has_initialized_slot && ordinary_has_instance) {
631 // 2. a. Let new_receiver be ? Get(receiver, %Intl%.[[FallbackSymbol]]).
632 Handle<Object> new_receiver;
633 ASSIGN_RETURN_ON_EXCEPTION(
634 isolate, new_receiver,
635 JSReceiver::GetProperty(isolate, receiver,
636 isolate->factory()->intl_fallback_symbol()),
637 Object);
638 return new_receiver;
639 }
640
641 return receiver;
642 }
643
644 namespace {
645
IsTwoLetterLanguage(const std::string & locale)646 bool IsTwoLetterLanguage(const std::string& locale) {
647 // Two letters, both in range 'a'-'z'...
648 return locale.length() == 2 && IsAsciiLower(locale[0]) &&
649 IsAsciiLower(locale[1]);
650 }
651
// Returns true if |locale| is exactly one of the deprecated or legacy language
// tags listed below. The comparison is case-sensitive.
bool IsDeprecatedOrLegacyLanguage(const std::string& locale) {
  static constexpr const char* kDeprecatedOrLegacyTags[] = {
      // Deprecated language tags:
      "in", "iw", "ji", "jw", "mo",
      // Legacy language tags:
      "sh", "tl", "no",
  };
  for (const char* tag : kDeprecatedOrLegacyTags) {
    if (locale == tag) return true;
  }
  return false;
}
659
// Structural validity check for a language tag; the decision is delegated
// entirely to JSLocale::StartsWithUnicodeLanguageId().
bool IsStructurallyValidLanguageTag(const std::string& tag) {
  return JSLocale::StartsWithUnicodeLanguageId(tag);
}
663
// Canonicalize the locale.
// https://tc39.github.io/ecma402/#sec-canonicalizelanguagetag,
// including type check and structural validity check.
//
// Returns the canonicalized tag on success. On failure a RangeError
// (kInvalidLanguageTag) is thrown on |isolate| and Nothing is returned. This
// happens when the input is empty or not ASCII, when ICU cannot parse it or
// yields a bogus locale, when validation or canonicalization fails, or when
// the result cannot be converted back to a language tag.
Maybe<std::string> CanonicalizeLanguageTag(Isolate* isolate,
                                           const std::string& locale_in) {
  // Working copy; it is lowercased in place further down.
  std::string locale = locale_in;

  if (locale.length() == 0 ||
      !String::IsAscii(locale.data(), static_cast<int>(locale.length()))) {
    THROW_NEW_ERROR_RETURN_VALUE(
        isolate,
        NewRangeError(
            MessageTemplate::kInvalidLanguageTag,
            isolate->factory()->NewStringFromAsciiChecked(locale.c_str())),
        Nothing<std::string>());
  }

  // Optimize for the most common case: a 2-letter language code in the
  // canonical form/lowercase that is not one of the deprecated or legacy codes
  // (see IsDeprecatedOrLegacyLanguage). Don't check for ~70 of 3-letter
  // deprecated language codes. Instead, let them be handled by ICU in the slow
  // path. However, fast-track 'fil' (3-letter canonical code).
  if ((IsTwoLetterLanguage(locale) && !IsDeprecatedOrLegacyLanguage(locale)) ||
      locale == "fil") {
    return Just(locale);
  }

  // Because per BCP 47 2.1.1 language tags are case-insensitive, lowercase
  // the input before any more check.
  std::transform(locale.begin(), locale.end(), locale.begin(), ToAsciiLower);

  // // ECMA 402 6.2.3
  // TODO(jshin): uloc_{for,to}LanguageTag can fail even for a structurally
  // valid language tag if it's too long (much longer than 100 chars). Even if
  // we allocate a longer buffer, ICU will still fail if it's too long. Either
  // propose to Ecma 402 to put a limit on the locale length or change ICU to
  // handle long locale names better. See
  // https://unicode-org.atlassian.net/browse/ICU-13417
  UErrorCode error = U_ZERO_ERROR;
  // uloc_forLanguageTag checks the structural validity. If the input BCP47
  // language tag is parsed all the way to the end, it indicates that the input
  // is structurally valid. Due to a couple of bugs, we can't use it
  // without Chromium patches or ICU 62 or earlier.
  icu::Locale icu_locale = icu::Locale::forLanguageTag(locale.c_str(), error);

  if (U_FAILURE(error) || icu_locale.isBogus()) {
    THROW_NEW_ERROR_RETURN_VALUE(
        isolate,
        NewRangeError(
            MessageTemplate::kInvalidLanguageTag,
            isolate->factory()->NewStringFromAsciiChecked(locale.c_str())),
        Nothing<std::string>());
  }

  // Use LocaleBuilder to validate locale.
  // |error| is only inspected after both build() and canonicalize() have run.
  icu_locale = icu::LocaleBuilder().setLocale(icu_locale).build(error);
  icu_locale.canonicalize(error);
  if (U_FAILURE(error) || icu_locale.isBogus()) {
    THROW_NEW_ERROR_RETURN_VALUE(
        isolate,
        NewRangeError(
            MessageTemplate::kInvalidLanguageTag,
            isolate->factory()->NewStringFromAsciiChecked(locale.c_str())),
        Nothing<std::string>());
  }
  Maybe<std::string> maybe_to_language_tag = Intl::ToLanguageTag(icu_locale);
  if (maybe_to_language_tag.IsNothing()) {
    THROW_NEW_ERROR_RETURN_VALUE(
        isolate,
        NewRangeError(
            MessageTemplate::kInvalidLanguageTag,
            isolate->factory()->NewStringFromAsciiChecked(locale.c_str())),
        Nothing<std::string>());
  }

  return maybe_to_language_tag;
}
741
CanonicalizeLanguageTag(Isolate * isolate,Handle<Object> locale_in)742 Maybe<std::string> CanonicalizeLanguageTag(Isolate* isolate,
743 Handle<Object> locale_in) {
744 Handle<String> locale_str;
745 // This does part of the validity checking spec'ed in CanonicalizeLocaleList:
746 // 7c ii. If Type(kValue) is not String or Object, throw a TypeError
747 // exception.
748 // 7c iii. Let tag be ? ToString(kValue).
749 // 7c iv. If IsStructurallyValidLanguageTag(tag) is false, throw a
750 // RangeError exception.
751
752 if (locale_in->IsString()) {
753 locale_str = Handle<String>::cast(locale_in);
754 } else if (locale_in->IsJSReceiver()) {
755 ASSIGN_RETURN_ON_EXCEPTION_VALUE(isolate, locale_str,
756 Object::ToString(isolate, locale_in),
757 Nothing<std::string>());
758 } else {
759 THROW_NEW_ERROR_RETURN_VALUE(isolate,
760 NewTypeError(MessageTemplate::kLanguageID),
761 Nothing<std::string>());
762 }
763 std::string locale(locale_str->ToCString().get());
764
765 if (!IsStructurallyValidLanguageTag(locale)) {
766 THROW_NEW_ERROR_RETURN_VALUE(
767 isolate, NewRangeError(MessageTemplate::kLocaleBadParameters),
768 Nothing<std::string>());
769 }
770 return CanonicalizeLanguageTag(isolate, locale);
771 }
772
773 } // anonymous namespace
774
CanonicalizeLocaleList(Isolate * isolate,Handle<Object> locales,bool only_return_one_result)775 Maybe<std::vector<std::string>> Intl::CanonicalizeLocaleList(
776 Isolate* isolate, Handle<Object> locales, bool only_return_one_result) {
777 // 1. If locales is undefined, then
778 if (locales->IsUndefined(isolate)) {
779 // 1a. Return a new empty List.
780 return Just(std::vector<std::string>());
781 }
782 // 2. Let seen be a new empty List.
783 std::vector<std::string> seen;
784 // 3. If Type(locales) is String or locales has an [[InitializedLocale]]
785 // internal slot, then
786 if (locales->IsJSLocale()) {
787 // Since this value came from JSLocale, which is already went though the
788 // CanonializeLanguageTag process once, therefore there are no need to
789 // call CanonializeLanguageTag again.
790 seen.push_back(JSLocale::ToString(Handle<JSLocale>::cast(locales)));
791 return Just(seen);
792 }
793 if (locales->IsString()) {
794 // 3a. Let O be CreateArrayFromList(« locales »).
795 // Instead of creating a one-element array and then iterating over it,
796 // we inline the body of the iteration:
797 std::string canonicalized_tag;
798 if (!CanonicalizeLanguageTag(isolate, locales).To(&canonicalized_tag)) {
799 return Nothing<std::vector<std::string>>();
800 }
801 seen.push_back(canonicalized_tag);
802 return Just(seen);
803 }
804 // 4. Else,
805 // 4a. Let O be ? ToObject(locales).
806 Handle<JSReceiver> o;
807 ASSIGN_RETURN_ON_EXCEPTION_VALUE(isolate, o,
808 Object::ToObject(isolate, locales),
809 Nothing<std::vector<std::string>>());
810 // 5. Let len be ? ToLength(? Get(O, "length")).
811 Handle<Object> length_obj;
812 ASSIGN_RETURN_ON_EXCEPTION_VALUE(isolate, length_obj,
813 Object::GetLengthFromArrayLike(isolate, o),
814 Nothing<std::vector<std::string>>());
815 // TODO(jkummerow): Spec violation: strictly speaking, we have to iterate
816 // up to 2^53-1 if {length_obj} says so. Since cases above 2^32 probably
817 // don't happen in practice (and would be very slow if they do), we'll keep
818 // the code simple for now by using a saturating to-uint32 conversion.
819 double raw_length = length_obj->Number();
820 uint32_t len =
821 raw_length >= kMaxUInt32 ? kMaxUInt32 : static_cast<uint32_t>(raw_length);
822 // 6. Let k be 0.
823 // 7. Repeat, while k < len
824 for (uint32_t k = 0; k < len; k++) {
825 // 7a. Let Pk be ToString(k).
826 // 7b. Let kPresent be ? HasProperty(O, Pk).
827 LookupIterator it(isolate, o, k);
828 Maybe<bool> maybe_found = JSReceiver::HasProperty(&it);
829 MAYBE_RETURN(maybe_found, Nothing<std::vector<std::string>>());
830 // 7c. If kPresent is true, then
831 if (!maybe_found.FromJust()) continue;
832 // 7c i. Let kValue be ? Get(O, Pk).
833 Handle<Object> k_value;
834 ASSIGN_RETURN_ON_EXCEPTION_VALUE(isolate, k_value, Object::GetProperty(&it),
835 Nothing<std::vector<std::string>>());
836 // 7c ii. If Type(kValue) is not String or Object, throw a TypeError
837 // exception.
838 // 7c iii. If Type(kValue) is Object and kValue has an [[InitializedLocale]]
839 // internal slot, then
840 std::string canonicalized_tag;
841 if (k_value->IsJSLocale()) {
842 // 7c iii. 1. Let tag be kValue.[[Locale]].
843 canonicalized_tag = JSLocale::ToString(Handle<JSLocale>::cast(k_value));
844 // 7c iv. Else,
845 } else {
846 // 7c iv 1. Let tag be ? ToString(kValue).
847 // 7c v. If IsStructurallyValidLanguageTag(tag) is false, throw a
848 // RangeError exception.
849 // 7c vi. Let canonicalizedTag be CanonicalizeLanguageTag(tag).
850 if (!CanonicalizeLanguageTag(isolate, k_value).To(&canonicalized_tag)) {
851 return Nothing<std::vector<std::string>>();
852 }
853 }
854 // 7c vi. If canonicalizedTag is not an element of seen, append
855 // canonicalizedTag as the last element of seen.
856 if (std::find(seen.begin(), seen.end(), canonicalized_tag) == seen.end()) {
857 seen.push_back(canonicalized_tag);
858 }
859 // 7d. Increase k by 1. (See loop header.)
860 // Optimization: some callers only need one result.
861 if (only_return_one_result) return Just(seen);
862 }
863 // 8. Return seen.
864 return Just(seen);
865 }
866
867 // ecma402 #sup-string.prototype.tolocalelowercase
868 // ecma402 #sup-string.prototype.tolocaleuppercase
StringLocaleConvertCase(Isolate * isolate,Handle<String> s,bool to_upper,Handle<Object> locales)869 MaybeHandle<String> Intl::StringLocaleConvertCase(Isolate* isolate,
870 Handle<String> s,
871 bool to_upper,
872 Handle<Object> locales) {
873 std::vector<std::string> requested_locales;
874 if (!CanonicalizeLocaleList(isolate, locales, true).To(&requested_locales)) {
875 return MaybeHandle<String>();
876 }
877 std::string requested_locale = requested_locales.size() == 0
878 ? isolate->DefaultLocale()
879 : requested_locales[0];
880 size_t dash = requested_locale.find('-');
881 if (dash != std::string::npos) {
882 requested_locale = requested_locale.substr(0, dash);
883 }
884
885 // Primary language tag can be up to 8 characters long in theory.
886 // https://tools.ietf.org/html/bcp47#section-2.2.1
887 DCHECK_LE(requested_locale.length(), 8);
888 s = String::Flatten(isolate, s);
889
890 // All the languages requiring special-handling have two-letter codes.
891 // Note that we have to check for '!= 2' here because private-use language
892 // tags (x-foo) or grandfathered irregular tags (e.g. i-enochian) would have
893 // only 'x' or 'i' when they get here.
894 if (V8_UNLIKELY(requested_locale.length() != 2)) {
895 if (to_upper) {
896 return ConvertToUpper(isolate, s);
897 }
898 return ConvertToLower(isolate, s);
899 }
900 // TODO(jshin): Consider adding a fast path for ASCII or Latin-1. The fastpath
901 // in the root locale needs to be adjusted for az, lt and tr because even case
902 // mapping of ASCII range characters are different in those locales.
903 // Greek (el) does not require any adjustment.
904 if (V8_UNLIKELY((requested_locale == "tr") || (requested_locale == "el") ||
905 (requested_locale == "lt") || (requested_locale == "az"))) {
906 return LocaleConvertCase(isolate, s, to_upper, requested_locale.c_str());
907 } else {
908 if (to_upper) {
909 return ConvertToUpper(isolate, s);
910 }
911 return ConvertToLower(isolate, s);
912 }
913 }
914
915 // static
916 template <class IsolateT>
CompareStringsOptionsFor(IsolateT * isolate,Handle<Object> locales,Handle<Object> options)917 Intl::CompareStringsOptions Intl::CompareStringsOptionsFor(
918 IsolateT* isolate, Handle<Object> locales, Handle<Object> options) {
919 if (!options->IsUndefined(isolate)) {
920 return CompareStringsOptions::kNone;
921 }
922
923 // Lists all of the available locales that are statically known to fulfill
924 // fast path conditions. See the StringLocaleCompareFastPath test as a
925 // starting point to update this list.
926 //
927 // Locale entries are roughly sorted s.t. common locales come first.
928 //
929 // The actual conditions are verified in debug builds in
930 // CollatorAllowsFastComparison.
931 static const char* const kFastLocales[] = {
932 "en-US", "en", "fr", "es", "de", "pt", "it", "ca",
933 "de-AT", "fi", "id", "id-ID", "ms", "nl", "pl", "ro",
934 "sl", "sv", "sw", "vi", "en-DE", "en-GB",
935 };
936
937 if (locales->IsUndefined(isolate)) {
938 const std::string& default_locale = isolate->DefaultLocale();
939 for (const char* fast_locale : kFastLocales) {
940 if (strcmp(fast_locale, default_locale.c_str()) == 0) {
941 return CompareStringsOptions::kTryFastPath;
942 }
943 }
944
945 return CompareStringsOptions::kNone;
946 }
947
948 if (!locales->IsString()) return CompareStringsOptions::kNone;
949
950 Handle<String> locales_string = Handle<String>::cast(locales);
951 for (const char* fast_locale : kFastLocales) {
952 if (locales_string->IsEqualTo(base::CStrVector(fast_locale), isolate)) {
953 return CompareStringsOptions::kTryFastPath;
954 }
955 }
956
957 return CompareStringsOptions::kNone;
958 }
959
960 // Instantiations.
961 template Intl::CompareStringsOptions Intl::CompareStringsOptionsFor(
962 Isolate*, Handle<Object>, Handle<Object>);
963 template Intl::CompareStringsOptions Intl::CompareStringsOptionsFor(
964 LocalIsolate*, Handle<Object>, Handle<Object>);
965
StringLocaleCompare(Isolate * isolate,Handle<String> string1,Handle<String> string2,Handle<Object> locales,Handle<Object> options,const char * method_name)966 base::Optional<int> Intl::StringLocaleCompare(
967 Isolate* isolate, Handle<String> string1, Handle<String> string2,
968 Handle<Object> locales, Handle<Object> options, const char* method_name) {
969 // We only cache the instance when locales is a string/undefined and
970 // options is undefined, as that is the only case when the specified
971 // side-effects of examining those arguments are unobservable.
972 const bool can_cache =
973 (locales->IsString() || locales->IsUndefined(isolate)) &&
974 options->IsUndefined(isolate);
975 // We may be able to take the fast path, depending on the `locales` and
976 // `options` arguments.
977 const CompareStringsOptions compare_strings_options =
978 CompareStringsOptionsFor(isolate, locales, options);
979 if (can_cache) {
980 // Both locales and options are undefined, check the cache.
981 icu::Collator* cached_icu_collator =
982 static_cast<icu::Collator*>(isolate->get_cached_icu_object(
983 Isolate::ICUObjectCacheType::kDefaultCollator, locales));
984 // We may use the cached icu::Collator for a fast path.
985 if (cached_icu_collator != nullptr) {
986 return Intl::CompareStrings(isolate, *cached_icu_collator, string1,
987 string2, compare_strings_options);
988 }
989 }
990
991 Handle<JSFunction> constructor = Handle<JSFunction>(
992 JSFunction::cast(
993 isolate->context().native_context().intl_collator_function()),
994 isolate);
995
996 Handle<JSCollator> collator;
997 MaybeHandle<JSCollator> maybe_collator =
998 New<JSCollator>(isolate, constructor, locales, options, method_name);
999 if (!maybe_collator.ToHandle(&collator)) return {};
1000 if (can_cache) {
1001 isolate->set_icu_object_in_cache(
1002 Isolate::ICUObjectCacheType::kDefaultCollator, locales,
1003 std::static_pointer_cast<icu::UMemory>(collator->icu_collator().get()));
1004 }
1005 icu::Collator* icu_collator = collator->icu_collator().raw();
1006 return Intl::CompareStrings(isolate, *icu_collator, string1, string2,
1007 compare_strings_options);
1008 }
1009
1010 namespace {
1011
1012 // Weights for the Unicode Collation Algorithm for charcodes [0x00,0x7F].
1013 // https://unicode.org/reports/tr10/.
1014 //
1015 // Generated from:
1016 //
1017 // $ wget http://www.unicode.org/Public/UCA/latest/allkeys.txt
1018 // $ cat ~/allkeys.txt | grep '^00[0-7]. ;' | sort | sed 's/[*.]/ /g' |\
1019 // sed 's/.*\[ \(.*\)\].*/\1/' | python ~/gen_weights.py
1020 //
1021 // Where gen_weights.py does an ordinal rank s.t. weights fit in a uint8_t:
1022 //
1023 // import sys
1024 //
1025 // def to_ordinal(ws):
1026 // weight_map = {}
1027 // weights_uniq_sorted = sorted(set(ws))
1028 // for i in range(0, len(weights_uniq_sorted)):
1029 // weight_map[weights_uniq_sorted[i]] = i
1030 // return [weight_map[x] for x in ws]
1031 //
1032 // def print_weight_list(array_name, ws):
1033 // print("constexpr uint8_t %s[256] = {" % array_name, end = "")
1034 // i = 0
1035 // for w in ws:
1036 // if (i % 16) == 0:
1037 // print("\n ", end = "")
1038 // print("%3d," % w, end = "")
1039 // i += 1
1040 // print("\n};\n")
1041 //
1042 // if __name__ == "__main__":
1043 // l1s = []
1044 // l3s = []
1045 // for line in sys.stdin:
1046 // weights = line.split()
1047 // l1s.append(int(weights[0], 16))
1048 // l3s.append(int(weights[2], 16))
1049 // print_weight_list("kCollationWeightsL1", to_ordinal(l1s))
1050 // print_weight_list("kCollationWeightsL3", to_ordinal(l3s))
1051
1052 // clang-format off
// Ordinal-ranked L1 weights, indexed by char code. Only the first 128 entries
// are listed explicitly; the remaining entries of the 256-element array are
// zero-initialized.
constexpr uint8_t kCollationWeightsL1[256] = {
    // 0x00 - 0x0F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 0, 0,
    // 0x10 - 0x1F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 0x20 - 0x2F
    6, 12, 16, 28, 38, 29, 27, 15, 17, 18, 24, 32, 9, 8, 14, 25,
    // 0x30 - 0x3F
    39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 11, 10, 33, 34, 35, 13,
    // 0x40 - 0x4F
    23, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
    // 0x50 - 0x5F
    64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 19, 26, 20, 31, 7,
    // 0x60 - 0x6F
    30, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
    // 0x70 - 0x7F
    64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 21, 36, 22, 37, 0,
};
// Ordinal-ranked L3 weights, indexed by char code. Only the first 128 entries
// are listed explicitly; the remaining entries of the 256-element array are
// zero-initialized.
constexpr uint8_t kCollationWeightsL3[256] = {
    // 0x00 - 0x0F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
    // 0x10 - 0x1F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 0x20 - 0x2F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    // 0x30 - 0x3F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    // 0x40 - 0x4F
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    // 0x50 - 0x5F
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1,
    // 0x60 - 0x6F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    // 0x70 - 0x7F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
};
1073 constexpr int kCollationWeightsLength = arraysize(kCollationWeightsL1);
1074 STATIC_ASSERT(kCollationWeightsLength == arraysize(kCollationWeightsL3));
1075 // clang-format on
1076
1077 // Normalize a comparison delta (usually `lhs - rhs`) to UCollationResult
1078 // values.
ToUCollationResult(int delta)1079 constexpr UCollationResult ToUCollationResult(int delta) {
1080 return delta < 0 ? UCollationResult::UCOL_LESS
1081 : (delta > 0 ? UCollationResult::UCOL_GREATER
1082 : UCollationResult::UCOL_EQUAL);
1083 }
1084
1085 struct FastCompareStringsData {
1086 UCollationResult l1_result = UCollationResult::UCOL_EQUAL;
1087 UCollationResult l3_result = UCollationResult::UCOL_EQUAL;
1088 int processed_until = 0;
1089 int first_diff_at = 0; // The first relevant diff (L1 if exists, else L3).
1090 bool has_diff = false;
1091
FastCompareFailedv8::internal::__anon0bda2b820711::FastCompareStringsData1092 base::Optional<UCollationResult> FastCompareFailed(
1093 int* processed_until_out) const {
1094 if (has_diff) {
1095 // Found some difference, continue there to ensure the generic algorithm
1096 // picks it up.
1097 *processed_until_out = first_diff_at;
1098 } else {
1099 // No difference found, reprocess the last processed character since it
1100 // may be followed by a unicode combining character (which alters it's
1101 // meaning).
1102 *processed_until_out = std::max(processed_until - 1, 0);
1103 }
1104 return {};
1105 }
1106 };
1107
1108 template <class CharT>
CanFastCompare(CharT c)1109 constexpr bool CanFastCompare(CharT c) {
1110 return c < kCollationWeightsLength && kCollationWeightsL1[c] != 0;
1111 }
1112
1113 template <class Char1T, class Char2T>
FastCompareFlatString(const Char1T * lhs,const Char2T * rhs,int length,FastCompareStringsData * d)1114 bool FastCompareFlatString(const Char1T* lhs, const Char2T* rhs, int length,
1115 FastCompareStringsData* d) {
1116 for (int i = 0; i < length; i++) {
1117 const Char1T l = lhs[i];
1118 const Char2T r = rhs[i];
1119 if (!CanFastCompare(l) || !CanFastCompare(r)) {
1120 d->processed_until = i;
1121 return false;
1122 }
1123 UCollationResult l1_result =
1124 ToUCollationResult(kCollationWeightsL1[l] - kCollationWeightsL1[r]);
1125 if (l1_result != UCollationResult::UCOL_EQUAL) {
1126 d->has_diff = true;
1127 d->first_diff_at = i;
1128 d->processed_until = i;
1129 d->l1_result = l1_result;
1130 return true;
1131 }
1132 if (l != r && d->l3_result == UCollationResult::UCOL_EQUAL) {
1133 // Collapse the two-pass algorithm into one: if we find a difference in
1134 // L1 weights, that is our result. If not, use the first L3 weight
1135 // difference.
1136 UCollationResult l3_result =
1137 ToUCollationResult(kCollationWeightsL3[l] - kCollationWeightsL3[r]);
1138 d->l3_result = l3_result;
1139 if (!d->has_diff) {
1140 d->has_diff = true;
1141 d->first_diff_at = i;
1142 }
1143 }
1144 }
1145 d->processed_until = length;
1146 return true;
1147 }
1148
FastCompareStringFlatContent(const String::FlatContent & lhs,const String::FlatContent & rhs,int length,FastCompareStringsData * d)1149 bool FastCompareStringFlatContent(const String::FlatContent& lhs,
1150 const String::FlatContent& rhs, int length,
1151 FastCompareStringsData* d) {
1152 if (lhs.IsOneByte()) {
1153 base::Vector<const uint8_t> l = lhs.ToOneByteVector();
1154 if (rhs.IsOneByte()) {
1155 base::Vector<const uint8_t> r = rhs.ToOneByteVector();
1156 return FastCompareFlatString(l.data(), r.data(), length, d);
1157 } else {
1158 base::Vector<const uint16_t> r = rhs.ToUC16Vector();
1159 return FastCompareFlatString(l.data(), r.data(), length, d);
1160 }
1161 } else {
1162 base::Vector<const uint16_t> l = lhs.ToUC16Vector();
1163 if (rhs.IsOneByte()) {
1164 base::Vector<const uint8_t> r = rhs.ToOneByteVector();
1165 return FastCompareFlatString(l.data(), r.data(), length, d);
1166 } else {
1167 base::Vector<const uint16_t> r = rhs.ToUC16Vector();
1168 return FastCompareFlatString(l.data(), r.data(), length, d);
1169 }
1170 }
1171 UNREACHABLE();
1172 }
1173
CharIsAsciiOrOutOfBounds(const String::FlatContent & string,int string_length,int index)1174 bool CharIsAsciiOrOutOfBounds(const String::FlatContent& string,
1175 int string_length, int index) {
1176 DCHECK_EQ(string.length(), string_length);
1177 return index >= string_length || isascii(string.Get(index));
1178 }
1179
CharCanFastCompareOrOutOfBounds(const String::FlatContent & string,int string_length,int index)1180 bool CharCanFastCompareOrOutOfBounds(const String::FlatContent& string,
1181 int string_length, int index) {
1182 DCHECK_EQ(string.length(), string_length);
1183 return index >= string_length || CanFastCompare(string.Get(index));
1184 }
1185
1186 #ifdef DEBUG
USetContainsAllAsciiItem(USet * set)1187 bool USetContainsAllAsciiItem(USet* set) {
1188 static constexpr int kBufferSize = 64;
1189 UChar buffer[kBufferSize];
1190
1191 const int length = uset_getItemCount(set);
1192 for (int i = 0; i < length; i++) {
1193 UChar32 start, end;
1194 UErrorCode status = U_ZERO_ERROR;
1195 const int item_length =
1196 uset_getItem(set, i, &start, &end, buffer, kBufferSize, &status);
1197 CHECK(U_SUCCESS(status));
1198 DCHECK_GE(item_length, 0);
1199
1200 if (item_length == 0) {
1201 // Empty string or a range.
1202 if (isascii(start)) return true;
1203 } else {
1204 // A non-empty string.
1205 bool all_ascii = true;
1206 for (int j = 0; j < item_length; j++) {
1207 if (!isascii(buffer[j])) {
1208 all_ascii = false;
1209 break;
1210 }
1211 }
1212
1213 if (all_ascii) return true;
1214 }
1215 }
1216
1217 return false;
1218 }
1219
CollatorAllowsFastComparison(const icu::Collator & icu_collator)1220 bool CollatorAllowsFastComparison(const icu::Collator& icu_collator) {
1221 UErrorCode status = U_ZERO_ERROR;
1222
1223 icu::Locale icu_locale(icu_collator.getLocale(ULOC_VALID_LOCALE, status));
1224 DCHECK(U_SUCCESS(status));
1225
1226 static constexpr int kBufferSize = 64;
1227 char buffer[kBufferSize];
1228 const int collation_keyword_length =
1229 icu_locale.getKeywordValue("collation", buffer, kBufferSize, status);
1230 DCHECK(U_SUCCESS(status));
1231 if (collation_keyword_length != 0) return false;
1232
1233 // These attributes must be set to the expected value for fast comparisons.
1234 static constexpr struct {
1235 UColAttribute attribute;
1236 UColAttributeValue legal_value;
1237 } kAttributeChecks[] = {
1238 {UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE},
1239 {UCOL_CASE_FIRST, UCOL_OFF},
1240 {UCOL_CASE_LEVEL, UCOL_OFF},
1241 {UCOL_FRENCH_COLLATION, UCOL_OFF},
1242 {UCOL_NUMERIC_COLLATION, UCOL_OFF},
1243 {UCOL_STRENGTH, UCOL_TERTIARY},
1244 };
1245
1246 for (const auto& check : kAttributeChecks) {
1247 if (icu_collator.getAttribute(check.attribute, status) !=
1248 check.legal_value) {
1249 return false;
1250 }
1251 DCHECK(U_SUCCESS(status));
1252 }
1253
1254 // No reordering codes are allowed.
1255 int num_reorder_codes =
1256 ucol_getReorderCodes(icu_collator.toUCollator(), nullptr, 0, &status);
1257 if (num_reorder_codes != 0) return false;
1258 DCHECK(U_SUCCESS(status)); // Must check *after* num_reorder_codes != 0.
1259
1260 // No tailored rules are allowed.
1261 int32_t rules_length = 0;
1262 ucol_getRules(icu_collator.toUCollator(), &rules_length);
1263 if (rules_length != 0) return false;
1264
1265 USet* tailored_set = ucol_getTailoredSet(icu_collator.toUCollator(), &status);
1266 DCHECK(U_SUCCESS(status));
1267 if (USetContainsAllAsciiItem(tailored_set)) return false;
1268 uset_close(tailored_set);
1269
1270 // No ASCII contractions or expansions are allowed.
1271 USet* contractions = uset_openEmpty();
1272 USet* expansions = uset_openEmpty();
1273 ucol_getContractionsAndExpansions(icu_collator.toUCollator(), contractions,
1274 expansions, true, &status);
1275 if (USetContainsAllAsciiItem(contractions)) return false;
1276 if (USetContainsAllAsciiItem(expansions)) return false;
1277 DCHECK(U_SUCCESS(status));
1278 uset_close(contractions);
1279 uset_close(expansions);
1280
1281 return true;
1282 }
1283 #endif // DEBUG
1284
1285 // Fast comparison is implemented for charcodes for which the L1 collation
// weight (see kCollationWeightsL1 above) is not 0.
1287 //
1288 // Note it's possible to partially process strings as long as their leading
1289 // characters all satisfy the above criteria. In that case, and if the L3
1290 // result is EQUAL, we set `processed_until_out` to the first non-processed
1291 // index - future processing can begin at that offset.
1292 //
1293 // This fast path looks somewhat complex; mostly because it combines multiple
1294 // passes into one. The pseudo-code for simplified multi-pass algorithm is:
1295 //
1296 // {
1297 // // We can only fast-compare a certain subset of the ASCII range.
1298 // // Additionally, unicode characters can change the meaning of preceding
1299 // // characters, for example: "o\u0308" is treated like "ö".
1300 // //
1301 // // Note, in the actual single-pass algorithm below, we tolerate non-ASCII
1302 // // contents outside the relevant range.
1303 // for (int i = 0; i < string1.length; i++) {
1304 // if (!CanFastCompare(string1[i])) return {};
1305 // }
1306 // for (int i = 0; i < string2.length; i++) {
1307 // if (!CanFastCompare(string2[i])) return {};
1308 // }
1309 //
1310 // // Apply L1 weights.
1311 // for (int i = 0; i < common_length; i++) {
1312 // Char1T c1 = string1[i];
1313 // Char2T c2 = string2[i];
1314 // if (L1Weight[c1] != L1Weight[c2]) {
1315 // return L1Weight[c1] - L1Weight[c2];
1316 // }
1317 // }
1318 //
1319 // // Strings are L1-equal up to the common length; if lengths differ, the
1320 // // longer string is treated as 'greater'.
//     if (string1.length != string2.length) {
//       return string1.length - string2.length;
//     }
1322 //
1323 // // Apply L3 weights.
1324 // for (int i = 0; i < common_length; i++) {
1325 // Char1T c1 = string1[i];
1326 // Char2T c2 = string2[i];
1327 // if (L3Weight[c1] != L3Weight[c2]) {
1328 // return L3Weight[c1] - L3Weight[c2];
1329 // }
1330 // }
1331 //
1332 // return UCOL_EQUAL;
1333 // }
TryFastCompareStrings(Isolate * isolate,const icu::Collator & icu_collator,Handle<String> string1,Handle<String> string2,int * processed_until_out)1334 base::Optional<UCollationResult> TryFastCompareStrings(
1335 Isolate* isolate, const icu::Collator& icu_collator, Handle<String> string1,
1336 Handle<String> string2, int* processed_until_out) {
1337 // TODO(jgruber): We could avoid the flattening (done by the caller) as well
1338 // by implementing comparison through string iteration. This has visible
1339 // performance benefits (e.g. 7% on CDJS) but complicates the code. Consider
1340 // doing this in the future.
1341 DCHECK(string1->IsFlat());
1342 DCHECK(string2->IsFlat());
1343
1344 *processed_until_out = 0;
1345
1346 #ifdef DEBUG
1347 // Checked by the caller, see CompareStringsOptionsFor.
1348 SLOW_DCHECK(CollatorAllowsFastComparison(icu_collator));
1349 USE(CollatorAllowsFastComparison);
1350 #endif // DEBUG
1351
1352 DCHECK(!SharedStringAccessGuardIfNeeded::IsNeeded(*string1));
1353 DCHECK(!SharedStringAccessGuardIfNeeded::IsNeeded(*string2));
1354
1355 const int length1 = string1->length();
1356 const int length2 = string2->length();
1357 int common_length = std::min(length1, length2);
1358
1359 FastCompareStringsData d;
1360 DisallowGarbageCollection no_gc;
1361 const String::FlatContent& flat1 = string1->GetFlatContent(no_gc);
1362 const String::FlatContent& flat2 = string2->GetFlatContent(no_gc);
1363 if (!FastCompareStringFlatContent(flat1, flat2, common_length, &d)) {
1364 DCHECK_EQ(d.l1_result, UCollationResult::UCOL_EQUAL);
1365 return d.FastCompareFailed(processed_until_out);
1366 }
1367
1368 // The result is only valid if the last processed character is not followed
1369 // by a unicode combining character (we are overly strict and restrict to
1370 // ASCII).
1371 if (!CharIsAsciiOrOutOfBounds(flat1, length1, d.processed_until + 1) ||
1372 !CharIsAsciiOrOutOfBounds(flat2, length2, d.processed_until + 1)) {
1373 return d.FastCompareFailed(processed_until_out);
1374 }
1375
1376 if (d.l1_result != UCollationResult::UCOL_EQUAL) {
1377 return d.l1_result;
1378 }
1379
1380 // Strings are L1-equal up to their common length, length differences win.
1381 UCollationResult length_result = ToUCollationResult(length1 - length2);
1382 if (length_result != UCollationResult::UCOL_EQUAL) {
1383 // Strings of different lengths may still compare as equal if the longer
1384 // string has a fully ignored suffix, e.g. "a" vs. "a\u{1}".
1385 if (!CharCanFastCompareOrOutOfBounds(flat1, length1, common_length) ||
1386 !CharCanFastCompareOrOutOfBounds(flat2, length2, common_length)) {
1387 return d.FastCompareFailed(processed_until_out);
1388 }
1389 return length_result;
1390 }
1391
1392 // L1-equal and same length, the L3 result wins.
1393 return d.l3_result;
1394 }
1395
1396 } // namespace
1397
1398 // static
AsciiCollationWeightsL1()1399 const uint8_t* Intl::AsciiCollationWeightsL1() {
1400 return &kCollationWeightsL1[0];
1401 }
1402
1403 // static
AsciiCollationWeightsL3()1404 const uint8_t* Intl::AsciiCollationWeightsL3() {
1405 return &kCollationWeightsL3[0];
1406 }
1407
1408 // static
1409 const int Intl::kAsciiCollationWeightsLength = kCollationWeightsLength;
1410
1411 // ecma402/#sec-collator-comparestrings
CompareStrings(Isolate * isolate,const icu::Collator & icu_collator,Handle<String> string1,Handle<String> string2,CompareStringsOptions compare_strings_options)1412 int Intl::CompareStrings(Isolate* isolate, const icu::Collator& icu_collator,
1413 Handle<String> string1, Handle<String> string2,
1414 CompareStringsOptions compare_strings_options) {
1415 // Early return for identical strings.
1416 if (string1.is_identical_to(string2)) {
1417 return UCollationResult::UCOL_EQUAL;
1418 }
1419
1420 // Early return for empty strings.
1421 if (string1->length() == 0 || string2->length() == 0) {
1422 return ToUCollationResult(string1->length() - string2->length());
1423 }
1424
1425 string1 = String::Flatten(isolate, string1);
1426 string2 = String::Flatten(isolate, string2);
1427
1428 int processed_until = 0;
1429 if (compare_strings_options == CompareStringsOptions::kTryFastPath) {
1430 base::Optional<int> maybe_result = TryFastCompareStrings(
1431 isolate, icu_collator, string1, string2, &processed_until);
1432 if (maybe_result.has_value()) return maybe_result.value();
1433 }
1434
1435 UCollationResult result;
1436 UErrorCode status = U_ZERO_ERROR;
1437 icu::StringPiece string_piece1 =
1438 ToICUStringPiece(isolate, string1, processed_until);
1439 if (!string_piece1.empty()) {
1440 icu::StringPiece string_piece2 =
1441 ToICUStringPiece(isolate, string2, processed_until);
1442 if (!string_piece2.empty()) {
1443 result = icu_collator.compareUTF8(string_piece1, string_piece2, status);
1444 DCHECK(U_SUCCESS(status));
1445 return result;
1446 }
1447 }
1448
1449 icu::UnicodeString string_val1 =
1450 Intl::ToICUUnicodeString(isolate, string1, processed_until);
1451 icu::UnicodeString string_val2 =
1452 Intl::ToICUUnicodeString(isolate, string2, processed_until);
1453 result = icu_collator.compare(string_val1, string_val2, status);
1454 DCHECK(U_SUCCESS(status));
1455 return result;
1456 }
1457
1458 // ecma402/#sup-properties-of-the-number-prototype-object
NumberToLocaleString(Isolate * isolate,Handle<Object> num,Handle<Object> locales,Handle<Object> options,const char * method_name)1459 MaybeHandle<String> Intl::NumberToLocaleString(Isolate* isolate,
1460 Handle<Object> num,
1461 Handle<Object> locales,
1462 Handle<Object> options,
1463 const char* method_name) {
1464 Handle<Object> numeric_obj;
1465 ASSIGN_RETURN_ON_EXCEPTION(isolate, numeric_obj,
1466 Object::ToNumeric(isolate, num), String);
1467
1468 // We only cache the instance when locales is a string/undefined and
1469 // options is undefined, as that is the only case when the specified
1470 // side-effects of examining those arguments are unobservable.
1471 bool can_cache = (locales->IsString() || locales->IsUndefined(isolate)) &&
1472 options->IsUndefined(isolate);
1473 if (can_cache) {
1474 icu::number::LocalizedNumberFormatter* cached_number_format =
1475 static_cast<icu::number::LocalizedNumberFormatter*>(
1476 isolate->get_cached_icu_object(
1477 Isolate::ICUObjectCacheType::kDefaultNumberFormat, locales));
1478 // We may use the cached icu::NumberFormat for a fast path.
1479 if (cached_number_format != nullptr) {
1480 return JSNumberFormat::FormatNumeric(isolate, *cached_number_format,
1481 numeric_obj);
1482 }
1483 }
1484
1485 Handle<JSFunction> constructor = Handle<JSFunction>(
1486 JSFunction::cast(
1487 isolate->context().native_context().intl_number_format_function()),
1488 isolate);
1489 Handle<JSNumberFormat> number_format;
1490 // 2. Let numberFormat be ? Construct(%NumberFormat%, « locales, options »).
1491 ASSIGN_RETURN_ON_EXCEPTION(
1492 isolate, number_format,
1493 New<JSNumberFormat>(isolate, constructor, locales, options, method_name),
1494 String);
1495
1496 if (can_cache) {
1497 isolate->set_icu_object_in_cache(
1498 Isolate::ICUObjectCacheType::kDefaultNumberFormat, locales,
1499 std::static_pointer_cast<icu::UMemory>(
1500 number_format->icu_number_formatter().get()));
1501 }
1502
1503 // Return FormatNumber(numberFormat, x).
1504 icu::number::LocalizedNumberFormatter* icu_number_format =
1505 number_format->icu_number_formatter().raw();
1506 return JSNumberFormat::FormatNumeric(isolate, *icu_number_format,
1507 numeric_obj);
1508 }
1509
SetNumberFormatDigitOptions(Isolate * isolate,Handle<JSReceiver> options,int mnfd_default,int mxfd_default,bool notation_is_compact)1510 Maybe<Intl::NumberFormatDigitOptions> Intl::SetNumberFormatDigitOptions(
1511 Isolate* isolate, Handle<JSReceiver> options, int mnfd_default,
1512 int mxfd_default, bool notation_is_compact) {
1513 Factory* factory = isolate->factory();
1514 Intl::NumberFormatDigitOptions digit_options;
1515
1516 // 5. Let mnid be ? GetNumberOption(options, "minimumIntegerDigits,", 1, 21,
1517 // 1).
1518 int mnid = 1;
1519 if (!GetNumberOption(isolate, options, factory->minimumIntegerDigits_string(),
1520 1, 21, 1)
1521 .To(&mnid)) {
1522 return Nothing<NumberFormatDigitOptions>();
1523 }
1524
1525 // 6. Let mnfd be ? Get(options, "minimumFractionDigits").
1526 Handle<Object> mnfd_obj;
1527 ASSIGN_RETURN_ON_EXCEPTION_VALUE(
1528 isolate, mnfd_obj,
1529 JSReceiver::GetProperty(isolate, options,
1530 factory->minimumFractionDigits_string()),
1531 Nothing<NumberFormatDigitOptions>());
1532
1533 // 7. Let mxfd be ? Get(options, "maximumFractionDigits").
1534 Handle<Object> mxfd_obj;
1535 ASSIGN_RETURN_ON_EXCEPTION_VALUE(
1536 isolate, mxfd_obj,
1537 JSReceiver::GetProperty(isolate, options,
1538 factory->maximumFractionDigits_string()),
1539 Nothing<NumberFormatDigitOptions>());
1540
1541 // 8. Let mnsd be ? Get(options, "minimumSignificantDigits").
1542 Handle<Object> mnsd_obj;
1543 ASSIGN_RETURN_ON_EXCEPTION_VALUE(
1544 isolate, mnsd_obj,
1545 JSReceiver::GetProperty(isolate, options,
1546 factory->minimumSignificantDigits_string()),
1547 Nothing<NumberFormatDigitOptions>());
1548
1549 // 9. Let mxsd be ? Get(options, "maximumSignificantDigits").
1550 Handle<Object> mxsd_obj;
1551 ASSIGN_RETURN_ON_EXCEPTION_VALUE(
1552 isolate, mxsd_obj,
1553 JSReceiver::GetProperty(isolate, options,
1554 factory->maximumSignificantDigits_string()),
1555 Nothing<NumberFormatDigitOptions>());
1556
1557 digit_options.rounding_priority = RoundingPriority::kAuto;
1558 digit_options.minimum_significant_digits = 0;
1559 digit_options.maximum_significant_digits = 0;
1560
1561 // 10. Set intlObj.[[MinimumIntegerDigits]] to mnid.
1562 digit_options.minimum_integer_digits = mnid;
1563
1564 if (FLAG_harmony_intl_number_format_v3) {
1565 // 11. Let roundingPriority be ? GetOption(options, "roundingPriority",
1566 // "string", « "auto", "morePrecision", "lessPrecision" », "auto").
1567
1568 Maybe<RoundingPriority> maybe_rounding_priority =
1569 GetStringOption<RoundingPriority>(
1570 isolate, options, "roundingPriority", "SetNumberFormatDigitOptions",
1571 {"auto", "morePrecision", "lessPrecision"},
1572 {RoundingPriority::kAuto, RoundingPriority::kMorePrecision,
1573 RoundingPriority::kLessPrecision},
1574 RoundingPriority::kAuto);
1575 MAYBE_RETURN(maybe_rounding_priority, Nothing<NumberFormatDigitOptions>());
1576 digit_options.rounding_priority = maybe_rounding_priority.FromJust();
1577 }
1578
1579 // 12. If mnsd is not undefined or mxsd is not undefined, then
1580 // a. Set hasSd to true.
1581 // 13. Else,
1582 // a. Set hasSd to false.
1583 bool has_sd =
1584 (!mnsd_obj->IsUndefined(isolate)) || (!mxsd_obj->IsUndefined(isolate));
1585
1586 // 14. If mnfd is not undefined or mxfd is not undefined, then
1587 // a. Set hasFd to true.
1588 // 15. Else,
1589 // a. Set hasFd to false.
1590 bool has_fd =
1591 (!mnfd_obj->IsUndefined(isolate)) || (!mxfd_obj->IsUndefined(isolate));
1592
1593 // 17. If hasSd or roundingPriority is not "auto", set needSd to true; else,
1594 // set needSd to false.
1595 bool need_sd =
1596 has_sd || (RoundingPriority::kAuto != digit_options.rounding_priority);
1597
1598 // 18. If ( not hasSd and (hasFd or notation is not "compact") ) or
1599 // roundingPriority is not "auto", then a. Set needFd to true.
1600 // 19. Else,
1601 // a. Set needFd to false.
1602 bool need_fd = ((!has_sd) && (has_fd || !notation_is_compact)) ||
1603 (RoundingPriority::kAuto != digit_options.rounding_priority);
1604
1605 // 20. If needSd, then
1606 if (need_sd) {
1607 // 20.b If hasSd, then
1608 if (has_sd) {
1609 // 20.b.i Let mnsd be ? DefaultNumberOption(mnsd, 1, 21, 1).
1610 int mnsd;
1611 if (!DefaultNumberOption(isolate, mnsd_obj, 1, 21, 1,
1612 factory->minimumSignificantDigits_string())
1613 .To(&mnsd)) {
1614 return Nothing<NumberFormatDigitOptions>();
1615 }
1616 // 20.b.ii Let mxsd be ? DefaultNumberOption(mxsd, mnsd, 21, 21).
1617 int mxsd;
1618 if (!DefaultNumberOption(isolate, mxsd_obj, mnsd, 21, 21,
1619 factory->maximumSignificantDigits_string())
1620 .To(&mxsd)) {
1621 return Nothing<NumberFormatDigitOptions>();
1622 }
1623 // 20.b.iii Set intlObj.[[MinimumSignificantDigits]] to mnsd.
1624 digit_options.minimum_significant_digits = mnsd;
1625 // 20.b.iv Set intlObj.[[MaximumSignificantDigits]] to mxsd.
1626 digit_options.maximum_significant_digits = mxsd;
1627 } else {
1628 // 20.c Else
1629 // 20.c.i Set intlObj.[[MinimumSignificantDigits]] to 1.
1630 digit_options.minimum_significant_digits = 1;
1631 // 20.c.ii Set intlObj.[[MaximumSignificantDigits]] to 21.
1632 digit_options.maximum_significant_digits = 21;
1633 }
1634 }
1635
1636 // 21. If needFd, then
1637 if (need_fd) {
1638 // 21.a If hasFd, then
1639 if (has_fd) {
1640 Handle<String> mnfd_str = factory->minimumFractionDigits_string();
1641 Handle<String> mxfd_str = factory->maximumFractionDigits_string();
1642 // 21.a.i Let mnfd be ? DefaultNumberOption(mnfd, 0, 20, undefined).
1643 int mnfd;
1644 if (!DefaultNumberOption(isolate, mnfd_obj, 0, 20, -1, mnfd_str)
1645 .To(&mnfd)) {
1646 return Nothing<NumberFormatDigitOptions>();
1647 }
1648 // 21.a.ii Let mxfd be ? DefaultNumberOption(mxfd, 0, 20, undefined).
1649 int mxfd;
1650 if (!DefaultNumberOption(isolate, mxfd_obj, 0, 20, -1, mxfd_str)
1651 .To(&mxfd)) {
1652 return Nothing<NumberFormatDigitOptions>();
1653 }
1654 // 21.a.iii If mnfd is undefined, set mnfd to min(mnfdDefault, mxfd).
1655 if (mnfd_obj->IsUndefined(isolate)) {
1656 mnfd = std::min(mnfd_default, mxfd);
1657 } else if (mxfd_obj->IsUndefined(isolate)) {
1658 // 21.a.iv Else if mxfd is undefined, set mxfd to max(mxfdDefault,
1659 // mnfd).
1660 mxfd = std::max(mxfd_default, mnfd);
1661 } else if (mnfd > mxfd) {
1662 // 21.a.v Else if mnfd is greater than mxfd, throw a RangeError
1663 // exception.
1664 THROW_NEW_ERROR_RETURN_VALUE(
1665 isolate,
1666 NewRangeError(MessageTemplate::kPropertyValueOutOfRange, mxfd_str),
1667 Nothing<NumberFormatDigitOptions>());
1668 }
1669 // 21.a.vi Set intlObj.[[MinimumFractionDigits]] to mnfd.
1670 digit_options.minimum_fraction_digits = mnfd;
1671 // 21.a.vii Set intlObj.[[MaximumFractionDigits]] to mxfd.
1672 digit_options.maximum_fraction_digits = mxfd;
1673 } else { // 17.b Else
1674 // 21.b.i Set intlObj.[[MinimumFractionDigits]] to mnfdDefault.
1675 digit_options.minimum_fraction_digits = mnfd_default;
1676 // 21.b.ii Set intlObj.[[MaximumFractionDigits]] to mxfdDefault.
1677 digit_options.maximum_fraction_digits = mxfd_default;
1678 }
1679 }
1680
1681 // 22. If needSd or needFd, then
1682 if (need_sd || need_fd) {
1683 // a. If roundingPriority is "morePrecision", then
1684 if (digit_options.rounding_priority == RoundingPriority::kMorePrecision) {
1685 // i. Set intlObj.[[RoundingType]] to morePrecision.
1686 digit_options.rounding_type = RoundingType::kMorePrecision;
1687 // b. Else if roundingPriority is "lessPrecision", then
1688 } else if (digit_options.rounding_priority ==
1689 RoundingPriority::kLessPrecision) {
1690 // i. Set intlObj.[[RoundingType]] to lessPrecision.
1691 digit_options.rounding_type = RoundingType::kLessPrecision;
1692 // c. Else if hasSd, then
1693 } else if (has_sd) {
1694 // i. Set intlObj.[[RoundingType]] to significantDigits.
1695 digit_options.rounding_type = RoundingType::kSignificantDigits;
1696 // d. Else,
1697 } else {
1698 // i.Set intlObj.[[RoundingType]] to fractionDigits.
1699 digit_options.rounding_type = RoundingType::kFractionDigits;
1700 }
1701 // 23. Else
1702 } else {
1703 // a. Set intlObj.[[RoundingType]] to morePrecision.
1704 digit_options.rounding_type = RoundingType::kMorePrecision;
1705 // b. Set intlObj.[[MinimumFractionDigits]] to 0.
1706 digit_options.minimum_fraction_digits = 0;
1707 // c. Set intlObj.[[MaximumFractionDigits]] to 0.
1708 digit_options.maximum_fraction_digits = 0;
1709 // d. Set intlObj.[[MinimumSignificantDigits]] to 1.
1710 digit_options.minimum_significant_digits = 1;
1711 // e. Set intlObj.[[MaximumSignificantDigits]] to 2.
1712 digit_options.maximum_significant_digits = 2;
1713 }
1714 return Just(digit_options);
1715 }
1716
1717 namespace {
1718
// ecma402/#sec-bestavailablelocale
//
// Returns the longest prefix of |locale|, shortened one subtag at a time from
// the right, that is an element of |available_locales|. An empty string is
// returned in place of the spec's "undefined" when no prefix matches.
std::string BestAvailableLocale(const std::set<std::string>& available_locales,
                                const std::string& locale) {
  // 1. Let candidate be locale.
  // 2. Repeat,
  for (std::string candidate = locale;;) {
    // 2.a. If availableLocales contains an element equal to candidate, return
    //      candidate.
    if (available_locales.count(candidate) != 0) return candidate;

    // 2.b. Let pos be the character index of the last occurrence of "-"
    //      (U+002D) within candidate. If that character does not occur,
    //      return undefined.
    const size_t last_dash = candidate.rfind('-');
    if (last_dash == std::string::npos) return std::string();

    // 2.c. If pos ≥ 2 and the character "-" occurs at index pos-2 of
    //      candidate, decrease pos by 2. This drops a single-character
    //      subtag together with the subtag that follows it.
    const bool preceded_by_singleton =
        last_dash >= 2 && candidate[last_dash - 2] == '-';

    // 2.d. Let candidate be the substring of candidate from position 0,
    //      inclusive, to position pos, exclusive.
    candidate.resize(preceded_by_singleton ? last_dash - 2 : last_dash);
  }
}
1752
1753 struct ParsedLocale {
1754 std::string no_extensions_locale;
1755 std::string extension;
1756 };
1757
1758 // Returns a struct containing a bcp47 tag without unicode extensions
1759 // and the removed unicode extensions.
1760 //
1761 // For example, given 'en-US-u-co-emoji' returns 'en-US' and
1762 // 'u-co-emoji'.
ParseBCP47Locale(const std::string & locale)1763 ParsedLocale ParseBCP47Locale(const std::string& locale) {
1764 size_t length = locale.length();
1765 ParsedLocale parsed_locale;
1766
1767 // Privateuse or grandfathered locales have no extension sequences.
1768 if ((length > 1) && (locale[1] == '-')) {
1769 // Check to make sure that this really is a grandfathered or
1770 // privateuse extension. ICU can sometimes mess up the
1771 // canonicalization.
1772 DCHECK(locale[0] == 'x' || locale[0] == 'i');
1773 parsed_locale.no_extensions_locale = locale;
1774 return parsed_locale;
1775 }
1776
1777 size_t unicode_extension_start = locale.find("-u-");
1778
1779 // No unicode extensions found.
1780 if (unicode_extension_start == std::string::npos) {
1781 parsed_locale.no_extensions_locale = locale;
1782 return parsed_locale;
1783 }
1784
1785 size_t private_extension_start = locale.find("-x-");
1786
1787 // Unicode extensions found within privateuse subtags don't count.
1788 if (private_extension_start != std::string::npos &&
1789 private_extension_start < unicode_extension_start) {
1790 parsed_locale.no_extensions_locale = locale;
1791 return parsed_locale;
1792 }
1793
1794 const std::string beginning = locale.substr(0, unicode_extension_start);
1795 size_t unicode_extension_end = length;
1796 DCHECK_GT(length, 2);
1797
1798 // Find the end of the extension production as per the bcp47 grammar
1799 // by looking for '-' followed by 2 chars and then another '-'.
1800 for (size_t i = unicode_extension_start + 1; i < length - 2; i++) {
1801 if (locale[i] != '-') continue;
1802
1803 if (locale[i + 2] == '-') {
1804 unicode_extension_end = i;
1805 break;
1806 }
1807
1808 i += 2;
1809 }
1810
1811 const std::string end = locale.substr(unicode_extension_end);
1812 parsed_locale.no_extensions_locale = beginning + end;
1813 parsed_locale.extension = locale.substr(
1814 unicode_extension_start, unicode_extension_end - unicode_extension_start);
1815 return parsed_locale;
1816 }
1817
1818 // ecma402/#sec-lookupsupportedlocales
LookupSupportedLocales(const std::set<std::string> & available_locales,const std::vector<std::string> & requested_locales)1819 std::vector<std::string> LookupSupportedLocales(
1820 const std::set<std::string>& available_locales,
1821 const std::vector<std::string>& requested_locales) {
1822 // 1. Let subset be a new empty List.
1823 std::vector<std::string> subset;
1824
1825 // 2. For each element locale of requestedLocales in List order, do
1826 for (const std::string& locale : requested_locales) {
1827 // 2. a. Let noExtensionsLocale be the String value that is locale
1828 // with all Unicode locale extension sequences removed.
1829 std::string no_extension_locale =
1830 ParseBCP47Locale(locale).no_extensions_locale;
1831
1832 // 2. b. Let availableLocale be
1833 // BestAvailableLocale(availableLocales, noExtensionsLocale).
1834 std::string available_locale =
1835 BestAvailableLocale(available_locales, no_extension_locale);
1836
1837 // 2. c. If availableLocale is not undefined, append locale to the
1838 // end of subset.
1839 if (!available_locale.empty()) {
1840 subset.push_back(locale);
1841 }
1842 }
1843
1844 // 3. Return subset.
1845 return subset;
1846 }
1847
BuildLocaleMatcher(Isolate * isolate,const std::set<std::string> & available_locales,UErrorCode * status)1848 icu::LocaleMatcher BuildLocaleMatcher(
1849 Isolate* isolate, const std::set<std::string>& available_locales,
1850 UErrorCode* status) {
1851 icu::Locale default_locale =
1852 icu::Locale::forLanguageTag(isolate->DefaultLocale(), *status);
1853 icu::LocaleMatcher::Builder builder;
1854 if (U_FAILURE(*status)) {
1855 return builder.build(*status);
1856 }
1857 builder.setDefaultLocale(&default_locale);
1858 for (auto it = available_locales.begin(); it != available_locales.end();
1859 ++it) {
1860 *status = U_ZERO_ERROR;
1861 icu::Locale l = icu::Locale::forLanguageTag(it->c_str(), *status);
1862 // skip invalid locale such as no-NO-NY
1863 if (U_SUCCESS(*status)) {
1864 builder.addSupportedLocale(l);
1865 }
1866 }
1867 return builder.build(*status);
1868 }
1869
1870 class Iterator : public icu::Locale::Iterator {
1871 public:
Iterator(std::vector<std::string>::const_iterator begin,std::vector<std::string>::const_iterator end)1872 Iterator(std::vector<std::string>::const_iterator begin,
1873 std::vector<std::string>::const_iterator end)
1874 : iter_(begin), end_(end) {}
1875 ~Iterator() override = default;
1876
hasNext() const1877 UBool hasNext() const override { return iter_ != end_; }
1878
next()1879 const icu::Locale& next() override {
1880 UErrorCode status = U_ZERO_ERROR;
1881 locale_ = icu::Locale::forLanguageTag(iter_->c_str(), status);
1882 DCHECK(U_SUCCESS(status));
1883 ++iter_;
1884 return locale_;
1885 }
1886
1887 private:
1888 std::vector<std::string>::const_iterator iter_;
1889 std::vector<std::string>::const_iterator end_;
1890 icu::Locale locale_;
1891 };
1892
1893 // ecma402/#sec-bestfitmatcher
1894 // The BestFitMatcher abstract operation compares requestedLocales, which must
1895 // be a List as returned by CanonicalizeLocaleList, against the locales in
1896 // availableLocales and determines the best available language to meet the
1897 // request. The algorithm is implementation dependent, but should produce
1898 // results that a typical user of the requested locales would perceive
1899 // as at least as good as those produced by the LookupMatcher abstract
1900 // operation. Options specified through Unicode locale extension sequences must
1901 // be ignored by the algorithm. Information about such subsequences is returned
1902 // separately. The abstract operation returns a record with a [[locale]] field,
1903 // whose value is the language tag of the selected locale, which must be an
1904 // element of availableLocales. If the language tag of the request locale that
1905 // led to the selected locale contained a Unicode locale extension sequence,
1906 // then the returned record also contains an [[extension]] field whose value is
1907 // the first Unicode locale extension sequence within the request locale
1908 // language tag.
BestFitMatcher(Isolate * isolate,const std::set<std::string> & available_locales,const std::vector<std::string> & requested_locales)1909 std::string BestFitMatcher(Isolate* isolate,
1910 const std::set<std::string>& available_locales,
1911 const std::vector<std::string>& requested_locales) {
1912 UErrorCode status = U_ZERO_ERROR;
1913 Iterator iter(requested_locales.cbegin(), requested_locales.cend());
1914 std::string bestfit = BuildLocaleMatcher(isolate, available_locales, &status)
1915 .getBestMatchResult(iter, status)
1916 .makeResolvedLocale(status)
1917 .toLanguageTag<std::string>(status);
1918 DCHECK(U_SUCCESS(status));
1919 return bestfit;
1920 }
1921
1922 // ECMA 402 9.2.8 BestFitSupportedLocales(availableLocales, requestedLocales)
1923 // https://tc39.github.io/ecma402/#sec-bestfitsupportedlocales
BestFitSupportedLocales(Isolate * isolate,const std::set<std::string> & available_locales,const std::vector<std::string> & requested_locales)1924 std::vector<std::string> BestFitSupportedLocales(
1925 Isolate* isolate, const std::set<std::string>& available_locales,
1926 const std::vector<std::string>& requested_locales) {
1927 UErrorCode status = U_ZERO_ERROR;
1928 icu::LocaleMatcher matcher =
1929 BuildLocaleMatcher(isolate, available_locales, &status);
1930 std::vector<std::string> result;
1931 if (U_SUCCESS(status)) {
1932 for (auto it = requested_locales.cbegin(); it != requested_locales.cend();
1933 it++) {
1934 status = U_ZERO_ERROR;
1935 icu::Locale desired = icu::Locale::forLanguageTag(it->c_str(), status);
1936 icu::LocaleMatcher::Result matched =
1937 matcher.getBestMatchResult(desired, status);
1938 if (U_FAILURE(status)) continue;
1939 if (matched.getSupportedIndex() < 0) continue;
1940
1941 // The BestFitSupportedLocales abstract operation returns the *SUBSET* of
1942 // the provided BCP 47 language priority list requestedLocales for which
1943 // availableLocales has a matching locale when using the Best Fit Matcher
1944 // algorithm. Locales appear in the same order in the returned list as in
1945 // requestedLocales. The steps taken are implementation dependent.
1946 std::string bestfit = desired.toLanguageTag<std::string>(status);
1947 if (U_FAILURE(status)) continue;
1948 result.push_back(bestfit);
1949 }
1950 }
1951 return result;
1952 }
1953
1954 // ecma262 #sec-createarrayfromlist
CreateArrayFromList(Isolate * isolate,std::vector<std::string> elements,PropertyAttributes attr)1955 MaybeHandle<JSArray> CreateArrayFromList(Isolate* isolate,
1956 std::vector<std::string> elements,
1957 PropertyAttributes attr) {
1958 Factory* factory = isolate->factory();
1959 // Let array be ! ArrayCreate(0).
1960 Handle<JSArray> array = factory->NewJSArray(0);
1961
1962 uint32_t length = static_cast<uint32_t>(elements.size());
1963 // 3. Let n be 0.
1964 // 4. For each element e of elements, do
1965 for (uint32_t i = 0; i < length; i++) {
1966 // a. Let status be CreateDataProperty(array, ! ToString(n), e).
1967 const std::string& part = elements[i];
1968 Handle<String> value =
1969 factory->NewStringFromUtf8(base::CStrVector(part.c_str()))
1970 .ToHandleChecked();
1971 MAYBE_RETURN(JSObject::AddDataElement(array, i, value, attr),
1972 MaybeHandle<JSArray>());
1973 }
1974 // 5. Return array.
1975 return MaybeHandle<JSArray>(array);
1976 }
1977
1978 // ECMA 402 9.2.9 SupportedLocales(availableLocales, requestedLocales, options)
1979 // https://tc39.github.io/ecma402/#sec-supportedlocales
SupportedLocales(Isolate * isolate,const char * method_name,const std::set<std::string> & available_locales,const std::vector<std::string> & requested_locales,Handle<Object> options)1980 MaybeHandle<JSObject> SupportedLocales(
1981 Isolate* isolate, const char* method_name,
1982 const std::set<std::string>& available_locales,
1983 const std::vector<std::string>& requested_locales, Handle<Object> options) {
1984 std::vector<std::string> supported_locales;
1985
1986 // 1. Set options to ? CoerceOptionsToObject(options).
1987 Handle<JSReceiver> options_obj;
1988 ASSIGN_RETURN_ON_EXCEPTION(
1989 isolate, options_obj,
1990 CoerceOptionsToObject(isolate, options, method_name), JSObject);
1991
1992 // 2. Let matcher be ? GetOption(options, "localeMatcher", "string",
1993 // « "lookup", "best fit" », "best fit").
1994 Maybe<Intl::MatcherOption> maybe_locale_matcher =
1995 Intl::GetLocaleMatcher(isolate, options_obj, method_name);
1996 MAYBE_RETURN(maybe_locale_matcher, MaybeHandle<JSObject>());
1997 Intl::MatcherOption matcher = maybe_locale_matcher.FromJust();
1998
1999 // 3. If matcher is "best fit", then
2000 // a. Let supportedLocales be BestFitSupportedLocales(availableLocales,
2001 // requestedLocales).
2002 if (matcher == Intl::MatcherOption::kBestFit &&
2003 FLAG_harmony_intl_best_fit_matcher) {
2004 supported_locales =
2005 BestFitSupportedLocales(isolate, available_locales, requested_locales);
2006 } else {
2007 // 4. Else,
2008 // a. Let supportedLocales be LookupSupportedLocales(availableLocales,
2009 // requestedLocales).
2010 supported_locales =
2011 LookupSupportedLocales(available_locales, requested_locales);
2012 }
2013
2014 // 5. Return CreateArrayFromList(supportedLocales).
2015 return CreateArrayFromList(isolate, supported_locales,
2016 PropertyAttributes::NONE);
2017 }
2018
2019 } // namespace
2020
2021 // ecma-402 #sec-intl.getcanonicallocales
GetCanonicalLocales(Isolate * isolate,Handle<Object> locales)2022 MaybeHandle<JSArray> Intl::GetCanonicalLocales(Isolate* isolate,
2023 Handle<Object> locales) {
2024 // 1. Let ll be ? CanonicalizeLocaleList(locales).
2025 Maybe<std::vector<std::string>> maybe_ll =
2026 CanonicalizeLocaleList(isolate, locales, false);
2027 MAYBE_RETURN(maybe_ll, MaybeHandle<JSArray>());
2028
2029 // 2. Return CreateArrayFromList(ll).
2030 return CreateArrayFromList(isolate, maybe_ll.FromJust(),
2031 PropertyAttributes::NONE);
2032 }
2033
2034 namespace {
2035
AvailableCollations(Isolate * isolate)2036 MaybeHandle<JSArray> AvailableCollations(Isolate* isolate) {
2037 UErrorCode status = U_ZERO_ERROR;
2038 std::unique_ptr<icu::StringEnumeration> enumeration(
2039 icu::Collator::getKeywordValues("collation", status));
2040 if (U_FAILURE(status)) {
2041 THROW_NEW_ERROR(isolate, NewRangeError(MessageTemplate::kIcuError),
2042 JSArray);
2043 }
2044 return Intl::ToJSArray(isolate, "co", enumeration.get(),
2045 Intl::RemoveCollation, true);
2046 }
2047
VectorToJSArray(Isolate * isolate,const std::vector<std::string> & array)2048 MaybeHandle<JSArray> VectorToJSArray(Isolate* isolate,
2049 const std::vector<std::string>& array) {
2050 Factory* factory = isolate->factory();
2051 Handle<FixedArray> fixed_array =
2052 factory->NewFixedArray(static_cast<int32_t>(array.size()));
2053 int32_t index = 0;
2054 for (std::string item : array) {
2055 Handle<String> str = factory->NewStringFromAsciiChecked(item.c_str());
2056 fixed_array->set(index++, *str);
2057 }
2058 return factory->NewJSArrayWithElements(fixed_array);
2059 }
2060
2061 namespace {
2062
2063 class ResourceAvailableCurrencies {
2064 public:
ResourceAvailableCurrencies()2065 ResourceAvailableCurrencies() {
2066 UErrorCode status = U_ZERO_ERROR;
2067 UEnumeration* uenum =
2068 ucurr_openISOCurrencies(UCURR_COMMON | UCURR_NON_DEPRECATED, &status);
2069 DCHECK(U_SUCCESS(status));
2070 const char* next = nullptr;
2071 while (U_SUCCESS(status) &&
2072 (next = uenum_next(uenum, nullptr, &status)) != nullptr) {
2073 // Work around the issue that we do not support VEF currency code
2074 // in DisplayNames by not reporting it.
2075 if (strcmp(next, "VEF") == 0) continue;
2076 AddIfAvailable(next);
2077 }
2078 // Work around the issue that we do support the following currency codes
2079 // in DisplayNames but the ICU API is not reporting it.
2080 AddIfAvailable("SVC");
2081 AddIfAvailable("XDR");
2082 AddIfAvailable("XSU");
2083 AddIfAvailable("ZWL");
2084 std::sort(list_.begin(), list_.end());
2085 uenum_close(uenum);
2086 }
2087
Get() const2088 const std::vector<std::string>& Get() const { return list_; }
2089
AddIfAvailable(const char * currency)2090 void AddIfAvailable(const char* currency) {
2091 icu::UnicodeString code(currency, -1, US_INV);
2092 UErrorCode status = U_ZERO_ERROR;
2093 int32_t len = 0;
2094 const UChar* result =
2095 ucurr_getName(code.getTerminatedBuffer(), "en", UCURR_LONG_NAME,
2096 nullptr, &len, &status);
2097 if (U_SUCCESS(status) &&
2098 u_strcmp(result, code.getTerminatedBuffer()) != 0) {
2099 list_.push_back(currency);
2100 }
2101 }
2102
2103 private:
2104 std::vector<std::string> list_;
2105 };
2106
GetAvailableCurrencies()2107 const std::vector<std::string>& GetAvailableCurrencies() {
2108 static base::LazyInstance<ResourceAvailableCurrencies>::type
2109 available_currencies = LAZY_INSTANCE_INITIALIZER;
2110 return available_currencies.Pointer()->Get();
2111 }
2112 } // namespace
2113
AvailableCurrencies(Isolate * isolate)2114 MaybeHandle<JSArray> AvailableCurrencies(Isolate* isolate) {
2115 return VectorToJSArray(isolate, GetAvailableCurrencies());
2116 }
2117
AvailableNumberingSystems(Isolate * isolate)2118 MaybeHandle<JSArray> AvailableNumberingSystems(Isolate* isolate) {
2119 UErrorCode status = U_ZERO_ERROR;
2120 std::unique_ptr<icu::StringEnumeration> enumeration(
2121 icu::NumberingSystem::getAvailableNames(status));
2122 if (U_FAILURE(status)) {
2123 THROW_NEW_ERROR(isolate, NewRangeError(MessageTemplate::kIcuError),
2124 JSArray);
2125 }
2126 // Need to filter out isAlgorithmic
2127 return Intl::ToJSArray(
2128 isolate, "nu", enumeration.get(),
2129 [](const char* value) {
2130 UErrorCode status = U_ZERO_ERROR;
2131 std::unique_ptr<icu::NumberingSystem> numbering_system(
2132 icu::NumberingSystem::createInstanceByName(value, status));
2133 // Skip algorithmic one since chrome filter out the resource.
2134 return U_FAILURE(status) || numbering_system->isAlgorithmic();
2135 },
2136 true);
2137 }
2138
AvailableTimeZones(Isolate * isolate)2139 MaybeHandle<JSArray> AvailableTimeZones(Isolate* isolate) {
2140 UErrorCode status = U_ZERO_ERROR;
2141 std::unique_ptr<icu::StringEnumeration> enumeration(
2142 icu::TimeZone::createTimeZoneIDEnumeration(
2143 UCAL_ZONE_TYPE_CANONICAL_LOCATION, nullptr, nullptr, status));
2144 if (U_FAILURE(status)) {
2145 THROW_NEW_ERROR(isolate, NewRangeError(MessageTemplate::kIcuError),
2146 JSArray);
2147 }
2148 return Intl::ToJSArray(isolate, nullptr, enumeration.get(), nullptr, true);
2149 }
2150
AvailableUnits(Isolate * isolate)2151 MaybeHandle<JSArray> AvailableUnits(Isolate* isolate) {
2152 Factory* factory = isolate->factory();
2153 std::set<std::string> sanctioned(Intl::SanctionedSimpleUnits());
2154 Handle<FixedArray> fixed_array =
2155 factory->NewFixedArray(static_cast<int32_t>(sanctioned.size()));
2156 int32_t index = 0;
2157 for (std::string item : sanctioned) {
2158 Handle<String> str = factory->NewStringFromAsciiChecked(item.c_str());
2159 fixed_array->set(index++, *str);
2160 }
2161 return factory->NewJSArrayWithElements(fixed_array);
2162 }
2163
2164 } // namespace
2165
2166 // ecma-402 #sec-intl.supportedvaluesof
SupportedValuesOf(Isolate * isolate,Handle<Object> key_obj)2167 MaybeHandle<JSArray> Intl::SupportedValuesOf(Isolate* isolate,
2168 Handle<Object> key_obj) {
2169 Factory* factory = isolate->factory();
2170 // 1. 1. Let key be ? ToString(key).
2171 Handle<String> key_str;
2172 ASSIGN_RETURN_ON_EXCEPTION(isolate, key_str,
2173 Object::ToString(isolate, key_obj), JSArray);
2174 // 2. If key is "calendar", then
2175 if (factory->calendar_string()->Equals(*key_str)) {
2176 // a. Let list be ! AvailableCalendars( ).
2177 return Intl::AvailableCalendars(isolate);
2178 }
2179 // 3. Else if key is "collation", then
2180 if (factory->collation_string()->Equals(*key_str)) {
2181 // a. Let list be ! AvailableCollations( ).
2182 return AvailableCollations(isolate);
2183 }
2184 // 4. Else if key is "currency", then
2185 if (factory->currency_string()->Equals(*key_str)) {
2186 // a. Let list be ! AvailableCurrencies( ).
2187 return AvailableCurrencies(isolate);
2188 }
2189 // 5. Else if key is "numberingSystem", then
2190 if (factory->numberingSystem_string()->Equals(*key_str)) {
2191 // a. Let list be ! AvailableNumberingSystems( ).
2192 return AvailableNumberingSystems(isolate);
2193 }
2194 // 6. Else if key is "timeZone", then
2195 if (factory->timeZone_string()->Equals(*key_str)) {
2196 // a. Let list be ! AvailableTimeZones( ).
2197 return AvailableTimeZones(isolate);
2198 }
2199 // 7. Else if key is "unit", then
2200 if (factory->unit_string()->Equals(*key_str)) {
2201 // a. Let list be ! AvailableUnits( ).
2202 return AvailableUnits(isolate);
2203 }
2204 // 8. Else,
2205 // a. Throw a RangeError exception.
2206 // 9. Return ! CreateArrayFromList( list ).
2207
2208 THROW_NEW_ERROR(
2209 isolate,
2210 NewRangeError(MessageTemplate::kInvalid,
2211 factory->NewStringFromStaticChars("key"), key_str),
2212 JSArray);
2213 }
2214
2215 // ECMA 402 Intl.*.supportedLocalesOf
SupportedLocalesOf(Isolate * isolate,const char * method_name,const std::set<std::string> & available_locales,Handle<Object> locales,Handle<Object> options)2216 MaybeHandle<JSObject> Intl::SupportedLocalesOf(
2217 Isolate* isolate, const char* method_name,
2218 const std::set<std::string>& available_locales, Handle<Object> locales,
2219 Handle<Object> options) {
2220 // Let availableLocales be %Collator%.[[AvailableLocales]].
2221
2222 // Let requestedLocales be ? CanonicalizeLocaleList(locales).
2223 Maybe<std::vector<std::string>> requested_locales =
2224 CanonicalizeLocaleList(isolate, locales, false);
2225 MAYBE_RETURN(requested_locales, MaybeHandle<JSObject>());
2226
2227 // Return ? SupportedLocales(availableLocales, requestedLocales, options).
2228 return SupportedLocales(isolate, method_name, available_locales,
2229 requested_locales.FromJust(), options);
2230 }
2231
2232 namespace {
2233
2234 template <typename T>
IsValidExtension(const icu::Locale & locale,const char * key,const std::string & value)2235 bool IsValidExtension(const icu::Locale& locale, const char* key,
2236 const std::string& value) {
2237 const char* legacy_type = uloc_toLegacyType(key, value.c_str());
2238 if (legacy_type == nullptr) {
2239 return false;
2240 }
2241 UErrorCode status = U_ZERO_ERROR;
2242 std::unique_ptr<icu::StringEnumeration> enumeration(
2243 T::getKeywordValuesForLocale(key, icu::Locale(locale.getBaseName()),
2244 false, status));
2245 if (U_FAILURE(status)) {
2246 return false;
2247 }
2248 int32_t length;
2249 for (const char* item = enumeration->next(&length, status);
2250 U_SUCCESS(status) && item != nullptr;
2251 item = enumeration->next(&length, status)) {
2252 if (strcmp(legacy_type, item) == 0) {
2253 return true;
2254 }
2255 }
2256 return false;
2257 }
2258
2259 } // namespace
2260
IsValidCollation(const icu::Locale & locale,const std::string & value)2261 bool Intl::IsValidCollation(const icu::Locale& locale,
2262 const std::string& value) {
2263 std::set<std::string> invalid_values = {"standard", "search"};
2264 if (invalid_values.find(value) != invalid_values.end()) return false;
2265 return IsValidExtension<icu::Collator>(locale, "collation", value);
2266 }
2267
IsWellFormedCalendar(const std::string & value)2268 bool Intl::IsWellFormedCalendar(const std::string& value) {
2269 return JSLocale::Is38AlphaNumList(value);
2270 }
2271
2272 // ecma402/#sec-iswellformedcurrencycode
IsWellFormedCurrency(const std::string & currency)2273 bool Intl::IsWellFormedCurrency(const std::string& currency) {
2274 return JSLocale::Is3Alpha(currency);
2275 }
2276
IsValidCalendar(const icu::Locale & locale,const std::string & value)2277 bool Intl::IsValidCalendar(const icu::Locale& locale,
2278 const std::string& value) {
2279 return IsValidExtension<icu::Calendar>(locale, "calendar", value);
2280 }
2281
IsValidNumberingSystem(const std::string & value)2282 bool Intl::IsValidNumberingSystem(const std::string& value) {
2283 std::set<std::string> invalid_values = {"native", "traditio", "finance"};
2284 if (invalid_values.find(value) != invalid_values.end()) return false;
2285 UErrorCode status = U_ZERO_ERROR;
2286 std::unique_ptr<icu::NumberingSystem> numbering_system(
2287 icu::NumberingSystem::createInstanceByName(value.c_str(), status));
2288 return U_SUCCESS(status) && numbering_system.get() != nullptr &&
2289 !numbering_system->isAlgorithmic();
2290 }
2291
2292 namespace {
2293
IsWellFormedNumberingSystem(const std::string & value)2294 bool IsWellFormedNumberingSystem(const std::string& value) {
2295 return JSLocale::Is38AlphaNumList(value);
2296 }
2297
LookupAndValidateUnicodeExtensions(icu::Locale * icu_locale,const std::set<std::string> & relevant_keys)2298 std::map<std::string, std::string> LookupAndValidateUnicodeExtensions(
2299 icu::Locale* icu_locale, const std::set<std::string>& relevant_keys) {
2300 std::map<std::string, std::string> extensions;
2301
2302 UErrorCode status = U_ZERO_ERROR;
2303 icu::LocaleBuilder builder;
2304 builder.setLocale(*icu_locale).clearExtensions();
2305 std::unique_ptr<icu::StringEnumeration> keywords(
2306 icu_locale->createKeywords(status));
2307 if (U_FAILURE(status)) return extensions;
2308
2309 if (!keywords) return extensions;
2310 char value[ULOC_FULLNAME_CAPACITY];
2311
2312 int32_t length;
2313 status = U_ZERO_ERROR;
2314 for (const char* keyword = keywords->next(&length, status);
2315 keyword != nullptr; keyword = keywords->next(&length, status)) {
2316 // Ignore failures in ICU and skip to the next keyword.
2317 //
2318 // This is fine.™
2319 if (U_FAILURE(status)) {
2320 status = U_ZERO_ERROR;
2321 continue;
2322 }
2323
2324 icu_locale->getKeywordValue(keyword, value, ULOC_FULLNAME_CAPACITY, status);
2325
2326 // Ignore failures in ICU and skip to the next keyword.
2327 //
2328 // This is fine.™
2329 if (U_FAILURE(status)) {
2330 status = U_ZERO_ERROR;
2331 continue;
2332 }
2333
2334 const char* bcp47_key = uloc_toUnicodeLocaleKey(keyword);
2335
2336 if (bcp47_key && (relevant_keys.find(bcp47_key) != relevant_keys.end())) {
2337 const char* bcp47_value = uloc_toUnicodeLocaleType(bcp47_key, value);
2338 bool is_valid_value = false;
2339 // 8.h.ii.1.a If keyLocaleData contains requestedValue, then
2340 if (strcmp("ca", bcp47_key) == 0) {
2341 is_valid_value = Intl::IsValidCalendar(*icu_locale, bcp47_value);
2342 } else if (strcmp("co", bcp47_key) == 0) {
2343 is_valid_value = Intl::IsValidCollation(*icu_locale, bcp47_value);
2344 } else if (strcmp("hc", bcp47_key) == 0) {
2345 // https://www.unicode.org/repos/cldr/tags/latest/common/bcp47/calendar.xml
2346 std::set<std::string> valid_values = {"h11", "h12", "h23", "h24"};
2347 is_valid_value = valid_values.find(bcp47_value) != valid_values.end();
2348 } else if (strcmp("lb", bcp47_key) == 0) {
2349 // https://www.unicode.org/repos/cldr/tags/latest/common/bcp47/segmentation.xml
2350 std::set<std::string> valid_values = {"strict", "normal", "loose"};
2351 is_valid_value = valid_values.find(bcp47_value) != valid_values.end();
2352 } else if (strcmp("kn", bcp47_key) == 0) {
2353 // https://www.unicode.org/repos/cldr/tags/latest/common/bcp47/collation.xml
2354 std::set<std::string> valid_values = {"true", "false"};
2355 is_valid_value = valid_values.find(bcp47_value) != valid_values.end();
2356 } else if (strcmp("kf", bcp47_key) == 0) {
2357 // https://www.unicode.org/repos/cldr/tags/latest/common/bcp47/collation.xml
2358 std::set<std::string> valid_values = {"upper", "lower", "false"};
2359 is_valid_value = valid_values.find(bcp47_value) != valid_values.end();
2360 } else if (strcmp("nu", bcp47_key) == 0) {
2361 is_valid_value = Intl::IsValidNumberingSystem(bcp47_value);
2362 }
2363 if (is_valid_value) {
2364 extensions.insert(
2365 std::pair<std::string, std::string>(bcp47_key, bcp47_value));
2366 builder.setUnicodeLocaleKeyword(bcp47_key, bcp47_value);
2367 }
2368 }
2369 }
2370
2371 status = U_ZERO_ERROR;
2372 *icu_locale = builder.build(status);
2373
2374 return extensions;
2375 }
2376
2377 // ecma402/#sec-lookupmatcher
LookupMatcher(Isolate * isolate,const std::set<std::string> & available_locales,const std::vector<std::string> & requested_locales)2378 std::string LookupMatcher(Isolate* isolate,
2379 const std::set<std::string>& available_locales,
2380 const std::vector<std::string>& requested_locales) {
2381 // 1. Let result be a new Record.
2382 std::string result;
2383
2384 // 2. For each element locale of requestedLocales in List order, do
2385 for (const std::string& locale : requested_locales) {
2386 // 2. a. Let noExtensionsLocale be the String value that is locale
2387 // with all Unicode locale extension sequences removed.
2388 ParsedLocale parsed_locale = ParseBCP47Locale(locale);
2389 std::string no_extensions_locale = parsed_locale.no_extensions_locale;
2390
2391 // 2. b. Let availableLocale be
2392 // BestAvailableLocale(availableLocales, noExtensionsLocale).
2393 std::string available_locale =
2394 BestAvailableLocale(available_locales, no_extensions_locale);
2395
2396 // 2. c. If availableLocale is not undefined, append locale to the
2397 // end of subset.
2398 if (!available_locale.empty()) {
2399 // Note: The following steps are not performed here because we
2400 // can use ICU to parse the unicode locale extension sequence
2401 // as part of Intl::ResolveLocale.
2402 //
2403 // There's no need to separate the unicode locale extensions
2404 // right here. Instead just return the available locale with the
2405 // extensions.
2406 //
2407 // 2. c. i. Set result.[[locale]] to availableLocale.
2408 // 2. c. ii. If locale and noExtensionsLocale are not the same
2409 // String value, then
2410 // 2. c. ii. 1. Let extension be the String value consisting of
2411 // the first substring of locale that is a Unicode locale
2412 // extension sequence.
2413 // 2. c. ii. 2. Set result.[[extension]] to extension.
2414 // 2. c. iii. Return result.
2415 return available_locale + parsed_locale.extension;
2416 }
2417 }
2418
2419 // 3. Let defLocale be DefaultLocale();
2420 // 4. Set result.[[locale]] to defLocale.
2421 // 5. Return result.
2422 return isolate->DefaultLocale();
2423 }
2424
2425 } // namespace
2426
2427 // This function doesn't correspond exactly with the spec. Instead
2428 // we use ICU to do all the string manipulations that the spec
2429 // peforms.
2430 //
2431 // The spec uses this function to normalize values for various
2432 // relevant extension keys (such as disallowing "search" for
2433 // collation). Instead of doing this here, we let the callers of
2434 // this method perform such normalization.
2435 //
2436 // ecma402/#sec-resolvelocale
ResolveLocale(Isolate * isolate,const std::set<std::string> & available_locales,const std::vector<std::string> & requested_locales,MatcherOption matcher,const std::set<std::string> & relevant_extension_keys)2437 Maybe<Intl::ResolvedLocale> Intl::ResolveLocale(
2438 Isolate* isolate, const std::set<std::string>& available_locales,
2439 const std::vector<std::string>& requested_locales, MatcherOption matcher,
2440 const std::set<std::string>& relevant_extension_keys) {
2441 std::string locale;
2442 if (matcher == Intl::MatcherOption::kBestFit &&
2443 FLAG_harmony_intl_best_fit_matcher) {
2444 locale = BestFitMatcher(isolate, available_locales, requested_locales);
2445 } else {
2446 locale = LookupMatcher(isolate, available_locales, requested_locales);
2447 }
2448
2449 Maybe<icu::Locale> maybe_icu_locale = CreateICULocale(locale);
2450 MAYBE_RETURN(maybe_icu_locale, Nothing<Intl::ResolvedLocale>());
2451 icu::Locale icu_locale = maybe_icu_locale.FromJust();
2452 std::map<std::string, std::string> extensions =
2453 LookupAndValidateUnicodeExtensions(&icu_locale, relevant_extension_keys);
2454
2455 std::string canonicalized_locale = Intl::ToLanguageTag(icu_locale).FromJust();
2456
2457 // TODO(gsathya): Remove privateuse subtags from extensions.
2458
2459 return Just(
2460 Intl::ResolvedLocale{canonicalized_locale, icu_locale, extensions});
2461 }
2462
SetTextToBreakIterator(Isolate * isolate,Handle<String> text,icu::BreakIterator * break_iterator)2463 Handle<Managed<icu::UnicodeString>> Intl::SetTextToBreakIterator(
2464 Isolate* isolate, Handle<String> text, icu::BreakIterator* break_iterator) {
2465 text = String::Flatten(isolate, text);
2466 icu::UnicodeString* u_text = static_cast<icu::UnicodeString*>(
2467 Intl::ToICUUnicodeString(isolate, text).clone());
2468
2469 Handle<Managed<icu::UnicodeString>> new_u_text =
2470 Managed<icu::UnicodeString>::FromRawPtr(isolate, 0, u_text);
2471
2472 break_iterator->setText(*u_text);
2473 return new_u_text;
2474 }
2475
2476 // ecma262 #sec-string.prototype.normalize
Normalize(Isolate * isolate,Handle<String> string,Handle<Object> form_input)2477 MaybeHandle<String> Intl::Normalize(Isolate* isolate, Handle<String> string,
2478 Handle<Object> form_input) {
2479 const char* form_name;
2480 UNormalization2Mode form_mode;
2481 if (form_input->IsUndefined(isolate)) {
2482 // default is FNC
2483 form_name = "nfc";
2484 form_mode = UNORM2_COMPOSE;
2485 } else {
2486 Handle<String> form;
2487 ASSIGN_RETURN_ON_EXCEPTION(isolate, form,
2488 Object::ToString(isolate, form_input), String);
2489
2490 if (String::Equals(isolate, form, isolate->factory()->NFC_string())) {
2491 form_name = "nfc";
2492 form_mode = UNORM2_COMPOSE;
2493 } else if (String::Equals(isolate, form,
2494 isolate->factory()->NFD_string())) {
2495 form_name = "nfc";
2496 form_mode = UNORM2_DECOMPOSE;
2497 } else if (String::Equals(isolate, form,
2498 isolate->factory()->NFKC_string())) {
2499 form_name = "nfkc";
2500 form_mode = UNORM2_COMPOSE;
2501 } else if (String::Equals(isolate, form,
2502 isolate->factory()->NFKD_string())) {
2503 form_name = "nfkc";
2504 form_mode = UNORM2_DECOMPOSE;
2505 } else {
2506 Handle<String> valid_forms =
2507 isolate->factory()->NewStringFromStaticChars("NFC, NFD, NFKC, NFKD");
2508 THROW_NEW_ERROR(
2509 isolate,
2510 NewRangeError(MessageTemplate::kNormalizationForm, valid_forms),
2511 String);
2512 }
2513 }
2514
2515 int length = string->length();
2516 string = String::Flatten(isolate, string);
2517 icu::UnicodeString result;
2518 std::unique_ptr<base::uc16[]> sap;
2519 UErrorCode status = U_ZERO_ERROR;
2520 icu::UnicodeString input = ToICUUnicodeString(isolate, string);
2521 // Getting a singleton. Should not free it.
2522 const icu::Normalizer2* normalizer =
2523 icu::Normalizer2::getInstance(nullptr, form_name, form_mode, status);
2524 DCHECK(U_SUCCESS(status));
2525 DCHECK_NOT_NULL(normalizer);
2526 int32_t normalized_prefix_length =
2527 normalizer->spanQuickCheckYes(input, status);
2528 // Quick return if the input is already normalized.
2529 if (length == normalized_prefix_length) return string;
2530 icu::UnicodeString unnormalized =
2531 input.tempSubString(normalized_prefix_length);
2532 // Read-only alias of the normalized prefix.
2533 result.setTo(false, input.getBuffer(), normalized_prefix_length);
2534 // copy-on-write; normalize the suffix and append to |result|.
2535 normalizer->normalizeSecondAndAppend(result, unnormalized, status);
2536
2537 if (U_FAILURE(status)) {
2538 THROW_NEW_ERROR(isolate, NewTypeError(MessageTemplate::kIcuError), String);
2539 }
2540
2541 return Intl::ToString(isolate, result);
2542 }
2543
2544 // ICUTimezoneCache calls out to ICU for TimezoneCache
2545 // functionality in a straightforward way.
2546 class ICUTimezoneCache : public base::TimezoneCache {
2547 public:
ICUTimezoneCache()2548 ICUTimezoneCache() : timezone_(nullptr) { Clear(TimeZoneDetection::kSkip); }
2549
~ICUTimezoneCache()2550 ~ICUTimezoneCache() override { Clear(TimeZoneDetection::kSkip); }
2551
2552 const char* LocalTimezone(double time_ms) override;
2553
2554 double DaylightSavingsOffset(double time_ms) override;
2555
2556 double LocalTimeOffset(double time_ms, bool is_utc) override;
2557
2558 void Clear(TimeZoneDetection time_zone_detection) override;
2559
2560 private:
2561 icu::TimeZone* GetTimeZone();
2562
2563 bool GetOffsets(double time_ms, bool is_utc, int32_t* raw_offset,
2564 int32_t* dst_offset);
2565
2566 icu::TimeZone* timezone_;
2567
2568 std::string timezone_name_;
2569 std::string dst_timezone_name_;
2570 };
2571
LocalTimezone(double time_ms)2572 const char* ICUTimezoneCache::LocalTimezone(double time_ms) {
2573 bool is_dst = DaylightSavingsOffset(time_ms) != 0;
2574 std::string* name = is_dst ? &dst_timezone_name_ : &timezone_name_;
2575 if (name->empty()) {
2576 icu::UnicodeString result;
2577 GetTimeZone()->getDisplayName(is_dst, icu::TimeZone::LONG, result);
2578 result += '\0';
2579
2580 icu::StringByteSink<std::string> byte_sink(name);
2581 result.toUTF8(byte_sink);
2582 }
2583 DCHECK(!name->empty());
2584 return name->c_str();
2585 }
2586
GetTimeZone()2587 icu::TimeZone* ICUTimezoneCache::GetTimeZone() {
2588 if (timezone_ == nullptr) {
2589 timezone_ = icu::TimeZone::createDefault();
2590 }
2591 return timezone_;
2592 }
2593
GetOffsets(double time_ms,bool is_utc,int32_t * raw_offset,int32_t * dst_offset)2594 bool ICUTimezoneCache::GetOffsets(double time_ms, bool is_utc,
2595 int32_t* raw_offset, int32_t* dst_offset) {
2596 UErrorCode status = U_ZERO_ERROR;
2597 if (is_utc) {
2598 GetTimeZone()->getOffset(time_ms, false, *raw_offset, *dst_offset, status);
2599 } else {
2600 // Note that casting TimeZone to BasicTimeZone is safe because we know that
2601 // icu::TimeZone used here is a BasicTimeZone.
2602 static_cast<const icu::BasicTimeZone*>(GetTimeZone())
2603 ->getOffsetFromLocal(time_ms, UCAL_TZ_LOCAL_FORMER,
2604 UCAL_TZ_LOCAL_FORMER, *raw_offset, *dst_offset,
2605 status);
2606 }
2607
2608 return U_SUCCESS(status);
2609 }
2610
DaylightSavingsOffset(double time_ms)2611 double ICUTimezoneCache::DaylightSavingsOffset(double time_ms) {
2612 int32_t raw_offset, dst_offset;
2613 if (!GetOffsets(time_ms, true, &raw_offset, &dst_offset)) return 0;
2614 return dst_offset;
2615 }
2616
LocalTimeOffset(double time_ms,bool is_utc)2617 double ICUTimezoneCache::LocalTimeOffset(double time_ms, bool is_utc) {
2618 int32_t raw_offset, dst_offset;
2619 if (!GetOffsets(time_ms, is_utc, &raw_offset, &dst_offset)) return 0;
2620 return raw_offset + dst_offset;
2621 }
2622
Clear(TimeZoneDetection time_zone_detection)2623 void ICUTimezoneCache::Clear(TimeZoneDetection time_zone_detection) {
2624 delete timezone_;
2625 timezone_ = nullptr;
2626 timezone_name_.clear();
2627 dst_timezone_name_.clear();
2628 if (time_zone_detection == TimeZoneDetection::kRedetect) {
2629 icu::TimeZone::adoptDefault(icu::TimeZone::detectHostTimeZone());
2630 }
2631 }
2632
CreateTimeZoneCache()2633 base::TimezoneCache* Intl::CreateTimeZoneCache() {
2634 return FLAG_icu_timezone_data ? new ICUTimezoneCache()
2635 : base::OS::CreateTimezoneCache();
2636 }
2637
GetLocaleMatcher(Isolate * isolate,Handle<JSReceiver> options,const char * method_name)2638 Maybe<Intl::MatcherOption> Intl::GetLocaleMatcher(Isolate* isolate,
2639 Handle<JSReceiver> options,
2640 const char* method_name) {
2641 return GetStringOption<Intl::MatcherOption>(
2642 isolate, options, "localeMatcher", method_name, {"best fit", "lookup"},
2643 {Intl::MatcherOption::kBestFit, Intl::MatcherOption::kLookup},
2644 Intl::MatcherOption::kBestFit);
2645 }
2646
GetNumberingSystem(Isolate * isolate,Handle<JSReceiver> options,const char * method_name,std::unique_ptr<char[]> * result)2647 Maybe<bool> Intl::GetNumberingSystem(Isolate* isolate,
2648 Handle<JSReceiver> options,
2649 const char* method_name,
2650 std::unique_ptr<char[]>* result) {
2651 const std::vector<const char*> empty_values = {};
2652 Maybe<bool> maybe = GetStringOption(isolate, options, "numberingSystem",
2653 empty_values, method_name, result);
2654 MAYBE_RETURN(maybe, Nothing<bool>());
2655 if (maybe.FromJust() && *result != nullptr) {
2656 if (!IsWellFormedNumberingSystem(result->get())) {
2657 THROW_NEW_ERROR_RETURN_VALUE(
2658 isolate,
2659 NewRangeError(
2660 MessageTemplate::kInvalid,
2661 isolate->factory()->numberingSystem_string(),
2662 isolate->factory()->NewStringFromAsciiChecked(result->get())),
2663 Nothing<bool>());
2664 }
2665 return Just(true);
2666 }
2667 return Just(false);
2668 }
2669
GetAvailableLocales()2670 const std::set<std::string>& Intl::GetAvailableLocales() {
2671 static base::LazyInstance<Intl::AvailableLocales<>>::type available_locales =
2672 LAZY_INSTANCE_INITIALIZER;
2673 return available_locales.Pointer()->Get();
2674 }
2675
2676 namespace {
2677
// Stateless policy type used as the template argument of
// Intl::AvailableLocales in Intl::GetAvailableLocalesForDateFormat.
struct CheckCalendar {
  // The key string exposed by this policy.
  static const char* key() {
    return "calendar";
  }

  // This policy exposes no path.
  static const char* path() {
    return nullptr;
  }
};
2682
2683 } // namespace
2684
GetAvailableLocalesForDateFormat()2685 const std::set<std::string>& Intl::GetAvailableLocalesForDateFormat() {
2686 static base::LazyInstance<Intl::AvailableLocales<CheckCalendar>>::type
2687 available_locales = LAZY_INSTANCE_INITIALIZER;
2688 return available_locales.Pointer()->Get();
2689 }
2690
2691 constexpr uint16_t kInfinityChar = 0x221e;
2692
NumberFieldToType(Isolate * isolate,const NumberFormatSpan & part,const icu::UnicodeString & text,bool is_nan)2693 Handle<String> Intl::NumberFieldToType(Isolate* isolate,
2694 const NumberFormatSpan& part,
2695 const icu::UnicodeString& text,
2696 bool is_nan) {
2697 switch (static_cast<UNumberFormatFields>(part.field_id)) {
2698 case UNUM_INTEGER_FIELD:
2699 if (is_nan) return isolate->factory()->nan_string();
2700 if (text.charAt(part.begin_pos) == kInfinityChar ||
2701 // en-US-POSIX output "INF" for Infinity
2702 (part.end_pos - part.begin_pos == 3 &&
2703 text.tempSubString(part.begin_pos, 3) == "INF")) {
2704 return isolate->factory()->infinity_string();
2705 }
2706 return isolate->factory()->integer_string();
2707 case UNUM_FRACTION_FIELD:
2708 return isolate->factory()->fraction_string();
2709 case UNUM_DECIMAL_SEPARATOR_FIELD:
2710 return isolate->factory()->decimal_string();
2711 case UNUM_GROUPING_SEPARATOR_FIELD:
2712 return isolate->factory()->group_string();
2713 case UNUM_CURRENCY_FIELD:
2714 return isolate->factory()->currency_string();
2715 case UNUM_PERCENT_FIELD:
2716 return isolate->factory()->percentSign_string();
2717 case UNUM_SIGN_FIELD:
2718 return (text.charAt(part.begin_pos) == '+')
2719 ? isolate->factory()->plusSign_string()
2720 : isolate->factory()->minusSign_string();
2721 case UNUM_EXPONENT_SYMBOL_FIELD:
2722 return isolate->factory()->exponentSeparator_string();
2723
2724 case UNUM_EXPONENT_SIGN_FIELD:
2725 return isolate->factory()->exponentMinusSign_string();
2726
2727 case UNUM_EXPONENT_FIELD:
2728 return isolate->factory()->exponentInteger_string();
2729
2730 case UNUM_PERMILL_FIELD:
2731 // We're not creating any permill formatter, and it's not even clear how
2732 // that would be possible with the ICU API.
2733 UNREACHABLE();
2734
2735 case UNUM_COMPACT_FIELD:
2736 return isolate->factory()->compact_string();
2737 case UNUM_MEASURE_UNIT_FIELD:
2738 return isolate->factory()->unit_string();
2739
2740 default:
2741 UNREACHABLE();
2742 }
2743 }
2744
2745 // A helper function to convert the FormattedValue for several Intl objects.
FormattedToString(Isolate * isolate,const icu::FormattedValue & formatted)2746 MaybeHandle<String> Intl::FormattedToString(
2747 Isolate* isolate, const icu::FormattedValue& formatted) {
2748 UErrorCode status = U_ZERO_ERROR;
2749 icu::UnicodeString result = formatted.toString(status);
2750 if (U_FAILURE(status)) {
2751 THROW_NEW_ERROR(isolate, NewTypeError(MessageTemplate::kIcuError), String);
2752 }
2753 return Intl::ToString(isolate, result);
2754 }
2755
ToJSArray(Isolate * isolate,const char * unicode_key,icu::StringEnumeration * enumeration,const std::function<bool (const char *)> & removes,bool sort)2756 MaybeHandle<JSArray> Intl::ToJSArray(
2757 Isolate* isolate, const char* unicode_key,
2758 icu::StringEnumeration* enumeration,
2759 const std::function<bool(const char*)>& removes, bool sort) {
2760 UErrorCode status = U_ZERO_ERROR;
2761 std::vector<std::string> array;
2762 for (const char* item = enumeration->next(nullptr, status);
2763 U_SUCCESS(status) && item != nullptr;
2764 item = enumeration->next(nullptr, status)) {
2765 if (unicode_key != nullptr) {
2766 item = uloc_toUnicodeLocaleType(unicode_key, item);
2767 }
2768 if (removes == nullptr || !(removes)(item)) {
2769 array.push_back(item);
2770 }
2771 }
2772
2773 if (sort) {
2774 std::sort(array.begin(), array.end());
2775 }
2776 return VectorToJSArray(isolate, array);
2777 }
2778
RemoveCollation(const char * collation)2779 bool Intl::RemoveCollation(const char* collation) {
2780 return strcmp("standard", collation) == 0 || strcmp("search", collation) == 0;
2781 }
2782
2783 // See the list in ecma402 #sec-issanctionedsimpleunitidentifier
SanctionedSimpleUnits()2784 std::set<std::string> Intl::SanctionedSimpleUnits() {
2785 return std::set<std::string>({"acre", "bit", "byte",
2786 "celsius", "centimeter", "day",
2787 "degree", "fahrenheit", "fluid-ounce",
2788 "foot", "gallon", "gigabit",
2789 "gigabyte", "gram", "hectare",
2790 "hour", "inch", "kilobit",
2791 "kilobyte", "kilogram", "kilometer",
2792 "liter", "megabit", "megabyte",
2793 "meter", "mile", "mile-scandinavian",
2794 "millimeter", "milliliter", "millisecond",
2795 "minute", "month", "ounce",
2796 "percent", "petabyte", "pound",
2797 "second", "stone", "terabit",
2798 "terabyte", "week", "yard",
2799 "year"});
2800 }
2801
2802 // ecma-402/#sec-isvalidtimezonename
2803
2804 namespace {
IsUnicodeStringValidTimeZoneName(const icu::UnicodeString & id)2805 bool IsUnicodeStringValidTimeZoneName(const icu::UnicodeString& id) {
2806 UErrorCode status = U_ZERO_ERROR;
2807 icu::UnicodeString canonical;
2808 icu::TimeZone::getCanonicalID(id, canonical, status);
2809 return U_SUCCESS(status) &&
2810 canonical != icu::UnicodeString("Etc/Unknown", -1, US_INV);
2811 }
2812 } // namespace
2813
CanonicalizeTimeZoneName(Isolate * isolate,Handle<String> identifier)2814 MaybeHandle<String> Intl::CanonicalizeTimeZoneName(Isolate* isolate,
2815 Handle<String> identifier) {
2816 UErrorCode status = U_ZERO_ERROR;
2817 std::string time_zone =
2818 JSDateTimeFormat::CanonicalizeTimeZoneID(identifier->ToCString().get());
2819 icu::UnicodeString time_zone_ustring =
2820 icu::UnicodeString(time_zone.c_str(), -1, US_INV);
2821 icu::UnicodeString canonical;
2822 icu::TimeZone::getCanonicalID(time_zone_ustring, canonical, status);
2823 CHECK(U_SUCCESS(status));
2824 if (canonical == UNICODE_STRING_SIMPLE("Etc/UTC") ||
2825 canonical == UNICODE_STRING_SIMPLE("Etc/GMT")) {
2826 return isolate->factory()->UTC_string();
2827 }
2828 return Intl::ToString(isolate, canonical);
2829 }
2830
IsValidTimeZoneName(Isolate * isolate,Handle<String> id)2831 bool Intl::IsValidTimeZoneName(Isolate* isolate, Handle<String> id) {
2832 std::string time_zone =
2833 JSDateTimeFormat::CanonicalizeTimeZoneID(id->ToCString().get());
2834 icu::UnicodeString time_zone_ustring =
2835 icu::UnicodeString(time_zone.c_str(), -1, US_INV);
2836 return IsUnicodeStringValidTimeZoneName(time_zone_ustring);
2837 }
2838
IsValidTimeZoneName(const icu::TimeZone & tz)2839 bool Intl::IsValidTimeZoneName(const icu::TimeZone& tz) {
2840 icu::UnicodeString id;
2841 tz.getID(id);
2842 return IsUnicodeStringValidTimeZoneName(id);
2843 }
2844
2845 // Function to support Temporal
TimeZoneIdFromIndex(int32_t index)2846 std::string Intl::TimeZoneIdFromIndex(int32_t index) {
2847 if (index == 0) return "UTC";
2848 std::unique_ptr<icu::StringEnumeration> enumeration(
2849 icu::TimeZone::createEnumeration());
2850 int32_t curr = 0;
2851 const char* id;
2852
2853 UErrorCode status = U_ZERO_ERROR;
2854 while (U_SUCCESS(status) && curr < index &&
2855 ((id = enumeration->next(nullptr, status)) != nullptr)) {
2856 CHECK(U_SUCCESS(status));
2857 curr++;
2858 }
2859 CHECK(U_SUCCESS(status));
2860 CHECK(id != nullptr);
2861 return id;
2862 }
2863
GetTimeZoneIndex(Isolate * isolate,Handle<String> identifier,int32_t * index)2864 Maybe<bool> Intl::GetTimeZoneIndex(Isolate* isolate, Handle<String> identifier,
2865 int32_t* index) {
2866 if (identifier->Equals(*isolate->factory()->UTC_string())) {
2867 *index = 0;
2868 return Just(true);
2869 }
2870
2871 std::string identifier_str(identifier->ToCString().get());
2872 std::unique_ptr<icu::TimeZone> tz(
2873 icu::TimeZone::createTimeZone(identifier_str.c_str()));
2874 if (!IsValidTimeZoneName(*tz)) {
2875 return Just(false);
2876 }
2877
2878 std::unique_ptr<icu::StringEnumeration> enumeration(
2879 icu::TimeZone::createEnumeration());
2880 int32_t curr = 0;
2881 const char* id;
2882
2883 UErrorCode status = U_ZERO_ERROR;
2884 while (U_SUCCESS(status) &&
2885 (id = enumeration->next(nullptr, status)) != nullptr) {
2886 if (identifier_str == id) {
2887 *index = curr + 1;
2888 return Just(true);
2889 }
2890 curr++;
2891 }
2892 CHECK(U_SUCCESS(status));
2893 // We should not reach here, the !IsValidTimeZoneName should return earlier
2894 UNREACHABLE();
2895 }
2896
2897 // #sec-tointlmathematicalvalue
ToIntlMathematicalValueAsNumberBigIntOrString(Isolate * isolate,Handle<Object> input)2898 MaybeHandle<Object> Intl::ToIntlMathematicalValueAsNumberBigIntOrString(
2899 Isolate* isolate, Handle<Object> input) {
2900 if (input->IsNumber() || input->IsBigInt()) return input; // Shortcut.
2901 // TODO(ftang) revisit the following after the resolution of
2902 // https://github.com/tc39/proposal-intl-numberformat-v3/pull/82
2903 if (input->IsOddball()) {
2904 return Oddball::ToNumber(isolate, Handle<Oddball>::cast(input));
2905 }
2906 if (input->IsSymbol()) {
2907 THROW_NEW_ERROR(isolate, NewTypeError(MessageTemplate::kSymbolToNumber),
2908 Object);
2909 }
2910 ASSIGN_RETURN_ON_EXCEPTION(
2911 isolate, input,
2912 JSReceiver::ToPrimitive(isolate, Handle<JSReceiver>::cast(input),
2913 ToPrimitiveHint::kNumber),
2914 Object);
2915 if (input->IsString()) UNIMPLEMENTED();
2916 return input;
2917 }
2918
FormatRangeSourceTracker()2919 Intl::FormatRangeSourceTracker::FormatRangeSourceTracker() {
2920 start_[0] = start_[1] = limit_[0] = limit_[1] = 0;
2921 }
2922
Add(int32_t field,int32_t start,int32_t limit)2923 void Intl::FormatRangeSourceTracker::Add(int32_t field, int32_t start,
2924 int32_t limit) {
2925 DCHECK_LT(field, 2);
2926 start_[field] = start;
2927 limit_[field] = limit;
2928 }
2929
GetSource(int32_t start,int32_t limit) const2930 Intl::FormatRangeSource Intl::FormatRangeSourceTracker::GetSource(
2931 int32_t start, int32_t limit) const {
2932 FormatRangeSource source = FormatRangeSource::kShared;
2933 if (FieldContains(0, start, limit)) {
2934 source = FormatRangeSource::kStartRange;
2935 } else if (FieldContains(1, start, limit)) {
2936 source = FormatRangeSource::kEndRange;
2937 }
2938 return source;
2939 }
2940
FieldContains(int32_t field,int32_t start,int32_t limit) const2941 bool Intl::FormatRangeSourceTracker::FieldContains(int32_t field, int32_t start,
2942 int32_t limit) const {
2943 DCHECK_LT(field, 2);
2944 return (start_[field] <= start) && (start <= limit_[field]) &&
2945 (start_[field] <= limit) && (limit <= limit_[field]);
2946 }
2947
SourceString(Isolate * isolate,FormatRangeSource source)2948 Handle<String> Intl::SourceString(Isolate* isolate, FormatRangeSource source) {
2949 switch (source) {
2950 case FormatRangeSource::kShared:
2951 return ReadOnlyRoots(isolate).shared_string_handle();
2952 case FormatRangeSource::kStartRange:
2953 return ReadOnlyRoots(isolate).startRange_string_handle();
2954 case FormatRangeSource::kEndRange:
2955 return ReadOnlyRoots(isolate).endRange_string_handle();
2956 }
2957 }
2958
2959 } // namespace internal
2960 } // namespace v8
2961