• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2014 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 
6 #ifdef V8_I18N_SUPPORT
7 #include "src/runtime/runtime-utils.h"
8 
9 #include <memory>
10 
11 #include "src/api-natives.h"
12 #include "src/api.h"
13 #include "src/arguments.h"
14 #include "src/factory.h"
15 #include "src/i18n.h"
16 #include "src/isolate-inl.h"
17 #include "src/messages.h"
18 #include "src/string-case.h"
19 #include "src/utils.h"
20 
21 #include "unicode/brkiter.h"
22 #include "unicode/calendar.h"
23 #include "unicode/coll.h"
24 #include "unicode/curramt.h"
25 #include "unicode/datefmt.h"
26 #include "unicode/dcfmtsym.h"
27 #include "unicode/decimfmt.h"
28 #include "unicode/dtfmtsym.h"
29 #include "unicode/dtptngen.h"
30 #include "unicode/fieldpos.h"
31 #include "unicode/fpositer.h"
32 #include "unicode/locid.h"
33 #include "unicode/normalizer2.h"
34 #include "unicode/numfmt.h"
35 #include "unicode/numsys.h"
36 #include "unicode/rbbi.h"
37 #include "unicode/smpdtfmt.h"
38 #include "unicode/timezone.h"
39 #include "unicode/translit.h"
40 #include "unicode/uchar.h"
41 #include "unicode/ucol.h"
42 #include "unicode/ucurr.h"
43 #include "unicode/uloc.h"
44 #include "unicode/unistr.h"
45 #include "unicode/unum.h"
46 #include "unicode/ustring.h"
47 #include "unicode/uversion.h"
48 
49 
50 namespace v8 {
51 namespace internal {
52 namespace {
53 
GetUCharBufferFromFlat(const String::FlatContent & flat,std::unique_ptr<uc16[]> * dest,int32_t length)54 const UChar* GetUCharBufferFromFlat(const String::FlatContent& flat,
55                                     std::unique_ptr<uc16[]>* dest,
56                                     int32_t length) {
57   DCHECK(flat.IsFlat());
58   if (flat.IsOneByte()) {
59     if (!*dest) {
60       dest->reset(NewArray<uc16>(length));
61       CopyChars(dest->get(), flat.ToOneByteVector().start(), length);
62     }
63     return reinterpret_cast<const UChar*>(dest->get());
64   } else {
65     return reinterpret_cast<const UChar*>(flat.ToUC16Vector().start());
66   }
67 }
68 
69 }  // namespace
70 
71 // ECMA 402 6.2.3
RUNTIME_FUNCTION(Runtime_CanonicalizeLanguageTag)72 RUNTIME_FUNCTION(Runtime_CanonicalizeLanguageTag) {
73   HandleScope scope(isolate);
74   Factory* factory = isolate->factory();
75 
76   DCHECK_EQ(1, args.length());
77   CONVERT_ARG_HANDLE_CHECKED(String, locale_id_str, 0);
78 
79   v8::String::Utf8Value locale_id(v8::Utils::ToLocal(locale_id_str));
80 
81   // Return value which denotes invalid language tag.
82   // TODO(jshin): Can uloc_{for,to}TanguageTag fail even for structually valid
83   // language tags? If not, just add CHECK instead of returning 'invalid-tag'.
84   const char* const kInvalidTag = "invalid-tag";
85 
86   UErrorCode error = U_ZERO_ERROR;
87   char icu_result[ULOC_FULLNAME_CAPACITY];
88   int icu_length = 0;
89 
90   uloc_forLanguageTag(*locale_id, icu_result, ULOC_FULLNAME_CAPACITY,
91                       &icu_length, &error);
92   if (U_FAILURE(error) || icu_length == 0) {
93     return *factory->NewStringFromAsciiChecked(kInvalidTag);
94   }
95 
96   char result[ULOC_FULLNAME_CAPACITY];
97 
98   // Force strict BCP47 rules.
99   uloc_toLanguageTag(icu_result, result, ULOC_FULLNAME_CAPACITY, TRUE, &error);
100 
101   if (U_FAILURE(error)) {
102     return *factory->NewStringFromAsciiChecked(kInvalidTag);
103   }
104 
105   return *factory->NewStringFromAsciiChecked(result);
106 }
107 
108 
// Returns a new object describing the locales ICU makes available for the
// service named by argument 0 ("collator", "numberformat", "dateformat" or
// "breakiterator"). Each own property key is a locale's language tag and its
// value is that locale's index in ICU's list. Any other service name yields
// an empty object. Locales whose tag cannot be produced are skipped.
RUNTIME_FUNCTION(Runtime_AvailableLocalesOf) {
  HandleScope scope(isolate);
  Factory* factory = isolate->factory();

  DCHECK_EQ(1, args.length());
  CONVERT_ARG_HANDLE_CHECKED(String, service, 0);

  const icu::Locale* available_locales = nullptr;
  int32_t count = 0;

  if (service->IsUtf8EqualTo(CStrVector("collator"))) {
    available_locales = icu::Collator::getAvailableLocales(count);
  } else if (service->IsUtf8EqualTo(CStrVector("numberformat"))) {
    available_locales = icu::NumberFormat::getAvailableLocales(count);
  } else if (service->IsUtf8EqualTo(CStrVector("dateformat"))) {
    available_locales = icu::DateFormat::getAvailableLocales(count);
  } else if (service->IsUtf8EqualTo(CStrVector("breakiterator"))) {
    available_locales = icu::BreakIterator::getAvailableLocales(count);
  }

  char result[ULOC_FULLNAME_CAPACITY];
  Handle<JSObject> locales = factory->NewJSObject(isolate->object_function());

  // |count| stays 0 for an unrecognized service, so |available_locales| is
  // never dereferenced while it is still null.
  for (int32_t i = 0; i < count; ++i) {
    const char* icu_name = available_locales[i].getName();

    UErrorCode error = U_ZERO_ERROR;
    // No need to force strict BCP47 rules.
    uloc_toLanguageTag(icu_name, result, ULOC_FULLNAME_CAPACITY, FALSE, &error);
    // U_STRING_NOT_TERMINATED_WARNING is not a failure code, but it leaves
    // |result| without a terminating NUL, so it cannot be read as a C string.
    if (U_FAILURE(error) || error == U_STRING_NOT_TERMINATED_WARNING) {
      // This shouldn't happen, but let's not break the user.
      continue;
    }

    RETURN_FAILURE_ON_EXCEPTION(
        isolate, JSObject::SetOwnPropertyIgnoreAttributes(
                     locales, factory->NewStringFromAsciiChecked(result),
                     factory->NewNumber(i), NONE));
  }

  return *locales;
}
152 
153 
// Returns the language tag of a default-constructed icu::Locale, or "und"
// when the locale name cannot be converted to a tag that fits,
// NUL-terminated, into the local buffer.
RUNTIME_FUNCTION(Runtime_GetDefaultICULocale) {
  HandleScope scope(isolate);
  Factory* factory = isolate->factory();

  DCHECK_EQ(0, args.length());

  icu::Locale default_locale;

  // Convert the ICU locale name to a language tag (non-strict).
  char result[ULOC_FULLNAME_CAPACITY];
  UErrorCode status = U_ZERO_ERROR;
  uloc_toLanguageTag(default_locale.getName(), result, ULOC_FULLNAME_CAPACITY,
                     FALSE, &status);
  // U_STRING_NOT_TERMINATED_WARNING counts as success for U_SUCCESS(), yet it
  // means |result| has no terminating NUL; fall back to "und" in that case.
  if (U_SUCCESS(status) && status != U_STRING_NOT_TERMINATED_WARNING) {
    return *factory->NewStringFromAsciiChecked(result);
  }

  return *factory->NewStringFromStaticChars("und");
}
173 
174 
RUNTIME_FUNCTION(Runtime_GetLanguageTagVariants)175 RUNTIME_FUNCTION(Runtime_GetLanguageTagVariants) {
176   HandleScope scope(isolate);
177   Factory* factory = isolate->factory();
178 
179   DCHECK_EQ(1, args.length());
180 
181   CONVERT_ARG_HANDLE_CHECKED(JSArray, input, 0);
182 
183   uint32_t length = static_cast<uint32_t>(input->length()->Number());
184   // Set some limit to prevent fuzz tests from going OOM.
185   // Can be bumped when callers' requirements change.
186   if (length >= 100) return isolate->ThrowIllegalOperation();
187   Handle<FixedArray> output = factory->NewFixedArray(length);
188   Handle<Name> maximized = factory->NewStringFromStaticChars("maximized");
189   Handle<Name> base = factory->NewStringFromStaticChars("base");
190   for (unsigned int i = 0; i < length; ++i) {
191     Handle<Object> locale_id;
192     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
193         isolate, locale_id, JSReceiver::GetElement(isolate, input, i));
194     if (!locale_id->IsString()) {
195       return isolate->Throw(*factory->illegal_argument_string());
196     }
197 
198     v8::String::Utf8Value utf8_locale_id(
199         v8::Utils::ToLocal(Handle<String>::cast(locale_id)));
200 
201     UErrorCode error = U_ZERO_ERROR;
202 
203     // Convert from BCP47 to ICU format.
204     // de-DE-u-co-phonebk -> de_DE@collation=phonebook
205     char icu_locale[ULOC_FULLNAME_CAPACITY];
206     int icu_locale_length = 0;
207     uloc_forLanguageTag(*utf8_locale_id, icu_locale, ULOC_FULLNAME_CAPACITY,
208                         &icu_locale_length, &error);
209     if (U_FAILURE(error) || icu_locale_length == 0) {
210       return isolate->Throw(*factory->illegal_argument_string());
211     }
212 
213     // Maximize the locale.
214     // de_DE@collation=phonebook -> de_Latn_DE@collation=phonebook
215     char icu_max_locale[ULOC_FULLNAME_CAPACITY];
216     uloc_addLikelySubtags(icu_locale, icu_max_locale, ULOC_FULLNAME_CAPACITY,
217                           &error);
218 
219     // Remove extensions from maximized locale.
220     // de_Latn_DE@collation=phonebook -> de_Latn_DE
221     char icu_base_max_locale[ULOC_FULLNAME_CAPACITY];
222     uloc_getBaseName(icu_max_locale, icu_base_max_locale,
223                      ULOC_FULLNAME_CAPACITY, &error);
224 
225     // Get original name without extensions.
226     // de_DE@collation=phonebook -> de_DE
227     char icu_base_locale[ULOC_FULLNAME_CAPACITY];
228     uloc_getBaseName(icu_locale, icu_base_locale, ULOC_FULLNAME_CAPACITY,
229                      &error);
230 
231     // Convert from ICU locale format to BCP47 format.
232     // de_Latn_DE -> de-Latn-DE
233     char base_max_locale[ULOC_FULLNAME_CAPACITY];
234     uloc_toLanguageTag(icu_base_max_locale, base_max_locale,
235                        ULOC_FULLNAME_CAPACITY, FALSE, &error);
236 
237     // de_DE -> de-DE
238     char base_locale[ULOC_FULLNAME_CAPACITY];
239     uloc_toLanguageTag(icu_base_locale, base_locale, ULOC_FULLNAME_CAPACITY,
240                        FALSE, &error);
241 
242     if (U_FAILURE(error)) {
243       return isolate->Throw(*factory->illegal_argument_string());
244     }
245 
246     Handle<JSObject> result = factory->NewJSObject(isolate->object_function());
247     Handle<String> value = factory->NewStringFromAsciiChecked(base_max_locale);
248     JSObject::AddProperty(result, maximized, value, NONE);
249     value = factory->NewStringFromAsciiChecked(base_locale);
250     JSObject::AddProperty(result, base, value, NONE);
251     output->set(i, *result);
252   }
253 
254   Handle<JSArray> result = factory->NewJSArrayWithElements(output);
255   result->set_length(Smi::FromInt(length));
256   return *result;
257 }
258 
259 
// Returns true iff argument 0 is a JSObject whose property keyed by the
// intl-initialized marker symbol is anything other than undefined.
RUNTIME_FUNCTION(Runtime_IsInitializedIntlObject) {
  HandleScope scope(isolate);

  DCHECK_EQ(1, args.length());

  CONVERT_ARG_HANDLE_CHECKED(Object, input, 0);

  if (input->IsJSObject()) {
    Handle<JSObject> receiver = Handle<JSObject>::cast(input);
    Handle<Symbol> marker_symbol =
        isolate->factory()->intl_initialized_marker_symbol();
    Handle<Object> marker_value =
        JSReceiver::GetDataProperty(receiver, marker_symbol);
    const bool is_marked = !marker_value->IsUndefined(isolate);
    return isolate->heap()->ToBoolean(is_marked);
  }

  // Non-objects can never carry the marker.
  return isolate->heap()->false_value();
}
274 
275 
// Returns true iff argument 0 is a JSObject whose property keyed by the
// intl-initialized marker symbol is a string equal to argument 1.
RUNTIME_FUNCTION(Runtime_IsInitializedIntlObjectOfType) {
  HandleScope scope(isolate);

  DCHECK_EQ(2, args.length());

  CONVERT_ARG_HANDLE_CHECKED(Object, input, 0);
  CONVERT_ARG_HANDLE_CHECKED(String, expected_type, 1);

  if (input->IsJSObject()) {
    Handle<JSObject> receiver = Handle<JSObject>::cast(input);
    Handle<Symbol> marker_symbol =
        isolate->factory()->intl_initialized_marker_symbol();
    Handle<Object> marker_value =
        JSReceiver::GetDataProperty(receiver, marker_symbol);

    // Only compare contents once the marker is known to be a string.
    bool type_matches = false;
    if (marker_value->IsString()) {
      type_matches = String::cast(*marker_value)->Equals(*expected_type);
    }
    return isolate->heap()->ToBoolean(type_matches);
  }

  return isolate->heap()->false_value();
}
292 
293 
// Stores the type string (argument 1) on the JSObject (argument 0) under the
// intl-initialized marker symbol. Always returns undefined.
RUNTIME_FUNCTION(Runtime_MarkAsInitializedIntlObjectOfType) {
  HandleScope scope(isolate);

  DCHECK_EQ(2, args.length());

  CONVERT_ARG_HANDLE_CHECKED(JSObject, input, 0);
  CONVERT_ARG_HANDLE_CHECKED(String, type, 1);

  Factory* factory = isolate->factory();
  Handle<Symbol> marker_symbol = factory->intl_initialized_marker_symbol();

  // The store is asserted rather than propagated as a failure.
  JSObject::SetProperty(input, marker_symbol, type, STRICT).Assert();

  return isolate->heap()->undefined_value();
}
307 
308 
// Creates a holder object from the native context's Intl date-time-format
// function and attaches an ICU SimpleDateFormat built from |locale|,
// |options| and |resolved| (arguments 0-2) to its internal field 0.
// Throws an illegal-operation error when no formatter could be created.
RUNTIME_FUNCTION(Runtime_CreateDateTimeFormat) {
  HandleScope scope(isolate);

  DCHECK_EQ(3, args.length());

  CONVERT_ARG_HANDLE_CHECKED(String, locale, 0);
  CONVERT_ARG_HANDLE_CHECKED(JSObject, options, 1);
  CONVERT_ARG_HANDLE_CHECKED(JSObject, resolved, 2);

  Handle<JSFunction> ctor(
      isolate->native_context()->intl_date_time_format_function());

  Handle<JSObject> holder;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, holder,
                                     JSObject::New(ctor, ctor));

  icu::SimpleDateFormat* formatter =
      DateFormat::InitializeDateTimeFormat(isolate, locale, options, resolved);
  if (formatter == nullptr) return isolate->ThrowIllegalOperation();

  // The raw ICU pointer lives in internal field 0 of the holder.
  holder->SetInternalField(0, reinterpret_cast<Smi*>(formatter));

  // Register a weak global handle so that DateFormat::DeleteDateFormat can
  // delete the formatter once GC kicks in.
  Handle<Object> weak_handle = isolate->global_handles()->Create(*holder);
  GlobalHandles::MakeWeak(weak_handle.location(), weak_handle.location(),
                          DateFormat::DeleteDateFormat,
                          WeakCallbackType::kInternalFields);
  return *holder;
}
340 
341 
// Formats the JSDate in argument 1 with the ICU date formatter stored in the
// holder object in argument 0 and returns the result as a two-byte string.
RUNTIME_FUNCTION(Runtime_InternalDateFormat) {
  HandleScope scope(isolate);

  DCHECK_EQ(2, args.length());

  CONVERT_ARG_HANDLE_CHECKED(JSObject, date_format_holder, 0);
  CONVERT_ARG_HANDLE_CHECKED(JSDate, date, 1);

  Handle<Object> time_value;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, time_value,
                                     Object::ToNumber(date));

  icu::SimpleDateFormat* formatter =
      DateFormat::UnpackDateFormat(isolate, date_format_holder);
  CHECK_NOT_NULL(formatter);

  icu::UnicodeString formatted;
  formatter->format(time_value->Number(), formatted);

  // Expose ICU's UTF-16 buffer as a vector of code units for the factory.
  const uint16_t* code_units =
      reinterpret_cast<const uint16_t*>(formatted.getBuffer());
  Vector<const uint16_t> chars(code_units, formatted.length());
  RETURN_RESULT_OR_FAILURE(isolate,
                           isolate->factory()->NewStringFromTwoByte(chars));
}
365 
366 namespace {
367 // The list comes from third_party/icu/source/i18n/unicode/udat.h.
368 // They're mapped to DateTimeFormat components listed at
369 // https://tc39.github.io/ecma402/#sec-datetimeformat-abstracts .
370 
IcuDateFieldIdToDateType(int32_t field_id,Isolate * isolate)371 Handle<String> IcuDateFieldIdToDateType(int32_t field_id, Isolate* isolate) {
372   switch (field_id) {
373     case -1:
374       return isolate->factory()->literal_string();
375     case UDAT_YEAR_FIELD:
376     case UDAT_EXTENDED_YEAR_FIELD:
377     case UDAT_YEAR_NAME_FIELD:
378       return isolate->factory()->year_string();
379     case UDAT_MONTH_FIELD:
380     case UDAT_STANDALONE_MONTH_FIELD:
381       return isolate->factory()->month_string();
382     case UDAT_DATE_FIELD:
383       return isolate->factory()->day_string();
384     case UDAT_HOUR_OF_DAY1_FIELD:
385     case UDAT_HOUR_OF_DAY0_FIELD:
386     case UDAT_HOUR1_FIELD:
387     case UDAT_HOUR0_FIELD:
388       return isolate->factory()->hour_string();
389     case UDAT_MINUTE_FIELD:
390       return isolate->factory()->minute_string();
391     case UDAT_SECOND_FIELD:
392       return isolate->factory()->second_string();
393     case UDAT_DAY_OF_WEEK_FIELD:
394     case UDAT_DOW_LOCAL_FIELD:
395     case UDAT_STANDALONE_DAY_FIELD:
396       return isolate->factory()->weekday_string();
397     case UDAT_AM_PM_FIELD:
398       return isolate->factory()->dayperiod_string();
399     case UDAT_TIMEZONE_FIELD:
400     case UDAT_TIMEZONE_RFC_FIELD:
401     case UDAT_TIMEZONE_GENERIC_FIELD:
402     case UDAT_TIMEZONE_SPECIAL_FIELD:
403     case UDAT_TIMEZONE_LOCALIZED_GMT_OFFSET_FIELD:
404     case UDAT_TIMEZONE_ISO_FIELD:
405     case UDAT_TIMEZONE_ISO_LOCAL_FIELD:
406       return isolate->factory()->timeZoneName_string();
407     case UDAT_ERA_FIELD:
408       return isolate->factory()->era_string();
409     default:
410       // Other UDAT_*_FIELD's cannot show up because there is no way to specify
411       // them via options of Intl.DateTimeFormat.
412       UNREACHABLE();
413       // To prevent MSVC from issuing C4715 warning.
414       return Handle<String>();
415   }
416 }
417 
AddElement(Handle<JSArray> array,int index,int32_t field_id,const icu::UnicodeString & formatted,int32_t begin,int32_t end,Isolate * isolate)418 bool AddElement(Handle<JSArray> array, int index, int32_t field_id,
419                 const icu::UnicodeString& formatted, int32_t begin, int32_t end,
420                 Isolate* isolate) {
421   HandleScope scope(isolate);
422   Factory* factory = isolate->factory();
423   Handle<JSObject> element = factory->NewJSObject(isolate->object_function());
424   Handle<String> value = IcuDateFieldIdToDateType(field_id, isolate);
425   JSObject::AddProperty(element, factory->type_string(), value, NONE);
426 
427   icu::UnicodeString field(formatted.tempSubStringBetween(begin, end));
428   ASSIGN_RETURN_ON_EXCEPTION_VALUE(
429       isolate, value, factory->NewStringFromTwoByte(Vector<const uint16_t>(
430                           reinterpret_cast<const uint16_t*>(field.getBuffer()),
431                           field.length())),
432       false);
433 
434   JSObject::AddProperty(element, factory->value_string(), value, NONE);
435   RETURN_ON_EXCEPTION_VALUE(
436       isolate, JSObject::AddDataElement(array, index, element, NONE), false);
437   return true;
438 }
439 
440 }  // namespace
441 
// Formats the JSDate in argument 1 with the ICU date formatter stored in the
// holder object in argument 0 and returns a JSArray of {type, value} parts.
// Text between (and after) the fields reported by ICU is emitted as parts
// with field id -1. Returns undefined when ICU reports a formatting failure,
// and the exception sentinel when building a part raised an exception.
RUNTIME_FUNCTION(Runtime_InternalDateFormatToParts) {
  HandleScope scope(isolate);
  Factory* factory = isolate->factory();

  DCHECK_EQ(2, args.length());

  CONVERT_ARG_HANDLE_CHECKED(JSObject, date_format_holder, 0);
  CONVERT_ARG_HANDLE_CHECKED(JSDate, date, 1);

  Handle<Object> value;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, value, Object::ToNumber(date));

  icu::SimpleDateFormat* date_format =
      DateFormat::UnpackDateFormat(isolate, date_format_holder);
  CHECK_NOT_NULL(date_format);

  icu::UnicodeString formatted;
  icu::FieldPositionIterator fp_iter;
  icu::FieldPosition fp;
  UErrorCode status = U_ZERO_ERROR;
  date_format->format(value->Number(), formatted, &fp_iter, status);
  if (U_FAILURE(status)) return isolate->heap()->undefined_value();

  Handle<JSArray> result = factory->NewJSArray(0);
  int32_t length = formatted.length();
  if (length == 0) return *result;

  // AddElement() only returns false after one of its *_ON_EXCEPTION macros
  // fired, i.e. with an exception pending on the isolate. In that case the
  // exception sentinel has to be returned (as RETURN_FAILURE_ON_EXCEPTION
  // does) instead of a regular value such as undefined.
  int index = 0;
  int32_t previous_end_pos = 0;
  while (fp_iter.next(fp)) {
    int32_t begin_pos = fp.getBeginIndex();
    int32_t end_pos = fp.getEndIndex();

    // Literal text preceding this field.
    if (previous_end_pos < begin_pos) {
      if (!AddElement(result, index, -1, formatted, previous_end_pos, begin_pos,
                      isolate)) {
        DCHECK(isolate->has_pending_exception());
        return isolate->heap()->exception();
      }
      ++index;
    }
    if (!AddElement(result, index, fp.getField(), formatted, begin_pos, end_pos,
                    isolate)) {
      DCHECK(isolate->has_pending_exception());
      return isolate->heap()->exception();
    }
    previous_end_pos = end_pos;
    ++index;
  }
  // Trailing literal text after the last field.
  if (previous_end_pos < length) {
    if (!AddElement(result, index, -1, formatted, previous_end_pos, length,
                    isolate)) {
      DCHECK(isolate->has_pending_exception());
      return isolate->heap()->exception();
    }
  }
  JSObject::ValidateElements(result);
  return *result;
}
498 
// Creates a holder object from the native context's Intl number-format
// function and attaches an ICU DecimalFormat built from |locale|, |options|
// and |resolved| (arguments 0-2) to its internal field 0.
// Throws an illegal-operation error when no formatter could be created.
RUNTIME_FUNCTION(Runtime_CreateNumberFormat) {
  HandleScope scope(isolate);

  DCHECK_EQ(3, args.length());

  CONVERT_ARG_HANDLE_CHECKED(String, locale, 0);
  CONVERT_ARG_HANDLE_CHECKED(JSObject, options, 1);
  CONVERT_ARG_HANDLE_CHECKED(JSObject, resolved, 2);

  Handle<JSFunction> ctor(
      isolate->native_context()->intl_number_format_function());

  Handle<JSObject> holder;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, holder,
                                     JSObject::New(ctor, ctor));

  icu::DecimalFormat* formatter =
      NumberFormat::InitializeNumberFormat(isolate, locale, options, resolved);
  if (formatter == nullptr) return isolate->ThrowIllegalOperation();

  // The raw ICU pointer lives in internal field 0 of the holder.
  holder->SetInternalField(0, reinterpret_cast<Smi*>(formatter));

  // Weak global handle with NumberFormat::DeleteNumberFormat as callback.
  Handle<Object> weak_handle = isolate->global_handles()->Create(*holder);
  GlobalHandles::MakeWeak(weak_handle.location(), weak_handle.location(),
                          NumberFormat::DeleteNumberFormat,
                          WeakCallbackType::kInternalFields);
  return *holder;
}
529 
530 
// Converts argument 1 to a number, formats it with the ICU number formatter
// stored in the holder object in argument 0, and returns the result as a
// two-byte string.
RUNTIME_FUNCTION(Runtime_InternalNumberFormat) {
  HandleScope scope(isolate);

  DCHECK_EQ(2, args.length());

  CONVERT_ARG_HANDLE_CHECKED(JSObject, number_format_holder, 0);
  CONVERT_ARG_HANDLE_CHECKED(Object, number, 1);

  Handle<Object> numeric_value;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, numeric_value,
                                     Object::ToNumber(number));

  icu::DecimalFormat* formatter =
      NumberFormat::UnpackNumberFormat(isolate, number_format_holder);
  CHECK_NOT_NULL(formatter);

  icu::UnicodeString formatted;
  formatter->format(numeric_value->Number(), formatted);

  // Expose ICU's UTF-16 buffer as a vector of code units for the factory.
  const uint16_t* code_units =
      reinterpret_cast<const uint16_t*>(formatted.getBuffer());
  Vector<const uint16_t> chars(code_units, formatted.length());
  RETURN_RESULT_OR_FAILURE(isolate,
                           isolate->factory()->NewStringFromTwoByte(chars));
}
554 
555 
// Creates a holder object from the native context's Intl collator function
// and attaches an ICU Collator built from |locale|, |options| and |resolved|
// (arguments 0-2) to its internal field 0.
// Throws an illegal-operation error when no collator could be created.
RUNTIME_FUNCTION(Runtime_CreateCollator) {
  HandleScope scope(isolate);

  DCHECK_EQ(3, args.length());

  CONVERT_ARG_HANDLE_CHECKED(String, locale, 0);
  CONVERT_ARG_HANDLE_CHECKED(JSObject, options, 1);
  CONVERT_ARG_HANDLE_CHECKED(JSObject, resolved, 2);

  Handle<JSFunction> ctor(isolate->native_context()->intl_collator_function());

  Handle<JSObject> holder;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, holder,
                                     JSObject::New(ctor, ctor));

  icu::Collator* icu_collator =
      Collator::InitializeCollator(isolate, locale, options, resolved);
  if (icu_collator == nullptr) return isolate->ThrowIllegalOperation();

  // The raw ICU pointer lives in internal field 0 of the holder.
  holder->SetInternalField(0, reinterpret_cast<Smi*>(icu_collator));

  // Weak global handle with Collator::DeleteCollator as callback.
  Handle<Object> weak_handle = isolate->global_handles()->Create(*holder);
  GlobalHandles::MakeWeak(weak_handle.location(), weak_handle.location(),
                          Collator::DeleteCollator,
                          WeakCallbackType::kInternalFields);
  return *holder;
}
586 
587 
// Compares the strings in arguments 1 and 2 with the ICU collator stored in
// the holder object in argument 0 and returns the UCollationResult as a
// number. Throws an illegal-operation error when ICU reports a failure.
RUNTIME_FUNCTION(Runtime_InternalCompare) {
  HandleScope scope(isolate);

  DCHECK_EQ(3, args.length());

  CONVERT_ARG_HANDLE_CHECKED(JSObject, collator_holder, 0);
  CONVERT_ARG_HANDLE_CHECKED(String, string1, 1);
  CONVERT_ARG_HANDLE_CHECKED(String, string2, 2);

  icu::Collator* collator = Collator::UnpackCollator(isolate, collator_holder);
  CHECK_NOT_NULL(collator);

  // Flatten before entering the no-allocation scope below.
  string1 = String::Flatten(string1);
  string2 = String::Flatten(string2);

  UErrorCode status = U_ZERO_ERROR;
  UCollationResult order;
  {
    DisallowHeapAllocation no_gc;
    const int32_t lhs_length = string1->length();
    const int32_t rhs_length = string2->length();
    String::FlatContent lhs_flat = string1->GetFlatContent();
    String::FlatContent rhs_flat = string2->GetFlatContent();
    std::unique_ptr<uc16[]> lhs_buffer;
    std::unique_ptr<uc16[]> rhs_buffer;
    const UChar* lhs_chars =
        GetUCharBufferFromFlat(lhs_flat, &lhs_buffer, lhs_length);
    const UChar* rhs_chars =
        GetUCharBufferFromFlat(rhs_flat, &rhs_buffer, rhs_length);
    // Wrap the character buffers without declaring them NUL-terminated.
    icu::UnicodeString lhs(FALSE, lhs_chars, lhs_length);
    icu::UnicodeString rhs(FALSE, rhs_chars, rhs_length);
    order = collator->compare(lhs, rhs, status);
  }
  if (U_FAILURE(status)) return isolate->ThrowIllegalOperation();

  return *isolate->factory()->NewNumberFromInt(order);
}
623 
624 
// Normalizes the string in argument 0 using the normalization form selected
// by the integer in argument 1 (an index into the table below). Returns the
// input string itself when it is already normalized, a new two-byte string
// otherwise, and undefined when ICU reports a failure.
RUNTIME_FUNCTION(Runtime_StringNormalize) {
  HandleScope scope(isolate);
  // Each entry is the (data name, mode) pair handed to
  // icu::Normalizer2::getInstance for one form id.
  static const struct {
    const char* name;
    UNormalization2Mode mode;
  } kNormalizationForms[] = {
      {"nfc", UNORM2_COMPOSE},
      {"nfc", UNORM2_DECOMPOSE},
      {"nfkc", UNORM2_COMPOSE},
      {"nfkc", UNORM2_DECOMPOSE},
  };

  DCHECK_EQ(2, args.length());

  CONVERT_ARG_HANDLE_CHECKED(String, s, 0);
  CONVERT_NUMBER_CHECKED(int, form_id, Int32, args[1]);
  CHECK(form_id >= 0 &&
        static_cast<size_t>(form_id) < arraysize(kNormalizationForms));

  int length = s->length();
  s = String::Flatten(s);
  icu::UnicodeString normalized;
  std::unique_ptr<uc16[]> widened;
  UErrorCode status = U_ZERO_ERROR;
  {
    DisallowHeapAllocation no_gc;
    String::FlatContent content = s->GetFlatContent();
    const UChar* chars = GetUCharBufferFromFlat(content, &widened, length);
    icu::UnicodeString input(false, chars, length);

    // Getting a singleton. Should not free it.
    const char* form_name = kNormalizationForms[form_id].name;
    const UNormalization2Mode form_mode = kNormalizationForms[form_id].mode;
    const icu::Normalizer2* normalizer =
        icu::Normalizer2::getInstance(nullptr, form_name, form_mode, status);
    DCHECK(U_SUCCESS(status));
    CHECK(normalizer != nullptr);

    const int32_t prefix_length = normalizer->spanQuickCheckYes(input, status);
    // Nothing to do when the whole input passes the quick check.
    if (length == prefix_length) return *s;

    icu::UnicodeString suffix = input.tempSubString(prefix_length);
    // Read-only alias of the normalized prefix.
    normalized.setTo(false, input.getBuffer(), prefix_length);
    // copy-on-write; normalize the suffix and append to |normalized|.
    normalizer->normalizeSecondAndAppend(normalized, suffix, status);
  }

  if (U_FAILURE(status)) {
    return isolate->heap()->undefined_value();
  }

  const uint16_t* code_units =
      reinterpret_cast<const uint16_t*>(normalized.getBuffer());
  Vector<const uint16_t> result_chars(code_units, normalized.length());
  RETURN_RESULT_OR_FAILURE(
      isolate, isolate->factory()->NewStringFromTwoByte(result_chars));
}
681 
682 
// Creates a holder object from the native context's v8 break-iterator
// function and attaches an ICU BreakIterator built from |locale|, |options|
// and |resolved| (arguments 0-2) to its internal field 0. Internal field 1,
// which holds the adopted text, starts out null.
// Throws an illegal-operation error when no iterator could be created.
RUNTIME_FUNCTION(Runtime_CreateBreakIterator) {
  HandleScope scope(isolate);

  DCHECK_EQ(3, args.length());

  CONVERT_ARG_HANDLE_CHECKED(String, locale, 0);
  CONVERT_ARG_HANDLE_CHECKED(JSObject, options, 1);
  CONVERT_ARG_HANDLE_CHECKED(JSObject, resolved, 2);

  Handle<JSFunction> ctor(
      isolate->native_context()->intl_v8_break_iterator_function());

  Handle<JSObject> holder;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, holder,
                                     JSObject::New(ctor, ctor));

  icu::BreakIterator* iterator = V8BreakIterator::InitializeBreakIterator(
      isolate, locale, options, resolved);
  if (iterator == nullptr) return isolate->ThrowIllegalOperation();

  // Field 0: the ICU iterator. Field 1: pointer to adopted text, null for now.
  holder->SetInternalField(0, reinterpret_cast<Smi*>(iterator));
  holder->SetInternalField(1, static_cast<Smi*>(nullptr));

  // Register a weak global handle so that V8BreakIterator::DeleteBreakIterator
  // can delete the break iterator once GC kicks in.
  Handle<Object> weak_handle = isolate->global_handles()->Create(*holder);
  GlobalHandles::MakeWeak(weak_handle.location(), weak_handle.location(),
                          V8BreakIterator::DeleteBreakIterator,
                          WeakCallbackType::kInternalFields);
  return *holder;
}
717 
718 
// Replaces the text of the ICU break iterator stored in the holder object in
// argument 0 with a copy of the string in argument 1. The previously adopted
// icu::UnicodeString kept in internal field 1 is deleted and the new one is
// stored there. Always returns undefined.
RUNTIME_FUNCTION(Runtime_BreakIteratorAdoptText) {
  HandleScope scope(isolate);

  DCHECK_EQ(2, args.length());

  CONVERT_ARG_HANDLE_CHECKED(JSObject, break_iterator_holder, 0);
  CONVERT_ARG_HANDLE_CHECKED(String, text, 1);

  icu::BreakIterator* iterator =
      V8BreakIterator::UnpackBreakIterator(isolate, break_iterator_holder);
  CHECK_NOT_NULL(iterator);

  // Release the text adopted by an earlier call (deleting null is a no-op).
  delete reinterpret_cast<icu::UnicodeString*>(
      break_iterator_holder->GetInternalField(1));

  const int text_length = text->length();
  text = String::Flatten(text);
  DisallowHeapAllocation no_gc;
  String::FlatContent content = text->GetFlatContent();
  std::unique_ptr<uc16[]> widened;
  const UChar* chars = GetUCharBufferFromFlat(content, &widened, text_length);
  icu::UnicodeString* adopted = new icu::UnicodeString(chars, text_length);
  break_iterator_holder->SetInternalField(1, reinterpret_cast<Smi*>(adopted));

  iterator->setText(*adopted);

  return isolate->heap()->undefined_value();
}
748 
749 
// Calls first() on the ICU break iterator stored in the holder object in
// argument 0 and returns the resulting position as a number.
RUNTIME_FUNCTION(Runtime_BreakIteratorFirst) {
  HandleScope scope(isolate);

  DCHECK_EQ(1, args.length());

  CONVERT_ARG_HANDLE_CHECKED(JSObject, break_iterator_holder, 0);

  icu::BreakIterator* iterator =
      V8BreakIterator::UnpackBreakIterator(isolate, break_iterator_holder);
  CHECK_NOT_NULL(iterator);

  const int32_t position = iterator->first();
  return *isolate->factory()->NewNumberFromInt(position);
}
763 
764 
// Calls next() on the ICU break iterator stored in the holder object in
// argument 0 and returns the resulting position as a number.
RUNTIME_FUNCTION(Runtime_BreakIteratorNext) {
  HandleScope scope(isolate);

  DCHECK_EQ(1, args.length());

  CONVERT_ARG_HANDLE_CHECKED(JSObject, break_iterator_holder, 0);

  icu::BreakIterator* iterator =
      V8BreakIterator::UnpackBreakIterator(isolate, break_iterator_holder);
  CHECK_NOT_NULL(iterator);

  const int32_t position = iterator->next();
  return *isolate->factory()->NewNumberFromInt(position);
}
778 
779 
// Calls current() on the ICU break iterator stored in the holder object in
// argument 0 and returns the resulting position as a number.
RUNTIME_FUNCTION(Runtime_BreakIteratorCurrent) {
  HandleScope scope(isolate);

  DCHECK_EQ(1, args.length());

  CONVERT_ARG_HANDLE_CHECKED(JSObject, break_iterator_holder, 0);

  icu::BreakIterator* iterator =
      V8BreakIterator::UnpackBreakIterator(isolate, break_iterator_holder);
  CHECK_NOT_NULL(iterator);

  const int32_t position = iterator->current();
  return *isolate->factory()->NewNumberFromInt(position);
}
793 
794 
// Reads the rule status of the ICU break iterator stored in the holder object
// in argument 0 and returns the name of the UBRK_WORD_* range it falls into:
// "none", "number", "letter", "kana" or "ideo"; anything else is "unknown".
RUNTIME_FUNCTION(Runtime_BreakIteratorBreakType) {
  HandleScope scope(isolate);

  DCHECK_EQ(1, args.length());

  CONVERT_ARG_HANDLE_CHECKED(JSObject, break_iterator_holder, 0);

  icu::BreakIterator* iterator =
      V8BreakIterator::UnpackBreakIterator(isolate, break_iterator_holder);
  CHECK_NOT_NULL(iterator);

  // TODO(cira): Remove cast once ICU fixes base BreakIterator class.
  const int32_t rule_status =
      static_cast<icu::RuleBasedBreakIterator*>(iterator)->getRuleStatus();

  Factory* factory = isolate->factory();
  // Keep return values in sync with JavaScript BreakType enum.
  if (rule_status >= UBRK_WORD_NONE && rule_status < UBRK_WORD_NONE_LIMIT) {
    return *factory->NewStringFromStaticChars("none");
  }
  if (rule_status >= UBRK_WORD_NUMBER && rule_status < UBRK_WORD_NUMBER_LIMIT) {
    return isolate->heap()->number_string();
  }
  if (rule_status >= UBRK_WORD_LETTER && rule_status < UBRK_WORD_LETTER_LIMIT) {
    return *factory->NewStringFromStaticChars("letter");
  }
  if (rule_status >= UBRK_WORD_KANA && rule_status < UBRK_WORD_KANA_LIMIT) {
    return *factory->NewStringFromStaticChars("kana");
  }
  if (rule_status >= UBRK_WORD_IDEO && rule_status < UBRK_WORD_IDEO_LIMIT) {
    return *factory->NewStringFromStaticChars("ideo");
  }
  return *factory->NewStringFromStaticChars("unknown");
}
825 
826 namespace {
LocaleConvertCase(Handle<String> s,Isolate * isolate,bool is_to_upper,const char * lang)827 MUST_USE_RESULT Object* LocaleConvertCase(Handle<String> s, Isolate* isolate,
828                                           bool is_to_upper, const char* lang) {
829   auto case_converter = is_to_upper ? u_strToUpper : u_strToLower;
830   int32_t src_length = s->length();
831   int32_t dest_length = src_length;
832   UErrorCode status;
833   Handle<SeqTwoByteString> result;
834   std::unique_ptr<uc16[]> sap;
835 
836   if (dest_length == 0) return isolate->heap()->empty_string();
837 
838   // This is not a real loop. It'll be executed only once (no overflow) or
839   // twice (overflow).
840   for (int i = 0; i < 2; ++i) {
841     // Case conversion can increase the string length (e.g. sharp-S => SS) so
842     // that we have to handle RangeError exceptions here.
843     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
844         isolate, result, isolate->factory()->NewRawTwoByteString(dest_length));
845     DisallowHeapAllocation no_gc;
846     DCHECK(s->IsFlat());
847     String::FlatContent flat = s->GetFlatContent();
848     const UChar* src = GetUCharBufferFromFlat(flat, &sap, src_length);
849     status = U_ZERO_ERROR;
850     dest_length = case_converter(reinterpret_cast<UChar*>(result->GetChars()),
851                                  dest_length, src, src_length, lang, &status);
852     if (status != U_BUFFER_OVERFLOW_ERROR) break;
853   }
854 
855   // In most cases, the output will fill the destination buffer completely
856   // leading to an unterminated string (U_STRING_NOT_TERMINATED_WARNING).
857   // Only in rare cases, it'll be shorter than the destination buffer and
858   // |result| has to be truncated.
859   DCHECK(U_SUCCESS(status));
860   if (V8_LIKELY(status == U_STRING_NOT_TERMINATED_WARNING)) {
861     DCHECK(dest_length == result->length());
862     return *result;
863   }
864   if (U_SUCCESS(status)) {
865     DCHECK(dest_length < result->length());
866     return *Handle<SeqTwoByteString>::cast(
867         SeqString::Truncate(result, dest_length));
868   }
869   return *s;
870 }
871 
// True iff |ch| is one of the ASCII capital letters 'A'..'Z'.
inline bool IsASCIIUpper(uint16_t ch) {
  if (ch < 'A') return false;
  return ch <= 'Z';
}
873 
// Lower-casing table for the Latin-1 range, indexed by code unit (0x00-0xFF).
// Entries are laid out eight per row.
const uint8_t kToLower[256] = {
    // 0x00 - 0x3F: mapped to themselves.
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
    0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
    // 0x40 - 0x5F: 0x41 - 0x5A ('A'..'Z') map to 0x61 - 0x7A ('a'..'z').
    0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
    // 0x60 - 0xBF: mapped to themselves.
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
    0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
    0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F,
    0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7,
    0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
    0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7,
    0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
    // 0xC0 - 0xDF: shifted up by 0x20, except 0xD7 and 0xDF which are kept.
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xD7,
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF,
    // 0xE0 - 0xFF: mapped to themselves.
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
};
898 
ToLatin1Lower(uint16_t ch)899 inline uint16_t ToLatin1Lower(uint16_t ch) {
900   return static_cast<uint16_t>(kToLower[ch]);
901 }
902 
// Upper-cases |ch| if it is an ASCII letter 'a'..'z' (by clearing bit 5);
// every other value is returned unchanged.
inline uint16_t ToASCIIUpper(uint16_t ch) {
  const bool is_ascii_lower = 'a' <= ch && ch <= 'z';
  return is_ascii_lower ? (ch & ~0x20) : ch;
}
906 
907 // Does not work for U+00DF (sharp-s), U+00B5 (micron), U+00FF.
ToLatin1Upper(uint16_t ch)908 inline uint16_t ToLatin1Upper(uint16_t ch) {
909   DCHECK(ch != 0xDF && ch != 0xB5 && ch != 0xFF);
910   return ch &
911          ~(((ch >= 'a' && ch <= 'z') || (((ch & 0xE0) == 0xE0) && ch != 0xF7))
912            << 5);
913 }
914 
915 template <typename Char>
ToUpperFastASCII(const Vector<const Char> & src,Handle<SeqOneByteString> result)916 bool ToUpperFastASCII(const Vector<const Char>& src,
917                       Handle<SeqOneByteString> result) {
918   // Do a faster loop for the case where all the characters are ASCII.
919   uint16_t ored = 0;
920   int32_t index = 0;
921   for (auto it = src.begin(); it != src.end(); ++it) {
922     uint16_t ch = static_cast<uint16_t>(*it);
923     ored |= ch;
924     result->SeqOneByteStringSet(index++, ToASCIIUpper(ch));
925   }
926   return !(ored & ~0x7F);
927 }
928 
929 const uint16_t sharp_s = 0xDF;
930 
931 template <typename Char>
ToUpperOneByte(const Vector<const Char> & src,uint8_t * dest,int * sharp_s_count)932 bool ToUpperOneByte(const Vector<const Char>& src, uint8_t* dest,
933                     int* sharp_s_count) {
934   // Still pretty-fast path for the input with non-ASCII Latin-1 characters.
935 
936   // There are two special cases.
937   //  1. U+00B5 and U+00FF are mapped to a character beyond U+00FF.
938   //  2. Lower case sharp-S converts to "SS" (two characters)
939   *sharp_s_count = 0;
940   for (auto it = src.begin(); it != src.end(); ++it) {
941     uint16_t ch = static_cast<uint16_t>(*it);
942     if (V8_UNLIKELY(ch == sharp_s)) {
943       ++(*sharp_s_count);
944       continue;
945     }
946     if (V8_UNLIKELY(ch == 0xB5 || ch == 0xFF)) {
947       // Since this upper-cased character does not fit in an 8-bit string, we
948       // need to take the 16-bit path.
949       return false;
950     }
951     *dest++ = ToLatin1Upper(ch);
952   }
953 
954   return true;
955 }
956 
957 template <typename Char>
ToUpperWithSharpS(const Vector<const Char> & src,Handle<SeqOneByteString> result)958 void ToUpperWithSharpS(const Vector<const Char>& src,
959                        Handle<SeqOneByteString> result) {
960   int32_t dest_index = 0;
961   for (auto it = src.begin(); it != src.end(); ++it) {
962     uint16_t ch = static_cast<uint16_t>(*it);
963     if (ch == sharp_s) {
964       result->SeqOneByteStringSet(dest_index++, 'S');
965       result->SeqOneByteStringSet(dest_index++, 'S');
966     } else {
967       result->SeqOneByteStringSet(dest_index++, ToLatin1Upper(ch));
968     }
969   }
970 }
971 
FindFirstUpperOrNonAscii(Handle<String> s,int length)972 inline int FindFirstUpperOrNonAscii(Handle<String> s, int length) {
973   for (int index = 0; index < length; ++index) {
974     uint16_t ch = s->Get(index);
975     if (V8_UNLIKELY(IsASCIIUpper(ch) || ch & ~0x7F)) {
976       return index;
977     }
978   }
979   return length;
980 }
981 
ConvertToLower(Handle<String> s,Isolate * isolate)982 MUST_USE_RESULT Object* ConvertToLower(Handle<String> s, Isolate* isolate) {
983   if (!s->HasOnlyOneByteChars()) {
984     // Use a slower implementation for strings with characters beyond U+00FF.
985     return LocaleConvertCase(s, isolate, false, "");
986   }
987 
988   int length = s->length();
989 
990   // We depend here on the invariant that the length of a Latin1
991   // string is invariant under ToLowerCase, and the result always
992   // fits in the Latin1 range in the *root locale*. It does not hold
993   // for ToUpperCase even in the root locale.
994 
995   // Scan the string for uppercase and non-ASCII characters for strings
996   // shorter than a machine-word without any memory allocation overhead.
997   // TODO(jshin): Apply this to a longer input by breaking FastAsciiConvert()
998   // to two parts, one for scanning the prefix with no change and the other for
999   // handling ASCII-only characters.
1000   int index_to_first_unprocessed = length;
1001   const bool is_short = length < static_cast<int>(sizeof(uintptr_t));
1002   if (is_short) {
1003     index_to_first_unprocessed = FindFirstUpperOrNonAscii(s, length);
1004     // Nothing to do if the string is all ASCII with no uppercase.
1005     if (index_to_first_unprocessed == length) return *s;
1006   }
1007 
1008   Handle<SeqOneByteString> result =
1009       isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
1010 
1011   DisallowHeapAllocation no_gc;
1012   DCHECK(s->IsFlat());
1013   String::FlatContent flat = s->GetFlatContent();
1014   uint8_t* dest = result->GetChars();
1015   if (flat.IsOneByte()) {
1016     const uint8_t* src = flat.ToOneByteVector().start();
1017     bool has_changed_character = false;
1018     index_to_first_unprocessed = FastAsciiConvert<true>(
1019         reinterpret_cast<char*>(dest), reinterpret_cast<const char*>(src),
1020         length, &has_changed_character);
1021     // If not ASCII, we keep the result up to index_to_first_unprocessed and
1022     // process the rest.
1023     if (index_to_first_unprocessed == length)
1024       return has_changed_character ? *result : *s;
1025 
1026     for (int index = index_to_first_unprocessed; index < length; ++index) {
1027       dest[index] = ToLatin1Lower(static_cast<uint16_t>(src[index]));
1028     }
1029   } else {
1030     if (index_to_first_unprocessed == length) {
1031       DCHECK(!is_short);
1032       index_to_first_unprocessed = FindFirstUpperOrNonAscii(s, length);
1033     }
1034     // Nothing to do if the string is all ASCII with no uppercase.
1035     if (index_to_first_unprocessed == length) return *s;
1036     const uint16_t* src = flat.ToUC16Vector().start();
1037     CopyChars(dest, src, index_to_first_unprocessed);
1038     for (int index = index_to_first_unprocessed; index < length; ++index) {
1039       dest[index] = ToLatin1Lower(static_cast<uint16_t>(src[index]));
1040     }
1041   }
1042 
1043   return *result;
1044 }
1045 
ConvertToUpper(Handle<String> s,Isolate * isolate)1046 MUST_USE_RESULT Object* ConvertToUpper(Handle<String> s, Isolate* isolate) {
1047   int32_t length = s->length();
1048   if (s->HasOnlyOneByteChars() && length > 0) {
1049     Handle<SeqOneByteString> result =
1050         isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
1051 
1052     DCHECK(s->IsFlat());
1053     int sharp_s_count;
1054     bool is_result_single_byte;
1055     {
1056       DisallowHeapAllocation no_gc;
1057       String::FlatContent flat = s->GetFlatContent();
1058       uint8_t* dest = result->GetChars();
1059       if (flat.IsOneByte()) {
1060         Vector<const uint8_t> src = flat.ToOneByteVector();
1061         bool has_changed_character = false;
1062         int index_to_first_unprocessed =
1063             FastAsciiConvert<false>(reinterpret_cast<char*>(result->GetChars()),
1064                                     reinterpret_cast<const char*>(src.start()),
1065                                     length, &has_changed_character);
1066         if (index_to_first_unprocessed == length)
1067           return has_changed_character ? *result : *s;
1068         // If not ASCII, we keep the result up to index_to_first_unprocessed and
1069         // process the rest.
1070         is_result_single_byte =
1071             ToUpperOneByte(src.SubVector(index_to_first_unprocessed, length),
1072                            dest + index_to_first_unprocessed, &sharp_s_count);
1073       } else {
1074         DCHECK(flat.IsTwoByte());
1075         Vector<const uint16_t> src = flat.ToUC16Vector();
1076         if (ToUpperFastASCII(src, result)) return *result;
1077         is_result_single_byte = ToUpperOneByte(src, dest, &sharp_s_count);
1078       }
1079     }
1080 
1081     // Go to the full Unicode path if there are characters whose uppercase
1082     // is beyond the Latin-1 range (cannot be represented in OneByteString).
1083     if (V8_UNLIKELY(!is_result_single_byte)) {
1084       return LocaleConvertCase(s, isolate, true, "");
1085     }
1086 
1087     if (sharp_s_count == 0) return *result;
1088 
1089     // We have sharp_s_count sharp-s characters, but the result is still
1090     // in the Latin-1 range.
1091     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1092         isolate, result,
1093         isolate->factory()->NewRawOneByteString(length + sharp_s_count));
1094     DisallowHeapAllocation no_gc;
1095     String::FlatContent flat = s->GetFlatContent();
1096     if (flat.IsOneByte()) {
1097       ToUpperWithSharpS(flat.ToOneByteVector(), result);
1098     } else {
1099       ToUpperWithSharpS(flat.ToUC16Vector(), result);
1100     }
1101 
1102     return *result;
1103   }
1104 
1105   return LocaleConvertCase(s, isolate, true, "");
1106 }
1107 
ConvertCase(Handle<String> s,bool is_upper,Isolate * isolate)1108 MUST_USE_RESULT Object* ConvertCase(Handle<String> s, bool is_upper,
1109                                     Isolate* isolate) {
1110   return is_upper ? ConvertToUpper(s, isolate) : ConvertToLower(s, isolate);
1111 }
1112 
1113 }  // namespace
1114 
// Runtime entry point: flattens the string in args[0] and lower-cases it via
// ConvertToLower().
RUNTIME_FUNCTION(Runtime_StringToLowerCaseI18N) {
  HandleScope scope(isolate);
  DCHECK_EQ(args.length(), 1);
  CONVERT_ARG_HANDLE_CHECKED(String, s, 0);
  Handle<String> flat_string = String::Flatten(s);
  return ConvertToLower(flat_string, isolate);
}
1122 
// Runtime entry point: flattens the string in args[0] and upper-cases it via
// ConvertToUpper().
RUNTIME_FUNCTION(Runtime_StringToUpperCaseI18N) {
  HandleScope scope(isolate);
  DCHECK_EQ(args.length(), 1);
  CONVERT_ARG_HANDLE_CHECKED(String, s, 0);
  Handle<String> flat_string = String::Flatten(s);
  return ConvertToUpper(flat_string, isolate);
}
1130 
// Runtime entry point: locale-sensitive case conversion.
//   args[0]: String  - the string to convert.
//   args[1]: boolean - true for upper case, false for lower case.
//   args[2]: String  - the language tag to convert under.
// The languages tr, el, lt and az are routed through LocaleConvertCase();
// every other tag uses the root-locale ConvertCase() path.
RUNTIME_FUNCTION(Runtime_StringLocaleConvertCase) {
  HandleScope scope(isolate);
  DCHECK_EQ(args.length(), 3);
  CONVERT_ARG_HANDLE_CHECKED(String, s, 0);
  CONVERT_BOOLEAN_ARG_CHECKED(is_upper, 1);
  CONVERT_ARG_HANDLE_CHECKED(String, lang_arg, 2);

  // Primary language tag can be up to 8 characters long in theory.
  // https://tools.ietf.org/html/bcp47#section-2.2.1
  DCHECK(lang_arg->length() <= 8);
  lang_arg = String::Flatten(lang_arg);
  s = String::Flatten(s);

  // All the languages requiring special-handling have two-letter codes.
  // The check must be '!= 2' rather than '> 2': a tag shorter than two
  // characters (e.g. the lone 'x' or 'i' of a private-use or grandfathered
  // tag, or an empty string) would otherwise be indexed out of bounds below.
  if (V8_UNLIKELY(lang_arg->length() != 2))
    return ConvertCase(s, is_upper, isolate);

  // Keep the full 16-bit code units so that a non-Latin-1 character can never
  // be truncated into something that compares equal to an ASCII letter.
  uint16_t c1, c2;
  {
    DisallowHeapAllocation no_gc;
    String::FlatContent lang = lang_arg->GetFlatContent();
    c1 = lang.Get(0);
    c2 = lang.Get(1);
  }
  // TODO(jshin): Consider adding a fast path for ASCII or Latin-1. The fastpath
  // in the root locale needs to be adjusted for az, lt and tr because even case
  // mapping of ASCII range characters are different in those locales.
  // Greek (el) does not require any adjustment.
  if (V8_UNLIKELY(c1 == 't' && c2 == 'r'))
    return LocaleConvertCase(s, isolate, is_upper, "tr");
  if (V8_UNLIKELY(c1 == 'e' && c2 == 'l'))
    return LocaleConvertCase(s, isolate, is_upper, "el");
  if (V8_UNLIKELY(c1 == 'l' && c2 == 't'))
    return LocaleConvertCase(s, isolate, is_upper, "lt");
  if (V8_UNLIKELY(c1 == 'a' && c2 == 'z'))
    return LocaleConvertCase(s, isolate, is_upper, "az");

  return ConvertCase(s, is_upper, isolate);
}
1170 
// Runtime entry point: returns element 0 of the DATE_CACHE_VERSION eternal
// singleton, creating that one-element array (initialised to Smi zero) on
// first use. Returns undefined while the serializer is enabled.
RUNTIME_FUNCTION(Runtime_DateCacheVersion) {
  HandleScope scope(isolate);
  DCHECK_EQ(0, args.length());
  if (isolate->serializer_enabled()) return isolate->heap()->undefined_value();

  EternalHandles* eternals = isolate->eternal_handles();
  if (!eternals->Exists(EternalHandles::DATE_CACHE_VERSION)) {
    // Lazily create the singleton holding the version counter.
    Handle<FixedArray> fresh_version =
        isolate->factory()->NewFixedArray(1, TENURED);
    fresh_version->set(0, Smi::kZero);
    eternals->CreateSingleton(isolate, *fresh_version,
                              EternalHandles::DATE_CACHE_VERSION);
  }

  Handle<FixedArray> date_cache_version = Handle<FixedArray>::cast(
      eternals->GetSingleton(EternalHandles::DATE_CACHE_VERSION));
  return date_cache_version->get(0);
}
1187 
1188 }  // namespace internal
1189 }  // namespace v8
1190 
1191 #endif  // V8_I18N_SUPPORT
1192