• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2014 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 
6 #ifdef V8_I18N_SUPPORT
7 #include "src/runtime/runtime-utils.h"
8 
9 #include "src/api.h"
10 #include "src/api-natives.h"
11 #include "src/arguments.h"
12 #include "src/factory.h"
13 #include "src/i18n.h"
14 #include "src/isolate-inl.h"
15 #include "src/messages.h"
16 
17 #include "unicode/brkiter.h"
18 #include "unicode/calendar.h"
19 #include "unicode/coll.h"
20 #include "unicode/curramt.h"
21 #include "unicode/datefmt.h"
22 #include "unicode/dcfmtsym.h"
23 #include "unicode/decimfmt.h"
24 #include "unicode/dtfmtsym.h"
25 #include "unicode/dtptngen.h"
26 #include "unicode/locid.h"
27 #include "unicode/normalizer2.h"
28 #include "unicode/numfmt.h"
29 #include "unicode/numsys.h"
30 #include "unicode/rbbi.h"
31 #include "unicode/smpdtfmt.h"
32 #include "unicode/timezone.h"
33 #include "unicode/translit.h"
34 #include "unicode/uchar.h"
35 #include "unicode/ucol.h"
36 #include "unicode/ucurr.h"
37 #include "unicode/uloc.h"
38 #include "unicode/unistr.h"
39 #include "unicode/unum.h"
40 #include "unicode/uversion.h"
41 
42 
43 namespace v8 {
44 namespace internal {
45 namespace {
46 
GetUCharBufferFromFlat(const String::FlatContent & flat,base::SmartArrayPointer<uc16> * dest,int32_t length)47 const UChar* GetUCharBufferFromFlat(const String::FlatContent& flat,
48                                     base::SmartArrayPointer<uc16>* dest,
49                                     int32_t length) {
50   DCHECK(flat.IsFlat());
51   if (flat.IsOneByte()) {
52     if (dest->is_empty()) {
53       dest->Reset(NewArray<uc16>(length));
54       CopyChars(dest->get(), flat.ToOneByteVector().start(), length);
55     }
56     return reinterpret_cast<const UChar*>(dest->get());
57   } else {
58     return reinterpret_cast<const UChar*>(flat.ToUC16Vector().start());
59   }
60 }
61 
62 }  // namespace
63 
// Canonicalizes the BCP47 language tag passed as argument 0 by converting it
// to an ICU locale ID and back again with strict BCP47 rules.
//
// Returns the canonical tag as a string, or the string "invalid-tag" when
// either conversion is rejected by ICU or does not fit in the fixed-size
// buffers used here.
RUNTIME_FUNCTION(Runtime_CanonicalizeLanguageTag) {
  HandleScope scope(isolate);
  Factory* factory = isolate->factory();

  DCHECK(args.length() == 1);
  CONVERT_ARG_HANDLE_CHECKED(String, locale_id_str, 0);

  v8::String::Utf8Value locale_id(v8::Utils::ToLocal(locale_id_str));

  // Return value which denotes invalid language tag.
  const char* const kInvalidTag = "invalid-tag";

  UErrorCode error = U_ZERO_ERROR;
  char icu_result[ULOC_FULLNAME_CAPACITY];
  int icu_length = 0;

  uloc_forLanguageTag(*locale_id, icu_result, ULOC_FULLNAME_CAPACITY,
                      &icu_length, &error);
  // U_STRING_NOT_TERMINATED_WARNING is not a failure code, but it means the
  // output exactly filled the buffer and has no terminating NUL, so it must
  // not be handed to the next call as a C string.
  if (U_FAILURE(error) || error == U_STRING_NOT_TERMINATED_WARNING ||
      icu_length == 0) {
    return *factory->NewStringFromAsciiChecked(kInvalidTag);
  }

  char result[ULOC_FULLNAME_CAPACITY];

  // Force strict BCP47 rules.
  uloc_toLanguageTag(icu_result, result, ULOC_FULLNAME_CAPACITY, TRUE, &error);

  // Same reasoning as above: |result| is read as a C string below.
  if (U_FAILURE(error) || error == U_STRING_NOT_TERMINATED_WARNING) {
    return *factory->NewStringFromAsciiChecked(kInvalidTag);
  }

  return *factory->NewStringFromAsciiChecked(result);
}
97 
98 
// Builds an object whose own property names are the language tags of the
// locales ICU reports as available for the service named by argument 0
// ("collator", "numberformat", "dateformat" or "breakiterator"). Each
// property value is the locale's index in ICU's list. Any other service name
// yields an empty object.
RUNTIME_FUNCTION(Runtime_AvailableLocalesOf) {
  HandleScope scope(isolate);
  Factory* factory = isolate->factory();

  DCHECK(args.length() == 1);
  CONVERT_ARG_HANDLE_CHECKED(String, service, 0);

  int32_t locale_count = 0;
  const icu::Locale* icu_locales = NULL;

  if (service->IsUtf8EqualTo(CStrVector("collator"))) {
    icu_locales = icu::Collator::getAvailableLocales(locale_count);
  } else if (service->IsUtf8EqualTo(CStrVector("numberformat"))) {
    icu_locales = icu::NumberFormat::getAvailableLocales(locale_count);
  } else if (service->IsUtf8EqualTo(CStrVector("dateformat"))) {
    icu_locales = icu::DateFormat::getAvailableLocales(locale_count);
  } else if (service->IsUtf8EqualTo(CStrVector("breakiterator"))) {
    icu_locales = icu::BreakIterator::getAvailableLocales(locale_count);
  }

  Handle<JSObject> locales = factory->NewJSObject(isolate->object_function());
  char language_tag[ULOC_FULLNAME_CAPACITY];

  for (int32_t index = 0; index < locale_count; ++index) {
    UErrorCode status = U_ZERO_ERROR;
    // No need to force strict BCP47 rules.
    uloc_toLanguageTag(icu_locales[index].getName(), language_tag,
                       ULOC_FULLNAME_CAPACITY, FALSE, &status);
    // A conversion failure is not expected; skip the locale rather than
    // failing the whole call.
    if (U_FAILURE(status)) continue;

    RETURN_FAILURE_ON_EXCEPTION(
        isolate, JSObject::SetOwnPropertyIgnoreAttributes(
                     locales, factory->NewStringFromAsciiChecked(language_tag),
                     factory->NewNumber(index), NONE));
  }

  return *locales;
}
142 
143 
// Returns the language tag of a default-constructed icu::Locale, or "und" if
// ICU cannot convert that locale's name to a language tag.
RUNTIME_FUNCTION(Runtime_GetDefaultICULocale) {
  HandleScope scope(isolate);
  Factory* factory = isolate->factory();

  DCHECK(args.length() == 0);

  icu::Locale default_locale;

  // Convert the ICU locale name to a language tag (non-strict).
  UErrorCode error = U_ZERO_ERROR;
  char language_tag[ULOC_FULLNAME_CAPACITY];
  uloc_toLanguageTag(default_locale.getName(), language_tag,
                     ULOC_FULLNAME_CAPACITY, FALSE, &error);

  if (U_FAILURE(error)) {
    return *factory->NewStringFromStaticChars("und");
  }
  return *factory->NewStringFromAsciiChecked(language_tag);
}
163 
164 
RUNTIME_FUNCTION(Runtime_GetLanguageTagVariants)165 RUNTIME_FUNCTION(Runtime_GetLanguageTagVariants) {
166   HandleScope scope(isolate);
167   Factory* factory = isolate->factory();
168 
169   DCHECK(args.length() == 1);
170 
171   CONVERT_ARG_HANDLE_CHECKED(JSArray, input, 0);
172 
173   uint32_t length = static_cast<uint32_t>(input->length()->Number());
174   // Set some limit to prevent fuzz tests from going OOM.
175   // Can be bumped when callers' requirements change.
176   RUNTIME_ASSERT(length < 100);
177   Handle<FixedArray> output = factory->NewFixedArray(length);
178   Handle<Name> maximized = factory->NewStringFromStaticChars("maximized");
179   Handle<Name> base = factory->NewStringFromStaticChars("base");
180   for (unsigned int i = 0; i < length; ++i) {
181     Handle<Object> locale_id;
182     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
183         isolate, locale_id, JSReceiver::GetElement(isolate, input, i));
184     if (!locale_id->IsString()) {
185       return isolate->Throw(*factory->illegal_argument_string());
186     }
187 
188     v8::String::Utf8Value utf8_locale_id(
189         v8::Utils::ToLocal(Handle<String>::cast(locale_id)));
190 
191     UErrorCode error = U_ZERO_ERROR;
192 
193     // Convert from BCP47 to ICU format.
194     // de-DE-u-co-phonebk -> de_DE@collation=phonebook
195     char icu_locale[ULOC_FULLNAME_CAPACITY];
196     int icu_locale_length = 0;
197     uloc_forLanguageTag(*utf8_locale_id, icu_locale, ULOC_FULLNAME_CAPACITY,
198                         &icu_locale_length, &error);
199     if (U_FAILURE(error) || icu_locale_length == 0) {
200       return isolate->Throw(*factory->illegal_argument_string());
201     }
202 
203     // Maximize the locale.
204     // de_DE@collation=phonebook -> de_Latn_DE@collation=phonebook
205     char icu_max_locale[ULOC_FULLNAME_CAPACITY];
206     uloc_addLikelySubtags(icu_locale, icu_max_locale, ULOC_FULLNAME_CAPACITY,
207                           &error);
208 
209     // Remove extensions from maximized locale.
210     // de_Latn_DE@collation=phonebook -> de_Latn_DE
211     char icu_base_max_locale[ULOC_FULLNAME_CAPACITY];
212     uloc_getBaseName(icu_max_locale, icu_base_max_locale,
213                      ULOC_FULLNAME_CAPACITY, &error);
214 
215     // Get original name without extensions.
216     // de_DE@collation=phonebook -> de_DE
217     char icu_base_locale[ULOC_FULLNAME_CAPACITY];
218     uloc_getBaseName(icu_locale, icu_base_locale, ULOC_FULLNAME_CAPACITY,
219                      &error);
220 
221     // Convert from ICU locale format to BCP47 format.
222     // de_Latn_DE -> de-Latn-DE
223     char base_max_locale[ULOC_FULLNAME_CAPACITY];
224     uloc_toLanguageTag(icu_base_max_locale, base_max_locale,
225                        ULOC_FULLNAME_CAPACITY, FALSE, &error);
226 
227     // de_DE -> de-DE
228     char base_locale[ULOC_FULLNAME_CAPACITY];
229     uloc_toLanguageTag(icu_base_locale, base_locale, ULOC_FULLNAME_CAPACITY,
230                        FALSE, &error);
231 
232     if (U_FAILURE(error)) {
233       return isolate->Throw(*factory->illegal_argument_string());
234     }
235 
236     Handle<JSObject> result = factory->NewJSObject(isolate->object_function());
237     Handle<String> value = factory->NewStringFromAsciiChecked(base_max_locale);
238     JSObject::AddProperty(result, maximized, value, NONE);
239     value = factory->NewStringFromAsciiChecked(base_locale);
240     JSObject::AddProperty(result, base, value, NONE);
241     output->set(i, *result);
242   }
243 
244   Handle<JSArray> result = factory->NewJSArrayWithElements(output);
245   result->set_length(Smi::FromInt(length));
246   return *result;
247 }
248 
249 
// Returns true when argument 0 is a JSObject whose intl-initialized marker
// data property is anything other than undefined; false otherwise.
RUNTIME_FUNCTION(Runtime_IsInitializedIntlObject) {
  HandleScope scope(isolate);

  DCHECK(args.length() == 1);

  CONVERT_ARG_HANDLE_CHECKED(Object, input, 0);

  bool is_initialized = false;
  if (input->IsJSObject()) {
    Handle<JSObject> object = Handle<JSObject>::cast(input);
    Handle<Symbol> marker_symbol =
        isolate->factory()->intl_initialized_marker_symbol();
    Handle<Object> marker_value =
        JSReceiver::GetDataProperty(object, marker_symbol);
    is_initialized = !marker_value->IsUndefined(isolate);
  }
  return isolate->heap()->ToBoolean(is_initialized);
}
264 
265 
// Returns true when argument 0 is a JSObject whose intl-initialized marker
// data property is a string equal to argument 1; false otherwise.
RUNTIME_FUNCTION(Runtime_IsInitializedIntlObjectOfType) {
  HandleScope scope(isolate);

  DCHECK(args.length() == 2);

  CONVERT_ARG_HANDLE_CHECKED(Object, input, 0);
  CONVERT_ARG_HANDLE_CHECKED(String, expected_type, 1);

  if (!input->IsJSObject()) return isolate->heap()->false_value();

  Handle<Symbol> marker_symbol =
      isolate->factory()->intl_initialized_marker_symbol();
  Handle<Object> marker_value = JSReceiver::GetDataProperty(
      Handle<JSObject>::cast(input), marker_symbol);

  bool type_matches = false;
  if (marker_value->IsString()) {
    type_matches = String::cast(*marker_value)->Equals(*expected_type);
  }
  return isolate->heap()->ToBoolean(type_matches);
}
282 
283 
// Tags the object in argument 0 as an initialized Intl object: stores the
// type string (argument 1) under the intl-initialized marker symbol and the
// implementation object (argument 2) under the intl-impl symbol.
// Returns undefined.
RUNTIME_FUNCTION(Runtime_MarkAsInitializedIntlObjectOfType) {
  HandleScope scope(isolate);

  DCHECK(args.length() == 3);

  CONVERT_ARG_HANDLE_CHECKED(JSObject, input, 0);
  CONVERT_ARG_HANDLE_CHECKED(String, type, 1);
  CONVERT_ARG_HANDLE_CHECKED(JSObject, impl, 2);

  Factory* factory = isolate->factory();

  Handle<Symbol> type_marker = factory->intl_initialized_marker_symbol();
  JSObject::SetProperty(input, type_marker, type, STRICT).Assert();

  Handle<Symbol> impl_marker = factory->intl_impl_object_symbol();
  JSObject::SetProperty(input, impl_marker, impl, STRICT).Assert();

  return isolate->heap()->undefined_value();
}
301 
302 
// Returns the implementation object stored on argument 0 under the intl-impl
// symbol (see Runtime_MarkAsInitializedIntlObjectOfType, which stores it as a
// JSObject).
//
// Throws a kNotIntlObject TypeError when argument 0 is not a JSObject or does
// not carry an implementation object.
RUNTIME_FUNCTION(Runtime_GetImplFromInitializedIntlObject) {
  HandleScope scope(isolate);

  DCHECK(args.length() == 1);

  // Convert as a plain Object so that non-object arguments reach the
  // TypeError below; converting as JSObject would reject them earlier and
  // leave that branch unreachable.
  CONVERT_ARG_HANDLE_CHECKED(Object, input, 0);

  if (!input->IsJSObject()) {
    THROW_NEW_ERROR_RETURN_FAILURE(
        isolate, NewTypeError(MessageTemplate::kNotIntlObject, input));
  }

  Handle<JSObject> obj = Handle<JSObject>::cast(input);

  Handle<Symbol> marker = isolate->factory()->intl_impl_object_symbol();

  Handle<Object> impl = JSReceiver::GetDataProperty(obj, marker);
  // The impl is only ever stored as a JSObject, so any other value
  // (in particular undefined for an absent property, which is how
  // Runtime_IsInitializedIntlObject detects a missing marker) means the
  // object was never initialized.
  if (!impl->IsJSObject()) {
    THROW_NEW_ERROR_RETURN_FAILURE(
        isolate, NewTypeError(MessageTemplate::kNotIntlObject, obj));
  }
  return *impl;
}
326 
327 
// Creates a wrapper JSObject around a new ICU date formatter built from
// (locale, options, resolved) = arguments 0..2. The formatter pointer is
// stored in internal field 0, and the wrapper gets a "dateFormat": "valid"
// property. Throws an illegal-operation error if the formatter cannot be
// created.
RUNTIME_FUNCTION(Runtime_CreateDateTimeFormat) {
  HandleScope scope(isolate);

  DCHECK(args.length() == 3);

  CONVERT_ARG_HANDLE_CHECKED(String, locale, 0);
  CONVERT_ARG_HANDLE_CHECKED(JSObject, options, 1);
  CONVERT_ARG_HANDLE_CHECKED(JSObject, resolved, 2);

  // Instantiate the empty wrapper object first.
  Handle<ObjectTemplateInfo> wrapper_template = I18N::GetTemplate(isolate);
  Handle<JSObject> holder;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
      isolate, holder, ApiNatives::InstantiateObject(wrapper_template));

  // Build the ICU formatter and stash it in the wrapper's internal field.
  icu::SimpleDateFormat* formatter =
      DateFormat::InitializeDateTimeFormat(isolate, locale, options, resolved);
  if (formatter == NULL) return isolate->ThrowIllegalOperation();

  holder->SetInternalField(0, reinterpret_cast<Smi*>(formatter));

  Factory* factory = isolate->factory();
  JSObject::AddProperty(holder,
                        factory->NewStringFromStaticChars("dateFormat"),
                        factory->NewStringFromStaticChars("valid"), NONE);

  // Register a weak global handle so DateFormat::DeleteDateFormat runs as the
  // weak callback for the wrapper.
  Handle<Object> weak_handle = isolate->global_handles()->Create(*holder);
  GlobalHandles::MakeWeak(weak_handle.location(), weak_handle.location(),
                          DateFormat::DeleteDateFormat,
                          WeakCallbackType::kInternalFields);
  return *holder;
}
365 
366 
// Formats the JSDate in argument 1 with the ICU date formatter held by the
// wrapper object in argument 0, and returns the result as a two-byte string.
// Throws an illegal-operation error if the wrapper holds no formatter.
RUNTIME_FUNCTION(Runtime_InternalDateFormat) {
  HandleScope scope(isolate);

  DCHECK(args.length() == 2);

  CONVERT_ARG_HANDLE_CHECKED(JSObject, date_format_holder, 0);
  CONVERT_ARG_HANDLE_CHECKED(JSDate, date, 1);

  Handle<Object> time_value;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, time_value,
                                     Object::ToNumber(date));

  icu::SimpleDateFormat* formatter =
      DateFormat::UnpackDateFormat(isolate, date_format_holder);
  if (formatter == NULL) return isolate->ThrowIllegalOperation();

  icu::UnicodeString formatted;
  formatter->format(time_value->Number(), formatted);

  Vector<const uint16_t> chars(
      reinterpret_cast<const uint16_t*>(formatted.getBuffer()),
      formatted.length());
  RETURN_RESULT_OR_FAILURE(isolate,
                           isolate->factory()->NewStringFromTwoByte(chars));
}
390 
391 
// Parses the string in argument 1 with the ICU date formatter held by the
// wrapper object in argument 0. Returns a new JSDate on success and
// undefined when ICU reports a parse failure. Throws an illegal-operation
// error if the wrapper holds no formatter.
RUNTIME_FUNCTION(Runtime_InternalDateParse) {
  HandleScope scope(isolate);

  DCHECK(args.length() == 2);

  CONVERT_ARG_HANDLE_CHECKED(JSObject, date_format_holder, 0);
  CONVERT_ARG_HANDLE_CHECKED(String, date_string, 1);

  v8::String::Utf8Value utf8_text(v8::Utils::ToLocal(date_string));
  icu::UnicodeString icu_text(icu::UnicodeString::fromUTF8(*utf8_text));

  icu::SimpleDateFormat* formatter =
      DateFormat::UnpackDateFormat(isolate, date_format_holder);
  if (formatter == NULL) return isolate->ThrowIllegalOperation();

  UErrorCode error = U_ZERO_ERROR;
  UDate parsed = formatter->parse(icu_text, error);
  if (U_FAILURE(error)) return isolate->heap()->undefined_value();

  RETURN_RESULT_OR_FAILURE(
      isolate, JSDate::New(isolate->date_function(), isolate->date_function(),
                           static_cast<double>(parsed)));
}
414 
415 
// Creates a wrapper JSObject around a new ICU number formatter built from
// (locale, options, resolved) = arguments 0..2. The formatter pointer is
// stored in internal field 0, and the wrapper gets a "numberFormat": "valid"
// property. Throws an illegal-operation error if the formatter cannot be
// created.
RUNTIME_FUNCTION(Runtime_CreateNumberFormat) {
  HandleScope scope(isolate);

  DCHECK(args.length() == 3);

  CONVERT_ARG_HANDLE_CHECKED(String, locale, 0);
  CONVERT_ARG_HANDLE_CHECKED(JSObject, options, 1);
  CONVERT_ARG_HANDLE_CHECKED(JSObject, resolved, 2);

  // Instantiate the empty wrapper object first.
  Handle<ObjectTemplateInfo> wrapper_template = I18N::GetTemplate(isolate);
  Handle<JSObject> holder;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
      isolate, holder, ApiNatives::InstantiateObject(wrapper_template));

  // Build the ICU formatter and stash it in the wrapper's internal field.
  icu::DecimalFormat* formatter =
      NumberFormat::InitializeNumberFormat(isolate, locale, options, resolved);
  if (formatter == NULL) return isolate->ThrowIllegalOperation();

  holder->SetInternalField(0, reinterpret_cast<Smi*>(formatter));

  Factory* factory = isolate->factory();
  JSObject::AddProperty(holder,
                        factory->NewStringFromStaticChars("numberFormat"),
                        factory->NewStringFromStaticChars("valid"), NONE);

  // Register a weak global handle so NumberFormat::DeleteNumberFormat runs as
  // the weak callback for the wrapper.
  Handle<Object> weak_handle = isolate->global_handles()->Create(*holder);
  GlobalHandles::MakeWeak(weak_handle.location(), weak_handle.location(),
                          NumberFormat::DeleteNumberFormat,
                          WeakCallbackType::kInternalFields);
  return *holder;
}
453 
454 
// Converts argument 1 to a number and formats it with the ICU number
// formatter held by the wrapper object in argument 0. Returns the result as
// a two-byte string. Throws an illegal-operation error if the wrapper holds
// no formatter.
RUNTIME_FUNCTION(Runtime_InternalNumberFormat) {
  HandleScope scope(isolate);

  DCHECK(args.length() == 2);

  CONVERT_ARG_HANDLE_CHECKED(JSObject, number_format_holder, 0);
  CONVERT_ARG_HANDLE_CHECKED(Object, number, 1);

  Handle<Object> numeric_value;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, numeric_value,
                                     Object::ToNumber(number));

  icu::DecimalFormat* formatter =
      NumberFormat::UnpackNumberFormat(isolate, number_format_holder);
  if (formatter == NULL) return isolate->ThrowIllegalOperation();

  icu::UnicodeString formatted;
  formatter->format(numeric_value->Number(), formatted);

  Vector<const uint16_t> chars(
      reinterpret_cast<const uint16_t*>(formatted.getBuffer()),
      formatted.length());
  RETURN_RESULT_OR_FAILURE(isolate,
                           isolate->factory()->NewStringFromTwoByte(chars));
}
478 
479 
// Parses the string in argument 1 with the ICU number formatter held by the
// wrapper object in argument 0. Returns the parsed number, or undefined when
// ICU reports a parse failure or produces a non-numeric result type. Throws
// an illegal-operation error if the wrapper holds no formatter. Each call is
// recorded under the kIntlV8Parse use counter.
RUNTIME_FUNCTION(Runtime_InternalNumberParse) {
  HandleScope scope(isolate);

  DCHECK(args.length() == 2);

  CONVERT_ARG_HANDLE_CHECKED(JSObject, number_format_holder, 0);
  CONVERT_ARG_HANDLE_CHECKED(String, number_string, 1);

  isolate->CountUsage(v8::Isolate::UseCounterFeature::kIntlV8Parse);

  v8::String::Utf8Value utf8_text(v8::Utils::ToLocal(number_string));
  icu::UnicodeString icu_text(icu::UnicodeString::fromUTF8(*utf8_text));

  icu::DecimalFormat* formatter =
      NumberFormat::UnpackNumberFormat(isolate, number_format_holder);
  if (formatter == NULL) return isolate->ThrowIllegalOperation();

  UErrorCode error = U_ZERO_ERROR;
  icu::Formattable parsed;
  // ICU 4.6 doesn't support parseCurrency call. We need to wait for ICU49
  // to be part of Chrome.
  // TODO(cira): Include currency parsing code using parseCurrency call.
  // We need to check if the formatter parses all currencies or only the
  // one it was constructed with (it will impact the API - how to return ISO
  // code and the value).
  formatter->parse(icu_text, parsed, error);
  if (U_FAILURE(error)) return isolate->heap()->undefined_value();

  Factory* factory = isolate->factory();
  const icu::Formattable::Type parsed_type = parsed.getType();
  if (parsed_type == icu::Formattable::kDouble) {
    return *factory->NewNumber(parsed.getDouble());
  }
  if (parsed_type == icu::Formattable::kLong) {
    return *factory->NewNumberFromInt(parsed.getLong());
  }
  if (parsed_type == icu::Formattable::kInt64) {
    return *factory->NewNumber(static_cast<double>(parsed.getInt64()));
  }
  // Any other result type is not a number.
  return isolate->heap()->undefined_value();
}
519 
520 
// Creates a wrapper JSObject around a new ICU collator built from
// (locale, options, resolved) = arguments 0..2. The collator pointer is
// stored in internal field 0, and the wrapper gets a "collator": "valid"
// property. Throws an illegal-operation error if the collator cannot be
// created.
RUNTIME_FUNCTION(Runtime_CreateCollator) {
  HandleScope scope(isolate);

  DCHECK(args.length() == 3);

  CONVERT_ARG_HANDLE_CHECKED(String, locale, 0);
  CONVERT_ARG_HANDLE_CHECKED(JSObject, options, 1);
  CONVERT_ARG_HANDLE_CHECKED(JSObject, resolved, 2);

  // Instantiate the empty wrapper object first.
  Handle<ObjectTemplateInfo> wrapper_template = I18N::GetTemplate(isolate);
  Handle<JSObject> holder;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
      isolate, holder, ApiNatives::InstantiateObject(wrapper_template));

  // Build the ICU collator and stash it in the wrapper's internal field.
  icu::Collator* icu_collator =
      Collator::InitializeCollator(isolate, locale, options, resolved);
  if (icu_collator == NULL) return isolate->ThrowIllegalOperation();

  holder->SetInternalField(0, reinterpret_cast<Smi*>(icu_collator));

  Factory* factory = isolate->factory();
  JSObject::AddProperty(holder, factory->NewStringFromStaticChars("collator"),
                        factory->NewStringFromStaticChars("valid"), NONE);

  // Register a weak global handle so Collator::DeleteCollator runs as the
  // weak callback for the wrapper.
  Handle<Object> weak_handle = isolate->global_handles()->Create(*holder);
  GlobalHandles::MakeWeak(weak_handle.location(), weak_handle.location(),
                          Collator::DeleteCollator,
                          WeakCallbackType::kInternalFields);
  return *holder;
}
556 
557 
// Compares the strings in arguments 1 and 2 with the ICU collator held by
// the wrapper object in argument 0 and returns the UCollationResult as a
// number. Throws an illegal-operation error if the wrapper holds no collator
// or if ICU reports a failure.
RUNTIME_FUNCTION(Runtime_InternalCompare) {
  HandleScope scope(isolate);

  DCHECK(args.length() == 3);

  CONVERT_ARG_HANDLE_CHECKED(JSObject, collator_holder, 0);
  CONVERT_ARG_HANDLE_CHECKED(String, string1, 1);
  CONVERT_ARG_HANDLE_CHECKED(String, string2, 2);

  icu::Collator* icu_collator =
      Collator::UnpackCollator(isolate, collator_holder);
  if (icu_collator == NULL) return isolate->ThrowIllegalOperation();

  // Flatten both strings before heap allocation is disallowed.
  string1 = String::Flatten(string1);
  string2 = String::Flatten(string2);
  DisallowHeapAllocation no_gc;

  int32_t left_length = string1->length();
  String::FlatContent left_flat = string1->GetFlatContent();
  base::SmartArrayPointer<uc16> left_buffer;
  const UChar* left_chars =
      GetUCharBufferFromFlat(left_flat, &left_buffer, left_length);

  int32_t right_length = string2->length();
  String::FlatContent right_flat = string2->GetFlatContent();
  base::SmartArrayPointer<uc16> right_buffer;
  const UChar* right_chars =
      GetUCharBufferFromFlat(right_flat, &right_buffer, right_length);

  UErrorCode error = U_ZERO_ERROR;
  UCollationResult order = icu_collator->compare(
      left_chars, left_length, right_chars, right_length, error);
  if (U_FAILURE(error)) return isolate->ThrowIllegalOperation();

  return *isolate->factory()->NewNumberFromInt(order);
}
588 
589 
// Normalizes the string in argument 0 using the Unicode normalization form
// selected by the integer in argument 1, which indexes the table below.
// Returns the input string itself when it is already fully normalized, a new
// two-byte string otherwise, and undefined when ICU reports a failure.
RUNTIME_FUNCTION(Runtime_StringNormalize) {
  HandleScope scope(isolate);
  // ICU data-file name and mode for each supported form, indexed by form id.
  static const struct {
    const char* name;
    UNormalization2Mode mode;
  } normalizationForms[] = {
      {"nfc", UNORM2_COMPOSE},
      {"nfc", UNORM2_DECOMPOSE},
      {"nfkc", UNORM2_COMPOSE},
      {"nfkc", UNORM2_DECOMPOSE},
  };

  DCHECK(args.length() == 2);

  CONVERT_ARG_HANDLE_CHECKED(String, s, 0);
  CONVERT_NUMBER_CHECKED(int, form_id, Int32, args[1]);
  RUNTIME_ASSERT(form_id >= 0 &&
                 static_cast<size_t>(form_id) < arraysize(normalizationForms));

  int input_length = s->length();
  s = String::Flatten(s);
  icu::UnicodeString normalized;
  base::SmartArrayPointer<uc16> widened;
  UErrorCode error = U_ZERO_ERROR;
  {
    DisallowHeapAllocation no_gc;
    String::FlatContent flat = s->GetFlatContent();
    const UChar* chars = GetUCharBufferFromFlat(flat, &widened, input_length);
    icu::UnicodeString input(false, chars, input_length);
    // Getting a singleton. Should not free it.
    const icu::Normalizer2* normalizer =
        icu::Normalizer2::getInstance(nullptr, normalizationForms[form_id].name,
                                      normalizationForms[form_id].mode, error);
    DCHECK(U_SUCCESS(error));
    RUNTIME_ASSERT(normalizer != nullptr);
    int32_t prefix_length = normalizer->spanQuickCheckYes(input, error);
    // Quick return if the input is already normalized.
    if (prefix_length == input_length) return *s;
    icu::UnicodeString suffix = input.tempSubString(prefix_length);
    // Read-only alias of the normalized prefix.
    normalized.setTo(false, input.getBuffer(), prefix_length);
    // copy-on-write; normalize the suffix and append to |normalized|.
    normalizer->normalizeSecondAndAppend(normalized, suffix, error);
  }

  if (U_FAILURE(error)) {
    return isolate->heap()->undefined_value();
  }

  Vector<const uint16_t> result_chars(
      reinterpret_cast<const uint16_t*>(normalized.getBuffer()),
      normalized.length());
  RETURN_RESULT_OR_FAILURE(
      isolate, isolate->factory()->NewStringFromTwoByte(result_chars));
}
646 
647 
// Creates a wrapper JSObject around a new ICU break iterator built from
// (locale, options, resolved) = arguments 0..2. Internal field 0 holds the
// iterator pointer and internal field 1 (the adopted-text pointer) is set to
// null. The wrapper gets a "breakIterator": "valid" property. Throws an
// illegal-operation error if the iterator cannot be created.
RUNTIME_FUNCTION(Runtime_CreateBreakIterator) {
  HandleScope scope(isolate);

  DCHECK(args.length() == 3);

  CONVERT_ARG_HANDLE_CHECKED(String, locale, 0);
  CONVERT_ARG_HANDLE_CHECKED(JSObject, options, 1);
  CONVERT_ARG_HANDLE_CHECKED(JSObject, resolved, 2);

  // Instantiate the empty wrapper object first.
  Handle<ObjectTemplateInfo> wrapper_template = I18N::GetTemplate2(isolate);
  Handle<JSObject> holder;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
      isolate, holder, ApiNatives::InstantiateObject(wrapper_template));

  // Build the ICU break iterator and stash it in the wrapper.
  icu::BreakIterator* iterator = BreakIterator::InitializeBreakIterator(
      isolate, locale, options, resolved);
  if (iterator == NULL) return isolate->ThrowIllegalOperation();

  holder->SetInternalField(0, reinterpret_cast<Smi*>(iterator));
  // Make sure that the pointer to adopted text is NULL.
  holder->SetInternalField(1, static_cast<Smi*>(nullptr));

  Factory* factory = isolate->factory();
  JSObject::AddProperty(holder,
                        factory->NewStringFromStaticChars("breakIterator"),
                        factory->NewStringFromStaticChars("valid"), NONE);

  // Register a weak global handle so BreakIterator::DeleteBreakIterator runs
  // as the weak callback for the wrapper.
  Handle<Object> weak_handle = isolate->global_handles()->Create(*holder);
  GlobalHandles::MakeWeak(weak_handle.location(), weak_handle.location(),
                          BreakIterator::DeleteBreakIterator,
                          WeakCallbackType::kInternalFields);
  return *holder;
}
689 
690 
// Replaces the text that the break iterator held by argument 0 operates on
// with the string in argument 1. The previously stored icu::UnicodeString
// (internal field 1) is deleted and a newly allocated one takes its place.
// Returns undefined; throws an illegal-operation error if the wrapper holds
// no iterator.
RUNTIME_FUNCTION(Runtime_BreakIteratorAdoptText) {
  HandleScope scope(isolate);

  DCHECK(args.length() == 2);

  CONVERT_ARG_HANDLE_CHECKED(JSObject, break_iterator_holder, 0);
  CONVERT_ARG_HANDLE_CHECKED(String, text, 1);

  icu::BreakIterator* iterator =
      BreakIterator::UnpackBreakIterator(isolate, break_iterator_holder);
  if (iterator == NULL) return isolate->ThrowIllegalOperation();

  // Dispose of the text adopted by a previous call, if any.
  icu::UnicodeString* previous_text = reinterpret_cast<icu::UnicodeString*>(
      break_iterator_holder->GetInternalField(1));
  delete previous_text;

  int text_length = text->length();
  text = String::Flatten(text);
  DisallowHeapAllocation no_gc;
  String::FlatContent flat = text->GetFlatContent();
  base::SmartArrayPointer<uc16> widened;
  const UChar* chars = GetUCharBufferFromFlat(flat, &widened, text_length);

  icu::UnicodeString* adopted_text =
      new icu::UnicodeString(chars, text_length);
  break_iterator_holder->SetInternalField(
      1, reinterpret_cast<Smi*>(adopted_text));

  iterator->setText(*adopted_text);

  return isolate->heap()->undefined_value();
}
720 
721 
// Returns the result of first() on the ICU break iterator held by the
// wrapper object in argument 0, as a number. Throws an illegal-operation
// error if the wrapper holds no iterator.
RUNTIME_FUNCTION(Runtime_BreakIteratorFirst) {
  HandleScope scope(isolate);

  DCHECK(args.length() == 1);

  CONVERT_ARG_HANDLE_CHECKED(JSObject, break_iterator_holder, 0);

  icu::BreakIterator* iterator =
      BreakIterator::UnpackBreakIterator(isolate, break_iterator_holder);
  if (iterator == NULL) return isolate->ThrowIllegalOperation();

  const int32_t position = iterator->first();
  return *isolate->factory()->NewNumberFromInt(position);
}
735 
736 
// Returns the result of next() on the ICU break iterator held by the wrapper
// object in argument 0, as a number. Throws an illegal-operation error if
// the wrapper holds no iterator.
RUNTIME_FUNCTION(Runtime_BreakIteratorNext) {
  HandleScope scope(isolate);

  DCHECK(args.length() == 1);

  CONVERT_ARG_HANDLE_CHECKED(JSObject, break_iterator_holder, 0);

  icu::BreakIterator* iterator =
      BreakIterator::UnpackBreakIterator(isolate, break_iterator_holder);
  if (iterator == NULL) return isolate->ThrowIllegalOperation();

  const int32_t position = iterator->next();
  return *isolate->factory()->NewNumberFromInt(position);
}
750 
751 
// Returns the result of current() on the ICU break iterator held by the
// wrapper object in argument 0, as a number. Throws an illegal-operation
// error if the wrapper holds no iterator.
RUNTIME_FUNCTION(Runtime_BreakIteratorCurrent) {
  HandleScope scope(isolate);

  DCHECK(args.length() == 1);

  CONVERT_ARG_HANDLE_CHECKED(JSObject, break_iterator_holder, 0);

  icu::BreakIterator* iterator =
      BreakIterator::UnpackBreakIterator(isolate, break_iterator_holder);
  if (iterator == NULL) return isolate->ThrowIllegalOperation();

  const int32_t position = iterator->current();
  return *isolate->factory()->NewNumberFromInt(position);
}
765 
766 
// Maps the rule status of the ICU break iterator held by the wrapper object
// in argument 0 to one of the strings "none", "number", "letter", "kana",
// "ideo" or "unknown". Throws an illegal-operation error if the wrapper
// holds no iterator.
RUNTIME_FUNCTION(Runtime_BreakIteratorBreakType) {
  HandleScope scope(isolate);

  DCHECK(args.length() == 1);

  CONVERT_ARG_HANDLE_CHECKED(JSObject, break_iterator_holder, 0);

  icu::BreakIterator* iterator =
      BreakIterator::UnpackBreakIterator(isolate, break_iterator_holder);
  if (iterator == NULL) return isolate->ThrowIllegalOperation();

  // TODO(cira): Remove cast once ICU fixes base BreakIterator class.
  icu::RuleBasedBreakIterator* rule_based =
      static_cast<icu::RuleBasedBreakIterator*>(iterator);
  const int32_t rule_status = rule_based->getRuleStatus();

  Factory* factory = isolate->factory();
  // Keep return values in sync with JavaScript BreakType enum.
  if (rule_status >= UBRK_WORD_NONE && rule_status < UBRK_WORD_NONE_LIMIT) {
    return *factory->NewStringFromStaticChars("none");
  }
  if (rule_status >= UBRK_WORD_NUMBER && rule_status < UBRK_WORD_NUMBER_LIMIT) {
    return *factory->number_string();
  }
  if (rule_status >= UBRK_WORD_LETTER && rule_status < UBRK_WORD_LETTER_LIMIT) {
    return *factory->NewStringFromStaticChars("letter");
  }
  if (rule_status >= UBRK_WORD_KANA && rule_status < UBRK_WORD_KANA_LIMIT) {
    return *factory->NewStringFromStaticChars("kana");
  }
  if (rule_status >= UBRK_WORD_IDEO && rule_status < UBRK_WORD_IDEO_LIMIT) {
    return *factory->NewStringFromStaticChars("ideo");
  }
  return *factory->NewStringFromStaticChars("unknown");
}
797 
798 namespace {
ConvertCaseWithTransliterator(icu::UnicodeString * input,const char * transliterator_id)799 void ConvertCaseWithTransliterator(icu::UnicodeString* input,
800                                    const char* transliterator_id) {
801   UErrorCode status = U_ZERO_ERROR;
802   base::SmartPointer<icu::Transliterator> translit(
803       icu::Transliterator::createInstance(
804           icu::UnicodeString(transliterator_id, -1, US_INV), UTRANS_FORWARD,
805           status));
806   if (U_FAILURE(status)) return;
807   translit->transliterate(*input);
808 }
809 
LocaleConvertCase(Handle<String> s,Isolate * isolate,bool is_to_upper,const char * lang)810 MUST_USE_RESULT Object* LocaleConvertCase(Handle<String> s, Isolate* isolate,
811                                           bool is_to_upper, const char* lang) {
812   int32_t src_length = s->length();
813 
814   // Greek uppercasing has to be done via transliteration.
815   // TODO(jshin): Drop this special-casing once ICU's regular case conversion
816   // API supports Greek uppercasing. See
817   // http://bugs.icu-project.org/trac/ticket/10582 .
818   // In the meantime, if there's no Greek character in |s|, call this
819   // function again with the root locale (lang="").
820   // ICU's C API for transliteration is nasty and we just use C++ API.
821   if (V8_UNLIKELY(is_to_upper && lang[0] == 'e' && lang[1] == 'l')) {
822     icu::UnicodeString converted;
823     base::SmartArrayPointer<uc16> sap;
824     {
825       DisallowHeapAllocation no_gc;
826       String::FlatContent flat = s->GetFlatContent();
827       const UChar* src = GetUCharBufferFromFlat(flat, &sap, src_length);
828       // Starts with the source string (read-only alias with copy-on-write
829       // semantics) and will be modified to contain the converted result.
830       // Using read-only alias at first saves one copy operation if
831       // transliteration does not change the input, which is rather rare.
832       // Moreover, transliteration takes rather long so that saving one copy
833       // helps only a little bit.
834       converted.setTo(false, src, src_length);
835       ConvertCaseWithTransliterator(&converted, "el-Upper");
836       // If no change is made, just return |s|.
837       if (converted.getBuffer() == src) return *s;
838     }
839     RETURN_RESULT_OR_FAILURE(
840         isolate,
841         isolate->factory()->NewStringFromTwoByte(Vector<const uint16_t>(
842             reinterpret_cast<const uint16_t*>(converted.getBuffer()),
843             converted.length())));
844   }
845 
846   auto case_converter = is_to_upper ? u_strToUpper : u_strToLower;
847 
848   int32_t dest_length = src_length;
849   UErrorCode status;
850   Handle<SeqTwoByteString> result;
851   base::SmartArrayPointer<uc16> sap;
852 
853   // This is not a real loop. It'll be executed only once (no overflow) or
854   // twice (overflow).
855   for (int i = 0; i < 2; ++i) {
856     result =
857         isolate->factory()->NewRawTwoByteString(dest_length).ToHandleChecked();
858     DisallowHeapAllocation no_gc;
859     String::FlatContent flat = s->GetFlatContent();
860     const UChar* src = GetUCharBufferFromFlat(flat, &sap, src_length);
861     status = U_ZERO_ERROR;
862     dest_length = case_converter(reinterpret_cast<UChar*>(result->GetChars()),
863                                  dest_length, src, src_length, lang, &status);
864     if (status != U_BUFFER_OVERFLOW_ERROR) break;
865   }
866 
867   // In most cases, the output will fill the destination buffer completely
868   // leading to an unterminated string (U_STRING_NOT_TERMINATED_WARNING).
869   // Only in rare cases, it'll be shorter than the destination buffer and
870   // |result| has to be truncated.
871   DCHECK(U_SUCCESS(status));
872   if (V8_LIKELY(status == U_STRING_NOT_TERMINATED_WARNING)) {
873     DCHECK(dest_length == result->length());
874     return *result;
875   }
876   if (U_SUCCESS(status)) {
877     DCHECK(dest_length < result->length());
878     return *Handle<SeqTwoByteString>::cast(
879         SeqString::Truncate(result, dest_length));
880   }
881   return *s;
882 }
883 
// Returns true iff |ch| is one of the ASCII capital letters 'A'..'Z'.
inline bool IsASCIIUpper(uint16_t ch) {
  if (ch < 'A') return false;
  return ch <= 'Z';
}
885 
// Lower-case mapping for every Latin-1 code unit, indexed by the code unit.
// 'A'..'Z' (0x41..0x5A) map to 'a'..'z', and 0xC0..0xDE map to 0xE0..0xFE
// except 0xD7, which maps to itself. Every other entry is the identity.
// The trailing comment on each row is the index of the row's first entry.
const uint8_t kToLower[256] = {
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,  // 0x00
    0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,  // 0x08
    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,  // 0x10
    0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,  // 0x18
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,  // 0x20
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,  // 0x28
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,  // 0x30
    0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,  // 0x38
    0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,  // 0x40
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,  // 0x48
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,  // 0x50
    0x78, 0x79, 0x7A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,  // 0x58
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,  // 0x60
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,  // 0x68
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,  // 0x70
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,  // 0x78
    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,  // 0x80
    0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,  // 0x88
    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,  // 0x90
    0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F,  // 0x98
    0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7,  // 0xA0
    0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,  // 0xA8
    0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7,  // 0xB0
    0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,  // 0xB8
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,  // 0xC0
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,  // 0xC8
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xD7,  // 0xD0
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF,  // 0xD8
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,  // 0xE0
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,  // 0xE8
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,  // 0xF0
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,  // 0xF8
};
910 
ToLatin1Lower(uint16_t ch)911 inline uint16_t ToLatin1Lower(uint16_t ch) {
912   return static_cast<uint16_t>(kToLower[ch]);
913 }
914 
// Upper-cases |ch| if it is an ASCII lowercase letter ('a'..'z'); any other
// value is returned unchanged.
inline uint16_t ToASCIIUpper(uint16_t ch) {
  const bool is_ascii_lower = 'a' <= ch && ch <= 'z';
  // ASCII lowercase and uppercase letters differ only in bit 5 (0x20), so
  // clearing that bit upper-cases the letter.
  const int case_bit = is_ascii_lower ? 0x20 : 0;
  return static_cast<uint16_t>(ch & ~case_bit);
}
918 
// Upper-cases a Latin-1 code unit whose upper-case form is again a single
// Latin-1 code unit.
//
// ASCII 'a'..'z' and the accented lowercase letters in 0xE0..0xFE are mapped
// by clearing bit 5 (0x20). U+00F7 (division sign) lies inside that range but
// is not a letter, so it is returned unchanged.
//
// Does not work for U+00DF (sharp-s), U+00B5 (micron), U+00FF; callers must
// handle those separately.
inline uint16_t ToLatin1Upper(uint16_t ch) {
  DCHECK(ch != 0xDF && ch != 0xB5 && ch != 0xFF);
  return ch &
         ~(((ch >= 'a' && ch <= 'z') || (((ch & 0xE0) == 0xE0) && ch != 0xF7))
           << 5);
}
926 
927 template <typename Char>
ToUpperFastASCII(const Vector<const Char> & src,Handle<SeqOneByteString> result)928 bool ToUpperFastASCII(const Vector<const Char>& src,
929                       Handle<SeqOneByteString> result) {
930   // Do a faster loop for the case where all the characters are ASCII.
931   uint16_t ored = 0;
932   int32_t index = 0;
933   for (auto it = src.begin(); it != src.end(); ++it) {
934     uint16_t ch = static_cast<uint16_t>(*it);
935     ored |= ch;
936     result->SeqOneByteStringSet(index++, ToASCIIUpper(ch));
937   }
938   return !(ored & ~0x7F);
939 }
940 
// Code unit of U+00DF (lowercase sharp-s), whose upper-case form is the two
// characters "SS".
const uint16_t sharp_s = 0x00DF;
942 
943 template <typename Char>
ToUpperOneByte(const Vector<const Char> & src,Handle<SeqOneByteString> result,int * sharp_s_count)944 bool ToUpperOneByte(const Vector<const Char>& src,
945                     Handle<SeqOneByteString> result, int* sharp_s_count) {
946   // Still pretty-fast path for the input with non-ASCII Latin-1 characters.
947 
948   // There are two special cases.
949   //  1. U+00B5 and U+00FF are mapped to a character beyond U+00FF.
950   //  2. Lower case sharp-S converts to "SS" (two characters)
951   *sharp_s_count = 0;
952   int32_t index = 0;
953   for (auto it = src.begin(); it != src.end(); ++it) {
954     uint16_t ch = static_cast<uint16_t>(*it);
955     if (V8_UNLIKELY(ch == sharp_s)) {
956       ++(*sharp_s_count);
957       continue;
958     }
959     if (V8_UNLIKELY(ch == 0xB5 || ch == 0xFF)) {
960       // Since this upper-cased character does not fit in an 8-bit string, we
961       // need to take the 16-bit path.
962       return false;
963     }
964     result->SeqOneByteStringSet(index++, ToLatin1Upper(ch));
965   }
966 
967   return true;
968 }
969 
970 template <typename Char>
ToUpperWithSharpS(const Vector<const Char> & src,Handle<SeqOneByteString> result)971 void ToUpperWithSharpS(const Vector<const Char>& src,
972                        Handle<SeqOneByteString> result) {
973   int32_t dest_index = 0;
974   for (auto it = src.begin(); it != src.end(); ++it) {
975     uint16_t ch = static_cast<uint16_t>(*it);
976     if (ch == sharp_s) {
977       result->SeqOneByteStringSet(dest_index++, 'S');
978       result->SeqOneByteStringSet(dest_index++, 'S');
979     } else {
980       result->SeqOneByteStringSet(dest_index++, ToLatin1Upper(ch));
981     }
982   }
983 }
984 
985 }  // namespace
986 
RUNTIME_FUNCTION(Runtime_StringToLowerCaseI18N)987 RUNTIME_FUNCTION(Runtime_StringToLowerCaseI18N) {
988   HandleScope scope(isolate);
989   DCHECK_EQ(args.length(), 1);
990   CONVERT_ARG_HANDLE_CHECKED(String, s, 0);
991 
992   int length = s->length();
993   s = String::Flatten(s);
994   // First scan the string for uppercase and non-ASCII characters:
995   if (s->HasOnlyOneByteChars()) {
996     unsigned first_index_to_lower = length;
997     for (int index = 0; index < length; ++index) {
998       // Blink specializes this path for one-byte strings, so it
999       // does not need to do a generic get, but can do the equivalent
1000       // of SeqOneByteStringGet.
1001       uint16_t ch = s->Get(index);
1002       if (V8_UNLIKELY(IsASCIIUpper(ch) || ch & ~0x7F)) {
1003         first_index_to_lower = index;
1004         break;
1005       }
1006     }
1007 
1008     // Nothing to do if the string is all ASCII with no uppercase.
1009     if (first_index_to_lower == length) return *s;
1010 
1011     // We depend here on the invariant that the length of a Latin1
1012     // string is invariant under ToLowerCase, and the result always
1013     // fits in the Latin1 range in the *root locale*. It does not hold
1014     // for ToUpperCase even in the root locale.
1015     Handle<SeqOneByteString> result;
1016     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1017         isolate, result, isolate->factory()->NewRawOneByteString(length));
1018 
1019     DisallowHeapAllocation no_gc;
1020     String::FlatContent flat = s->GetFlatContent();
1021     if (flat.IsOneByte()) {
1022       const uint8_t* src = flat.ToOneByteVector().start();
1023       CopyChars(result->GetChars(), src, first_index_to_lower);
1024       for (int index = first_index_to_lower; index < length; ++index) {
1025         uint16_t ch = static_cast<uint16_t>(src[index]);
1026         result->SeqOneByteStringSet(index, ToLatin1Lower(ch));
1027       }
1028     } else {
1029       const uint16_t* src = flat.ToUC16Vector().start();
1030       CopyChars(result->GetChars(), src, first_index_to_lower);
1031       for (int index = first_index_to_lower; index < length; ++index) {
1032         uint16_t ch = src[index];
1033         result->SeqOneByteStringSet(index, ToLatin1Lower(ch));
1034       }
1035     }
1036 
1037     return *result;
1038   }
1039 
1040   // Blink had an additional case here for ASCII 2-byte strings, but
1041   // that is subsumed by the above code (assuming there isn't a false
1042   // negative for HasOnlyOneByteChars).
1043 
1044   // Do a slower implementation for cases that include non-ASCII characters.
1045   return LocaleConvertCase(s, isolate, false, "");
1046 }
1047 
RUNTIME_FUNCTION(Runtime_StringToUpperCaseI18N)1048 RUNTIME_FUNCTION(Runtime_StringToUpperCaseI18N) {
1049   HandleScope scope(isolate);
1050   DCHECK_EQ(args.length(), 1);
1051   CONVERT_ARG_HANDLE_CHECKED(String, s, 0);
1052 
1053   // This function could be optimized for no-op cases the way lowercase
1054   // counterpart is, but in empirical testing, few actual calls to upper()
1055   // are no-ops. So, it wouldn't be worth the extra time for pre-scanning.
1056 
1057   int32_t length = s->length();
1058   s = String::Flatten(s);
1059 
1060   if (s->HasOnlyOneByteChars()) {
1061     Handle<SeqOneByteString> result;
1062     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1063         isolate, result, isolate->factory()->NewRawOneByteString(length));
1064 
1065     int sharp_s_count;
1066     bool is_result_single_byte;
1067     {
1068       DisallowHeapAllocation no_gc;
1069       String::FlatContent flat = s->GetFlatContent();
1070       // If it was ok to slow down ASCII-only input slightly, ToUpperFastASCII
1071       // could be removed  because ToUpperOneByte is pretty fast now (it
1072       // does not call ICU API any more.).
1073       if (flat.IsOneByte()) {
1074         Vector<const uint8_t> src = flat.ToOneByteVector();
1075         if (ToUpperFastASCII(src, result)) return *result;
1076         is_result_single_byte = ToUpperOneByte(src, result, &sharp_s_count);
1077       } else {
1078         DCHECK(flat.IsTwoByte());
1079         Vector<const uint16_t> src = flat.ToUC16Vector();
1080         if (ToUpperFastASCII(src, result)) return *result;
1081         is_result_single_byte = ToUpperOneByte(src, result, &sharp_s_count);
1082       }
1083     }
1084 
1085     // Go to the full Unicode path if there are characters whose uppercase
1086     // is beyond the Latin-1 range (cannot be represented in OneByteString).
1087     if (V8_UNLIKELY(!is_result_single_byte)) {
1088       return LocaleConvertCase(s, isolate, true, "");
1089     }
1090 
1091     if (sharp_s_count == 0) return *result;
1092 
1093     // We have sharp_s_count sharp-s characters, but the result is still
1094     // in the Latin-1 range.
1095     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1096         isolate, result,
1097         isolate->factory()->NewRawOneByteString(length + sharp_s_count));
1098     DisallowHeapAllocation no_gc;
1099     String::FlatContent flat = s->GetFlatContent();
1100     if (flat.IsOneByte()) {
1101       ToUpperWithSharpS(flat.ToOneByteVector(), result);
1102     } else {
1103       ToUpperWithSharpS(flat.ToUC16Vector(), result);
1104     }
1105 
1106     return *result;
1107   }
1108 
1109   return LocaleConvertCase(s, isolate, true, "");
1110 }
1111 
// Runtime entry: converts the string in argument 0 to upper case (argument 1
// true) or lower case using the rules of the language in argument 2, which
// must be a sequential one-byte string holding a 2-letter language code.
// Throws an illegal-operation error when the language string is shorter than
// two characters.
RUNTIME_FUNCTION(Runtime_StringLocaleConvertCase) {
  HandleScope scope(isolate);
  DCHECK_EQ(args.length(), 3);
  CONVERT_ARG_HANDLE_CHECKED(String, s, 0);
  CONVERT_BOOLEAN_ARG_CHECKED(is_upper, 1);
  CONVERT_ARG_HANDLE_CHECKED(SeqOneByteString, lang, 2);

  // Two characters are copied out of |lang| below. The DCHECK alone does not
  // protect builds without debug checks, so reject strings that are too short
  // to avoid reading past the end of their character data.
  if (lang->length() < 2) return isolate->ThrowIllegalOperation();

  // All the languages requiring special handling ("az", "el", "lt", "tr")
  // have a 2-letter language code.
  DCHECK(lang->length() == 2);
  uint8_t lang_str[3];
  memcpy(lang_str, lang->GetChars(), 2);
  lang_str[2] = 0;  // NUL-terminate for the C-string based ICU API.
  s = String::Flatten(s);
  // TODO(jshin): Consider adding a fast path for ASCII or Latin-1. The fastpath
  // in the root locale needs to be adjusted for az, lt and tr because even case
  // mapping of ASCII range characters are different in those locales.
  // Greek (el) does not require any adjustment, though.
  return LocaleConvertCase(s, isolate, is_upper,
                           reinterpret_cast<const char*>(lang_str));
}
1133 
// Runtime entry: returns the value stored in element 0 of the singleton
// one-element FixedArray kept under EternalHandles::DATE_CACHE_VERSION. The
// array is created on first use with element 0 set to Smi 0. Returns
// undefined while the serializer is enabled.
RUNTIME_FUNCTION(Runtime_DateCacheVersion) {
  HandleScope scope(isolate);
  DCHECK_EQ(0, args.length());
  if (isolate->serializer_enabled()) return isolate->heap()->undefined_value();

  auto eternals = isolate->eternal_handles();
  const auto slot = EternalHandles::DATE_CACHE_VERSION;

  if (!eternals->Exists(slot)) {
    // First request: create the singleton holding the initial version.
    Handle<FixedArray> initial_version =
        isolate->factory()->NewFixedArray(1, TENURED);
    initial_version->set(0, Smi::FromInt(0));
    eternals->CreateSingleton(isolate, *initial_version, slot);
  }

  Handle<FixedArray> date_cache_version =
      Handle<FixedArray>::cast(eternals->GetSingleton(slot));
  return date_cache_version->get(0);
}
1150 
1151 }  // namespace internal
1152 }  // namespace v8
1153 
1154 #endif  // V8_I18N_SUPPORT
1155