1 // Copyright 2014 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5
6 #ifdef V8_I18N_SUPPORT
7 #include "src/runtime/runtime-utils.h"
8
9 #include <memory>
10
11 #include "src/api-natives.h"
12 #include "src/api.h"
13 #include "src/arguments.h"
14 #include "src/factory.h"
15 #include "src/i18n.h"
16 #include "src/isolate-inl.h"
17 #include "src/messages.h"
18 #include "src/string-case.h"
19 #include "src/utils.h"
20
21 #include "unicode/brkiter.h"
22 #include "unicode/calendar.h"
23 #include "unicode/coll.h"
24 #include "unicode/curramt.h"
25 #include "unicode/datefmt.h"
26 #include "unicode/dcfmtsym.h"
27 #include "unicode/decimfmt.h"
28 #include "unicode/dtfmtsym.h"
29 #include "unicode/dtptngen.h"
30 #include "unicode/fieldpos.h"
31 #include "unicode/fpositer.h"
32 #include "unicode/locid.h"
33 #include "unicode/normalizer2.h"
34 #include "unicode/numfmt.h"
35 #include "unicode/numsys.h"
36 #include "unicode/rbbi.h"
37 #include "unicode/smpdtfmt.h"
38 #include "unicode/timezone.h"
39 #include "unicode/translit.h"
40 #include "unicode/uchar.h"
41 #include "unicode/ucol.h"
42 #include "unicode/ucurr.h"
43 #include "unicode/uloc.h"
44 #include "unicode/unistr.h"
45 #include "unicode/unum.h"
46 #include "unicode/ustring.h"
47 #include "unicode/uversion.h"
48
49
50 namespace v8 {
51 namespace internal {
52 namespace {
53
GetUCharBufferFromFlat(const String::FlatContent & flat,std::unique_ptr<uc16[]> * dest,int32_t length)54 const UChar* GetUCharBufferFromFlat(const String::FlatContent& flat,
55 std::unique_ptr<uc16[]>* dest,
56 int32_t length) {
57 DCHECK(flat.IsFlat());
58 if (flat.IsOneByte()) {
59 if (!*dest) {
60 dest->reset(NewArray<uc16>(length));
61 CopyChars(dest->get(), flat.ToOneByteVector().start(), length);
62 }
63 return reinterpret_cast<const UChar*>(dest->get());
64 } else {
65 return reinterpret_cast<const UChar*>(flat.ToUC16Vector().start());
66 }
67 }
68
69 } // namespace
70
71 // ECMA 402 6.2.3
// Runtime entry: canonicalizes the language tag passed as argument 0 by
// converting it to an ICU locale id (uloc_forLanguageTag) and back to a
// language tag under strict BCP47 rules (uloc_toLanguageTag).
//
// Returns the canonical tag as a string, or the string "invalid-tag" if
// either conversion fails, consumes nothing, or produces output that does not
// fit NUL-terminated into a ULOC_FULLNAME_CAPACITY-sized buffer.
RUNTIME_FUNCTION(Runtime_CanonicalizeLanguageTag) {
  HandleScope scope(isolate);
  Factory* factory = isolate->factory();

  DCHECK_EQ(1, args.length());
  CONVERT_ARG_HANDLE_CHECKED(String, locale_id_str, 0);

  v8::String::Utf8Value locale_id(v8::Utils::ToLocal(locale_id_str));

  // Return value which denotes invalid language tag.
  // TODO(jshin): Can uloc_{for,to}LanguageTag fail even for structurally valid
  // language tags? If not, just add CHECK instead of returning 'invalid-tag'.
  const char* const kInvalidTag = "invalid-tag";

  UErrorCode error = U_ZERO_ERROR;
  char icu_result[ULOC_FULLNAME_CAPACITY];
  int icu_length = 0;

  uloc_forLanguageTag(*locale_id, icu_result, ULOC_FULLNAME_CAPACITY,
                      &icu_length, &error);
  // U_STRING_NOT_TERMINATED_WARNING is not covered by U_FAILURE(), but it
  // means |icu_result| has no trailing NUL. The buffer is read as a C string
  // below, so treat the warning as an error.
  if (U_FAILURE(error) || error == U_STRING_NOT_TERMINATED_WARNING ||
      icu_length == 0) {
    return *factory->NewStringFromAsciiChecked(kInvalidTag);
  }

  char result[ULOC_FULLNAME_CAPACITY];

  // Force strict BCP47 rules.
  uloc_toLanguageTag(icu_result, result, ULOC_FULLNAME_CAPACITY, TRUE, &error);

  // Same reasoning as above: |result| must be NUL-terminated before it is
  // turned into a heap string.
  if (U_FAILURE(error) || error == U_STRING_NOT_TERMINATED_WARNING) {
    return *factory->NewStringFromAsciiChecked(kInvalidTag);
  }

  return *factory->NewStringFromAsciiChecked(result);
}
107
108
// Runtime entry: returns a plain object whose own property names are the
// language tags of the locales ICU reports as available for the service named
// by argument 0 ("collator", "numberformat", "dateformat" or
// "breakiterator"). Each property's value is the index of the locale in ICU's
// list. An unrecognized service name yields an object with no such
// properties. Locales whose tag cannot be produced are silently skipped.
RUNTIME_FUNCTION(Runtime_AvailableLocalesOf) {
  HandleScope scope(isolate);
  Factory* factory = isolate->factory();

  DCHECK_EQ(1, args.length());
  CONVERT_ARG_HANDLE_CHECKED(String, service, 0);

  const icu::Locale* available_locales = nullptr;
  int32_t count = 0;

  if (service->IsUtf8EqualTo(CStrVector("collator"))) {
    available_locales = icu::Collator::getAvailableLocales(count);
  } else if (service->IsUtf8EqualTo(CStrVector("numberformat"))) {
    available_locales = icu::NumberFormat::getAvailableLocales(count);
  } else if (service->IsUtf8EqualTo(CStrVector("dateformat"))) {
    available_locales = icu::DateFormat::getAvailableLocales(count);
  } else if (service->IsUtf8EqualTo(CStrVector("breakiterator"))) {
    available_locales = icu::BreakIterator::getAvailableLocales(count);
  }

  char result[ULOC_FULLNAME_CAPACITY];
  Handle<JSObject> locales = factory->NewJSObject(isolate->object_function());

  // |count| stays 0 when the service is unknown, so |available_locales| is
  // never dereferenced while it is null.
  for (int32_t i = 0; i < count; ++i) {
    const char* icu_name = available_locales[i].getName();

    UErrorCode error = U_ZERO_ERROR;
    // No need to force strict BCP47 rules.
    uloc_toLanguageTag(icu_name, result, ULOC_FULLNAME_CAPACITY, FALSE, &error);
    // U_STRING_NOT_TERMINATED_WARNING is not a failure code, but |result| is
    // read as a C string below and would then lack its trailing NUL.
    if (U_FAILURE(error) || error == U_STRING_NOT_TERMINATED_WARNING) {
      // This shouldn't happen, but let's not break the user.
      continue;
    }

    RETURN_FAILURE_ON_EXCEPTION(
        isolate, JSObject::SetOwnPropertyIgnoreAttributes(
                     locales, factory->NewStringFromAsciiChecked(result),
                     factory->NewNumber(i), NONE));
  }

  return *locales;
}
152
153
// Runtime entry: returns the language tag of a default-constructed
// icu::Locale, or the string "und" if ICU cannot convert that locale's name
// into a tag that fits NUL-terminated into the local buffer.
RUNTIME_FUNCTION(Runtime_GetDefaultICULocale) {
  HandleScope scope(isolate);
  Factory* factory = isolate->factory();

  DCHECK_EQ(0, args.length());

  icu::Locale default_locale;

  // Convert the ICU locale name to a language tag (non-strict).
  char result[ULOC_FULLNAME_CAPACITY];
  UErrorCode status = U_ZERO_ERROR;
  uloc_toLanguageTag(default_locale.getName(), result, ULOC_FULLNAME_CAPACITY,
                     FALSE, &status);
  // U_SUCCESS() also accepts U_STRING_NOT_TERMINATED_WARNING, in which case
  // |result| has no trailing NUL and must not be read as a C string.
  if (U_SUCCESS(status) && status != U_STRING_NOT_TERMINATED_WARNING) {
    return *factory->NewStringFromAsciiChecked(result);
  }

  return *factory->NewStringFromStaticChars("und");
}
173
174
// Runtime entry: for every language tag in the JSArray passed as argument 0,
// computes two variants and returns them as a JSArray of objects with the
// properties "maximized" (tag with likely subtags added, extensions removed)
// and "base" (original tag with extensions removed).
//
// Throws an illegal-operation error if the array has 100 or more elements,
// and throws the illegal-argument string if an element is not a string or if
// any ICU conversion step fails or overflows its buffer.
RUNTIME_FUNCTION(Runtime_GetLanguageTagVariants) {
  HandleScope scope(isolate);
  Factory* factory = isolate->factory();

  DCHECK_EQ(1, args.length());

  CONVERT_ARG_HANDLE_CHECKED(JSArray, input, 0);

  uint32_t length = static_cast<uint32_t>(input->length()->Number());
  // Set some limit to prevent fuzz tests from going OOM.
  // Can be bumped when callers' requirements change.
  if (length >= 100) return isolate->ThrowIllegalOperation();
  Handle<FixedArray> output = factory->NewFixedArray(length);
  Handle<Name> maximized = factory->NewStringFromStaticChars("maximized");
  Handle<Name> base = factory->NewStringFromStaticChars("base");

  // Every buffer filled below is subsequently read as a C string. ICU reports
  // output that exactly fills the buffer (leaving no room for the NUL) with
  // U_STRING_NOT_TERMINATED_WARNING, which U_FAILURE() does not catch, and a
  // later successful call may clear that warning again. The status is
  // therefore checked after each individual step.
  auto conversion_failed = [](UErrorCode code) {
    return U_FAILURE(code) || code == U_STRING_NOT_TERMINATED_WARNING;
  };

  for (unsigned int i = 0; i < length; ++i) {
    Handle<Object> locale_id;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, locale_id, JSReceiver::GetElement(isolate, input, i));
    if (!locale_id->IsString()) {
      return isolate->Throw(*factory->illegal_argument_string());
    }

    v8::String::Utf8Value utf8_locale_id(
        v8::Utils::ToLocal(Handle<String>::cast(locale_id)));

    UErrorCode error = U_ZERO_ERROR;

    // Convert from BCP47 to ICU format.
    // de-DE-u-co-phonebk -> de_DE@collation=phonebook
    char icu_locale[ULOC_FULLNAME_CAPACITY];
    int icu_locale_length = 0;
    uloc_forLanguageTag(*utf8_locale_id, icu_locale, ULOC_FULLNAME_CAPACITY,
                        &icu_locale_length, &error);
    if (conversion_failed(error) || icu_locale_length == 0) {
      return isolate->Throw(*factory->illegal_argument_string());
    }

    // Maximize the locale.
    // de_DE@collation=phonebook -> de_Latn_DE@collation=phonebook
    char icu_max_locale[ULOC_FULLNAME_CAPACITY];
    uloc_addLikelySubtags(icu_locale, icu_max_locale, ULOC_FULLNAME_CAPACITY,
                          &error);
    if (conversion_failed(error)) {
      return isolate->Throw(*factory->illegal_argument_string());
    }

    // Remove extensions from maximized locale.
    // de_Latn_DE@collation=phonebook -> de_Latn_DE
    char icu_base_max_locale[ULOC_FULLNAME_CAPACITY];
    uloc_getBaseName(icu_max_locale, icu_base_max_locale,
                     ULOC_FULLNAME_CAPACITY, &error);
    if (conversion_failed(error)) {
      return isolate->Throw(*factory->illegal_argument_string());
    }

    // Get original name without extensions.
    // de_DE@collation=phonebook -> de_DE
    char icu_base_locale[ULOC_FULLNAME_CAPACITY];
    uloc_getBaseName(icu_locale, icu_base_locale, ULOC_FULLNAME_CAPACITY,
                     &error);
    if (conversion_failed(error)) {
      return isolate->Throw(*factory->illegal_argument_string());
    }

    // Convert from ICU locale format to BCP47 format.
    // de_Latn_DE -> de-Latn-DE
    char base_max_locale[ULOC_FULLNAME_CAPACITY];
    uloc_toLanguageTag(icu_base_max_locale, base_max_locale,
                       ULOC_FULLNAME_CAPACITY, FALSE, &error);
    if (conversion_failed(error)) {
      return isolate->Throw(*factory->illegal_argument_string());
    }

    // de_DE -> de-DE
    char base_locale[ULOC_FULLNAME_CAPACITY];
    uloc_toLanguageTag(icu_base_locale, base_locale, ULOC_FULLNAME_CAPACITY,
                       FALSE, &error);
    if (conversion_failed(error)) {
      return isolate->Throw(*factory->illegal_argument_string());
    }

    Handle<JSObject> result = factory->NewJSObject(isolate->object_function());
    Handle<String> value = factory->NewStringFromAsciiChecked(base_max_locale);
    JSObject::AddProperty(result, maximized, value, NONE);
    value = factory->NewStringFromAsciiChecked(base_locale);
    JSObject::AddProperty(result, base, value, NONE);
    output->set(i, *result);
  }

  Handle<JSArray> result = factory->NewJSArrayWithElements(output);
  result->set_length(Smi::FromInt(length));
  return *result;
}
258
259
// Runtime entry: returns true if argument 0 is a JSObject whose data property
// keyed by the intl-initialized marker symbol is not undefined, and false
// otherwise (including when argument 0 is not a JSObject).
RUNTIME_FUNCTION(Runtime_IsInitializedIntlObject) {
  HandleScope scope(isolate);

  DCHECK_EQ(1, args.length());

  CONVERT_ARG_HANDLE_CHECKED(Object, input, 0);

  if (input->IsJSObject()) {
    Handle<Symbol> marker_symbol =
        isolate->factory()->intl_initialized_marker_symbol();
    Handle<Object> marker_value = JSReceiver::GetDataProperty(
        Handle<JSObject>::cast(input), marker_symbol);
    const bool is_marked = !marker_value->IsUndefined(isolate);
    return isolate->heap()->ToBoolean(is_marked);
  }
  return isolate->heap()->false_value();
}
274
275
// Runtime entry: returns true if argument 0 is a JSObject whose data property
// keyed by the intl-initialized marker symbol is a string equal to argument 1
// (the expected type), and false in every other case.
RUNTIME_FUNCTION(Runtime_IsInitializedIntlObjectOfType) {
  HandleScope scope(isolate);

  DCHECK_EQ(2, args.length());

  CONVERT_ARG_HANDLE_CHECKED(Object, input, 0);
  CONVERT_ARG_HANDLE_CHECKED(String, expected_type, 1);

  if (!input->IsJSObject()) return isolate->heap()->false_value();

  Handle<Symbol> marker_symbol =
      isolate->factory()->intl_initialized_marker_symbol();
  Handle<Object> marker_value = JSReceiver::GetDataProperty(
      Handle<JSObject>::cast(input), marker_symbol);

  // Only a string marker can match the expected type.
  bool type_matches = false;
  if (marker_value->IsString()) {
    type_matches = String::cast(*marker_value)->Equals(*expected_type);
  }
  return isolate->heap()->ToBoolean(type_matches);
}
292
293
// Runtime entry: stores argument 1 (the type string) on argument 0 (a
// JSObject) under the intl-initialized marker symbol, asserting that the
// store succeeds. Returns undefined.
RUNTIME_FUNCTION(Runtime_MarkAsInitializedIntlObjectOfType) {
  HandleScope scope(isolate);

  DCHECK_EQ(2, args.length());

  CONVERT_ARG_HANDLE_CHECKED(JSObject, input, 0);
  CONVERT_ARG_HANDLE_CHECKED(String, type, 1);

  Handle<Symbol> marker_symbol =
      isolate->factory()->intl_initialized_marker_symbol();
  auto store_result = JSObject::SetProperty(input, marker_symbol, type, STRICT);
  store_result.Assert();

  return isolate->heap()->undefined_value();
}
307
308
// Runtime entry: creates the JS holder object for a date/time formatter.
// Arguments are the locale string, the options object and the |resolved|
// object; all three are forwarded to DateFormat::InitializeDateTimeFormat.
// The ICU formatter is stored in internal field 0 of the holder, and a weak
// global handle with DateFormat::DeleteDateFormat as callback is registered
// for the holder. Throws an illegal-operation error if no formatter could be
// created.
RUNTIME_FUNCTION(Runtime_CreateDateTimeFormat) {
  HandleScope scope(isolate);

  DCHECK_EQ(3, args.length());

  CONVERT_ARG_HANDLE_CHECKED(String, locale, 0);
  CONVERT_ARG_HANDLE_CHECKED(JSObject, options, 1);
  CONVERT_ARG_HANDLE_CHECKED(JSObject, resolved, 2);

  // Instantiate the holder from the native context's date-time-format
  // function.
  Handle<JSFunction> ctor(
      isolate->native_context()->intl_date_time_format_function());
  Handle<JSObject> holder;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, holder,
                                     JSObject::New(ctor, ctor));

  icu::SimpleDateFormat* formatter =
      DateFormat::InitializeDateTimeFormat(isolate, locale, options, resolved);
  if (formatter == nullptr) return isolate->ThrowIllegalOperation();

  // The formatter pointer lives in internal field 0 of the holder.
  holder->SetInternalField(0, reinterpret_cast<Smi*>(formatter));

  // Weak handle so the formatter can be deleted once GC collects the holder.
  Handle<Object> weak_handle = isolate->global_handles()->Create(*holder);
  GlobalHandles::MakeWeak(weak_handle.location(), weak_handle.location(),
                          DateFormat::DeleteDateFormat,
                          WeakCallbackType::kInternalFields);
  return *holder;
}
340
341
// Runtime entry: formats the JSDate in argument 1 with the ICU date formatter
// unpacked from the holder object in argument 0 and returns the formatted
// text as a two-byte string (or a failure if string creation fails).
RUNTIME_FUNCTION(Runtime_InternalDateFormat) {
  HandleScope scope(isolate);

  DCHECK_EQ(2, args.length());

  CONVERT_ARG_HANDLE_CHECKED(JSObject, date_format_holder, 0);
  CONVERT_ARG_HANDLE_CHECKED(JSDate, date, 1);

  Handle<Object> time_value;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, time_value,
                                     Object::ToNumber(date));

  icu::SimpleDateFormat* formatter =
      DateFormat::UnpackDateFormat(isolate, date_format_holder);
  CHECK_NOT_NULL(formatter);

  icu::UnicodeString formatted;
  formatter->format(time_value->Number(), formatted);

  // Hand ICU's UTF-16 buffer to the factory as a uint16_t vector.
  const uint16_t* formatted_chars =
      reinterpret_cast<const uint16_t*>(formatted.getBuffer());
  RETURN_RESULT_OR_FAILURE(
      isolate, isolate->factory()->NewStringFromTwoByte(Vector<const uint16_t>(
                   formatted_chars, formatted.length())));
}
365
366 namespace {
367 // The list comes from third_party/icu/source/i18n/unicode/udat.h.
368 // They're mapped to DateTimeFormat components listed at
369 // https://tc39.github.io/ecma402/#sec-datetimeformat-abstracts .
370
IcuDateFieldIdToDateType(int32_t field_id,Isolate * isolate)371 Handle<String> IcuDateFieldIdToDateType(int32_t field_id, Isolate* isolate) {
372 switch (field_id) {
373 case -1:
374 return isolate->factory()->literal_string();
375 case UDAT_YEAR_FIELD:
376 case UDAT_EXTENDED_YEAR_FIELD:
377 case UDAT_YEAR_NAME_FIELD:
378 return isolate->factory()->year_string();
379 case UDAT_MONTH_FIELD:
380 case UDAT_STANDALONE_MONTH_FIELD:
381 return isolate->factory()->month_string();
382 case UDAT_DATE_FIELD:
383 return isolate->factory()->day_string();
384 case UDAT_HOUR_OF_DAY1_FIELD:
385 case UDAT_HOUR_OF_DAY0_FIELD:
386 case UDAT_HOUR1_FIELD:
387 case UDAT_HOUR0_FIELD:
388 return isolate->factory()->hour_string();
389 case UDAT_MINUTE_FIELD:
390 return isolate->factory()->minute_string();
391 case UDAT_SECOND_FIELD:
392 return isolate->factory()->second_string();
393 case UDAT_DAY_OF_WEEK_FIELD:
394 case UDAT_DOW_LOCAL_FIELD:
395 case UDAT_STANDALONE_DAY_FIELD:
396 return isolate->factory()->weekday_string();
397 case UDAT_AM_PM_FIELD:
398 return isolate->factory()->dayperiod_string();
399 case UDAT_TIMEZONE_FIELD:
400 case UDAT_TIMEZONE_RFC_FIELD:
401 case UDAT_TIMEZONE_GENERIC_FIELD:
402 case UDAT_TIMEZONE_SPECIAL_FIELD:
403 case UDAT_TIMEZONE_LOCALIZED_GMT_OFFSET_FIELD:
404 case UDAT_TIMEZONE_ISO_FIELD:
405 case UDAT_TIMEZONE_ISO_LOCAL_FIELD:
406 return isolate->factory()->timeZoneName_string();
407 case UDAT_ERA_FIELD:
408 return isolate->factory()->era_string();
409 default:
410 // Other UDAT_*_FIELD's cannot show up because there is no way to specify
411 // them via options of Intl.DateTimeFormat.
412 UNREACHABLE();
413 // To prevent MSVC from issuing C4715 warning.
414 return Handle<String>();
415 }
416 }
417
AddElement(Handle<JSArray> array,int index,int32_t field_id,const icu::UnicodeString & formatted,int32_t begin,int32_t end,Isolate * isolate)418 bool AddElement(Handle<JSArray> array, int index, int32_t field_id,
419 const icu::UnicodeString& formatted, int32_t begin, int32_t end,
420 Isolate* isolate) {
421 HandleScope scope(isolate);
422 Factory* factory = isolate->factory();
423 Handle<JSObject> element = factory->NewJSObject(isolate->object_function());
424 Handle<String> value = IcuDateFieldIdToDateType(field_id, isolate);
425 JSObject::AddProperty(element, factory->type_string(), value, NONE);
426
427 icu::UnicodeString field(formatted.tempSubStringBetween(begin, end));
428 ASSIGN_RETURN_ON_EXCEPTION_VALUE(
429 isolate, value, factory->NewStringFromTwoByte(Vector<const uint16_t>(
430 reinterpret_cast<const uint16_t*>(field.getBuffer()),
431 field.length())),
432 false);
433
434 JSObject::AddProperty(element, factory->value_string(), value, NONE);
435 RETURN_ON_EXCEPTION_VALUE(
436 isolate, JSObject::AddDataElement(array, index, element, NONE), false);
437 return true;
438 }
439
440 } // namespace
441
// Runtime entry: formats the JSDate in argument 1 with the ICU date formatter
// unpacked from the holder in argument 0 and returns a JSArray of part
// objects (see AddElement). Text between or after the fields reported by ICU
// is emitted as parts with field id -1. Returns undefined if ICU formatting
// fails or if a part cannot be added; returns an empty array if the formatted
// text is empty.
RUNTIME_FUNCTION(Runtime_InternalDateFormatToParts) {
  HandleScope scope(isolate);
  Factory* factory = isolate->factory();

  DCHECK_EQ(2, args.length());

  CONVERT_ARG_HANDLE_CHECKED(JSObject, date_format_holder, 0);
  CONVERT_ARG_HANDLE_CHECKED(JSDate, date, 1);

  Handle<Object> time_value;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, time_value,
                                     Object::ToNumber(date));

  icu::SimpleDateFormat* formatter =
      DateFormat::UnpackDateFormat(isolate, date_format_holder);
  CHECK_NOT_NULL(formatter);

  icu::UnicodeString formatted;
  icu::FieldPositionIterator field_iter;
  icu::FieldPosition field;
  UErrorCode status = U_ZERO_ERROR;
  formatter->format(time_value->Number(), formatted, &field_iter, status);
  if (U_FAILURE(status)) return isolate->heap()->undefined_value();

  Handle<JSArray> parts = factory->NewJSArray(0);
  const int32_t total_length = formatted.length();
  if (total_length == 0) return *parts;

  int next_index = 0;
  // End offset of the most recently emitted field.
  int32_t cursor = 0;
  while (field_iter.next(field)) {
    const int32_t field_begin = field.getBeginIndex();
    const int32_t field_end = field.getEndIndex();

    // Text between the previous field and this one becomes a -1 part.
    if (cursor < field_begin) {
      if (!AddElement(parts, next_index++, -1, formatted, cursor, field_begin,
                      isolate)) {
        return isolate->heap()->undefined_value();
      }
    }
    if (!AddElement(parts, next_index++, field.getField(), formatted,
                    field_begin, field_end, isolate)) {
      return isolate->heap()->undefined_value();
    }
    cursor = field_end;
  }

  // Trailing text after the last field, if any.
  if (cursor < total_length) {
    if (!AddElement(parts, next_index, -1, formatted, cursor, total_length,
                    isolate)) {
      return isolate->heap()->undefined_value();
    }
  }
  JSObject::ValidateElements(parts);
  return *parts;
}
498
// Runtime entry: creates the JS holder object for a number formatter.
// Arguments are the locale string, the options object and the |resolved|
// object; all three are forwarded to NumberFormat::InitializeNumberFormat.
// The ICU formatter is stored in internal field 0 of the holder, and a weak
// global handle with NumberFormat::DeleteNumberFormat as callback is
// registered for the holder. Throws an illegal-operation error if no
// formatter could be created.
RUNTIME_FUNCTION(Runtime_CreateNumberFormat) {
  HandleScope scope(isolate);

  DCHECK_EQ(3, args.length());

  CONVERT_ARG_HANDLE_CHECKED(String, locale, 0);
  CONVERT_ARG_HANDLE_CHECKED(JSObject, options, 1);
  CONVERT_ARG_HANDLE_CHECKED(JSObject, resolved, 2);

  // Instantiate the holder from the native context's number-format function.
  Handle<JSFunction> ctor(
      isolate->native_context()->intl_number_format_function());
  Handle<JSObject> holder;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, holder,
                                     JSObject::New(ctor, ctor));

  icu::DecimalFormat* formatter =
      NumberFormat::InitializeNumberFormat(isolate, locale, options, resolved);
  if (formatter == nullptr) return isolate->ThrowIllegalOperation();

  // The formatter pointer lives in internal field 0 of the holder.
  holder->SetInternalField(0, reinterpret_cast<Smi*>(formatter));

  Handle<Object> weak_handle = isolate->global_handles()->Create(*holder);
  GlobalHandles::MakeWeak(weak_handle.location(), weak_handle.location(),
                          NumberFormat::DeleteNumberFormat,
                          WeakCallbackType::kInternalFields);
  return *holder;
}
529
530
// Runtime entry: converts argument 1 to a number, formats it with the ICU
// number formatter unpacked from the holder in argument 0, and returns the
// formatted text as a two-byte string (or a failure if the conversion or the
// string creation fails).
RUNTIME_FUNCTION(Runtime_InternalNumberFormat) {
  HandleScope scope(isolate);

  DCHECK_EQ(2, args.length());

  CONVERT_ARG_HANDLE_CHECKED(JSObject, number_format_holder, 0);
  CONVERT_ARG_HANDLE_CHECKED(Object, number, 1);

  Handle<Object> numeric_value;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, numeric_value,
                                     Object::ToNumber(number));

  icu::DecimalFormat* formatter =
      NumberFormat::UnpackNumberFormat(isolate, number_format_holder);
  CHECK_NOT_NULL(formatter);

  icu::UnicodeString formatted;
  formatter->format(numeric_value->Number(), formatted);

  // Hand ICU's UTF-16 buffer to the factory as a uint16_t vector.
  const uint16_t* formatted_chars =
      reinterpret_cast<const uint16_t*>(formatted.getBuffer());
  RETURN_RESULT_OR_FAILURE(
      isolate, isolate->factory()->NewStringFromTwoByte(Vector<const uint16_t>(
                   formatted_chars, formatted.length())));
}
554
555
// Runtime entry: creates the JS holder object for a collator. Arguments are
// the locale string, the options object and the |resolved| object; all three
// are forwarded to Collator::InitializeCollator. The ICU collator is stored
// in internal field 0 of the holder, and a weak global handle with
// Collator::DeleteCollator as callback is registered for the holder. Throws
// an illegal-operation error if no collator could be created.
RUNTIME_FUNCTION(Runtime_CreateCollator) {
  HandleScope scope(isolate);

  DCHECK_EQ(3, args.length());

  CONVERT_ARG_HANDLE_CHECKED(String, locale, 0);
  CONVERT_ARG_HANDLE_CHECKED(JSObject, options, 1);
  CONVERT_ARG_HANDLE_CHECKED(JSObject, resolved, 2);

  // Instantiate the holder from the native context's collator function.
  Handle<JSFunction> ctor(isolate->native_context()->intl_collator_function());
  Handle<JSObject> holder;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, holder,
                                     JSObject::New(ctor, ctor));

  icu::Collator* icu_collator =
      Collator::InitializeCollator(isolate, locale, options, resolved);
  if (icu_collator == nullptr) return isolate->ThrowIllegalOperation();

  // The collator pointer lives in internal field 0 of the holder.
  holder->SetInternalField(0, reinterpret_cast<Smi*>(icu_collator));

  Handle<Object> weak_handle = isolate->global_handles()->Create(*holder);
  GlobalHandles::MakeWeak(weak_handle.location(), weak_handle.location(),
                          Collator::DeleteCollator,
                          WeakCallbackType::kInternalFields);
  return *holder;
}
586
587
// Runtime entry: compares the strings in arguments 1 and 2 with the ICU
// collator unpacked from the holder in argument 0 and returns the collation
// result as a number. Throws an illegal-operation error if ICU reports a
// failure.
RUNTIME_FUNCTION(Runtime_InternalCompare) {
  HandleScope scope(isolate);

  DCHECK_EQ(3, args.length());

  CONVERT_ARG_HANDLE_CHECKED(JSObject, collator_holder, 0);
  CONVERT_ARG_HANDLE_CHECKED(String, string1, 1);
  CONVERT_ARG_HANDLE_CHECKED(String, string2, 2);

  icu::Collator* collator = Collator::UnpackCollator(isolate, collator_holder);
  CHECK_NOT_NULL(collator);

  // Flatten before entering the no-allocation scope below.
  string1 = String::Flatten(string1);
  string2 = String::Flatten(string2);

  UErrorCode status = U_ZERO_ERROR;
  UCollationResult order;
  {
    DisallowHeapAllocation no_gc;
    const int32_t lhs_length = string1->length();
    const int32_t rhs_length = string2->length();
    String::FlatContent lhs_flat = string1->GetFlatContent();
    String::FlatContent rhs_flat = string2->GetFlatContent();
    // Backing storage for widened one-byte content; declared before the
    // UnicodeStrings that refer to it.
    std::unique_ptr<uc16[]> lhs_widened;
    std::unique_ptr<uc16[]> rhs_widened;
    const UChar* lhs_chars =
        GetUCharBufferFromFlat(lhs_flat, &lhs_widened, lhs_length);
    icu::UnicodeString lhs(FALSE, lhs_chars, lhs_length);
    const UChar* rhs_chars =
        GetUCharBufferFromFlat(rhs_flat, &rhs_widened, rhs_length);
    icu::UnicodeString rhs(FALSE, rhs_chars, rhs_length);
    order = collator->compare(lhs, rhs, status);
  }
  if (U_FAILURE(status)) return isolate->ThrowIllegalOperation();

  return *isolate->factory()->NewNumberFromInt(order);
}
623
624
// Runtime entry: normalizes the string in argument 0 using the form selected
// by the integer in argument 1, which indexes |normalizationForms| below.
// Returns the input string itself when it is already fully normalized,
// a new two-byte string with the normalized text otherwise, or undefined if
// ICU reports a failure.
RUNTIME_FUNCTION(Runtime_StringNormalize) {
  HandleScope scope(isolate);
  // Each entry pairs an ICU normalization data name with a mode; the pair is
  // passed to icu::Normalizer2::getInstance below.
  // NOTE(review): presumably the entries correspond to NFC, NFD, NFKC and
  // NFKD in that order - confirm against the caller that computes form_id.
  static const struct {
    const char* name;
    UNormalization2Mode mode;
  } normalizationForms[] = {
      {"nfc", UNORM2_COMPOSE},
      {"nfc", UNORM2_DECOMPOSE},
      {"nfkc", UNORM2_COMPOSE},
      {"nfkc", UNORM2_DECOMPOSE},
  };

  DCHECK_EQ(2, args.length());

  CONVERT_ARG_HANDLE_CHECKED(String, s, 0);
  CONVERT_NUMBER_CHECKED(int, form_id, Int32, args[1]);
  // Guard the table lookup against out-of-range form ids.
  CHECK(form_id >= 0 &&
        static_cast<size_t>(form_id) < arraysize(normalizationForms));

  int length = s->length();
  s = String::Flatten(s);
  icu::UnicodeString result;
  // Declared outside the scope below so that a widened one-byte buffer is
  // still alive while |result| is read after the scope ends.
  std::unique_ptr<uc16[]> sap;
  UErrorCode status = U_ZERO_ERROR;
  {
    DisallowHeapAllocation no_gc;
    String::FlatContent flat = s->GetFlatContent();
    const UChar* src = GetUCharBufferFromFlat(flat, &sap, length);
    icu::UnicodeString input(false, src, length);
    // Getting a singleton. Should not free it.
    const icu::Normalizer2* normalizer =
        icu::Normalizer2::getInstance(nullptr, normalizationForms[form_id].name,
                                      normalizationForms[form_id].mode, status);
    DCHECK(U_SUCCESS(status));
    CHECK(normalizer != nullptr);
    // Length of the leading part of |input| that needs no normalization.
    int32_t normalized_prefix_length =
        normalizer->spanQuickCheckYes(input, status);
    // Quick return if the input is already normalized.
    if (length == normalized_prefix_length) return *s;
    icu::UnicodeString unnormalized =
        input.tempSubString(normalized_prefix_length);
    // Read-only alias of the normalized prefix.
    result.setTo(false, input.getBuffer(), normalized_prefix_length);
    // copy-on-write; normalize the suffix and append to |result|.
    normalizer->normalizeSecondAndAppend(result, unnormalized, status);
  }

  // |status| accumulates errors from spanQuickCheckYes and
  // normalizeSecondAndAppend.
  if (U_FAILURE(status)) {
    return isolate->heap()->undefined_value();
  }

  RETURN_RESULT_OR_FAILURE(
      isolate, isolate->factory()->NewStringFromTwoByte(Vector<const uint16_t>(
                   reinterpret_cast<const uint16_t*>(result.getBuffer()),
                   result.length())));
}
681
682
// Runtime entry: creates the JS holder object for a break iterator.
// Arguments are the locale string, the options object and the |resolved|
// object; all three are forwarded to
// V8BreakIterator::InitializeBreakIterator. The ICU iterator is stored in
// internal field 0 of the holder and internal field 1 (the adopted text) is
// set to null. A weak global handle with V8BreakIterator::DeleteBreakIterator
// as callback is registered for the holder. Throws an illegal-operation error
// if no iterator could be created.
RUNTIME_FUNCTION(Runtime_CreateBreakIterator) {
  HandleScope scope(isolate);

  DCHECK_EQ(3, args.length());

  CONVERT_ARG_HANDLE_CHECKED(String, locale, 0);
  CONVERT_ARG_HANDLE_CHECKED(JSObject, options, 1);
  CONVERT_ARG_HANDLE_CHECKED(JSObject, resolved, 2);

  // Instantiate the holder from the native context's break-iterator function.
  Handle<JSFunction> ctor(
      isolate->native_context()->intl_v8_break_iterator_function());
  Handle<JSObject> holder;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, holder,
                                     JSObject::New(ctor, ctor));

  icu::BreakIterator* iterator = V8BreakIterator::InitializeBreakIterator(
      isolate, locale, options, resolved);
  if (iterator == nullptr) return isolate->ThrowIllegalOperation();

  // Field 0: the iterator. Field 1: pointer to adopted text, initially null.
  holder->SetInternalField(0, reinterpret_cast<Smi*>(iterator));
  holder->SetInternalField(1, static_cast<Smi*>(nullptr));

  // Weak handle so the break iterator can be deleted once GC collects the
  // holder.
  Handle<Object> weak_handle = isolate->global_handles()->Create(*holder);
  GlobalHandles::MakeWeak(weak_handle.location(), weak_handle.location(),
                          V8BreakIterator::DeleteBreakIterator,
                          WeakCallbackType::kInternalFields);
  return *holder;
}
717
718
// Runtime entry: replaces the text the break iterator in the holder
// (argument 0) operates on with the string in argument 1. The previously
// stored icu::UnicodeString in internal field 1 is deleted, a new one is
// created from the string's characters and stored in that field, and the
// iterator is pointed at it. Returns undefined.
RUNTIME_FUNCTION(Runtime_BreakIteratorAdoptText) {
  HandleScope scope(isolate);

  DCHECK_EQ(2, args.length());

  CONVERT_ARG_HANDLE_CHECKED(JSObject, break_iterator_holder, 0);
  CONVERT_ARG_HANDLE_CHECKED(String, text, 1);

  icu::BreakIterator* iterator =
      V8BreakIterator::UnpackBreakIterator(isolate, break_iterator_holder);
  CHECK_NOT_NULL(iterator);

  // Drop whatever text was adopted before (deleting null is a no-op).
  delete reinterpret_cast<icu::UnicodeString*>(
      break_iterator_holder->GetInternalField(1));

  const int text_length = text->length();
  text = String::Flatten(text);
  DisallowHeapAllocation no_gc;
  String::FlatContent flat = text->GetFlatContent();
  std::unique_ptr<uc16[]> widened;
  const UChar* text_chars = GetUCharBufferFromFlat(flat, &widened, text_length);
  icu::UnicodeString* adopted = new icu::UnicodeString(text_chars, text_length);
  break_iterator_holder->SetInternalField(1, reinterpret_cast<Smi*>(adopted));

  iterator->setText(*adopted);

  return isolate->heap()->undefined_value();
}
748
749
// Runtime entry: calls first() on the ICU break iterator unpacked from the
// holder in argument 0 and returns the result as a number.
RUNTIME_FUNCTION(Runtime_BreakIteratorFirst) {
  HandleScope scope(isolate);

  DCHECK_EQ(1, args.length());

  CONVERT_ARG_HANDLE_CHECKED(JSObject, break_iterator_holder, 0);

  icu::BreakIterator* iterator =
      V8BreakIterator::UnpackBreakIterator(isolate, break_iterator_holder);
  CHECK_NOT_NULL(iterator);

  const int32_t boundary = iterator->first();
  return *isolate->factory()->NewNumberFromInt(boundary);
}
763
764
// Runtime entry: calls next() on the ICU break iterator unpacked from the
// holder in argument 0 and returns the result as a number.
RUNTIME_FUNCTION(Runtime_BreakIteratorNext) {
  HandleScope scope(isolate);

  DCHECK_EQ(1, args.length());

  CONVERT_ARG_HANDLE_CHECKED(JSObject, break_iterator_holder, 0);

  icu::BreakIterator* iterator =
      V8BreakIterator::UnpackBreakIterator(isolate, break_iterator_holder);
  CHECK_NOT_NULL(iterator);

  const int32_t boundary = iterator->next();
  return *isolate->factory()->NewNumberFromInt(boundary);
}
778
779
// Runtime entry: calls current() on the ICU break iterator unpacked from the
// holder in argument 0 and returns the result as a number.
RUNTIME_FUNCTION(Runtime_BreakIteratorCurrent) {
  HandleScope scope(isolate);

  DCHECK_EQ(1, args.length());

  CONVERT_ARG_HANDLE_CHECKED(JSObject, break_iterator_holder, 0);

  icu::BreakIterator* iterator =
      V8BreakIterator::UnpackBreakIterator(isolate, break_iterator_holder);
  CHECK_NOT_NULL(iterator);

  const int32_t boundary = iterator->current();
  return *isolate->factory()->NewNumberFromInt(boundary);
}
793
794
// Runtime entry: reads the rule status of the ICU break iterator unpacked
// from the holder in argument 0 and returns the name of the UBRK_WORD_*
// range it falls into: "none", the heap's number string, "letter", "kana" or
// "ideo"; "unknown" if it matches none of the ranges.
RUNTIME_FUNCTION(Runtime_BreakIteratorBreakType) {
  HandleScope scope(isolate);

  DCHECK_EQ(1, args.length());

  CONVERT_ARG_HANDLE_CHECKED(JSObject, break_iterator_holder, 0);

  icu::BreakIterator* iterator =
      V8BreakIterator::UnpackBreakIterator(isolate, break_iterator_holder);
  CHECK_NOT_NULL(iterator);

  // TODO(cira): Remove cast once ICU fixes base BreakIterator class.
  const int32_t rule_status =
      static_cast<icu::RuleBasedBreakIterator*>(iterator)->getRuleStatus();

  // Keep return values in sync with JavaScript BreakType enum.
  if (rule_status >= UBRK_WORD_NONE && rule_status < UBRK_WORD_NONE_LIMIT) {
    return *isolate->factory()->NewStringFromStaticChars("none");
  }
  if (rule_status >= UBRK_WORD_NUMBER && rule_status < UBRK_WORD_NUMBER_LIMIT) {
    return isolate->heap()->number_string();
  }
  if (rule_status >= UBRK_WORD_LETTER && rule_status < UBRK_WORD_LETTER_LIMIT) {
    return *isolate->factory()->NewStringFromStaticChars("letter");
  }
  if (rule_status >= UBRK_WORD_KANA && rule_status < UBRK_WORD_KANA_LIMIT) {
    return *isolate->factory()->NewStringFromStaticChars("kana");
  }
  if (rule_status >= UBRK_WORD_IDEO && rule_status < UBRK_WORD_IDEO_LIMIT) {
    return *isolate->factory()->NewStringFromStaticChars("ideo");
  }
  return *isolate->factory()->NewStringFromStaticChars("unknown");
}
825
826 namespace {
// Converts |s| to upper case (when |is_to_upper| is true) or lower case using
// ICU's u_strToUpper / u_strToLower with the locale name |lang|.
//
// |s| must already be flat (DCHECKed inside the loop). Returns the empty
// string for empty input, otherwise a newly allocated two-byte string with
// the converted text. Returns early via ASSIGN_RETURN_FAILURE_ON_EXCEPTION if
// allocating the result string raises an exception. If ICU ends with a
// failure status, |s| itself is returned (debug builds DCHECK on that).
MUST_USE_RESULT Object* LocaleConvertCase(Handle<String> s, Isolate* isolate,
                                          bool is_to_upper, const char* lang) {
  auto case_converter = is_to_upper ? u_strToUpper : u_strToLower;
  int32_t src_length = s->length();
  // First attempt assumes the output is as long as the input.
  int32_t dest_length = src_length;
  UErrorCode status;
  Handle<SeqTwoByteString> result;
  // Holds the widened copy of one-byte input; GetUCharBufferFromFlat fills it
  // only when it is still empty, so the copy is shared by both iterations.
  std::unique_ptr<uc16[]> sap;

  if (dest_length == 0) return isolate->heap()->empty_string();

  // This is not a real loop. It'll be executed only once (no overflow) or
  // twice (overflow).
  for (int i = 0; i < 2; ++i) {
    // Case conversion can increase the string length (e.g. sharp-S => SS) so
    // that we have to handle RangeError exceptions here.
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, result, isolate->factory()->NewRawTwoByteString(dest_length));
    DisallowHeapAllocation no_gc;
    DCHECK(s->IsFlat());
    String::FlatContent flat = s->GetFlatContent();
    const UChar* src = GetUCharBufferFromFlat(flat, &sap, src_length);
    status = U_ZERO_ERROR;
    // The converter's return value replaces |dest_length|; on overflow the
    // next iteration allocates a result string of that length.
    dest_length = case_converter(reinterpret_cast<UChar*>(result->GetChars()),
                                 dest_length, src, src_length, lang, &status);
    if (status != U_BUFFER_OVERFLOW_ERROR) break;
  }

  // In most cases, the output will fill the destination buffer completely
  // leading to an unterminated string (U_STRING_NOT_TERMINATED_WARNING).
  // Only in rare cases, it'll be shorter than the destination buffer and
  // |result| has to be truncated.
  DCHECK(U_SUCCESS(status));
  if (V8_LIKELY(status == U_STRING_NOT_TERMINATED_WARNING)) {
    DCHECK(dest_length == result->length());
    return *result;
  }
  if (U_SUCCESS(status)) {
    DCHECK(dest_length < result->length());
    return *Handle<SeqTwoByteString>::cast(
        SeqString::Truncate(result, dest_length));
  }
  // ICU reported a failure: fall back to the unconverted input.
  return *s;
}
871
// Returns true iff |ch| is one of the ASCII capital letters 'A' through 'Z'.
inline bool IsASCIIUpper(uint16_t ch) {
  const bool outside_range = ch < 'A' || ch > 'Z';
  return !outside_range;
}
873
// Lookup table with one entry per 8-bit code unit (index 0x00-0xFF) giving
// the lower-case counterpart of that code unit:
//   - 0x41-0x5A ('A'-'Z') map to 0x61-0x7A ('a'-'z');
//   - 0xC0-0xDE map to 0xE0-0xFE, except 0xD7 which maps to itself;
//   - every other index (including 0xDF) maps to itself.
const uint8_t kToLower[256] = {
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B,
    0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
    0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23,
    0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B,
    0x3C, 0x3D, 0x3E, 0x3F, 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73,
    0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B,
    0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, 0x80, 0x81, 0x82, 0x83,
    0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B,
    0x9C, 0x9D, 0x9E, 0x9F, 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7,
    0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, 0xB0, 0xB1, 0xB2, 0xB3,
    0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB,
    0xEC, 0xED, 0xEE, 0xEF, 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xD7,
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF, 0xE0, 0xE1, 0xE2, 0xE3,
    0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB,
    0xFC, 0xFD, 0xFE, 0xFF,
};
898
ToLatin1Lower(uint16_t ch)899 inline uint16_t ToLatin1Lower(uint16_t ch) {
900 return static_cast<uint16_t>(kToLower[ch]);
901 }
902
// Maps the ASCII lowercase letters 'a'..'z' to 'A'..'Z'; any other code unit
// is returned unchanged.
inline uint16_t ToASCIIUpper(uint16_t ch) {
  const bool is_ascii_lower = 'a' <= ch && ch <= 'z';
  // ASCII lowercase letters all have bit 5 (0x20) set; flipping it yields the
  // matching uppercase letter.
  return is_ascii_lower ? static_cast<uint16_t>(ch ^ 0x20) : ch;
}
906
907 // Does not work for U+00DF (sharp-s), U+00B5 (micron), U+00FF.
ToLatin1Upper(uint16_t ch)908 inline uint16_t ToLatin1Upper(uint16_t ch) {
909 DCHECK(ch != 0xDF && ch != 0xB5 && ch != 0xFF);
910 return ch &
911 ~(((ch >= 'a' && ch <= 'z') || (((ch & 0xE0) == 0xE0) && ch != 0xF7))
912 << 5);
913 }
914
915 template <typename Char>
ToUpperFastASCII(const Vector<const Char> & src,Handle<SeqOneByteString> result)916 bool ToUpperFastASCII(const Vector<const Char>& src,
917 Handle<SeqOneByteString> result) {
918 // Do a faster loop for the case where all the characters are ASCII.
919 uint16_t ored = 0;
920 int32_t index = 0;
921 for (auto it = src.begin(); it != src.end(); ++it) {
922 uint16_t ch = static_cast<uint16_t>(*it);
923 ored |= ch;
924 result->SeqOneByteStringSet(index++, ToASCIIUpper(ch));
925 }
926 return !(ored & ~0x7F);
927 }
928
929 const uint16_t sharp_s = 0xDF;
930
931 template <typename Char>
ToUpperOneByte(const Vector<const Char> & src,uint8_t * dest,int * sharp_s_count)932 bool ToUpperOneByte(const Vector<const Char>& src, uint8_t* dest,
933 int* sharp_s_count) {
934 // Still pretty-fast path for the input with non-ASCII Latin-1 characters.
935
936 // There are two special cases.
937 // 1. U+00B5 and U+00FF are mapped to a character beyond U+00FF.
938 // 2. Lower case sharp-S converts to "SS" (two characters)
939 *sharp_s_count = 0;
940 for (auto it = src.begin(); it != src.end(); ++it) {
941 uint16_t ch = static_cast<uint16_t>(*it);
942 if (V8_UNLIKELY(ch == sharp_s)) {
943 ++(*sharp_s_count);
944 continue;
945 }
946 if (V8_UNLIKELY(ch == 0xB5 || ch == 0xFF)) {
947 // Since this upper-cased character does not fit in an 8-bit string, we
948 // need to take the 16-bit path.
949 return false;
950 }
951 *dest++ = ToLatin1Upper(ch);
952 }
953
954 return true;
955 }
956
957 template <typename Char>
ToUpperWithSharpS(const Vector<const Char> & src,Handle<SeqOneByteString> result)958 void ToUpperWithSharpS(const Vector<const Char>& src,
959 Handle<SeqOneByteString> result) {
960 int32_t dest_index = 0;
961 for (auto it = src.begin(); it != src.end(); ++it) {
962 uint16_t ch = static_cast<uint16_t>(*it);
963 if (ch == sharp_s) {
964 result->SeqOneByteStringSet(dest_index++, 'S');
965 result->SeqOneByteStringSet(dest_index++, 'S');
966 } else {
967 result->SeqOneByteStringSet(dest_index++, ToLatin1Upper(ch));
968 }
969 }
970 }
971
FindFirstUpperOrNonAscii(Handle<String> s,int length)972 inline int FindFirstUpperOrNonAscii(Handle<String> s, int length) {
973 for (int index = 0; index < length; ++index) {
974 uint16_t ch = s->Get(index);
975 if (V8_UNLIKELY(IsASCIIUpper(ch) || ch & ~0x7F)) {
976 return index;
977 }
978 }
979 return length;
980 }
981
ConvertToLower(Handle<String> s,Isolate * isolate)982 MUST_USE_RESULT Object* ConvertToLower(Handle<String> s, Isolate* isolate) {
983 if (!s->HasOnlyOneByteChars()) {
984 // Use a slower implementation for strings with characters beyond U+00FF.
985 return LocaleConvertCase(s, isolate, false, "");
986 }
987
988 int length = s->length();
989
990 // We depend here on the invariant that the length of a Latin1
991 // string is invariant under ToLowerCase, and the result always
992 // fits in the Latin1 range in the *root locale*. It does not hold
993 // for ToUpperCase even in the root locale.
994
995 // Scan the string for uppercase and non-ASCII characters for strings
996 // shorter than a machine-word without any memory allocation overhead.
997 // TODO(jshin): Apply this to a longer input by breaking FastAsciiConvert()
998 // to two parts, one for scanning the prefix with no change and the other for
999 // handling ASCII-only characters.
1000 int index_to_first_unprocessed = length;
1001 const bool is_short = length < static_cast<int>(sizeof(uintptr_t));
1002 if (is_short) {
1003 index_to_first_unprocessed = FindFirstUpperOrNonAscii(s, length);
1004 // Nothing to do if the string is all ASCII with no uppercase.
1005 if (index_to_first_unprocessed == length) return *s;
1006 }
1007
1008 Handle<SeqOneByteString> result =
1009 isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
1010
1011 DisallowHeapAllocation no_gc;
1012 DCHECK(s->IsFlat());
1013 String::FlatContent flat = s->GetFlatContent();
1014 uint8_t* dest = result->GetChars();
1015 if (flat.IsOneByte()) {
1016 const uint8_t* src = flat.ToOneByteVector().start();
1017 bool has_changed_character = false;
1018 index_to_first_unprocessed = FastAsciiConvert<true>(
1019 reinterpret_cast<char*>(dest), reinterpret_cast<const char*>(src),
1020 length, &has_changed_character);
1021 // If not ASCII, we keep the result up to index_to_first_unprocessed and
1022 // process the rest.
1023 if (index_to_first_unprocessed == length)
1024 return has_changed_character ? *result : *s;
1025
1026 for (int index = index_to_first_unprocessed; index < length; ++index) {
1027 dest[index] = ToLatin1Lower(static_cast<uint16_t>(src[index]));
1028 }
1029 } else {
1030 if (index_to_first_unprocessed == length) {
1031 DCHECK(!is_short);
1032 index_to_first_unprocessed = FindFirstUpperOrNonAscii(s, length);
1033 }
1034 // Nothing to do if the string is all ASCII with no uppercase.
1035 if (index_to_first_unprocessed == length) return *s;
1036 const uint16_t* src = flat.ToUC16Vector().start();
1037 CopyChars(dest, src, index_to_first_unprocessed);
1038 for (int index = index_to_first_unprocessed; index < length; ++index) {
1039 dest[index] = ToLatin1Lower(static_cast<uint16_t>(src[index]));
1040 }
1041 }
1042
1043 return *result;
1044 }
1045
ConvertToUpper(Handle<String> s,Isolate * isolate)1046 MUST_USE_RESULT Object* ConvertToUpper(Handle<String> s, Isolate* isolate) {
1047 int32_t length = s->length();
1048 if (s->HasOnlyOneByteChars() && length > 0) {
1049 Handle<SeqOneByteString> result =
1050 isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
1051
1052 DCHECK(s->IsFlat());
1053 int sharp_s_count;
1054 bool is_result_single_byte;
1055 {
1056 DisallowHeapAllocation no_gc;
1057 String::FlatContent flat = s->GetFlatContent();
1058 uint8_t* dest = result->GetChars();
1059 if (flat.IsOneByte()) {
1060 Vector<const uint8_t> src = flat.ToOneByteVector();
1061 bool has_changed_character = false;
1062 int index_to_first_unprocessed =
1063 FastAsciiConvert<false>(reinterpret_cast<char*>(result->GetChars()),
1064 reinterpret_cast<const char*>(src.start()),
1065 length, &has_changed_character);
1066 if (index_to_first_unprocessed == length)
1067 return has_changed_character ? *result : *s;
1068 // If not ASCII, we keep the result up to index_to_first_unprocessed and
1069 // process the rest.
1070 is_result_single_byte =
1071 ToUpperOneByte(src.SubVector(index_to_first_unprocessed, length),
1072 dest + index_to_first_unprocessed, &sharp_s_count);
1073 } else {
1074 DCHECK(flat.IsTwoByte());
1075 Vector<const uint16_t> src = flat.ToUC16Vector();
1076 if (ToUpperFastASCII(src, result)) return *result;
1077 is_result_single_byte = ToUpperOneByte(src, dest, &sharp_s_count);
1078 }
1079 }
1080
1081 // Go to the full Unicode path if there are characters whose uppercase
1082 // is beyond the Latin-1 range (cannot be represented in OneByteString).
1083 if (V8_UNLIKELY(!is_result_single_byte)) {
1084 return LocaleConvertCase(s, isolate, true, "");
1085 }
1086
1087 if (sharp_s_count == 0) return *result;
1088
1089 // We have sharp_s_count sharp-s characters, but the result is still
1090 // in the Latin-1 range.
1091 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1092 isolate, result,
1093 isolate->factory()->NewRawOneByteString(length + sharp_s_count));
1094 DisallowHeapAllocation no_gc;
1095 String::FlatContent flat = s->GetFlatContent();
1096 if (flat.IsOneByte()) {
1097 ToUpperWithSharpS(flat.ToOneByteVector(), result);
1098 } else {
1099 ToUpperWithSharpS(flat.ToUC16Vector(), result);
1100 }
1101
1102 return *result;
1103 }
1104
1105 return LocaleConvertCase(s, isolate, true, "");
1106 }
1107
ConvertCase(Handle<String> s,bool is_upper,Isolate * isolate)1108 MUST_USE_RESULT Object* ConvertCase(Handle<String> s, bool is_upper,
1109 Isolate* isolate) {
1110 return is_upper ? ConvertToUpper(s, isolate) : ConvertToLower(s, isolate);
1111 }
1112
1113 } // namespace
1114
// Runtime entry: takes a single string argument and returns the result of
// ConvertToLower() on it.
RUNTIME_FUNCTION(Runtime_StringToLowerCaseI18N) {
  HandleScope scope(isolate);
  DCHECK_EQ(args.length(), 1);
  CONVERT_ARG_HANDLE_CHECKED(String, s, 0);
  // ConvertToLower() expects a flat string.
  Handle<String> flat_string = String::Flatten(s);
  return ConvertToLower(flat_string, isolate);
}
1122
// Runtime entry: takes a single string argument and returns the result of
// ConvertToUpper() on it.
RUNTIME_FUNCTION(Runtime_StringToUpperCaseI18N) {
  HandleScope scope(isolate);
  DCHECK_EQ(args.length(), 1);
  CONVERT_ARG_HANDLE_CHECKED(String, s, 0);
  // ConvertToUpper() expects a flat string.
  Handle<String> flat_string = String::Flatten(s);
  return ConvertToUpper(flat_string, isolate);
}
1130
// Runtime entry for locale-sensitive case conversion.
//   args[0]: the string to convert.
//   args[1]: boolean, true for upper case and false for lower case.
//   args[2]: the language subtag as a string.
// Turkish (tr), Greek (el), Lithuanian (lt) and Azeri (az) are converted by
// LocaleConvertCase() with the matching locale; every other language uses the
// root-locale conversion of ConvertCase().
RUNTIME_FUNCTION(Runtime_StringLocaleConvertCase) {
  HandleScope scope(isolate);
  DCHECK_EQ(args.length(), 3);
  CONVERT_ARG_HANDLE_CHECKED(String, s, 0);
  CONVERT_BOOLEAN_ARG_CHECKED(is_upper, 1);
  CONVERT_ARG_HANDLE_CHECKED(String, lang_arg, 2);

  // Primary language tag can be up to 8 characters long in theory.
  // https://tools.ietf.org/html/bcp47#section-2.2.1
  DCHECK(lang_arg->length() <= 8);
  lang_arg = String::Flatten(lang_arg);
  s = String::Flatten(s);

  // All the languages requiring special-handling have two-letter codes.
  // The test has to be '!= 2' rather than '> 2': the two characters are read
  // unconditionally below, so a language string of length 0 or 1 would be
  // read past its end.
  // NOTE(review): such short strings presumably arise from private-use or
  // grandfathered tags (a lone 'x' or 'i') - confirm against the JS caller.
  if (V8_UNLIKELY(lang_arg->length() != 2))
    return ConvertCase(s, is_upper, isolate);

  // Kept as 16-bit code units so that a non-ASCII character cannot be
  // truncated into one of the ASCII letters compared against below.
  uint16_t c1, c2;
  {
    DisallowHeapAllocation no_gc;
    String::FlatContent lang = lang_arg->GetFlatContent();
    c1 = lang.Get(0);
    c2 = lang.Get(1);
  }
  // TODO(jshin): Consider adding a fast path for ASCII or Latin-1. The fastpath
  // in the root locale needs to be adjusted for az, lt and tr because even case
  // mapping of ASCII range characters are different in those locales.
  // Greek (el) does not require any adjustment.
  if (V8_UNLIKELY(c1 == 't' && c2 == 'r'))
    return LocaleConvertCase(s, isolate, is_upper, "tr");
  if (V8_UNLIKELY(c1 == 'e' && c2 == 'l'))
    return LocaleConvertCase(s, isolate, is_upper, "el");
  if (V8_UNLIKELY(c1 == 'l' && c2 == 't'))
    return LocaleConvertCase(s, isolate, is_upper, "lt");
  if (V8_UNLIKELY(c1 == 'a' && c2 == 'z'))
    return LocaleConvertCase(s, isolate, is_upper, "az");

  return ConvertCase(s, is_upper, isolate);
}
1170
// Runtime entry: returns element 0 of the DATE_CACHE_VERSION eternal-handle
// singleton, creating that singleton (a one-element FixedArray holding Smi
// zero) on first use. Returns undefined while the serializer is enabled.
RUNTIME_FUNCTION(Runtime_DateCacheVersion) {
  HandleScope scope(isolate);
  DCHECK_EQ(0, args.length());
  if (isolate->serializer_enabled()) return isolate->heap()->undefined_value();

  auto* eternals = isolate->eternal_handles();
  const bool singleton_exists =
      eternals->Exists(EternalHandles::DATE_CACHE_VERSION);
  if (!singleton_exists) {
    Handle<FixedArray> initial_version =
        isolate->factory()->NewFixedArray(1, TENURED);
    initial_version->set(0, Smi::kZero);
    eternals->CreateSingleton(isolate, *initial_version,
                              EternalHandles::DATE_CACHE_VERSION);
  }

  Handle<FixedArray> version_holder = Handle<FixedArray>::cast(
      eternals->GetSingleton(EternalHandles::DATE_CACHE_VERSION));
  return version_holder->get(0);
}
1187
1188 } // namespace internal
1189 } // namespace v8
1190
1191 #endif // V8_I18N_SUPPORT
1192