1 // Copyright 2016 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "src/builtins/builtins-utils-inl.h"
6 #include "src/builtins/builtins.h"
7 #include "src/heap/heap-inl.h" // For ToBoolean. TODO(jkummerow): Drop.
8 #include "src/logging/counters.h"
9 #include "src/numbers/conversions.h"
10 #include "src/objects/objects-inl.h"
11 #ifdef V8_INTL_SUPPORT
12 #include "src/objects/intl-objects.h"
13 #endif
14 #include "src/base/strings.h"
15 #include "src/regexp/regexp-utils.h"
16 #include "src/strings/string-builder-inl.h"
17 #include "src/strings/string-case.h"
18 #include "src/strings/unicode-inl.h"
19 #include "src/strings/unicode.h"
20
21 namespace v8 {
22 namespace internal {
23
24 namespace { // for String.fromCodePoint
25
IsValidCodePoint(Isolate * isolate,Handle<Object> value)26 bool IsValidCodePoint(Isolate* isolate, Handle<Object> value) {
27 if (!value->IsNumber() &&
28 !Object::ToNumber(isolate, value).ToHandle(&value)) {
29 return false;
30 }
31
32 if (Object::ToInteger(isolate, value).ToHandleChecked()->Number() !=
33 value->Number()) {
34 return false;
35 }
36
37 if (value->Number() < 0 || value->Number() > 0x10FFFF) {
38 return false;
39 }
40
41 return true;
42 }
43
44 static constexpr base::uc32 kInvalidCodePoint = static_cast<base::uc32>(-1);
45
NextCodePoint(Isolate * isolate,BuiltinArguments args,int index)46 base::uc32 NextCodePoint(Isolate* isolate, BuiltinArguments args, int index) {
47 Handle<Object> value = args.at(1 + index);
48 ASSIGN_RETURN_ON_EXCEPTION_VALUE(
49 isolate, value, Object::ToNumber(isolate, value), kInvalidCodePoint);
50 if (!IsValidCodePoint(isolate, value)) {
51 isolate->Throw(*isolate->factory()->NewRangeError(
52 MessageTemplate::kInvalidCodePoint, value));
53 return kInvalidCodePoint;
54 }
55 return DoubleToUint32(value->Number());
56 }
57
58 } // namespace
59
60 // ES6 section 21.1.2.2 String.fromCodePoint ( ...codePoints )
BUILTIN(StringFromCodePoint)61 BUILTIN(StringFromCodePoint) {
62 HandleScope scope(isolate);
63 int const length = args.length() - 1;
64 if (length == 0) return ReadOnlyRoots(isolate).empty_string();
65 DCHECK_LT(0, length);
66
67 // Optimistically assume that the resulting String contains only one byte
68 // characters.
69 std::vector<uint8_t> one_byte_buffer;
70 one_byte_buffer.reserve(length);
71 base::uc32 code = 0;
72 int index;
73 for (index = 0; index < length; index++) {
74 code = NextCodePoint(isolate, args, index);
75 if (code == kInvalidCodePoint) {
76 return ReadOnlyRoots(isolate).exception();
77 }
78 if (code > String::kMaxOneByteCharCode) {
79 break;
80 }
81 one_byte_buffer.push_back(code);
82 }
83
84 if (index == length) {
85 RETURN_RESULT_OR_FAILURE(
86 isolate, isolate->factory()->NewStringFromOneByte(base::Vector<uint8_t>(
87 one_byte_buffer.data(), one_byte_buffer.size())));
88 }
89
90 std::vector<base::uc16> two_byte_buffer;
91 two_byte_buffer.reserve(length - index);
92
93 while (true) {
94 if (code <=
95 static_cast<base::uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) {
96 two_byte_buffer.push_back(code);
97 } else {
98 two_byte_buffer.push_back(unibrow::Utf16::LeadSurrogate(code));
99 two_byte_buffer.push_back(unibrow::Utf16::TrailSurrogate(code));
100 }
101
102 if (++index == length) {
103 break;
104 }
105 code = NextCodePoint(isolate, args, index);
106 if (code == kInvalidCodePoint) {
107 return ReadOnlyRoots(isolate).exception();
108 }
109 }
110
111 Handle<SeqTwoByteString> result;
112 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
113 isolate, result,
114 isolate->factory()->NewRawTwoByteString(
115 static_cast<int>(one_byte_buffer.size() + two_byte_buffer.size())));
116
117 DisallowGarbageCollection no_gc;
118 CopyChars(result->GetChars(no_gc), one_byte_buffer.data(),
119 one_byte_buffer.size());
120 CopyChars(result->GetChars(no_gc) + one_byte_buffer.size(),
121 two_byte_buffer.data(), two_byte_buffer.size());
122
123 return *result;
124 }
125
126 // ES6 section 21.1.3.9
127 // String.prototype.lastIndexOf ( searchString [ , position ] )
BUILTIN(StringPrototypeLastIndexOf)128 BUILTIN(StringPrototypeLastIndexOf) {
129 HandleScope handle_scope(isolate);
130 return String::LastIndexOf(isolate, args.receiver(),
131 args.atOrUndefined(isolate, 1),
132 args.atOrUndefined(isolate, 2));
133 }
134
135 // ES6 section 21.1.3.10 String.prototype.localeCompare ( that )
136 //
137 // This function is implementation specific. For now, we do not
138 // do anything locale specific.
BUILTIN(StringPrototypeLocaleCompare)139 BUILTIN(StringPrototypeLocaleCompare) {
140 HandleScope handle_scope(isolate);
141
142 isolate->CountUsage(v8::Isolate::UseCounterFeature::kStringLocaleCompare);
143 static const char* const kMethod = "String.prototype.localeCompare";
144
145 #ifdef V8_INTL_SUPPORT
146 TO_THIS_STRING(str1, kMethod);
147 Handle<String> str2;
148 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
149 isolate, str2, Object::ToString(isolate, args.atOrUndefined(isolate, 1)));
150 base::Optional<int> result = Intl::StringLocaleCompare(
151 isolate, str1, str2, args.atOrUndefined(isolate, 2),
152 args.atOrUndefined(isolate, 3), kMethod);
153 if (!result.has_value()) {
154 DCHECK(isolate->has_pending_exception());
155 return ReadOnlyRoots(isolate).exception();
156 }
157 return Smi::FromInt(result.value());
158 #else
159 DCHECK_LE(2, args.length());
160
161 TO_THIS_STRING(str1, kMethod);
162 Handle<String> str2;
163 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, str2,
164 Object::ToString(isolate, args.at(1)));
165
166 if (str1.is_identical_to(str2)) return Smi::zero(); // Equal.
167 int str1_length = str1->length();
168 int str2_length = str2->length();
169
170 // Decide trivial cases without flattening.
171 if (str1_length == 0) {
172 if (str2_length == 0) return Smi::zero(); // Equal.
173 return Smi::FromInt(-str2_length);
174 } else {
175 if (str2_length == 0) return Smi::FromInt(str1_length);
176 }
177
178 int end = str1_length < str2_length ? str1_length : str2_length;
179
180 // No need to flatten if we are going to find the answer on the first
181 // character. At this point we know there is at least one character
182 // in each string, due to the trivial case handling above.
183 int d = str1->Get(0) - str2->Get(0);
184 if (d != 0) return Smi::FromInt(d);
185
186 str1 = String::Flatten(isolate, str1);
187 str2 = String::Flatten(isolate, str2);
188
189 DisallowGarbageCollection no_gc;
190 String::FlatContent flat1 = str1->GetFlatContent(no_gc);
191 String::FlatContent flat2 = str2->GetFlatContent(no_gc);
192
193 for (int i = 0; i < end; i++) {
194 if (flat1.Get(i) != flat2.Get(i)) {
195 return Smi::FromInt(flat1.Get(i) - flat2.Get(i));
196 }
197 }
198
199 return Smi::FromInt(str1_length - str2_length);
200 #endif // !V8_INTL_SUPPORT
201 }
202
203 #ifndef V8_INTL_SUPPORT
204 // ES6 section 21.1.3.12 String.prototype.normalize ( [form] )
205 //
206 // Simply checks the argument is valid and returns the string itself.
207 // If internationalization is enabled, then intl.js will override this function
208 // and provide the proper functionality, so this is just a fallback.
BUILTIN(StringPrototypeNormalize)209 BUILTIN(StringPrototypeNormalize) {
210 HandleScope handle_scope(isolate);
211 TO_THIS_STRING(string, "String.prototype.normalize");
212
213 Handle<Object> form_input = args.atOrUndefined(isolate, 1);
214 if (form_input->IsUndefined(isolate)) return *string;
215
216 Handle<String> form;
217 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, form,
218 Object::ToString(isolate, form_input));
219
220 if (!(String::Equals(isolate, form, isolate->factory()->NFC_string()) ||
221 String::Equals(isolate, form, isolate->factory()->NFD_string()) ||
222 String::Equals(isolate, form, isolate->factory()->NFKC_string()) ||
223 String::Equals(isolate, form, isolate->factory()->NFKD_string()))) {
224 Handle<String> valid_forms =
225 isolate->factory()->NewStringFromStaticChars("NFC, NFD, NFKC, NFKD");
226 THROW_NEW_ERROR_RETURN_FAILURE(
227 isolate,
228 NewRangeError(MessageTemplate::kNormalizationForm, valid_forms));
229 }
230
231 return *string;
232 }
233 #endif // !V8_INTL_SUPPORT
234
235
236 #ifndef V8_INTL_SUPPORT
237 namespace {
238
ToUpperOverflows(base::uc32 character)239 inline bool ToUpperOverflows(base::uc32 character) {
240 // y with umlauts and the micro sign are the only characters that stop
241 // fitting into one-byte when converting to uppercase.
242 static const base::uc32 yuml_code = 0xFF;
243 static const base::uc32 micro_code = 0xB5;
244 return (character == yuml_code || character == micro_code);
245 }
246
247 template <class Converter>
ConvertCaseHelper(Isolate * isolate,String string,SeqString result,int result_length,unibrow::Mapping<Converter,128> * mapping)248 V8_WARN_UNUSED_RESULT static Object ConvertCaseHelper(
249 Isolate* isolate, String string, SeqString result, int result_length,
250 unibrow::Mapping<Converter, 128>* mapping) {
251 DisallowGarbageCollection no_gc;
252 // We try this twice, once with the assumption that the result is no longer
253 // than the input and, if that assumption breaks, again with the exact
254 // length. This may not be pretty, but it is nicer than what was here before
255 // and I hereby claim my vaffel-is.
256 //
257 // NOTE: This assumes that the upper/lower case of an ASCII
258 // character is also ASCII. This is currently the case, but it
259 // might break in the future if we implement more context and locale
260 // dependent upper/lower conversions.
261 bool has_changed_character = false;
262
263 // Convert all characters to upper case, assuming that they will fit
264 // in the buffer
265 StringCharacterStream stream(string);
266 unibrow::uchar chars[Converter::kMaxWidth];
267 // We can assume that the string is not empty
268 base::uc32 current = stream.GetNext();
269 bool ignore_overflow = Converter::kIsToLower || result.IsSeqTwoByteString();
270 for (int i = 0; i < result_length;) {
271 bool has_next = stream.HasMore();
272 base::uc32 next = has_next ? stream.GetNext() : 0;
273 int char_length = mapping->get(current, next, chars);
274 if (char_length == 0) {
275 // The case conversion of this character is the character itself.
276 result.Set(i, current);
277 i++;
278 } else if (char_length == 1 &&
279 (ignore_overflow || !ToUpperOverflows(current))) {
280 // Common case: converting the letter resulted in one character.
281 DCHECK(static_cast<base::uc32>(chars[0]) != current);
282 result.Set(i, chars[0]);
283 has_changed_character = true;
284 i++;
285 } else if (result_length == string.length()) {
286 bool overflows = ToUpperOverflows(current);
287 // We've assumed that the result would be as long as the
288 // input but here is a character that converts to several
289 // characters. No matter, we calculate the exact length
290 // of the result and try the whole thing again.
291 //
292 // Note that this leaves room for optimization. We could just
293 // memcpy what we already have to the result string. Also,
294 // the result string is the last object allocated we could
295 // "realloc" it and probably, in the vast majority of cases,
296 // extend the existing string to be able to hold the full
297 // result.
298 int next_length = 0;
299 if (has_next) {
300 next_length = mapping->get(next, 0, chars);
301 if (next_length == 0) next_length = 1;
302 }
303 int current_length = i + char_length + next_length;
304 while (stream.HasMore()) {
305 current = stream.GetNext();
306 overflows |= ToUpperOverflows(current);
307 // NOTE: we use 0 as the next character here because, while
308 // the next character may affect what a character converts to,
309 // it does not in any case affect the length of what it convert
310 // to.
311 int char_length = mapping->get(current, 0, chars);
312 if (char_length == 0) char_length = 1;
313 current_length += char_length;
314 if (current_length > String::kMaxLength) {
315 AllowGarbageCollection allocate_error_and_return;
316 THROW_NEW_ERROR_RETURN_FAILURE(isolate,
317 NewInvalidStringLengthError());
318 }
319 }
320 // Try again with the real length. Return signed if we need
321 // to allocate a two-byte string for to uppercase.
322 return (overflows && !ignore_overflow) ? Smi::FromInt(-current_length)
323 : Smi::FromInt(current_length);
324 } else {
325 for (int j = 0; j < char_length; j++) {
326 result.Set(i, chars[j]);
327 i++;
328 }
329 has_changed_character = true;
330 }
331 current = next;
332 }
333 if (has_changed_character) {
334 return result;
335 } else {
336 // If we didn't actually change anything in doing the conversion
337 // we simple return the result and let the converted string
338 // become garbage; there is no reason to keep two identical strings
339 // alive.
340 return string;
341 }
342 }
343
344 template <class Converter>
ConvertCase(Handle<String> s,Isolate * isolate,unibrow::Mapping<Converter,128> * mapping)345 V8_WARN_UNUSED_RESULT static Object ConvertCase(
346 Handle<String> s, Isolate* isolate,
347 unibrow::Mapping<Converter, 128>* mapping) {
348 s = String::Flatten(isolate, s);
349 int length = s->length();
350 // Assume that the string is not empty; we need this assumption later
351 if (length == 0) return *s;
352
353 // Simpler handling of ASCII strings.
354 //
355 // NOTE: This assumes that the upper/lower case of an ASCII
356 // character is also ASCII. This is currently the case, but it
357 // might break in the future if we implement more context and locale
358 // dependent upper/lower conversions.
359 if (String::IsOneByteRepresentationUnderneath(*s)) {
360 // Same length as input.
361 Handle<SeqOneByteString> result =
362 isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
363 DisallowGarbageCollection no_gc;
364 String::FlatContent flat_content = s->GetFlatContent(no_gc);
365 DCHECK(flat_content.IsFlat());
366 bool has_changed_character = false;
367 int index_to_first_unprocessed = FastAsciiConvert<Converter::kIsToLower>(
368 reinterpret_cast<char*>(result->GetChars(no_gc)),
369 reinterpret_cast<const char*>(flat_content.ToOneByteVector().begin()),
370 length, &has_changed_character);
371 // If not ASCII, we discard the result and take the 2 byte path.
372 if (index_to_first_unprocessed == length)
373 return has_changed_character ? *result : *s;
374 }
375
376 Handle<SeqString> result; // Same length as input.
377 if (s->IsOneByteRepresentation()) {
378 result = isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
379 } else {
380 result = isolate->factory()->NewRawTwoByteString(length).ToHandleChecked();
381 }
382
383 Object answer = ConvertCaseHelper(isolate, *s, *result, length, mapping);
384 if (answer.IsException(isolate) || answer.IsString()) return answer;
385
386 DCHECK(answer.IsSmi());
387 length = Smi::ToInt(answer);
388 if (s->IsOneByteRepresentation() && length > 0) {
389 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
390 isolate, result, isolate->factory()->NewRawOneByteString(length));
391 } else {
392 if (length < 0) length = -length;
393 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
394 isolate, result, isolate->factory()->NewRawTwoByteString(length));
395 }
396 return ConvertCaseHelper(isolate, *s, *result, length, mapping);
397 }
398
399 } // namespace
400
// String.prototype.toLocaleLowerCase: converts the receiver string with the
// isolate's to-lower mapping; no locale specific handling is done here.
BUILTIN(StringPrototypeToLocaleLowerCase) {
  HandleScope scope(isolate);
  TO_THIS_STRING(string, "String.prototype.toLocaleLowerCase");
  auto* lower_mapping = isolate->runtime_state()->to_lower_mapping();
  return ConvertCase(string, isolate, lower_mapping);
}
407
// String.prototype.toLocaleUpperCase: converts the receiver string with the
// isolate's to-upper mapping; no locale specific handling is done here.
BUILTIN(StringPrototypeToLocaleUpperCase) {
  HandleScope scope(isolate);
  TO_THIS_STRING(string, "String.prototype.toLocaleUpperCase");
  auto* upper_mapping = isolate->runtime_state()->to_upper_mapping();
  return ConvertCase(string, isolate, upper_mapping);
}
414
// String.prototype.toLowerCase: converts the receiver string with the
// isolate's to-lower mapping.
BUILTIN(StringPrototypeToLowerCase) {
  HandleScope scope(isolate);
  TO_THIS_STRING(string, "String.prototype.toLowerCase");
  auto* lower_mapping = isolate->runtime_state()->to_lower_mapping();
  return ConvertCase(string, isolate, lower_mapping);
}
421
// String.prototype.toUpperCase: converts the receiver string with the
// isolate's to-upper mapping.
BUILTIN(StringPrototypeToUpperCase) {
  HandleScope scope(isolate);
  TO_THIS_STRING(string, "String.prototype.toUpperCase");
  auto* upper_mapping = isolate->runtime_state()->to_upper_mapping();
  return ConvertCase(string, isolate, upper_mapping);
}
428 #endif // !V8_INTL_SUPPORT
429
430 // ES6 #sec-string.prototype.raw
BUILTIN(StringRaw)431 BUILTIN(StringRaw) {
432 HandleScope scope(isolate);
433 Handle<Object> templ = args.atOrUndefined(isolate, 1);
434 const uint32_t argc = args.length();
435 Handle<String> raw_string =
436 isolate->factory()->NewStringFromAsciiChecked("raw");
437
438 Handle<Object> cooked;
439 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, cooked,
440 Object::ToObject(isolate, templ));
441
442 Handle<Object> raw;
443 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
444 isolate, raw, Object::GetProperty(isolate, cooked, raw_string));
445 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, raw,
446 Object::ToObject(isolate, raw));
447 Handle<Object> raw_len;
448 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
449 isolate, raw_len,
450 Object::GetProperty(isolate, raw, isolate->factory()->length_string()));
451
452 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, raw_len,
453 Object::ToLength(isolate, raw_len));
454
455 IncrementalStringBuilder result_builder(isolate);
456 // Intentional spec violation: we ignore {length} values >= 2^32, because
457 // assuming non-empty chunks they would generate too-long strings anyway.
458 const double raw_len_number = raw_len->Number();
459 const uint32_t length = raw_len_number > std::numeric_limits<uint32_t>::max()
460 ? std::numeric_limits<uint32_t>::max()
461 : static_cast<uint32_t>(raw_len_number);
462 if (length > 0) {
463 Handle<Object> first_element;
464 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, first_element,
465 Object::GetElement(isolate, raw, 0));
466
467 Handle<String> first_string;
468 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
469 isolate, first_string, Object::ToString(isolate, first_element));
470 result_builder.AppendString(first_string);
471
472 for (uint32_t i = 1, arg_i = 2; i < length; i++, arg_i++) {
473 if (arg_i < argc) {
474 Handle<String> argument_string;
475 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
476 isolate, argument_string,
477 Object::ToString(isolate, args.at(arg_i)));
478 result_builder.AppendString(argument_string);
479 }
480
481 Handle<Object> element;
482 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, element,
483 Object::GetElement(isolate, raw, i));
484
485 Handle<String> element_string;
486 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, element_string,
487 Object::ToString(isolate, element));
488 result_builder.AppendString(element_string);
489 }
490 }
491
492 RETURN_RESULT_OR_FAILURE(isolate, result_builder.Finish());
493 }
494
495 } // namespace internal
496 } // namespace v8
497