1 // Copyright 2019 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "src/objects/string.h"
6
7 #include "src/common/assert-scope.h"
8 #include "src/common/globals.h"
9 #include "src/execution/thread-id.h"
10 #include "src/handles/handles-inl.h"
11 #include "src/heap/heap-inl.h"
12 #include "src/heap/memory-chunk.h"
13 #include "src/heap/read-only-heap.h"
14 #include "src/numbers/conversions.h"
15 #include "src/objects/map.h"
16 #include "src/objects/oddball.h"
17 #include "src/objects/string-comparator.h"
18 #include "src/objects/string-inl.h"
19 #include "src/strings/char-predicates.h"
20 #include "src/strings/string-builder-inl.h"
21 #include "src/strings/string-hasher.h"
22 #include "src/strings/string-search.h"
23 #include "src/strings/string-stream.h"
24 #include "src/strings/unicode-inl.h"
25 #include "src/utils/ostreams.h"
26
27 namespace v8 {
28 namespace internal {
29
SlowFlatten(Isolate * isolate,Handle<ConsString> cons,AllocationType allocation)30 Handle<String> String::SlowFlatten(Isolate* isolate, Handle<ConsString> cons,
31 AllocationType allocation) {
32 DCHECK_NE(cons->second().length(), 0);
33
34 // TurboFan can create cons strings with empty first parts.
35 while (cons->first().length() == 0) {
36 // We do not want to call this function recursively. Therefore we call
37 // String::Flatten only in those cases where String::SlowFlatten is not
38 // called again.
39 if (cons->second().IsConsString() && !cons->second().IsFlat()) {
40 cons = handle(ConsString::cast(cons->second()), isolate);
41 } else {
42 return String::Flatten(isolate, handle(cons->second(), isolate));
43 }
44 }
45
46 DCHECK(AllowHeapAllocation::IsAllowed());
47 DCHECK(AllowGarbageCollection::IsAllowed());
48 int length = cons->length();
49 allocation =
50 ObjectInYoungGeneration(*cons) ? allocation : AllocationType::kOld;
51 Handle<SeqString> result;
52 if (cons->IsOneByteRepresentation()) {
53 Handle<SeqOneByteString> flat =
54 isolate->factory()
55 ->NewRawOneByteString(length, allocation)
56 .ToHandleChecked();
57 DisallowHeapAllocation no_gc;
58 WriteToFlat(*cons, flat->GetChars(no_gc), 0, length);
59 result = flat;
60 } else {
61 Handle<SeqTwoByteString> flat =
62 isolate->factory()
63 ->NewRawTwoByteString(length, allocation)
64 .ToHandleChecked();
65 DisallowHeapAllocation no_gc;
66 WriteToFlat(*cons, flat->GetChars(no_gc), 0, length);
67 result = flat;
68 }
69 cons->set_first(*result);
70 cons->set_second(ReadOnlyRoots(isolate).empty_string());
71 DCHECK(result->IsFlat());
72 return result;
73 }
74
75 namespace {
76
77 template <class StringClass>
MigrateExternalStringResource(Isolate * isolate,ExternalString from,StringClass to)78 void MigrateExternalStringResource(Isolate* isolate, ExternalString from,
79 StringClass to) {
80 Address to_resource_address = to.resource_as_address();
81 if (to_resource_address == kNullAddress) {
82 StringClass cast_from = StringClass::cast(from);
83 // |to| is a just-created internalized copy of |from|. Migrate the resource.
84 to.SetResource(isolate, cast_from.resource());
85 // Zap |from|'s resource pointer to reflect the fact that |from| has
86 // relinquished ownership of its resource.
87 isolate->heap()->UpdateExternalString(
88 from, ExternalString::cast(from).ExternalPayloadSize(), 0);
89 cast_from.SetResource(isolate, nullptr);
90 } else if (to_resource_address != from.resource_as_address()) {
91 // |to| already existed and has its own resource. Finalize |from|.
92 isolate->heap()->FinalizeExternalString(from);
93 }
94 }
95
96 } // namespace
97
MakeThin(Isolate * isolate,String internalized)98 void String::MakeThin(Isolate* isolate, String internalized) {
99 DisallowHeapAllocation no_gc;
100 DCHECK_NE(*this, internalized);
101 DCHECK(internalized.IsInternalizedString());
102
103 if (this->IsExternalString()) {
104 if (internalized.IsExternalOneByteString()) {
105 MigrateExternalStringResource(isolate, ExternalString::cast(*this),
106 ExternalOneByteString::cast(internalized));
107 } else if (internalized.IsExternalTwoByteString()) {
108 MigrateExternalStringResource(isolate, ExternalString::cast(*this),
109 ExternalTwoByteString::cast(internalized));
110 } else {
111 // If the external string is duped into an existing non-external
112 // internalized string, free its resource (it's about to be rewritten
113 // into a ThinString below).
114 isolate->heap()->FinalizeExternalString(*this);
115 }
116 }
117
118 bool has_pointers = StringShape(*this).IsIndirect();
119
120 int old_size = this->Size();
121 bool one_byte = internalized.IsOneByteRepresentation();
122 Handle<Map> map = one_byte ? isolate->factory()->thin_one_byte_string_map()
123 : isolate->factory()->thin_string_map();
124 // Update actual first and then do release store on the map word. This ensures
125 // that the concurrent marker will read the pointer when visiting a
126 // ThinString.
127 ThinString thin = ThinString::unchecked_cast(*this);
128 thin.set_actual(internalized);
129 DCHECK_GE(old_size, ThinString::kSize);
130 this->synchronized_set_map(*map);
131 Address thin_end = thin.address() + ThinString::kSize;
132 int size_delta = old_size - ThinString::kSize;
133 if (size_delta != 0) {
134 Heap* heap = isolate->heap();
135 heap->CreateFillerObjectAt(
136 thin_end, size_delta,
137 has_pointers ? ClearRecordedSlots::kYes : ClearRecordedSlots::kNo);
138 }
139 }
140
MakeExternal(v8::String::ExternalStringResource * resource)141 bool String::MakeExternal(v8::String::ExternalStringResource* resource) {
142 DisallowHeapAllocation no_allocation;
143 // Externalizing twice leaks the external resource, so it's
144 // prohibited by the API.
145 DCHECK(this->SupportsExternalization());
146 DCHECK(resource->IsCacheable());
147 #ifdef ENABLE_SLOW_DCHECKS
148 if (FLAG_enable_slow_asserts) {
149 // Assert that the resource and the string are equivalent.
150 DCHECK(static_cast<size_t>(this->length()) == resource->length());
151 ScopedVector<uc16> smart_chars(this->length());
152 String::WriteToFlat(*this, smart_chars.begin(), 0, this->length());
153 DCHECK_EQ(0, memcmp(smart_chars.begin(), resource->data(),
154 resource->length() * sizeof(smart_chars[0])));
155 }
156 #endif // DEBUG
157 int size = this->Size(); // Byte size of the original string.
158 // Abort if size does not allow in-place conversion.
159 if (size < ExternalString::kUncachedSize) return false;
160 // Read-only strings cannot be made external, since that would mutate the
161 // string.
162 if (IsReadOnlyHeapObject(*this)) return false;
163 Isolate* isolate = GetIsolateFromWritableObject(*this);
164 bool is_internalized = this->IsInternalizedString();
165 bool has_pointers = StringShape(*this).IsIndirect();
166
167 if (has_pointers) {
168 isolate->heap()->NotifyObjectLayoutChange(*this, no_allocation,
169 InvalidateRecordedSlots::kYes);
170 }
171
172 // Disallow garbage collection to avoid possible GC vs string access deadlock.
173 DisallowGarbageCollection no_gc;
174 base::SharedMutexGuard<base::kExclusive> shared_mutex_guard(
175 isolate->string_access());
176 // Morph the string to an external string by replacing the map and
177 // reinitializing the fields. This won't work if the space the existing
178 // string occupies is too small for a regular external string. Instead, we
179 // resort to an uncached external string instead, omitting the field caching
180 // the address of the backing store. When we encounter uncached external
181 // strings in generated code, we need to bailout to runtime.
182 Map new_map;
183 ReadOnlyRoots roots(isolate);
184 if (size < ExternalString::kSizeOfAllExternalStrings) {
185 if (is_internalized) {
186 new_map = roots.uncached_external_internalized_string_map();
187 } else {
188 new_map = roots.uncached_external_string_map();
189 }
190 } else {
191 new_map = is_internalized ? roots.external_internalized_string_map()
192 : roots.external_string_map();
193 }
194
195 // Byte size of the external String object.
196 int new_size = this->SizeFromMap(new_map);
197 isolate->heap()->CreateFillerObjectAt(
198 this->address() + new_size, size - new_size,
199 has_pointers ? ClearRecordedSlots::kYes : ClearRecordedSlots::kNo);
200
201 // We are storing the new map using release store after creating a filler for
202 // the left-over space to avoid races with the sweeper thread.
203 this->synchronized_set_map(new_map);
204
205 ExternalTwoByteString self = ExternalTwoByteString::cast(*this);
206 self.AllocateExternalPointerEntries(isolate);
207 self.SetResource(isolate, resource);
208 isolate->heap()->RegisterExternalString(*this);
209 if (is_internalized) self.Hash(); // Force regeneration of the hash value.
210 return true;
211 }
212
MakeExternal(v8::String::ExternalOneByteStringResource * resource)213 bool String::MakeExternal(v8::String::ExternalOneByteStringResource* resource) {
214 DisallowHeapAllocation no_allocation;
215 // Externalizing twice leaks the external resource, so it's
216 // prohibited by the API.
217 DCHECK(this->SupportsExternalization());
218 DCHECK(resource->IsCacheable());
219 #ifdef ENABLE_SLOW_DCHECKS
220 if (FLAG_enable_slow_asserts) {
221 // Assert that the resource and the string are equivalent.
222 DCHECK(static_cast<size_t>(this->length()) == resource->length());
223 if (this->IsTwoByteRepresentation()) {
224 ScopedVector<uint16_t> smart_chars(this->length());
225 String::WriteToFlat(*this, smart_chars.begin(), 0, this->length());
226 DCHECK(String::IsOneByte(smart_chars.begin(), this->length()));
227 }
228 ScopedVector<char> smart_chars(this->length());
229 String::WriteToFlat(*this, smart_chars.begin(), 0, this->length());
230 DCHECK_EQ(0, memcmp(smart_chars.begin(), resource->data(),
231 resource->length() * sizeof(smart_chars[0])));
232 }
233 #endif // DEBUG
234 int size = this->Size(); // Byte size of the original string.
235 // Abort if size does not allow in-place conversion.
236 if (size < ExternalString::kUncachedSize) return false;
237 // Read-only strings cannot be made external, since that would mutate the
238 // string.
239 if (IsReadOnlyHeapObject(*this)) return false;
240 Isolate* isolate = GetIsolateFromWritableObject(*this);
241 bool is_internalized = this->IsInternalizedString();
242 bool has_pointers = StringShape(*this).IsIndirect();
243
244 if (has_pointers) {
245 isolate->heap()->NotifyObjectLayoutChange(*this, no_allocation,
246 InvalidateRecordedSlots::kYes);
247 }
248
249 // Disallow garbage collection to avoid possible GC vs string access deadlock.
250 DisallowGarbageCollection no_gc;
251 base::SharedMutexGuard<base::kExclusive> shared_mutex_guard(
252 isolate->string_access());
253 // Morph the string to an external string by replacing the map and
254 // reinitializing the fields. This won't work if the space the existing
255 // string occupies is too small for a regular external string. Instead, we
256 // resort to an uncached external string instead, omitting the field caching
257 // the address of the backing store. When we encounter uncached external
258 // strings in generated code, we need to bailout to runtime.
259 Map new_map;
260 ReadOnlyRoots roots(isolate);
261 if (size < ExternalString::kSizeOfAllExternalStrings) {
262 new_map = is_internalized
263 ? roots.uncached_external_one_byte_internalized_string_map()
264 : roots.uncached_external_one_byte_string_map();
265 } else {
266 new_map = is_internalized
267 ? roots.external_one_byte_internalized_string_map()
268 : roots.external_one_byte_string_map();
269 }
270
271 // Byte size of the external String object.
272 int new_size = this->SizeFromMap(new_map);
273 isolate->heap()->CreateFillerObjectAt(
274 this->address() + new_size, size - new_size,
275 has_pointers ? ClearRecordedSlots::kYes : ClearRecordedSlots::kNo);
276
277 // We are storing the new map using release store after creating a filler for
278 // the left-over space to avoid races with the sweeper thread.
279 this->synchronized_set_map(new_map);
280
281 ExternalOneByteString self = ExternalOneByteString::cast(*this);
282 self.AllocateExternalPointerEntries(isolate);
283 self.SetResource(isolate, resource);
284 isolate->heap()->RegisterExternalString(*this);
285 if (is_internalized) self.Hash(); // Force regeneration of the hash value.
286 return true;
287 }
288
SupportsExternalization()289 bool String::SupportsExternalization() {
290 if (this->IsThinString()) {
291 return i::ThinString::cast(*this).actual().SupportsExternalization();
292 }
293
294 // RO_SPACE strings cannot be externalized.
295 if (IsReadOnlyHeapObject(*this)) {
296 return false;
297 }
298
299 // Already an external string.
300 if (StringShape(*this).IsExternal()) {
301 return false;
302 }
303
304 #ifdef V8_COMPRESS_POINTERS
305 // Small strings may not be in-place externalizable.
306 if (this->Size() < ExternalString::kUncachedSize) return false;
307 #else
308 DCHECK_LE(ExternalString::kUncachedSize, this->Size());
309 #endif
310
311 Isolate* isolate = GetIsolateFromWritableObject(*this);
312 return !isolate->heap()->IsInGCPostProcessing();
313 }
314
PrefixForDebugPrint() const315 const char* String::PrefixForDebugPrint() const {
316 StringShape shape(*this);
317 if (IsTwoByteRepresentation()) {
318 StringShape shape(*this);
319 if (shape.IsInternalized()) {
320 return "u#";
321 } else if (shape.IsCons()) {
322 return "uc\"";
323 } else if (shape.IsThin()) {
324 return "u>\"";
325 } else if (shape.IsExternal()) {
326 return "ue\"";
327 } else {
328 return "u\"";
329 }
330 } else {
331 StringShape shape(*this);
332 if (shape.IsInternalized()) {
333 return "#";
334 } else if (shape.IsCons()) {
335 return "c\"";
336 } else if (shape.IsThin()) {
337 return ">\"";
338 } else if (shape.IsExternal()) {
339 return "e\"";
340 } else {
341 return "\"";
342 }
343 }
344 UNREACHABLE();
345 }
346
SuffixForDebugPrint() const347 const char* String::SuffixForDebugPrint() const {
348 StringShape shape(*this);
349 if (shape.IsInternalized()) return "";
350 return "\"";
351 }
352
StringShortPrint(StringStream * accumulator)353 void String::StringShortPrint(StringStream* accumulator) {
354 if (!LooksValid()) {
355 accumulator->Add("<Invalid String>");
356 return;
357 }
358
359 const int len = length();
360 accumulator->Add("<String[%u]: ", len);
361 accumulator->Add(PrefixForDebugPrint());
362
363 if (len > kMaxShortPrintLength) {
364 accumulator->Add("...<truncated>>");
365 accumulator->Add(SuffixForDebugPrint());
366 accumulator->Put('>');
367 return;
368 }
369
370 PrintUC16(accumulator, 0, len);
371 accumulator->Add(SuffixForDebugPrint());
372 accumulator->Put('>');
373 }
374
PrintUC16(std::ostream & os,int start,int end)375 void String::PrintUC16(std::ostream& os, int start, int end) { // NOLINT
376 if (end < 0) end = length();
377 StringCharacterStream stream(*this, start);
378 for (int i = start; i < end && stream.HasMore(); i++) {
379 os << AsUC16(stream.GetNext());
380 }
381 }
382
PrintUC16(StringStream * accumulator,int start,int end)383 void String::PrintUC16(StringStream* accumulator, int start, int end) {
384 if (end < 0) end = length();
385 StringCharacterStream stream(*this, start);
386 for (int i = start; i < end && stream.HasMore(); i++) {
387 uint16_t c = stream.GetNext();
388 if (c == '\n') {
389 accumulator->Add("\\n");
390 } else if (c == '\r') {
391 accumulator->Add("\\r");
392 } else if (c == '\\') {
393 accumulator->Add("\\\\");
394 } else if (!std::isprint(c)) {
395 accumulator->Add("\\x%02x", c);
396 } else {
397 accumulator->Put(static_cast<char>(c));
398 }
399 }
400 }
401
402 // static
Trim(Isolate * isolate,Handle<String> string,TrimMode mode)403 Handle<String> String::Trim(Isolate* isolate, Handle<String> string,
404 TrimMode mode) {
405 string = String::Flatten(isolate, string);
406 int const length = string->length();
407
408 // Perform left trimming if requested.
409 int left = 0;
410 if (mode == kTrim || mode == kTrimStart) {
411 while (left < length && IsWhiteSpaceOrLineTerminator(string->Get(left))) {
412 left++;
413 }
414 }
415
416 // Perform right trimming if requested.
417 int right = length;
418 if (mode == kTrim || mode == kTrimEnd) {
419 while (right > left &&
420 IsWhiteSpaceOrLineTerminator(string->Get(right - 1))) {
421 right--;
422 }
423 }
424
425 return isolate->factory()->NewSubString(string, left, right);
426 }
427
ToArrayIndex(Address addr)428 int32_t String::ToArrayIndex(Address addr) {
429 DisallowHeapAllocation no_gc;
430 String key(addr);
431
432 uint32_t index;
433 if (!key.AsArrayIndex(&index)) return -1;
434 if (index <= INT_MAX) return index;
435 return -1;
436 }
437
LooksValid()438 bool String::LooksValid() {
439 // TODO(leszeks): Maybe remove this check entirely, Heap::Contains uses
440 // basically the same logic as the way we access the heap in the first place.
441 // RO_SPACE objects should always be valid.
442 if (ReadOnlyHeap::Contains(*this)) return true;
443 BasicMemoryChunk* chunk = BasicMemoryChunk::FromHeapObject(*this);
444 if (chunk->heap() == nullptr) return false;
445 return chunk->heap()->Contains(*this);
446 }
447
448 namespace {
449
AreDigits(const uint8_t * s,int from,int to)450 bool AreDigits(const uint8_t* s, int from, int to) {
451 for (int i = from; i < to; i++) {
452 if (s[i] < '0' || s[i] > '9') return false;
453 }
454
455 return true;
456 }
457
ParseDecimalInteger(const uint8_t * s,int from,int to)458 int ParseDecimalInteger(const uint8_t* s, int from, int to) {
459 DCHECK_LT(to - from, 10); // Overflow is not possible.
460 DCHECK(from < to);
461 int d = s[from] - '0';
462
463 for (int i = from + 1; i < to; i++) {
464 d = 10 * d + (s[i] - '0');
465 }
466
467 return d;
468 }
469
470 } // namespace
471
472 // static
ToNumber(Isolate * isolate,Handle<String> subject)473 Handle<Object> String::ToNumber(Isolate* isolate, Handle<String> subject) {
474 // Flatten {subject} string first.
475 subject = String::Flatten(isolate, subject);
476
477 // Fast array index case.
478 uint32_t index;
479 if (subject->AsArrayIndex(&index)) {
480 return isolate->factory()->NewNumberFromUint(index);
481 }
482
483 // Fast case: short integer or some sorts of junk values.
484 if (subject->IsSeqOneByteString()) {
485 int len = subject->length();
486 if (len == 0) return handle(Smi::zero(), isolate);
487
488 DisallowHeapAllocation no_gc;
489 uint8_t const* data =
490 Handle<SeqOneByteString>::cast(subject)->GetChars(no_gc);
491 bool minus = (data[0] == '-');
492 int start_pos = (minus ? 1 : 0);
493
494 if (start_pos == len) {
495 return isolate->factory()->nan_value();
496 } else if (data[start_pos] > '9') {
497 // Fast check for a junk value. A valid string may start from a
498 // whitespace, a sign ('+' or '-'), the decimal point, a decimal digit
499 // or the 'I' character ('Infinity'). All of that have codes not greater
500 // than '9' except 'I' and .
501 if (data[start_pos] != 'I' && data[start_pos] != 0xA0) {
502 return isolate->factory()->nan_value();
503 }
504 } else if (len - start_pos < 10 && AreDigits(data, start_pos, len)) {
505 // The maximal/minimal smi has 10 digits. If the string has less digits
506 // we know it will fit into the smi-data type.
507 int d = ParseDecimalInteger(data, start_pos, len);
508 if (minus) {
509 if (d == 0) return isolate->factory()->minus_zero_value();
510 d = -d;
511 } else if (!subject->HasHashCode() && len <= String::kMaxArrayIndexSize &&
512 (len == 1 || data[0] != '0')) {
513 // String hash is not calculated yet but all the data are present.
514 // Update the hash field to speed up sequential convertions.
515 uint32_t hash = StringHasher::MakeArrayIndexHash(d, len);
516 #ifdef DEBUG
517 subject->Hash(); // Force hash calculation.
518 DCHECK_EQ(static_cast<int>(subject->hash_field()),
519 static_cast<int>(hash));
520 #endif
521 subject->set_hash_field(hash);
522 }
523 return handle(Smi::FromInt(d), isolate);
524 }
525 }
526
527 // Slower case.
528 int flags = ALLOW_HEX | ALLOW_OCTAL | ALLOW_BINARY;
529 return isolate->factory()->NewNumber(StringToDouble(isolate, subject, flags));
530 }
531
GetFlatContent(const DisallowHeapAllocation & no_gc)532 String::FlatContent String::GetFlatContent(
533 const DisallowHeapAllocation& no_gc) {
534 #if DEBUG
535 // Check that this method is called only from the main thread.
536 {
537 Isolate* isolate;
538 // We don't have to check read only strings as those won't move.
539 DCHECK_IMPLIES(GetIsolateFromHeapObject(*this, &isolate),
540 ThreadId::Current() == isolate->thread_id());
541 }
542 #endif
543 USE(no_gc);
544 int length = this->length();
545 StringShape shape(*this);
546 String string = *this;
547 int offset = 0;
548 if (shape.representation_tag() == kConsStringTag) {
549 ConsString cons = ConsString::cast(string);
550 if (cons.second().length() != 0) {
551 return FlatContent(no_gc);
552 }
553 string = cons.first();
554 shape = StringShape(string);
555 } else if (shape.representation_tag() == kSlicedStringTag) {
556 SlicedString slice = SlicedString::cast(string);
557 offset = slice.offset();
558 string = slice.parent();
559 shape = StringShape(string);
560 DCHECK(shape.representation_tag() != kConsStringTag &&
561 shape.representation_tag() != kSlicedStringTag);
562 }
563 if (shape.representation_tag() == kThinStringTag) {
564 ThinString thin = ThinString::cast(string);
565 string = thin.actual();
566 shape = StringShape(string);
567 DCHECK(!shape.IsCons());
568 DCHECK(!shape.IsSliced());
569 }
570 if (shape.encoding_tag() == kOneByteStringTag) {
571 const uint8_t* start;
572 if (shape.representation_tag() == kSeqStringTag) {
573 start = SeqOneByteString::cast(string).GetChars(no_gc);
574 } else {
575 start = ExternalOneByteString::cast(string).GetChars();
576 }
577 return FlatContent(start + offset, length, no_gc);
578 } else {
579 DCHECK_EQ(shape.encoding_tag(), kTwoByteStringTag);
580 const uc16* start;
581 if (shape.representation_tag() == kSeqStringTag) {
582 start = SeqTwoByteString::cast(string).GetChars(no_gc);
583 } else {
584 start = ExternalTwoByteString::cast(string).GetChars();
585 }
586 return FlatContent(start + offset, length, no_gc);
587 }
588 }
589
ToCString(AllowNullsFlag allow_nulls,RobustnessFlag robust_flag,int offset,int length,int * length_return)590 std::unique_ptr<char[]> String::ToCString(AllowNullsFlag allow_nulls,
591 RobustnessFlag robust_flag,
592 int offset, int length,
593 int* length_return) {
594 if (robust_flag == ROBUST_STRING_TRAVERSAL && !LooksValid()) {
595 return std::unique_ptr<char[]>();
596 }
597 // Negative length means the to the end of the string.
598 if (length < 0) length = kMaxInt - offset;
599
600 // Compute the size of the UTF-8 string. Start at the specified offset.
601 StringCharacterStream stream(*this, offset);
602 int character_position = offset;
603 int utf8_bytes = 0;
604 int last = unibrow::Utf16::kNoPreviousCharacter;
605 while (stream.HasMore() && character_position++ < offset + length) {
606 uint16_t character = stream.GetNext();
607 utf8_bytes += unibrow::Utf8::Length(character, last);
608 last = character;
609 }
610
611 if (length_return) {
612 *length_return = utf8_bytes;
613 }
614
615 char* result = NewArray<char>(utf8_bytes + 1);
616
617 // Convert the UTF-16 string to a UTF-8 buffer. Start at the specified offset.
618 stream.Reset(*this, offset);
619 character_position = offset;
620 int utf8_byte_position = 0;
621 last = unibrow::Utf16::kNoPreviousCharacter;
622 while (stream.HasMore() && character_position++ < offset + length) {
623 uint16_t character = stream.GetNext();
624 if (allow_nulls == DISALLOW_NULLS && character == 0) {
625 character = ' ';
626 }
627 utf8_byte_position +=
628 unibrow::Utf8::Encode(result + utf8_byte_position, character, last);
629 last = character;
630 }
631 result[utf8_byte_position] = 0;
632 return std::unique_ptr<char[]>(result);
633 }
634
ToCString(AllowNullsFlag allow_nulls,RobustnessFlag robust_flag,int * length_return)635 std::unique_ptr<char[]> String::ToCString(AllowNullsFlag allow_nulls,
636 RobustnessFlag robust_flag,
637 int* length_return) {
638 return ToCString(allow_nulls, robust_flag, 0, -1, length_return);
639 }
640
641 template <typename sinkchar>
WriteToFlat(String source,sinkchar * sink,int from,int to)642 void String::WriteToFlat(String source, sinkchar* sink, int from, int to) {
643 DisallowHeapAllocation no_gc;
644 SharedStringAccessGuardIfNeeded access_guard(source);
645 while (from < to) {
646 DCHECK_LE(0, from);
647 DCHECK_LE(to, source.length());
648 switch (StringShape(source).full_representation_tag()) {
649 case kOneByteStringTag | kExternalStringTag: {
650 CopyChars(sink, ExternalOneByteString::cast(source).GetChars() + from,
651 to - from);
652 return;
653 }
654 case kTwoByteStringTag | kExternalStringTag: {
655 const uc16* data = ExternalTwoByteString::cast(source).GetChars();
656 CopyChars(sink, data + from, to - from);
657 return;
658 }
659 case kOneByteStringTag | kSeqStringTag: {
660 CopyChars(
661 sink,
662 SeqOneByteString::cast(source).GetChars(no_gc, access_guard) + from,
663 to - from);
664 return;
665 }
666 case kTwoByteStringTag | kSeqStringTag: {
667 CopyChars(
668 sink,
669 SeqTwoByteString::cast(source).GetChars(no_gc, access_guard) + from,
670 to - from);
671 return;
672 }
673 case kOneByteStringTag | kConsStringTag:
674 case kTwoByteStringTag | kConsStringTag: {
675 ConsString cons_string = ConsString::cast(source);
676 String first = cons_string.first();
677 int boundary = first.length();
678 if (to - boundary >= boundary - from) {
679 // Right hand side is longer. Recurse over left.
680 if (from < boundary) {
681 WriteToFlat(first, sink, from, boundary);
682 if (from == 0 && cons_string.second() == first) {
683 CopyChars(sink + boundary, sink, boundary);
684 return;
685 }
686 sink += boundary - from;
687 from = 0;
688 } else {
689 from -= boundary;
690 }
691 to -= boundary;
692 source = cons_string.second();
693 } else {
694 // Left hand side is longer. Recurse over right.
695 if (to > boundary) {
696 String second = cons_string.second();
697 // When repeatedly appending to a string, we get a cons string that
698 // is unbalanced to the left, a list, essentially. We inline the
699 // common case of sequential one-byte right child.
700 if (to - boundary == 1) {
701 sink[boundary - from] = static_cast<sinkchar>(second.Get(0));
702 } else if (second.IsSeqOneByteString()) {
703 CopyChars(
704 sink + boundary - from,
705 SeqOneByteString::cast(second).GetChars(no_gc, access_guard),
706 to - boundary);
707 } else {
708 WriteToFlat(second, sink + boundary - from, 0, to - boundary);
709 }
710 to = boundary;
711 }
712 source = first;
713 }
714 break;
715 }
716 case kOneByteStringTag | kSlicedStringTag:
717 case kTwoByteStringTag | kSlicedStringTag: {
718 SlicedString slice = SlicedString::cast(source);
719 unsigned offset = slice.offset();
720 WriteToFlat(slice.parent(), sink, from + offset, to + offset);
721 return;
722 }
723 case kOneByteStringTag | kThinStringTag:
724 case kTwoByteStringTag | kThinStringTag:
725 source = ThinString::cast(source).actual();
726 break;
727 }
728 }
729 DCHECK_EQ(from, to);
730 }
731
732 template <typename SourceChar>
CalculateLineEndsImpl(std::vector<int> * line_ends,Vector<const SourceChar> src,bool include_ending_line)733 static void CalculateLineEndsImpl(std::vector<int>* line_ends,
734 Vector<const SourceChar> src,
735 bool include_ending_line) {
736 const int src_len = src.length();
737 for (int i = 0; i < src_len - 1; i++) {
738 SourceChar current = src[i];
739 SourceChar next = src[i + 1];
740 if (IsLineTerminatorSequence(current, next)) line_ends->push_back(i);
741 }
742
743 if (src_len > 0 && IsLineTerminatorSequence(src[src_len - 1], 0)) {
744 line_ends->push_back(src_len - 1);
745 }
746 if (include_ending_line) {
747 // Include one character beyond the end of script. The rewriter uses that
748 // position for the implicit return statement.
749 line_ends->push_back(src_len);
750 }
751 }
752
753 template <typename LocalIsolate>
CalculateLineEnds(LocalIsolate * isolate,Handle<String> src,bool include_ending_line)754 Handle<FixedArray> String::CalculateLineEnds(LocalIsolate* isolate,
755 Handle<String> src,
756 bool include_ending_line) {
757 src = Flatten(isolate, src);
758 // Rough estimate of line count based on a roughly estimated average
759 // length of (unpacked) code.
760 int line_count_estimate = src->length() >> 4;
761 std::vector<int> line_ends;
762 line_ends.reserve(line_count_estimate);
763 {
764 DisallowHeapAllocation no_allocation; // ensure vectors stay valid.
765 // Dispatch on type of strings.
766 String::FlatContent content = src->GetFlatContent(no_allocation);
767 DCHECK(content.IsFlat());
768 if (content.IsOneByte()) {
769 CalculateLineEndsImpl(&line_ends, content.ToOneByteVector(),
770 include_ending_line);
771 } else {
772 CalculateLineEndsImpl(&line_ends, content.ToUC16Vector(),
773 include_ending_line);
774 }
775 }
776 int line_count = static_cast<int>(line_ends.size());
777 Handle<FixedArray> array =
778 isolate->factory()->NewFixedArray(line_count, AllocationType::kOld);
779 for (int i = 0; i < line_count; i++) {
780 array->set(i, Smi::FromInt(line_ends[i]));
781 }
782 return array;
783 }
784
785 template Handle<FixedArray> String::CalculateLineEnds(Isolate* isolate,
786 Handle<String> src,
787 bool include_ending_line);
788 template Handle<FixedArray> String::CalculateLineEnds(LocalIsolate* isolate,
789 Handle<String> src,
790 bool include_ending_line);
791
SlowEquals(String other)792 bool String::SlowEquals(String other) {
793 DisallowHeapAllocation no_gc;
794 // Fast check: negative check with lengths.
795 int len = length();
796 if (len != other.length()) return false;
797 if (len == 0) return true;
798
799 // Fast check: if at least one ThinString is involved, dereference it/them
800 // and restart.
801 if (this->IsThinString() || other.IsThinString()) {
802 if (other.IsThinString()) other = ThinString::cast(other).actual();
803 if (this->IsThinString()) {
804 return ThinString::cast(*this).actual().Equals(other);
805 } else {
806 return this->Equals(other);
807 }
808 }
809
810 // Fast check: if hash code is computed for both strings
811 // a fast negative check can be performed.
812 if (HasHashCode() && other.HasHashCode()) {
813 #ifdef ENABLE_SLOW_DCHECKS
814 if (FLAG_enable_slow_asserts) {
815 if (Hash() != other.Hash()) {
816 bool found_difference = false;
817 for (int i = 0; i < len; i++) {
818 if (Get(i) != other.Get(i)) {
819 found_difference = true;
820 break;
821 }
822 }
823 DCHECK(found_difference);
824 }
825 }
826 #endif
827 if (Hash() != other.Hash()) return false;
828 }
829
830 // We know the strings are both non-empty. Compare the first chars
831 // before we try to flatten the strings.
832 if (this->Get(0) != other.Get(0)) return false;
833
834 if (IsSeqOneByteString() && other.IsSeqOneByteString()) {
835 const uint8_t* str1 = SeqOneByteString::cast(*this).GetChars(no_gc);
836 const uint8_t* str2 = SeqOneByteString::cast(other).GetChars(no_gc);
837 return CompareRawStringContents(str1, str2, len);
838 }
839
840 StringComparator comparator;
841 return comparator.Equals(*this, other);
842 }
843
SlowEquals(Isolate * isolate,Handle<String> one,Handle<String> two)844 bool String::SlowEquals(Isolate* isolate, Handle<String> one,
845 Handle<String> two) {
846 // Fast check: negative check with lengths.
847 int one_length = one->length();
848 if (one_length != two->length()) return false;
849 if (one_length == 0) return true;
850
851 // Fast check: if at least one ThinString is involved, dereference it/them
852 // and restart.
853 if (one->IsThinString() || two->IsThinString()) {
854 if (one->IsThinString())
855 one = handle(ThinString::cast(*one).actual(), isolate);
856 if (two->IsThinString())
857 two = handle(ThinString::cast(*two).actual(), isolate);
858 return String::Equals(isolate, one, two);
859 }
860
861 // Fast check: if hash code is computed for both strings
862 // a fast negative check can be performed.
863 if (one->HasHashCode() && two->HasHashCode()) {
864 #ifdef ENABLE_SLOW_DCHECKS
865 if (FLAG_enable_slow_asserts) {
866 if (one->Hash() != two->Hash()) {
867 bool found_difference = false;
868 for (int i = 0; i < one_length; i++) {
869 if (one->Get(i) != two->Get(i)) {
870 found_difference = true;
871 break;
872 }
873 }
874 DCHECK(found_difference);
875 }
876 }
877 #endif
878 if (one->Hash() != two->Hash()) return false;
879 }
880
881 // We know the strings are both non-empty. Compare the first chars
882 // before we try to flatten the strings.
883 if (one->Get(0) != two->Get(0)) return false;
884
885 one = String::Flatten(isolate, one);
886 two = String::Flatten(isolate, two);
887
888 DisallowHeapAllocation no_gc;
889 String::FlatContent flat1 = one->GetFlatContent(no_gc);
890 String::FlatContent flat2 = two->GetFlatContent(no_gc);
891
892 if (flat1.IsOneByte() && flat2.IsOneByte()) {
893 return CompareRawStringContents(flat1.ToOneByteVector().begin(),
894 flat2.ToOneByteVector().begin(),
895 one_length);
896 } else {
897 for (int i = 0; i < one_length; i++) {
898 if (flat1.Get(i) != flat2.Get(i)) return false;
899 }
900 return true;
901 }
902 }
903
904 // static
Compare(Isolate * isolate,Handle<String> x,Handle<String> y)905 ComparisonResult String::Compare(Isolate* isolate, Handle<String> x,
906 Handle<String> y) {
907 // A few fast case tests before we flatten.
908 if (x.is_identical_to(y)) {
909 return ComparisonResult::kEqual;
910 } else if (y->length() == 0) {
911 return x->length() == 0 ? ComparisonResult::kEqual
912 : ComparisonResult::kGreaterThan;
913 } else if (x->length() == 0) {
914 return ComparisonResult::kLessThan;
915 }
916
917 int const d = x->Get(0) - y->Get(0);
918 if (d < 0) {
919 return ComparisonResult::kLessThan;
920 } else if (d > 0) {
921 return ComparisonResult::kGreaterThan;
922 }
923
924 // Slow case.
925 x = String::Flatten(isolate, x);
926 y = String::Flatten(isolate, y);
927
928 DisallowHeapAllocation no_gc;
929 ComparisonResult result = ComparisonResult::kEqual;
930 int prefix_length = x->length();
931 if (y->length() < prefix_length) {
932 prefix_length = y->length();
933 result = ComparisonResult::kGreaterThan;
934 } else if (y->length() > prefix_length) {
935 result = ComparisonResult::kLessThan;
936 }
937 int r;
938 String::FlatContent x_content = x->GetFlatContent(no_gc);
939 String::FlatContent y_content = y->GetFlatContent(no_gc);
940 if (x_content.IsOneByte()) {
941 Vector<const uint8_t> x_chars = x_content.ToOneByteVector();
942 if (y_content.IsOneByte()) {
943 Vector<const uint8_t> y_chars = y_content.ToOneByteVector();
944 r = CompareChars(x_chars.begin(), y_chars.begin(), prefix_length);
945 } else {
946 Vector<const uc16> y_chars = y_content.ToUC16Vector();
947 r = CompareChars(x_chars.begin(), y_chars.begin(), prefix_length);
948 }
949 } else {
950 Vector<const uc16> x_chars = x_content.ToUC16Vector();
951 if (y_content.IsOneByte()) {
952 Vector<const uint8_t> y_chars = y_content.ToOneByteVector();
953 r = CompareChars(x_chars.begin(), y_chars.begin(), prefix_length);
954 } else {
955 Vector<const uc16> y_chars = y_content.ToUC16Vector();
956 r = CompareChars(x_chars.begin(), y_chars.begin(), prefix_length);
957 }
958 }
959 if (r < 0) {
960 result = ComparisonResult::kLessThan;
961 } else if (r > 0) {
962 result = ComparisonResult::kGreaterThan;
963 }
964 return result;
965 }
966
IndexOf(Isolate * isolate,Handle<Object> receiver,Handle<Object> search,Handle<Object> position)967 Object String::IndexOf(Isolate* isolate, Handle<Object> receiver,
968 Handle<Object> search, Handle<Object> position) {
969 if (receiver->IsNullOrUndefined(isolate)) {
970 THROW_NEW_ERROR_RETURN_FAILURE(
971 isolate, NewTypeError(MessageTemplate::kCalledOnNullOrUndefined,
972 isolate->factory()->NewStringFromAsciiChecked(
973 "String.prototype.indexOf")));
974 }
975 Handle<String> receiver_string;
976 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, receiver_string,
977 Object::ToString(isolate, receiver));
978
979 Handle<String> search_string;
980 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, search_string,
981 Object::ToString(isolate, search));
982
983 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, position,
984 Object::ToInteger(isolate, position));
985
986 uint32_t index = receiver_string->ToValidIndex(*position);
987 return Smi::FromInt(
988 String::IndexOf(isolate, receiver_string, search_string, index));
989 }
990
991 namespace {
992
993 template <typename T>
SearchString(Isolate * isolate,String::FlatContent receiver_content,Vector<T> pat_vector,int start_index)994 int SearchString(Isolate* isolate, String::FlatContent receiver_content,
995 Vector<T> pat_vector, int start_index) {
996 if (receiver_content.IsOneByte()) {
997 return SearchString(isolate, receiver_content.ToOneByteVector(), pat_vector,
998 start_index);
999 }
1000 return SearchString(isolate, receiver_content.ToUC16Vector(), pat_vector,
1001 start_index);
1002 }
1003
1004 } // namespace
1005
IndexOf(Isolate * isolate,Handle<String> receiver,Handle<String> search,int start_index)1006 int String::IndexOf(Isolate* isolate, Handle<String> receiver,
1007 Handle<String> search, int start_index) {
1008 DCHECK_LE(0, start_index);
1009 DCHECK(start_index <= receiver->length());
1010
1011 uint32_t search_length = search->length();
1012 if (search_length == 0) return start_index;
1013
1014 uint32_t receiver_length = receiver->length();
1015 if (start_index + search_length > receiver_length) return -1;
1016
1017 receiver = String::Flatten(isolate, receiver);
1018 search = String::Flatten(isolate, search);
1019
1020 DisallowHeapAllocation no_gc; // ensure vectors stay valid
1021 // Extract flattened substrings of cons strings before getting encoding.
1022 String::FlatContent receiver_content = receiver->GetFlatContent(no_gc);
1023 String::FlatContent search_content = search->GetFlatContent(no_gc);
1024
1025 // dispatch on type of strings
1026 if (search_content.IsOneByte()) {
1027 Vector<const uint8_t> pat_vector = search_content.ToOneByteVector();
1028 return SearchString<const uint8_t>(isolate, receiver_content, pat_vector,
1029 start_index);
1030 }
1031 Vector<const uc16> pat_vector = search_content.ToUC16Vector();
1032 return SearchString<const uc16>(isolate, receiver_content, pat_vector,
1033 start_index);
1034 }
1035
GetSubstitution(Isolate * isolate,Match * match,Handle<String> replacement,int start_index)1036 MaybeHandle<String> String::GetSubstitution(Isolate* isolate, Match* match,
1037 Handle<String> replacement,
1038 int start_index) {
1039 DCHECK_GE(start_index, 0);
1040
1041 Factory* factory = isolate->factory();
1042
1043 const int replacement_length = replacement->length();
1044 const int captures_length = match->CaptureCount();
1045
1046 replacement = String::Flatten(isolate, replacement);
1047
1048 Handle<String> dollar_string =
1049 factory->LookupSingleCharacterStringFromCode('$');
1050 int next_dollar_ix =
1051 String::IndexOf(isolate, replacement, dollar_string, start_index);
1052 if (next_dollar_ix < 0) {
1053 return replacement;
1054 }
1055
1056 IncrementalStringBuilder builder(isolate);
1057
1058 if (next_dollar_ix > 0) {
1059 builder.AppendString(factory->NewSubString(replacement, 0, next_dollar_ix));
1060 }
1061
1062 while (true) {
1063 const int peek_ix = next_dollar_ix + 1;
1064 if (peek_ix >= replacement_length) {
1065 builder.AppendCharacter('$');
1066 return builder.Finish();
1067 }
1068
1069 int continue_from_ix = -1;
1070 const uint16_t peek = replacement->Get(peek_ix);
1071 switch (peek) {
1072 case '$': // $$
1073 builder.AppendCharacter('$');
1074 continue_from_ix = peek_ix + 1;
1075 break;
1076 case '&': // $& - match
1077 builder.AppendString(match->GetMatch());
1078 continue_from_ix = peek_ix + 1;
1079 break;
1080 case '`': // $` - prefix
1081 builder.AppendString(match->GetPrefix());
1082 continue_from_ix = peek_ix + 1;
1083 break;
1084 case '\'': // $' - suffix
1085 builder.AppendString(match->GetSuffix());
1086 continue_from_ix = peek_ix + 1;
1087 break;
1088 case '0':
1089 case '1':
1090 case '2':
1091 case '3':
1092 case '4':
1093 case '5':
1094 case '6':
1095 case '7':
1096 case '8':
1097 case '9': {
1098 // Valid indices are $1 .. $9, $01 .. $09 and $10 .. $99
1099 int scaled_index = (peek - '0');
1100 int advance = 1;
1101
1102 if (peek_ix + 1 < replacement_length) {
1103 const uint16_t next_peek = replacement->Get(peek_ix + 1);
1104 if (next_peek >= '0' && next_peek <= '9') {
1105 const int new_scaled_index = scaled_index * 10 + (next_peek - '0');
1106 if (new_scaled_index < captures_length) {
1107 scaled_index = new_scaled_index;
1108 advance = 2;
1109 }
1110 }
1111 }
1112
1113 if (scaled_index == 0 || scaled_index >= captures_length) {
1114 builder.AppendCharacter('$');
1115 continue_from_ix = peek_ix;
1116 break;
1117 }
1118
1119 bool capture_exists;
1120 Handle<String> capture;
1121 ASSIGN_RETURN_ON_EXCEPTION(
1122 isolate, capture, match->GetCapture(scaled_index, &capture_exists),
1123 String);
1124 if (capture_exists) builder.AppendString(capture);
1125 continue_from_ix = peek_ix + advance;
1126 break;
1127 }
1128 case '<': { // $<name> - named capture
1129 using CaptureState = String::Match::CaptureState;
1130
1131 if (!match->HasNamedCaptures()) {
1132 builder.AppendCharacter('$');
1133 continue_from_ix = peek_ix;
1134 break;
1135 }
1136
1137 Handle<String> bracket_string =
1138 factory->LookupSingleCharacterStringFromCode('>');
1139 const int closing_bracket_ix =
1140 String::IndexOf(isolate, replacement, bracket_string, peek_ix + 1);
1141
1142 if (closing_bracket_ix == -1) {
1143 // No closing bracket was found, treat '$<' as a string literal.
1144 builder.AppendCharacter('$');
1145 continue_from_ix = peek_ix;
1146 break;
1147 }
1148
1149 Handle<String> capture_name =
1150 factory->NewSubString(replacement, peek_ix + 1, closing_bracket_ix);
1151 Handle<String> capture;
1152 CaptureState capture_state;
1153 ASSIGN_RETURN_ON_EXCEPTION(
1154 isolate, capture,
1155 match->GetNamedCapture(capture_name, &capture_state), String);
1156
1157 if (capture_state == CaptureState::MATCHED) {
1158 builder.AppendString(capture);
1159 }
1160
1161 continue_from_ix = closing_bracket_ix + 1;
1162 break;
1163 }
1164 default:
1165 builder.AppendCharacter('$');
1166 continue_from_ix = peek_ix;
1167 break;
1168 }
1169
1170 // Go the the next $ in the replacement.
1171 // TODO(jgruber): Single-char lookups could be much more efficient.
1172 DCHECK_NE(continue_from_ix, -1);
1173 next_dollar_ix =
1174 String::IndexOf(isolate, replacement, dollar_string, continue_from_ix);
1175
1176 // Return if there are no more $ characters in the replacement. If we
1177 // haven't reached the end, we need to append the suffix.
1178 if (next_dollar_ix < 0) {
1179 if (continue_from_ix < replacement_length) {
1180 builder.AppendString(factory->NewSubString(
1181 replacement, continue_from_ix, replacement_length));
1182 }
1183 return builder.Finish();
1184 }
1185
1186 // Append substring between the previous and the next $ character.
1187 if (next_dollar_ix > continue_from_ix) {
1188 builder.AppendString(
1189 factory->NewSubString(replacement, continue_from_ix, next_dollar_ix));
1190 }
1191 }
1192
1193 UNREACHABLE();
1194 }
1195
1196 namespace { // for String.Prototype.lastIndexOf
1197
1198 template <typename schar, typename pchar>
StringMatchBackwards(Vector<const schar> subject,Vector<const pchar> pattern,int idx)1199 int StringMatchBackwards(Vector<const schar> subject,
1200 Vector<const pchar> pattern, int idx) {
1201 int pattern_length = pattern.length();
1202 DCHECK_GE(pattern_length, 1);
1203 DCHECK(idx + pattern_length <= subject.length());
1204
1205 if (sizeof(schar) == 1 && sizeof(pchar) > 1) {
1206 for (int i = 0; i < pattern_length; i++) {
1207 uc16 c = pattern[i];
1208 if (c > String::kMaxOneByteCharCode) {
1209 return -1;
1210 }
1211 }
1212 }
1213
1214 pchar pattern_first_char = pattern[0];
1215 for (int i = idx; i >= 0; i--) {
1216 if (subject[i] != pattern_first_char) continue;
1217 int j = 1;
1218 while (j < pattern_length) {
1219 if (pattern[j] != subject[i + j]) {
1220 break;
1221 }
1222 j++;
1223 }
1224 if (j == pattern_length) {
1225 return i;
1226 }
1227 }
1228 return -1;
1229 }
1230
1231 } // namespace
1232
LastIndexOf(Isolate * isolate,Handle<Object> receiver,Handle<Object> search,Handle<Object> position)1233 Object String::LastIndexOf(Isolate* isolate, Handle<Object> receiver,
1234 Handle<Object> search, Handle<Object> position) {
1235 if (receiver->IsNullOrUndefined(isolate)) {
1236 THROW_NEW_ERROR_RETURN_FAILURE(
1237 isolate, NewTypeError(MessageTemplate::kCalledOnNullOrUndefined,
1238 isolate->factory()->NewStringFromAsciiChecked(
1239 "String.prototype.lastIndexOf")));
1240 }
1241 Handle<String> receiver_string;
1242 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, receiver_string,
1243 Object::ToString(isolate, receiver));
1244
1245 Handle<String> search_string;
1246 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, search_string,
1247 Object::ToString(isolate, search));
1248
1249 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, position,
1250 Object::ToNumber(isolate, position));
1251
1252 uint32_t start_index;
1253
1254 if (position->IsNaN()) {
1255 start_index = receiver_string->length();
1256 } else {
1257 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, position,
1258 Object::ToInteger(isolate, position));
1259 start_index = receiver_string->ToValidIndex(*position);
1260 }
1261
1262 uint32_t pattern_length = search_string->length();
1263 uint32_t receiver_length = receiver_string->length();
1264
1265 if (start_index + pattern_length > receiver_length) {
1266 start_index = receiver_length - pattern_length;
1267 }
1268
1269 if (pattern_length == 0) {
1270 return Smi::FromInt(start_index);
1271 }
1272
1273 receiver_string = String::Flatten(isolate, receiver_string);
1274 search_string = String::Flatten(isolate, search_string);
1275
1276 int last_index = -1;
1277 DisallowHeapAllocation no_gc; // ensure vectors stay valid
1278
1279 String::FlatContent receiver_content = receiver_string->GetFlatContent(no_gc);
1280 String::FlatContent search_content = search_string->GetFlatContent(no_gc);
1281
1282 if (search_content.IsOneByte()) {
1283 Vector<const uint8_t> pat_vector = search_content.ToOneByteVector();
1284 if (receiver_content.IsOneByte()) {
1285 last_index = StringMatchBackwards(receiver_content.ToOneByteVector(),
1286 pat_vector, start_index);
1287 } else {
1288 last_index = StringMatchBackwards(receiver_content.ToUC16Vector(),
1289 pat_vector, start_index);
1290 }
1291 } else {
1292 Vector<const uc16> pat_vector = search_content.ToUC16Vector();
1293 if (receiver_content.IsOneByte()) {
1294 last_index = StringMatchBackwards(receiver_content.ToOneByteVector(),
1295 pat_vector, start_index);
1296 } else {
1297 last_index = StringMatchBackwards(receiver_content.ToUC16Vector(),
1298 pat_vector, start_index);
1299 }
1300 }
1301 return Smi::FromInt(last_index);
1302 }
1303
1304 template <>
IsEqualTo(Vector<const uint8_t> str)1305 bool String::IsEqualTo(Vector<const uint8_t> str) {
1306 return IsOneByteEqualTo(str);
1307 }
1308
1309 template <>
IsEqualTo(Vector<const uc16> str)1310 bool String::IsEqualTo(Vector<const uc16> str) {
1311 return IsTwoByteEqualTo(str);
1312 }
1313
HasOneBytePrefix(Vector<const char> str)1314 bool String::HasOneBytePrefix(Vector<const char> str) {
1315 int slen = str.length();
1316 if (slen > length()) return false;
1317 DisallowHeapAllocation no_gc;
1318 FlatContent content = GetFlatContent(no_gc);
1319 if (content.IsOneByte()) {
1320 return CompareChars(content.ToOneByteVector().begin(), str.begin(), slen) ==
1321 0;
1322 }
1323 return CompareChars(content.ToUC16Vector().begin(), str.begin(), slen) == 0;
1324 }
1325
IsOneByteEqualTo(Vector<const uint8_t> str)1326 bool String::IsOneByteEqualTo(Vector<const uint8_t> str) {
1327 int slen = length();
1328 if (str.length() != slen) return false;
1329 DisallowHeapAllocation no_gc;
1330 FlatContent content = GetFlatContent(no_gc);
1331 if (content.IsOneByte()) {
1332 return CompareChars(content.ToOneByteVector().begin(), str.begin(), slen) ==
1333 0;
1334 }
1335 return CompareChars(content.ToUC16Vector().begin(), str.begin(), slen) == 0;
1336 }
1337
IsTwoByteEqualTo(Vector<const uc16> str)1338 bool String::IsTwoByteEqualTo(Vector<const uc16> str) {
1339 int slen = length();
1340 if (str.length() != slen) return false;
1341 DisallowHeapAllocation no_gc;
1342 FlatContent content = GetFlatContent(no_gc);
1343 if (content.IsOneByte()) {
1344 return CompareChars(content.ToOneByteVector().begin(), str.begin(), slen) ==
1345 0;
1346 }
1347 return CompareChars(content.ToUC16Vector().begin(), str.begin(), slen) == 0;
1348 }
1349
1350 namespace {
1351
1352 template <typename Char>
HashString(String string,size_t start,int length,uint64_t seed)1353 uint32_t HashString(String string, size_t start, int length, uint64_t seed) {
1354 DisallowHeapAllocation no_gc;
1355
1356 if (length > String::kMaxHashCalcLength) {
1357 return StringHasher::GetTrivialHash(length);
1358 }
1359
1360 std::unique_ptr<Char[]> buffer;
1361 const Char* chars;
1362
1363 if (string.IsConsString()) {
1364 DCHECK_EQ(0, start);
1365 DCHECK(!string.IsFlat());
1366 buffer.reset(new Char[length]);
1367 String::WriteToFlat(string, buffer.get(), 0, length);
1368 chars = buffer.get();
1369 } else {
1370 chars = string.GetChars<Char>(no_gc) + start;
1371 }
1372
1373 return StringHasher::HashSequentialString<Char>(chars, length, seed);
1374 }
1375
1376 } // namespace
1377
ComputeAndSetHash()1378 uint32_t String::ComputeAndSetHash() {
1379 DisallowHeapAllocation no_gc;
1380 // Should only be called if hash code has not yet been computed.
1381 DCHECK(!HasHashCode());
1382
1383 // Store the hash code in the object.
1384 uint64_t seed = HashSeed(GetReadOnlyRoots());
1385 size_t start = 0;
1386 String string = *this;
1387 if (string.IsSlicedString()) {
1388 SlicedString sliced = SlicedString::cast(string);
1389 start = sliced.offset();
1390 string = sliced.parent();
1391 }
1392 if (string.IsConsString() && string.IsFlat()) {
1393 string = ConsString::cast(string).first();
1394 }
1395 if (string.IsThinString()) {
1396 string = ThinString::cast(string).actual();
1397 if (length() == string.length()) {
1398 set_hash_field(string.hash_field());
1399 return hash_field() >> kHashShift;
1400 }
1401 }
1402 uint32_t field = string.IsOneByteRepresentation()
1403 ? HashString<uint8_t>(string, start, length(), seed)
1404 : HashString<uint16_t>(string, start, length(), seed);
1405 set_hash_field(field);
1406
1407 // Check the hash code is there.
1408 DCHECK(HasHashCode());
1409 uint32_t result = field >> kHashShift;
1410 DCHECK_NE(result, 0); // Ensure that the hash value of 0 is never computed.
1411 return result;
1412 }
1413
SlowAsArrayIndex(uint32_t * index)1414 bool String::SlowAsArrayIndex(uint32_t* index) {
1415 DisallowHeapAllocation no_gc;
1416 int length = this->length();
1417 if (length <= kMaxCachedArrayIndexLength) {
1418 Hash(); // Force computation of hash code.
1419 uint32_t field = hash_field();
1420 if ((field & kIsNotIntegerIndexMask) != 0) return false;
1421 *index = ArrayIndexValueBits::decode(field);
1422 return true;
1423 }
1424 if (length == 0 || length > kMaxArrayIndexSize) return false;
1425 StringCharacterStream stream(*this);
1426 return StringToIndex(&stream, index);
1427 }
1428
SlowAsIntegerIndex(size_t * index)1429 bool String::SlowAsIntegerIndex(size_t* index) {
1430 DisallowHeapAllocation no_gc;
1431 int length = this->length();
1432 if (length <= kMaxCachedArrayIndexLength) {
1433 Hash(); // Force computation of hash code.
1434 uint32_t field = hash_field();
1435 if ((field & kIsNotIntegerIndexMask) != 0) return false;
1436 *index = ArrayIndexValueBits::decode(field);
1437 return true;
1438 }
1439 if (length == 0 || length > kMaxIntegerIndexSize) return false;
1440 StringCharacterStream stream(*this);
1441 return StringToIndex<StringCharacterStream, size_t, kToIntegerIndex>(&stream,
1442 index);
1443 }
1444
PrintOn(FILE * file)1445 void String::PrintOn(FILE* file) {
1446 int length = this->length();
1447 for (int i = 0; i < length; i++) {
1448 PrintF(file, "%c", Get(i));
1449 }
1450 }
1451
Truncate(Handle<SeqString> string,int new_length)1452 Handle<String> SeqString::Truncate(Handle<SeqString> string, int new_length) {
1453 if (new_length == 0) return string->GetReadOnlyRoots().empty_string_handle();
1454
1455 int new_size, old_size;
1456 int old_length = string->length();
1457 if (old_length <= new_length) return string;
1458
1459 if (string->IsSeqOneByteString()) {
1460 old_size = SeqOneByteString::SizeFor(old_length);
1461 new_size = SeqOneByteString::SizeFor(new_length);
1462 } else {
1463 DCHECK(string->IsSeqTwoByteString());
1464 old_size = SeqTwoByteString::SizeFor(old_length);
1465 new_size = SeqTwoByteString::SizeFor(new_length);
1466 }
1467
1468 int delta = old_size - new_size;
1469
1470 Address start_of_string = string->address();
1471 DCHECK(IsAligned(start_of_string, kObjectAlignment));
1472 DCHECK(IsAligned(start_of_string + new_size, kObjectAlignment));
1473
1474 Heap* heap = Heap::FromWritableHeapObject(*string);
1475 // Sizes are pointer size aligned, so that we can use filler objects
1476 // that are a multiple of pointer size.
1477 heap->CreateFillerObjectAt(start_of_string + new_size, delta,
1478 ClearRecordedSlots::kNo);
1479 // We are storing the new length using release store after creating a filler
1480 // for the left-over space to avoid races with the sweeper thread.
1481 string->synchronized_set_length(new_length);
1482
1483 return string;
1484 }
1485
clear_padding()1486 void SeqOneByteString::clear_padding() {
1487 int data_size = SeqString::kHeaderSize + length() * kOneByteSize;
1488 memset(reinterpret_cast<void*>(address() + data_size), 0,
1489 SizeFor(length()) - data_size);
1490 }
1491
clear_padding()1492 void SeqTwoByteString::clear_padding() {
1493 int data_size = SeqString::kHeaderSize + length() * kUC16Size;
1494 memset(reinterpret_cast<void*>(address() + data_size), 0,
1495 SizeFor(length()) - data_size);
1496 }
1497
Get(int index)1498 uint16_t ConsString::Get(int index) {
1499 DCHECK(index >= 0 && index < this->length());
1500
1501 // Check for a flattened cons string
1502 if (second().length() == 0) {
1503 String left = first();
1504 return left.Get(index);
1505 }
1506
1507 String string = String::cast(*this);
1508
1509 while (true) {
1510 if (StringShape(string).IsCons()) {
1511 ConsString cons_string = ConsString::cast(string);
1512 String left = cons_string.first();
1513 if (left.length() > index) {
1514 string = left;
1515 } else {
1516 index -= left.length();
1517 string = cons_string.second();
1518 }
1519 } else {
1520 return string.Get(index);
1521 }
1522 }
1523
1524 UNREACHABLE();
1525 }
1526
Get(int index)1527 uint16_t ThinString::Get(int index) { return actual().Get(index); }
1528
Get(int index)1529 uint16_t SlicedString::Get(int index) { return parent().Get(offset() + index); }
1530
ExternalPayloadSize() const1531 int ExternalString::ExternalPayloadSize() const {
1532 int length_multiplier = IsTwoByteRepresentation() ? i::kShortSize : kCharSize;
1533 return length() * length_multiplier;
1534 }
1535
FlatStringReader(Isolate * isolate,Handle<String> str)1536 FlatStringReader::FlatStringReader(Isolate* isolate, Handle<String> str)
1537 : Relocatable(isolate), str_(str), length_(str->length()) {
1538 #if DEBUG
1539 // Check that this constructor is called only from the main thread.
1540 DCHECK_EQ(ThreadId::Current(), isolate->thread_id());
1541 #endif
1542 PostGarbageCollection();
1543 }
1544
PostGarbageCollection()1545 void FlatStringReader::PostGarbageCollection() {
1546 DCHECK(str_->IsFlat());
1547 DisallowHeapAllocation no_gc;
1548 // This does not actually prevent the vector from being relocated later.
1549 String::FlatContent content = str_->GetFlatContent(no_gc);
1550 DCHECK(content.IsFlat());
1551 is_one_byte_ = content.IsOneByte();
1552 if (is_one_byte_) {
1553 start_ = content.ToOneByteVector().begin();
1554 } else {
1555 start_ = content.ToUC16Vector().begin();
1556 }
1557 }
1558
Initialize(ConsString cons_string,int offset)1559 void ConsStringIterator::Initialize(ConsString cons_string, int offset) {
1560 DCHECK(!cons_string.is_null());
1561 root_ = cons_string;
1562 consumed_ = offset;
1563 // Force stack blown condition to trigger restart.
1564 depth_ = 1;
1565 maximum_depth_ = kStackSize + depth_;
1566 DCHECK(StackBlown());
1567 }
1568
Continue(int * offset_out)1569 String ConsStringIterator::Continue(int* offset_out) {
1570 DCHECK_NE(depth_, 0);
1571 DCHECK_EQ(0, *offset_out);
1572 bool blew_stack = StackBlown();
1573 String string;
1574 // Get the next leaf if there is one.
1575 if (!blew_stack) string = NextLeaf(&blew_stack);
1576 // Restart search from root.
1577 if (blew_stack) {
1578 DCHECK(string.is_null());
1579 string = Search(offset_out);
1580 }
1581 // Ensure future calls return null immediately.
1582 if (string.is_null()) Reset(ConsString());
1583 return string;
1584 }
1585
Search(int * offset_out)1586 String ConsStringIterator::Search(int* offset_out) {
1587 ConsString cons_string = root_;
1588 // Reset the stack, pushing the root string.
1589 depth_ = 1;
1590 maximum_depth_ = 1;
1591 frames_[0] = cons_string;
1592 const int consumed = consumed_;
1593 int offset = 0;
1594 while (true) {
1595 // Loop until the string is found which contains the target offset.
1596 String string = cons_string.first();
1597 int length = string.length();
1598 int32_t type;
1599 if (consumed < offset + length) {
1600 // Target offset is in the left branch.
1601 // Keep going if we're still in a ConString.
1602 type = string.map().instance_type();
1603 if ((type & kStringRepresentationMask) == kConsStringTag) {
1604 cons_string = ConsString::cast(string);
1605 PushLeft(cons_string);
1606 continue;
1607 }
1608 // Tell the stack we're done descending.
1609 AdjustMaximumDepth();
1610 } else {
1611 // Descend right.
1612 // Update progress through the string.
1613 offset += length;
1614 // Keep going if we're still in a ConString.
1615 string = cons_string.second();
1616 type = string.map().instance_type();
1617 if ((type & kStringRepresentationMask) == kConsStringTag) {
1618 cons_string = ConsString::cast(string);
1619 PushRight(cons_string);
1620 continue;
1621 }
1622 // Need this to be updated for the current string.
1623 length = string.length();
1624 // Account for the possibility of an empty right leaf.
1625 // This happens only if we have asked for an offset outside the string.
1626 if (length == 0) {
1627 // Reset so future operations will return null immediately.
1628 Reset(ConsString());
1629 return String();
1630 }
1631 // Tell the stack we're done descending.
1632 AdjustMaximumDepth();
1633 // Pop stack so next iteration is in correct place.
1634 Pop();
1635 }
1636 DCHECK_NE(length, 0);
1637 // Adjust return values and exit.
1638 consumed_ = offset + length;
1639 *offset_out = consumed - offset;
1640 return string;
1641 }
1642 UNREACHABLE();
1643 }
1644
NextLeaf(bool * blew_stack)1645 String ConsStringIterator::NextLeaf(bool* blew_stack) {
1646 while (true) {
1647 // Tree traversal complete.
1648 if (depth_ == 0) {
1649 *blew_stack = false;
1650 return String();
1651 }
1652 // We've lost track of higher nodes.
1653 if (StackBlown()) {
1654 *blew_stack = true;
1655 return String();
1656 }
1657 // Go right.
1658 ConsString cons_string = frames_[OffsetForDepth(depth_ - 1)];
1659 String string = cons_string.second();
1660 int32_t type = string.map().instance_type();
1661 if ((type & kStringRepresentationMask) != kConsStringTag) {
1662 // Pop stack so next iteration is in correct place.
1663 Pop();
1664 int length = string.length();
1665 // Could be a flattened ConsString.
1666 if (length == 0) continue;
1667 consumed_ += length;
1668 return string;
1669 }
1670 cons_string = ConsString::cast(string);
1671 PushRight(cons_string);
1672 // Need to traverse all the way left.
1673 while (true) {
1674 // Continue left.
1675 string = cons_string.first();
1676 type = string.map().instance_type();
1677 if ((type & kStringRepresentationMask) != kConsStringTag) {
1678 AdjustMaximumDepth();
1679 int length = string.length();
1680 if (length == 0) break; // Skip empty left-hand sides of ConsStrings.
1681 consumed_ += length;
1682 return string;
1683 }
1684 cons_string = ConsString::cast(string);
1685 PushLeft(cons_string);
1686 }
1687 }
1688 UNREACHABLE();
1689 }
1690
AddressOfCharacterAt(int start_index,const DisallowHeapAllocation & no_gc)1691 const byte* String::AddressOfCharacterAt(int start_index,
1692 const DisallowHeapAllocation& no_gc) {
1693 DCHECK(IsFlat());
1694 String subject = *this;
1695 if (subject.IsConsString()) {
1696 subject = ConsString::cast(subject).first();
1697 } else if (subject.IsSlicedString()) {
1698 start_index += SlicedString::cast(subject).offset();
1699 subject = SlicedString::cast(subject).parent();
1700 }
1701 if (subject.IsThinString()) {
1702 subject = ThinString::cast(subject).actual();
1703 }
1704 CHECK_LE(0, start_index);
1705 CHECK_LE(start_index, subject.length());
1706 if (subject.IsSeqOneByteString()) {
1707 return reinterpret_cast<const byte*>(
1708 SeqOneByteString::cast(subject).GetChars(no_gc) + start_index);
1709 } else if (subject.IsSeqTwoByteString()) {
1710 return reinterpret_cast<const byte*>(
1711 SeqTwoByteString::cast(subject).GetChars(no_gc) + start_index);
1712 } else if (subject.IsExternalOneByteString()) {
1713 return reinterpret_cast<const byte*>(
1714 ExternalOneByteString::cast(subject).GetChars() + start_index);
1715 } else {
1716 DCHECK(subject.IsExternalTwoByteString());
1717 return reinterpret_cast<const byte*>(
1718 ExternalTwoByteString::cast(subject).GetChars() + start_index);
1719 }
1720 }
1721
1722 template EXPORT_TEMPLATE_DEFINE(V8_EXPORT_PRIVATE) void String::WriteToFlat(
1723 String source, uint16_t* sink, int from, int to);
1724
1725 } // namespace internal
1726 } // namespace v8
1727