• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2019 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "src/objects/string.h"
6 
7 #include "src/common/assert-scope.h"
8 #include "src/common/globals.h"
9 #include "src/execution/thread-id.h"
10 #include "src/handles/handles-inl.h"
11 #include "src/heap/heap-inl.h"
12 #include "src/heap/memory-chunk.h"
13 #include "src/heap/read-only-heap.h"
14 #include "src/numbers/conversions.h"
15 #include "src/objects/map.h"
16 #include "src/objects/oddball.h"
17 #include "src/objects/string-comparator.h"
18 #include "src/objects/string-inl.h"
19 #include "src/strings/char-predicates.h"
20 #include "src/strings/string-builder-inl.h"
21 #include "src/strings/string-hasher.h"
22 #include "src/strings/string-search.h"
23 #include "src/strings/string-stream.h"
24 #include "src/strings/unicode-inl.h"
25 #include "src/utils/ostreams.h"
26 
27 namespace v8 {
28 namespace internal {
29 
SlowFlatten(Isolate * isolate,Handle<ConsString> cons,AllocationType allocation)30 Handle<String> String::SlowFlatten(Isolate* isolate, Handle<ConsString> cons,
31                                    AllocationType allocation) {
32   DCHECK_NE(cons->second().length(), 0);
33 
34   // TurboFan can create cons strings with empty first parts.
35   while (cons->first().length() == 0) {
36     // We do not want to call this function recursively. Therefore we call
37     // String::Flatten only in those cases where String::SlowFlatten is not
38     // called again.
39     if (cons->second().IsConsString() && !cons->second().IsFlat()) {
40       cons = handle(ConsString::cast(cons->second()), isolate);
41     } else {
42       return String::Flatten(isolate, handle(cons->second(), isolate));
43     }
44   }
45 
46   DCHECK(AllowHeapAllocation::IsAllowed());
47   DCHECK(AllowGarbageCollection::IsAllowed());
48   int length = cons->length();
49   allocation =
50       ObjectInYoungGeneration(*cons) ? allocation : AllocationType::kOld;
51   Handle<SeqString> result;
52   if (cons->IsOneByteRepresentation()) {
53     Handle<SeqOneByteString> flat =
54         isolate->factory()
55             ->NewRawOneByteString(length, allocation)
56             .ToHandleChecked();
57     DisallowHeapAllocation no_gc;
58     WriteToFlat(*cons, flat->GetChars(no_gc), 0, length);
59     result = flat;
60   } else {
61     Handle<SeqTwoByteString> flat =
62         isolate->factory()
63             ->NewRawTwoByteString(length, allocation)
64             .ToHandleChecked();
65     DisallowHeapAllocation no_gc;
66     WriteToFlat(*cons, flat->GetChars(no_gc), 0, length);
67     result = flat;
68   }
69   cons->set_first(*result);
70   cons->set_second(ReadOnlyRoots(isolate).empty_string());
71   DCHECK(result->IsFlat());
72   return result;
73 }
74 
75 namespace {
76 
77 template <class StringClass>
MigrateExternalStringResource(Isolate * isolate,ExternalString from,StringClass to)78 void MigrateExternalStringResource(Isolate* isolate, ExternalString from,
79                                    StringClass to) {
80   Address to_resource_address = to.resource_as_address();
81   if (to_resource_address == kNullAddress) {
82     StringClass cast_from = StringClass::cast(from);
83     // |to| is a just-created internalized copy of |from|. Migrate the resource.
84     to.SetResource(isolate, cast_from.resource());
85     // Zap |from|'s resource pointer to reflect the fact that |from| has
86     // relinquished ownership of its resource.
87     isolate->heap()->UpdateExternalString(
88         from, ExternalString::cast(from).ExternalPayloadSize(), 0);
89     cast_from.SetResource(isolate, nullptr);
90   } else if (to_resource_address != from.resource_as_address()) {
91     // |to| already existed and has its own resource. Finalize |from|.
92     isolate->heap()->FinalizeExternalString(from);
93   }
94 }
95 
96 }  // namespace
97 
MakeThin(Isolate * isolate,String internalized)98 void String::MakeThin(Isolate* isolate, String internalized) {
99   DisallowHeapAllocation no_gc;
100   DCHECK_NE(*this, internalized);
101   DCHECK(internalized.IsInternalizedString());
102 
103   if (this->IsExternalString()) {
104     if (internalized.IsExternalOneByteString()) {
105       MigrateExternalStringResource(isolate, ExternalString::cast(*this),
106                                     ExternalOneByteString::cast(internalized));
107     } else if (internalized.IsExternalTwoByteString()) {
108       MigrateExternalStringResource(isolate, ExternalString::cast(*this),
109                                     ExternalTwoByteString::cast(internalized));
110     } else {
111       // If the external string is duped into an existing non-external
112       // internalized string, free its resource (it's about to be rewritten
113       // into a ThinString below).
114       isolate->heap()->FinalizeExternalString(*this);
115     }
116   }
117 
118   bool has_pointers = StringShape(*this).IsIndirect();
119 
120   int old_size = this->Size();
121   bool one_byte = internalized.IsOneByteRepresentation();
122   Handle<Map> map = one_byte ? isolate->factory()->thin_one_byte_string_map()
123                              : isolate->factory()->thin_string_map();
124   // Update actual first and then do release store on the map word. This ensures
125   // that the concurrent marker will read the pointer when visiting a
126   // ThinString.
127   ThinString thin = ThinString::unchecked_cast(*this);
128   thin.set_actual(internalized);
129   DCHECK_GE(old_size, ThinString::kSize);
130   this->synchronized_set_map(*map);
131   Address thin_end = thin.address() + ThinString::kSize;
132   int size_delta = old_size - ThinString::kSize;
133   if (size_delta != 0) {
134     Heap* heap = isolate->heap();
135     heap->CreateFillerObjectAt(
136         thin_end, size_delta,
137         has_pointers ? ClearRecordedSlots::kYes : ClearRecordedSlots::kNo);
138   }
139 }
140 
MakeExternal(v8::String::ExternalStringResource * resource)141 bool String::MakeExternal(v8::String::ExternalStringResource* resource) {
142   DisallowHeapAllocation no_allocation;
143   // Externalizing twice leaks the external resource, so it's
144   // prohibited by the API.
145   DCHECK(this->SupportsExternalization());
146   DCHECK(resource->IsCacheable());
147 #ifdef ENABLE_SLOW_DCHECKS
148   if (FLAG_enable_slow_asserts) {
149     // Assert that the resource and the string are equivalent.
150     DCHECK(static_cast<size_t>(this->length()) == resource->length());
151     ScopedVector<uc16> smart_chars(this->length());
152     String::WriteToFlat(*this, smart_chars.begin(), 0, this->length());
153     DCHECK_EQ(0, memcmp(smart_chars.begin(), resource->data(),
154                         resource->length() * sizeof(smart_chars[0])));
155   }
156 #endif                      // DEBUG
157   int size = this->Size();  // Byte size of the original string.
158   // Abort if size does not allow in-place conversion.
159   if (size < ExternalString::kUncachedSize) return false;
160   // Read-only strings cannot be made external, since that would mutate the
161   // string.
162   if (IsReadOnlyHeapObject(*this)) return false;
163   Isolate* isolate = GetIsolateFromWritableObject(*this);
164   bool is_internalized = this->IsInternalizedString();
165   bool has_pointers = StringShape(*this).IsIndirect();
166 
167   if (has_pointers) {
168     isolate->heap()->NotifyObjectLayoutChange(*this, no_allocation,
169                                               InvalidateRecordedSlots::kYes);
170   }
171 
172   // Disallow garbage collection to avoid possible GC vs string access deadlock.
173   DisallowGarbageCollection no_gc;
174   base::SharedMutexGuard<base::kExclusive> shared_mutex_guard(
175       isolate->string_access());
176   // Morph the string to an external string by replacing the map and
177   // reinitializing the fields.  This won't work if the space the existing
178   // string occupies is too small for a regular external string.  Instead, we
179   // resort to an uncached external string instead, omitting the field caching
180   // the address of the backing store.  When we encounter uncached external
181   // strings in generated code, we need to bailout to runtime.
182   Map new_map;
183   ReadOnlyRoots roots(isolate);
184   if (size < ExternalString::kSizeOfAllExternalStrings) {
185     if (is_internalized) {
186       new_map = roots.uncached_external_internalized_string_map();
187     } else {
188       new_map = roots.uncached_external_string_map();
189     }
190   } else {
191     new_map = is_internalized ? roots.external_internalized_string_map()
192                               : roots.external_string_map();
193   }
194 
195   // Byte size of the external String object.
196   int new_size = this->SizeFromMap(new_map);
197   isolate->heap()->CreateFillerObjectAt(
198       this->address() + new_size, size - new_size,
199       has_pointers ? ClearRecordedSlots::kYes : ClearRecordedSlots::kNo);
200 
201   // We are storing the new map using release store after creating a filler for
202   // the left-over space to avoid races with the sweeper thread.
203   this->synchronized_set_map(new_map);
204 
205   ExternalTwoByteString self = ExternalTwoByteString::cast(*this);
206   self.AllocateExternalPointerEntries(isolate);
207   self.SetResource(isolate, resource);
208   isolate->heap()->RegisterExternalString(*this);
209   if (is_internalized) self.Hash();  // Force regeneration of the hash value.
210   return true;
211 }
212 
MakeExternal(v8::String::ExternalOneByteStringResource * resource)213 bool String::MakeExternal(v8::String::ExternalOneByteStringResource* resource) {
214   DisallowHeapAllocation no_allocation;
215   // Externalizing twice leaks the external resource, so it's
216   // prohibited by the API.
217   DCHECK(this->SupportsExternalization());
218   DCHECK(resource->IsCacheable());
219 #ifdef ENABLE_SLOW_DCHECKS
220   if (FLAG_enable_slow_asserts) {
221     // Assert that the resource and the string are equivalent.
222     DCHECK(static_cast<size_t>(this->length()) == resource->length());
223     if (this->IsTwoByteRepresentation()) {
224       ScopedVector<uint16_t> smart_chars(this->length());
225       String::WriteToFlat(*this, smart_chars.begin(), 0, this->length());
226       DCHECK(String::IsOneByte(smart_chars.begin(), this->length()));
227     }
228     ScopedVector<char> smart_chars(this->length());
229     String::WriteToFlat(*this, smart_chars.begin(), 0, this->length());
230     DCHECK_EQ(0, memcmp(smart_chars.begin(), resource->data(),
231                         resource->length() * sizeof(smart_chars[0])));
232   }
233 #endif                      // DEBUG
234   int size = this->Size();  // Byte size of the original string.
235   // Abort if size does not allow in-place conversion.
236   if (size < ExternalString::kUncachedSize) return false;
237   // Read-only strings cannot be made external, since that would mutate the
238   // string.
239   if (IsReadOnlyHeapObject(*this)) return false;
240   Isolate* isolate = GetIsolateFromWritableObject(*this);
241   bool is_internalized = this->IsInternalizedString();
242   bool has_pointers = StringShape(*this).IsIndirect();
243 
244   if (has_pointers) {
245     isolate->heap()->NotifyObjectLayoutChange(*this, no_allocation,
246                                               InvalidateRecordedSlots::kYes);
247   }
248 
249   // Disallow garbage collection to avoid possible GC vs string access deadlock.
250   DisallowGarbageCollection no_gc;
251   base::SharedMutexGuard<base::kExclusive> shared_mutex_guard(
252       isolate->string_access());
253   // Morph the string to an external string by replacing the map and
254   // reinitializing the fields.  This won't work if the space the existing
255   // string occupies is too small for a regular external string.  Instead, we
256   // resort to an uncached external string instead, omitting the field caching
257   // the address of the backing store.  When we encounter uncached external
258   // strings in generated code, we need to bailout to runtime.
259   Map new_map;
260   ReadOnlyRoots roots(isolate);
261   if (size < ExternalString::kSizeOfAllExternalStrings) {
262     new_map = is_internalized
263                   ? roots.uncached_external_one_byte_internalized_string_map()
264                   : roots.uncached_external_one_byte_string_map();
265   } else {
266     new_map = is_internalized
267                   ? roots.external_one_byte_internalized_string_map()
268                   : roots.external_one_byte_string_map();
269   }
270 
271   // Byte size of the external String object.
272   int new_size = this->SizeFromMap(new_map);
273   isolate->heap()->CreateFillerObjectAt(
274       this->address() + new_size, size - new_size,
275       has_pointers ? ClearRecordedSlots::kYes : ClearRecordedSlots::kNo);
276 
277   // We are storing the new map using release store after creating a filler for
278   // the left-over space to avoid races with the sweeper thread.
279   this->synchronized_set_map(new_map);
280 
281   ExternalOneByteString self = ExternalOneByteString::cast(*this);
282   self.AllocateExternalPointerEntries(isolate);
283   self.SetResource(isolate, resource);
284   isolate->heap()->RegisterExternalString(*this);
285   if (is_internalized) self.Hash();  // Force regeneration of the hash value.
286   return true;
287 }
288 
SupportsExternalization()289 bool String::SupportsExternalization() {
290   if (this->IsThinString()) {
291     return i::ThinString::cast(*this).actual().SupportsExternalization();
292   }
293 
294   // RO_SPACE strings cannot be externalized.
295   if (IsReadOnlyHeapObject(*this)) {
296     return false;
297   }
298 
299   // Already an external string.
300   if (StringShape(*this).IsExternal()) {
301     return false;
302   }
303 
304 #ifdef V8_COMPRESS_POINTERS
305   // Small strings may not be in-place externalizable.
306   if (this->Size() < ExternalString::kUncachedSize) return false;
307 #else
308   DCHECK_LE(ExternalString::kUncachedSize, this->Size());
309 #endif
310 
311   Isolate* isolate = GetIsolateFromWritableObject(*this);
312   return !isolate->heap()->IsInGCPostProcessing();
313 }
314 
PrefixForDebugPrint() const315 const char* String::PrefixForDebugPrint() const {
316   StringShape shape(*this);
317   if (IsTwoByteRepresentation()) {
318     StringShape shape(*this);
319     if (shape.IsInternalized()) {
320       return "u#";
321     } else if (shape.IsCons()) {
322       return "uc\"";
323     } else if (shape.IsThin()) {
324       return "u>\"";
325     } else if (shape.IsExternal()) {
326       return "ue\"";
327     } else {
328       return "u\"";
329     }
330   } else {
331     StringShape shape(*this);
332     if (shape.IsInternalized()) {
333       return "#";
334     } else if (shape.IsCons()) {
335       return "c\"";
336     } else if (shape.IsThin()) {
337       return ">\"";
338     } else if (shape.IsExternal()) {
339       return "e\"";
340     } else {
341       return "\"";
342     }
343   }
344   UNREACHABLE();
345 }
346 
SuffixForDebugPrint() const347 const char* String::SuffixForDebugPrint() const {
348   StringShape shape(*this);
349   if (shape.IsInternalized()) return "";
350   return "\"";
351 }
352 
StringShortPrint(StringStream * accumulator)353 void String::StringShortPrint(StringStream* accumulator) {
354   if (!LooksValid()) {
355     accumulator->Add("<Invalid String>");
356     return;
357   }
358 
359   const int len = length();
360   accumulator->Add("<String[%u]: ", len);
361   accumulator->Add(PrefixForDebugPrint());
362 
363   if (len > kMaxShortPrintLength) {
364     accumulator->Add("...<truncated>>");
365     accumulator->Add(SuffixForDebugPrint());
366     accumulator->Put('>');
367     return;
368   }
369 
370   PrintUC16(accumulator, 0, len);
371   accumulator->Add(SuffixForDebugPrint());
372   accumulator->Put('>');
373 }
374 
PrintUC16(std::ostream & os,int start,int end)375 void String::PrintUC16(std::ostream& os, int start, int end) {  // NOLINT
376   if (end < 0) end = length();
377   StringCharacterStream stream(*this, start);
378   for (int i = start; i < end && stream.HasMore(); i++) {
379     os << AsUC16(stream.GetNext());
380   }
381 }
382 
PrintUC16(StringStream * accumulator,int start,int end)383 void String::PrintUC16(StringStream* accumulator, int start, int end) {
384   if (end < 0) end = length();
385   StringCharacterStream stream(*this, start);
386   for (int i = start; i < end && stream.HasMore(); i++) {
387     uint16_t c = stream.GetNext();
388     if (c == '\n') {
389       accumulator->Add("\\n");
390     } else if (c == '\r') {
391       accumulator->Add("\\r");
392     } else if (c == '\\') {
393       accumulator->Add("\\\\");
394     } else if (!std::isprint(c)) {
395       accumulator->Add("\\x%02x", c);
396     } else {
397       accumulator->Put(static_cast<char>(c));
398     }
399   }
400 }
401 
402 // static
Trim(Isolate * isolate,Handle<String> string,TrimMode mode)403 Handle<String> String::Trim(Isolate* isolate, Handle<String> string,
404                             TrimMode mode) {
405   string = String::Flatten(isolate, string);
406   int const length = string->length();
407 
408   // Perform left trimming if requested.
409   int left = 0;
410   if (mode == kTrim || mode == kTrimStart) {
411     while (left < length && IsWhiteSpaceOrLineTerminator(string->Get(left))) {
412       left++;
413     }
414   }
415 
416   // Perform right trimming if requested.
417   int right = length;
418   if (mode == kTrim || mode == kTrimEnd) {
419     while (right > left &&
420            IsWhiteSpaceOrLineTerminator(string->Get(right - 1))) {
421       right--;
422     }
423   }
424 
425   return isolate->factory()->NewSubString(string, left, right);
426 }
427 
ToArrayIndex(Address addr)428 int32_t String::ToArrayIndex(Address addr) {
429   DisallowHeapAllocation no_gc;
430   String key(addr);
431 
432   uint32_t index;
433   if (!key.AsArrayIndex(&index)) return -1;
434   if (index <= INT_MAX) return index;
435   return -1;
436 }
437 
LooksValid()438 bool String::LooksValid() {
439   // TODO(leszeks): Maybe remove this check entirely, Heap::Contains uses
440   // basically the same logic as the way we access the heap in the first place.
441   // RO_SPACE objects should always be valid.
442   if (ReadOnlyHeap::Contains(*this)) return true;
443   BasicMemoryChunk* chunk = BasicMemoryChunk::FromHeapObject(*this);
444   if (chunk->heap() == nullptr) return false;
445   return chunk->heap()->Contains(*this);
446 }
447 
448 namespace {
449 
AreDigits(const uint8_t * s,int from,int to)450 bool AreDigits(const uint8_t* s, int from, int to) {
451   for (int i = from; i < to; i++) {
452     if (s[i] < '0' || s[i] > '9') return false;
453   }
454 
455   return true;
456 }
457 
ParseDecimalInteger(const uint8_t * s,int from,int to)458 int ParseDecimalInteger(const uint8_t* s, int from, int to) {
459   DCHECK_LT(to - from, 10);  // Overflow is not possible.
460   DCHECK(from < to);
461   int d = s[from] - '0';
462 
463   for (int i = from + 1; i < to; i++) {
464     d = 10 * d + (s[i] - '0');
465   }
466 
467   return d;
468 }
469 
470 }  // namespace
471 
472 // static
ToNumber(Isolate * isolate,Handle<String> subject)473 Handle<Object> String::ToNumber(Isolate* isolate, Handle<String> subject) {
474   // Flatten {subject} string first.
475   subject = String::Flatten(isolate, subject);
476 
477   // Fast array index case.
478   uint32_t index;
479   if (subject->AsArrayIndex(&index)) {
480     return isolate->factory()->NewNumberFromUint(index);
481   }
482 
483   // Fast case: short integer or some sorts of junk values.
484   if (subject->IsSeqOneByteString()) {
485     int len = subject->length();
486     if (len == 0) return handle(Smi::zero(), isolate);
487 
488     DisallowHeapAllocation no_gc;
489     uint8_t const* data =
490         Handle<SeqOneByteString>::cast(subject)->GetChars(no_gc);
491     bool minus = (data[0] == '-');
492     int start_pos = (minus ? 1 : 0);
493 
494     if (start_pos == len) {
495       return isolate->factory()->nan_value();
496     } else if (data[start_pos] > '9') {
497       // Fast check for a junk value. A valid string may start from a
498       // whitespace, a sign ('+' or '-'), the decimal point, a decimal digit
499       // or the 'I' character ('Infinity'). All of that have codes not greater
500       // than '9' except 'I' and &nbsp;.
501       if (data[start_pos] != 'I' && data[start_pos] != 0xA0) {
502         return isolate->factory()->nan_value();
503       }
504     } else if (len - start_pos < 10 && AreDigits(data, start_pos, len)) {
505       // The maximal/minimal smi has 10 digits. If the string has less digits
506       // we know it will fit into the smi-data type.
507       int d = ParseDecimalInteger(data, start_pos, len);
508       if (minus) {
509         if (d == 0) return isolate->factory()->minus_zero_value();
510         d = -d;
511       } else if (!subject->HasHashCode() && len <= String::kMaxArrayIndexSize &&
512                  (len == 1 || data[0] != '0')) {
513         // String hash is not calculated yet but all the data are present.
514         // Update the hash field to speed up sequential convertions.
515         uint32_t hash = StringHasher::MakeArrayIndexHash(d, len);
516 #ifdef DEBUG
517         subject->Hash();  // Force hash calculation.
518         DCHECK_EQ(static_cast<int>(subject->hash_field()),
519                   static_cast<int>(hash));
520 #endif
521         subject->set_hash_field(hash);
522       }
523       return handle(Smi::FromInt(d), isolate);
524     }
525   }
526 
527   // Slower case.
528   int flags = ALLOW_HEX | ALLOW_OCTAL | ALLOW_BINARY;
529   return isolate->factory()->NewNumber(StringToDouble(isolate, subject, flags));
530 }
531 
GetFlatContent(const DisallowHeapAllocation & no_gc)532 String::FlatContent String::GetFlatContent(
533     const DisallowHeapAllocation& no_gc) {
534 #if DEBUG
535   // Check that this method is called only from the main thread.
536   {
537     Isolate* isolate;
538     // We don't have to check read only strings as those won't move.
539     DCHECK_IMPLIES(GetIsolateFromHeapObject(*this, &isolate),
540                    ThreadId::Current() == isolate->thread_id());
541   }
542 #endif
543   USE(no_gc);
544   int length = this->length();
545   StringShape shape(*this);
546   String string = *this;
547   int offset = 0;
548   if (shape.representation_tag() == kConsStringTag) {
549     ConsString cons = ConsString::cast(string);
550     if (cons.second().length() != 0) {
551       return FlatContent(no_gc);
552     }
553     string = cons.first();
554     shape = StringShape(string);
555   } else if (shape.representation_tag() == kSlicedStringTag) {
556     SlicedString slice = SlicedString::cast(string);
557     offset = slice.offset();
558     string = slice.parent();
559     shape = StringShape(string);
560     DCHECK(shape.representation_tag() != kConsStringTag &&
561            shape.representation_tag() != kSlicedStringTag);
562   }
563   if (shape.representation_tag() == kThinStringTag) {
564     ThinString thin = ThinString::cast(string);
565     string = thin.actual();
566     shape = StringShape(string);
567     DCHECK(!shape.IsCons());
568     DCHECK(!shape.IsSliced());
569   }
570   if (shape.encoding_tag() == kOneByteStringTag) {
571     const uint8_t* start;
572     if (shape.representation_tag() == kSeqStringTag) {
573       start = SeqOneByteString::cast(string).GetChars(no_gc);
574     } else {
575       start = ExternalOneByteString::cast(string).GetChars();
576     }
577     return FlatContent(start + offset, length, no_gc);
578   } else {
579     DCHECK_EQ(shape.encoding_tag(), kTwoByteStringTag);
580     const uc16* start;
581     if (shape.representation_tag() == kSeqStringTag) {
582       start = SeqTwoByteString::cast(string).GetChars(no_gc);
583     } else {
584       start = ExternalTwoByteString::cast(string).GetChars();
585     }
586     return FlatContent(start + offset, length, no_gc);
587   }
588 }
589 
ToCString(AllowNullsFlag allow_nulls,RobustnessFlag robust_flag,int offset,int length,int * length_return)590 std::unique_ptr<char[]> String::ToCString(AllowNullsFlag allow_nulls,
591                                           RobustnessFlag robust_flag,
592                                           int offset, int length,
593                                           int* length_return) {
594   if (robust_flag == ROBUST_STRING_TRAVERSAL && !LooksValid()) {
595     return std::unique_ptr<char[]>();
596   }
597   // Negative length means the to the end of the string.
598   if (length < 0) length = kMaxInt - offset;
599 
600   // Compute the size of the UTF-8 string. Start at the specified offset.
601   StringCharacterStream stream(*this, offset);
602   int character_position = offset;
603   int utf8_bytes = 0;
604   int last = unibrow::Utf16::kNoPreviousCharacter;
605   while (stream.HasMore() && character_position++ < offset + length) {
606     uint16_t character = stream.GetNext();
607     utf8_bytes += unibrow::Utf8::Length(character, last);
608     last = character;
609   }
610 
611   if (length_return) {
612     *length_return = utf8_bytes;
613   }
614 
615   char* result = NewArray<char>(utf8_bytes + 1);
616 
617   // Convert the UTF-16 string to a UTF-8 buffer. Start at the specified offset.
618   stream.Reset(*this, offset);
619   character_position = offset;
620   int utf8_byte_position = 0;
621   last = unibrow::Utf16::kNoPreviousCharacter;
622   while (stream.HasMore() && character_position++ < offset + length) {
623     uint16_t character = stream.GetNext();
624     if (allow_nulls == DISALLOW_NULLS && character == 0) {
625       character = ' ';
626     }
627     utf8_byte_position +=
628         unibrow::Utf8::Encode(result + utf8_byte_position, character, last);
629     last = character;
630   }
631   result[utf8_byte_position] = 0;
632   return std::unique_ptr<char[]>(result);
633 }
634 
ToCString(AllowNullsFlag allow_nulls,RobustnessFlag robust_flag,int * length_return)635 std::unique_ptr<char[]> String::ToCString(AllowNullsFlag allow_nulls,
636                                           RobustnessFlag robust_flag,
637                                           int* length_return) {
638   return ToCString(allow_nulls, robust_flag, 0, -1, length_return);
639 }
640 
641 template <typename sinkchar>
WriteToFlat(String source,sinkchar * sink,int from,int to)642 void String::WriteToFlat(String source, sinkchar* sink, int from, int to) {
643   DisallowHeapAllocation no_gc;
644   SharedStringAccessGuardIfNeeded access_guard(source);
645   while (from < to) {
646     DCHECK_LE(0, from);
647     DCHECK_LE(to, source.length());
648     switch (StringShape(source).full_representation_tag()) {
649       case kOneByteStringTag | kExternalStringTag: {
650         CopyChars(sink, ExternalOneByteString::cast(source).GetChars() + from,
651                   to - from);
652         return;
653       }
654       case kTwoByteStringTag | kExternalStringTag: {
655         const uc16* data = ExternalTwoByteString::cast(source).GetChars();
656         CopyChars(sink, data + from, to - from);
657         return;
658       }
659       case kOneByteStringTag | kSeqStringTag: {
660         CopyChars(
661             sink,
662             SeqOneByteString::cast(source).GetChars(no_gc, access_guard) + from,
663             to - from);
664         return;
665       }
666       case kTwoByteStringTag | kSeqStringTag: {
667         CopyChars(
668             sink,
669             SeqTwoByteString::cast(source).GetChars(no_gc, access_guard) + from,
670             to - from);
671         return;
672       }
673       case kOneByteStringTag | kConsStringTag:
674       case kTwoByteStringTag | kConsStringTag: {
675         ConsString cons_string = ConsString::cast(source);
676         String first = cons_string.first();
677         int boundary = first.length();
678         if (to - boundary >= boundary - from) {
679           // Right hand side is longer.  Recurse over left.
680           if (from < boundary) {
681             WriteToFlat(first, sink, from, boundary);
682             if (from == 0 && cons_string.second() == first) {
683               CopyChars(sink + boundary, sink, boundary);
684               return;
685             }
686             sink += boundary - from;
687             from = 0;
688           } else {
689             from -= boundary;
690           }
691           to -= boundary;
692           source = cons_string.second();
693         } else {
694           // Left hand side is longer.  Recurse over right.
695           if (to > boundary) {
696             String second = cons_string.second();
697             // When repeatedly appending to a string, we get a cons string that
698             // is unbalanced to the left, a list, essentially.  We inline the
699             // common case of sequential one-byte right child.
700             if (to - boundary == 1) {
701               sink[boundary - from] = static_cast<sinkchar>(second.Get(0));
702             } else if (second.IsSeqOneByteString()) {
703               CopyChars(
704                   sink + boundary - from,
705                   SeqOneByteString::cast(second).GetChars(no_gc, access_guard),
706                   to - boundary);
707             } else {
708               WriteToFlat(second, sink + boundary - from, 0, to - boundary);
709             }
710             to = boundary;
711           }
712           source = first;
713         }
714         break;
715       }
716       case kOneByteStringTag | kSlicedStringTag:
717       case kTwoByteStringTag | kSlicedStringTag: {
718         SlicedString slice = SlicedString::cast(source);
719         unsigned offset = slice.offset();
720         WriteToFlat(slice.parent(), sink, from + offset, to + offset);
721         return;
722       }
723       case kOneByteStringTag | kThinStringTag:
724       case kTwoByteStringTag | kThinStringTag:
725         source = ThinString::cast(source).actual();
726         break;
727     }
728   }
729   DCHECK_EQ(from, to);
730 }
731 
732 template <typename SourceChar>
CalculateLineEndsImpl(std::vector<int> * line_ends,Vector<const SourceChar> src,bool include_ending_line)733 static void CalculateLineEndsImpl(std::vector<int>* line_ends,
734                                   Vector<const SourceChar> src,
735                                   bool include_ending_line) {
736   const int src_len = src.length();
737   for (int i = 0; i < src_len - 1; i++) {
738     SourceChar current = src[i];
739     SourceChar next = src[i + 1];
740     if (IsLineTerminatorSequence(current, next)) line_ends->push_back(i);
741   }
742 
743   if (src_len > 0 && IsLineTerminatorSequence(src[src_len - 1], 0)) {
744     line_ends->push_back(src_len - 1);
745   }
746   if (include_ending_line) {
747     // Include one character beyond the end of script. The rewriter uses that
748     // position for the implicit return statement.
749     line_ends->push_back(src_len);
750   }
751 }
752 
753 template <typename LocalIsolate>
CalculateLineEnds(LocalIsolate * isolate,Handle<String> src,bool include_ending_line)754 Handle<FixedArray> String::CalculateLineEnds(LocalIsolate* isolate,
755                                              Handle<String> src,
756                                              bool include_ending_line) {
757   src = Flatten(isolate, src);
758   // Rough estimate of line count based on a roughly estimated average
759   // length of (unpacked) code.
760   int line_count_estimate = src->length() >> 4;
761   std::vector<int> line_ends;
762   line_ends.reserve(line_count_estimate);
763   {
764     DisallowHeapAllocation no_allocation;  // ensure vectors stay valid.
765     // Dispatch on type of strings.
766     String::FlatContent content = src->GetFlatContent(no_allocation);
767     DCHECK(content.IsFlat());
768     if (content.IsOneByte()) {
769       CalculateLineEndsImpl(&line_ends, content.ToOneByteVector(),
770                             include_ending_line);
771     } else {
772       CalculateLineEndsImpl(&line_ends, content.ToUC16Vector(),
773                             include_ending_line);
774     }
775   }
776   int line_count = static_cast<int>(line_ends.size());
777   Handle<FixedArray> array =
778       isolate->factory()->NewFixedArray(line_count, AllocationType::kOld);
779   for (int i = 0; i < line_count; i++) {
780     array->set(i, Smi::FromInt(line_ends[i]));
781   }
782   return array;
783 }
784 
785 template Handle<FixedArray> String::CalculateLineEnds(Isolate* isolate,
786                                                       Handle<String> src,
787                                                       bool include_ending_line);
788 template Handle<FixedArray> String::CalculateLineEnds(LocalIsolate* isolate,
789                                                       Handle<String> src,
790                                                       bool include_ending_line);
791 
SlowEquals(String other)792 bool String::SlowEquals(String other) {
793   DisallowHeapAllocation no_gc;
794   // Fast check: negative check with lengths.
795   int len = length();
796   if (len != other.length()) return false;
797   if (len == 0) return true;
798 
799   // Fast check: if at least one ThinString is involved, dereference it/them
800   // and restart.
801   if (this->IsThinString() || other.IsThinString()) {
802     if (other.IsThinString()) other = ThinString::cast(other).actual();
803     if (this->IsThinString()) {
804       return ThinString::cast(*this).actual().Equals(other);
805     } else {
806       return this->Equals(other);
807     }
808   }
809 
810   // Fast check: if hash code is computed for both strings
811   // a fast negative check can be performed.
812   if (HasHashCode() && other.HasHashCode()) {
813 #ifdef ENABLE_SLOW_DCHECKS
814     if (FLAG_enable_slow_asserts) {
815       if (Hash() != other.Hash()) {
816         bool found_difference = false;
817         for (int i = 0; i < len; i++) {
818           if (Get(i) != other.Get(i)) {
819             found_difference = true;
820             break;
821           }
822         }
823         DCHECK(found_difference);
824       }
825     }
826 #endif
827     if (Hash() != other.Hash()) return false;
828   }
829 
830   // We know the strings are both non-empty. Compare the first chars
831   // before we try to flatten the strings.
832   if (this->Get(0) != other.Get(0)) return false;
833 
834   if (IsSeqOneByteString() && other.IsSeqOneByteString()) {
835     const uint8_t* str1 = SeqOneByteString::cast(*this).GetChars(no_gc);
836     const uint8_t* str2 = SeqOneByteString::cast(other).GetChars(no_gc);
837     return CompareRawStringContents(str1, str2, len);
838   }
839 
840   StringComparator comparator;
841   return comparator.Equals(*this, other);
842 }
843 
SlowEquals(Isolate * isolate,Handle<String> one,Handle<String> two)844 bool String::SlowEquals(Isolate* isolate, Handle<String> one,
845                         Handle<String> two) {
846   // Fast check: negative check with lengths.
847   int one_length = one->length();
848   if (one_length != two->length()) return false;
849   if (one_length == 0) return true;
850 
851   // Fast check: if at least one ThinString is involved, dereference it/them
852   // and restart.
853   if (one->IsThinString() || two->IsThinString()) {
854     if (one->IsThinString())
855       one = handle(ThinString::cast(*one).actual(), isolate);
856     if (two->IsThinString())
857       two = handle(ThinString::cast(*two).actual(), isolate);
858     return String::Equals(isolate, one, two);
859   }
860 
861   // Fast check: if hash code is computed for both strings
862   // a fast negative check can be performed.
863   if (one->HasHashCode() && two->HasHashCode()) {
864 #ifdef ENABLE_SLOW_DCHECKS
865     if (FLAG_enable_slow_asserts) {
866       if (one->Hash() != two->Hash()) {
867         bool found_difference = false;
868         for (int i = 0; i < one_length; i++) {
869           if (one->Get(i) != two->Get(i)) {
870             found_difference = true;
871             break;
872           }
873         }
874         DCHECK(found_difference);
875       }
876     }
877 #endif
878     if (one->Hash() != two->Hash()) return false;
879   }
880 
881   // We know the strings are both non-empty. Compare the first chars
882   // before we try to flatten the strings.
883   if (one->Get(0) != two->Get(0)) return false;
884 
885   one = String::Flatten(isolate, one);
886   two = String::Flatten(isolate, two);
887 
888   DisallowHeapAllocation no_gc;
889   String::FlatContent flat1 = one->GetFlatContent(no_gc);
890   String::FlatContent flat2 = two->GetFlatContent(no_gc);
891 
892   if (flat1.IsOneByte() && flat2.IsOneByte()) {
893     return CompareRawStringContents(flat1.ToOneByteVector().begin(),
894                                     flat2.ToOneByteVector().begin(),
895                                     one_length);
896   } else {
897     for (int i = 0; i < one_length; i++) {
898       if (flat1.Get(i) != flat2.Get(i)) return false;
899     }
900     return true;
901   }
902 }
903 
904 // static
Compare(Isolate * isolate,Handle<String> x,Handle<String> y)905 ComparisonResult String::Compare(Isolate* isolate, Handle<String> x,
906                                  Handle<String> y) {
907   // A few fast case tests before we flatten.
908   if (x.is_identical_to(y)) {
909     return ComparisonResult::kEqual;
910   } else if (y->length() == 0) {
911     return x->length() == 0 ? ComparisonResult::kEqual
912                             : ComparisonResult::kGreaterThan;
913   } else if (x->length() == 0) {
914     return ComparisonResult::kLessThan;
915   }
916 
917   int const d = x->Get(0) - y->Get(0);
918   if (d < 0) {
919     return ComparisonResult::kLessThan;
920   } else if (d > 0) {
921     return ComparisonResult::kGreaterThan;
922   }
923 
924   // Slow case.
925   x = String::Flatten(isolate, x);
926   y = String::Flatten(isolate, y);
927 
928   DisallowHeapAllocation no_gc;
929   ComparisonResult result = ComparisonResult::kEqual;
930   int prefix_length = x->length();
931   if (y->length() < prefix_length) {
932     prefix_length = y->length();
933     result = ComparisonResult::kGreaterThan;
934   } else if (y->length() > prefix_length) {
935     result = ComparisonResult::kLessThan;
936   }
937   int r;
938   String::FlatContent x_content = x->GetFlatContent(no_gc);
939   String::FlatContent y_content = y->GetFlatContent(no_gc);
940   if (x_content.IsOneByte()) {
941     Vector<const uint8_t> x_chars = x_content.ToOneByteVector();
942     if (y_content.IsOneByte()) {
943       Vector<const uint8_t> y_chars = y_content.ToOneByteVector();
944       r = CompareChars(x_chars.begin(), y_chars.begin(), prefix_length);
945     } else {
946       Vector<const uc16> y_chars = y_content.ToUC16Vector();
947       r = CompareChars(x_chars.begin(), y_chars.begin(), prefix_length);
948     }
949   } else {
950     Vector<const uc16> x_chars = x_content.ToUC16Vector();
951     if (y_content.IsOneByte()) {
952       Vector<const uint8_t> y_chars = y_content.ToOneByteVector();
953       r = CompareChars(x_chars.begin(), y_chars.begin(), prefix_length);
954     } else {
955       Vector<const uc16> y_chars = y_content.ToUC16Vector();
956       r = CompareChars(x_chars.begin(), y_chars.begin(), prefix_length);
957     }
958   }
959   if (r < 0) {
960     result = ComparisonResult::kLessThan;
961   } else if (r > 0) {
962     result = ComparisonResult::kGreaterThan;
963   }
964   return result;
965 }
966 
IndexOf(Isolate * isolate,Handle<Object> receiver,Handle<Object> search,Handle<Object> position)967 Object String::IndexOf(Isolate* isolate, Handle<Object> receiver,
968                        Handle<Object> search, Handle<Object> position) {
969   if (receiver->IsNullOrUndefined(isolate)) {
970     THROW_NEW_ERROR_RETURN_FAILURE(
971         isolate, NewTypeError(MessageTemplate::kCalledOnNullOrUndefined,
972                               isolate->factory()->NewStringFromAsciiChecked(
973                                   "String.prototype.indexOf")));
974   }
975   Handle<String> receiver_string;
976   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, receiver_string,
977                                      Object::ToString(isolate, receiver));
978 
979   Handle<String> search_string;
980   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, search_string,
981                                      Object::ToString(isolate, search));
982 
983   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, position,
984                                      Object::ToInteger(isolate, position));
985 
986   uint32_t index = receiver_string->ToValidIndex(*position);
987   return Smi::FromInt(
988       String::IndexOf(isolate, receiver_string, search_string, index));
989 }
990 
991 namespace {
992 
993 template <typename T>
SearchString(Isolate * isolate,String::FlatContent receiver_content,Vector<T> pat_vector,int start_index)994 int SearchString(Isolate* isolate, String::FlatContent receiver_content,
995                  Vector<T> pat_vector, int start_index) {
996   if (receiver_content.IsOneByte()) {
997     return SearchString(isolate, receiver_content.ToOneByteVector(), pat_vector,
998                         start_index);
999   }
1000   return SearchString(isolate, receiver_content.ToUC16Vector(), pat_vector,
1001                       start_index);
1002 }
1003 
1004 }  // namespace
1005 
IndexOf(Isolate * isolate,Handle<String> receiver,Handle<String> search,int start_index)1006 int String::IndexOf(Isolate* isolate, Handle<String> receiver,
1007                     Handle<String> search, int start_index) {
1008   DCHECK_LE(0, start_index);
1009   DCHECK(start_index <= receiver->length());
1010 
1011   uint32_t search_length = search->length();
1012   if (search_length == 0) return start_index;
1013 
1014   uint32_t receiver_length = receiver->length();
1015   if (start_index + search_length > receiver_length) return -1;
1016 
1017   receiver = String::Flatten(isolate, receiver);
1018   search = String::Flatten(isolate, search);
1019 
1020   DisallowHeapAllocation no_gc;  // ensure vectors stay valid
1021   // Extract flattened substrings of cons strings before getting encoding.
1022   String::FlatContent receiver_content = receiver->GetFlatContent(no_gc);
1023   String::FlatContent search_content = search->GetFlatContent(no_gc);
1024 
1025   // dispatch on type of strings
1026   if (search_content.IsOneByte()) {
1027     Vector<const uint8_t> pat_vector = search_content.ToOneByteVector();
1028     return SearchString<const uint8_t>(isolate, receiver_content, pat_vector,
1029                                        start_index);
1030   }
1031   Vector<const uc16> pat_vector = search_content.ToUC16Vector();
1032   return SearchString<const uc16>(isolate, receiver_content, pat_vector,
1033                                   start_index);
1034 }
1035 
GetSubstitution(Isolate * isolate,Match * match,Handle<String> replacement,int start_index)1036 MaybeHandle<String> String::GetSubstitution(Isolate* isolate, Match* match,
1037                                             Handle<String> replacement,
1038                                             int start_index) {
1039   DCHECK_GE(start_index, 0);
1040 
1041   Factory* factory = isolate->factory();
1042 
1043   const int replacement_length = replacement->length();
1044   const int captures_length = match->CaptureCount();
1045 
1046   replacement = String::Flatten(isolate, replacement);
1047 
1048   Handle<String> dollar_string =
1049       factory->LookupSingleCharacterStringFromCode('$');
1050   int next_dollar_ix =
1051       String::IndexOf(isolate, replacement, dollar_string, start_index);
1052   if (next_dollar_ix < 0) {
1053     return replacement;
1054   }
1055 
1056   IncrementalStringBuilder builder(isolate);
1057 
1058   if (next_dollar_ix > 0) {
1059     builder.AppendString(factory->NewSubString(replacement, 0, next_dollar_ix));
1060   }
1061 
1062   while (true) {
1063     const int peek_ix = next_dollar_ix + 1;
1064     if (peek_ix >= replacement_length) {
1065       builder.AppendCharacter('$');
1066       return builder.Finish();
1067     }
1068 
1069     int continue_from_ix = -1;
1070     const uint16_t peek = replacement->Get(peek_ix);
1071     switch (peek) {
1072       case '$':  // $$
1073         builder.AppendCharacter('$');
1074         continue_from_ix = peek_ix + 1;
1075         break;
1076       case '&':  // $& - match
1077         builder.AppendString(match->GetMatch());
1078         continue_from_ix = peek_ix + 1;
1079         break;
1080       case '`':  // $` - prefix
1081         builder.AppendString(match->GetPrefix());
1082         continue_from_ix = peek_ix + 1;
1083         break;
1084       case '\'':  // $' - suffix
1085         builder.AppendString(match->GetSuffix());
1086         continue_from_ix = peek_ix + 1;
1087         break;
1088       case '0':
1089       case '1':
1090       case '2':
1091       case '3':
1092       case '4':
1093       case '5':
1094       case '6':
1095       case '7':
1096       case '8':
1097       case '9': {
1098         // Valid indices are $1 .. $9, $01 .. $09 and $10 .. $99
1099         int scaled_index = (peek - '0');
1100         int advance = 1;
1101 
1102         if (peek_ix + 1 < replacement_length) {
1103           const uint16_t next_peek = replacement->Get(peek_ix + 1);
1104           if (next_peek >= '0' && next_peek <= '9') {
1105             const int new_scaled_index = scaled_index * 10 + (next_peek - '0');
1106             if (new_scaled_index < captures_length) {
1107               scaled_index = new_scaled_index;
1108               advance = 2;
1109             }
1110           }
1111         }
1112 
1113         if (scaled_index == 0 || scaled_index >= captures_length) {
1114           builder.AppendCharacter('$');
1115           continue_from_ix = peek_ix;
1116           break;
1117         }
1118 
1119         bool capture_exists;
1120         Handle<String> capture;
1121         ASSIGN_RETURN_ON_EXCEPTION(
1122             isolate, capture, match->GetCapture(scaled_index, &capture_exists),
1123             String);
1124         if (capture_exists) builder.AppendString(capture);
1125         continue_from_ix = peek_ix + advance;
1126         break;
1127       }
1128       case '<': {  // $<name> - named capture
1129         using CaptureState = String::Match::CaptureState;
1130 
1131         if (!match->HasNamedCaptures()) {
1132           builder.AppendCharacter('$');
1133           continue_from_ix = peek_ix;
1134           break;
1135         }
1136 
1137         Handle<String> bracket_string =
1138             factory->LookupSingleCharacterStringFromCode('>');
1139         const int closing_bracket_ix =
1140             String::IndexOf(isolate, replacement, bracket_string, peek_ix + 1);
1141 
1142         if (closing_bracket_ix == -1) {
1143           // No closing bracket was found, treat '$<' as a string literal.
1144           builder.AppendCharacter('$');
1145           continue_from_ix = peek_ix;
1146           break;
1147         }
1148 
1149         Handle<String> capture_name =
1150             factory->NewSubString(replacement, peek_ix + 1, closing_bracket_ix);
1151         Handle<String> capture;
1152         CaptureState capture_state;
1153         ASSIGN_RETURN_ON_EXCEPTION(
1154             isolate, capture,
1155             match->GetNamedCapture(capture_name, &capture_state), String);
1156 
1157         if (capture_state == CaptureState::MATCHED) {
1158           builder.AppendString(capture);
1159         }
1160 
1161         continue_from_ix = closing_bracket_ix + 1;
1162         break;
1163       }
1164       default:
1165         builder.AppendCharacter('$');
1166         continue_from_ix = peek_ix;
1167         break;
1168     }
1169 
1170     // Go the the next $ in the replacement.
1171     // TODO(jgruber): Single-char lookups could be much more efficient.
1172     DCHECK_NE(continue_from_ix, -1);
1173     next_dollar_ix =
1174         String::IndexOf(isolate, replacement, dollar_string, continue_from_ix);
1175 
1176     // Return if there are no more $ characters in the replacement. If we
1177     // haven't reached the end, we need to append the suffix.
1178     if (next_dollar_ix < 0) {
1179       if (continue_from_ix < replacement_length) {
1180         builder.AppendString(factory->NewSubString(
1181             replacement, continue_from_ix, replacement_length));
1182       }
1183       return builder.Finish();
1184     }
1185 
1186     // Append substring between the previous and the next $ character.
1187     if (next_dollar_ix > continue_from_ix) {
1188       builder.AppendString(
1189           factory->NewSubString(replacement, continue_from_ix, next_dollar_ix));
1190     }
1191   }
1192 
1193   UNREACHABLE();
1194 }
1195 
1196 namespace {  // for String.Prototype.lastIndexOf
1197 
1198 template <typename schar, typename pchar>
StringMatchBackwards(Vector<const schar> subject,Vector<const pchar> pattern,int idx)1199 int StringMatchBackwards(Vector<const schar> subject,
1200                          Vector<const pchar> pattern, int idx) {
1201   int pattern_length = pattern.length();
1202   DCHECK_GE(pattern_length, 1);
1203   DCHECK(idx + pattern_length <= subject.length());
1204 
1205   if (sizeof(schar) == 1 && sizeof(pchar) > 1) {
1206     for (int i = 0; i < pattern_length; i++) {
1207       uc16 c = pattern[i];
1208       if (c > String::kMaxOneByteCharCode) {
1209         return -1;
1210       }
1211     }
1212   }
1213 
1214   pchar pattern_first_char = pattern[0];
1215   for (int i = idx; i >= 0; i--) {
1216     if (subject[i] != pattern_first_char) continue;
1217     int j = 1;
1218     while (j < pattern_length) {
1219       if (pattern[j] != subject[i + j]) {
1220         break;
1221       }
1222       j++;
1223     }
1224     if (j == pattern_length) {
1225       return i;
1226     }
1227   }
1228   return -1;
1229 }
1230 
1231 }  // namespace
1232 
LastIndexOf(Isolate * isolate,Handle<Object> receiver,Handle<Object> search,Handle<Object> position)1233 Object String::LastIndexOf(Isolate* isolate, Handle<Object> receiver,
1234                            Handle<Object> search, Handle<Object> position) {
1235   if (receiver->IsNullOrUndefined(isolate)) {
1236     THROW_NEW_ERROR_RETURN_FAILURE(
1237         isolate, NewTypeError(MessageTemplate::kCalledOnNullOrUndefined,
1238                               isolate->factory()->NewStringFromAsciiChecked(
1239                                   "String.prototype.lastIndexOf")));
1240   }
1241   Handle<String> receiver_string;
1242   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, receiver_string,
1243                                      Object::ToString(isolate, receiver));
1244 
1245   Handle<String> search_string;
1246   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, search_string,
1247                                      Object::ToString(isolate, search));
1248 
1249   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, position,
1250                                      Object::ToNumber(isolate, position));
1251 
1252   uint32_t start_index;
1253 
1254   if (position->IsNaN()) {
1255     start_index = receiver_string->length();
1256   } else {
1257     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, position,
1258                                        Object::ToInteger(isolate, position));
1259     start_index = receiver_string->ToValidIndex(*position);
1260   }
1261 
1262   uint32_t pattern_length = search_string->length();
1263   uint32_t receiver_length = receiver_string->length();
1264 
1265   if (start_index + pattern_length > receiver_length) {
1266     start_index = receiver_length - pattern_length;
1267   }
1268 
1269   if (pattern_length == 0) {
1270     return Smi::FromInt(start_index);
1271   }
1272 
1273   receiver_string = String::Flatten(isolate, receiver_string);
1274   search_string = String::Flatten(isolate, search_string);
1275 
1276   int last_index = -1;
1277   DisallowHeapAllocation no_gc;  // ensure vectors stay valid
1278 
1279   String::FlatContent receiver_content = receiver_string->GetFlatContent(no_gc);
1280   String::FlatContent search_content = search_string->GetFlatContent(no_gc);
1281 
1282   if (search_content.IsOneByte()) {
1283     Vector<const uint8_t> pat_vector = search_content.ToOneByteVector();
1284     if (receiver_content.IsOneByte()) {
1285       last_index = StringMatchBackwards(receiver_content.ToOneByteVector(),
1286                                         pat_vector, start_index);
1287     } else {
1288       last_index = StringMatchBackwards(receiver_content.ToUC16Vector(),
1289                                         pat_vector, start_index);
1290     }
1291   } else {
1292     Vector<const uc16> pat_vector = search_content.ToUC16Vector();
1293     if (receiver_content.IsOneByte()) {
1294       last_index = StringMatchBackwards(receiver_content.ToOneByteVector(),
1295                                         pat_vector, start_index);
1296     } else {
1297       last_index = StringMatchBackwards(receiver_content.ToUC16Vector(),
1298                                         pat_vector, start_index);
1299     }
1300   }
1301   return Smi::FromInt(last_index);
1302 }
1303 
1304 template <>
IsEqualTo(Vector<const uint8_t> str)1305 bool String::IsEqualTo(Vector<const uint8_t> str) {
1306   return IsOneByteEqualTo(str);
1307 }
1308 
1309 template <>
IsEqualTo(Vector<const uc16> str)1310 bool String::IsEqualTo(Vector<const uc16> str) {
1311   return IsTwoByteEqualTo(str);
1312 }
1313 
HasOneBytePrefix(Vector<const char> str)1314 bool String::HasOneBytePrefix(Vector<const char> str) {
1315   int slen = str.length();
1316   if (slen > length()) return false;
1317   DisallowHeapAllocation no_gc;
1318   FlatContent content = GetFlatContent(no_gc);
1319   if (content.IsOneByte()) {
1320     return CompareChars(content.ToOneByteVector().begin(), str.begin(), slen) ==
1321            0;
1322   }
1323   return CompareChars(content.ToUC16Vector().begin(), str.begin(), slen) == 0;
1324 }
1325 
IsOneByteEqualTo(Vector<const uint8_t> str)1326 bool String::IsOneByteEqualTo(Vector<const uint8_t> str) {
1327   int slen = length();
1328   if (str.length() != slen) return false;
1329   DisallowHeapAllocation no_gc;
1330   FlatContent content = GetFlatContent(no_gc);
1331   if (content.IsOneByte()) {
1332     return CompareChars(content.ToOneByteVector().begin(), str.begin(), slen) ==
1333            0;
1334   }
1335   return CompareChars(content.ToUC16Vector().begin(), str.begin(), slen) == 0;
1336 }
1337 
IsTwoByteEqualTo(Vector<const uc16> str)1338 bool String::IsTwoByteEqualTo(Vector<const uc16> str) {
1339   int slen = length();
1340   if (str.length() != slen) return false;
1341   DisallowHeapAllocation no_gc;
1342   FlatContent content = GetFlatContent(no_gc);
1343   if (content.IsOneByte()) {
1344     return CompareChars(content.ToOneByteVector().begin(), str.begin(), slen) ==
1345            0;
1346   }
1347   return CompareChars(content.ToUC16Vector().begin(), str.begin(), slen) == 0;
1348 }
1349 
1350 namespace {
1351 
1352 template <typename Char>
HashString(String string,size_t start,int length,uint64_t seed)1353 uint32_t HashString(String string, size_t start, int length, uint64_t seed) {
1354   DisallowHeapAllocation no_gc;
1355 
1356   if (length > String::kMaxHashCalcLength) {
1357     return StringHasher::GetTrivialHash(length);
1358   }
1359 
1360   std::unique_ptr<Char[]> buffer;
1361   const Char* chars;
1362 
1363   if (string.IsConsString()) {
1364     DCHECK_EQ(0, start);
1365     DCHECK(!string.IsFlat());
1366     buffer.reset(new Char[length]);
1367     String::WriteToFlat(string, buffer.get(), 0, length);
1368     chars = buffer.get();
1369   } else {
1370     chars = string.GetChars<Char>(no_gc) + start;
1371   }
1372 
1373   return StringHasher::HashSequentialString<Char>(chars, length, seed);
1374 }
1375 
1376 }  // namespace
1377 
ComputeAndSetHash()1378 uint32_t String::ComputeAndSetHash() {
1379   DisallowHeapAllocation no_gc;
1380   // Should only be called if hash code has not yet been computed.
1381   DCHECK(!HasHashCode());
1382 
1383   // Store the hash code in the object.
1384   uint64_t seed = HashSeed(GetReadOnlyRoots());
1385   size_t start = 0;
1386   String string = *this;
1387   if (string.IsSlicedString()) {
1388     SlicedString sliced = SlicedString::cast(string);
1389     start = sliced.offset();
1390     string = sliced.parent();
1391   }
1392   if (string.IsConsString() && string.IsFlat()) {
1393     string = ConsString::cast(string).first();
1394   }
1395   if (string.IsThinString()) {
1396     string = ThinString::cast(string).actual();
1397     if (length() == string.length()) {
1398       set_hash_field(string.hash_field());
1399       return hash_field() >> kHashShift;
1400     }
1401   }
1402   uint32_t field = string.IsOneByteRepresentation()
1403                        ? HashString<uint8_t>(string, start, length(), seed)
1404                        : HashString<uint16_t>(string, start, length(), seed);
1405   set_hash_field(field);
1406 
1407   // Check the hash code is there.
1408   DCHECK(HasHashCode());
1409   uint32_t result = field >> kHashShift;
1410   DCHECK_NE(result, 0);  // Ensure that the hash value of 0 is never computed.
1411   return result;
1412 }
1413 
SlowAsArrayIndex(uint32_t * index)1414 bool String::SlowAsArrayIndex(uint32_t* index) {
1415   DisallowHeapAllocation no_gc;
1416   int length = this->length();
1417   if (length <= kMaxCachedArrayIndexLength) {
1418     Hash();  // Force computation of hash code.
1419     uint32_t field = hash_field();
1420     if ((field & kIsNotIntegerIndexMask) != 0) return false;
1421     *index = ArrayIndexValueBits::decode(field);
1422     return true;
1423   }
1424   if (length == 0 || length > kMaxArrayIndexSize) return false;
1425   StringCharacterStream stream(*this);
1426   return StringToIndex(&stream, index);
1427 }
1428 
SlowAsIntegerIndex(size_t * index)1429 bool String::SlowAsIntegerIndex(size_t* index) {
1430   DisallowHeapAllocation no_gc;
1431   int length = this->length();
1432   if (length <= kMaxCachedArrayIndexLength) {
1433     Hash();  // Force computation of hash code.
1434     uint32_t field = hash_field();
1435     if ((field & kIsNotIntegerIndexMask) != 0) return false;
1436     *index = ArrayIndexValueBits::decode(field);
1437     return true;
1438   }
1439   if (length == 0 || length > kMaxIntegerIndexSize) return false;
1440   StringCharacterStream stream(*this);
1441   return StringToIndex<StringCharacterStream, size_t, kToIntegerIndex>(&stream,
1442                                                                        index);
1443 }
1444 
PrintOn(FILE * file)1445 void String::PrintOn(FILE* file) {
1446   int length = this->length();
1447   for (int i = 0; i < length; i++) {
1448     PrintF(file, "%c", Get(i));
1449   }
1450 }
1451 
Truncate(Handle<SeqString> string,int new_length)1452 Handle<String> SeqString::Truncate(Handle<SeqString> string, int new_length) {
1453   if (new_length == 0) return string->GetReadOnlyRoots().empty_string_handle();
1454 
1455   int new_size, old_size;
1456   int old_length = string->length();
1457   if (old_length <= new_length) return string;
1458 
1459   if (string->IsSeqOneByteString()) {
1460     old_size = SeqOneByteString::SizeFor(old_length);
1461     new_size = SeqOneByteString::SizeFor(new_length);
1462   } else {
1463     DCHECK(string->IsSeqTwoByteString());
1464     old_size = SeqTwoByteString::SizeFor(old_length);
1465     new_size = SeqTwoByteString::SizeFor(new_length);
1466   }
1467 
1468   int delta = old_size - new_size;
1469 
1470   Address start_of_string = string->address();
1471   DCHECK(IsAligned(start_of_string, kObjectAlignment));
1472   DCHECK(IsAligned(start_of_string + new_size, kObjectAlignment));
1473 
1474   Heap* heap = Heap::FromWritableHeapObject(*string);
1475   // Sizes are pointer size aligned, so that we can use filler objects
1476   // that are a multiple of pointer size.
1477   heap->CreateFillerObjectAt(start_of_string + new_size, delta,
1478                              ClearRecordedSlots::kNo);
1479   // We are storing the new length using release store after creating a filler
1480   // for the left-over space to avoid races with the sweeper thread.
1481   string->synchronized_set_length(new_length);
1482 
1483   return string;
1484 }
1485 
clear_padding()1486 void SeqOneByteString::clear_padding() {
1487   int data_size = SeqString::kHeaderSize + length() * kOneByteSize;
1488   memset(reinterpret_cast<void*>(address() + data_size), 0,
1489          SizeFor(length()) - data_size);
1490 }
1491 
clear_padding()1492 void SeqTwoByteString::clear_padding() {
1493   int data_size = SeqString::kHeaderSize + length() * kUC16Size;
1494   memset(reinterpret_cast<void*>(address() + data_size), 0,
1495          SizeFor(length()) - data_size);
1496 }
1497 
Get(int index)1498 uint16_t ConsString::Get(int index) {
1499   DCHECK(index >= 0 && index < this->length());
1500 
1501   // Check for a flattened cons string
1502   if (second().length() == 0) {
1503     String left = first();
1504     return left.Get(index);
1505   }
1506 
1507   String string = String::cast(*this);
1508 
1509   while (true) {
1510     if (StringShape(string).IsCons()) {
1511       ConsString cons_string = ConsString::cast(string);
1512       String left = cons_string.first();
1513       if (left.length() > index) {
1514         string = left;
1515       } else {
1516         index -= left.length();
1517         string = cons_string.second();
1518       }
1519     } else {
1520       return string.Get(index);
1521     }
1522   }
1523 
1524   UNREACHABLE();
1525 }
1526 
Get(int index)1527 uint16_t ThinString::Get(int index) { return actual().Get(index); }
1528 
Get(int index)1529 uint16_t SlicedString::Get(int index) { return parent().Get(offset() + index); }
1530 
ExternalPayloadSize() const1531 int ExternalString::ExternalPayloadSize() const {
1532   int length_multiplier = IsTwoByteRepresentation() ? i::kShortSize : kCharSize;
1533   return length() * length_multiplier;
1534 }
1535 
FlatStringReader(Isolate * isolate,Handle<String> str)1536 FlatStringReader::FlatStringReader(Isolate* isolate, Handle<String> str)
1537     : Relocatable(isolate), str_(str), length_(str->length()) {
1538 #if DEBUG
1539   // Check that this constructor is called only from the main thread.
1540   DCHECK_EQ(ThreadId::Current(), isolate->thread_id());
1541 #endif
1542   PostGarbageCollection();
1543 }
1544 
PostGarbageCollection()1545 void FlatStringReader::PostGarbageCollection() {
1546   DCHECK(str_->IsFlat());
1547   DisallowHeapAllocation no_gc;
1548   // This does not actually prevent the vector from being relocated later.
1549   String::FlatContent content = str_->GetFlatContent(no_gc);
1550   DCHECK(content.IsFlat());
1551   is_one_byte_ = content.IsOneByte();
1552   if (is_one_byte_) {
1553     start_ = content.ToOneByteVector().begin();
1554   } else {
1555     start_ = content.ToUC16Vector().begin();
1556   }
1557 }
1558 
Initialize(ConsString cons_string,int offset)1559 void ConsStringIterator::Initialize(ConsString cons_string, int offset) {
1560   DCHECK(!cons_string.is_null());
1561   root_ = cons_string;
1562   consumed_ = offset;
1563   // Force stack blown condition to trigger restart.
1564   depth_ = 1;
1565   maximum_depth_ = kStackSize + depth_;
1566   DCHECK(StackBlown());
1567 }
1568 
Continue(int * offset_out)1569 String ConsStringIterator::Continue(int* offset_out) {
1570   DCHECK_NE(depth_, 0);
1571   DCHECK_EQ(0, *offset_out);
1572   bool blew_stack = StackBlown();
1573   String string;
1574   // Get the next leaf if there is one.
1575   if (!blew_stack) string = NextLeaf(&blew_stack);
1576   // Restart search from root.
1577   if (blew_stack) {
1578     DCHECK(string.is_null());
1579     string = Search(offset_out);
1580   }
1581   // Ensure future calls return null immediately.
1582   if (string.is_null()) Reset(ConsString());
1583   return string;
1584 }
1585 
Search(int * offset_out)1586 String ConsStringIterator::Search(int* offset_out) {
1587   ConsString cons_string = root_;
1588   // Reset the stack, pushing the root string.
1589   depth_ = 1;
1590   maximum_depth_ = 1;
1591   frames_[0] = cons_string;
1592   const int consumed = consumed_;
1593   int offset = 0;
1594   while (true) {
1595     // Loop until the string is found which contains the target offset.
1596     String string = cons_string.first();
1597     int length = string.length();
1598     int32_t type;
1599     if (consumed < offset + length) {
1600       // Target offset is in the left branch.
1601       // Keep going if we're still in a ConString.
1602       type = string.map().instance_type();
1603       if ((type & kStringRepresentationMask) == kConsStringTag) {
1604         cons_string = ConsString::cast(string);
1605         PushLeft(cons_string);
1606         continue;
1607       }
1608       // Tell the stack we're done descending.
1609       AdjustMaximumDepth();
1610     } else {
1611       // Descend right.
1612       // Update progress through the string.
1613       offset += length;
1614       // Keep going if we're still in a ConString.
1615       string = cons_string.second();
1616       type = string.map().instance_type();
1617       if ((type & kStringRepresentationMask) == kConsStringTag) {
1618         cons_string = ConsString::cast(string);
1619         PushRight(cons_string);
1620         continue;
1621       }
1622       // Need this to be updated for the current string.
1623       length = string.length();
1624       // Account for the possibility of an empty right leaf.
1625       // This happens only if we have asked for an offset outside the string.
1626       if (length == 0) {
1627         // Reset so future operations will return null immediately.
1628         Reset(ConsString());
1629         return String();
1630       }
1631       // Tell the stack we're done descending.
1632       AdjustMaximumDepth();
1633       // Pop stack so next iteration is in correct place.
1634       Pop();
1635     }
1636     DCHECK_NE(length, 0);
1637     // Adjust return values and exit.
1638     consumed_ = offset + length;
1639     *offset_out = consumed - offset;
1640     return string;
1641   }
1642   UNREACHABLE();
1643 }
1644 
NextLeaf(bool * blew_stack)1645 String ConsStringIterator::NextLeaf(bool* blew_stack) {
1646   while (true) {
1647     // Tree traversal complete.
1648     if (depth_ == 0) {
1649       *blew_stack = false;
1650       return String();
1651     }
1652     // We've lost track of higher nodes.
1653     if (StackBlown()) {
1654       *blew_stack = true;
1655       return String();
1656     }
1657     // Go right.
1658     ConsString cons_string = frames_[OffsetForDepth(depth_ - 1)];
1659     String string = cons_string.second();
1660     int32_t type = string.map().instance_type();
1661     if ((type & kStringRepresentationMask) != kConsStringTag) {
1662       // Pop stack so next iteration is in correct place.
1663       Pop();
1664       int length = string.length();
1665       // Could be a flattened ConsString.
1666       if (length == 0) continue;
1667       consumed_ += length;
1668       return string;
1669     }
1670     cons_string = ConsString::cast(string);
1671     PushRight(cons_string);
1672     // Need to traverse all the way left.
1673     while (true) {
1674       // Continue left.
1675       string = cons_string.first();
1676       type = string.map().instance_type();
1677       if ((type & kStringRepresentationMask) != kConsStringTag) {
1678         AdjustMaximumDepth();
1679         int length = string.length();
1680         if (length == 0) break;  // Skip empty left-hand sides of ConsStrings.
1681         consumed_ += length;
1682         return string;
1683       }
1684       cons_string = ConsString::cast(string);
1685       PushLeft(cons_string);
1686     }
1687   }
1688   UNREACHABLE();
1689 }
1690 
AddressOfCharacterAt(int start_index,const DisallowHeapAllocation & no_gc)1691 const byte* String::AddressOfCharacterAt(int start_index,
1692                                          const DisallowHeapAllocation& no_gc) {
1693   DCHECK(IsFlat());
1694   String subject = *this;
1695   if (subject.IsConsString()) {
1696     subject = ConsString::cast(subject).first();
1697   } else if (subject.IsSlicedString()) {
1698     start_index += SlicedString::cast(subject).offset();
1699     subject = SlicedString::cast(subject).parent();
1700   }
1701   if (subject.IsThinString()) {
1702     subject = ThinString::cast(subject).actual();
1703   }
1704   CHECK_LE(0, start_index);
1705   CHECK_LE(start_index, subject.length());
1706   if (subject.IsSeqOneByteString()) {
1707     return reinterpret_cast<const byte*>(
1708         SeqOneByteString::cast(subject).GetChars(no_gc) + start_index);
1709   } else if (subject.IsSeqTwoByteString()) {
1710     return reinterpret_cast<const byte*>(
1711         SeqTwoByteString::cast(subject).GetChars(no_gc) + start_index);
1712   } else if (subject.IsExternalOneByteString()) {
1713     return reinterpret_cast<const byte*>(
1714         ExternalOneByteString::cast(subject).GetChars() + start_index);
1715   } else {
1716     DCHECK(subject.IsExternalTwoByteString());
1717     return reinterpret_cast<const byte*>(
1718         ExternalTwoByteString::cast(subject).GetChars() + start_index);
1719   }
1720 }
1721 
1722 template EXPORT_TEMPLATE_DEFINE(V8_EXPORT_PRIVATE) void String::WriteToFlat(
1723     String source, uint16_t* sink, int from, int to);
1724 
1725 }  // namespace internal
1726 }  // namespace v8
1727