• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2019 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "src/objects/string.h"
6 
7 #include "src/base/platform/yield-processor.h"
8 #include "src/common/assert-scope.h"
9 #include "src/common/globals.h"
10 #include "src/execution/isolate-utils.h"
11 #include "src/execution/thread-id.h"
12 #include "src/handles/handles-inl.h"
13 #include "src/heap/heap-inl.h"
14 #include "src/heap/local-factory-inl.h"
15 #include "src/heap/local-heap-inl.h"
16 #include "src/heap/memory-chunk.h"
17 #include "src/heap/read-only-heap.h"
18 #include "src/numbers/conversions.h"
19 #include "src/objects/instance-type.h"
20 #include "src/objects/map.h"
21 #include "src/objects/oddball.h"
22 #include "src/objects/string-comparator.h"
23 #include "src/objects/string-inl.h"
24 #include "src/strings/char-predicates.h"
25 #include "src/strings/string-builder-inl.h"
26 #include "src/strings/string-hasher.h"
27 #include "src/strings/string-search.h"
28 #include "src/strings/string-stream.h"
29 #include "src/strings/unicode-inl.h"
30 #include "src/utils/ostreams.h"
31 
32 namespace v8 {
33 namespace internal {
34 
SlowFlatten(Isolate * isolate,Handle<ConsString> cons,AllocationType allocation)35 Handle<String> String::SlowFlatten(Isolate* isolate, Handle<ConsString> cons,
36                                    AllocationType allocation) {
37   DCHECK_NE(cons->second().length(), 0);
38   DCHECK(!cons->InSharedHeap());
39 
40   // TurboFan can create cons strings with empty first parts.
41   while (cons->first().length() == 0) {
42     // We do not want to call this function recursively. Therefore we call
43     // String::Flatten only in those cases where String::SlowFlatten is not
44     // called again.
45     if (cons->second().IsConsString() && !cons->second().IsFlat()) {
46       cons = handle(ConsString::cast(cons->second()), isolate);
47     } else {
48       return String::Flatten(isolate, handle(cons->second(), isolate),
49                              allocation);
50     }
51   }
52 
53   DCHECK(AllowGarbageCollection::IsAllowed());
54   int length = cons->length();
55   if (allocation != AllocationType::kSharedOld) {
56     allocation =
57         ObjectInYoungGeneration(*cons) ? allocation : AllocationType::kOld;
58   }
59   Handle<SeqString> result;
60   if (cons->IsOneByteRepresentation()) {
61     Handle<SeqOneByteString> flat =
62         isolate->factory()
63             ->NewRawOneByteString(length, allocation)
64             .ToHandleChecked();
65     DisallowGarbageCollection no_gc;
66     WriteToFlat(*cons, flat->GetChars(no_gc), 0, length);
67     result = flat;
68   } else {
69     Handle<SeqTwoByteString> flat =
70         isolate->factory()
71             ->NewRawTwoByteString(length, allocation)
72             .ToHandleChecked();
73     DisallowGarbageCollection no_gc;
74     WriteToFlat(*cons, flat->GetChars(no_gc), 0, length);
75     result = flat;
76   }
77   cons->set_first(*result);
78   cons->set_second(ReadOnlyRoots(isolate).empty_string());
79   DCHECK(result->IsFlat());
80   return result;
81 }
82 
SlowShare(Isolate * isolate,Handle<String> source)83 Handle<String> String::SlowShare(Isolate* isolate, Handle<String> source) {
84   DCHECK(FLAG_shared_string_table);
85   Handle<String> flat = Flatten(isolate, source, AllocationType::kSharedOld);
86 
87   // Do not recursively call Share, so directly compute the sharing strategy for
88   // the flat string, which could already be a copy or an existing string from
89   // e.g. a shortcut ConsString.
90   MaybeHandle<Map> new_map;
91   switch (isolate->factory()->ComputeSharingStrategyForString(flat, &new_map)) {
92     case StringTransitionStrategy::kCopy:
93       break;
94     case StringTransitionStrategy::kInPlace:
95       // A relaxed write is sufficient here, because at this point the string
96       // has not yet escaped the current thread.
97       DCHECK(flat->InSharedHeap());
98       flat->set_map_no_write_barrier(*new_map.ToHandleChecked());
99       return flat;
100     case StringTransitionStrategy::kAlreadyTransitioned:
101       return flat;
102   }
103 
104   int length = flat->length();
105   if (flat->IsOneByteRepresentation()) {
106     Handle<SeqOneByteString> copy =
107         isolate->factory()->NewRawSharedOneByteString(length).ToHandleChecked();
108     DisallowGarbageCollection no_gc;
109     WriteToFlat(*flat, copy->GetChars(no_gc), 0, length);
110     return copy;
111   }
112   Handle<SeqTwoByteString> copy =
113       isolate->factory()->NewRawSharedTwoByteString(length).ToHandleChecked();
114   DisallowGarbageCollection no_gc;
115   WriteToFlat(*flat, copy->GetChars(no_gc), 0, length);
116   return copy;
117 }
118 
119 namespace {
120 
121 template <class StringClass>
MigrateExternalStringResource(Isolate * isolate,ExternalString from,StringClass to)122 void MigrateExternalStringResource(Isolate* isolate, ExternalString from,
123                                    StringClass to) {
124   Address to_resource_address = to.resource_as_address();
125   if (to_resource_address == kNullAddress) {
126     StringClass cast_from = StringClass::cast(from);
127     // |to| is a just-created internalized copy of |from|. Migrate the resource.
128     to.SetResource(isolate, cast_from.resource());
129     // Zap |from|'s resource pointer to reflect the fact that |from| has
130     // relinquished ownership of its resource.
131     isolate->heap()->UpdateExternalString(
132         from, ExternalString::cast(from).ExternalPayloadSize(), 0);
133     cast_from.SetResource(isolate, nullptr);
134   } else if (to_resource_address != from.resource_as_address()) {
135     // |to| already existed and has its own resource. Finalize |from|.
136     isolate->heap()->FinalizeExternalString(from);
137   }
138 }
139 
MigrateExternalString(Isolate * isolate,String string,String internalized)140 void MigrateExternalString(Isolate* isolate, String string,
141                            String internalized) {
142   if (internalized.IsExternalOneByteString()) {
143     MigrateExternalStringResource(isolate, ExternalString::cast(string),
144                                   ExternalOneByteString::cast(internalized));
145   } else if (internalized.IsExternalTwoByteString()) {
146     MigrateExternalStringResource(isolate, ExternalString::cast(string),
147                                   ExternalTwoByteString::cast(internalized));
148   } else {
149     // If the external string is duped into an existing non-external
150     // internalized string, free its resource (it's about to be rewritten
151     // into a ThinString below).
152     isolate->heap()->FinalizeExternalString(string);
153   }
154 }
155 
156 template <typename IsolateT>
ComputeThinStringMap(IsolateT * isolate,StringShape from_string_shape,bool one_byte)157 Map ComputeThinStringMap(IsolateT* isolate, StringShape from_string_shape,
158                          bool one_byte) {
159   ReadOnlyRoots roots(isolate);
160   if (from_string_shape.IsShared()) {
161     return one_byte ? roots.shared_thin_one_byte_string_map()
162                     : roots.shared_thin_string_map();
163   }
164   return one_byte ? roots.thin_one_byte_string_map() : roots.thin_string_map();
165 }
166 
167 enum class StringMigrationResult {
168   kThisThreadMigrated,
169   kAnotherThreadMigrated
170 };
171 
172 // This function must be used when migrating strings whose
173 // StringShape::CanMigrateInParallel() is true. It encapsulates the
174 // synchronization needed for parallel migrations from multiple threads. The
175 // user passes a lambda to perform to update the representation.
176 //
177 // Returns whether this thread successfully migrated the string or another
178 // thread did so.
179 //
180 // The locking algorithm to migrate a String uses its map word as a migration
181 // lock:
182 //
183 //   map = string.map(kAcquireLoad);
184 //   if (map != SENTINEL_MAP &&
185 //       string.compare_and_swap_map(map, SENTINEL_MAP)) {
186 //     // Lock acquired, i.e. the string's map is SENTINEL_MAP.
187 //   } else {
188 //     // Lock not acquired. Another thread set the sentinel. Spin until the
189 //     // map is no longer the sentinel, i.e. until the other thread
190 //     // releases the lock.
191 //     Map reloaded_map;
192 //     do {
193 //       reloaded_map = string.map(kAcquireLoad);
194 //     } while (reloaded_map == SENTINEL_MAP);
195 //   }
196 //
197 // Some notes on usage:
198 // - The initial map must be loaded with kAcquireLoad for synchronization.
199 // - Avoid loading the map multiple times. Load the map once and branch
200 //   on that.
201 // - The lambda is passed the string and its initial (pre-migration)
202 //   StringShape.
203 // - The lambda may be executed under a spinlock, so it should be as short
204 //   as possible.
205 // - Currently only SeqString -> ThinString migrations can happen in
206 //   parallel. If kAnotherThreadMigrated is returned, then the caller doesn't
207 //   need to do any other work. In the future, if additional migrations can
208 //   happen in parallel, then restarts may be needed if the parallel migration
209 //   was to a different type (e.g. SeqString -> External).
210 //
211 // Example:
212 //
213 //   DisallowGarbageCollection no_gc;
214 //   Map initial_map = string.map(kAcquireLoad);
215 //   switch (MigrateStringMapUnderLockIfNeeded(
216 //     isolate, string, initial_map, target_map,
217 //     [](Isolate* isolate, String string, StringShape initial_shape) {
218 //       auto t = TargetStringType::unchecked_cast(string);
219 //       t.set_field(foo);
220 //       t.set_another_field(bar);
221 //     }, no_gc);
222 //
223 template <typename IsolateT, typename Callback>
MigrateStringMapUnderLockIfNeeded(IsolateT * isolate,String string,Map initial_map,Map target_map,Callback update_representation,const DisallowGarbageCollection & no_gc)224 StringMigrationResult MigrateStringMapUnderLockIfNeeded(
225     IsolateT* isolate, String string, Map initial_map, Map target_map,
226     Callback update_representation, const DisallowGarbageCollection& no_gc) {
227   USE(no_gc);
228 
229   InstanceType initial_type = initial_map.instance_type();
230   StringShape initial_shape(initial_type);
231 
232   if (initial_shape.CanMigrateInParallel()) {
233     // A string whose map is a sentinel map means that it is in the critical
234     // section for being migrated to a different map. There are multiple
235     // sentinel maps: one for each InstanceType that may be migrated from.
236     Map sentinel_map =
237         *isolate->factory()->GetStringMigrationSentinelMap(initial_type);
238 
239     // Try to acquire the migration lock by setting the string's map to the
240     // sentinel map. Note that it's possible that we've already witnessed a
241     // sentinel map.
242     if (initial_map == sentinel_map ||
243         !string.release_compare_and_swap_map_word(
244             MapWord::FromMap(initial_map), MapWord::FromMap(sentinel_map))) {
245       // If the lock couldn't be acquired, another thread must be migrating this
246       // string. The string's map will be the sentinel map until the migration
247       // is finished. Spin until the map is no longer the sentinel map.
248       //
249       // TODO(v8:12007): Replace this spin lock with a ParkingLot-like
250       // primitive.
251       Map reloaded_map = string.map(kAcquireLoad);
252       while (reloaded_map == sentinel_map) {
253         YIELD_PROCESSOR;
254         reloaded_map = string.map(kAcquireLoad);
255       }
256 
257       // Another thread must have migrated once the map is no longer the
258       // sentinel map.
259       //
260       // TODO(v8:12007): At time of writing there is only a single kind of
261       // migration that can happen in parallel: SeqString -> ThinString. If
262       // other parallel migrations are added, this DCHECK will fail, and users
263       // of MigrateStringMapUnderLockIfNeeded would need to restart if the
264       // string was migrated to a different map than target_map.
265       DCHECK_EQ(reloaded_map, target_map);
266       return StringMigrationResult::kAnotherThreadMigrated;
267     }
268   }
269 
270   // With the lock held for cases where it's needed, do the work to update the
271   // representation before storing the map word. In addition to parallel
272   // migrations, this also ensures that the concurrent marker will read the
273   // updated representation when visiting migrated strings.
274   update_representation(isolate, string, initial_shape);
275 
276   // Do the store on the map word.
277   //
278   // In debug mode, do a compare-and-swap that is checked to succeed, to check
279   // that all string map migrations are using this function, since to be in the
280   // migration critical section, the string's current map must be the sentinel
281   // map.
282   //
283   // Otherwise do a normal release store.
284   if (DEBUG_BOOL && initial_shape.CanMigrateInParallel()) {
285     DCHECK_NE(initial_map, target_map);
286     Map sentinel_map =
287         *isolate->factory()->GetStringMigrationSentinelMap(initial_type);
288     CHECK(string.release_compare_and_swap_map_word(
289         MapWord::FromMap(sentinel_map), MapWord::FromMap(target_map)));
290   } else {
291     string.set_map_safe_transition(target_map, kReleaseStore);
292   }
293 
294   return StringMigrationResult::kThisThreadMigrated;
295 }
296 
297 }  // namespace
298 
299 template <typename IsolateT>
MakeThin(IsolateT * isolate,String internalized)300 void String::MakeThin(IsolateT* isolate, String internalized) {
301   DisallowGarbageCollection no_gc;
302   DCHECK_NE(*this, internalized);
303   DCHECK(internalized.IsInternalizedString());
304 
305   // Load the map once at the beginning and use it to query for the shape of the
306   // string to avoid reloading the map in case of parallel migrations. See
307   // comment above for MigrateStringMapUnderLockIfNeeded.
308   Map initial_map = this->map(kAcquireLoad);
309   StringShape initial_shape(initial_map);
310 
311   // TODO(v8:12007): Support shared ThinStrings.
312   //
313   // Currently in-place migrations to ThinStrings are disabled for shared
314   // strings to unblock prototyping.
315   if (initial_shape.IsShared()) return;
316   DCHECK(!initial_shape.IsThin());
317 
318   bool has_pointers = initial_shape.IsIndirect();
319   int old_size = this->SizeFromMap(initial_map);
320   Map target_map = ComputeThinStringMap(isolate, initial_shape,
321                                         internalized.IsOneByteRepresentation());
322   switch (MigrateStringMapUnderLockIfNeeded(
323       isolate, *this, initial_map, target_map,
324       [=](IsolateT* isolate, String string, StringShape initial_shape) {
325         if (initial_shape.IsExternal()) {
326           // TODO(v8:12007): Support external strings.
327           DCHECK(!initial_shape.IsShared());
328           MigrateExternalString(isolate->AsIsolate(), string, internalized);
329         }
330 
331         ThinString::unchecked_cast(string).set_actual(internalized);
332         DCHECK_GE(old_size, ThinString::kSize);
333       },
334       no_gc)) {
335     case StringMigrationResult::kThisThreadMigrated:
336       // Overwrite character data with the filler below.
337       break;
338     case StringMigrationResult::kAnotherThreadMigrated:
339       // Nothing to do.
340       //
341       // TODO(v8:12007): Support shared ThinStrings.
342       UNREACHABLE();
343   }
344 
345   ThinString thin = ThinString::cast(*this);
346   Address thin_end = thin.address() + ThinString::kSize;
347   int size_delta = old_size - ThinString::kSize;
348   if (size_delta != 0) {
349     if (!Heap::IsLargeObject(thin)) {
350       isolate->heap()->CreateFillerObjectAt(
351           thin_end, size_delta,
352           has_pointers ? ClearRecordedSlots::kYes : ClearRecordedSlots::kNo);
353     } else {
354       // We don't need special handling for the combination IsLargeObject &&
355       // has_pointers, because indirect strings never get that large.
356       DCHECK(!has_pointers);
357     }
358   }
359 }
360 
361 template EXPORT_TEMPLATE_DEFINE(V8_EXPORT_PRIVATE) void String::MakeThin(
362     Isolate* isolate, String internalized);
363 template EXPORT_TEMPLATE_DEFINE(V8_EXPORT_PRIVATE) void String::MakeThin(
364     LocalIsolate* isolate, String internalized);
365 
MakeExternal(v8::String::ExternalStringResource * resource)366 bool String::MakeExternal(v8::String::ExternalStringResource* resource) {
367   // Disallow garbage collection to avoid possible GC vs string access deadlock.
368   DisallowGarbageCollection no_gc;
369 
370   // Externalizing twice leaks the external resource, so it's
371   // prohibited by the API.
372   DCHECK(this->SupportsExternalization());
373   DCHECK(resource->IsCacheable());
374 #ifdef ENABLE_SLOW_DCHECKS
375   if (FLAG_enable_slow_asserts) {
376     // Assert that the resource and the string are equivalent.
377     DCHECK(static_cast<size_t>(this->length()) == resource->length());
378     base::ScopedVector<base::uc16> smart_chars(this->length());
379     String::WriteToFlat(*this, smart_chars.begin(), 0, this->length());
380     DCHECK_EQ(0, memcmp(smart_chars.begin(), resource->data(),
381                         resource->length() * sizeof(smart_chars[0])));
382   }
383 #endif                      // DEBUG
384   int size = this->Size();  // Byte size of the original string.
385   // Abort if size does not allow in-place conversion.
386   if (size < ExternalString::kUncachedSize) return false;
387   // Read-only strings cannot be made external, since that would mutate the
388   // string.
389   if (IsReadOnlyHeapObject(*this)) return false;
390   Isolate* isolate = GetIsolateFromWritableObject(*this);
391   bool is_internalized = this->IsInternalizedString();
392   bool has_pointers = StringShape(*this).IsIndirect();
393 
394   if (has_pointers) {
395     isolate->heap()->NotifyObjectLayoutChange(*this, no_gc,
396                                               InvalidateRecordedSlots::kYes);
397   }
398 
399   base::SharedMutexGuard<base::kExclusive> shared_mutex_guard(
400       isolate->internalized_string_access());
401   // Morph the string to an external string by replacing the map and
402   // reinitializing the fields.  This won't work if the space the existing
403   // string occupies is too small for a regular external string.  Instead, we
404   // resort to an uncached external string instead, omitting the field caching
405   // the address of the backing store.  When we encounter uncached external
406   // strings in generated code, we need to bailout to runtime.
407   Map new_map;
408   ReadOnlyRoots roots(isolate);
409   if (size < ExternalString::kSizeOfAllExternalStrings) {
410     if (is_internalized) {
411       new_map = roots.uncached_external_internalized_string_map();
412     } else {
413       new_map = roots.uncached_external_string_map();
414     }
415   } else {
416     new_map = is_internalized ? roots.external_internalized_string_map()
417                               : roots.external_string_map();
418   }
419 
420   // Byte size of the external String object.
421   int new_size = this->SizeFromMap(new_map);
422   if (!isolate->heap()->IsLargeObject(*this)) {
423     isolate->heap()->CreateFillerObjectAt(
424         this->address() + new_size, size - new_size,
425         has_pointers ? ClearRecordedSlots::kYes : ClearRecordedSlots::kNo);
426   } else {
427     // We don't need special handling for the combination IsLargeObject &&
428     // has_pointers, because indirect strings never get that large.
429     DCHECK(!has_pointers);
430   }
431 
432   // We are storing the new map using release store after creating a filler for
433   // the left-over space to avoid races with the sweeper thread.
434   this->set_map(new_map, kReleaseStore);
435 
436   ExternalTwoByteString self = ExternalTwoByteString::cast(*this);
437   self.AllocateExternalPointerEntries(isolate);
438   self.SetResource(isolate, resource);
439   isolate->heap()->RegisterExternalString(*this);
440   // Force regeneration of the hash value.
441   if (is_internalized) self.EnsureHash();
442   return true;
443 }
444 
MakeExternal(v8::String::ExternalOneByteStringResource * resource)445 bool String::MakeExternal(v8::String::ExternalOneByteStringResource* resource) {
446   // Disallow garbage collection to avoid possible GC vs string access deadlock.
447   DisallowGarbageCollection no_gc;
448 
449   // Externalizing twice leaks the external resource, so it's
450   // prohibited by the API.
451   DCHECK(this->SupportsExternalization());
452   DCHECK(resource->IsCacheable());
453 #ifdef ENABLE_SLOW_DCHECKS
454   if (FLAG_enable_slow_asserts) {
455     // Assert that the resource and the string are equivalent.
456     DCHECK(static_cast<size_t>(this->length()) == resource->length());
457     if (this->IsTwoByteRepresentation()) {
458       base::ScopedVector<uint16_t> smart_chars(this->length());
459       String::WriteToFlat(*this, smart_chars.begin(), 0, this->length());
460       DCHECK(String::IsOneByte(smart_chars.begin(), this->length()));
461     }
462     base::ScopedVector<char> smart_chars(this->length());
463     String::WriteToFlat(*this, smart_chars.begin(), 0, this->length());
464     DCHECK_EQ(0, memcmp(smart_chars.begin(), resource->data(),
465                         resource->length() * sizeof(smart_chars[0])));
466   }
467 #endif                      // DEBUG
468   int size = this->Size();  // Byte size of the original string.
469   // Abort if size does not allow in-place conversion.
470   if (size < ExternalString::kUncachedSize) return false;
471   // Read-only strings cannot be made external, since that would mutate the
472   // string.
473   if (IsReadOnlyHeapObject(*this)) return false;
474   Isolate* isolate = GetIsolateFromWritableObject(*this);
475   bool is_internalized = this->IsInternalizedString();
476   bool has_pointers = StringShape(*this).IsIndirect();
477 
478   if (has_pointers) {
479     isolate->heap()->NotifyObjectLayoutChange(*this, no_gc,
480                                               InvalidateRecordedSlots::kYes);
481   }
482 
483   base::SharedMutexGuard<base::kExclusive> shared_mutex_guard(
484       isolate->internalized_string_access());
485   // Morph the string to an external string by replacing the map and
486   // reinitializing the fields.  This won't work if the space the existing
487   // string occupies is too small for a regular external string.  Instead, we
488   // resort to an uncached external string instead, omitting the field caching
489   // the address of the backing store.  When we encounter uncached external
490   // strings in generated code, we need to bailout to runtime.
491   Map new_map;
492   ReadOnlyRoots roots(isolate);
493   if (size < ExternalString::kSizeOfAllExternalStrings) {
494     new_map = is_internalized
495                   ? roots.uncached_external_one_byte_internalized_string_map()
496                   : roots.uncached_external_one_byte_string_map();
497   } else {
498     new_map = is_internalized
499                   ? roots.external_one_byte_internalized_string_map()
500                   : roots.external_one_byte_string_map();
501   }
502 
503   if (!isolate->heap()->IsLargeObject(*this)) {
504     // Byte size of the external String object.
505     int new_size = this->SizeFromMap(new_map);
506 
507     isolate->heap()->CreateFillerObjectAt(
508         this->address() + new_size, size - new_size,
509         has_pointers ? ClearRecordedSlots::kYes : ClearRecordedSlots::kNo);
510   } else {
511     // We don't need special handling for the combination IsLargeObject &&
512     // has_pointers, because indirect strings never get that large.
513     DCHECK(!has_pointers);
514   }
515 
516   // We are storing the new map using release store after creating a filler for
517   // the left-over space to avoid races with the sweeper thread.
518   this->set_map(new_map, kReleaseStore);
519 
520   ExternalOneByteString self = ExternalOneByteString::cast(*this);
521   self.AllocateExternalPointerEntries(isolate);
522   self.SetResource(isolate, resource);
523   isolate->heap()->RegisterExternalString(*this);
524   // Force regeneration of the hash value.
525   if (is_internalized) self.EnsureHash();
526   return true;
527 }
528 
SupportsExternalization()529 bool String::SupportsExternalization() {
530   if (this->IsThinString()) {
531     return i::ThinString::cast(*this).actual().SupportsExternalization();
532   }
533 
534   // RO_SPACE strings cannot be externalized.
535   if (IsReadOnlyHeapObject(*this)) {
536     return false;
537   }
538 
539   // Already an external string.
540   if (StringShape(*this).IsExternal()) {
541     return false;
542   }
543 
544   // External strings in the shared heap conflicts with the heap sandbox at the
545   // moment. Disable it until supported.
546   if (InSharedHeap()) return false;
547 
548 #ifdef V8_COMPRESS_POINTERS
549   // Small strings may not be in-place externalizable.
550   if (this->Size() < ExternalString::kUncachedSize) return false;
551 #else
552   DCHECK_LE(ExternalString::kUncachedSize, this->Size());
553 #endif
554 
555   Isolate* isolate = GetIsolateFromWritableObject(*this);
556   return !isolate->heap()->IsInGCPostProcessing();
557 }
558 
PrefixForDebugPrint() const559 const char* String::PrefixForDebugPrint() const {
560   StringShape shape(*this);
561   if (IsTwoByteRepresentation()) {
562     if (shape.IsInternalized()) {
563       return "u#";
564     } else if (shape.IsCons()) {
565       return "uc\"";
566     } else if (shape.IsThin()) {
567       return "u>\"";
568     } else if (shape.IsExternal()) {
569       return "ue\"";
570     } else {
571       return "u\"";
572     }
573   } else {
574     if (shape.IsInternalized()) {
575       return "#";
576     } else if (shape.IsCons()) {
577       return "c\"";
578     } else if (shape.IsThin()) {
579       return ">\"";
580     } else if (shape.IsExternal()) {
581       return "e\"";
582     } else {
583       return "\"";
584     }
585   }
586   UNREACHABLE();
587 }
588 
SuffixForDebugPrint() const589 const char* String::SuffixForDebugPrint() const {
590   StringShape shape(*this);
591   if (shape.IsInternalized()) return "";
592   return "\"";
593 }
594 
StringShortPrint(StringStream * accumulator)595 void String::StringShortPrint(StringStream* accumulator) {
596   if (!LooksValid()) {
597     accumulator->Add("<Invalid String>");
598     return;
599   }
600 
601   const int len = length();
602   accumulator->Add("<String[%u]: ", len);
603   accumulator->Add(PrefixForDebugPrint());
604 
605   if (len > kMaxShortPrintLength) {
606     accumulator->Add("...<truncated>>");
607     accumulator->Add(SuffixForDebugPrint());
608     accumulator->Put('>');
609     return;
610   }
611 
612   PrintUC16(accumulator, 0, len);
613   accumulator->Add(SuffixForDebugPrint());
614   accumulator->Put('>');
615 }
616 
PrintUC16(std::ostream & os,int start,int end)617 void String::PrintUC16(std::ostream& os, int start, int end) {
618   if (end < 0) end = length();
619   StringCharacterStream stream(*this, start);
620   for (int i = start; i < end && stream.HasMore(); i++) {
621     os << AsUC16(stream.GetNext());
622   }
623 }
624 
PrintUC16(StringStream * accumulator,int start,int end)625 void String::PrintUC16(StringStream* accumulator, int start, int end) {
626   if (end < 0) end = length();
627   StringCharacterStream stream(*this, start);
628   for (int i = start; i < end && stream.HasMore(); i++) {
629     uint16_t c = stream.GetNext();
630     if (c == '\n') {
631       accumulator->Add("\\n");
632     } else if (c == '\r') {
633       accumulator->Add("\\r");
634     } else if (c == '\\') {
635       accumulator->Add("\\\\");
636     } else if (!std::isprint(c)) {
637       accumulator->Add("\\x%02x", c);
638     } else {
639       accumulator->Put(static_cast<char>(c));
640     }
641   }
642 }
643 
ToArrayIndex(Address addr)644 int32_t String::ToArrayIndex(Address addr) {
645   DisallowGarbageCollection no_gc;
646   String key(addr);
647 
648   uint32_t index;
649   if (!key.AsArrayIndex(&index)) return -1;
650   if (index <= INT_MAX) return index;
651   return -1;
652 }
653 
LooksValid()654 bool String::LooksValid() {
655   // TODO(leszeks): Maybe remove this check entirely, Heap::Contains uses
656   // basically the same logic as the way we access the heap in the first place.
657   // RO_SPACE objects should always be valid.
658   if (V8_ENABLE_THIRD_PARTY_HEAP_BOOL) return true;
659   if (ReadOnlyHeap::Contains(*this)) return true;
660   BasicMemoryChunk* chunk = BasicMemoryChunk::FromHeapObject(*this);
661   if (chunk->heap() == nullptr) return false;
662   return chunk->heap()->Contains(*this);
663 }
664 
665 namespace {
666 
AreDigits(const uint8_t * s,int from,int to)667 bool AreDigits(const uint8_t* s, int from, int to) {
668   for (int i = from; i < to; i++) {
669     if (s[i] < '0' || s[i] > '9') return false;
670   }
671 
672   return true;
673 }
674 
ParseDecimalInteger(const uint8_t * s,int from,int to)675 int ParseDecimalInteger(const uint8_t* s, int from, int to) {
676   DCHECK_LT(to - from, 10);  // Overflow is not possible.
677   DCHECK(from < to);
678   int d = s[from] - '0';
679 
680   for (int i = from + 1; i < to; i++) {
681     d = 10 * d + (s[i] - '0');
682   }
683 
684   return d;
685 }
686 
687 }  // namespace
688 
689 // static
ToNumber(Isolate * isolate,Handle<String> subject)690 Handle<Object> String::ToNumber(Isolate* isolate, Handle<String> subject) {
691   // Flatten {subject} string first.
692   subject = String::Flatten(isolate, subject);
693 
694   // Fast array index case.
695   uint32_t index;
696   if (subject->AsArrayIndex(&index)) {
697     return isolate->factory()->NewNumberFromUint(index);
698   }
699 
700   // Fast case: short integer or some sorts of junk values.
701   if (subject->IsSeqOneByteString()) {
702     int len = subject->length();
703     if (len == 0) return handle(Smi::zero(), isolate);
704 
705     DisallowGarbageCollection no_gc;
706     uint8_t const* data =
707         Handle<SeqOneByteString>::cast(subject)->GetChars(no_gc);
708     bool minus = (data[0] == '-');
709     int start_pos = (minus ? 1 : 0);
710 
711     if (start_pos == len) {
712       return isolate->factory()->nan_value();
713     } else if (data[start_pos] > '9') {
714       // Fast check for a junk value. A valid string may start from a
715       // whitespace, a sign ('+' or '-'), the decimal point, a decimal digit
716       // or the 'I' character ('Infinity'). All of that have codes not greater
717       // than '9' except 'I' and &nbsp;.
718       if (data[start_pos] != 'I' && data[start_pos] != 0xA0) {
719         return isolate->factory()->nan_value();
720       }
721     } else if (len - start_pos < 10 && AreDigits(data, start_pos, len)) {
722       // The maximal/minimal smi has 10 digits. If the string has less digits
723       // we know it will fit into the smi-data type.
724       int d = ParseDecimalInteger(data, start_pos, len);
725       if (minus) {
726         if (d == 0) return isolate->factory()->minus_zero_value();
727         d = -d;
728       } else if (!subject->HasHashCode() && len <= String::kMaxArrayIndexSize &&
729                  (len == 1 || data[0] != '0')) {
730         // String hash is not calculated yet but all the data are present.
731         // Update the hash field to speed up sequential convertions.
732         uint32_t raw_hash_field = StringHasher::MakeArrayIndexHash(d, len);
733 #ifdef DEBUG
734         subject->EnsureHash();  // Force hash calculation.
735         DCHECK_EQ(subject->raw_hash_field(), raw_hash_field);
736 #endif
737         subject->set_raw_hash_field(raw_hash_field);
738       }
739       return handle(Smi::FromInt(d), isolate);
740     }
741   }
742 
743   // Slower case.
744   int flags = ALLOW_HEX | ALLOW_OCTAL | ALLOW_BINARY;
745   return isolate->factory()->NewNumber(StringToDouble(isolate, subject, flags));
746 }
747 
SlowGetFlatContent(const DisallowGarbageCollection & no_gc,const SharedStringAccessGuardIfNeeded & access_guard)748 String::FlatContent String::SlowGetFlatContent(
749     const DisallowGarbageCollection& no_gc,
750     const SharedStringAccessGuardIfNeeded& access_guard) {
751   USE(no_gc);
752   PtrComprCageBase cage_base = GetPtrComprCageBase(*this);
753   String string = *this;
754   StringShape shape(string, cage_base);
755   int offset = 0;
756 
757   // Extract cons- and sliced strings.
758   if (shape.IsCons()) {
759     ConsString cons = ConsString::cast(string);
760     if (!cons.IsFlat(cage_base)) return FlatContent(no_gc);
761     string = cons.first(cage_base);
762     shape = StringShape(string, cage_base);
763   } else if (shape.IsSliced()) {
764     SlicedString slice = SlicedString::cast(string);
765     offset = slice.offset();
766     string = slice.parent(cage_base);
767     shape = StringShape(string, cage_base);
768   }
769 
770   DCHECK(!shape.IsCons());
771   DCHECK(!shape.IsSliced());
772 
773   // Extract thin strings.
774   if (shape.IsThin()) {
775     ThinString thin = ThinString::cast(string);
776     string = thin.actual(cage_base);
777     shape = StringShape(string, cage_base);
778   }
779 
780   DCHECK(shape.IsDirect());
781   return TryGetFlatContentFromDirectString(cage_base, no_gc, string, offset,
782                                            length(), access_guard)
783       .value();
784 }
785 
ToCString(AllowNullsFlag allow_nulls,RobustnessFlag robust_flag,int offset,int length,int * length_return)786 std::unique_ptr<char[]> String::ToCString(AllowNullsFlag allow_nulls,
787                                           RobustnessFlag robust_flag,
788                                           int offset, int length,
789                                           int* length_return) {
790   if (robust_flag == ROBUST_STRING_TRAVERSAL && !LooksValid()) {
791     return std::unique_ptr<char[]>();
792   }
793   // Negative length means the to the end of the string.
794   if (length < 0) length = kMaxInt - offset;
795 
796   // Compute the size of the UTF-8 string. Start at the specified offset.
797   StringCharacterStream stream(*this, offset);
798   int character_position = offset;
799   int utf8_bytes = 0;
800   int last = unibrow::Utf16::kNoPreviousCharacter;
801   while (stream.HasMore() && character_position++ < offset + length) {
802     uint16_t character = stream.GetNext();
803     utf8_bytes += unibrow::Utf8::Length(character, last);
804     last = character;
805   }
806 
807   if (length_return) {
808     *length_return = utf8_bytes;
809   }
810 
811   char* result = NewArray<char>(utf8_bytes + 1);
812 
813   // Convert the UTF-16 string to a UTF-8 buffer. Start at the specified offset.
814   stream.Reset(*this, offset);
815   character_position = offset;
816   int utf8_byte_position = 0;
817   last = unibrow::Utf16::kNoPreviousCharacter;
818   while (stream.HasMore() && character_position++ < offset + length) {
819     uint16_t character = stream.GetNext();
820     if (allow_nulls == DISALLOW_NULLS && character == 0) {
821       character = ' ';
822     }
823     utf8_byte_position +=
824         unibrow::Utf8::Encode(result + utf8_byte_position, character, last);
825     last = character;
826   }
827   result[utf8_byte_position] = 0;
828   return std::unique_ptr<char[]>(result);
829 }
830 
ToCString(AllowNullsFlag allow_nulls,RobustnessFlag robust_flag,int * length_return)831 std::unique_ptr<char[]> String::ToCString(AllowNullsFlag allow_nulls,
832                                           RobustnessFlag robust_flag,
833                                           int* length_return) {
834   return ToCString(allow_nulls, robust_flag, 0, -1, length_return);
835 }
836 
837 // static
838 template <typename sinkchar>
WriteToFlat(String source,sinkchar * sink,int start,int length)839 void String::WriteToFlat(String source, sinkchar* sink, int start, int length) {
840   DCHECK(!SharedStringAccessGuardIfNeeded::IsNeeded(source));
841   return WriteToFlat(source, sink, start, length, GetPtrComprCageBase(source),
842                      SharedStringAccessGuardIfNeeded::NotNeeded());
843 }
844 
845 // static
846 template <typename sinkchar>
WriteToFlat(String source,sinkchar * sink,int start,int length,PtrComprCageBase cage_base,const SharedStringAccessGuardIfNeeded & access_guard)847 void String::WriteToFlat(String source, sinkchar* sink, int start, int length,
848                          PtrComprCageBase cage_base,
849                          const SharedStringAccessGuardIfNeeded& access_guard) {
850   DisallowGarbageCollection no_gc;
851   if (length == 0) return;
852   while (true) {
853     DCHECK_LT(0, length);
854     DCHECK_LE(0, start);
855     DCHECK_LE(length, source.length());
856     switch (StringShape(source, cage_base).representation_and_encoding_tag()) {
857       case kOneByteStringTag | kExternalStringTag:
858         CopyChars(
859             sink,
860             ExternalOneByteString::cast(source).GetChars(cage_base) + start,
861             length);
862         return;
863       case kTwoByteStringTag | kExternalStringTag:
864         CopyChars(
865             sink,
866             ExternalTwoByteString::cast(source).GetChars(cage_base) + start,
867             length);
868         return;
869       case kOneByteStringTag | kSeqStringTag:
870         CopyChars(sink,
871                   SeqOneByteString::cast(source).GetChars(no_gc, access_guard) +
872                       start,
873                   length);
874         return;
875       case kTwoByteStringTag | kSeqStringTag:
876         CopyChars(sink,
877                   SeqTwoByteString::cast(source).GetChars(no_gc, access_guard) +
878                       start,
879                   length);
880         return;
881       case kOneByteStringTag | kConsStringTag:
882       case kTwoByteStringTag | kConsStringTag: {
883         ConsString cons_string = ConsString::cast(source);
884         String first = cons_string.first(cage_base);
885         int boundary = first.length();
886         int first_length = boundary - start;
887         int second_length = start + length - boundary;
888         if (second_length >= first_length) {
889           // Right hand side is longer.  Recurse over left.
890           if (first_length > 0) {
891             WriteToFlat(first, sink, start, first_length, cage_base,
892                         access_guard);
893             if (start == 0 && cons_string.second(cage_base) == first) {
894               CopyChars(sink + boundary, sink, boundary);
895               return;
896             }
897             sink += boundary - start;
898             start = 0;
899             length -= first_length;
900           } else {
901             start -= boundary;
902           }
903           source = cons_string.second(cage_base);
904         } else {
905           // Left hand side is longer.  Recurse over right.
906           if (second_length > 0) {
907             String second = cons_string.second(cage_base);
908             // When repeatedly appending to a string, we get a cons string that
909             // is unbalanced to the left, a list, essentially.  We inline the
910             // common case of sequential one-byte right child.
911             if (second_length == 1) {
912               sink[boundary - start] =
913                   static_cast<sinkchar>(second.Get(0, cage_base, access_guard));
914             } else if (second.IsSeqOneByteString(cage_base)) {
915               CopyChars(
916                   sink + boundary - start,
917                   SeqOneByteString::cast(second).GetChars(no_gc, access_guard),
918                   second_length);
919             } else {
920               WriteToFlat(second, sink + boundary - start, 0, second_length,
921                           cage_base, access_guard);
922             }
923             length -= second_length;
924           }
925           source = first;
926         }
927         if (length == 0) return;
928         continue;
929       }
930       case kOneByteStringTag | kSlicedStringTag:
931       case kTwoByteStringTag | kSlicedStringTag: {
932         SlicedString slice = SlicedString::cast(source);
933         unsigned offset = slice.offset();
934         source = slice.parent(cage_base);
935         start += offset;
936         continue;
937       }
938       case kOneByteStringTag | kThinStringTag:
939       case kTwoByteStringTag | kThinStringTag:
940         source = ThinString::cast(source).actual(cage_base);
941         continue;
942     }
943     UNREACHABLE();
944   }
945   UNREACHABLE();
946 }
947 
948 template <typename SourceChar>
CalculateLineEndsImpl(std::vector<int> * line_ends,base::Vector<const SourceChar> src,bool include_ending_line)949 static void CalculateLineEndsImpl(std::vector<int>* line_ends,
950                                   base::Vector<const SourceChar> src,
951                                   bool include_ending_line) {
952   const int src_len = src.length();
953   for (int i = 0; i < src_len - 1; i++) {
954     SourceChar current = src[i];
955     SourceChar next = src[i + 1];
956     if (IsLineTerminatorSequence(current, next)) line_ends->push_back(i);
957   }
958 
959   if (src_len > 0 && IsLineTerminatorSequence(src[src_len - 1], 0)) {
960     line_ends->push_back(src_len - 1);
961   }
962   if (include_ending_line) {
963     // Include one character beyond the end of script. The rewriter uses that
964     // position for the implicit return statement.
965     line_ends->push_back(src_len);
966   }
967 }
968 
969 template <typename IsolateT>
CalculateLineEnds(IsolateT * isolate,Handle<String> src,bool include_ending_line)970 Handle<FixedArray> String::CalculateLineEnds(IsolateT* isolate,
971                                              Handle<String> src,
972                                              bool include_ending_line) {
973   src = Flatten(isolate, src);
974   // Rough estimate of line count based on a roughly estimated average
975   // length of (unpacked) code.
976   int line_count_estimate = src->length() >> 4;
977   std::vector<int> line_ends;
978   line_ends.reserve(line_count_estimate);
979   {
980     DisallowGarbageCollection no_gc;  // ensure vectors stay valid.
981     // Dispatch on type of strings.
982     String::FlatContent content = src->GetFlatContent(no_gc);
983     DCHECK(content.IsFlat());
984     if (content.IsOneByte()) {
985       CalculateLineEndsImpl(&line_ends, content.ToOneByteVector(),
986                             include_ending_line);
987     } else {
988       CalculateLineEndsImpl(&line_ends, content.ToUC16Vector(),
989                             include_ending_line);
990     }
991   }
992   int line_count = static_cast<int>(line_ends.size());
993   Handle<FixedArray> array =
994       isolate->factory()->NewFixedArray(line_count, AllocationType::kOld);
995   for (int i = 0; i < line_count; i++) {
996     array->set(i, Smi::FromInt(line_ends[i]));
997   }
998   return array;
999 }
1000 
1001 template Handle<FixedArray> String::CalculateLineEnds(Isolate* isolate,
1002                                                       Handle<String> src,
1003                                                       bool include_ending_line);
1004 template Handle<FixedArray> String::CalculateLineEnds(LocalIsolate* isolate,
1005                                                       Handle<String> src,
1006                                                       bool include_ending_line);
1007 
SlowEquals(String other) const1008 bool String::SlowEquals(String other) const {
1009   DCHECK(!SharedStringAccessGuardIfNeeded::IsNeeded(*this));
1010   DCHECK(!SharedStringAccessGuardIfNeeded::IsNeeded(other));
1011   return SlowEquals(other, SharedStringAccessGuardIfNeeded::NotNeeded());
1012 }
1013 
SlowEquals(String other,const SharedStringAccessGuardIfNeeded & access_guard) const1014 bool String::SlowEquals(
1015     String other, const SharedStringAccessGuardIfNeeded& access_guard) const {
1016   DisallowGarbageCollection no_gc;
1017   // Fast check: negative check with lengths.
1018   int len = length();
1019   if (len != other.length()) return false;
1020   if (len == 0) return true;
1021 
1022   PtrComprCageBase cage_base = GetPtrComprCageBase(*this);
1023 
1024   // Fast check: if at least one ThinString is involved, dereference it/them
1025   // and restart.
1026   if (this->IsThinString(cage_base) || other.IsThinString(cage_base)) {
1027     if (other.IsThinString(cage_base))
1028       other = ThinString::cast(other).actual(cage_base);
1029     if (this->IsThinString(cage_base)) {
1030       return ThinString::cast(*this).actual(cage_base).Equals(other);
1031     } else {
1032       return this->Equals(other);
1033     }
1034   }
1035 
1036   // Fast check: if hash code is computed for both strings
1037   // a fast negative check can be performed.
1038   if (HasHashCode() && other.HasHashCode()) {
1039 #ifdef ENABLE_SLOW_DCHECKS
1040     if (FLAG_enable_slow_asserts) {
1041       if (hash() != other.hash()) {
1042         bool found_difference = false;
1043         for (int i = 0; i < len; i++) {
1044           if (Get(i) != other.Get(i)) {
1045             found_difference = true;
1046             break;
1047           }
1048         }
1049         DCHECK(found_difference);
1050       }
1051     }
1052 #endif
1053     if (hash() != other.hash()) return false;
1054   }
1055 
1056   // We know the strings are both non-empty. Compare the first chars
1057   // before we try to flatten the strings.
1058   if (this->Get(0, cage_base, access_guard) !=
1059       other.Get(0, cage_base, access_guard))
1060     return false;
1061 
1062   if (IsSeqOneByteString() && other.IsSeqOneByteString()) {
1063     const uint8_t* str1 =
1064         SeqOneByteString::cast(*this).GetChars(no_gc, access_guard);
1065     const uint8_t* str2 =
1066         SeqOneByteString::cast(other).GetChars(no_gc, access_guard);
1067     return CompareCharsEqual(str1, str2, len);
1068   }
1069 
1070   StringComparator comparator;
1071   return comparator.Equals(*this, other, access_guard);
1072 }
1073 
1074 // static
SlowEquals(Isolate * isolate,Handle<String> one,Handle<String> two)1075 bool String::SlowEquals(Isolate* isolate, Handle<String> one,
1076                         Handle<String> two) {
1077   // Fast check: negative check with lengths.
1078   const int one_length = one->length();
1079   if (one_length != two->length()) return false;
1080   if (one_length == 0) return true;
1081 
1082   // Fast check: if at least one ThinString is involved, dereference it/them
1083   // and restart.
1084   if (one->IsThinString() || two->IsThinString()) {
1085     if (one->IsThinString()) {
1086       one = handle(ThinString::cast(*one).actual(), isolate);
1087     }
1088     if (two->IsThinString()) {
1089       two = handle(ThinString::cast(*two).actual(), isolate);
1090     }
1091     return String::Equals(isolate, one, two);
1092   }
1093 
1094   // Fast check: if hash code is computed for both strings
1095   // a fast negative check can be performed.
1096   if (one->HasHashCode() && two->HasHashCode()) {
1097 #ifdef ENABLE_SLOW_DCHECKS
1098     if (FLAG_enable_slow_asserts) {
1099       if (one->hash() != two->hash()) {
1100         bool found_difference = false;
1101         for (int i = 0; i < one_length; i++) {
1102           if (one->Get(i) != two->Get(i)) {
1103             found_difference = true;
1104             break;
1105           }
1106         }
1107         DCHECK(found_difference);
1108       }
1109     }
1110 #endif
1111     if (one->hash() != two->hash()) return false;
1112   }
1113 
1114   // We know the strings are both non-empty. Compare the first chars
1115   // before we try to flatten the strings.
1116   if (one->Get(0) != two->Get(0)) return false;
1117 
1118   one = String::Flatten(isolate, one);
1119   two = String::Flatten(isolate, two);
1120 
1121   DisallowGarbageCollection no_gc;
1122   String::FlatContent flat1 = one->GetFlatContent(no_gc);
1123   String::FlatContent flat2 = two->GetFlatContent(no_gc);
1124 
1125   if (flat1.IsOneByte() && flat2.IsOneByte()) {
1126     return CompareCharsEqual(flat1.ToOneByteVector().begin(),
1127                              flat2.ToOneByteVector().begin(), one_length);
1128   } else if (flat1.IsTwoByte() && flat2.IsTwoByte()) {
1129     return CompareCharsEqual(flat1.ToUC16Vector().begin(),
1130                              flat2.ToUC16Vector().begin(), one_length);
1131   } else if (flat1.IsOneByte() && flat2.IsTwoByte()) {
1132     return CompareCharsEqual(flat1.ToOneByteVector().begin(),
1133                              flat2.ToUC16Vector().begin(), one_length);
1134   } else if (flat1.IsTwoByte() && flat2.IsOneByte()) {
1135     return CompareCharsEqual(flat1.ToUC16Vector().begin(),
1136                              flat2.ToOneByteVector().begin(), one_length);
1137   }
1138   UNREACHABLE();
1139 }
1140 
1141 // static
Compare(Isolate * isolate,Handle<String> x,Handle<String> y)1142 ComparisonResult String::Compare(Isolate* isolate, Handle<String> x,
1143                                  Handle<String> y) {
1144   // A few fast case tests before we flatten.
1145   if (x.is_identical_to(y)) {
1146     return ComparisonResult::kEqual;
1147   } else if (y->length() == 0) {
1148     return x->length() == 0 ? ComparisonResult::kEqual
1149                             : ComparisonResult::kGreaterThan;
1150   } else if (x->length() == 0) {
1151     return ComparisonResult::kLessThan;
1152   }
1153 
1154   int const d = x->Get(0) - y->Get(0);
1155   if (d < 0) {
1156     return ComparisonResult::kLessThan;
1157   } else if (d > 0) {
1158     return ComparisonResult::kGreaterThan;
1159   }
1160 
1161   // Slow case.
1162   x = String::Flatten(isolate, x);
1163   y = String::Flatten(isolate, y);
1164 
1165   DisallowGarbageCollection no_gc;
1166   ComparisonResult result = ComparisonResult::kEqual;
1167   int prefix_length = x->length();
1168   if (y->length() < prefix_length) {
1169     prefix_length = y->length();
1170     result = ComparisonResult::kGreaterThan;
1171   } else if (y->length() > prefix_length) {
1172     result = ComparisonResult::kLessThan;
1173   }
1174   int r;
1175   String::FlatContent x_content = x->GetFlatContent(no_gc);
1176   String::FlatContent y_content = y->GetFlatContent(no_gc);
1177   if (x_content.IsOneByte()) {
1178     base::Vector<const uint8_t> x_chars = x_content.ToOneByteVector();
1179     if (y_content.IsOneByte()) {
1180       base::Vector<const uint8_t> y_chars = y_content.ToOneByteVector();
1181       r = CompareChars(x_chars.begin(), y_chars.begin(), prefix_length);
1182     } else {
1183       base::Vector<const base::uc16> y_chars = y_content.ToUC16Vector();
1184       r = CompareChars(x_chars.begin(), y_chars.begin(), prefix_length);
1185     }
1186   } else {
1187     base::Vector<const base::uc16> x_chars = x_content.ToUC16Vector();
1188     if (y_content.IsOneByte()) {
1189       base::Vector<const uint8_t> y_chars = y_content.ToOneByteVector();
1190       r = CompareChars(x_chars.begin(), y_chars.begin(), prefix_length);
1191     } else {
1192       base::Vector<const base::uc16> y_chars = y_content.ToUC16Vector();
1193       r = CompareChars(x_chars.begin(), y_chars.begin(), prefix_length);
1194     }
1195   }
1196   if (r < 0) {
1197     result = ComparisonResult::kLessThan;
1198   } else if (r > 0) {
1199     result = ComparisonResult::kGreaterThan;
1200   }
1201   return result;
1202 }
1203 
1204 namespace {
1205 
ToValidIndex(String str,Object number)1206 uint32_t ToValidIndex(String str, Object number) {
1207   uint32_t index = PositiveNumberToUint32(number);
1208   uint32_t length_value = static_cast<uint32_t>(str.length());
1209   if (index > length_value) return length_value;
1210   return index;
1211 }
1212 
1213 }  // namespace
1214 
IndexOf(Isolate * isolate,Handle<Object> receiver,Handle<Object> search,Handle<Object> position)1215 Object String::IndexOf(Isolate* isolate, Handle<Object> receiver,
1216                        Handle<Object> search, Handle<Object> position) {
1217   if (receiver->IsNullOrUndefined(isolate)) {
1218     THROW_NEW_ERROR_RETURN_FAILURE(
1219         isolate, NewTypeError(MessageTemplate::kCalledOnNullOrUndefined,
1220                               isolate->factory()->NewStringFromAsciiChecked(
1221                                   "String.prototype.indexOf")));
1222   }
1223   Handle<String> receiver_string;
1224   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, receiver_string,
1225                                      Object::ToString(isolate, receiver));
1226 
1227   Handle<String> search_string;
1228   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, search_string,
1229                                      Object::ToString(isolate, search));
1230 
1231   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, position,
1232                                      Object::ToInteger(isolate, position));
1233 
1234   uint32_t index = ToValidIndex(*receiver_string, *position);
1235   return Smi::FromInt(
1236       String::IndexOf(isolate, receiver_string, search_string, index));
1237 }
1238 
1239 namespace {
1240 
1241 template <typename T>
SearchString(Isolate * isolate,String::FlatContent receiver_content,base::Vector<T> pat_vector,int start_index)1242 int SearchString(Isolate* isolate, String::FlatContent receiver_content,
1243                  base::Vector<T> pat_vector, int start_index) {
1244   if (receiver_content.IsOneByte()) {
1245     return SearchString(isolate, receiver_content.ToOneByteVector(), pat_vector,
1246                         start_index);
1247   }
1248   return SearchString(isolate, receiver_content.ToUC16Vector(), pat_vector,
1249                       start_index);
1250 }
1251 
1252 }  // namespace
1253 
IndexOf(Isolate * isolate,Handle<String> receiver,Handle<String> search,int start_index)1254 int String::IndexOf(Isolate* isolate, Handle<String> receiver,
1255                     Handle<String> search, int start_index) {
1256   DCHECK_LE(0, start_index);
1257   DCHECK(start_index <= receiver->length());
1258 
1259   uint32_t search_length = search->length();
1260   if (search_length == 0) return start_index;
1261 
1262   uint32_t receiver_length = receiver->length();
1263   if (start_index + search_length > receiver_length) return -1;
1264 
1265   receiver = String::Flatten(isolate, receiver);
1266   search = String::Flatten(isolate, search);
1267 
1268   DisallowGarbageCollection no_gc;  // ensure vectors stay valid
1269   // Extract flattened substrings of cons strings before getting encoding.
1270   String::FlatContent receiver_content = receiver->GetFlatContent(no_gc);
1271   String::FlatContent search_content = search->GetFlatContent(no_gc);
1272 
1273   // dispatch on type of strings
1274   if (search_content.IsOneByte()) {
1275     base::Vector<const uint8_t> pat_vector = search_content.ToOneByteVector();
1276     return SearchString<const uint8_t>(isolate, receiver_content, pat_vector,
1277                                        start_index);
1278   }
1279   base::Vector<const base::uc16> pat_vector = search_content.ToUC16Vector();
1280   return SearchString<const base::uc16>(isolate, receiver_content, pat_vector,
1281                                         start_index);
1282 }
1283 
GetSubstitution(Isolate * isolate,Match * match,Handle<String> replacement,int start_index)1284 MaybeHandle<String> String::GetSubstitution(Isolate* isolate, Match* match,
1285                                             Handle<String> replacement,
1286                                             int start_index) {
1287   DCHECK_GE(start_index, 0);
1288 
1289   Factory* factory = isolate->factory();
1290 
1291   const int replacement_length = replacement->length();
1292   const int captures_length = match->CaptureCount();
1293 
1294   replacement = String::Flatten(isolate, replacement);
1295 
1296   Handle<String> dollar_string =
1297       factory->LookupSingleCharacterStringFromCode('$');
1298   int next_dollar_ix =
1299       String::IndexOf(isolate, replacement, dollar_string, start_index);
1300   if (next_dollar_ix < 0) {
1301     return replacement;
1302   }
1303 
1304   IncrementalStringBuilder builder(isolate);
1305 
1306   if (next_dollar_ix > 0) {
1307     builder.AppendString(factory->NewSubString(replacement, 0, next_dollar_ix));
1308   }
1309 
1310   while (true) {
1311     const int peek_ix = next_dollar_ix + 1;
1312     if (peek_ix >= replacement_length) {
1313       builder.AppendCharacter('$');
1314       return builder.Finish();
1315     }
1316 
1317     int continue_from_ix = -1;
1318     const uint16_t peek = replacement->Get(peek_ix);
1319     switch (peek) {
1320       case '$':  // $$
1321         builder.AppendCharacter('$');
1322         continue_from_ix = peek_ix + 1;
1323         break;
1324       case '&':  // $& - match
1325         builder.AppendString(match->GetMatch());
1326         continue_from_ix = peek_ix + 1;
1327         break;
1328       case '`':  // $` - prefix
1329         builder.AppendString(match->GetPrefix());
1330         continue_from_ix = peek_ix + 1;
1331         break;
1332       case '\'':  // $' - suffix
1333         builder.AppendString(match->GetSuffix());
1334         continue_from_ix = peek_ix + 1;
1335         break;
1336       case '0':
1337       case '1':
1338       case '2':
1339       case '3':
1340       case '4':
1341       case '5':
1342       case '6':
1343       case '7':
1344       case '8':
1345       case '9': {
1346         // Valid indices are $1 .. $9, $01 .. $09 and $10 .. $99
1347         int scaled_index = (peek - '0');
1348         int advance = 1;
1349 
1350         if (peek_ix + 1 < replacement_length) {
1351           const uint16_t next_peek = replacement->Get(peek_ix + 1);
1352           if (next_peek >= '0' && next_peek <= '9') {
1353             const int new_scaled_index = scaled_index * 10 + (next_peek - '0');
1354             if (new_scaled_index < captures_length) {
1355               scaled_index = new_scaled_index;
1356               advance = 2;
1357             }
1358           }
1359         }
1360 
1361         if (scaled_index == 0 || scaled_index >= captures_length) {
1362           builder.AppendCharacter('$');
1363           continue_from_ix = peek_ix;
1364           break;
1365         }
1366 
1367         bool capture_exists;
1368         Handle<String> capture;
1369         ASSIGN_RETURN_ON_EXCEPTION(
1370             isolate, capture, match->GetCapture(scaled_index, &capture_exists),
1371             String);
1372         if (capture_exists) builder.AppendString(capture);
1373         continue_from_ix = peek_ix + advance;
1374         break;
1375       }
1376       case '<': {  // $<name> - named capture
1377         using CaptureState = String::Match::CaptureState;
1378 
1379         if (!match->HasNamedCaptures()) {
1380           builder.AppendCharacter('$');
1381           continue_from_ix = peek_ix;
1382           break;
1383         }
1384 
1385         Handle<String> bracket_string =
1386             factory->LookupSingleCharacterStringFromCode('>');
1387         const int closing_bracket_ix =
1388             String::IndexOf(isolate, replacement, bracket_string, peek_ix + 1);
1389 
1390         if (closing_bracket_ix == -1) {
1391           // No closing bracket was found, treat '$<' as a string literal.
1392           builder.AppendCharacter('$');
1393           continue_from_ix = peek_ix;
1394           break;
1395         }
1396 
1397         Handle<String> capture_name =
1398             factory->NewSubString(replacement, peek_ix + 1, closing_bracket_ix);
1399         Handle<String> capture;
1400         CaptureState capture_state;
1401         ASSIGN_RETURN_ON_EXCEPTION(
1402             isolate, capture,
1403             match->GetNamedCapture(capture_name, &capture_state), String);
1404 
1405         if (capture_state == CaptureState::MATCHED) {
1406           builder.AppendString(capture);
1407         }
1408 
1409         continue_from_ix = closing_bracket_ix + 1;
1410         break;
1411       }
1412       default:
1413         builder.AppendCharacter('$');
1414         continue_from_ix = peek_ix;
1415         break;
1416     }
1417 
1418     // Go the the next $ in the replacement.
1419     // TODO(jgruber): Single-char lookups could be much more efficient.
1420     DCHECK_NE(continue_from_ix, -1);
1421     next_dollar_ix =
1422         String::IndexOf(isolate, replacement, dollar_string, continue_from_ix);
1423 
1424     // Return if there are no more $ characters in the replacement. If we
1425     // haven't reached the end, we need to append the suffix.
1426     if (next_dollar_ix < 0) {
1427       if (continue_from_ix < replacement_length) {
1428         builder.AppendString(factory->NewSubString(
1429             replacement, continue_from_ix, replacement_length));
1430       }
1431       return builder.Finish();
1432     }
1433 
1434     // Append substring between the previous and the next $ character.
1435     if (next_dollar_ix > continue_from_ix) {
1436       builder.AppendString(
1437           factory->NewSubString(replacement, continue_from_ix, next_dollar_ix));
1438     }
1439   }
1440 
1441   UNREACHABLE();
1442 }
1443 
1444 namespace {  // for String.Prototype.lastIndexOf
1445 
1446 template <typename schar, typename pchar>
StringMatchBackwards(base::Vector<const schar> subject,base::Vector<const pchar> pattern,int idx)1447 int StringMatchBackwards(base::Vector<const schar> subject,
1448                          base::Vector<const pchar> pattern, int idx) {
1449   int pattern_length = pattern.length();
1450   DCHECK_GE(pattern_length, 1);
1451   DCHECK(idx + pattern_length <= subject.length());
1452 
1453   if (sizeof(schar) == 1 && sizeof(pchar) > 1) {
1454     for (int i = 0; i < pattern_length; i++) {
1455       base::uc16 c = pattern[i];
1456       if (c > String::kMaxOneByteCharCode) {
1457         return -1;
1458       }
1459     }
1460   }
1461 
1462   pchar pattern_first_char = pattern[0];
1463   for (int i = idx; i >= 0; i--) {
1464     if (subject[i] != pattern_first_char) continue;
1465     int j = 1;
1466     while (j < pattern_length) {
1467       if (pattern[j] != subject[i + j]) {
1468         break;
1469       }
1470       j++;
1471     }
1472     if (j == pattern_length) {
1473       return i;
1474     }
1475   }
1476   return -1;
1477 }
1478 
1479 }  // namespace
1480 
LastIndexOf(Isolate * isolate,Handle<Object> receiver,Handle<Object> search,Handle<Object> position)1481 Object String::LastIndexOf(Isolate* isolate, Handle<Object> receiver,
1482                            Handle<Object> search, Handle<Object> position) {
1483   if (receiver->IsNullOrUndefined(isolate)) {
1484     THROW_NEW_ERROR_RETURN_FAILURE(
1485         isolate, NewTypeError(MessageTemplate::kCalledOnNullOrUndefined,
1486                               isolate->factory()->NewStringFromAsciiChecked(
1487                                   "String.prototype.lastIndexOf")));
1488   }
1489   Handle<String> receiver_string;
1490   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, receiver_string,
1491                                      Object::ToString(isolate, receiver));
1492 
1493   Handle<String> search_string;
1494   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, search_string,
1495                                      Object::ToString(isolate, search));
1496 
1497   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, position,
1498                                      Object::ToNumber(isolate, position));
1499 
1500   uint32_t start_index;
1501 
1502   if (position->IsNaN()) {
1503     start_index = receiver_string->length();
1504   } else {
1505     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, position,
1506                                        Object::ToInteger(isolate, position));
1507     start_index = ToValidIndex(*receiver_string, *position);
1508   }
1509 
1510   uint32_t pattern_length = search_string->length();
1511   uint32_t receiver_length = receiver_string->length();
1512 
1513   if (start_index + pattern_length > receiver_length) {
1514     start_index = receiver_length - pattern_length;
1515   }
1516 
1517   if (pattern_length == 0) {
1518     return Smi::FromInt(start_index);
1519   }
1520 
1521   receiver_string = String::Flatten(isolate, receiver_string);
1522   search_string = String::Flatten(isolate, search_string);
1523 
1524   int last_index = -1;
1525   DisallowGarbageCollection no_gc;  // ensure vectors stay valid
1526 
1527   String::FlatContent receiver_content = receiver_string->GetFlatContent(no_gc);
1528   String::FlatContent search_content = search_string->GetFlatContent(no_gc);
1529 
1530   if (search_content.IsOneByte()) {
1531     base::Vector<const uint8_t> pat_vector = search_content.ToOneByteVector();
1532     if (receiver_content.IsOneByte()) {
1533       last_index = StringMatchBackwards(receiver_content.ToOneByteVector(),
1534                                         pat_vector, start_index);
1535     } else {
1536       last_index = StringMatchBackwards(receiver_content.ToUC16Vector(),
1537                                         pat_vector, start_index);
1538     }
1539   } else {
1540     base::Vector<const base::uc16> pat_vector = search_content.ToUC16Vector();
1541     if (receiver_content.IsOneByte()) {
1542       last_index = StringMatchBackwards(receiver_content.ToOneByteVector(),
1543                                         pat_vector, start_index);
1544     } else {
1545       last_index = StringMatchBackwards(receiver_content.ToUC16Vector(),
1546                                         pat_vector, start_index);
1547     }
1548   }
1549   return Smi::FromInt(last_index);
1550 }
1551 
HasOneBytePrefix(base::Vector<const char> str)1552 bool String::HasOneBytePrefix(base::Vector<const char> str) {
1553   DCHECK(!SharedStringAccessGuardIfNeeded::IsNeeded(*this));
1554   return IsEqualToImpl<EqualityType::kPrefix>(
1555       str, GetPtrComprCageBase(*this),
1556       SharedStringAccessGuardIfNeeded::NotNeeded());
1557 }
1558 
1559 namespace {
1560 
1561 template <typename Char>
IsIdentifierVector(const base::Vector<Char> & vec)1562 bool IsIdentifierVector(const base::Vector<Char>& vec) {
1563   if (vec.empty()) {
1564     return false;
1565   }
1566   if (!IsIdentifierStart(vec[0])) {
1567     return false;
1568   }
1569   for (size_t i = 1; i < vec.size(); ++i) {
1570     if (!IsIdentifierPart(vec[i])) {
1571       return false;
1572     }
1573   }
1574   return true;
1575 }
1576 
1577 }  // namespace
1578 
1579 // static
IsIdentifier(Isolate * isolate,Handle<String> str)1580 bool String::IsIdentifier(Isolate* isolate, Handle<String> str) {
1581   str = String::Flatten(isolate, str);
1582   DisallowGarbageCollection no_gc;
1583   String::FlatContent flat = str->GetFlatContent(no_gc);
1584   return flat.IsOneByte() ? IsIdentifierVector(flat.ToOneByteVector())
1585                           : IsIdentifierVector(flat.ToUC16Vector());
1586 }
1587 
1588 namespace {
1589 
1590 template <typename Char>
HashString(String string,size_t start,int length,uint64_t seed,PtrComprCageBase cage_base,const SharedStringAccessGuardIfNeeded & access_guard)1591 uint32_t HashString(String string, size_t start, int length, uint64_t seed,
1592                     PtrComprCageBase cage_base,
1593                     const SharedStringAccessGuardIfNeeded& access_guard) {
1594   DisallowGarbageCollection no_gc;
1595 
1596   if (length > String::kMaxHashCalcLength) {
1597     return StringHasher::GetTrivialHash(length);
1598   }
1599 
1600   std::unique_ptr<Char[]> buffer;
1601   const Char* chars;
1602 
1603   if (string.IsConsString(cage_base)) {
1604     DCHECK_EQ(0, start);
1605     DCHECK(!string.IsFlat());
1606     buffer.reset(new Char[length]);
1607     String::WriteToFlat(string, buffer.get(), 0, length, cage_base,
1608                         access_guard);
1609     chars = buffer.get();
1610   } else {
1611     chars = string.GetChars<Char>(cage_base, no_gc, access_guard) + start;
1612   }
1613 
1614   return StringHasher::HashSequentialString<Char>(chars, length, seed);
1615 }
1616 
1617 }  // namespace
1618 
ComputeAndSetHash()1619 uint32_t String::ComputeAndSetHash() {
1620   DCHECK(!SharedStringAccessGuardIfNeeded::IsNeeded(*this));
1621   return ComputeAndSetHash(SharedStringAccessGuardIfNeeded::NotNeeded());
1622 }
ComputeAndSetHash(const SharedStringAccessGuardIfNeeded & access_guard)1623 uint32_t String::ComputeAndSetHash(
1624     const SharedStringAccessGuardIfNeeded& access_guard) {
1625   DisallowGarbageCollection no_gc;
1626   // Should only be called if hash code has not yet been computed.
1627   //
1628   // If in-place internalizable strings are shared, there may be calls to
1629   // ComputeAndSetHash in parallel. Since only flat strings are in-place
1630   // internalizable and their contents do not change, the result hash is the
1631   // same. The raw hash field is stored with relaxed ordering.
1632   DCHECK_IMPLIES(!FLAG_shared_string_table, !HasHashCode());
1633 
1634   // Store the hash code in the object.
1635   uint64_t seed = HashSeed(GetReadOnlyRoots());
1636   size_t start = 0;
1637   String string = *this;
1638   PtrComprCageBase cage_base = GetPtrComprCageBase(string);
1639   StringShape shape(string, cage_base);
1640   if (shape.IsSliced()) {
1641     SlicedString sliced = SlicedString::cast(string);
1642     start = sliced.offset();
1643     string = sliced.parent(cage_base);
1644     shape = StringShape(string, cage_base);
1645   }
1646   if (shape.IsCons() && string.IsFlat(cage_base)) {
1647     string = ConsString::cast(string).first(cage_base);
1648     shape = StringShape(string, cage_base);
1649   }
1650   if (shape.IsThin()) {
1651     string = ThinString::cast(string).actual(cage_base);
1652     shape = StringShape(string, cage_base);
1653     if (length() == string.length()) {
1654       set_raw_hash_field(string.raw_hash_field());
1655       return hash();
1656     }
1657   }
1658   uint32_t raw_hash_field =
1659       shape.encoding_tag() == kOneByteStringTag
1660           ? HashString<uint8_t>(string, start, length(), seed, cage_base,
1661                                 access_guard)
1662           : HashString<uint16_t>(string, start, length(), seed, cage_base,
1663                                  access_guard);
1664   set_raw_hash_field(raw_hash_field);
1665 
1666   // Check the hash code is there.
1667   DCHECK(HasHashCode());
1668   uint32_t result = HashBits::decode(raw_hash_field);
1669   DCHECK_NE(result, 0);  // Ensure that the hash value of 0 is never computed.
1670   return result;
1671 }
1672 
SlowAsArrayIndex(uint32_t * index)1673 bool String::SlowAsArrayIndex(uint32_t* index) {
1674   DisallowGarbageCollection no_gc;
1675   int length = this->length();
1676   if (length <= kMaxCachedArrayIndexLength) {
1677     EnsureHash();  // Force computation of hash code.
1678     uint32_t field = raw_hash_field();
1679     if (!IsIntegerIndex(field)) return false;
1680     *index = ArrayIndexValueBits::decode(field);
1681     return true;
1682   }
1683   if (length == 0 || length > kMaxArrayIndexSize) return false;
1684   StringCharacterStream stream(*this);
1685   return StringToIndex(&stream, index);
1686 }
1687 
SlowAsIntegerIndex(size_t * index)1688 bool String::SlowAsIntegerIndex(size_t* index) {
1689   DisallowGarbageCollection no_gc;
1690   int length = this->length();
1691   if (length <= kMaxCachedArrayIndexLength) {
1692     EnsureHash();  // Force computation of hash code.
1693     uint32_t field = raw_hash_field();
1694     if (!IsIntegerIndex(field)) return false;
1695     *index = ArrayIndexValueBits::decode(field);
1696     return true;
1697   }
1698   if (length == 0 || length > kMaxIntegerIndexSize) return false;
1699   StringCharacterStream stream(*this);
1700   return StringToIndex<StringCharacterStream, size_t, kToIntegerIndex>(&stream,
1701                                                                        index);
1702 }
1703 
PrintOn(FILE * file)1704 void String::PrintOn(FILE* file) {
1705   int length = this->length();
1706   for (int i = 0; i < length; i++) {
1707     PrintF(file, "%c", Get(i));
1708   }
1709 }
1710 
PrintOn(std::ostream & ostream)1711 void String::PrintOn(std::ostream& ostream) {
1712   int length = this->length();
1713   for (int i = 0; i < length; i++) {
1714     ostream.put(Get(i));
1715   }
1716 }
1717 
Truncate(Handle<SeqString> string,int new_length)1718 Handle<String> SeqString::Truncate(Handle<SeqString> string, int new_length) {
1719   if (new_length == 0) return string->GetReadOnlyRoots().empty_string_handle();
1720 
1721   int new_size, old_size;
1722   int old_length = string->length();
1723   if (old_length <= new_length) return string;
1724 
1725   if (string->IsSeqOneByteString()) {
1726     old_size = SeqOneByteString::SizeFor(old_length);
1727     new_size = SeqOneByteString::SizeFor(new_length);
1728   } else {
1729     DCHECK(string->IsSeqTwoByteString());
1730     old_size = SeqTwoByteString::SizeFor(old_length);
1731     new_size = SeqTwoByteString::SizeFor(new_length);
1732   }
1733 
1734   int delta = old_size - new_size;
1735 
1736   Address start_of_string = string->address();
1737   DCHECK(IsAligned(start_of_string, kObjectAlignment));
1738   DCHECK(IsAligned(start_of_string + new_size, kObjectAlignment));
1739 
1740   Heap* heap = Heap::FromWritableHeapObject(*string);
1741   if (!heap->IsLargeObject(*string)) {
1742     // Sizes are pointer size aligned, so that we can use filler objects
1743     // that are a multiple of pointer size.
1744     heap->CreateFillerObjectAt(start_of_string + new_size, delta,
1745                                ClearRecordedSlots::kNo);
1746   }
1747   // We are storing the new length using release store after creating a filler
1748   // for the left-over space to avoid races with the sweeper thread.
1749   string->set_length(new_length, kReleaseStore);
1750 
1751   return string;
1752 }
1753 
clear_padding()1754 void SeqOneByteString::clear_padding() {
1755   int data_size = SeqString::kHeaderSize + length() * kOneByteSize;
1756   memset(reinterpret_cast<void*>(address() + data_size), 0,
1757          SizeFor(length()) - data_size);
1758 }
1759 
clear_padding()1760 void SeqTwoByteString::clear_padding() {
1761   int data_size = SeqString::kHeaderSize + length() * base::kUC16Size;
1762   memset(reinterpret_cast<void*>(address() + data_size), 0,
1763          SizeFor(length()) - data_size);
1764 }
1765 
Get(int index,PtrComprCageBase cage_base,const SharedStringAccessGuardIfNeeded & access_guard) const1766 uint16_t ConsString::Get(
1767     int index, PtrComprCageBase cage_base,
1768     const SharedStringAccessGuardIfNeeded& access_guard) const {
1769   DCHECK(index >= 0 && index < this->length());
1770 
1771   // Check for a flattened cons string
1772   if (second(cage_base).length() == 0) {
1773     String left = first(cage_base);
1774     return left.Get(index);
1775   }
1776 
1777   String string = String::cast(*this);
1778 
1779   while (true) {
1780     if (StringShape(string, cage_base).IsCons()) {
1781       ConsString cons_string = ConsString::cast(string);
1782       String left = cons_string.first();
1783       if (left.length() > index) {
1784         string = left;
1785       } else {
1786         index -= left.length();
1787         string = cons_string.second(cage_base);
1788       }
1789     } else {
1790       return string.Get(index, cage_base, access_guard);
1791     }
1792   }
1793 
1794   UNREACHABLE();
1795 }
1796 
Get(int index,PtrComprCageBase cage_base,const SharedStringAccessGuardIfNeeded & access_guard) const1797 uint16_t ThinString::Get(
1798     int index, PtrComprCageBase cage_base,
1799     const SharedStringAccessGuardIfNeeded& access_guard) const {
1800   return actual(cage_base).Get(index, cage_base, access_guard);
1801 }
1802 
Get(int index,PtrComprCageBase cage_base,const SharedStringAccessGuardIfNeeded & access_guard) const1803 uint16_t SlicedString::Get(
1804     int index, PtrComprCageBase cage_base,
1805     const SharedStringAccessGuardIfNeeded& access_guard) const {
1806   return parent(cage_base).Get(offset() + index, cage_base, access_guard);
1807 }
1808 
ExternalPayloadSize() const1809 int ExternalString::ExternalPayloadSize() const {
1810   int length_multiplier = IsTwoByteRepresentation() ? i::kShortSize : kCharSize;
1811   return length() * length_multiplier;
1812 }
1813 
FlatStringReader(Isolate * isolate,Handle<String> str)1814 FlatStringReader::FlatStringReader(Isolate* isolate, Handle<String> str)
1815     : Relocatable(isolate), str_(str), length_(str->length()) {
1816 #if DEBUG
1817   // Check that this constructor is called only from the main thread.
1818   DCHECK_EQ(ThreadId::Current(), isolate->thread_id());
1819 #endif
1820   PostGarbageCollection();
1821 }
1822 
PostGarbageCollection()1823 void FlatStringReader::PostGarbageCollection() {
1824   DCHECK(str_->IsFlat());
1825   DisallowGarbageCollection no_gc;
1826   // This does not actually prevent the vector from being relocated later.
1827   String::FlatContent content = str_->GetFlatContent(no_gc);
1828   DCHECK(content.IsFlat());
1829   is_one_byte_ = content.IsOneByte();
1830   if (is_one_byte_) {
1831     start_ = content.ToOneByteVector().begin();
1832   } else {
1833     start_ = content.ToUC16Vector().begin();
1834   }
1835 }
1836 
Initialize(ConsString cons_string,int offset)1837 void ConsStringIterator::Initialize(ConsString cons_string, int offset) {
1838   DCHECK(!cons_string.is_null());
1839   root_ = cons_string;
1840   consumed_ = offset;
1841   // Force stack blown condition to trigger restart.
1842   depth_ = 1;
1843   maximum_depth_ = kStackSize + depth_;
1844   DCHECK(StackBlown());
1845 }
1846 
Continue(int * offset_out)1847 String ConsStringIterator::Continue(int* offset_out) {
1848   DCHECK_NE(depth_, 0);
1849   DCHECK_EQ(0, *offset_out);
1850   bool blew_stack = StackBlown();
1851   String string;
1852   // Get the next leaf if there is one.
1853   if (!blew_stack) string = NextLeaf(&blew_stack);
1854   // Restart search from root.
1855   if (blew_stack) {
1856     DCHECK(string.is_null());
1857     string = Search(offset_out);
1858   }
1859   // Ensure future calls return null immediately.
1860   if (string.is_null()) Reset(ConsString());
1861   return string;
1862 }
1863 
Search(int * offset_out)1864 String ConsStringIterator::Search(int* offset_out) {
1865   ConsString cons_string = root_;
1866   // Reset the stack, pushing the root string.
1867   depth_ = 1;
1868   maximum_depth_ = 1;
1869   frames_[0] = cons_string;
1870   const int consumed = consumed_;
1871   int offset = 0;
1872   while (true) {
1873     // Loop until the string is found which contains the target offset.
1874     String string = cons_string.first();
1875     int length = string.length();
1876     int32_t type;
1877     if (consumed < offset + length) {
1878       // Target offset is in the left branch.
1879       // Keep going if we're still in a ConString.
1880       type = string.map().instance_type();
1881       if ((type & kStringRepresentationMask) == kConsStringTag) {
1882         cons_string = ConsString::cast(string);
1883         PushLeft(cons_string);
1884         continue;
1885       }
1886       // Tell the stack we're done descending.
1887       AdjustMaximumDepth();
1888     } else {
1889       // Descend right.
1890       // Update progress through the string.
1891       offset += length;
1892       // Keep going if we're still in a ConString.
1893       string = cons_string.second();
1894       type = string.map().instance_type();
1895       if ((type & kStringRepresentationMask) == kConsStringTag) {
1896         cons_string = ConsString::cast(string);
1897         PushRight(cons_string);
1898         continue;
1899       }
1900       // Need this to be updated for the current string.
1901       length = string.length();
1902       // Account for the possibility of an empty right leaf.
1903       // This happens only if we have asked for an offset outside the string.
1904       if (length == 0) {
1905         // Reset so future operations will return null immediately.
1906         Reset(ConsString());
1907         return String();
1908       }
1909       // Tell the stack we're done descending.
1910       AdjustMaximumDepth();
1911       // Pop stack so next iteration is in correct place.
1912       Pop();
1913     }
1914     DCHECK_NE(length, 0);
1915     // Adjust return values and exit.
1916     consumed_ = offset + length;
1917     *offset_out = consumed - offset;
1918     return string;
1919   }
1920   UNREACHABLE();
1921 }
1922 
NextLeaf(bool * blew_stack)1923 String ConsStringIterator::NextLeaf(bool* blew_stack) {
1924   while (true) {
1925     // Tree traversal complete.
1926     if (depth_ == 0) {
1927       *blew_stack = false;
1928       return String();
1929     }
1930     // We've lost track of higher nodes.
1931     if (StackBlown()) {
1932       *blew_stack = true;
1933       return String();
1934     }
1935     // Go right.
1936     ConsString cons_string = frames_[OffsetForDepth(depth_ - 1)];
1937     String string = cons_string.second();
1938     int32_t type = string.map().instance_type();
1939     if ((type & kStringRepresentationMask) != kConsStringTag) {
1940       // Pop stack so next iteration is in correct place.
1941       Pop();
1942       int length = string.length();
1943       // Could be a flattened ConsString.
1944       if (length == 0) continue;
1945       consumed_ += length;
1946       return string;
1947     }
1948     cons_string = ConsString::cast(string);
1949     PushRight(cons_string);
1950     // Need to traverse all the way left.
1951     while (true) {
1952       // Continue left.
1953       string = cons_string.first();
1954       type = string.map().instance_type();
1955       if ((type & kStringRepresentationMask) != kConsStringTag) {
1956         AdjustMaximumDepth();
1957         int length = string.length();
1958         if (length == 0) break;  // Skip empty left-hand sides of ConsStrings.
1959         consumed_ += length;
1960         return string;
1961       }
1962       cons_string = ConsString::cast(string);
1963       PushLeft(cons_string);
1964     }
1965   }
1966   UNREACHABLE();
1967 }
1968 
AddressOfCharacterAt(int start_index,const DisallowGarbageCollection & no_gc)1969 const byte* String::AddressOfCharacterAt(
1970     int start_index, const DisallowGarbageCollection& no_gc) {
1971   DCHECK(IsFlat());
1972   String subject = *this;
1973   PtrComprCageBase cage_base = GetPtrComprCageBase(subject);
1974   StringShape shape(subject, cage_base);
1975   if (subject.IsConsString(cage_base)) {
1976     subject = ConsString::cast(subject).first(cage_base);
1977     shape = StringShape(subject, cage_base);
1978   } else if (subject.IsSlicedString(cage_base)) {
1979     start_index += SlicedString::cast(subject).offset();
1980     subject = SlicedString::cast(subject).parent(cage_base);
1981     shape = StringShape(subject, cage_base);
1982   }
1983   if (subject.IsThinString(cage_base)) {
1984     subject = ThinString::cast(subject).actual(cage_base);
1985     shape = StringShape(subject, cage_base);
1986   }
1987   CHECK_LE(0, start_index);
1988   CHECK_LE(start_index, subject.length());
1989   switch (shape.representation_and_encoding_tag()) {
1990     case kOneByteStringTag | kSeqStringTag:
1991       return reinterpret_cast<const byte*>(
1992           SeqOneByteString::cast(subject).GetChars(no_gc) + start_index);
1993     case kTwoByteStringTag | kSeqStringTag:
1994       return reinterpret_cast<const byte*>(
1995           SeqTwoByteString::cast(subject).GetChars(no_gc) + start_index);
1996     case kOneByteStringTag | kExternalStringTag:
1997       return reinterpret_cast<const byte*>(
1998           ExternalOneByteString::cast(subject).GetChars(cage_base) +
1999           start_index);
2000     case kTwoByteStringTag | kExternalStringTag:
2001       return reinterpret_cast<const byte*>(
2002           ExternalTwoByteString::cast(subject).GetChars(cage_base) +
2003           start_index);
2004     default:
2005       UNREACHABLE();
2006   }
2007 }
2008 
2009 template EXPORT_TEMPLATE_DEFINE(V8_EXPORT_PRIVATE) void String::WriteToFlat(
2010     String source, uint16_t* sink, int from, int to);
2011 template EXPORT_TEMPLATE_DEFINE(V8_EXPORT_PRIVATE) void String::WriteToFlat(
2012     String source, uint8_t* sink, int from, int to);
2013 template EXPORT_TEMPLATE_DEFINE(V8_EXPORT_PRIVATE) void String::WriteToFlat(
2014     String source, uint16_t* sink, int from, int to, PtrComprCageBase cage_base,
2015     const SharedStringAccessGuardIfNeeded&);
2016 template EXPORT_TEMPLATE_DEFINE(V8_EXPORT_PRIVATE) void String::WriteToFlat(
2017     String source, uint8_t* sink, int from, int to, PtrComprCageBase cage_base,
2018     const SharedStringAccessGuardIfNeeded&);
2019 
2020 namespace {
2021 // Check that the constants defined in src/objects/instance-type.h coincides
2022 // with the Torque-definition of string instance types in src/objects/string.tq.
2023 
2024 DEFINE_TORQUE_GENERATED_STRING_INSTANCE_TYPE()
2025 
2026 STATIC_ASSERT(kStringRepresentationMask == RepresentationBits::kMask);
2027 
2028 STATIC_ASSERT(kStringEncodingMask == IsOneByteBit::kMask);
2029 STATIC_ASSERT(kTwoByteStringTag == IsOneByteBit::encode(false));
2030 STATIC_ASSERT(kOneByteStringTag == IsOneByteBit::encode(true));
2031 
2032 STATIC_ASSERT(kUncachedExternalStringMask == IsUncachedBit::kMask);
2033 STATIC_ASSERT(kUncachedExternalStringTag == IsUncachedBit::encode(true));
2034 
2035 STATIC_ASSERT(kIsNotInternalizedMask == IsNotInternalizedBit::kMask);
2036 STATIC_ASSERT(kNotInternalizedTag == IsNotInternalizedBit::encode(true));
2037 STATIC_ASSERT(kInternalizedTag == IsNotInternalizedBit::encode(false));
2038 }  // namespace
2039 
2040 }  // namespace internal
2041 }  // namespace v8
2042