1 // Copyright 2019 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "src/objects/string.h"
6
7 #include "src/base/platform/yield-processor.h"
8 #include "src/common/assert-scope.h"
9 #include "src/common/globals.h"
10 #include "src/execution/isolate-utils.h"
11 #include "src/execution/thread-id.h"
12 #include "src/handles/handles-inl.h"
13 #include "src/heap/heap-inl.h"
14 #include "src/heap/local-factory-inl.h"
15 #include "src/heap/local-heap-inl.h"
16 #include "src/heap/memory-chunk.h"
17 #include "src/heap/read-only-heap.h"
18 #include "src/numbers/conversions.h"
19 #include "src/objects/instance-type.h"
20 #include "src/objects/map.h"
21 #include "src/objects/oddball.h"
22 #include "src/objects/string-comparator.h"
23 #include "src/objects/string-inl.h"
24 #include "src/strings/char-predicates.h"
25 #include "src/strings/string-builder-inl.h"
26 #include "src/strings/string-hasher.h"
27 #include "src/strings/string-search.h"
28 #include "src/strings/string-stream.h"
29 #include "src/strings/unicode-inl.h"
30 #include "src/utils/ostreams.h"
31
32 namespace v8 {
33 namespace internal {
34
SlowFlatten(Isolate * isolate,Handle<ConsString> cons,AllocationType allocation)35 Handle<String> String::SlowFlatten(Isolate* isolate, Handle<ConsString> cons,
36 AllocationType allocation) {
37 DCHECK_NE(cons->second().length(), 0);
38 DCHECK(!cons->InSharedHeap());
39
40 // TurboFan can create cons strings with empty first parts.
41 while (cons->first().length() == 0) {
42 // We do not want to call this function recursively. Therefore we call
43 // String::Flatten only in those cases where String::SlowFlatten is not
44 // called again.
45 if (cons->second().IsConsString() && !cons->second().IsFlat()) {
46 cons = handle(ConsString::cast(cons->second()), isolate);
47 } else {
48 return String::Flatten(isolate, handle(cons->second(), isolate),
49 allocation);
50 }
51 }
52
53 DCHECK(AllowGarbageCollection::IsAllowed());
54 int length = cons->length();
55 if (allocation != AllocationType::kSharedOld) {
56 allocation =
57 ObjectInYoungGeneration(*cons) ? allocation : AllocationType::kOld;
58 }
59 Handle<SeqString> result;
60 if (cons->IsOneByteRepresentation()) {
61 Handle<SeqOneByteString> flat =
62 isolate->factory()
63 ->NewRawOneByteString(length, allocation)
64 .ToHandleChecked();
65 DisallowGarbageCollection no_gc;
66 WriteToFlat(*cons, flat->GetChars(no_gc), 0, length);
67 result = flat;
68 } else {
69 Handle<SeqTwoByteString> flat =
70 isolate->factory()
71 ->NewRawTwoByteString(length, allocation)
72 .ToHandleChecked();
73 DisallowGarbageCollection no_gc;
74 WriteToFlat(*cons, flat->GetChars(no_gc), 0, length);
75 result = flat;
76 }
77 cons->set_first(*result);
78 cons->set_second(ReadOnlyRoots(isolate).empty_string());
79 DCHECK(result->IsFlat());
80 return result;
81 }
82
SlowShare(Isolate * isolate,Handle<String> source)83 Handle<String> String::SlowShare(Isolate* isolate, Handle<String> source) {
84 DCHECK(FLAG_shared_string_table);
85 Handle<String> flat = Flatten(isolate, source, AllocationType::kSharedOld);
86
87 // Do not recursively call Share, so directly compute the sharing strategy for
88 // the flat string, which could already be a copy or an existing string from
89 // e.g. a shortcut ConsString.
90 MaybeHandle<Map> new_map;
91 switch (isolate->factory()->ComputeSharingStrategyForString(flat, &new_map)) {
92 case StringTransitionStrategy::kCopy:
93 break;
94 case StringTransitionStrategy::kInPlace:
95 // A relaxed write is sufficient here, because at this point the string
96 // has not yet escaped the current thread.
97 DCHECK(flat->InSharedHeap());
98 flat->set_map_no_write_barrier(*new_map.ToHandleChecked());
99 return flat;
100 case StringTransitionStrategy::kAlreadyTransitioned:
101 return flat;
102 }
103
104 int length = flat->length();
105 if (flat->IsOneByteRepresentation()) {
106 Handle<SeqOneByteString> copy =
107 isolate->factory()->NewRawSharedOneByteString(length).ToHandleChecked();
108 DisallowGarbageCollection no_gc;
109 WriteToFlat(*flat, copy->GetChars(no_gc), 0, length);
110 return copy;
111 }
112 Handle<SeqTwoByteString> copy =
113 isolate->factory()->NewRawSharedTwoByteString(length).ToHandleChecked();
114 DisallowGarbageCollection no_gc;
115 WriteToFlat(*flat, copy->GetChars(no_gc), 0, length);
116 return copy;
117 }
118
119 namespace {
120
121 template <class StringClass>
MigrateExternalStringResource(Isolate * isolate,ExternalString from,StringClass to)122 void MigrateExternalStringResource(Isolate* isolate, ExternalString from,
123 StringClass to) {
124 Address to_resource_address = to.resource_as_address();
125 if (to_resource_address == kNullAddress) {
126 StringClass cast_from = StringClass::cast(from);
127 // |to| is a just-created internalized copy of |from|. Migrate the resource.
128 to.SetResource(isolate, cast_from.resource());
129 // Zap |from|'s resource pointer to reflect the fact that |from| has
130 // relinquished ownership of its resource.
131 isolate->heap()->UpdateExternalString(
132 from, ExternalString::cast(from).ExternalPayloadSize(), 0);
133 cast_from.SetResource(isolate, nullptr);
134 } else if (to_resource_address != from.resource_as_address()) {
135 // |to| already existed and has its own resource. Finalize |from|.
136 isolate->heap()->FinalizeExternalString(from);
137 }
138 }
139
MigrateExternalString(Isolate * isolate,String string,String internalized)140 void MigrateExternalString(Isolate* isolate, String string,
141 String internalized) {
142 if (internalized.IsExternalOneByteString()) {
143 MigrateExternalStringResource(isolate, ExternalString::cast(string),
144 ExternalOneByteString::cast(internalized));
145 } else if (internalized.IsExternalTwoByteString()) {
146 MigrateExternalStringResource(isolate, ExternalString::cast(string),
147 ExternalTwoByteString::cast(internalized));
148 } else {
149 // If the external string is duped into an existing non-external
150 // internalized string, free its resource (it's about to be rewritten
151 // into a ThinString below).
152 isolate->heap()->FinalizeExternalString(string);
153 }
154 }
155
156 template <typename IsolateT>
ComputeThinStringMap(IsolateT * isolate,StringShape from_string_shape,bool one_byte)157 Map ComputeThinStringMap(IsolateT* isolate, StringShape from_string_shape,
158 bool one_byte) {
159 ReadOnlyRoots roots(isolate);
160 if (from_string_shape.IsShared()) {
161 return one_byte ? roots.shared_thin_one_byte_string_map()
162 : roots.shared_thin_string_map();
163 }
164 return one_byte ? roots.thin_one_byte_string_map() : roots.thin_string_map();
165 }
166
// Outcome of a (possibly parallel) string map migration attempt.
enum class StringMigrationResult {
  kThisThreadMigrated,
  kAnotherThreadMigrated
};
171
172 // This function must be used when migrating strings whose
173 // StringShape::CanMigrateInParallel() is true. It encapsulates the
174 // synchronization needed for parallel migrations from multiple threads. The
175 // user passes a lambda to perform to update the representation.
176 //
177 // Returns whether this thread successfully migrated the string or another
178 // thread did so.
179 //
180 // The locking algorithm to migrate a String uses its map word as a migration
181 // lock:
182 //
183 // map = string.map(kAcquireLoad);
184 // if (map != SENTINEL_MAP &&
185 // string.compare_and_swap_map(map, SENTINEL_MAP)) {
186 // // Lock acquired, i.e. the string's map is SENTINEL_MAP.
187 // } else {
188 // // Lock not acquired. Another thread set the sentinel. Spin until the
189 // // map is no longer the sentinel, i.e. until the other thread
190 // // releases the lock.
191 // Map reloaded_map;
192 // do {
193 // reloaded_map = string.map(kAcquireLoad);
194 // } while (reloaded_map == SENTINEL_MAP);
195 // }
196 //
197 // Some notes on usage:
198 // - The initial map must be loaded with kAcquireLoad for synchronization.
199 // - Avoid loading the map multiple times. Load the map once and branch
200 // on that.
201 // - The lambda is passed the string and its initial (pre-migration)
202 // StringShape.
203 // - The lambda may be executed under a spinlock, so it should be as short
204 // as possible.
205 // - Currently only SeqString -> ThinString migrations can happen in
206 // parallel. If kAnotherThreadMigrated is returned, then the caller doesn't
207 // need to do any other work. In the future, if additional migrations can
208 // happen in parallel, then restarts may be needed if the parallel migration
209 // was to a different type (e.g. SeqString -> External).
210 //
211 // Example:
212 //
213 // DisallowGarbageCollection no_gc;
214 // Map initial_map = string.map(kAcquireLoad);
215 // switch (MigrateStringMapUnderLockIfNeeded(
216 // isolate, string, initial_map, target_map,
217 // [](Isolate* isolate, String string, StringShape initial_shape) {
218 // auto t = TargetStringType::unchecked_cast(string);
219 // t.set_field(foo);
220 // t.set_another_field(bar);
221 // }, no_gc);
222 //
223 template <typename IsolateT, typename Callback>
MigrateStringMapUnderLockIfNeeded(IsolateT * isolate,String string,Map initial_map,Map target_map,Callback update_representation,const DisallowGarbageCollection & no_gc)224 StringMigrationResult MigrateStringMapUnderLockIfNeeded(
225 IsolateT* isolate, String string, Map initial_map, Map target_map,
226 Callback update_representation, const DisallowGarbageCollection& no_gc) {
227 USE(no_gc);
228
229 InstanceType initial_type = initial_map.instance_type();
230 StringShape initial_shape(initial_type);
231
232 if (initial_shape.CanMigrateInParallel()) {
233 // A string whose map is a sentinel map means that it is in the critical
234 // section for being migrated to a different map. There are multiple
235 // sentinel maps: one for each InstanceType that may be migrated from.
236 Map sentinel_map =
237 *isolate->factory()->GetStringMigrationSentinelMap(initial_type);
238
239 // Try to acquire the migration lock by setting the string's map to the
240 // sentinel map. Note that it's possible that we've already witnessed a
241 // sentinel map.
242 if (initial_map == sentinel_map ||
243 !string.release_compare_and_swap_map_word(
244 MapWord::FromMap(initial_map), MapWord::FromMap(sentinel_map))) {
245 // If the lock couldn't be acquired, another thread must be migrating this
246 // string. The string's map will be the sentinel map until the migration
247 // is finished. Spin until the map is no longer the sentinel map.
248 //
249 // TODO(v8:12007): Replace this spin lock with a ParkingLot-like
250 // primitive.
251 Map reloaded_map = string.map(kAcquireLoad);
252 while (reloaded_map == sentinel_map) {
253 YIELD_PROCESSOR;
254 reloaded_map = string.map(kAcquireLoad);
255 }
256
257 // Another thread must have migrated once the map is no longer the
258 // sentinel map.
259 //
260 // TODO(v8:12007): At time of writing there is only a single kind of
261 // migration that can happen in parallel: SeqString -> ThinString. If
262 // other parallel migrations are added, this DCHECK will fail, and users
263 // of MigrateStringMapUnderLockIfNeeded would need to restart if the
264 // string was migrated to a different map than target_map.
265 DCHECK_EQ(reloaded_map, target_map);
266 return StringMigrationResult::kAnotherThreadMigrated;
267 }
268 }
269
270 // With the lock held for cases where it's needed, do the work to update the
271 // representation before storing the map word. In addition to parallel
272 // migrations, this also ensures that the concurrent marker will read the
273 // updated representation when visiting migrated strings.
274 update_representation(isolate, string, initial_shape);
275
276 // Do the store on the map word.
277 //
278 // In debug mode, do a compare-and-swap that is checked to succeed, to check
279 // that all string map migrations are using this function, since to be in the
280 // migration critical section, the string's current map must be the sentinel
281 // map.
282 //
283 // Otherwise do a normal release store.
284 if (DEBUG_BOOL && initial_shape.CanMigrateInParallel()) {
285 DCHECK_NE(initial_map, target_map);
286 Map sentinel_map =
287 *isolate->factory()->GetStringMigrationSentinelMap(initial_type);
288 CHECK(string.release_compare_and_swap_map_word(
289 MapWord::FromMap(sentinel_map), MapWord::FromMap(target_map)));
290 } else {
291 string.set_map_safe_transition(target_map, kReleaseStore);
292 }
293
294 return StringMigrationResult::kThisThreadMigrated;
295 }
296
297 } // namespace
298
299 template <typename IsolateT>
MakeThin(IsolateT * isolate,String internalized)300 void String::MakeThin(IsolateT* isolate, String internalized) {
301 DisallowGarbageCollection no_gc;
302 DCHECK_NE(*this, internalized);
303 DCHECK(internalized.IsInternalizedString());
304
305 // Load the map once at the beginning and use it to query for the shape of the
306 // string to avoid reloading the map in case of parallel migrations. See
307 // comment above for MigrateStringMapUnderLockIfNeeded.
308 Map initial_map = this->map(kAcquireLoad);
309 StringShape initial_shape(initial_map);
310
311 // TODO(v8:12007): Support shared ThinStrings.
312 //
313 // Currently in-place migrations to ThinStrings are disabled for shared
314 // strings to unblock prototyping.
315 if (initial_shape.IsShared()) return;
316 DCHECK(!initial_shape.IsThin());
317
318 bool has_pointers = initial_shape.IsIndirect();
319 int old_size = this->SizeFromMap(initial_map);
320 Map target_map = ComputeThinStringMap(isolate, initial_shape,
321 internalized.IsOneByteRepresentation());
322 switch (MigrateStringMapUnderLockIfNeeded(
323 isolate, *this, initial_map, target_map,
324 [=](IsolateT* isolate, String string, StringShape initial_shape) {
325 if (initial_shape.IsExternal()) {
326 // TODO(v8:12007): Support external strings.
327 DCHECK(!initial_shape.IsShared());
328 MigrateExternalString(isolate->AsIsolate(), string, internalized);
329 }
330
331 ThinString::unchecked_cast(string).set_actual(internalized);
332 DCHECK_GE(old_size, ThinString::kSize);
333 },
334 no_gc)) {
335 case StringMigrationResult::kThisThreadMigrated:
336 // Overwrite character data with the filler below.
337 break;
338 case StringMigrationResult::kAnotherThreadMigrated:
339 // Nothing to do.
340 //
341 // TODO(v8:12007): Support shared ThinStrings.
342 UNREACHABLE();
343 }
344
345 ThinString thin = ThinString::cast(*this);
346 Address thin_end = thin.address() + ThinString::kSize;
347 int size_delta = old_size - ThinString::kSize;
348 if (size_delta != 0) {
349 if (!Heap::IsLargeObject(thin)) {
350 isolate->heap()->CreateFillerObjectAt(
351 thin_end, size_delta,
352 has_pointers ? ClearRecordedSlots::kYes : ClearRecordedSlots::kNo);
353 } else {
354 // We don't need special handling for the combination IsLargeObject &&
355 // has_pointers, because indirect strings never get that large.
356 DCHECK(!has_pointers);
357 }
358 }
359 }
360
361 template EXPORT_TEMPLATE_DEFINE(V8_EXPORT_PRIVATE) void String::MakeThin(
362 Isolate* isolate, String internalized);
363 template EXPORT_TEMPLATE_DEFINE(V8_EXPORT_PRIVATE) void String::MakeThin(
364 LocalIsolate* isolate, String internalized);
365
MakeExternal(v8::String::ExternalStringResource * resource)366 bool String::MakeExternal(v8::String::ExternalStringResource* resource) {
367 // Disallow garbage collection to avoid possible GC vs string access deadlock.
368 DisallowGarbageCollection no_gc;
369
370 // Externalizing twice leaks the external resource, so it's
371 // prohibited by the API.
372 DCHECK(this->SupportsExternalization());
373 DCHECK(resource->IsCacheable());
374 #ifdef ENABLE_SLOW_DCHECKS
375 if (FLAG_enable_slow_asserts) {
376 // Assert that the resource and the string are equivalent.
377 DCHECK(static_cast<size_t>(this->length()) == resource->length());
378 base::ScopedVector<base::uc16> smart_chars(this->length());
379 String::WriteToFlat(*this, smart_chars.begin(), 0, this->length());
380 DCHECK_EQ(0, memcmp(smart_chars.begin(), resource->data(),
381 resource->length() * sizeof(smart_chars[0])));
382 }
383 #endif // DEBUG
384 int size = this->Size(); // Byte size of the original string.
385 // Abort if size does not allow in-place conversion.
386 if (size < ExternalString::kUncachedSize) return false;
387 // Read-only strings cannot be made external, since that would mutate the
388 // string.
389 if (IsReadOnlyHeapObject(*this)) return false;
390 Isolate* isolate = GetIsolateFromWritableObject(*this);
391 bool is_internalized = this->IsInternalizedString();
392 bool has_pointers = StringShape(*this).IsIndirect();
393
394 if (has_pointers) {
395 isolate->heap()->NotifyObjectLayoutChange(*this, no_gc,
396 InvalidateRecordedSlots::kYes);
397 }
398
399 base::SharedMutexGuard<base::kExclusive> shared_mutex_guard(
400 isolate->internalized_string_access());
401 // Morph the string to an external string by replacing the map and
402 // reinitializing the fields. This won't work if the space the existing
403 // string occupies is too small for a regular external string. Instead, we
404 // resort to an uncached external string instead, omitting the field caching
405 // the address of the backing store. When we encounter uncached external
406 // strings in generated code, we need to bailout to runtime.
407 Map new_map;
408 ReadOnlyRoots roots(isolate);
409 if (size < ExternalString::kSizeOfAllExternalStrings) {
410 if (is_internalized) {
411 new_map = roots.uncached_external_internalized_string_map();
412 } else {
413 new_map = roots.uncached_external_string_map();
414 }
415 } else {
416 new_map = is_internalized ? roots.external_internalized_string_map()
417 : roots.external_string_map();
418 }
419
420 // Byte size of the external String object.
421 int new_size = this->SizeFromMap(new_map);
422 if (!isolate->heap()->IsLargeObject(*this)) {
423 isolate->heap()->CreateFillerObjectAt(
424 this->address() + new_size, size - new_size,
425 has_pointers ? ClearRecordedSlots::kYes : ClearRecordedSlots::kNo);
426 } else {
427 // We don't need special handling for the combination IsLargeObject &&
428 // has_pointers, because indirect strings never get that large.
429 DCHECK(!has_pointers);
430 }
431
432 // We are storing the new map using release store after creating a filler for
433 // the left-over space to avoid races with the sweeper thread.
434 this->set_map(new_map, kReleaseStore);
435
436 ExternalTwoByteString self = ExternalTwoByteString::cast(*this);
437 self.AllocateExternalPointerEntries(isolate);
438 self.SetResource(isolate, resource);
439 isolate->heap()->RegisterExternalString(*this);
440 // Force regeneration of the hash value.
441 if (is_internalized) self.EnsureHash();
442 return true;
443 }
444
MakeExternal(v8::String::ExternalOneByteStringResource * resource)445 bool String::MakeExternal(v8::String::ExternalOneByteStringResource* resource) {
446 // Disallow garbage collection to avoid possible GC vs string access deadlock.
447 DisallowGarbageCollection no_gc;
448
449 // Externalizing twice leaks the external resource, so it's
450 // prohibited by the API.
451 DCHECK(this->SupportsExternalization());
452 DCHECK(resource->IsCacheable());
453 #ifdef ENABLE_SLOW_DCHECKS
454 if (FLAG_enable_slow_asserts) {
455 // Assert that the resource and the string are equivalent.
456 DCHECK(static_cast<size_t>(this->length()) == resource->length());
457 if (this->IsTwoByteRepresentation()) {
458 base::ScopedVector<uint16_t> smart_chars(this->length());
459 String::WriteToFlat(*this, smart_chars.begin(), 0, this->length());
460 DCHECK(String::IsOneByte(smart_chars.begin(), this->length()));
461 }
462 base::ScopedVector<char> smart_chars(this->length());
463 String::WriteToFlat(*this, smart_chars.begin(), 0, this->length());
464 DCHECK_EQ(0, memcmp(smart_chars.begin(), resource->data(),
465 resource->length() * sizeof(smart_chars[0])));
466 }
467 #endif // DEBUG
468 int size = this->Size(); // Byte size of the original string.
469 // Abort if size does not allow in-place conversion.
470 if (size < ExternalString::kUncachedSize) return false;
471 // Read-only strings cannot be made external, since that would mutate the
472 // string.
473 if (IsReadOnlyHeapObject(*this)) return false;
474 Isolate* isolate = GetIsolateFromWritableObject(*this);
475 bool is_internalized = this->IsInternalizedString();
476 bool has_pointers = StringShape(*this).IsIndirect();
477
478 if (has_pointers) {
479 isolate->heap()->NotifyObjectLayoutChange(*this, no_gc,
480 InvalidateRecordedSlots::kYes);
481 }
482
483 base::SharedMutexGuard<base::kExclusive> shared_mutex_guard(
484 isolate->internalized_string_access());
485 // Morph the string to an external string by replacing the map and
486 // reinitializing the fields. This won't work if the space the existing
487 // string occupies is too small for a regular external string. Instead, we
488 // resort to an uncached external string instead, omitting the field caching
489 // the address of the backing store. When we encounter uncached external
490 // strings in generated code, we need to bailout to runtime.
491 Map new_map;
492 ReadOnlyRoots roots(isolate);
493 if (size < ExternalString::kSizeOfAllExternalStrings) {
494 new_map = is_internalized
495 ? roots.uncached_external_one_byte_internalized_string_map()
496 : roots.uncached_external_one_byte_string_map();
497 } else {
498 new_map = is_internalized
499 ? roots.external_one_byte_internalized_string_map()
500 : roots.external_one_byte_string_map();
501 }
502
503 if (!isolate->heap()->IsLargeObject(*this)) {
504 // Byte size of the external String object.
505 int new_size = this->SizeFromMap(new_map);
506
507 isolate->heap()->CreateFillerObjectAt(
508 this->address() + new_size, size - new_size,
509 has_pointers ? ClearRecordedSlots::kYes : ClearRecordedSlots::kNo);
510 } else {
511 // We don't need special handling for the combination IsLargeObject &&
512 // has_pointers, because indirect strings never get that large.
513 DCHECK(!has_pointers);
514 }
515
516 // We are storing the new map using release store after creating a filler for
517 // the left-over space to avoid races with the sweeper thread.
518 this->set_map(new_map, kReleaseStore);
519
520 ExternalOneByteString self = ExternalOneByteString::cast(*this);
521 self.AllocateExternalPointerEntries(isolate);
522 self.SetResource(isolate, resource);
523 isolate->heap()->RegisterExternalString(*this);
524 // Force regeneration of the hash value.
525 if (is_internalized) self.EnsureHash();
526 return true;
527 }
528
SupportsExternalization()529 bool String::SupportsExternalization() {
530 if (this->IsThinString()) {
531 return i::ThinString::cast(*this).actual().SupportsExternalization();
532 }
533
534 // RO_SPACE strings cannot be externalized.
535 if (IsReadOnlyHeapObject(*this)) {
536 return false;
537 }
538
539 // Already an external string.
540 if (StringShape(*this).IsExternal()) {
541 return false;
542 }
543
544 // External strings in the shared heap conflicts with the heap sandbox at the
545 // moment. Disable it until supported.
546 if (InSharedHeap()) return false;
547
548 #ifdef V8_COMPRESS_POINTERS
549 // Small strings may not be in-place externalizable.
550 if (this->Size() < ExternalString::kUncachedSize) return false;
551 #else
552 DCHECK_LE(ExternalString::kUncachedSize, this->Size());
553 #endif
554
555 Isolate* isolate = GetIsolateFromWritableObject(*this);
556 return !isolate->heap()->IsInGCPostProcessing();
557 }
558
PrefixForDebugPrint() const559 const char* String::PrefixForDebugPrint() const {
560 StringShape shape(*this);
561 if (IsTwoByteRepresentation()) {
562 if (shape.IsInternalized()) {
563 return "u#";
564 } else if (shape.IsCons()) {
565 return "uc\"";
566 } else if (shape.IsThin()) {
567 return "u>\"";
568 } else if (shape.IsExternal()) {
569 return "ue\"";
570 } else {
571 return "u\"";
572 }
573 } else {
574 if (shape.IsInternalized()) {
575 return "#";
576 } else if (shape.IsCons()) {
577 return "c\"";
578 } else if (shape.IsThin()) {
579 return ">\"";
580 } else if (shape.IsExternal()) {
581 return "e\"";
582 } else {
583 return "\"";
584 }
585 }
586 UNREACHABLE();
587 }
588
SuffixForDebugPrint() const589 const char* String::SuffixForDebugPrint() const {
590 StringShape shape(*this);
591 if (shape.IsInternalized()) return "";
592 return "\"";
593 }
594
StringShortPrint(StringStream * accumulator)595 void String::StringShortPrint(StringStream* accumulator) {
596 if (!LooksValid()) {
597 accumulator->Add("<Invalid String>");
598 return;
599 }
600
601 const int len = length();
602 accumulator->Add("<String[%u]: ", len);
603 accumulator->Add(PrefixForDebugPrint());
604
605 if (len > kMaxShortPrintLength) {
606 accumulator->Add("...<truncated>>");
607 accumulator->Add(SuffixForDebugPrint());
608 accumulator->Put('>');
609 return;
610 }
611
612 PrintUC16(accumulator, 0, len);
613 accumulator->Add(SuffixForDebugPrint());
614 accumulator->Put('>');
615 }
616
PrintUC16(std::ostream & os,int start,int end)617 void String::PrintUC16(std::ostream& os, int start, int end) {
618 if (end < 0) end = length();
619 StringCharacterStream stream(*this, start);
620 for (int i = start; i < end && stream.HasMore(); i++) {
621 os << AsUC16(stream.GetNext());
622 }
623 }
624
PrintUC16(StringStream * accumulator,int start,int end)625 void String::PrintUC16(StringStream* accumulator, int start, int end) {
626 if (end < 0) end = length();
627 StringCharacterStream stream(*this, start);
628 for (int i = start; i < end && stream.HasMore(); i++) {
629 uint16_t c = stream.GetNext();
630 if (c == '\n') {
631 accumulator->Add("\\n");
632 } else if (c == '\r') {
633 accumulator->Add("\\r");
634 } else if (c == '\\') {
635 accumulator->Add("\\\\");
636 } else if (!std::isprint(c)) {
637 accumulator->Add("\\x%02x", c);
638 } else {
639 accumulator->Put(static_cast<char>(c));
640 }
641 }
642 }
643
ToArrayIndex(Address addr)644 int32_t String::ToArrayIndex(Address addr) {
645 DisallowGarbageCollection no_gc;
646 String key(addr);
647
648 uint32_t index;
649 if (!key.AsArrayIndex(&index)) return -1;
650 if (index <= INT_MAX) return index;
651 return -1;
652 }
653
LooksValid()654 bool String::LooksValid() {
655 // TODO(leszeks): Maybe remove this check entirely, Heap::Contains uses
656 // basically the same logic as the way we access the heap in the first place.
657 // RO_SPACE objects should always be valid.
658 if (V8_ENABLE_THIRD_PARTY_HEAP_BOOL) return true;
659 if (ReadOnlyHeap::Contains(*this)) return true;
660 BasicMemoryChunk* chunk = BasicMemoryChunk::FromHeapObject(*this);
661 if (chunk->heap() == nullptr) return false;
662 return chunk->heap()->Contains(*this);
663 }
664
665 namespace {
666
bool AreDigits(const uint8_t* s, int from, int to) {
  // True iff every byte in s[from, to) is an ASCII decimal digit.
  for (int i = from; i < to; ++i) {
    if (!('0' <= s[i] && s[i] <= '9')) return false;
  }
  return true;
}
674
ParseDecimalInteger(const uint8_t * s,int from,int to)675 int ParseDecimalInteger(const uint8_t* s, int from, int to) {
676 DCHECK_LT(to - from, 10); // Overflow is not possible.
677 DCHECK(from < to);
678 int d = s[from] - '0';
679
680 for (int i = from + 1; i < to; i++) {
681 d = 10 * d + (s[i] - '0');
682 }
683
684 return d;
685 }
686
687 } // namespace
688
689 // static
ToNumber(Isolate * isolate,Handle<String> subject)690 Handle<Object> String::ToNumber(Isolate* isolate, Handle<String> subject) {
691 // Flatten {subject} string first.
692 subject = String::Flatten(isolate, subject);
693
694 // Fast array index case.
695 uint32_t index;
696 if (subject->AsArrayIndex(&index)) {
697 return isolate->factory()->NewNumberFromUint(index);
698 }
699
700 // Fast case: short integer or some sorts of junk values.
701 if (subject->IsSeqOneByteString()) {
702 int len = subject->length();
703 if (len == 0) return handle(Smi::zero(), isolate);
704
705 DisallowGarbageCollection no_gc;
706 uint8_t const* data =
707 Handle<SeqOneByteString>::cast(subject)->GetChars(no_gc);
708 bool minus = (data[0] == '-');
709 int start_pos = (minus ? 1 : 0);
710
711 if (start_pos == len) {
712 return isolate->factory()->nan_value();
713 } else if (data[start_pos] > '9') {
714 // Fast check for a junk value. A valid string may start from a
715 // whitespace, a sign ('+' or '-'), the decimal point, a decimal digit
716 // or the 'I' character ('Infinity'). All of that have codes not greater
717 // than '9' except 'I' and .
718 if (data[start_pos] != 'I' && data[start_pos] != 0xA0) {
719 return isolate->factory()->nan_value();
720 }
721 } else if (len - start_pos < 10 && AreDigits(data, start_pos, len)) {
722 // The maximal/minimal smi has 10 digits. If the string has less digits
723 // we know it will fit into the smi-data type.
724 int d = ParseDecimalInteger(data, start_pos, len);
725 if (minus) {
726 if (d == 0) return isolate->factory()->minus_zero_value();
727 d = -d;
728 } else if (!subject->HasHashCode() && len <= String::kMaxArrayIndexSize &&
729 (len == 1 || data[0] != '0')) {
730 // String hash is not calculated yet but all the data are present.
731 // Update the hash field to speed up sequential convertions.
732 uint32_t raw_hash_field = StringHasher::MakeArrayIndexHash(d, len);
733 #ifdef DEBUG
734 subject->EnsureHash(); // Force hash calculation.
735 DCHECK_EQ(subject->raw_hash_field(), raw_hash_field);
736 #endif
737 subject->set_raw_hash_field(raw_hash_field);
738 }
739 return handle(Smi::FromInt(d), isolate);
740 }
741 }
742
743 // Slower case.
744 int flags = ALLOW_HEX | ALLOW_OCTAL | ALLOW_BINARY;
745 return isolate->factory()->NewNumber(StringToDouble(isolate, subject, flags));
746 }
747
SlowGetFlatContent(const DisallowGarbageCollection & no_gc,const SharedStringAccessGuardIfNeeded & access_guard)748 String::FlatContent String::SlowGetFlatContent(
749 const DisallowGarbageCollection& no_gc,
750 const SharedStringAccessGuardIfNeeded& access_guard) {
751 USE(no_gc);
752 PtrComprCageBase cage_base = GetPtrComprCageBase(*this);
753 String string = *this;
754 StringShape shape(string, cage_base);
755 int offset = 0;
756
757 // Extract cons- and sliced strings.
758 if (shape.IsCons()) {
759 ConsString cons = ConsString::cast(string);
760 if (!cons.IsFlat(cage_base)) return FlatContent(no_gc);
761 string = cons.first(cage_base);
762 shape = StringShape(string, cage_base);
763 } else if (shape.IsSliced()) {
764 SlicedString slice = SlicedString::cast(string);
765 offset = slice.offset();
766 string = slice.parent(cage_base);
767 shape = StringShape(string, cage_base);
768 }
769
770 DCHECK(!shape.IsCons());
771 DCHECK(!shape.IsSliced());
772
773 // Extract thin strings.
774 if (shape.IsThin()) {
775 ThinString thin = ThinString::cast(string);
776 string = thin.actual(cage_base);
777 shape = StringShape(string, cage_base);
778 }
779
780 DCHECK(shape.IsDirect());
781 return TryGetFlatContentFromDirectString(cage_base, no_gc, string, offset,
782 length(), access_guard)
783 .value();
784 }
785
ToCString(AllowNullsFlag allow_nulls,RobustnessFlag robust_flag,int offset,int length,int * length_return)786 std::unique_ptr<char[]> String::ToCString(AllowNullsFlag allow_nulls,
787 RobustnessFlag robust_flag,
788 int offset, int length,
789 int* length_return) {
790 if (robust_flag == ROBUST_STRING_TRAVERSAL && !LooksValid()) {
791 return std::unique_ptr<char[]>();
792 }
793 // Negative length means the to the end of the string.
794 if (length < 0) length = kMaxInt - offset;
795
796 // Compute the size of the UTF-8 string. Start at the specified offset.
797 StringCharacterStream stream(*this, offset);
798 int character_position = offset;
799 int utf8_bytes = 0;
800 int last = unibrow::Utf16::kNoPreviousCharacter;
801 while (stream.HasMore() && character_position++ < offset + length) {
802 uint16_t character = stream.GetNext();
803 utf8_bytes += unibrow::Utf8::Length(character, last);
804 last = character;
805 }
806
807 if (length_return) {
808 *length_return = utf8_bytes;
809 }
810
811 char* result = NewArray<char>(utf8_bytes + 1);
812
813 // Convert the UTF-16 string to a UTF-8 buffer. Start at the specified offset.
814 stream.Reset(*this, offset);
815 character_position = offset;
816 int utf8_byte_position = 0;
817 last = unibrow::Utf16::kNoPreviousCharacter;
818 while (stream.HasMore() && character_position++ < offset + length) {
819 uint16_t character = stream.GetNext();
820 if (allow_nulls == DISALLOW_NULLS && character == 0) {
821 character = ' ';
822 }
823 utf8_byte_position +=
824 unibrow::Utf8::Encode(result + utf8_byte_position, character, last);
825 last = character;
826 }
827 result[utf8_byte_position] = 0;
828 return std::unique_ptr<char[]>(result);
829 }
830
// Convenience overload: converts the whole string (offset 0, negative length
// meaning "to the end").
std::unique_ptr<char[]> String::ToCString(AllowNullsFlag allow_nulls,
                                          RobustnessFlag robust_flag,
                                          int* length_return) {
  return ToCString(allow_nulls, robust_flag, 0, -1, length_return);
}
836
// static
template <typename sinkchar>
void String::WriteToFlat(String source, sinkchar* sink, int start, int length) {
  // Convenience overload for strings that are not shared across threads; in
  // that case no access guard is required.
  DCHECK(!SharedStringAccessGuardIfNeeded::IsNeeded(source));
  return WriteToFlat(source, sink, start, length, GetPtrComprCageBase(source),
                     SharedStringAccessGuardIfNeeded::NotNeeded());
}
844
// static
template <typename sinkchar>
void String::WriteToFlat(String source, sinkchar* sink, int start, int length,
                         PtrComprCageBase cage_base,
                         const SharedStringAccessGuardIfNeeded& access_guard) {
  // Copies the code units [start, start + length) of {source} into {sink},
  // converting widths as dictated by {sinkchar}. Indirect representations
  // (cons, sliced, thin) are unwrapped iteratively; for cons trees only the
  // shorter child is handled recursively, so the longer spine is processed
  // by the loop.
  DisallowGarbageCollection no_gc;
  if (length == 0) return;
  while (true) {
    DCHECK_LT(0, length);
    DCHECK_LE(0, start);
    DCHECK_LE(length, source.length());
    switch (StringShape(source, cage_base).representation_and_encoding_tag()) {
      // Direct representations: copy the requested characters and return.
      case kOneByteStringTag | kExternalStringTag:
        CopyChars(
            sink,
            ExternalOneByteString::cast(source).GetChars(cage_base) + start,
            length);
        return;
      case kTwoByteStringTag | kExternalStringTag:
        CopyChars(
            sink,
            ExternalTwoByteString::cast(source).GetChars(cage_base) + start,
            length);
        return;
      case kOneByteStringTag | kSeqStringTag:
        CopyChars(sink,
                  SeqOneByteString::cast(source).GetChars(no_gc, access_guard) +
                      start,
                  length);
        return;
      case kTwoByteStringTag | kSeqStringTag:
        CopyChars(sink,
                  SeqTwoByteString::cast(source).GetChars(no_gc, access_guard) +
                      start,
                  length);
        return;
      case kOneByteStringTag | kConsStringTag:
      case kTwoByteStringTag | kConsStringTag: {
        ConsString cons_string = ConsString::cast(source);
        String first = cons_string.first(cage_base);
        int boundary = first.length();
        // Portion of the requested range that falls into the first/second
        // child respectively (either may be <= 0 if the range misses it).
        int first_length = boundary - start;
        int second_length = start + length - boundary;
        if (second_length >= first_length) {
          // Right hand side is longer. Recurse over left.
          if (first_length > 0) {
            WriteToFlat(first, sink, start, first_length, cage_base,
                        access_guard);
            if (start == 0 && cons_string.second(cage_base) == first) {
              // Both children are the same string ("s + s"): the second half
              // is a byte-wise repeat of what was just written.
              CopyChars(sink + boundary, sink, boundary);
              return;
            }
            sink += boundary - start;
            start = 0;
            length -= first_length;
          } else {
            // The requested range lies entirely within the second child.
            start -= boundary;
          }
          source = cons_string.second(cage_base);
        } else {
          // Left hand side is longer. Recurse over right.
          if (second_length > 0) {
            String second = cons_string.second(cage_base);
            // When repeatedly appending to a string, we get a cons string that
            // is unbalanced to the left, a list, essentially. We inline the
            // common case of sequential one-byte right child.
            if (second_length == 1) {
              sink[boundary - start] =
                  static_cast<sinkchar>(second.Get(0, cage_base, access_guard));
            } else if (second.IsSeqOneByteString(cage_base)) {
              CopyChars(
                  sink + boundary - start,
                  SeqOneByteString::cast(second).GetChars(no_gc, access_guard),
                  second_length);
            } else {
              WriteToFlat(second, sink + boundary - start, 0, second_length,
                          cage_base, access_guard);
            }
            length -= second_length;
          }
          source = first;
        }
        if (length == 0) return;
        continue;
      }
      case kOneByteStringTag | kSlicedStringTag:
      case kTwoByteStringTag | kSlicedStringTag: {
        // Redirect into the parent string at the slice's offset.
        SlicedString slice = SlicedString::cast(source);
        unsigned offset = slice.offset();
        source = slice.parent(cage_base);
        start += offset;
        continue;
      }
      case kOneByteStringTag | kThinStringTag:
      case kTwoByteStringTag | kThinStringTag:
        // Follow the forwarding pointer to the actual string.
        source = ThinString::cast(source).actual(cage_base);
        continue;
    }
    UNREACHABLE();
  }
  UNREACHABLE();
}
947
948 template <typename SourceChar>
CalculateLineEndsImpl(std::vector<int> * line_ends,base::Vector<const SourceChar> src,bool include_ending_line)949 static void CalculateLineEndsImpl(std::vector<int>* line_ends,
950 base::Vector<const SourceChar> src,
951 bool include_ending_line) {
952 const int src_len = src.length();
953 for (int i = 0; i < src_len - 1; i++) {
954 SourceChar current = src[i];
955 SourceChar next = src[i + 1];
956 if (IsLineTerminatorSequence(current, next)) line_ends->push_back(i);
957 }
958
959 if (src_len > 0 && IsLineTerminatorSequence(src[src_len - 1], 0)) {
960 line_ends->push_back(src_len - 1);
961 }
962 if (include_ending_line) {
963 // Include one character beyond the end of script. The rewriter uses that
964 // position for the implicit return statement.
965 line_ends->push_back(src_len);
966 }
967 }
968
969 template <typename IsolateT>
CalculateLineEnds(IsolateT * isolate,Handle<String> src,bool include_ending_line)970 Handle<FixedArray> String::CalculateLineEnds(IsolateT* isolate,
971 Handle<String> src,
972 bool include_ending_line) {
973 src = Flatten(isolate, src);
974 // Rough estimate of line count based on a roughly estimated average
975 // length of (unpacked) code.
976 int line_count_estimate = src->length() >> 4;
977 std::vector<int> line_ends;
978 line_ends.reserve(line_count_estimate);
979 {
980 DisallowGarbageCollection no_gc; // ensure vectors stay valid.
981 // Dispatch on type of strings.
982 String::FlatContent content = src->GetFlatContent(no_gc);
983 DCHECK(content.IsFlat());
984 if (content.IsOneByte()) {
985 CalculateLineEndsImpl(&line_ends, content.ToOneByteVector(),
986 include_ending_line);
987 } else {
988 CalculateLineEndsImpl(&line_ends, content.ToUC16Vector(),
989 include_ending_line);
990 }
991 }
992 int line_count = static_cast<int>(line_ends.size());
993 Handle<FixedArray> array =
994 isolate->factory()->NewFixedArray(line_count, AllocationType::kOld);
995 for (int i = 0; i < line_count; i++) {
996 array->set(i, Smi::FromInt(line_ends[i]));
997 }
998 return array;
999 }
1000
// Explicit instantiations for the main-thread isolate and the background
// (local) isolate.
template Handle<FixedArray> String::CalculateLineEnds(Isolate* isolate,
                                                      Handle<String> src,
                                                      bool include_ending_line);
template Handle<FixedArray> String::CalculateLineEnds(LocalIsolate* isolate,
                                                      Handle<String> src,
                                                      bool include_ending_line);
1007
bool String::SlowEquals(String other) const {
  // Neither string is shared across threads, so no access guard is needed;
  // delegate to the guarded variant.
  DCHECK(!SharedStringAccessGuardIfNeeded::IsNeeded(*this));
  DCHECK(!SharedStringAccessGuardIfNeeded::IsNeeded(other));
  return SlowEquals(other, SharedStringAccessGuardIfNeeded::NotNeeded());
}
1013
// Content equality between two strings of possibly different
// representations, with several negative fast paths before the
// character-by-character comparison.
bool String::SlowEquals(
    String other, const SharedStringAccessGuardIfNeeded& access_guard) const {
  DisallowGarbageCollection no_gc;
  // Fast check: negative check with lengths.
  int len = length();
  if (len != other.length()) return false;
  if (len == 0) return true;

  PtrComprCageBase cage_base = GetPtrComprCageBase(*this);

  // Fast check: if at least one ThinString is involved, dereference it/them
  // and restart.
  // NOTE(review): the restart calls Equals without the access guard —
  // presumably thin-string targets never require one; confirm.
  if (this->IsThinString(cage_base) || other.IsThinString(cage_base)) {
    if (other.IsThinString(cage_base))
      other = ThinString::cast(other).actual(cage_base);
    if (this->IsThinString(cage_base)) {
      return ThinString::cast(*this).actual(cage_base).Equals(other);
    } else {
      return this->Equals(other);
    }
  }

  // Fast check: if hash code is computed for both strings
  // a fast negative check can be performed.
  if (HasHashCode() && other.HasHashCode()) {
#ifdef ENABLE_SLOW_DCHECKS
    if (FLAG_enable_slow_asserts) {
      if (hash() != other.hash()) {
        // Slow sanity check: differing hashes must imply at least one
        // differing character.
        bool found_difference = false;
        for (int i = 0; i < len; i++) {
          if (Get(i) != other.Get(i)) {
            found_difference = true;
            break;
          }
        }
        DCHECK(found_difference);
      }
    }
#endif
    if (hash() != other.hash()) return false;
  }

  // We know the strings are both non-empty. Compare the first chars
  // before we try to flatten the strings.
  if (this->Get(0, cage_base, access_guard) !=
      other.Get(0, cage_base, access_guard))
    return false;

  // Fast path: both are sequential one-byte strings — compare raw bytes.
  if (IsSeqOneByteString() && other.IsSeqOneByteString()) {
    const uint8_t* str1 =
        SeqOneByteString::cast(*this).GetChars(no_gc, access_guard);
    const uint8_t* str2 =
        SeqOneByteString::cast(other).GetChars(no_gc, access_guard);
    return CompareCharsEqual(str1, str2, len);
  }

  // General case: walk both representations with a StringComparator.
  StringComparator comparator;
  return comparator.Equals(*this, other, access_guard);
}
1073
1074 // static
SlowEquals(Isolate * isolate,Handle<String> one,Handle<String> two)1075 bool String::SlowEquals(Isolate* isolate, Handle<String> one,
1076 Handle<String> two) {
1077 // Fast check: negative check with lengths.
1078 const int one_length = one->length();
1079 if (one_length != two->length()) return false;
1080 if (one_length == 0) return true;
1081
1082 // Fast check: if at least one ThinString is involved, dereference it/them
1083 // and restart.
1084 if (one->IsThinString() || two->IsThinString()) {
1085 if (one->IsThinString()) {
1086 one = handle(ThinString::cast(*one).actual(), isolate);
1087 }
1088 if (two->IsThinString()) {
1089 two = handle(ThinString::cast(*two).actual(), isolate);
1090 }
1091 return String::Equals(isolate, one, two);
1092 }
1093
1094 // Fast check: if hash code is computed for both strings
1095 // a fast negative check can be performed.
1096 if (one->HasHashCode() && two->HasHashCode()) {
1097 #ifdef ENABLE_SLOW_DCHECKS
1098 if (FLAG_enable_slow_asserts) {
1099 if (one->hash() != two->hash()) {
1100 bool found_difference = false;
1101 for (int i = 0; i < one_length; i++) {
1102 if (one->Get(i) != two->Get(i)) {
1103 found_difference = true;
1104 break;
1105 }
1106 }
1107 DCHECK(found_difference);
1108 }
1109 }
1110 #endif
1111 if (one->hash() != two->hash()) return false;
1112 }
1113
1114 // We know the strings are both non-empty. Compare the first chars
1115 // before we try to flatten the strings.
1116 if (one->Get(0) != two->Get(0)) return false;
1117
1118 one = String::Flatten(isolate, one);
1119 two = String::Flatten(isolate, two);
1120
1121 DisallowGarbageCollection no_gc;
1122 String::FlatContent flat1 = one->GetFlatContent(no_gc);
1123 String::FlatContent flat2 = two->GetFlatContent(no_gc);
1124
1125 if (flat1.IsOneByte() && flat2.IsOneByte()) {
1126 return CompareCharsEqual(flat1.ToOneByteVector().begin(),
1127 flat2.ToOneByteVector().begin(), one_length);
1128 } else if (flat1.IsTwoByte() && flat2.IsTwoByte()) {
1129 return CompareCharsEqual(flat1.ToUC16Vector().begin(),
1130 flat2.ToUC16Vector().begin(), one_length);
1131 } else if (flat1.IsOneByte() && flat2.IsTwoByte()) {
1132 return CompareCharsEqual(flat1.ToOneByteVector().begin(),
1133 flat2.ToUC16Vector().begin(), one_length);
1134 } else if (flat1.IsTwoByte() && flat2.IsOneByte()) {
1135 return CompareCharsEqual(flat1.ToUC16Vector().begin(),
1136 flat2.ToOneByteVector().begin(), one_length);
1137 }
1138 UNREACHABLE();
1139 }
1140
1141 // static
Compare(Isolate * isolate,Handle<String> x,Handle<String> y)1142 ComparisonResult String::Compare(Isolate* isolate, Handle<String> x,
1143 Handle<String> y) {
1144 // A few fast case tests before we flatten.
1145 if (x.is_identical_to(y)) {
1146 return ComparisonResult::kEqual;
1147 } else if (y->length() == 0) {
1148 return x->length() == 0 ? ComparisonResult::kEqual
1149 : ComparisonResult::kGreaterThan;
1150 } else if (x->length() == 0) {
1151 return ComparisonResult::kLessThan;
1152 }
1153
1154 int const d = x->Get(0) - y->Get(0);
1155 if (d < 0) {
1156 return ComparisonResult::kLessThan;
1157 } else if (d > 0) {
1158 return ComparisonResult::kGreaterThan;
1159 }
1160
1161 // Slow case.
1162 x = String::Flatten(isolate, x);
1163 y = String::Flatten(isolate, y);
1164
1165 DisallowGarbageCollection no_gc;
1166 ComparisonResult result = ComparisonResult::kEqual;
1167 int prefix_length = x->length();
1168 if (y->length() < prefix_length) {
1169 prefix_length = y->length();
1170 result = ComparisonResult::kGreaterThan;
1171 } else if (y->length() > prefix_length) {
1172 result = ComparisonResult::kLessThan;
1173 }
1174 int r;
1175 String::FlatContent x_content = x->GetFlatContent(no_gc);
1176 String::FlatContent y_content = y->GetFlatContent(no_gc);
1177 if (x_content.IsOneByte()) {
1178 base::Vector<const uint8_t> x_chars = x_content.ToOneByteVector();
1179 if (y_content.IsOneByte()) {
1180 base::Vector<const uint8_t> y_chars = y_content.ToOneByteVector();
1181 r = CompareChars(x_chars.begin(), y_chars.begin(), prefix_length);
1182 } else {
1183 base::Vector<const base::uc16> y_chars = y_content.ToUC16Vector();
1184 r = CompareChars(x_chars.begin(), y_chars.begin(), prefix_length);
1185 }
1186 } else {
1187 base::Vector<const base::uc16> x_chars = x_content.ToUC16Vector();
1188 if (y_content.IsOneByte()) {
1189 base::Vector<const uint8_t> y_chars = y_content.ToOneByteVector();
1190 r = CompareChars(x_chars.begin(), y_chars.begin(), prefix_length);
1191 } else {
1192 base::Vector<const base::uc16> y_chars = y_content.ToUC16Vector();
1193 r = CompareChars(x_chars.begin(), y_chars.begin(), prefix_length);
1194 }
1195 }
1196 if (r < 0) {
1197 result = ComparisonResult::kLessThan;
1198 } else if (r > 0) {
1199 result = ComparisonResult::kGreaterThan;
1200 }
1201 return result;
1202 }
1203
1204 namespace {
1205
ToValidIndex(String str,Object number)1206 uint32_t ToValidIndex(String str, Object number) {
1207 uint32_t index = PositiveNumberToUint32(number);
1208 uint32_t length_value = static_cast<uint32_t>(str.length());
1209 if (index > length_value) return length_value;
1210 return index;
1211 }
1212
1213 } // namespace
1214
// Implements the receiver/argument coercions of String.prototype.indexOf
// and returns the resulting index as a Smi (or throws).
Object String::IndexOf(Isolate* isolate, Handle<Object> receiver,
                       Handle<Object> search, Handle<Object> position) {
  // indexOf is not callable on null or undefined receivers.
  if (receiver->IsNullOrUndefined(isolate)) {
    THROW_NEW_ERROR_RETURN_FAILURE(
        isolate, NewTypeError(MessageTemplate::kCalledOnNullOrUndefined,
                              isolate->factory()->NewStringFromAsciiChecked(
                                  "String.prototype.indexOf")));
  }
  // Coerce receiver and search value to strings; position to an integer.
  Handle<String> receiver_string;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, receiver_string,
                                     Object::ToString(isolate, receiver));

  Handle<String> search_string;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, search_string,
                                     Object::ToString(isolate, search));

  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, position,
                                     Object::ToInteger(isolate, position));

  // Clamp the start position into [0, receiver length].
  uint32_t index = ToValidIndex(*receiver_string, *position);
  return Smi::FromInt(
      String::IndexOf(isolate, receiver_string, search_string, index));
}
1238
1239 namespace {
1240
1241 template <typename T>
SearchString(Isolate * isolate,String::FlatContent receiver_content,base::Vector<T> pat_vector,int start_index)1242 int SearchString(Isolate* isolate, String::FlatContent receiver_content,
1243 base::Vector<T> pat_vector, int start_index) {
1244 if (receiver_content.IsOneByte()) {
1245 return SearchString(isolate, receiver_content.ToOneByteVector(), pat_vector,
1246 start_index);
1247 }
1248 return SearchString(isolate, receiver_content.ToUC16Vector(), pat_vector,
1249 start_index);
1250 }
1251
1252 } // namespace
1253
IndexOf(Isolate * isolate,Handle<String> receiver,Handle<String> search,int start_index)1254 int String::IndexOf(Isolate* isolate, Handle<String> receiver,
1255 Handle<String> search, int start_index) {
1256 DCHECK_LE(0, start_index);
1257 DCHECK(start_index <= receiver->length());
1258
1259 uint32_t search_length = search->length();
1260 if (search_length == 0) return start_index;
1261
1262 uint32_t receiver_length = receiver->length();
1263 if (start_index + search_length > receiver_length) return -1;
1264
1265 receiver = String::Flatten(isolate, receiver);
1266 search = String::Flatten(isolate, search);
1267
1268 DisallowGarbageCollection no_gc; // ensure vectors stay valid
1269 // Extract flattened substrings of cons strings before getting encoding.
1270 String::FlatContent receiver_content = receiver->GetFlatContent(no_gc);
1271 String::FlatContent search_content = search->GetFlatContent(no_gc);
1272
1273 // dispatch on type of strings
1274 if (search_content.IsOneByte()) {
1275 base::Vector<const uint8_t> pat_vector = search_content.ToOneByteVector();
1276 return SearchString<const uint8_t>(isolate, receiver_content, pat_vector,
1277 start_index);
1278 }
1279 base::Vector<const base::uc16> pat_vector = search_content.ToUC16Vector();
1280 return SearchString<const base::uc16>(isolate, receiver_content, pat_vector,
1281 start_index);
1282 }
1283
// Expands '$'-escapes in a replacement string per the GetSubstitution
// abstract operation (used by String.prototype.replace and friends):
// $$, $&, $`, $', $1..$99 and $<name>. Unrecognized escapes are copied
// through literally. Scanning starts at {start_index}.
MaybeHandle<String> String::GetSubstitution(Isolate* isolate, Match* match,
                                            Handle<String> replacement,
                                            int start_index) {
  DCHECK_GE(start_index, 0);

  Factory* factory = isolate->factory();

  const int replacement_length = replacement->length();
  const int captures_length = match->CaptureCount();

  replacement = String::Flatten(isolate, replacement);

  Handle<String> dollar_string =
      factory->LookupSingleCharacterStringFromCode('$');
  int next_dollar_ix =
      String::IndexOf(isolate, replacement, dollar_string, start_index);
  // No '$' at all: the replacement is used verbatim.
  if (next_dollar_ix < 0) {
    return replacement;
  }

  IncrementalStringBuilder builder(isolate);

  // Copy everything before the first '$' unchanged.
  if (next_dollar_ix > 0) {
    builder.AppendString(factory->NewSubString(replacement, 0, next_dollar_ix));
  }

  // Each iteration handles the escape at {next_dollar_ix}, then finds the
  // next '$' (or finishes).
  while (true) {
    const int peek_ix = next_dollar_ix + 1;
    // A trailing '$' at the very end is literal.
    if (peek_ix >= replacement_length) {
      builder.AppendCharacter('$');
      return builder.Finish();
    }

    // Index from which scanning resumes after this escape.
    int continue_from_ix = -1;
    const uint16_t peek = replacement->Get(peek_ix);
    switch (peek) {
      case '$':  // $$
        builder.AppendCharacter('$');
        continue_from_ix = peek_ix + 1;
        break;
      case '&':  // $& - match
        builder.AppendString(match->GetMatch());
        continue_from_ix = peek_ix + 1;
        break;
      case '`':  // $` - prefix
        builder.AppendString(match->GetPrefix());
        continue_from_ix = peek_ix + 1;
        break;
      case '\'':  // $' - suffix
        builder.AppendString(match->GetSuffix());
        continue_from_ix = peek_ix + 1;
        break;
      case '0':
      case '1':
      case '2':
      case '3':
      case '4':
      case '5':
      case '6':
      case '7':
      case '8':
      case '9': {
        // Valid indices are $1 .. $9, $01 .. $09 and $10 .. $99
        int scaled_index = (peek - '0');
        int advance = 1;

        // Greedily try to consume a second digit, but only if the two-digit
        // capture index actually exists.
        if (peek_ix + 1 < replacement_length) {
          const uint16_t next_peek = replacement->Get(peek_ix + 1);
          if (next_peek >= '0' && next_peek <= '9') {
            const int new_scaled_index = scaled_index * 10 + (next_peek - '0');
            if (new_scaled_index < captures_length) {
              scaled_index = new_scaled_index;
              advance = 2;
            }
          }
        }

        // $0 and out-of-range indices are treated as literal text.
        if (scaled_index == 0 || scaled_index >= captures_length) {
          builder.AppendCharacter('$');
          continue_from_ix = peek_ix;
          break;
        }

        bool capture_exists;
        Handle<String> capture;
        ASSIGN_RETURN_ON_EXCEPTION(
            isolate, capture, match->GetCapture(scaled_index, &capture_exists),
            String);
        // Unmatched captures expand to the empty string (append nothing).
        if (capture_exists) builder.AppendString(capture);
        continue_from_ix = peek_ix + advance;
        break;
      }
      case '<': {  // $<name> - named capture
        using CaptureState = String::Match::CaptureState;

        // Without named captures, '$<' is literal text.
        if (!match->HasNamedCaptures()) {
          builder.AppendCharacter('$');
          continue_from_ix = peek_ix;
          break;
        }

        Handle<String> bracket_string =
            factory->LookupSingleCharacterStringFromCode('>');
        const int closing_bracket_ix =
            String::IndexOf(isolate, replacement, bracket_string, peek_ix + 1);

        if (closing_bracket_ix == -1) {
          // No closing bracket was found, treat '$<' as a string literal.
          builder.AppendCharacter('$');
          continue_from_ix = peek_ix;
          break;
        }

        Handle<String> capture_name =
            factory->NewSubString(replacement, peek_ix + 1, closing_bracket_ix);
        Handle<String> capture;
        CaptureState capture_state;
        ASSIGN_RETURN_ON_EXCEPTION(
            isolate, capture,
            match->GetNamedCapture(capture_name, &capture_state), String);

        if (capture_state == CaptureState::MATCHED) {
          builder.AppendString(capture);
        }

        continue_from_ix = closing_bracket_ix + 1;
        break;
      }
      default:
        // Unrecognized escape: '$' is literal; re-scan from the peeked char.
        builder.AppendCharacter('$');
        continue_from_ix = peek_ix;
        break;
    }

    // Go the the next $ in the replacement.
    // TODO(jgruber): Single-char lookups could be much more efficient.
    DCHECK_NE(continue_from_ix, -1);
    next_dollar_ix =
        String::IndexOf(isolate, replacement, dollar_string, continue_from_ix);

    // Return if there are no more $ characters in the replacement. If we
    // haven't reached the end, we need to append the suffix.
    if (next_dollar_ix < 0) {
      if (continue_from_ix < replacement_length) {
        builder.AppendString(factory->NewSubString(
            replacement, continue_from_ix, replacement_length));
      }
      return builder.Finish();
    }

    // Append substring between the previous and the next $ character.
    if (next_dollar_ix > continue_from_ix) {
      builder.AppendString(
          factory->NewSubString(replacement, continue_from_ix, next_dollar_ix));
    }
  }

  UNREACHABLE();
}
1443
1444 namespace { // for String.Prototype.lastIndexOf
1445
1446 template <typename schar, typename pchar>
StringMatchBackwards(base::Vector<const schar> subject,base::Vector<const pchar> pattern,int idx)1447 int StringMatchBackwards(base::Vector<const schar> subject,
1448 base::Vector<const pchar> pattern, int idx) {
1449 int pattern_length = pattern.length();
1450 DCHECK_GE(pattern_length, 1);
1451 DCHECK(idx + pattern_length <= subject.length());
1452
1453 if (sizeof(schar) == 1 && sizeof(pchar) > 1) {
1454 for (int i = 0; i < pattern_length; i++) {
1455 base::uc16 c = pattern[i];
1456 if (c > String::kMaxOneByteCharCode) {
1457 return -1;
1458 }
1459 }
1460 }
1461
1462 pchar pattern_first_char = pattern[0];
1463 for (int i = idx; i >= 0; i--) {
1464 if (subject[i] != pattern_first_char) continue;
1465 int j = 1;
1466 while (j < pattern_length) {
1467 if (pattern[j] != subject[i + j]) {
1468 break;
1469 }
1470 j++;
1471 }
1472 if (j == pattern_length) {
1473 return i;
1474 }
1475 }
1476 return -1;
1477 }
1478
1479 } // namespace
1480
// Implements the receiver/argument coercions of String.prototype.lastIndexOf
// and returns the resulting index as a Smi (or throws).
Object String::LastIndexOf(Isolate* isolate, Handle<Object> receiver,
                           Handle<Object> search, Handle<Object> position) {
  // lastIndexOf is not callable on null or undefined receivers.
  if (receiver->IsNullOrUndefined(isolate)) {
    THROW_NEW_ERROR_RETURN_FAILURE(
        isolate, NewTypeError(MessageTemplate::kCalledOnNullOrUndefined,
                              isolate->factory()->NewStringFromAsciiChecked(
                                  "String.prototype.lastIndexOf")));
  }
  Handle<String> receiver_string;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, receiver_string,
                                     Object::ToString(isolate, receiver));

  Handle<String> search_string;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, search_string,
                                     Object::ToString(isolate, search));

  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, position,
                                     Object::ToNumber(isolate, position));

  uint32_t start_index;

  // A NaN position means "search the whole string".
  if (position->IsNaN()) {
    start_index = receiver_string->length();
  } else {
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, position,
                                       Object::ToInteger(isolate, position));
    start_index = ToValidIndex(*receiver_string, *position);
  }

  uint32_t pattern_length = search_string->length();
  uint32_t receiver_length = receiver_string->length();

  // Clamp so that the pattern, placed at start_index, fits in the receiver.
  // NOTE(review): if pattern_length > receiver_length this unsigned
  // subtraction wraps around; StringMatchBackwards then sees a negative
  // index after int conversion and returns -1 — presumably intended, but
  // worth confirming.
  if (start_index + pattern_length > receiver_length) {
    start_index = receiver_length - pattern_length;
  }

  // The empty pattern matches at the (clamped) start index.
  if (pattern_length == 0) {
    return Smi::FromInt(start_index);
  }

  receiver_string = String::Flatten(isolate, receiver_string);
  search_string = String::Flatten(isolate, search_string);

  int last_index = -1;
  DisallowGarbageCollection no_gc;  // ensure vectors stay valid

  String::FlatContent receiver_content = receiver_string->GetFlatContent(no_gc);
  String::FlatContent search_content = search_string->GetFlatContent(no_gc);

  // Dispatch on the encodings of pattern and receiver.
  if (search_content.IsOneByte()) {
    base::Vector<const uint8_t> pat_vector = search_content.ToOneByteVector();
    if (receiver_content.IsOneByte()) {
      last_index = StringMatchBackwards(receiver_content.ToOneByteVector(),
                                        pat_vector, start_index);
    } else {
      last_index = StringMatchBackwards(receiver_content.ToUC16Vector(),
                                        pat_vector, start_index);
    }
  } else {
    base::Vector<const base::uc16> pat_vector = search_content.ToUC16Vector();
    if (receiver_content.IsOneByte()) {
      last_index = StringMatchBackwards(receiver_content.ToOneByteVector(),
                                        pat_vector, start_index);
    } else {
      last_index = StringMatchBackwards(receiver_content.ToUC16Vector(),
                                        pat_vector, start_index);
    }
  }
  return Smi::FromInt(last_index);
}
1551
// Returns true if this string starts with the one-byte sequence {str}.
bool String::HasOneBytePrefix(base::Vector<const char> str) {
  // Only valid on strings that are not shared across threads (no guard).
  DCHECK(!SharedStringAccessGuardIfNeeded::IsNeeded(*this));
  return IsEqualToImpl<EqualityType::kPrefix>(
      str, GetPtrComprCageBase(*this),
      SharedStringAccessGuardIfNeeded::NotNeeded());
}
1558
1559 namespace {
1560
1561 template <typename Char>
IsIdentifierVector(const base::Vector<Char> & vec)1562 bool IsIdentifierVector(const base::Vector<Char>& vec) {
1563 if (vec.empty()) {
1564 return false;
1565 }
1566 if (!IsIdentifierStart(vec[0])) {
1567 return false;
1568 }
1569 for (size_t i = 1; i < vec.size(); ++i) {
1570 if (!IsIdentifierPart(vec[i])) {
1571 return false;
1572 }
1573 }
1574 return true;
1575 }
1576
1577 } // namespace
1578
// static
// Returns true if {str} forms a valid identifier.
bool String::IsIdentifier(Isolate* isolate, Handle<String> str) {
  // Flatten so the characters can be inspected as one contiguous vector.
  str = String::Flatten(isolate, str);
  DisallowGarbageCollection no_gc;
  String::FlatContent flat = str->GetFlatContent(no_gc);
  // Dispatch on encoding.
  return flat.IsOneByte() ? IsIdentifierVector(flat.ToOneByteVector())
                          : IsIdentifierVector(flat.ToUC16Vector());
}
1587
namespace {

// Hashes {length} characters of {string} starting at {start} with {seed}.
// {Char} selects the character width and must match the string's encoding.
template <typename Char>
uint32_t HashString(String string, size_t start, int length, uint64_t seed,
                    PtrComprCageBase cage_base,
                    const SharedStringAccessGuardIfNeeded& access_guard) {
  DisallowGarbageCollection no_gc;

  // Over-long strings get a cheap, length-only hash to bound hashing cost.
  if (length > String::kMaxHashCalcLength) {
    return StringHasher::GetTrivialHash(length);
  }

  std::unique_ptr<Char[]> buffer;
  const Char* chars;

  if (string.IsConsString(cage_base)) {
    // Non-flat cons string: materialize its characters into a temporary
    // buffer first (only reached with start == 0, see DCHECKs).
    DCHECK_EQ(0, start);
    DCHECK(!string.IsFlat());
    buffer.reset(new Char[length]);
    String::WriteToFlat(string, buffer.get(), 0, length, cage_base,
                        access_guard);
    chars = buffer.get();
  } else {
    // Direct string: hash the characters in place.
    chars = string.GetChars<Char>(cage_base, no_gc, access_guard) + start;
  }

  return StringHasher::HashSequentialString<Char>(chars, length, seed);
}

}  // namespace
1618
uint32_t String::ComputeAndSetHash() {
  // Not shared across threads, so no access guard is needed; delegate to the
  // guarded variant.
  DCHECK(!SharedStringAccessGuardIfNeeded::IsNeeded(*this));
  return ComputeAndSetHash(SharedStringAccessGuardIfNeeded::NotNeeded());
}
// Computes this string's hash, caches it in the raw hash field, and returns
// the decoded hash value.
uint32_t String::ComputeAndSetHash(
    const SharedStringAccessGuardIfNeeded& access_guard) {
  DisallowGarbageCollection no_gc;
  // Should only be called if hash code has not yet been computed.
  //
  // If in-place internalizable strings are shared, there may be calls to
  // ComputeAndSetHash in parallel. Since only flat strings are in-place
  // internalizable and their contents do not change, the result hash is the
  // same. The raw hash field is stored with relaxed ordering.
  DCHECK_IMPLIES(!FLAG_shared_string_table, !HasHashCode());

  // Store the hash code in the object.
  uint64_t seed = HashSeed(GetReadOnlyRoots());
  size_t start = 0;
  String string = *this;
  PtrComprCageBase cage_base = GetPtrComprCageBase(string);
  StringShape shape(string, cage_base);
  // Unwrap indirect representations so the actual character storage is
  // hashed: sliced strings hash a subrange of their parent...
  if (shape.IsSliced()) {
    SlicedString sliced = SlicedString::cast(string);
    start = sliced.offset();
    string = sliced.parent(cage_base);
    shape = StringShape(string, cage_base);
  }
  // ...flat cons strings hash their first child...
  if (shape.IsCons() && string.IsFlat(cage_base)) {
    string = ConsString::cast(string).first(cage_base);
    shape = StringShape(string, cage_base);
  }
  // ...and thin strings reuse the target's cached hash when lengths agree.
  if (shape.IsThin()) {
    string = ThinString::cast(string).actual(cage_base);
    shape = StringShape(string, cage_base);
    if (length() == string.length()) {
      set_raw_hash_field(string.raw_hash_field());
      return hash();
    }
  }
  // Dispatch on the encoding of the unwrapped string.
  uint32_t raw_hash_field =
      shape.encoding_tag() == kOneByteStringTag
          ? HashString<uint8_t>(string, start, length(), seed, cage_base,
                                access_guard)
          : HashString<uint16_t>(string, start, length(), seed, cage_base,
                                 access_guard);
  set_raw_hash_field(raw_hash_field);

  // Check the hash code is there.
  DCHECK(HasHashCode());
  uint32_t result = HashBits::decode(raw_hash_field);
  DCHECK_NE(result, 0);  // Ensure that the hash value of 0 is never computed.
  return result;
}
1672
SlowAsArrayIndex(uint32_t * index)1673 bool String::SlowAsArrayIndex(uint32_t* index) {
1674 DisallowGarbageCollection no_gc;
1675 int length = this->length();
1676 if (length <= kMaxCachedArrayIndexLength) {
1677 EnsureHash(); // Force computation of hash code.
1678 uint32_t field = raw_hash_field();
1679 if (!IsIntegerIndex(field)) return false;
1680 *index = ArrayIndexValueBits::decode(field);
1681 return true;
1682 }
1683 if (length == 0 || length > kMaxArrayIndexSize) return false;
1684 StringCharacterStream stream(*this);
1685 return StringToIndex(&stream, index);
1686 }
1687
SlowAsIntegerIndex(size_t * index)1688 bool String::SlowAsIntegerIndex(size_t* index) {
1689 DisallowGarbageCollection no_gc;
1690 int length = this->length();
1691 if (length <= kMaxCachedArrayIndexLength) {
1692 EnsureHash(); // Force computation of hash code.
1693 uint32_t field = raw_hash_field();
1694 if (!IsIntegerIndex(field)) return false;
1695 *index = ArrayIndexValueBits::decode(field);
1696 return true;
1697 }
1698 if (length == 0 || length > kMaxIntegerIndexSize) return false;
1699 StringCharacterStream stream(*this);
1700 return StringToIndex<StringCharacterStream, size_t, kToIntegerIndex>(&stream,
1701 index);
1702 }
1703
PrintOn(FILE * file)1704 void String::PrintOn(FILE* file) {
1705 int length = this->length();
1706 for (int i = 0; i < length; i++) {
1707 PrintF(file, "%c", Get(i));
1708 }
1709 }
1710
PrintOn(std::ostream & ostream)1711 void String::PrintOn(std::ostream& ostream) {
1712 int length = this->length();
1713 for (int i = 0; i < length; i++) {
1714 ostream.put(Get(i));
1715 }
1716 }
1717
// Shrinks a sequential string in place to |new_length| characters, filling
// the released tail with a filler object so the heap stays iterable.
// Returns the (possibly read-only empty) string; never reallocates.
// A |new_length| >= the current length is a no-op.
Handle<String> SeqString::Truncate(Handle<SeqString> string, int new_length) {
  if (new_length == 0) return string->GetReadOnlyRoots().empty_string_handle();

  int new_size, old_size;
  int old_length = string->length();
  // Only actual truncation is supported; growing is not.
  if (old_length <= new_length) return string;

  if (string->IsSeqOneByteString()) {
    old_size = SeqOneByteString::SizeFor(old_length);
    new_size = SeqOneByteString::SizeFor(new_length);
  } else {
    DCHECK(string->IsSeqTwoByteString());
    old_size = SeqTwoByteString::SizeFor(old_length);
    new_size = SeqTwoByteString::SizeFor(new_length);
  }

  // Number of bytes freed at the end of the object.
  int delta = old_size - new_size;

  Address start_of_string = string->address();
  DCHECK(IsAligned(start_of_string, kObjectAlignment));
  DCHECK(IsAligned(start_of_string + new_size, kObjectAlignment));

  Heap* heap = Heap::FromWritableHeapObject(*string);
  // Large objects occupy whole pages and are not shrunk with fillers.
  if (!heap->IsLargeObject(*string)) {
    // Sizes are pointer size aligned, so that we can use filler objects
    // that are a multiple of pointer size.
    heap->CreateFillerObjectAt(start_of_string + new_size, delta,
                               ClearRecordedSlots::kNo);
  }
  // We are storing the new length using release store after creating a filler
  // for the left-over space to avoid races with the sweeper thread.
  string->set_length(new_length, kReleaseStore);

  return string;
}
1753
clear_padding()1754 void SeqOneByteString::clear_padding() {
1755 int data_size = SeqString::kHeaderSize + length() * kOneByteSize;
1756 memset(reinterpret_cast<void*>(address() + data_size), 0,
1757 SizeFor(length()) - data_size);
1758 }
1759
clear_padding()1760 void SeqTwoByteString::clear_padding() {
1761 int data_size = SeqString::kHeaderSize + length() * base::kUC16Size;
1762 memset(reinterpret_cast<void*>(address() + data_size), 0,
1763 SizeFor(length()) - data_size);
1764 }
1765
Get(int index,PtrComprCageBase cage_base,const SharedStringAccessGuardIfNeeded & access_guard) const1766 uint16_t ConsString::Get(
1767 int index, PtrComprCageBase cage_base,
1768 const SharedStringAccessGuardIfNeeded& access_guard) const {
1769 DCHECK(index >= 0 && index < this->length());
1770
1771 // Check for a flattened cons string
1772 if (second(cage_base).length() == 0) {
1773 String left = first(cage_base);
1774 return left.Get(index);
1775 }
1776
1777 String string = String::cast(*this);
1778
1779 while (true) {
1780 if (StringShape(string, cage_base).IsCons()) {
1781 ConsString cons_string = ConsString::cast(string);
1782 String left = cons_string.first();
1783 if (left.length() > index) {
1784 string = left;
1785 } else {
1786 index -= left.length();
1787 string = cons_string.second(cage_base);
1788 }
1789 } else {
1790 return string.Get(index, cage_base, access_guard);
1791 }
1792 }
1793
1794 UNREACHABLE();
1795 }
1796
Get(int index,PtrComprCageBase cage_base,const SharedStringAccessGuardIfNeeded & access_guard) const1797 uint16_t ThinString::Get(
1798 int index, PtrComprCageBase cage_base,
1799 const SharedStringAccessGuardIfNeeded& access_guard) const {
1800 return actual(cage_base).Get(index, cage_base, access_guard);
1801 }
1802
Get(int index,PtrComprCageBase cage_base,const SharedStringAccessGuardIfNeeded & access_guard) const1803 uint16_t SlicedString::Get(
1804 int index, PtrComprCageBase cage_base,
1805 const SharedStringAccessGuardIfNeeded& access_guard) const {
1806 return parent(cage_base).Get(offset() + index, cage_base, access_guard);
1807 }
1808
ExternalPayloadSize() const1809 int ExternalString::ExternalPayloadSize() const {
1810 int length_multiplier = IsTwoByteRepresentation() ? i::kShortSize : kCharSize;
1811 return length() * length_multiplier;
1812 }
1813
// Creates a reader over an already-flat string. Registers itself as a
// Relocatable so the cached character pointer is refreshed after GC.
FlatStringReader::FlatStringReader(Isolate* isolate, Handle<String> str)
    : Relocatable(isolate), str_(str), length_(str->length()) {
#if DEBUG
  // Check that this constructor is called only from the main thread.
  DCHECK_EQ(ThreadId::Current(), isolate->thread_id());
#endif
  // Compute the initial cached start pointer.
  PostGarbageCollection();
}
1822
// Recomputes the cached pointer to the string's characters. Called from the
// constructor and again after every GC, since the underlying string (and
// thus its character storage) may have moved.
void FlatStringReader::PostGarbageCollection() {
  DCHECK(str_->IsFlat());
  DisallowGarbageCollection no_gc;
  // This does not actually prevent the vector from being relocated later.
  String::FlatContent content = str_->GetFlatContent(no_gc);
  DCHECK(content.IsFlat());
  is_one_byte_ = content.IsOneByte();
  // Cache the raw character pointer for the matching encoding.
  if (is_one_byte_) {
    start_ = content.ToOneByteVector().begin();
  } else {
    start_ = content.ToUC16Vector().begin();
  }
}
1836
// Prepares the iterator to walk |cons_string| starting at character |offset|.
// No traversal happens here; the first call to Continue() performs it.
void ConsStringIterator::Initialize(ConsString cons_string, int offset) {
  DCHECK(!cons_string.is_null());
  root_ = cons_string;
  consumed_ = offset;
  // Force stack blown condition to trigger restart: making maximum_depth_
  // exceed depth_ by kStackSize marks the frame stack as invalid, so the
  // next Continue() call falls into Search() and rebuilds it from root_.
  depth_ = 1;
  maximum_depth_ = kStackSize + depth_;
  DCHECK(StackBlown());
}
1846
// Returns the next non-empty leaf string of the cons tree, or a null String
// when the traversal is complete. On a restart (stack blown), *offset_out
// receives the character offset into the returned leaf at which to resume.
String ConsStringIterator::Continue(int* offset_out) {
  DCHECK_NE(depth_, 0);
  DCHECK_EQ(0, *offset_out);
  bool blew_stack = StackBlown();
  String string;
  // Get the next leaf if there is one.
  if (!blew_stack) string = NextLeaf(&blew_stack);
  // Restart search from root.
  if (blew_stack) {
    DCHECK(string.is_null());
    string = Search(offset_out);
  }
  // Ensure future calls return null immediately.
  if (string.is_null()) Reset(ConsString());
  return string;
}
1863
// Rebuilds the traversal stack from root_ and descends to the leaf that
// contains character position consumed_. Returns that leaf (or a null
// String if consumed_ lies past the end) and sets *offset_out to the
// offset within the leaf at which iteration should resume.
String ConsStringIterator::Search(int* offset_out) {
  ConsString cons_string = root_;
  // Reset the stack, pushing the root string.
  depth_ = 1;
  maximum_depth_ = 1;
  frames_[0] = cons_string;
  const int consumed = consumed_;
  int offset = 0;
  while (true) {
    // Loop until the string is found which contains the target offset.
    String string = cons_string.first();
    int length = string.length();
    int32_t type;
    if (consumed < offset + length) {
      // Target offset is in the left branch.
      // Keep going if we're still in a ConsString.
      type = string.map().instance_type();
      if ((type & kStringRepresentationMask) == kConsStringTag) {
        cons_string = ConsString::cast(string);
        PushLeft(cons_string);
        continue;
      }
      // Tell the stack we're done descending.
      AdjustMaximumDepth();
    } else {
      // Descend right.
      // Update progress through the string.
      offset += length;
      // Keep going if we're still in a ConsString.
      string = cons_string.second();
      type = string.map().instance_type();
      if ((type & kStringRepresentationMask) == kConsStringTag) {
        cons_string = ConsString::cast(string);
        PushRight(cons_string);
        continue;
      }
      // Need this to be updated for the current string.
      length = string.length();
      // Account for the possibility of an empty right leaf.
      // This happens only if we have asked for an offset outside the string.
      if (length == 0) {
        // Reset so future operations will return null immediately.
        Reset(ConsString());
        return String();
      }
      // Tell the stack we're done descending.
      AdjustMaximumDepth();
      // Pop stack so next iteration is in correct place.
      Pop();
    }
    DCHECK_NE(length, 0);
    // Adjust return values and exit.
    consumed_ = offset + length;
    *offset_out = consumed - offset;
    return string;
  }
  UNREACHABLE();
}
1922
// Advances to the next leaf using the saved frame stack. Returns a null
// String when the traversal finished (*blew_stack = false) or when the
// stack no longer tracks the current position (*blew_stack = true), in
// which case the caller must restart via Search().
String ConsStringIterator::NextLeaf(bool* blew_stack) {
  while (true) {
    // Tree traversal complete.
    if (depth_ == 0) {
      *blew_stack = false;
      return String();
    }
    // We've lost track of higher nodes.
    if (StackBlown()) {
      *blew_stack = true;
      return String();
    }
    // Go right.
    ConsString cons_string = frames_[OffsetForDepth(depth_ - 1)];
    String string = cons_string.second();
    int32_t type = string.map().instance_type();
    if ((type & kStringRepresentationMask) != kConsStringTag) {
      // Pop stack so next iteration is in correct place.
      Pop();
      int length = string.length();
      // Could be a flattened ConsString.
      if (length == 0) continue;
      consumed_ += length;
      return string;
    }
    cons_string = ConsString::cast(string);
    PushRight(cons_string);
    // Need to traverse all the way left.
    while (true) {
      // Continue left.
      string = cons_string.first();
      type = string.map().instance_type();
      if ((type & kStringRepresentationMask) != kConsStringTag) {
        AdjustMaximumDepth();
        int length = string.length();
        if (length == 0) break;  // Skip empty left-hand sides of ConsStrings.
        consumed_ += length;
        return string;
      }
      cons_string = ConsString::cast(string);
      PushLeft(cons_string);
    }
  }
  UNREACHABLE();
}
1968
// Returns the raw address of the character at |start_index| in a flat
// string. Unwraps one level of cons/sliced indirection and then a possible
// thin wrapper to reach the sequential or external backing store. Requires
// the string to be flat so the single unwrap step is sufficient.
const byte* String::AddressOfCharacterAt(
    int start_index, const DisallowGarbageCollection& no_gc) {
  DCHECK(IsFlat());
  String subject = *this;
  PtrComprCageBase cage_base = GetPtrComprCageBase(subject);
  StringShape shape(subject, cage_base);
  if (subject.IsConsString(cage_base)) {
    // A flat cons string holds all its content in the first part.
    subject = ConsString::cast(subject).first(cage_base);
    shape = StringShape(subject, cage_base);
  } else if (subject.IsSlicedString(cage_base)) {
    // Translate the index into parent coordinates.
    start_index += SlicedString::cast(subject).offset();
    subject = SlicedString::cast(subject).parent(cage_base);
    shape = StringShape(subject, cage_base);
  }
  // The unwrapped string may itself be a thin wrapper; follow it.
  if (subject.IsThinString(cage_base)) {
    subject = ThinString::cast(subject).actual(cage_base);
    shape = StringShape(subject, cage_base);
  }
  CHECK_LE(0, start_index);
  CHECK_LE(start_index, subject.length());
  // Only sequential and external strings expose raw character storage.
  switch (shape.representation_and_encoding_tag()) {
    case kOneByteStringTag | kSeqStringTag:
      return reinterpret_cast<const byte*>(
          SeqOneByteString::cast(subject).GetChars(no_gc) + start_index);
    case kTwoByteStringTag | kSeqStringTag:
      return reinterpret_cast<const byte*>(
          SeqTwoByteString::cast(subject).GetChars(no_gc) + start_index);
    case kOneByteStringTag | kExternalStringTag:
      return reinterpret_cast<const byte*>(
          ExternalOneByteString::cast(subject).GetChars(cage_base) +
          start_index);
    case kTwoByteStringTag | kExternalStringTag:
      return reinterpret_cast<const byte*>(
          ExternalTwoByteString::cast(subject).GetChars(cage_base) +
          start_index);
    default:
      UNREACHABLE();
  }
}
2008
// Explicit instantiations of String::WriteToFlat for both sink character
// widths (uint8_t/uint16_t), with and without the cage-base/access-guard
// overload, so the template definitions are emitted (and exported) from
// this translation unit.
template EXPORT_TEMPLATE_DEFINE(V8_EXPORT_PRIVATE) void String::WriteToFlat(
    String source, uint16_t* sink, int from, int to);
template EXPORT_TEMPLATE_DEFINE(V8_EXPORT_PRIVATE) void String::WriteToFlat(
    String source, uint8_t* sink, int from, int to);
template EXPORT_TEMPLATE_DEFINE(V8_EXPORT_PRIVATE) void String::WriteToFlat(
    String source, uint16_t* sink, int from, int to, PtrComprCageBase cage_base,
    const SharedStringAccessGuardIfNeeded&);
template EXPORT_TEMPLATE_DEFINE(V8_EXPORT_PRIVATE) void String::WriteToFlat(
    String source, uint8_t* sink, int from, int to, PtrComprCageBase cage_base,
    const SharedStringAccessGuardIfNeeded&);
2019
namespace {
// Check that the constants defined in src/objects/instance-type.h coincide
// with the Torque-definition of string instance types in src/objects/string.tq.

DEFINE_TORQUE_GENERATED_STRING_INSTANCE_TYPE()

// Representation bits (seq/cons/external/sliced/thin).
STATIC_ASSERT(kStringRepresentationMask == RepresentationBits::kMask);

// Encoding bit (one-byte vs. two-byte).
STATIC_ASSERT(kStringEncodingMask == IsOneByteBit::kMask);
STATIC_ASSERT(kTwoByteStringTag == IsOneByteBit::encode(false));
STATIC_ASSERT(kOneByteStringTag == IsOneByteBit::encode(true));

// Uncached-external bit.
STATIC_ASSERT(kUncachedExternalStringMask == IsUncachedBit::kMask);
STATIC_ASSERT(kUncachedExternalStringTag == IsUncachedBit::encode(true));

// Internalization bit.
STATIC_ASSERT(kIsNotInternalizedMask == IsNotInternalizedBit::kMask);
STATIC_ASSERT(kNotInternalizedTag == IsNotInternalizedBit::encode(true));
STATIC_ASSERT(kInternalizedTag == IsNotInternalizedBit::encode(false));
}  // namespace
2039
2040 } // namespace internal
2041 } // namespace v8
2042