1 // Copyright 2017 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #ifndef V8_OBJECTS_STRING_INL_H_
6 #define V8_OBJECTS_STRING_INL_H_
7
8 #include "src/common/assert-scope.h"
9 #include "src/common/globals.h"
10 #include "src/execution/isolate-utils.h"
11 #include "src/handles/handles-inl.h"
12 #include "src/heap/factory.h"
13 #include "src/numbers/hash-seed-inl.h"
14 #include "src/objects/name-inl.h"
15 #include "src/objects/smi-inl.h"
16 #include "src/objects/string-table-inl.h"
17 #include "src/objects/string.h"
18 #include "src/sandbox/external-pointer-inl.h"
19 #include "src/sandbox/external-pointer.h"
20 #include "src/strings/string-hasher-inl.h"
21 #include "src/utils/utils.h"
22
23 // Has to be the last include (doesn't have include guards):
24 #include "src/objects/object-macros.h"
25
26 namespace v8 {
27 namespace internal {
28
29 #include "torque-generated/src/objects/string-tq-inl.inc"
30
31 class V8_NODISCARD SharedStringAccessGuardIfNeeded {
32 public:
33 // Creates no SharedMutexGuard<kShared> for the string access since it was
34 // called from the main thread.
SharedStringAccessGuardIfNeeded(Isolate * isolate)35 explicit SharedStringAccessGuardIfNeeded(Isolate* isolate) {}
36
37 // Creates a SharedMutexGuard<kShared> for the string access if it was called
38 // from a background thread.
SharedStringAccessGuardIfNeeded(LocalIsolate * local_isolate)39 explicit SharedStringAccessGuardIfNeeded(LocalIsolate* local_isolate) {
40 if (IsNeeded(local_isolate)) {
41 mutex_guard.emplace(local_isolate->internalized_string_access());
42 }
43 }
44
45 // Slow version which gets the isolate from the String.
SharedStringAccessGuardIfNeeded(String str)46 explicit SharedStringAccessGuardIfNeeded(String str) {
47 Isolate* isolate = GetIsolateIfNeeded(str);
48 if (isolate != nullptr)
49 mutex_guard.emplace(isolate->internalized_string_access());
50 }
51
NotNeeded()52 static SharedStringAccessGuardIfNeeded NotNeeded() {
53 return SharedStringAccessGuardIfNeeded();
54 }
55
56 #ifdef DEBUG
IsNeeded(String str)57 static bool IsNeeded(String str) {
58 return GetIsolateIfNeeded(str) != nullptr;
59 }
60 #endif
61
IsNeeded(LocalIsolate * local_isolate)62 static bool IsNeeded(LocalIsolate* local_isolate) {
63 // TODO(leszeks): Remove the nullptr check for local_isolate.
64 return local_isolate && !local_isolate->heap()->is_main_thread();
65 }
66
67 private:
68 // Default constructor and move constructor required for the NotNeeded()
69 // static constructor.
70 constexpr SharedStringAccessGuardIfNeeded() = default;
SharedStringAccessGuardIfNeeded(SharedStringAccessGuardIfNeeded &&)71 constexpr SharedStringAccessGuardIfNeeded(SharedStringAccessGuardIfNeeded&&)
72 V8_NOEXCEPT {
73 DCHECK(!mutex_guard.has_value());
74 }
75
76 // Returns the Isolate from the String if we need it for the lock.
GetIsolateIfNeeded(String str)77 static Isolate* GetIsolateIfNeeded(String str) {
78 LocalHeap* local_heap = LocalHeap::Current();
79 // Don't acquire the lock for the main thread.
80 if (!local_heap || local_heap->is_main_thread()) return nullptr;
81
82 Isolate* isolate;
83 if (!GetIsolateFromHeapObject(str, &isolate)) {
84 // If we can't get the isolate from the String, it must be read-only.
85 DCHECK(ReadOnlyHeap::Contains(str));
86 return nullptr;
87 }
88 return isolate;
89 }
90
91 base::Optional<base::SharedMutexGuard<base::kShared>> mutex_guard;
92 };
93
length(AcquireLoadTag)94 int String::length(AcquireLoadTag) const {
95 return base::AsAtomic32::Acquire_Load(
96 reinterpret_cast<const int32_t*>(field_address(kLengthOffset)));
97 }
98
set_length(int value,ReleaseStoreTag)99 void String::set_length(int value, ReleaseStoreTag) {
100 base::AsAtomic32::Release_Store(
101 reinterpret_cast<int32_t*>(field_address(kLengthOffset)), value);
102 }
103
104 TQ_OBJECT_CONSTRUCTORS_IMPL(String)
TQ_OBJECT_CONSTRUCTORS_IMPL(SeqString)105 TQ_OBJECT_CONSTRUCTORS_IMPL(SeqString)
106 TQ_OBJECT_CONSTRUCTORS_IMPL(SeqOneByteString)
107 TQ_OBJECT_CONSTRUCTORS_IMPL(SeqTwoByteString)
108 TQ_OBJECT_CONSTRUCTORS_IMPL(InternalizedString)
109 TQ_OBJECT_CONSTRUCTORS_IMPL(ConsString)
110 TQ_OBJECT_CONSTRUCTORS_IMPL(ThinString)
111 TQ_OBJECT_CONSTRUCTORS_IMPL(SlicedString)
112 TQ_OBJECT_CONSTRUCTORS_IMPL(ExternalString)
113 TQ_OBJECT_CONSTRUCTORS_IMPL(ExternalOneByteString)
114 TQ_OBJECT_CONSTRUCTORS_IMPL(ExternalTwoByteString)
115
116 StringShape::StringShape(const String str)
117 : type_(str.map(kAcquireLoad).instance_type()) {
118 set_valid();
119 DCHECK_EQ(type_ & kIsNotStringMask, kStringTag);
120 }
121
StringShape(const String str,PtrComprCageBase cage_base)122 StringShape::StringShape(const String str, PtrComprCageBase cage_base)
123 : type_(str.map(cage_base, kAcquireLoad).instance_type()) {
124 set_valid();
125 DCHECK_EQ(type_ & kIsNotStringMask, kStringTag);
126 }
127
StringShape(Map map)128 StringShape::StringShape(Map map) : type_(map.instance_type()) {
129 set_valid();
130 DCHECK_EQ(type_ & kIsNotStringMask, kStringTag);
131 }
132
StringShape(InstanceType t)133 StringShape::StringShape(InstanceType t) : type_(static_cast<uint32_t>(t)) {
134 set_valid();
135 DCHECK_EQ(type_ & kIsNotStringMask, kStringTag);
136 }
137
IsInternalized()138 bool StringShape::IsInternalized() const {
139 DCHECK(valid());
140 STATIC_ASSERT(kNotInternalizedTag != 0);
141 return (type_ & (kIsNotStringMask | kIsNotInternalizedMask)) ==
142 (kStringTag | kInternalizedTag);
143 }
144
IsCons()145 bool StringShape::IsCons() const {
146 return (type_ & kStringRepresentationMask) == kConsStringTag;
147 }
148
IsThin()149 bool StringShape::IsThin() const {
150 return (type_ & kStringRepresentationMask) == kThinStringTag;
151 }
152
IsSliced()153 bool StringShape::IsSliced() const {
154 return (type_ & kStringRepresentationMask) == kSlicedStringTag;
155 }
156
IsIndirect()157 bool StringShape::IsIndirect() const {
158 return (type_ & kIsIndirectStringMask) == kIsIndirectStringTag;
159 }
160
IsDirect()161 bool StringShape::IsDirect() const { return !IsIndirect(); }
162
IsExternal()163 bool StringShape::IsExternal() const {
164 return (type_ & kStringRepresentationMask) == kExternalStringTag;
165 }
166
IsSequential()167 bool StringShape::IsSequential() const {
168 return (type_ & kStringRepresentationMask) == kSeqStringTag;
169 }
170
IsUncachedExternal()171 bool StringShape::IsUncachedExternal() const {
172 return (type_ & kUncachedExternalStringMask) == kUncachedExternalStringTag;
173 }
174
IsShared()175 bool StringShape::IsShared() const {
176 // TODO(v8:12007): Set is_shared to true on internalized string when
177 // FLAG_shared_string_table is removed.
178 return (type_ & kSharedStringMask) == kSharedStringTag ||
179 (FLAG_shared_string_table && IsInternalized());
180 }
181
CanMigrateInParallel()182 bool StringShape::CanMigrateInParallel() const {
183 switch (representation_encoding_and_shared_tag()) {
184 case kSeqOneByteStringTag | kSharedStringTag:
185 case kSeqTwoByteStringTag | kSharedStringTag:
186 // Shared SeqStrings can migrate to ThinStrings.
187 return true;
188 case kThinStringTag | kOneByteStringTag | kSharedStringTag:
189 case kThinStringTag | kTwoByteStringTag | kSharedStringTag:
190 // Shared ThinStrings do not migrate.
191 return false;
192 default:
193 // TODO(v8:12007): Set is_shared to true on internalized string when
194 // FLAG_shared_string_table is removed.
195 //
196 // If you crashed here, you probably added a new shared string
197 // type. Explicitly handle all shared string cases above.
198 DCHECK((FLAG_shared_string_table && IsInternalized()) || !IsShared());
199 return false;
200 }
201 }
202
representation_tag()203 StringRepresentationTag StringShape::representation_tag() const {
204 uint32_t tag = (type_ & kStringRepresentationMask);
205 return static_cast<StringRepresentationTag>(tag);
206 }
207
encoding_tag()208 uint32_t StringShape::encoding_tag() const {
209 return type_ & kStringEncodingMask;
210 }
211
representation_and_encoding_tag()212 uint32_t StringShape::representation_and_encoding_tag() const {
213 return (type_ & (kStringRepresentationAndEncodingMask));
214 }
215
representation_encoding_and_shared_tag()216 uint32_t StringShape::representation_encoding_and_shared_tag() const {
217 return (type_ & (kStringRepresentationEncodingAndSharedMask));
218 }
219
220 STATIC_ASSERT((kStringRepresentationAndEncodingMask) ==
221 Internals::kStringRepresentationAndEncodingMask);
222
223 STATIC_ASSERT(static_cast<uint32_t>(kStringEncodingMask) ==
224 Internals::kStringEncodingMask);
225
IsSequentialOneByte()226 bool StringShape::IsSequentialOneByte() const {
227 return representation_and_encoding_tag() == kSeqOneByteStringTag;
228 }
229
IsSequentialTwoByte()230 bool StringShape::IsSequentialTwoByte() const {
231 return representation_and_encoding_tag() == kSeqTwoByteStringTag;
232 }
233
IsExternalOneByte()234 bool StringShape::IsExternalOneByte() const {
235 return representation_and_encoding_tag() == kExternalOneByteStringTag;
236 }
237
238 STATIC_ASSERT(kExternalOneByteStringTag ==
239 Internals::kExternalOneByteRepresentationTag);
240
241 STATIC_ASSERT(v8::String::ONE_BYTE_ENCODING == kOneByteStringTag);
242
IsExternalTwoByte()243 bool StringShape::IsExternalTwoByte() const {
244 return representation_and_encoding_tag() == kExternalTwoByteStringTag;
245 }
246
247 STATIC_ASSERT(kExternalTwoByteStringTag ==
248 Internals::kExternalTwoByteRepresentationTag);
249
250 STATIC_ASSERT(v8::String::TWO_BYTE_ENCODING == kTwoByteStringTag);
251
252 template <typename TDispatcher, typename TResult, typename... TArgs>
DispatchToSpecificTypeWithoutCast(TArgs &&...args)253 inline TResult StringShape::DispatchToSpecificTypeWithoutCast(TArgs&&... args) {
254 switch (representation_and_encoding_tag()) {
255 case kSeqStringTag | kOneByteStringTag:
256 return TDispatcher::HandleSeqOneByteString(std::forward<TArgs>(args)...);
257 case kSeqStringTag | kTwoByteStringTag:
258 return TDispatcher::HandleSeqTwoByteString(std::forward<TArgs>(args)...);
259 case kConsStringTag | kOneByteStringTag:
260 case kConsStringTag | kTwoByteStringTag:
261 return TDispatcher::HandleConsString(std::forward<TArgs>(args)...);
262 case kExternalStringTag | kOneByteStringTag:
263 return TDispatcher::HandleExternalOneByteString(
264 std::forward<TArgs>(args)...);
265 case kExternalStringTag | kTwoByteStringTag:
266 return TDispatcher::HandleExternalTwoByteString(
267 std::forward<TArgs>(args)...);
268 case kSlicedStringTag | kOneByteStringTag:
269 case kSlicedStringTag | kTwoByteStringTag:
270 return TDispatcher::HandleSlicedString(std::forward<TArgs>(args)...);
271 case kThinStringTag | kOneByteStringTag:
272 case kThinStringTag | kTwoByteStringTag:
273 return TDispatcher::HandleThinString(std::forward<TArgs>(args)...);
274 default:
275 return TDispatcher::HandleInvalidString(std::forward<TArgs>(args)...);
276 }
277 }
278
// X-macro listing every concrete String subclass (the leaves of the
// inheritance tree). V is invoked once per class name.
#define STRING_CLASS_TYPES(V) \
  V(SeqOneByteString)         \
  V(SeqTwoByteString)         \
  V(ConsString)               \
  V(ExternalOneByteString)    \
  V(ExternalTwoByteString)    \
  V(SlicedString)             \
  V(ThinString)
288
289 template <typename TDispatcher, typename TResult, typename... TArgs>
DispatchToSpecificType(String str,TArgs &&...args)290 inline TResult StringShape::DispatchToSpecificType(String str,
291 TArgs&&... args) {
292 class CastingDispatcher : public AllStatic {
293 public:
294 #define DEFINE_METHOD(Type) \
295 static inline TResult Handle##Type(String str, TArgs&&... args) { \
296 return TDispatcher::Handle##Type(Type::cast(str), \
297 std::forward<TArgs>(args)...); \
298 }
299 STRING_CLASS_TYPES(DEFINE_METHOD)
300 #undef DEFINE_METHOD
301 static inline TResult HandleInvalidString(String str, TArgs&&... args) {
302 return TDispatcher::HandleInvalidString(str,
303 std::forward<TArgs>(args)...);
304 }
305 };
306
307 return DispatchToSpecificTypeWithoutCast<CastingDispatcher, TResult>(
308 str, std::forward<TArgs>(args)...);
309 }
310
// True if the encoding bits of this string's instance type say one-byte.
DEF_GETTER(String, IsOneByteRepresentation, bool) {
  const uint32_t instance_type = map(cage_base).instance_type();
  const uint32_t encoding = instance_type & kStringEncodingMask;
  return encoding == kOneByteStringTag;
}
315
// True if the encoding bits of this string's instance type say two-byte.
DEF_GETTER(String, IsTwoByteRepresentation, bool) {
  const uint32_t instance_type = map(cage_base).instance_type();
  const uint32_t encoding = instance_type & kStringEncodingMask;
  return encoding == kTwoByteStringTag;
}
320
321 // static
IsOneByteRepresentationUnderneath(String string)322 bool String::IsOneByteRepresentationUnderneath(String string) {
323 while (true) {
324 uint32_t type = string.map().instance_type();
325 STATIC_ASSERT(kIsIndirectStringTag != 0);
326 STATIC_ASSERT((kIsIndirectStringMask & kStringEncodingMask) == 0);
327 DCHECK(string.IsFlat());
328 switch (type & (kIsIndirectStringMask | kStringEncodingMask)) {
329 case kOneByteStringTag:
330 return true;
331 case kTwoByteStringTag:
332 return false;
333 default: // Cons, sliced, thin, strings need to go deeper.
334 string = string.GetUnderlying();
335 }
336 }
337 }
338
Get(int index)339 base::uc32 FlatStringReader::Get(int index) const {
340 if (is_one_byte_) {
341 return Get<uint8_t>(index);
342 } else {
343 return Get<base::uc16>(index);
344 }
345 }
346
347 template <typename Char>
Get(int index)348 Char FlatStringReader::Get(int index) const {
349 DCHECK_EQ(is_one_byte_, sizeof(Char) == 1);
350 DCHECK(0 <= index && index < length_);
351 if (sizeof(Char) == 1) {
352 return static_cast<Char>(static_cast<const uint8_t*>(start_)[index]);
353 } else {
354 return static_cast<Char>(static_cast<const base::uc16*>(start_)[index]);
355 }
356 }
357
358 template <typename Char>
359 class SequentialStringKey final : public StringTableKey {
360 public:
361 SequentialStringKey(const base::Vector<const Char>& chars, uint64_t seed,
362 bool convert = false)
363 : SequentialStringKey(StringHasher::HashSequentialString<Char>(
364 chars.begin(), chars.length(), seed),
365 chars, convert) {}
366
367 SequentialStringKey(int raw_hash_field, const base::Vector<const Char>& chars,
368 bool convert = false)
369 : StringTableKey(raw_hash_field, chars.length()),
370 chars_(chars),
371 convert_(convert) {}
372
373 template <typename IsolateT>
IsMatch(IsolateT * isolate,String s)374 bool IsMatch(IsolateT* isolate, String s) {
375 return s.IsEqualTo<String::EqualityType::kNoLengthCheck>(chars_, isolate);
376 }
377
378 template <typename IsolateT>
PrepareForInsertion(IsolateT * isolate)379 void PrepareForInsertion(IsolateT* isolate) {
380 if (sizeof(Char) == 1) {
381 internalized_string_ = isolate->factory()->NewOneByteInternalizedString(
382 base::Vector<const uint8_t>::cast(chars_), raw_hash_field());
383 } else {
384 internalized_string_ = isolate->factory()->NewTwoByteInternalizedString(
385 base::Vector<const uint16_t>::cast(chars_), raw_hash_field());
386 }
387 }
388
GetHandleForInsertion()389 Handle<String> GetHandleForInsertion() {
390 DCHECK(!internalized_string_.is_null());
391 return internalized_string_;
392 }
393
394 private:
395 base::Vector<const Char> chars_;
396 bool convert_;
397 Handle<String> internalized_string_;
398 };
399
400 using OneByteStringKey = SequentialStringKey<uint8_t>;
401 using TwoByteStringKey = SequentialStringKey<uint16_t>;
402
403 template <typename SeqString>
404 class SeqSubStringKey final : public StringTableKey {
405 public:
406 using Char = typename SeqString::Char;
407 // VS 2017 on official builds gives this spurious warning:
408 // warning C4789: buffer 'key' of size 16 bytes will be overrun; 4 bytes will
409 // be written starting at offset 16
410 // https://bugs.chromium.org/p/v8/issues/detail?id=6068
411 #if defined(V8_CC_MSVC)
412 #pragma warning(push)
413 #pragma warning(disable : 4789)
414 #endif
415 SeqSubStringKey(Isolate* isolate, Handle<SeqString> string, int from, int len,
416 bool convert = false)
417 : StringTableKey(0, len),
418 string_(string),
419 from_(from),
420 convert_(convert) {
421 // We have to set the hash later.
422 DisallowGarbageCollection no_gc;
423 uint32_t raw_hash_field = StringHasher::HashSequentialString(
424 string->GetChars(no_gc) + from, len, HashSeed(isolate));
425 set_raw_hash_field(raw_hash_field);
426
427 DCHECK_LE(0, length());
428 DCHECK_LE(from_ + length(), string_->length());
429 DCHECK_EQ(string_->IsSeqOneByteString(), sizeof(Char) == 1);
430 DCHECK_EQ(string_->IsSeqTwoByteString(), sizeof(Char) == 2);
431 }
432 #if defined(V8_CC_MSVC)
433 #pragma warning(pop)
434 #endif
435
IsMatch(Isolate * isolate,String string)436 bool IsMatch(Isolate* isolate, String string) {
437 DCHECK(!SharedStringAccessGuardIfNeeded::IsNeeded(string));
438 DCHECK(!SharedStringAccessGuardIfNeeded::IsNeeded(*string_));
439 DisallowGarbageCollection no_gc;
440 return string.IsEqualTo<String::EqualityType::kNoLengthCheck>(
441 base::Vector<const Char>(string_->GetChars(no_gc) + from_, length()),
442 isolate);
443 }
444
PrepareForInsertion(Isolate * isolate)445 void PrepareForInsertion(Isolate* isolate) {
446 if (sizeof(Char) == 1 || (sizeof(Char) == 2 && convert_)) {
447 Handle<SeqOneByteString> result =
448 isolate->factory()->AllocateRawOneByteInternalizedString(
449 length(), raw_hash_field());
450 DisallowGarbageCollection no_gc;
451 CopyChars(result->GetChars(no_gc), string_->GetChars(no_gc) + from_,
452 length());
453 internalized_string_ = result;
454 }
455 Handle<SeqTwoByteString> result =
456 isolate->factory()->AllocateRawTwoByteInternalizedString(
457 length(), raw_hash_field());
458 DisallowGarbageCollection no_gc;
459 CopyChars(result->GetChars(no_gc), string_->GetChars(no_gc) + from_,
460 length());
461 internalized_string_ = result;
462 }
463
GetHandleForInsertion()464 Handle<String> GetHandleForInsertion() {
465 DCHECK(!internalized_string_.is_null());
466 return internalized_string_;
467 }
468
469 private:
470 Handle<typename CharTraits<Char>::String> string_;
471 int from_;
472 bool convert_;
473 Handle<String> internalized_string_;
474 };
475
476 using SeqOneByteSubStringKey = SeqSubStringKey<SeqOneByteString>;
477 using SeqTwoByteSubStringKey = SeqSubStringKey<SeqTwoByteString>;
478
Equals(String other)479 bool String::Equals(String other) const {
480 if (other == *this) return true;
481 if (this->IsInternalizedString() && other.IsInternalizedString()) {
482 return false;
483 }
484 return SlowEquals(other);
485 }
486
487 // static
Equals(Isolate * isolate,Handle<String> one,Handle<String> two)488 bool String::Equals(Isolate* isolate, Handle<String> one, Handle<String> two) {
489 if (one.is_identical_to(two)) return true;
490 if (one->IsInternalizedString() && two->IsInternalizedString()) {
491 return false;
492 }
493 return SlowEquals(isolate, one, two);
494 }
495
496 template <String::EqualityType kEqType, typename Char>
IsEqualTo(base::Vector<const Char> str,Isolate * isolate)497 bool String::IsEqualTo(base::Vector<const Char> str, Isolate* isolate) const {
498 DCHECK(!SharedStringAccessGuardIfNeeded::IsNeeded(*this));
499 return IsEqualToImpl<kEqType>(str, isolate,
500 SharedStringAccessGuardIfNeeded::NotNeeded());
501 }
502
503 template <String::EqualityType kEqType, typename Char>
IsEqualTo(base::Vector<const Char> str)504 bool String::IsEqualTo(base::Vector<const Char> str) const {
505 DCHECK(!SharedStringAccessGuardIfNeeded::IsNeeded(*this));
506 return IsEqualToImpl<kEqType>(str, GetPtrComprCageBase(*this),
507 SharedStringAccessGuardIfNeeded::NotNeeded());
508 }
509
510 template <String::EqualityType kEqType, typename Char>
IsEqualTo(base::Vector<const Char> str,LocalIsolate * isolate)511 bool String::IsEqualTo(base::Vector<const Char> str,
512 LocalIsolate* isolate) const {
513 SharedStringAccessGuardIfNeeded access_guard(isolate);
514 return IsEqualToImpl<kEqType>(str, isolate, access_guard);
515 }
516
517 template <String::EqualityType kEqType, typename Char>
IsEqualToImpl(base::Vector<const Char> str,PtrComprCageBase cage_base,const SharedStringAccessGuardIfNeeded & access_guard)518 bool String::IsEqualToImpl(
519 base::Vector<const Char> str, PtrComprCageBase cage_base,
520 const SharedStringAccessGuardIfNeeded& access_guard) const {
521 size_t len = str.size();
522 switch (kEqType) {
523 case EqualityType::kWholeString:
524 if (static_cast<size_t>(length()) != len) return false;
525 break;
526 case EqualityType::kPrefix:
527 if (static_cast<size_t>(length()) < len) return false;
528 break;
529 case EqualityType::kNoLengthCheck:
530 DCHECK_EQ(length(), len);
531 break;
532 }
533
534 DisallowGarbageCollection no_gc;
535
536 int slice_offset = 0;
537 String string = *this;
538 const Char* data = str.data();
539 while (true) {
540 int32_t type = string.map(cage_base).instance_type();
541 switch (type & kStringRepresentationAndEncodingMask) {
542 case kSeqOneByteStringTag:
543 return CompareCharsEqual(
544 SeqOneByteString::cast(string).GetChars(no_gc, access_guard) +
545 slice_offset,
546 data, len);
547 case kSeqTwoByteStringTag:
548 return CompareCharsEqual(
549 SeqTwoByteString::cast(string).GetChars(no_gc, access_guard) +
550 slice_offset,
551 data, len);
552 case kExternalOneByteStringTag:
553 return CompareCharsEqual(
554 ExternalOneByteString::cast(string).GetChars(cage_base) +
555 slice_offset,
556 data, len);
557 case kExternalTwoByteStringTag:
558 return CompareCharsEqual(
559 ExternalTwoByteString::cast(string).GetChars(cage_base) +
560 slice_offset,
561 data, len);
562
563 case kSlicedStringTag | kOneByteStringTag:
564 case kSlicedStringTag | kTwoByteStringTag: {
565 SlicedString slicedString = SlicedString::cast(string);
566 slice_offset += slicedString.offset();
567 string = slicedString.parent(cage_base);
568 continue;
569 }
570
571 case kConsStringTag | kOneByteStringTag:
572 case kConsStringTag | kTwoByteStringTag: {
573 // The ConsString path is more complex and rare, so call out to an
574 // out-of-line handler.
575 return IsConsStringEqualToImpl<Char>(ConsString::cast(string),
576 slice_offset, str, cage_base,
577 access_guard);
578 }
579
580 case kThinStringTag | kOneByteStringTag:
581 case kThinStringTag | kTwoByteStringTag:
582 string = ThinString::cast(string).actual(cage_base);
583 continue;
584
585 default:
586 UNREACHABLE();
587 }
588 }
589 }
590
591 // static
592 template <typename Char>
IsConsStringEqualToImpl(ConsString string,int slice_offset,base::Vector<const Char> str,PtrComprCageBase cage_base,const SharedStringAccessGuardIfNeeded & access_guard)593 bool String::IsConsStringEqualToImpl(
594 ConsString string, int slice_offset, base::Vector<const Char> str,
595 PtrComprCageBase cage_base,
596 const SharedStringAccessGuardIfNeeded& access_guard) {
597 // Already checked the len in IsEqualToImpl. Check GE rather than EQ in case
598 // this is a prefix check.
599 DCHECK_GE(string.length(), str.size());
600
601 ConsStringIterator iter(ConsString::cast(string), slice_offset);
602 base::Vector<const Char> remaining_str = str;
603 for (String segment = iter.Next(&slice_offset); !segment.is_null();
604 segment = iter.Next(&slice_offset)) {
605 // Compare the individual segment against the appropriate subvector of the
606 // remaining string.
607 size_t len = std::min<size_t>(segment.length(), remaining_str.size());
608 base::Vector<const Char> sub_str = remaining_str.SubVector(0, len);
609 if (!segment.IsEqualToImpl<EqualityType::kNoLengthCheck>(sub_str, cage_base,
610 access_guard)) {
611 return false;
612 }
613 remaining_str += len;
614 if (remaining_str.empty()) break;
615 }
616 DCHECK_EQ(remaining_str.data(), str.end());
617 DCHECK_EQ(remaining_str.size(), 0);
618 return true;
619 }
620
IsOneByteEqualTo(base::Vector<const char> str)621 bool String::IsOneByteEqualTo(base::Vector<const char> str) {
622 return IsEqualTo(str);
623 }
624
625 template <typename Char>
GetChars(PtrComprCageBase cage_base,const DisallowGarbageCollection & no_gc)626 const Char* String::GetChars(PtrComprCageBase cage_base,
627 const DisallowGarbageCollection& no_gc) const {
628 DCHECK(!SharedStringAccessGuardIfNeeded::IsNeeded(*this));
629 return StringShape(*this, cage_base).IsExternal()
630 ? CharTraits<Char>::ExternalString::cast(*this).GetChars(cage_base)
631 : CharTraits<Char>::String::cast(*this).GetChars(no_gc);
632 }
633
634 template <typename Char>
GetChars(PtrComprCageBase cage_base,const DisallowGarbageCollection & no_gc,const SharedStringAccessGuardIfNeeded & access_guard)635 const Char* String::GetChars(
636 PtrComprCageBase cage_base, const DisallowGarbageCollection& no_gc,
637 const SharedStringAccessGuardIfNeeded& access_guard) const {
638 return StringShape(*this, cage_base).IsExternal()
639 ? CharTraits<Char>::ExternalString::cast(*this).GetChars(cage_base)
640 : CharTraits<Char>::String::cast(*this).GetChars(no_gc,
641 access_guard);
642 }
643
644 // static
Flatten(Isolate * isolate,Handle<String> string,AllocationType allocation)645 Handle<String> String::Flatten(Isolate* isolate, Handle<String> string,
646 AllocationType allocation) {
647 DisallowGarbageCollection no_gc; // Unhandlified code.
648 PtrComprCageBase cage_base(isolate);
649 String s = *string;
650 StringShape shape(s, cage_base);
651
652 // Shortcut already-flat strings.
653 if (V8_LIKELY(shape.IsDirect())) return string;
654
655 if (shape.IsCons()) {
656 DCHECK(!s.InSharedHeap());
657 ConsString cons = ConsString::cast(s);
658 if (!cons.IsFlat(isolate)) {
659 AllowGarbageCollection yes_gc;
660 return SlowFlatten(isolate, handle(cons, isolate), allocation);
661 }
662 s = cons.first(cage_base);
663 shape = StringShape(s, cage_base);
664 }
665
666 if (shape.IsThin()) {
667 s = ThinString::cast(s).actual(cage_base);
668 DCHECK(!s.IsConsString());
669 }
670
671 return handle(s, isolate);
672 }
673
674 // static
Flatten(LocalIsolate * isolate,Handle<String> string,AllocationType allocation)675 Handle<String> String::Flatten(LocalIsolate* isolate, Handle<String> string,
676 AllocationType allocation) {
677 // We should never pass non-flat strings to String::Flatten when off-thread.
678 DCHECK(string->IsFlat());
679 return string;
680 }
681
682 // static
TryGetFlatContentFromDirectString(PtrComprCageBase cage_base,const DisallowGarbageCollection & no_gc,String string,int offset,int length,const SharedStringAccessGuardIfNeeded & access_guard)683 base::Optional<String::FlatContent> String::TryGetFlatContentFromDirectString(
684 PtrComprCageBase cage_base, const DisallowGarbageCollection& no_gc,
685 String string, int offset, int length,
686 const SharedStringAccessGuardIfNeeded& access_guard) {
687 DCHECK_GE(offset, 0);
688 DCHECK_GE(length, 0);
689 DCHECK_LE(offset + length, string.length());
690 switch (StringShape{string, cage_base}.representation_and_encoding_tag()) {
691 case kSeqOneByteStringTag:
692 return FlatContent(
693 SeqOneByteString::cast(string).GetChars(no_gc, access_guard) + offset,
694 length, no_gc);
695 case kSeqTwoByteStringTag:
696 return FlatContent(
697 SeqTwoByteString::cast(string).GetChars(no_gc, access_guard) + offset,
698 length, no_gc);
699 case kExternalOneByteStringTag:
700 return FlatContent(
701 ExternalOneByteString::cast(string).GetChars(cage_base) + offset,
702 length, no_gc);
703 case kExternalTwoByteStringTag:
704 return FlatContent(
705 ExternalTwoByteString::cast(string).GetChars(cage_base) + offset,
706 length, no_gc);
707 default:
708 return {};
709 }
710 UNREACHABLE();
711 }
712
GetFlatContent(const DisallowGarbageCollection & no_gc)713 String::FlatContent String::GetFlatContent(
714 const DisallowGarbageCollection& no_gc) {
715 #if DEBUG
716 // Check that this method is called only from the main thread.
717 {
718 Isolate* isolate;
719 // We don't have to check read only strings as those won't move.
720 //
721 // TODO(v8:12007): Currently character data is never overwritten for
722 // shared strings.
723 DCHECK_IMPLIES(GetIsolateFromHeapObject(*this, &isolate) && !InSharedHeap(),
724 ThreadId::Current() == isolate->thread_id());
725 }
726 #endif
727
728 return GetFlatContent(no_gc, SharedStringAccessGuardIfNeeded::NotNeeded());
729 }
730
FlatContent(const uint8_t * start,int length,const DisallowGarbageCollection & no_gc)731 String::FlatContent::FlatContent(const uint8_t* start, int length,
732 const DisallowGarbageCollection& no_gc)
733 : onebyte_start(start), length_(length), state_(ONE_BYTE), no_gc_(no_gc) {
734 #ifdef ENABLE_SLOW_DCHECKS
735 checksum_ = ComputeChecksum();
736 #endif
737 }
738
FlatContent(const base::uc16 * start,int length,const DisallowGarbageCollection & no_gc)739 String::FlatContent::FlatContent(const base::uc16* start, int length,
740 const DisallowGarbageCollection& no_gc)
741 : twobyte_start(start), length_(length), state_(TWO_BYTE), no_gc_(no_gc) {
742 #ifdef ENABLE_SLOW_DCHECKS
743 checksum_ = ComputeChecksum();
744 #endif
745 }
746
~FlatContent()747 String::FlatContent::~FlatContent() {
748 // When ENABLE_SLOW_DCHECKS, check the string contents did not change during
749 // the lifetime of the FlatContent. To avoid extra memory use, only the hash
750 // is checked instead of snapshotting the full character data.
751 //
752 // If you crashed here, it means something changed the character data of this
753 // FlatContent during its lifetime (e.g. GC relocated the string). This is
754 // almost always a bug. If you are certain it is not a bug, you can disable
755 // the checksum verification in the caller by calling
756 // UnsafeDisableChecksumVerification().
757 SLOW_DCHECK(checksum_ == kChecksumVerificationDisabled ||
758 checksum_ == ComputeChecksum());
759 }
760
#ifdef ENABLE_SLOW_DCHECKS
// Hashes the referenced characters with a fixed seed. The result is DCHECKed
// to differ from kChecksumVerificationDisabled.
uint32_t String::FlatContent::ComputeChecksum() const {
  constexpr uint64_t hashseed = 1;
  uint32_t checksum;
  if (state_ != ONE_BYTE) {
    DCHECK_EQ(TWO_BYTE, state_);
    checksum =
        StringHasher::HashSequentialString(twobyte_start, length_, hashseed);
  } else {
    checksum =
        StringHasher::HashSequentialString(onebyte_start, length_, hashseed);
  }
  DCHECK_NE(kChecksumVerificationDisabled, checksum);
  return checksum;
}
#endif
775
GetFlatContent(const DisallowGarbageCollection & no_gc,const SharedStringAccessGuardIfNeeded & access_guard)776 String::FlatContent String::GetFlatContent(
777 const DisallowGarbageCollection& no_gc,
778 const SharedStringAccessGuardIfNeeded& access_guard) {
779 PtrComprCageBase cage_base = GetPtrComprCageBase(*this);
780 base::Optional<FlatContent> flat_content = TryGetFlatContentFromDirectString(
781 cage_base, no_gc, *this, 0, length(), access_guard);
782 if (flat_content.has_value()) return flat_content.value();
783 return SlowGetFlatContent(no_gc, access_guard);
784 }
785
Share(Isolate * isolate,Handle<String> string)786 Handle<String> String::Share(Isolate* isolate, Handle<String> string) {
787 DCHECK(FLAG_shared_string_table);
788 MaybeHandle<Map> new_map;
789 switch (
790 isolate->factory()->ComputeSharingStrategyForString(string, &new_map)) {
791 case StringTransitionStrategy::kCopy:
792 return SlowShare(isolate, string);
793 case StringTransitionStrategy::kInPlace:
794 // A relaxed write is sufficient here, because at this point the string
795 // has not yet escaped the current thread.
796 DCHECK(string->InSharedHeap());
797 string->set_map_no_write_barrier(*new_map.ToHandleChecked());
798 return string;
799 case StringTransitionStrategy::kAlreadyTransitioned:
800 return string;
801 }
802 }
803
Get(int index)804 uint16_t String::Get(int index) const {
805 DCHECK(!SharedStringAccessGuardIfNeeded::IsNeeded(*this));
806 return GetImpl(index, GetPtrComprCageBase(*this),
807 SharedStringAccessGuardIfNeeded::NotNeeded());
808 }
809
Get(int index,Isolate * isolate)810 uint16_t String::Get(int index, Isolate* isolate) const {
811 SharedStringAccessGuardIfNeeded scope(isolate);
812 return GetImpl(index, isolate, scope);
813 }
814
Get(int index,LocalIsolate * local_isolate)815 uint16_t String::Get(int index, LocalIsolate* local_isolate) const {
816 SharedStringAccessGuardIfNeeded scope(local_isolate);
817 return GetImpl(index, local_isolate, scope);
818 }
819
Get(int index,PtrComprCageBase cage_base,const SharedStringAccessGuardIfNeeded & access_guard)820 uint16_t String::Get(
821 int index, PtrComprCageBase cage_base,
822 const SharedStringAccessGuardIfNeeded& access_guard) const {
823 return GetImpl(index, cage_base, access_guard);
824 }
825
GetImpl(int index,PtrComprCageBase cage_base,const SharedStringAccessGuardIfNeeded & access_guard)826 uint16_t String::GetImpl(
827 int index, PtrComprCageBase cage_base,
828 const SharedStringAccessGuardIfNeeded& access_guard) const {
829 DCHECK(index >= 0 && index < length());
830
831 class StringGetDispatcher : public AllStatic {
832 public:
833 #define DEFINE_METHOD(Type) \
834 static inline uint16_t Handle##Type( \
835 Type str, int index, PtrComprCageBase cage_base, \
836 const SharedStringAccessGuardIfNeeded& access_guard) { \
837 return str.Get(index, cage_base, access_guard); \
838 }
839 STRING_CLASS_TYPES(DEFINE_METHOD)
840 #undef DEFINE_METHOD
841 static inline uint16_t HandleInvalidString(
842 String str, int index, PtrComprCageBase cage_base,
843 const SharedStringAccessGuardIfNeeded& access_guard) {
844 UNREACHABLE();
845 }
846 };
847
848 return StringShape(*this)
849 .DispatchToSpecificType<StringGetDispatcher, uint16_t>(
850 *this, index, cage_base, access_guard);
851 }
852
Set(int index,uint16_t value)853 void String::Set(int index, uint16_t value) {
854 DCHECK(index >= 0 && index < length());
855 DCHECK(StringShape(*this).IsSequential());
856
857 return IsOneByteRepresentation()
858 ? SeqOneByteString::cast(*this).SeqOneByteStringSet(index, value)
859 : SeqTwoByteString::cast(*this).SeqTwoByteStringSet(index, value);
860 }
861
IsFlat()862 bool String::IsFlat() const { return IsFlat(GetPtrComprCageBase(*this)); }
863
IsFlat(PtrComprCageBase cage_base)864 bool String::IsFlat(PtrComprCageBase cage_base) const {
865 if (!StringShape(*this, cage_base).IsCons()) return true;
866 return ConsString::cast(*this).IsFlat(cage_base);
867 }
868
IsShared()869 bool String::IsShared() const { return IsShared(GetPtrComprCageBase(*this)); }
870
IsShared(PtrComprCageBase cage_base)871 bool String::IsShared(PtrComprCageBase cage_base) const {
872 const bool result = StringShape(*this, cage_base).IsShared();
873 DCHECK_IMPLIES(result, InSharedHeap());
874 return result;
875 }
876
GetUnderlying()877 String String::GetUnderlying() const {
878 // Giving direct access to underlying string only makes sense if the
879 // wrapping string is already flattened.
880 DCHECK(IsFlat());
881 DCHECK(StringShape(*this).IsIndirect());
882 STATIC_ASSERT(static_cast<int>(ConsString::kFirstOffset) ==
883 static_cast<int>(SlicedString::kParentOffset));
884 STATIC_ASSERT(static_cast<int>(ConsString::kFirstOffset) ==
885 static_cast<int>(ThinString::kActualOffset));
886 const int kUnderlyingOffset = SlicedString::kParentOffset;
887 return TaggedField<String, kUnderlyingOffset>::load(*this);
888 }
889
890 template <class Visitor>
VisitFlat(Visitor * visitor,String string,const int offset)891 ConsString String::VisitFlat(Visitor* visitor, String string,
892 const int offset) {
893 DCHECK(!SharedStringAccessGuardIfNeeded::IsNeeded(string));
894 return VisitFlat(visitor, string, offset,
895 SharedStringAccessGuardIfNeeded::NotNeeded());
896 }
897
898 template <class Visitor>
VisitFlat(Visitor * visitor,String string,const int offset,const SharedStringAccessGuardIfNeeded & access_guard)899 ConsString String::VisitFlat(
900 Visitor* visitor, String string, const int offset,
901 const SharedStringAccessGuardIfNeeded& access_guard) {
902 DisallowGarbageCollection no_gc;
903 int slice_offset = offset;
904 const int length = string.length();
905 DCHECK(offset <= length);
906 PtrComprCageBase cage_base = GetPtrComprCageBase(string);
907 while (true) {
908 int32_t tag =
909 StringShape(string, cage_base).representation_and_encoding_tag();
910 switch (tag) {
911 case kSeqOneByteStringTag:
912 visitor->VisitOneByteString(
913 SeqOneByteString::cast(string).GetChars(no_gc, access_guard) +
914 slice_offset,
915 length - offset);
916 return ConsString();
917
918 case kSeqTwoByteStringTag:
919 visitor->VisitTwoByteString(
920 SeqTwoByteString::cast(string).GetChars(no_gc, access_guard) +
921 slice_offset,
922 length - offset);
923 return ConsString();
924
925 case kExternalOneByteStringTag:
926 visitor->VisitOneByteString(
927 ExternalOneByteString::cast(string).GetChars(cage_base) +
928 slice_offset,
929 length - offset);
930 return ConsString();
931
932 case kExternalTwoByteStringTag:
933 visitor->VisitTwoByteString(
934 ExternalTwoByteString::cast(string).GetChars(cage_base) +
935 slice_offset,
936 length - offset);
937 return ConsString();
938
939 case kSlicedStringTag | kOneByteStringTag:
940 case kSlicedStringTag | kTwoByteStringTag: {
941 SlicedString slicedString = SlicedString::cast(string);
942 slice_offset += slicedString.offset();
943 string = slicedString.parent(cage_base);
944 continue;
945 }
946
947 case kConsStringTag | kOneByteStringTag:
948 case kConsStringTag | kTwoByteStringTag:
949 return ConsString::cast(string);
950
951 case kThinStringTag | kOneByteStringTag:
952 case kThinStringTag | kTwoByteStringTag:
953 string = ThinString::cast(string).actual(cage_base);
954 continue;
955
956 default:
957 UNREACHABLE();
958 }
959 }
960 }
961
962 template <>
GetCharVector(const DisallowGarbageCollection & no_gc)963 inline base::Vector<const uint8_t> String::GetCharVector(
964 const DisallowGarbageCollection& no_gc) {
965 String::FlatContent flat = GetFlatContent(no_gc);
966 DCHECK(flat.IsOneByte());
967 return flat.ToOneByteVector();
968 }
969
970 template <>
GetCharVector(const DisallowGarbageCollection & no_gc)971 inline base::Vector<const base::uc16> String::GetCharVector(
972 const DisallowGarbageCollection& no_gc) {
973 String::FlatContent flat = GetFlatContent(no_gc);
974 DCHECK(flat.IsTwoByte());
975 return flat.ToUC16Vector();
976 }
977
Get(int index)978 uint8_t SeqOneByteString::Get(int index) const {
979 DCHECK(!SharedStringAccessGuardIfNeeded::IsNeeded(*this));
980 return Get(index, GetPtrComprCageBase(*this),
981 SharedStringAccessGuardIfNeeded::NotNeeded());
982 }
983
Get(int index,PtrComprCageBase cage_base,const SharedStringAccessGuardIfNeeded & access_guard)984 uint8_t SeqOneByteString::Get(
985 int index, PtrComprCageBase cage_base,
986 const SharedStringAccessGuardIfNeeded& access_guard) const {
987 USE(access_guard);
988 DCHECK(index >= 0 && index < length());
989 return ReadField<byte>(kHeaderSize + index * kCharSize);
990 }
991
SeqOneByteStringSet(int index,uint16_t value)992 void SeqOneByteString::SeqOneByteStringSet(int index, uint16_t value) {
993 DCHECK_GE(index, 0);
994 DCHECK_LT(index, length());
995 DCHECK_LE(value, kMaxOneByteCharCode);
996 WriteField<byte>(kHeaderSize + index * kCharSize, static_cast<byte>(value));
997 }
998
SeqOneByteStringSetChars(int index,const uint8_t * string,int string_length)999 void SeqOneByteString::SeqOneByteStringSetChars(int index,
1000 const uint8_t* string,
1001 int string_length) {
1002 DCHECK_LE(0, index);
1003 DCHECK_LT(index + string_length, length());
1004 void* address =
1005 reinterpret_cast<void*>(field_address(kHeaderSize + index * kCharSize));
1006 memcpy(address, string, string_length);
1007 }
1008
GetCharsAddress()1009 Address SeqOneByteString::GetCharsAddress() const {
1010 return field_address(kHeaderSize);
1011 }
1012
GetChars(const DisallowGarbageCollection & no_gc)1013 uint8_t* SeqOneByteString::GetChars(
1014 const DisallowGarbageCollection& no_gc) const {
1015 USE(no_gc);
1016 DCHECK(!SharedStringAccessGuardIfNeeded::IsNeeded(*this));
1017 return reinterpret_cast<uint8_t*>(GetCharsAddress());
1018 }
1019
GetChars(const DisallowGarbageCollection & no_gc,const SharedStringAccessGuardIfNeeded & access_guard)1020 uint8_t* SeqOneByteString::GetChars(
1021 const DisallowGarbageCollection& no_gc,
1022 const SharedStringAccessGuardIfNeeded& access_guard) const {
1023 USE(no_gc);
1024 USE(access_guard);
1025 return reinterpret_cast<uint8_t*>(GetCharsAddress());
1026 }
1027
GetCharsAddress()1028 Address SeqTwoByteString::GetCharsAddress() const {
1029 return field_address(kHeaderSize);
1030 }
1031
GetChars(const DisallowGarbageCollection & no_gc)1032 base::uc16* SeqTwoByteString::GetChars(
1033 const DisallowGarbageCollection& no_gc) const {
1034 USE(no_gc);
1035 DCHECK(!SharedStringAccessGuardIfNeeded::IsNeeded(*this));
1036 return reinterpret_cast<base::uc16*>(GetCharsAddress());
1037 }
1038
GetChars(const DisallowGarbageCollection & no_gc,const SharedStringAccessGuardIfNeeded & access_guard)1039 base::uc16* SeqTwoByteString::GetChars(
1040 const DisallowGarbageCollection& no_gc,
1041 const SharedStringAccessGuardIfNeeded& access_guard) const {
1042 USE(no_gc);
1043 USE(access_guard);
1044 return reinterpret_cast<base::uc16*>(GetCharsAddress());
1045 }
1046
Get(int index,PtrComprCageBase cage_base,const SharedStringAccessGuardIfNeeded & access_guard)1047 uint16_t SeqTwoByteString::Get(
1048 int index, PtrComprCageBase cage_base,
1049 const SharedStringAccessGuardIfNeeded& access_guard) const {
1050 USE(access_guard);
1051 DCHECK(index >= 0 && index < length());
1052 return ReadField<uint16_t>(kHeaderSize + index * kShortSize);
1053 }
1054
SeqTwoByteStringSet(int index,uint16_t value)1055 void SeqTwoByteString::SeqTwoByteStringSet(int index, uint16_t value) {
1056 DCHECK(index >= 0 && index < length());
1057 WriteField<uint16_t>(kHeaderSize + index * kShortSize, value);
1058 }
1059
1060 // Due to ThinString rewriting, concurrent visitors need to read the length with
1061 // acquire semantics.
AllocatedSize()1062 inline int SeqOneByteString::AllocatedSize() {
1063 return SizeFor(length(kAcquireLoad));
1064 }
AllocatedSize()1065 inline int SeqTwoByteString::AllocatedSize() {
1066 return SizeFor(length(kAcquireLoad));
1067 }
1068
1069 // static
IsCompatibleMap(Map map,ReadOnlyRoots roots)1070 bool SeqOneByteString::IsCompatibleMap(Map map, ReadOnlyRoots roots) {
1071 return map == roots.one_byte_string_map() ||
1072 map == roots.shared_one_byte_string_map();
1073 }
1074
1075 // static
IsCompatibleMap(Map map,ReadOnlyRoots roots)1076 bool SeqTwoByteString::IsCompatibleMap(Map map, ReadOnlyRoots roots) {
1077 return map == roots.string_map() || map == roots.shared_string_map();
1078 }
1079
set_parent(String parent,WriteBarrierMode mode)1080 void SlicedString::set_parent(String parent, WriteBarrierMode mode) {
1081 DCHECK(parent.IsSeqString() || parent.IsExternalString());
1082 TorqueGeneratedSlicedString<SlicedString, Super>::set_parent(parent, mode);
1083 }
1084
unchecked_first()1085 Object ConsString::unchecked_first() const {
1086 return TaggedField<Object, kFirstOffset>::load(*this);
1087 }
1088
unchecked_second()1089 Object ConsString::unchecked_second() const {
1090 return RELAXED_READ_FIELD(*this, kSecondOffset);
1091 }
1092
IsFlat(PtrComprCageBase cage_base)1093 bool ConsString::IsFlat(PtrComprCageBase cage_base) const {
1094 return second(cage_base).length() == 0;
1095 }
1096
// Loads the field at kActualOffset as a HeapObject.
DEF_GETTER(ThinString, unchecked_actual, HeapObject) {
  using ActualField = TaggedField<HeapObject, kActualOffset>;
  return ActualField::load(cage_base, *this);
}
1100
is_uncached()1101 bool ExternalString::is_uncached() const {
1102 InstanceType type = map().instance_type();
1103 return (type & kUncachedExternalStringMask) == kUncachedExternalStringTag;
1104 }
1105
AllocateExternalPointerEntries(Isolate * isolate)1106 void ExternalString::AllocateExternalPointerEntries(Isolate* isolate) {
1107 InitExternalPointerField(kResourceOffset, isolate,
1108 kExternalStringResourceTag);
1109 if (is_uncached()) return;
1110 InitExternalPointerField(kResourceDataOffset, isolate,
1111 kExternalStringResourceDataTag);
1112 }
1113
// Reads the resource external pointer field and returns it as an Address.
DEF_GETTER(ExternalString, resource_as_address, Address) {
  Isolate* sandbox_isolate = GetIsolateForSandbox(*this);
  const Address resource_address = ReadExternalPointerField(
      kResourceOffset, sandbox_isolate, kExternalStringResourceTag);
  return resource_address;
}
1119
set_address_as_resource(Isolate * isolate,Address value)1120 void ExternalString::set_address_as_resource(Isolate* isolate, Address value) {
1121 WriteExternalPointerField(kResourceOffset, isolate, value,
1122 kExternalStringResourceTag);
1123 if (IsExternalOneByteString()) {
1124 ExternalOneByteString::cast(*this).update_data_cache(isolate);
1125 } else {
1126 ExternalTwoByteString::cast(*this).update_data_cache(isolate);
1127 }
1128 }
1129
GetResourceRefForDeserialization()1130 uint32_t ExternalString::GetResourceRefForDeserialization() {
1131 ExternalPointer_t encoded_address =
1132 ReadField<ExternalPointer_t>(kResourceOffset);
1133 return static_cast<uint32_t>(encoded_address);
1134 }
1135
SetResourceRefForSerialization(uint32_t ref)1136 void ExternalString::SetResourceRefForSerialization(uint32_t ref) {
1137 WriteField<ExternalPointer_t>(kResourceOffset,
1138 static_cast<ExternalPointer_t>(ref));
1139 if (is_uncached()) return;
1140 WriteField<ExternalPointer_t>(kResourceDataOffset, kNullExternalPointer);
1141 }
1142
DisposeResource(Isolate * isolate)1143 void ExternalString::DisposeResource(Isolate* isolate) {
1144 Address value = ReadExternalPointerField(kResourceOffset, isolate,
1145 kExternalStringResourceTag);
1146 v8::String::ExternalStringResourceBase* resource =
1147 reinterpret_cast<v8::String::ExternalStringResourceBase*>(value);
1148
1149 // Dispose of the C++ object if it has not already been disposed.
1150 if (resource != nullptr) {
1151 resource->Dispose();
1152 WriteExternalPointerField(kResourceOffset, isolate, kNullAddress,
1153 kExternalStringResourceTag);
1154 }
1155 }
1156
// Const view of the pointer returned by mutable_resource().
DEF_GETTER(ExternalOneByteString, resource,
           const ExternalOneByteString::Resource*) {
  const ExternalOneByteString::Resource* read_only = mutable_resource();
  return read_only;
}
1161
// Reinterprets the stored resource address as a Resource pointer.
DEF_GETTER(ExternalOneByteString, mutable_resource,
           ExternalOneByteString::Resource*) {
  const Address raw_resource = resource_as_address(cage_base);
  return reinterpret_cast<Resource*>(raw_resource);
}
1166
update_data_cache(Isolate * isolate)1167 void ExternalOneByteString::update_data_cache(Isolate* isolate) {
1168 DisallowGarbageCollection no_gc;
1169 if (is_uncached()) {
1170 if (resource(isolate)->IsCacheable())
1171 mutable_resource(isolate)->UpdateDataCache();
1172 } else {
1173 WriteExternalPointerField(
1174 kResourceDataOffset, isolate,
1175 reinterpret_cast<Address>(resource(isolate)->data()),
1176 kExternalStringResourceDataTag);
1177 }
1178 }
1179
SetResource(Isolate * isolate,const ExternalOneByteString::Resource * resource)1180 void ExternalOneByteString::SetResource(
1181 Isolate* isolate, const ExternalOneByteString::Resource* resource) {
1182 set_resource(isolate, resource);
1183 size_t new_payload = resource == nullptr ? 0 : resource->length();
1184 if (new_payload > 0) {
1185 isolate->heap()->UpdateExternalString(*this, 0, new_payload);
1186 }
1187 }
1188
set_resource(Isolate * isolate,const ExternalOneByteString::Resource * resource)1189 void ExternalOneByteString::set_resource(
1190 Isolate* isolate, const ExternalOneByteString::Resource* resource) {
1191 WriteExternalPointerField(kResourceOffset, isolate,
1192 reinterpret_cast<Address>(resource),
1193 kExternalStringResourceTag);
1194 if (resource != nullptr) update_data_cache(isolate);
1195 }
1196
GetChars(PtrComprCageBase cage_base)1197 const uint8_t* ExternalOneByteString::GetChars(
1198 PtrComprCageBase cage_base) const {
1199 DisallowGarbageCollection no_gc;
1200 auto res = resource(cage_base);
1201 if (is_uncached()) {
1202 if (res->IsCacheable()) {
1203 // TODO(solanes): Teach TurboFan/CSA to not bailout to the runtime to
1204 // avoid this call.
1205 return reinterpret_cast<const uint8_t*>(res->cached_data());
1206 }
1207 #if DEBUG
1208 // Check that this method is called only from the main thread if we have an
1209 // uncached string with an uncacheable resource.
1210 {
1211 Isolate* isolate;
1212 DCHECK_IMPLIES(GetIsolateFromHeapObject(*this, &isolate),
1213 ThreadId::Current() == isolate->thread_id());
1214 }
1215 #endif
1216 }
1217
1218 return reinterpret_cast<const uint8_t*>(res->data());
1219 }
1220
Get(int index,PtrComprCageBase cage_base,const SharedStringAccessGuardIfNeeded & access_guard)1221 uint8_t ExternalOneByteString::Get(
1222 int index, PtrComprCageBase cage_base,
1223 const SharedStringAccessGuardIfNeeded& access_guard) const {
1224 USE(access_guard);
1225 DCHECK(index >= 0 && index < length());
1226 return GetChars(cage_base)[index];
1227 }
1228
// Const view of the pointer returned by mutable_resource().
DEF_GETTER(ExternalTwoByteString, resource,
           const ExternalTwoByteString::Resource*) {
  const ExternalTwoByteString::Resource* read_only = mutable_resource();
  return read_only;
}
1233
// Reinterprets the stored resource address as a Resource pointer.
DEF_GETTER(ExternalTwoByteString, mutable_resource,
           ExternalTwoByteString::Resource*) {
  const Address raw_resource = resource_as_address(cage_base);
  return reinterpret_cast<Resource*>(raw_resource);
}
1238
update_data_cache(Isolate * isolate)1239 void ExternalTwoByteString::update_data_cache(Isolate* isolate) {
1240 DisallowGarbageCollection no_gc;
1241 if (is_uncached()) {
1242 if (resource(isolate)->IsCacheable())
1243 mutable_resource(isolate)->UpdateDataCache();
1244 } else {
1245 WriteExternalPointerField(
1246 kResourceDataOffset, isolate,
1247 reinterpret_cast<Address>(resource(isolate)->data()),
1248 kExternalStringResourceDataTag);
1249 }
1250 }
1251
SetResource(Isolate * isolate,const ExternalTwoByteString::Resource * resource)1252 void ExternalTwoByteString::SetResource(
1253 Isolate* isolate, const ExternalTwoByteString::Resource* resource) {
1254 set_resource(isolate, resource);
1255 size_t new_payload = resource == nullptr ? 0 : resource->length() * 2;
1256 if (new_payload > 0) {
1257 isolate->heap()->UpdateExternalString(*this, 0, new_payload);
1258 }
1259 }
1260
set_resource(Isolate * isolate,const ExternalTwoByteString::Resource * resource)1261 void ExternalTwoByteString::set_resource(
1262 Isolate* isolate, const ExternalTwoByteString::Resource* resource) {
1263 WriteExternalPointerField(kResourceOffset, isolate,
1264 reinterpret_cast<Address>(resource),
1265 kExternalStringResourceTag);
1266 if (resource != nullptr) update_data_cache(isolate);
1267 }
1268
GetChars(PtrComprCageBase cage_base)1269 const uint16_t* ExternalTwoByteString::GetChars(
1270 PtrComprCageBase cage_base) const {
1271 DisallowGarbageCollection no_gc;
1272 auto res = resource(cage_base);
1273 if (is_uncached()) {
1274 if (res->IsCacheable()) {
1275 // TODO(solanes): Teach TurboFan/CSA to not bailout to the runtime to
1276 // avoid this call.
1277 return res->cached_data();
1278 }
1279 #if DEBUG
1280 // Check that this method is called only from the main thread if we have an
1281 // uncached string with an uncacheable resource.
1282 {
1283 Isolate* isolate;
1284 DCHECK_IMPLIES(GetIsolateFromHeapObject(*this, &isolate),
1285 ThreadId::Current() == isolate->thread_id());
1286 }
1287 #endif
1288 }
1289
1290 return res->data();
1291 }
1292
Get(int index,PtrComprCageBase cage_base,const SharedStringAccessGuardIfNeeded & access_guard)1293 uint16_t ExternalTwoByteString::Get(
1294 int index, PtrComprCageBase cage_base,
1295 const SharedStringAccessGuardIfNeeded& access_guard) const {
1296 USE(access_guard);
1297 DCHECK(index >= 0 && index < length());
1298 return GetChars(cage_base)[index];
1299 }
1300
ExternalTwoByteStringGetData(unsigned start)1301 const uint16_t* ExternalTwoByteString::ExternalTwoByteStringGetData(
1302 unsigned start) {
1303 return GetChars(GetPtrComprCageBase(*this)) + start;
1304 }
1305
OffsetForDepth(int depth)1306 int ConsStringIterator::OffsetForDepth(int depth) { return depth & kDepthMask; }
1307
PushLeft(ConsString string)1308 void ConsStringIterator::PushLeft(ConsString string) {
1309 frames_[depth_++ & kDepthMask] = string;
1310 }
1311
PushRight(ConsString string)1312 void ConsStringIterator::PushRight(ConsString string) {
1313 // Inplace update.
1314 frames_[(depth_ - 1) & kDepthMask] = string;
1315 }
1316
AdjustMaximumDepth()1317 void ConsStringIterator::AdjustMaximumDepth() {
1318 if (depth_ > maximum_depth_) maximum_depth_ = depth_;
1319 }
1320
Pop()1321 void ConsStringIterator::Pop() {
1322 DCHECK_GT(depth_, 0);
1323 DCHECK(depth_ <= maximum_depth_);
1324 depth_--;
1325 }
1326
1327 class StringCharacterStream {
1328 public:
1329 inline explicit StringCharacterStream(String string, int offset = 0);
1330 StringCharacterStream(const StringCharacterStream&) = delete;
1331 StringCharacterStream& operator=(const StringCharacterStream&) = delete;
1332 inline uint16_t GetNext();
1333 inline bool HasMore();
1334 inline void Reset(String string, int offset = 0);
1335 inline void VisitOneByteString(const uint8_t* chars, int length);
1336 inline void VisitTwoByteString(const uint16_t* chars, int length);
1337
1338 private:
1339 ConsStringIterator iter_;
1340 bool is_one_byte_;
1341 union {
1342 const uint8_t* buffer8_;
1343 const uint16_t* buffer16_;
1344 };
1345 const uint8_t* end_;
1346 SharedStringAccessGuardIfNeeded access_guard_;
1347 };
1348
GetNext()1349 uint16_t StringCharacterStream::GetNext() {
1350 DCHECK(buffer8_ != nullptr && end_ != nullptr);
1351 // Advance cursor if needed.
1352 if (buffer8_ == end_) HasMore();
1353 DCHECK(buffer8_ < end_);
1354 return is_one_byte_ ? *buffer8_++ : *buffer16_++;
1355 }
1356
1357 // TODO(solanes, v8:7790, chromium:1166095): Assess if we need to use
1358 // Isolate/LocalIsolate and pipe them through, instead of using the slow
1359 // version of the SharedStringAccessGuardIfNeeded.
StringCharacterStream(String string,int offset)1360 StringCharacterStream::StringCharacterStream(String string, int offset)
1361 : is_one_byte_(false), access_guard_(string) {
1362 Reset(string, offset);
1363 }
1364
Reset(String string,int offset)1365 void StringCharacterStream::Reset(String string, int offset) {
1366 buffer8_ = nullptr;
1367 end_ = nullptr;
1368
1369 ConsString cons_string =
1370 String::VisitFlat(this, string, offset, access_guard_);
1371 iter_.Reset(cons_string, offset);
1372 if (!cons_string.is_null()) {
1373 string = iter_.Next(&offset);
1374 if (!string.is_null())
1375 String::VisitFlat(this, string, offset, access_guard_);
1376 }
1377 }
1378
HasMore()1379 bool StringCharacterStream::HasMore() {
1380 if (buffer8_ != end_) return true;
1381 int offset;
1382 String string = iter_.Next(&offset);
1383 DCHECK_EQ(offset, 0);
1384 if (string.is_null()) return false;
1385 String::VisitFlat(this, string, 0, access_guard_);
1386 DCHECK(buffer8_ != end_);
1387 return true;
1388 }
1389
VisitOneByteString(const uint8_t * chars,int length)1390 void StringCharacterStream::VisitOneByteString(const uint8_t* chars,
1391 int length) {
1392 is_one_byte_ = true;
1393 buffer8_ = chars;
1394 end_ = chars + length;
1395 }
1396
VisitTwoByteString(const uint16_t * chars,int length)1397 void StringCharacterStream::VisitTwoByteString(const uint16_t* chars,
1398 int length) {
1399 is_one_byte_ = false;
1400 buffer16_ = chars;
1401 end_ = reinterpret_cast<const uint8_t*>(chars + length);
1402 }
1403
AsArrayIndex(uint32_t * index)1404 bool String::AsArrayIndex(uint32_t* index) {
1405 DisallowGarbageCollection no_gc;
1406 uint32_t field = raw_hash_field();
1407 if (ContainsCachedArrayIndex(field)) {
1408 *index = ArrayIndexValueBits::decode(field);
1409 return true;
1410 }
1411 if (IsHashFieldComputed(field) && !IsIntegerIndex(field)) {
1412 return false;
1413 }
1414 return SlowAsArrayIndex(index);
1415 }
1416
AsIntegerIndex(size_t * index)1417 bool String::AsIntegerIndex(size_t* index) {
1418 uint32_t field = raw_hash_field();
1419 if (ContainsCachedArrayIndex(field)) {
1420 *index = ArrayIndexValueBits::decode(field);
1421 return true;
1422 }
1423 if (IsHashFieldComputed(field) && !IsIntegerIndex(field)) {
1424 return false;
1425 }
1426 return SlowAsIntegerIndex(index);
1427 }
1428
SubStringRange(String string,const DisallowGarbageCollection & no_gc,int first,int length)1429 SubStringRange::SubStringRange(String string,
1430 const DisallowGarbageCollection& no_gc,
1431 int first, int length)
1432 : string_(string),
1433 first_(first),
1434 length_(length == -1 ? string.length() : length),
1435 no_gc_(no_gc) {}
1436
1437 class SubStringRange::iterator final {
1438 public:
1439 using iterator_category = std::forward_iterator_tag;
1440 using difference_type = int;
1441 using value_type = base::uc16;
1442 using pointer = base::uc16*;
1443 using reference = base::uc16&;
1444
1445 iterator(const iterator& other) = default;
1446
1447 base::uc16 operator*() { return content_.Get(offset_); }
1448 bool operator==(const iterator& other) const {
1449 return content_.UsesSameString(other.content_) && offset_ == other.offset_;
1450 }
1451 bool operator!=(const iterator& other) const {
1452 return !content_.UsesSameString(other.content_) || offset_ != other.offset_;
1453 }
1454 iterator& operator++() {
1455 ++offset_;
1456 return *this;
1457 }
1458 iterator operator++(int);
1459
1460 private:
1461 friend class String;
1462 friend class SubStringRange;
iterator(String from,int offset,const DisallowGarbageCollection & no_gc)1463 iterator(String from, int offset, const DisallowGarbageCollection& no_gc)
1464 : content_(from.GetFlatContent(no_gc)), offset_(offset) {}
1465 String::FlatContent content_;
1466 int offset_;
1467 };
1468
begin()1469 SubStringRange::iterator SubStringRange::begin() {
1470 return SubStringRange::iterator(string_, first_, no_gc_);
1471 }
1472
end()1473 SubStringRange::iterator SubStringRange::end() {
1474 return SubStringRange::iterator(string_, first_ + length_, no_gc_);
1475 }
1476
1477 // static
IsInPlaceInternalizable(String string)1478 bool String::IsInPlaceInternalizable(String string) {
1479 return IsInPlaceInternalizable(string.map().instance_type());
1480 }
1481
1482 // static
IsInPlaceInternalizable(InstanceType instance_type)1483 bool String::IsInPlaceInternalizable(InstanceType instance_type) {
1484 switch (instance_type) {
1485 case STRING_TYPE:
1486 case ONE_BYTE_STRING_TYPE:
1487 case SHARED_STRING_TYPE:
1488 case SHARED_ONE_BYTE_STRING_TYPE:
1489 case EXTERNAL_STRING_TYPE:
1490 case EXTERNAL_ONE_BYTE_STRING_TYPE:
1491 return true;
1492 default:
1493 return false;
1494 }
1495 }
1496
1497 // static
IsInPlaceInternalizableExcludingExternal(InstanceType instance_type)1498 bool String::IsInPlaceInternalizableExcludingExternal(
1499 InstanceType instance_type) {
1500 return IsInPlaceInternalizable(instance_type) &&
1501 !InstanceTypeChecker::IsExternalString(instance_type);
1502 }
1503
1504 } // namespace internal
1505 } // namespace v8
1506
1507 #include "src/objects/object-macros-undef.h"
1508
1509 #endif // V8_OBJECTS_STRING_INL_H_
1510