1 // Copyright 2017 the V8 project authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef V8_OBJECTS_STRING_H_ 6 #define V8_OBJECTS_STRING_H_ 7 8 #include <memory> 9 10 #include "src/base/bits.h" 11 #include "src/base/export-template.h" 12 #include "src/objects/instance-type.h" 13 #include "src/objects/name.h" 14 #include "src/objects/smi.h" 15 #include "src/strings/unicode-decoder.h" 16 #include "torque-generated/field-offsets.h" 17 18 // Has to be the last include (doesn't have include guards): 19 #include "src/objects/object-macros.h" 20 21 namespace v8 { 22 namespace internal { 23 24 class SharedStringAccessGuardIfNeeded; 25 26 enum InstanceType : uint16_t; 27 28 enum AllowNullsFlag { ALLOW_NULLS, DISALLOW_NULLS }; 29 enum RobustnessFlag { ROBUST_STRING_TRAVERSAL, FAST_STRING_TRAVERSAL }; 30 31 // The characteristics of a string are stored in its map. Retrieving these 32 // few bits of information is moderately expensive, involving two memory 33 // loads where the second is dependent on the first. To improve efficiency 34 // the shape of the string is given its own class so that it can be retrieved 35 // once and used for several string operations. A StringShape is small enough 36 // to be passed by value and is immutable, but be aware that flattening a 37 // string can potentially alter its shape. Also be aware that a GC caused by 38 // something else can alter the shape of a string due to ConsString 39 // shortcutting. Keeping these restrictions in mind has proven to be error- 40 // prone and so we no longer put StringShapes in variables unless there is a 41 // concrete performance benefit at that particular point in the code. 
42 class StringShape { 43 public: 44 inline explicit StringShape(const String s); 45 inline explicit StringShape(Map s); 46 inline explicit StringShape(InstanceType t); 47 inline bool IsSequential(); 48 inline bool IsExternal(); 49 inline bool IsCons(); 50 inline bool IsSliced(); 51 inline bool IsThin(); 52 inline bool IsIndirect(); 53 inline bool IsExternalOneByte(); 54 inline bool IsExternalTwoByte(); 55 inline bool IsSequentialOneByte(); 56 inline bool IsSequentialTwoByte(); 57 inline bool IsInternalized(); 58 inline StringRepresentationTag representation_tag(); 59 inline uint32_t encoding_tag(); 60 inline uint32_t full_representation_tag(); 61 #ifdef DEBUG type()62 inline uint32_t type() { return type_; } invalidate()63 inline void invalidate() { valid_ = false; } valid()64 inline bool valid() { return valid_; } 65 #else invalidate()66 inline void invalidate() {} 67 #endif 68 69 // Run different behavior for each concrete string class type, as defined by 70 // the dispatcher. 71 template <typename TDispatcher, typename TResult, typename... TArgs> 72 inline TResult DispatchToSpecificTypeWithoutCast(TArgs&&... args); 73 template <typename TDispatcher, typename TResult, typename... TArgs> 74 inline TResult DispatchToSpecificType(String str, TArgs&&... args); 75 76 private: 77 uint32_t type_; 78 #ifdef DEBUG set_valid()79 inline void set_valid() { valid_ = true; } 80 bool valid_; 81 #else set_valid()82 inline void set_valid() {} 83 #endif 84 }; 85 86 #include "torque-generated/src/objects/string-tq.inc" 87 88 // The String abstract class captures JavaScript string values: 89 // 90 // Ecma-262: 91 // 4.3.16 String Value 92 // A string value is a member of the type String and is a finite 93 // ordered sequence of zero or more 16-bit unsigned integer values. 94 // 95 // All string values have a length field. 
96 class String : public TorqueGeneratedString<String, Name> { 97 public: 98 enum Encoding { ONE_BYTE_ENCODING, TWO_BYTE_ENCODING }; 99 100 // Representation of the flat content of a String. 101 // A non-flat string doesn't have flat content. 102 // A flat string has content that's encoded as a sequence of either 103 // one-byte chars or two-byte UC16. 104 // Returned by String::GetFlatContent(). 105 // Not safe to use from concurrent background threads. 106 // TODO(solanes): Move FlatContent into FlatStringReader, and make it private. 107 // This would de-duplicate code, as well as taking advantage of the fact that 108 // FlatStringReader is relocatable. 109 class FlatContent { 110 public: 111 // Returns true if the string is flat and this structure contains content. IsFlat()112 bool IsFlat() const { return state_ != NON_FLAT; } 113 // Returns true if the structure contains one-byte content. IsOneByte()114 bool IsOneByte() const { return state_ == ONE_BYTE; } 115 // Returns true if the structure contains two-byte content. IsTwoByte()116 bool IsTwoByte() const { return state_ == TWO_BYTE; } 117 118 // Return the one byte content of the string. Only use if IsOneByte() 119 // returns true. ToOneByteVector()120 Vector<const uint8_t> ToOneByteVector() const { 121 DCHECK_EQ(ONE_BYTE, state_); 122 return Vector<const uint8_t>(onebyte_start, length_); 123 } 124 // Return the two-byte content of the string. Only use if IsTwoByte() 125 // returns true. 
ToUC16Vector()126 Vector<const uc16> ToUC16Vector() const { 127 DCHECK_EQ(TWO_BYTE, state_); 128 return Vector<const uc16>(twobyte_start, length_); 129 } 130 Get(int i)131 uc16 Get(int i) const { 132 DCHECK(i < length_); 133 DCHECK(state_ != NON_FLAT); 134 if (state_ == ONE_BYTE) return onebyte_start[i]; 135 return twobyte_start[i]; 136 } 137 UsesSameString(const FlatContent & other)138 bool UsesSameString(const FlatContent& other) const { 139 return onebyte_start == other.onebyte_start; 140 } 141 142 private: 143 enum State { NON_FLAT, ONE_BYTE, TWO_BYTE }; 144 145 // Constructors only used by String::GetFlatContent(). FlatContent(const uint8_t * start,int length,const DisallowHeapAllocation & no_gc)146 FlatContent(const uint8_t* start, int length, 147 const DisallowHeapAllocation& no_gc) 148 : onebyte_start(start), 149 length_(length), 150 state_(ONE_BYTE), 151 no_gc_(no_gc) {} FlatContent(const uc16 * start,int length,const DisallowHeapAllocation & no_gc)152 FlatContent(const uc16* start, int length, 153 const DisallowHeapAllocation& no_gc) 154 : twobyte_start(start), 155 length_(length), 156 state_(TWO_BYTE), 157 no_gc_(no_gc) {} FlatContent(const DisallowHeapAllocation & no_gc)158 explicit FlatContent(const DisallowHeapAllocation& no_gc) 159 : onebyte_start(nullptr), length_(0), state_(NON_FLAT), no_gc_(no_gc) {} 160 161 union { 162 const uint8_t* onebyte_start; 163 const uc16* twobyte_start; 164 }; 165 int length_; 166 State state_; 167 const DisallowHeapAllocation& no_gc_; 168 169 friend class String; 170 friend class IterableSubString; 171 }; 172 173 void MakeThin(Isolate* isolate, String canonical); 174 175 template <typename Char> 176 V8_INLINE Vector<const Char> GetCharVector( 177 const DisallowHeapAllocation& no_gc); 178 179 // Get chars from sequential or external strings. May only be called when a 180 // SharedStringAccessGuard is not needed (i.e. on the main thread or on 181 // read-only strings). 
182 template <typename Char> 183 inline const Char* GetChars(const DisallowHeapAllocation& no_gc); 184 185 // Get chars from sequential or external strings. 186 template <typename Char> 187 inline const Char* GetChars( 188 const DisallowHeapAllocation& no_gc, 189 const SharedStringAccessGuardIfNeeded& access_guard); 190 191 // Returns the address of the character at an offset into this string. 192 // Requires: this->IsFlat() 193 const byte* AddressOfCharacterAt(int start_index, 194 const DisallowHeapAllocation& no_gc); 195 196 // Get and set the length of the string using acquire loads and release 197 // stores. 198 DECL_SYNCHRONIZED_INT_ACCESSORS(length) 199 200 // Returns whether this string has only one-byte chars, i.e. all of them can 201 // be one-byte encoded. This might be the case even if the string is 202 // two-byte. Such strings may appear when the embedder prefers 203 // two-byte external representations even for one-byte data. 204 DECL_GETTER(IsOneByteRepresentation, bool) 205 DECL_GETTER(IsTwoByteRepresentation, bool) 206 207 // Cons and slices have an encoding flag that may not represent the actual 208 // encoding of the underlying string. This is taken into account here. 209 // This function is static because that helps it get inlined. 210 // Requires: string.IsFlat() 211 static inline bool IsOneByteRepresentationUnderneath(String string); 212 213 // Get and set individual two byte chars in the string. 214 inline void Set(int index, uint16_t value); 215 // Get individual two byte char in the string. Repeated calls 216 // to this method are not efficient unless the string is flat. 217 V8_INLINE uint16_t Get(int index); 218 219 // ES6 section 7.1.3.1 ToNumber Applied to the String Type 220 static Handle<Object> ToNumber(Isolate* isolate, Handle<String> subject); 221 222 // Flattens the string. Checks first inline to see if it is 223 // necessary. Does nothing if the string is not a cons string. 
224 // Flattening allocates a sequential string with the same data as 225 // the given string and mutates the cons string to a degenerate 226 // form, where the first component is the new sequential string and 227 // the second component is the empty string. If allocation fails, 228 // this function returns a failure. If flattening succeeds, this 229 // function returns the sequential string that is now the first 230 // component of the cons string. 231 // 232 // Degenerate cons strings are handled specially by the garbage 233 // collector (see IsShortcutCandidate). 234 235 static inline Handle<String> Flatten( 236 Isolate* isolate, Handle<String> string, 237 AllocationType allocation = AllocationType::kYoung); 238 static inline Handle<String> Flatten( 239 LocalIsolate* isolate, Handle<String> string, 240 AllocationType allocation = AllocationType::kYoung); 241 242 // Tries to return the content of a flat string as a structure holding either 243 // a flat vector of char or of uc16. 244 // If the string isn't flat, and therefore doesn't have flat content, the 245 // returned structure will report so, and can't provide a vector of either 246 // kind. 247 V8_EXPORT_PRIVATE FlatContent 248 GetFlatContent(const DisallowHeapAllocation& no_gc); 249 250 // Returns the parent of a sliced string or first part of a flat cons string. 251 // Requires: StringShape(this).IsIndirect() && this->IsFlat() 252 inline String GetUnderlying(); 253 254 // String relational comparison, implemented according to ES6 section 7.2.11 255 // Abstract Relational Comparison (step 5): The comparison of Strings uses a 256 // simple lexicographic ordering on sequences of code unit values. There is no 257 // attempt to use the more complex, semantically oriented definitions of 258 // character or string equality and collating order defined in the Unicode 259 // specification. Therefore String values that are canonically equal according 260 // to the Unicode standard could test as unequal. 
In effect this algorithm 261 // assumes that both Strings are already in normalized form. Also, note that 262 // for strings containing supplementary characters, lexicographic ordering on 263 // sequences of UTF-16 code unit values differs from that on sequences of code 264 // point values. 265 V8_WARN_UNUSED_RESULT static ComparisonResult Compare(Isolate* isolate, 266 Handle<String> x, 267 Handle<String> y); 268 269 // Perform ES6 21.1.3.8, including checking arguments. 270 static Object IndexOf(Isolate* isolate, Handle<Object> receiver, 271 Handle<Object> search, Handle<Object> position); 272 // Perform string match of pattern on subject, starting at start index. 273 // Caller must ensure that 0 <= start_index <= sub->length(), as this does not 274 // check any arguments. 275 static int IndexOf(Isolate* isolate, Handle<String> receiver, 276 Handle<String> search, int start_index); 277 278 static Object LastIndexOf(Isolate* isolate, Handle<Object> receiver, 279 Handle<Object> search, Handle<Object> position); 280 281 // Encapsulates logic related to a match and its capture groups as required 282 // by GetSubstitution. 283 class Match { 284 public: 285 virtual Handle<String> GetMatch() = 0; 286 virtual Handle<String> GetPrefix() = 0; 287 virtual Handle<String> GetSuffix() = 0; 288 289 // A named capture can be unmatched (either not specified in the pattern, 290 // or specified but unmatched in the current string), or matched. 291 enum CaptureState { UNMATCHED, MATCHED }; 292 293 virtual int CaptureCount() = 0; 294 virtual bool HasNamedCaptures() = 0; 295 virtual MaybeHandle<String> GetCapture(int i, bool* capture_exists) = 0; 296 virtual MaybeHandle<String> GetNamedCapture(Handle<String> name, 297 CaptureState* state) = 0; 298 299 virtual ~Match() = default; 300 }; 301 302 // ES#sec-getsubstitution 303 // GetSubstitution(matched, str, position, captures, replacement) 304 // Expand the $-expressions in the string and return a new string with 305 // the result. 
306 // A {start_index} can be passed to specify where to start scanning the 307 // replacement string. 308 V8_WARN_UNUSED_RESULT static MaybeHandle<String> GetSubstitution( 309 Isolate* isolate, Match* match, Handle<String> replacement, 310 int start_index = 0); 311 312 // String equality operations. 313 inline bool Equals(String other); 314 inline static bool Equals(Isolate* isolate, Handle<String> one, 315 Handle<String> two); 316 317 // Dispatches to Is{One,Two}ByteEqualTo. 318 template <typename Char> 319 bool IsEqualTo(Vector<const Char> str); 320 321 V8_EXPORT_PRIVATE bool HasOneBytePrefix(Vector<const char> str); 322 V8_EXPORT_PRIVATE bool IsOneByteEqualTo(Vector<const uint8_t> str); IsOneByteEqualTo(Vector<const char> str)323 V8_EXPORT_PRIVATE bool IsOneByteEqualTo(Vector<const char> str) { 324 return IsOneByteEqualTo(Vector<const uint8_t>::cast(str)); 325 } 326 bool IsTwoByteEqualTo(Vector<const uc16> str); 327 328 // Return a UTF8 representation of the string. The string is null 329 // terminated but may optionally contain nulls. Length is returned 330 // in length_output if length_output is not a null pointer The string 331 // should be nearly flat, otherwise the performance of this method may 332 // be very slow (quadratic in the length). Setting robustness_flag to 333 // ROBUST_STRING_TRAVERSAL invokes behaviour that is robust This means it 334 // handles unexpected data without causing assert failures and it does not 335 // do any heap allocations. This is useful when printing stack traces. 336 std::unique_ptr<char[]> ToCString(AllowNullsFlag allow_nulls, 337 RobustnessFlag robustness_flag, int offset, 338 int length, int* length_output = nullptr); 339 V8_EXPORT_PRIVATE std::unique_ptr<char[]> ToCString( 340 AllowNullsFlag allow_nulls = DISALLOW_NULLS, 341 RobustnessFlag robustness_flag = FAST_STRING_TRAVERSAL, 342 int* length_output = nullptr); 343 344 // Externalization. 
345 V8_EXPORT_PRIVATE bool MakeExternal( 346 v8::String::ExternalStringResource* resource); 347 V8_EXPORT_PRIVATE bool MakeExternal( 348 v8::String::ExternalOneByteStringResource* resource); 349 bool SupportsExternalization(); 350 351 // Conversion. 352 // "array index": an index allowed by the ES spec for JSArrays. 353 inline bool AsArrayIndex(uint32_t* index); 354 355 // This is used for calculating array indices but differs from an 356 // Array Index in the regard that this does not support the full 357 // array index range. This only supports positive numbers less than 358 // or equal to INT_MAX. 359 // 360 // String::AsArrayIndex might be a better fit if you're looking to 361 // calculate the array index. 362 // 363 // if val < 0 or val > INT_MAX, returns -1 364 // if 0 <= val <= INT_MAX, returns val 365 static int32_t ToArrayIndex(Address addr); 366 367 uint32_t inline ToValidIndex(Object number); 368 // "integer index": the string is the decimal representation of an 369 // integer in the range of a size_t. Useful for TypedArray accesses. 370 inline bool AsIntegerIndex(size_t* index); 371 372 // Trimming. 373 enum TrimMode { kTrim, kTrimStart, kTrimEnd }; 374 static Handle<String> Trim(Isolate* isolate, Handle<String> string, 375 TrimMode mode); 376 377 V8_EXPORT_PRIVATE void PrintOn(FILE* out); 378 379 // For use during stack traces. Performs rudimentary sanity check. 380 bool LooksValid(); 381 382 // Printing utility functions. 383 // - PrintUC16 prints the raw string contents to the given stream. 384 // Non-printable characters are formatted as hex, but otherwise the string 385 // is printed as-is. 386 // - StringShortPrint and StringPrint have extra formatting: they add a 387 // prefix and suffix depending on the string kind, may add other information 388 // such as the string heap object address, may truncate long strings, etc. 
389 const char* PrefixForDebugPrint() const; 390 const char* SuffixForDebugPrint() const; 391 void StringShortPrint(StringStream* accumulator); 392 void PrintUC16(std::ostream& os, int start = 0, int end = -1); // NOLINT 393 void PrintUC16(StringStream* accumulator, int start, int end); 394 395 // Dispatched behavior. 396 #if defined(DEBUG) || defined(OBJECT_PRINT) 397 char* ToAsciiArray(); 398 #endif 399 DECL_PRINTER(String) 400 DECL_VERIFIER(String) 401 402 inline bool IsFlat(); 403 404 // Max char codes. 405 static const int32_t kMaxOneByteCharCode = unibrow::Latin1::kMaxChar; 406 static const uint32_t kMaxOneByteCharCodeU = unibrow::Latin1::kMaxChar; 407 static const int kMaxUtf16CodeUnit = 0xffff; 408 static const uint32_t kMaxUtf16CodeUnitU = kMaxUtf16CodeUnit; 409 static const uc32 kMaxCodePoint = 0x10ffff; 410 411 // Maximal string length. 412 // The max length is different on 32 and 64 bit platforms. Max length for 413 // 32-bit platforms is ~268.4M chars. On 64-bit platforms, max length is 414 // ~536.8M chars. 415 // See include/v8.h for the definition. 416 static const int kMaxLength = v8::String::kMaxLength; 417 // There are several defining limits imposed by our current implementation: 418 // - any string's length must fit into a Smi. 419 static_assert(kMaxLength <= kSmiMaxValue, 420 "String length must fit into a Smi"); 421 // - adding two string lengths must still fit into a 32-bit int without 422 // overflow 423 static_assert(kMaxLength * 2 <= kMaxInt, 424 "String::kMaxLength * 2 must fit into an int32"); 425 // - any heap object's size in bytes must be able to fit into a Smi, because 426 // its space on the heap might be filled with a Filler; for strings this 427 // means SeqTwoByteString::kMaxSize must be able to fit into a Smi. 
428 static_assert(kMaxLength * 2 + kHeaderSize <= kSmiMaxValue, 429 "String object size in bytes must fit into a Smi"); 430 // - any heap object's size in bytes must be able to fit into an int, because 431 // that's what our object handling code uses almost everywhere. 432 static_assert(kMaxLength * 2 + kHeaderSize <= kMaxInt, 433 "String object size in bytes must fit into an int"); 434 435 // Max length for computing hash. For strings longer than this limit the 436 // string length is used as the hash value. 437 static const int kMaxHashCalcLength = 16383; 438 439 // Limit for truncation in short printing. 440 static const int kMaxShortPrintLength = 1024; 441 442 // Helper function for flattening strings. 443 template <typename sinkchar> 444 EXPORT_TEMPLATE_DECLARE(V8_EXPORT_PRIVATE) 445 static void WriteToFlat(String source, sinkchar* sink, int from, int to); 446 IsAscii(const char * chars,int length)447 static inline bool IsAscii(const char* chars, int length) { 448 return IsAscii(reinterpret_cast<const uint8_t*>(chars), length); 449 } 450 IsAscii(const uint8_t * chars,int length)451 static inline bool IsAscii(const uint8_t* chars, int length) { 452 return NonAsciiStart(chars, length) >= length; 453 } 454 NonOneByteStart(const uc16 * chars,int length)455 static inline int NonOneByteStart(const uc16* chars, int length) { 456 DCHECK(IsAligned(reinterpret_cast<Address>(chars), sizeof(uc16))); 457 const uint16_t* start = chars; 458 const uint16_t* limit = chars + length; 459 460 if (static_cast<size_t>(length) >= kUIntptrSize) { 461 // Check unaligned chars. 462 while (!IsAligned(reinterpret_cast<Address>(chars), kUIntptrSize)) { 463 if (*chars > unibrow::Latin1::kMaxChar) { 464 return static_cast<int>(chars - start); 465 } 466 ++chars; 467 } 468 469 // Check aligned words. 
470 STATIC_ASSERT(unibrow::Latin1::kMaxChar == 0xFF); 471 #ifdef V8_TARGET_LITTLE_ENDIAN 472 const uintptr_t non_one_byte_mask = kUintptrAllBitsSet / 0xFFFF * 0xFF00; 473 #else 474 const uintptr_t non_one_byte_mask = kUintptrAllBitsSet / 0xFFFF * 0x00FF; 475 #endif 476 while (chars + sizeof(uintptr_t) <= limit) { 477 if (*reinterpret_cast<const uintptr_t*>(chars) & non_one_byte_mask) { 478 break; 479 } 480 chars += (sizeof(uintptr_t) / sizeof(uc16)); 481 } 482 } 483 484 // Check remaining unaligned chars, or find non-one-byte char in word. 485 while (chars < limit) { 486 if (*chars > unibrow::Latin1::kMaxChar) { 487 return static_cast<int>(chars - start); 488 } 489 ++chars; 490 } 491 492 return static_cast<int>(chars - start); 493 } 494 IsOneByte(const uc16 * chars,int length)495 static inline bool IsOneByte(const uc16* chars, int length) { 496 return NonOneByteStart(chars, length) >= length; 497 } 498 499 template <class Visitor> 500 static inline ConsString VisitFlat(Visitor* visitor, String string, 501 int offset = 0); 502 503 template <typename LocalIsolate> 504 static Handle<FixedArray> CalculateLineEnds(LocalIsolate* isolate, 505 Handle<String> string, 506 bool include_ending_line); 507 508 private: 509 friend class Name; 510 friend class StringTableInsertionKey; 511 friend class InternalizedStringKey; 512 513 V8_EXPORT_PRIVATE static Handle<String> SlowFlatten( 514 Isolate* isolate, Handle<ConsString> cons, AllocationType allocation); 515 516 // Slow case of String::Equals. This implementation works on any strings 517 // but it is most efficient on strings that are almost flat. 518 V8_EXPORT_PRIVATE bool SlowEquals(String other); 519 520 V8_EXPORT_PRIVATE static bool SlowEquals(Isolate* isolate, Handle<String> one, 521 Handle<String> two); 522 523 // Slow case of AsArrayIndex. 524 V8_EXPORT_PRIVATE bool SlowAsArrayIndex(uint32_t* index); 525 V8_EXPORT_PRIVATE bool SlowAsIntegerIndex(size_t* index); 526 527 // Compute and set the hash code. 
528 V8_EXPORT_PRIVATE uint32_t ComputeAndSetHash(); 529 530 TQ_OBJECT_CONSTRUCTORS(String) 531 }; 532 533 // clang-format off 534 extern template EXPORT_TEMPLATE_DECLARE(V8_EXPORT_PRIVATE) 535 void String::WriteToFlat(String source, uint16_t* sink, int from, int to); 536 // clang-format on 537 538 class SubStringRange { 539 public: 540 inline SubStringRange(String string, const DisallowHeapAllocation& no_gc, 541 int first = 0, int length = -1); 542 class iterator; 543 inline iterator begin(); 544 inline iterator end(); 545 546 private: 547 String string_; 548 int first_; 549 int length_; 550 const DisallowHeapAllocation& no_gc_; 551 }; 552 553 // The SeqString abstract class captures sequential string values. 554 class SeqString : public TorqueGeneratedSeqString<SeqString, String> { 555 public: 556 // Truncate the string in-place if possible and return the result. 557 // In case of new_length == 0, the empty string is returned without 558 // truncating the original string. 559 V8_WARN_UNUSED_RESULT static Handle<String> Truncate(Handle<SeqString> string, 560 int new_length); 561 562 TQ_OBJECT_CONSTRUCTORS(SeqString) 563 }; 564 565 class InternalizedString 566 : public TorqueGeneratedInternalizedString<InternalizedString, String> { 567 public: 568 // TODO(neis): Possibly move some stuff from String here. 569 570 TQ_OBJECT_CONSTRUCTORS(InternalizedString) 571 }; 572 573 // The OneByteString class captures sequential one-byte string objects. 574 // Each character in the OneByteString is an one-byte character. 575 class SeqOneByteString 576 : public TorqueGeneratedSeqOneByteString<SeqOneByteString, SeqString> { 577 public: 578 static const bool kHasOneByteEncoding = true; 579 using Char = uint8_t; 580 581 // Dispatched behavior. 582 inline uint8_t Get(int index); 583 inline void SeqOneByteStringSet(int index, uint16_t value); 584 585 // Get the address of the characters in this string. 
586 inline Address GetCharsAddress(); 587 588 // Get a pointer to the characters of the string. May only be called when a 589 // SharedStringAccessGuard is not needed (i.e. on the main thread or on 590 // read-only strings). 591 inline uint8_t* GetChars(const DisallowHeapAllocation& no_gc); 592 593 // Get a pointer to the characters of the string. 594 inline uint8_t* GetChars(const DisallowHeapAllocation& no_gc, 595 const SharedStringAccessGuardIfNeeded& access_guard); 596 597 // Clear uninitialized padding space. This ensures that the snapshot content 598 // is deterministic. 599 void clear_padding(); 600 601 // Garbage collection support. This method is called by the 602 // garbage collector to compute the actual size of an OneByteString 603 // instance. 604 inline int SeqOneByteStringSize(InstanceType instance_type); 605 606 // Maximal memory usage for a single sequential one-byte string. 607 static const int kMaxCharsSize = kMaxLength; 608 static const int kMaxSize = OBJECT_POINTER_ALIGN(kMaxCharsSize + kHeaderSize); 609 STATIC_ASSERT((kMaxSize - kHeaderSize) >= String::kMaxLength); 610 611 int AllocatedSize(); 612 613 class BodyDescriptor; 614 615 TQ_OBJECT_CONSTRUCTORS(SeqOneByteString) 616 }; 617 618 // The TwoByteString class captures sequential unicode string objects. 619 // Each character in the TwoByteString is a two-byte uint16_t. 620 class SeqTwoByteString 621 : public TorqueGeneratedSeqTwoByteString<SeqTwoByteString, SeqString> { 622 public: 623 static const bool kHasOneByteEncoding = false; 624 using Char = uint16_t; 625 626 // Dispatched behavior. 627 inline uint16_t Get(int index); 628 inline void SeqTwoByteStringSet(int index, uint16_t value); 629 630 // Get the address of the characters in this string. 631 inline Address GetCharsAddress(); 632 633 // Get a pointer to the characters of the string. May only be called when a 634 // SharedStringAccessGuard is not needed (i.e. on the main thread or on 635 // read-only strings). 
636 inline uc16* GetChars(const DisallowHeapAllocation& no_gc); 637 638 // Get a pointer to the characters of the string. 639 inline uc16* GetChars(const DisallowHeapAllocation& no_gc, 640 const SharedStringAccessGuardIfNeeded& access_guard); 641 642 // Clear uninitialized padding space. This ensures that the snapshot content 643 // is deterministic. 644 void clear_padding(); 645 646 // Garbage collection support. This method is called by the 647 // garbage collector to compute the actual size of a TwoByteString 648 // instance. 649 inline int SeqTwoByteStringSize(InstanceType instance_type); 650 651 // Maximal memory usage for a single sequential two-byte string. 652 static const int kMaxCharsSize = kMaxLength * 2; 653 static const int kMaxSize = OBJECT_POINTER_ALIGN(kMaxCharsSize + kHeaderSize); 654 STATIC_ASSERT(static_cast<int>((kMaxSize - kHeaderSize) / sizeof(uint16_t)) >= 655 String::kMaxLength); 656 657 int AllocatedSize(); 658 659 class BodyDescriptor; 660 661 TQ_OBJECT_CONSTRUCTORS(SeqTwoByteString) 662 }; 663 664 // The ConsString class describes string values built by using the 665 // addition operator on strings. A ConsString is a pair where the 666 // first and second components are pointers to other string values. 667 // One or both components of a ConsString can be pointers to other 668 // ConsStrings, creating a binary tree of ConsStrings where the leaves 669 // are non-ConsString string values. The string value represented by 670 // a ConsString can be obtained by concatenating the leaf string 671 // values in a left-to-right depth-first traversal of the tree. 672 class ConsString : public TorqueGeneratedConsString<ConsString, String> { 673 public: 674 // Doesn't check that the result is a string, even in debug mode. This is 675 // useful during GC where the mark bits confuse the checks. 676 inline Object unchecked_first(); 677 678 // Doesn't check that the result is a string, even in debug mode. 
This is 679 // useful during GC where the mark bits confuse the checks. 680 inline Object unchecked_second(); 681 682 // Dispatched behavior. 683 V8_EXPORT_PRIVATE uint16_t Get(int index); 684 685 // Minimum length for a cons string. 686 static const int kMinLength = 13; 687 688 class BodyDescriptor; 689 690 DECL_VERIFIER(ConsString) 691 692 TQ_OBJECT_CONSTRUCTORS(ConsString) 693 }; 694 695 // The ThinString class describes string objects that are just references 696 // to another string object. They are used for in-place internalization when 697 // the original string cannot actually be internalized in-place: in these 698 // cases, the original string is converted to a ThinString pointing at its 699 // internalized version (which is allocated as a new object). 700 // In terms of memory layout and most algorithms operating on strings, 701 // ThinStrings can be thought of as "one-part cons strings". 702 class ThinString : public TorqueGeneratedThinString<ThinString, String> { 703 public: 704 DECL_GETTER(unchecked_actual, HeapObject) 705 706 V8_EXPORT_PRIVATE uint16_t Get(int index); 707 708 DECL_VERIFIER(ThinString) 709 710 class BodyDescriptor; 711 712 TQ_OBJECT_CONSTRUCTORS(ThinString) 713 }; 714 715 // The Sliced String class describes strings that are substrings of another 716 // sequential string. The motivation is to save time and memory when creating 717 // a substring. A Sliced String is described as a pointer to the parent, 718 // the offset from the start of the parent string and the length. Using 719 // a Sliced String therefore requires unpacking of the parent string and 720 // adding the offset to the start address. A substring of a Sliced String 721 // are not nested since the double indirection is simplified when creating 722 // such a substring. 723 // Currently missing features are: 724 // - handling externalized parent strings 725 // - external strings as parent 726 // - truncating sliced string to enable otherwise unneeded parent to be GC'ed. 
class SlicedString : public TorqueGeneratedSlicedString<SlicedString, String> {
 public:
  // Setter for the parent string. The write-barrier mode defaults to
  // UPDATE_WRITE_BARRIER.
  inline void set_parent(String parent,
                         WriteBarrierMode mode = UPDATE_WRITE_BARRIER);
  // Dispatched behavior.
  V8_EXPORT_PRIVATE uint16_t Get(int index);

  // Minimum length for a sliced string.
  static const int kMinLength = 13;

  // Declared here only; the definition is not part of this header.
  class BodyDescriptor;

  DECL_VERIFIER(SlicedString)

  TQ_OBJECT_CONSTRUCTORS(SlicedString)
};

// The ExternalString class describes string values that are backed by
// a string resource that lies outside the V8 heap. ExternalStrings
// consist of the length field common to all strings and a pointer to the
// external resource. It is important to ensure (externally) that the
// resource is not deallocated while the ExternalString is live in the
// V8 heap.
//
// The API expects that all ExternalStrings are created through the
// API. Therefore, ExternalStrings should not be used internally.
class ExternalString : public String {
 public:
  DECL_CAST(ExternalString)
  DECL_VERIFIER(ExternalString)

  // Field offsets start right after the String header.
  DEFINE_FIELD_OFFSET_CONSTANTS(String::kHeaderSize,
                                TORQUE_GENERATED_EXTERNAL_STRING_FIELDS)

  // Size of uncached external strings: the object ends directly after the
  // resource field.
  static const int kUncachedSize =
      kResourceOffset + FIELD_SIZE(kResourceOffset);

  inline void AllocateExternalPointerEntries(Isolate* isolate);

  // Return whether the external string data pointer is not cached.
  inline bool is_uncached() const;
  // Size in bytes of the external payload.
  int ExternalPayloadSize() const;

  // Used in the serializer/deserializer.
  DECL_GETTER(resource_as_address, Address)
  inline void set_address_as_resource(Isolate* isolate, Address address);
  inline uint32_t GetResourceRefForDeserialization();
  inline void SetResourceRefForSerialization(uint32_t ref);

  // Disposes string's resource object if it has not already been disposed.
  inline void DisposeResource(Isolate* isolate);

  // The resource offset must agree with the constant exposed through
  // Internals.
  STATIC_ASSERT(kResourceOffset == Internals::kStringResourceOffset);
  // Common size of every external string. ExternalOneByteString and
  // ExternalTwoByteString each static-assert that their kSize equals this.
  static const int kSizeOfAllExternalStrings = kHeaderSize;

  OBJECT_CONSTRUCTORS(ExternalString, String);
};

// The ExternalOneByteString class is an external string backed by a
// one-byte string.
class ExternalOneByteString : public ExternalString {
 public:
  static const bool kHasOneByteEncoding = true;

  using Resource = v8::String::ExternalOneByteStringResource;

  // The underlying resource.
  DECL_GETTER(resource, const Resource*)

  // It is assumed that the previous resource is null. If it is not null, then
  // it is the responsibility of the caller to handle the previous resource.
  inline void SetResource(Isolate* isolate, const Resource* buffer);

  // Used only during serialization.
  inline void set_resource(Isolate* isolate, const Resource* buffer);

  // Update the pointer cache to the external character array.
  // The cached pointer is always valid, as the external character array does
  // not move during lifetime. Deserialization is the only exception, after
  // which the pointer cache has to be refreshed.
  inline void update_data_cache(Isolate* isolate);

  inline const uint8_t* GetChars();

  // Dispatched behavior.
  inline uint8_t Get(int index);

  DECL_CAST(ExternalOneByteString)

  // Declared here only; the definition is not part of this header.
  class BodyDescriptor;

  // Field offsets start right after the ExternalString header.
  DEFINE_FIELD_OFFSET_CONSTANTS(
      ExternalString::kHeaderSize,
      TORQUE_GENERATED_EXTERNAL_ONE_BYTE_STRING_FIELDS)

  STATIC_ASSERT(kSize == kSizeOfAllExternalStrings);

  OBJECT_CONSTRUCTORS(ExternalOneByteString, ExternalString);
};

// The ExternalTwoByteString class is an external string backed by a UTF-16
// encoded string.
class ExternalTwoByteString : public ExternalString {
 public:
  static const bool kHasOneByteEncoding = false;

  using Resource = v8::String::ExternalStringResource;

  // The underlying string resource.
  DECL_GETTER(resource, const Resource*)

  // It is assumed that the previous resource is null. If it is not null, then
  // it is the responsibility of the caller to handle the previous resource.
  inline void SetResource(Isolate* isolate, const Resource* buffer);

  // Used only during serialization.
  inline void set_resource(Isolate* isolate, const Resource* buffer);

  // Update the pointer cache to the external character array.
  // The cached pointer is always valid, as the external character array does
  // not move during lifetime. Deserialization is the only exception, after
  // which the pointer cache has to be refreshed.
  inline void update_data_cache(Isolate* isolate);

  inline const uint16_t* GetChars();

  // Dispatched behavior.
  inline uint16_t Get(int index);

  // For regexp code.
  // NOTE(review): presumably returns a pointer to the character data
  // beginning at index |start| -- confirm against the inline definition.
  inline const uint16_t* ExternalTwoByteStringGetData(unsigned start);

  DECL_CAST(ExternalTwoByteString)

  // Declared here only; the definition is not part of this header.
  class BodyDescriptor;

  // Field offsets start right after the ExternalString header.
  DEFINE_FIELD_OFFSET_CONSTANTS(
      ExternalString::kHeaderSize,
      TORQUE_GENERATED_EXTERNAL_TWO_BYTE_STRING_FIELDS)

  STATIC_ASSERT(kSize == kSizeOfAllExternalStrings);

  OBJECT_CONSTRUCTORS(ExternalTwoByteString, ExternalString);
};

// A flat string reader provides random access to the contents of a
// string independent of the character width of the string. The handle
// must be valid as long as the reader is being used.
// Not safe to use from concurrent background threads.
878 class V8_EXPORT_PRIVATE FlatStringReader : public Relocatable { 879 public: 880 FlatStringReader(Isolate* isolate, Handle<String> str); 881 void PostGarbageCollection() override; 882 inline uc32 Get(int index); 883 template <typename Char> 884 inline Char Get(int index); 885 int length() { return length_; } 886 887 private: 888 Handle<String> str_; 889 bool is_one_byte_; 890 int length_; 891 const void* start_; 892 }; 893 894 // This maintains an off-stack representation of the stack frames required 895 // to traverse a ConsString, allowing an entirely iterative and restartable 896 // traversal of the entire string 897 class ConsStringIterator { 898 public: 899 inline ConsStringIterator() = default; 900 inline explicit ConsStringIterator(ConsString cons_string, int offset = 0) { 901 Reset(cons_string, offset); 902 } 903 ConsStringIterator(const ConsStringIterator&) = delete; 904 ConsStringIterator& operator=(const ConsStringIterator&) = delete; 905 inline void Reset(ConsString cons_string, int offset = 0) { 906 depth_ = 0; 907 // Next will always return nullptr. 908 if (cons_string.is_null()) return; 909 Initialize(cons_string, offset); 910 } 911 // Returns nullptr when complete. 912 inline String Next(int* offset_out) { 913 *offset_out = 0; 914 if (depth_ == 0) return String(); 915 return Continue(offset_out); 916 } 917 918 private: 919 static const int kStackSize = 32; 920 // Use a mask instead of doing modulo operations for stack wrapping. 
921 static const int kDepthMask = kStackSize - 1; 922 static_assert(base::bits::IsPowerOfTwo(kStackSize), 923 "kStackSize must be power of two"); 924 static inline int OffsetForDepth(int depth); 925 926 inline void PushLeft(ConsString string); 927 inline void PushRight(ConsString string); 928 inline void AdjustMaximumDepth(); 929 inline void Pop(); 930 inline bool StackBlown() { return maximum_depth_ - depth_ == kStackSize; } 931 V8_EXPORT_PRIVATE void Initialize(ConsString cons_string, int offset); 932 V8_EXPORT_PRIVATE String Continue(int* offset_out); 933 String NextLeaf(bool* blew_stack); 934 String Search(int* offset_out); 935 936 // Stack must always contain only frames for which right traversal 937 // has not yet been performed. 938 ConsString frames_[kStackSize]; 939 ConsString root_; 940 int depth_; 941 int maximum_depth_; 942 int consumed_; 943 }; 944 945 class StringCharacterStream { 946 public: 947 inline explicit StringCharacterStream(String string, int offset = 0); 948 StringCharacterStream(const StringCharacterStream&) = delete; 949 StringCharacterStream& operator=(const StringCharacterStream&) = delete; 950 inline uint16_t GetNext(); 951 inline bool HasMore(); 952 inline void Reset(String string, int offset = 0); 953 inline void VisitOneByteString(const uint8_t* chars, int length); 954 inline void VisitTwoByteString(const uint16_t* chars, int length); 955 956 private: 957 ConsStringIterator iter_; 958 bool is_one_byte_; 959 union { 960 const uint8_t* buffer8_; 961 const uint16_t* buffer16_; 962 }; 963 const uint8_t* end_; 964 }; 965 966 template <typename Char> 967 struct CharTraits; 968 969 template <> 970 struct CharTraits<uint8_t> { 971 using String = SeqOneByteString; 972 using ExternalString = ExternalOneByteString; 973 }; 974 975 template <> 976 struct CharTraits<uint16_t> { 977 using String = SeqTwoByteString; 978 using ExternalString = ExternalTwoByteString; 979 }; 980 981 } // namespace internal 982 } // namespace v8 983 984 #include 
"src/objects/object-macros-undef.h" 985 986 #endif // V8_OBJECTS_STRING_H_ 987