1 // Copyright 2017 the V8 project authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef V8_OBJECTS_STRING_H_ 6 #define V8_OBJECTS_STRING_H_ 7 8 #include "src/base/bits.h" 9 #include "src/objects/name.h" 10 #include "src/unicode-decoder.h" 11 12 // Has to be the last include (doesn't have include guards): 13 #include "src/objects/object-macros.h" 14 15 namespace v8 { 16 namespace internal { 17 18 class BigInt; 19 20 enum AllowNullsFlag { ALLOW_NULLS, DISALLOW_NULLS }; 21 enum RobustnessFlag { ROBUST_STRING_TRAVERSAL, FAST_STRING_TRAVERSAL }; 22 23 // The characteristics of a string are stored in its map. Retrieving these 24 // few bits of information is moderately expensive, involving two memory 25 // loads where the second is dependent on the first. To improve efficiency 26 // the shape of the string is given its own class so that it can be retrieved 27 // once and used for several string operations. A StringShape is small enough 28 // to be passed by value and is immutable, but be aware that flattening a 29 // string can potentially alter its shape. Also be aware that a GC caused by 30 // something else can alter the shape of a string due to ConsString 31 // shortcutting. Keeping these restrictions in mind has proven to be error- 32 // prone and so we no longer put StringShapes in variables unless there is a 33 // concrete performance benefit at that particular point in the code. 34 class StringShape BASE_EMBEDDED { 35 public: 36 inline explicit StringShape(const String* s); 37 inline explicit StringShape(Map* s); 38 inline explicit StringShape(InstanceType t); 39 inline bool IsSequential(); 40 inline bool IsExternal(); 41 inline bool IsCons(); 42 inline bool IsSliced(); 43 inline bool IsThin(); 44 inline bool IsIndirect(); 45 inline bool IsExternalOneByte(); 46 inline bool IsExternalTwoByte(); 47 inline bool IsSequentialOneByte(); 48 inline bool IsSequentialTwoByte(); 49 inline bool IsInternalized(); 50 inline StringRepresentationTag representation_tag(); 51 inline uint32_t encoding_tag(); 52 inline uint32_t full_representation_tag(); 53 inline bool HasOnlyOneByteChars(); 54 #ifdef DEBUG type()55 inline uint32_t type() { return type_; } invalidate()56 inline void invalidate() { valid_ = false; } valid()57 inline bool valid() { return valid_; } 58 #else invalidate()59 inline void invalidate() {} 60 #endif 61 62 private: 63 uint32_t type_; 64 #ifdef DEBUG set_valid()65 inline void set_valid() { valid_ = true; } 66 bool valid_; 67 #else set_valid()68 inline void set_valid() {} 69 #endif 70 }; 71 72 // The String abstract class captures JavaScript string values: 73 // 74 // Ecma-262: 75 // 4.3.16 String Value 76 // A string value is a member of the type String and is a finite 77 // ordered sequence of zero or more 16-bit unsigned integer values. 78 // 79 // All string values have a length field. 80 class String : public Name { 81 public: 82 enum Encoding { ONE_BYTE_ENCODING, TWO_BYTE_ENCODING }; 83 84 class SubStringRange { 85 public: 86 explicit inline SubStringRange(String* string, int first = 0, 87 int length = -1); 88 class iterator; 89 inline iterator begin(); 90 inline iterator end(); 91 92 private: 93 String* string_; 94 int first_; 95 int length_; 96 }; 97 98 // Representation of the flat content of a String. 99 // A non-flat string doesn't have flat content. 100 // A flat string has content that's encoded as a sequence of either 101 // one-byte chars or two-byte UC16. 102 // Returned by String::GetFlatContent(). 103 class FlatContent { 104 public: 105 // Returns true if the string is flat and this structure contains content. IsFlat()106 bool IsFlat() const { return state_ != NON_FLAT; } 107 // Returns true if the structure contains one-byte content. IsOneByte()108 bool IsOneByte() const { return state_ == ONE_BYTE; } 109 // Returns true if the structure contains two-byte content. IsTwoByte()110 bool IsTwoByte() const { return state_ == TWO_BYTE; } 111 112 // Return the one byte content of the string. Only use if IsOneByte() 113 // returns true. ToOneByteVector()114 Vector<const uint8_t> ToOneByteVector() const { 115 DCHECK_EQ(ONE_BYTE, state_); 116 return Vector<const uint8_t>(onebyte_start, length_); 117 } 118 // Return the two-byte content of the string. Only use if IsTwoByte() 119 // returns true. ToUC16Vector()120 Vector<const uc16> ToUC16Vector() const { 121 DCHECK_EQ(TWO_BYTE, state_); 122 return Vector<const uc16>(twobyte_start, length_); 123 } 124 Get(int i)125 uc16 Get(int i) const { 126 DCHECK(i < length_); 127 DCHECK(state_ != NON_FLAT); 128 if (state_ == ONE_BYTE) return onebyte_start[i]; 129 return twobyte_start[i]; 130 } 131 UsesSameString(const FlatContent & other)132 bool UsesSameString(const FlatContent& other) const { 133 return onebyte_start == other.onebyte_start; 134 } 135 136 private: 137 enum State { NON_FLAT, ONE_BYTE, TWO_BYTE }; 138 139 // Constructors only used by String::GetFlatContent(). FlatContent(const uint8_t * start,int length)140 explicit FlatContent(const uint8_t* start, int length) 141 : onebyte_start(start), length_(length), state_(ONE_BYTE) {} FlatContent(const uc16 * start,int length)142 explicit FlatContent(const uc16* start, int length) 143 : twobyte_start(start), length_(length), state_(TWO_BYTE) {} FlatContent()144 FlatContent() : onebyte_start(nullptr), length_(0), state_(NON_FLAT) {} 145 146 union { 147 const uint8_t* onebyte_start; 148 const uc16* twobyte_start; 149 }; 150 int length_; 151 State state_; 152 153 friend class String; 154 friend class IterableSubString; 155 }; 156 157 template <typename Char> 158 V8_INLINE Vector<const Char> GetCharVector(); 159 160 // Get and set the length of the string. 161 inline int length() const; 162 inline void set_length(int value); 163 164 // Get and set the length of the string using acquire loads and release 165 // stores. 166 inline int synchronized_length() const; 167 inline void synchronized_set_length(int value); 168 169 // Returns whether this string has only one-byte chars, i.e. all of them can 170 // be one-byte encoded. This might be the case even if the string is 171 // two-byte. Such strings may appear when the embedder prefers 172 // two-byte external representations even for one-byte data. 173 inline bool IsOneByteRepresentation() const; 174 inline bool IsTwoByteRepresentation() const; 175 176 // Cons and slices have an encoding flag that may not represent the actual 177 // encoding of the underlying string. This is taken into account here. 178 // Requires: this->IsFlat() 179 inline bool IsOneByteRepresentationUnderneath(); 180 inline bool IsTwoByteRepresentationUnderneath(); 181 182 // NOTE: this should be considered only a hint. False negatives are 183 // possible. 184 inline bool HasOnlyOneByteChars(); 185 186 // Get and set individual two byte chars in the string. 187 inline void Set(int index, uint16_t value); 188 // Get individual two byte char in the string. Repeated calls 189 // to this method are not efficient unless the string is flat. 190 V8_INLINE uint16_t Get(int index); 191 192 // ES6 section 7.1.3.1 ToNumber Applied to the String Type 193 static Handle<Object> ToNumber(Isolate* isolate, Handle<String> subject); 194 195 // Flattens the string. Checks first inline to see if it is 196 // necessary. Does nothing if the string is not a cons string. 197 // Flattening allocates a sequential string with the same data as 198 // the given string and mutates the cons string to a degenerate 199 // form, where the first component is the new sequential string and 200 // the second component is the empty string. If allocation fails, 201 // this function returns a failure. If flattening succeeds, this 202 // function returns the sequential string that is now the first 203 // component of the cons string. 204 // 205 // Degenerate cons strings are handled specially by the garbage 206 // collector (see IsShortcutCandidate). 207 208 static inline Handle<String> Flatten(Isolate* isolate, Handle<String> string, 209 PretenureFlag pretenure = NOT_TENURED); 210 211 // Tries to return the content of a flat string as a structure holding either 212 // a flat vector of char or of uc16. 213 // If the string isn't flat, and therefore doesn't have flat content, the 214 // returned structure will report so, and can't provide a vector of either 215 // kind. 216 FlatContent GetFlatContent(); 217 218 // Returns the parent of a sliced string or first part of a flat cons string. 219 // Requires: StringShape(this).IsIndirect() && this->IsFlat() 220 inline String* GetUnderlying(); 221 222 // String relational comparison, implemented according to ES6 section 7.2.11 223 // Abstract Relational Comparison (step 5): The comparison of Strings uses a 224 // simple lexicographic ordering on sequences of code unit values. There is no 225 // attempt to use the more complex, semantically oriented definitions of 226 // character or string equality and collating order defined in the Unicode 227 // specification. Therefore String values that are canonically equal according 228 // to the Unicode standard could test as unequal. In effect this algorithm 229 // assumes that both Strings are already in normalized form. Also, note that 230 // for strings containing supplementary characters, lexicographic ordering on 231 // sequences of UTF-16 code unit values differs from that on sequences of code 232 // point values. 233 V8_WARN_UNUSED_RESULT static ComparisonResult Compare(Isolate* isolate, 234 Handle<String> x, 235 Handle<String> y); 236 237 // Perform ES6 21.1.3.8, including checking arguments. 238 static Object* IndexOf(Isolate* isolate, Handle<Object> receiver, 239 Handle<Object> search, Handle<Object> position); 240 // Perform string match of pattern on subject, starting at start index. 241 // Caller must ensure that 0 <= start_index <= sub->length(), as this does not 242 // check any arguments. 243 static int IndexOf(Isolate* isolate, Handle<String> receiver, 244 Handle<String> search, int start_index); 245 246 static Object* LastIndexOf(Isolate* isolate, Handle<Object> receiver, 247 Handle<Object> search, Handle<Object> position); 248 249 // Encapsulates logic related to a match and its capture groups as required 250 // by GetSubstitution. 251 class Match { 252 public: 253 virtual Handle<String> GetMatch() = 0; 254 virtual Handle<String> GetPrefix() = 0; 255 virtual Handle<String> GetSuffix() = 0; 256 257 // A named capture can be invalid (if it is not specified in the pattern), 258 // unmatched (specified but not matched in the current string), and matched. 259 enum CaptureState { INVALID, UNMATCHED, MATCHED }; 260 261 virtual int CaptureCount() = 0; 262 virtual bool HasNamedCaptures() = 0; 263 virtual MaybeHandle<String> GetCapture(int i, bool* capture_exists) = 0; 264 virtual MaybeHandle<String> GetNamedCapture(Handle<String> name, 265 CaptureState* state) = 0; 266 ~Match()267 virtual ~Match() {} 268 }; 269 270 // ES#sec-getsubstitution 271 // GetSubstitution(matched, str, position, captures, replacement) 272 // Expand the $-expressions in the string and return a new string with 273 // the result. 274 // A {start_index} can be passed to specify where to start scanning the 275 // replacement string. 276 V8_WARN_UNUSED_RESULT static MaybeHandle<String> GetSubstitution( 277 Isolate* isolate, Match* match, Handle<String> replacement, 278 int start_index = 0); 279 280 // String equality operations. 281 inline bool Equals(String* other); 282 inline static bool Equals(Isolate* isolate, Handle<String> one, 283 Handle<String> two); 284 bool IsUtf8EqualTo(Vector<const char> str, bool allow_prefix_match = false); 285 286 // Dispatches to Is{One,Two}ByteEqualTo. 287 template <typename Char> 288 bool IsEqualTo(Vector<const Char> str); 289 290 bool IsOneByteEqualTo(Vector<const uint8_t> str); 291 bool IsTwoByteEqualTo(Vector<const uc16> str); 292 293 // Return a UTF8 representation of the string. The string is null 294 // terminated but may optionally contain nulls. Length is returned 295 // in length_output if length_output is not a null pointer The string 296 // should be nearly flat, otherwise the performance of this method may 297 // be very slow (quadratic in the length). Setting robustness_flag to 298 // ROBUST_STRING_TRAVERSAL invokes behaviour that is robust This means it 299 // handles unexpected data without causing assert failures and it does not 300 // do any heap allocations. This is useful when printing stack traces. 301 std::unique_ptr<char[]> ToCString(AllowNullsFlag allow_nulls, 302 RobustnessFlag robustness_flag, int offset, 303 int length, int* length_output = 0); 304 std::unique_ptr<char[]> ToCString( 305 AllowNullsFlag allow_nulls = DISALLOW_NULLS, 306 RobustnessFlag robustness_flag = FAST_STRING_TRAVERSAL, 307 int* length_output = 0); 308 309 bool ComputeArrayIndex(uint32_t* index); 310 311 // Externalization. 312 bool MakeExternal(v8::String::ExternalStringResource* resource); 313 bool MakeExternal(v8::String::ExternalOneByteStringResource* resource); 314 bool SupportsExternalization(); 315 316 // Conversion. 317 inline bool AsArrayIndex(uint32_t* index); 318 uint32_t inline ToValidIndex(Object* number); 319 320 // Trimming. 321 enum TrimMode { kTrim, kTrimStart, kTrimEnd }; 322 static Handle<String> Trim(Isolate* isolate, Handle<String> string, 323 TrimMode mode); 324 325 DECL_CAST(String) 326 327 void PrintOn(FILE* out); 328 329 // For use during stack traces. Performs rudimentary sanity check. 330 bool LooksValid(); 331 332 // Dispatched behavior. 333 void StringShortPrint(StringStream* accumulator, bool show_details = true); 334 void PrintUC16(std::ostream& os, int start = 0, int end = -1); // NOLINT 335 #if defined(DEBUG) || defined(OBJECT_PRINT) 336 char* ToAsciiArray(); 337 #endif 338 DECL_PRINTER(String) 339 DECL_VERIFIER(String) 340 341 inline bool IsFlat(); 342 343 // Layout description. 344 static const int kLengthOffset = Name::kSize; 345 static const int kSize = kLengthOffset + kPointerSize; 346 347 // Max char codes. 348 static const int32_t kMaxOneByteCharCode = unibrow::Latin1::kMaxChar; 349 static const uint32_t kMaxOneByteCharCodeU = unibrow::Latin1::kMaxChar; 350 static const int kMaxUtf16CodeUnit = 0xffff; 351 static const uint32_t kMaxUtf16CodeUnitU = kMaxUtf16CodeUnit; 352 static const uc32 kMaxCodePoint = 0x10ffff; 353 354 // Maximal string length. 355 // The max length is different on 32 and 64 bit platforms. Max length for a 356 // 32-bit platform is ~268.4M chars. On 64-bit platforms, max length is 357 // ~1.073B chars. The limit on 64-bit is so that SeqTwoByteString::kMaxSize 358 // can fit in a 32bit int: 2^31 - 1 is the max positive int, minus one bit as 359 // each char needs two bytes, subtract 24 bytes for the string header size. 360 361 // See include/v8.h for the definition. 362 static const int kMaxLength = v8::String::kMaxLength; 363 static_assert(kMaxLength <= (Smi::kMaxValue / 2 - kSize), 364 "Unexpected max String length"); 365 366 // Max length for computing hash. For strings longer than this limit the 367 // string length is used as the hash value. 368 static const int kMaxHashCalcLength = 16383; 369 370 // Limit for truncation in short printing. 371 static const int kMaxShortPrintLength = 1024; 372 373 // Support for regular expressions. 374 const uc16* GetTwoByteData(unsigned start); 375 376 // Helper function for flattening strings. 377 template <typename sinkchar> 378 static void WriteToFlat(String* source, sinkchar* sink, int from, int to); 379 380 // The return value may point to the first aligned word containing the first 381 // non-one-byte character, rather than directly to the non-one-byte character. 382 // If the return value is >= the passed length, the entire string was 383 // one-byte. NonAsciiStart(const char * chars,int length)384 static inline int NonAsciiStart(const char* chars, int length) { 385 const char* start = chars; 386 const char* limit = chars + length; 387 388 if (length >= kIntptrSize) { 389 // Check unaligned bytes. 390 while (!IsAligned(reinterpret_cast<intptr_t>(chars), sizeof(uintptr_t))) { 391 if (static_cast<uint8_t>(*chars) > unibrow::Utf8::kMaxOneByteChar) { 392 return static_cast<int>(chars - start); 393 } 394 ++chars; 395 } 396 // Check aligned words. 397 DCHECK_EQ(unibrow::Utf8::kMaxOneByteChar, 0x7F); 398 const uintptr_t non_one_byte_mask = kUintptrAllBitsSet / 0xFF * 0x80; 399 while (chars + sizeof(uintptr_t) <= limit) { 400 if (*reinterpret_cast<const uintptr_t*>(chars) & non_one_byte_mask) { 401 return static_cast<int>(chars - start); 402 } 403 chars += sizeof(uintptr_t); 404 } 405 } 406 // Check remaining unaligned bytes. 407 while (chars < limit) { 408 if (static_cast<uint8_t>(*chars) > unibrow::Utf8::kMaxOneByteChar) { 409 return static_cast<int>(chars - start); 410 } 411 ++chars; 412 } 413 414 return static_cast<int>(chars - start); 415 } 416 IsAscii(const char * chars,int length)417 static inline bool IsAscii(const char* chars, int length) { 418 return NonAsciiStart(chars, length) >= length; 419 } 420 IsAscii(const uint8_t * chars,int length)421 static inline bool IsAscii(const uint8_t* chars, int length) { 422 return NonAsciiStart(reinterpret_cast<const char*>(chars), length) >= 423 length; 424 } 425 NonOneByteStart(const uc16 * chars,int length)426 static inline int NonOneByteStart(const uc16* chars, int length) { 427 const uc16* limit = chars + length; 428 const uc16* start = chars; 429 while (chars < limit) { 430 if (*chars > kMaxOneByteCharCodeU) return static_cast<int>(chars - start); 431 ++chars; 432 } 433 return static_cast<int>(chars - start); 434 } 435 IsOneByte(const uc16 * chars,int length)436 static inline bool IsOneByte(const uc16* chars, int length) { 437 return NonOneByteStart(chars, length) >= length; 438 } 439 440 template <class Visitor> 441 static inline ConsString* VisitFlat(Visitor* visitor, String* string, 442 int offset = 0); 443 444 static Handle<FixedArray> CalculateLineEnds(Isolate* isolate, 445 Handle<String> string, 446 bool include_ending_line); 447 448 private: 449 friend class Name; 450 friend class StringTableInsertionKey; 451 friend class InternalizedStringKey; 452 453 static Handle<String> SlowFlatten(Isolate* isolate, Handle<ConsString> cons, 454 PretenureFlag tenure); 455 456 // Slow case of String::Equals. This implementation works on any strings 457 // but it is most efficient on strings that are almost flat. 458 bool SlowEquals(String* other); 459 460 static bool SlowEquals(Isolate* isolate, Handle<String> one, 461 Handle<String> two); 462 463 // Slow case of AsArrayIndex. 464 V8_EXPORT_PRIVATE bool SlowAsArrayIndex(uint32_t* index); 465 466 // Compute and set the hash code. 467 uint32_t ComputeAndSetHash(Isolate* isolate); 468 469 DISALLOW_IMPLICIT_CONSTRUCTORS(String); 470 }; 471 472 // The SeqString abstract class captures sequential string values. 473 class SeqString : public String { 474 public: 475 DECL_CAST(SeqString) 476 477 // Layout description. 478 static const int kHeaderSize = String::kSize; 479 480 // Truncate the string in-place if possible and return the result. 481 // In case of new_length == 0, the empty string is returned without 482 // truncating the original string. 483 V8_WARN_UNUSED_RESULT static Handle<String> Truncate(Handle<SeqString> string, 484 int new_length); 485 486 private: 487 DISALLOW_IMPLICIT_CONSTRUCTORS(SeqString); 488 }; 489 490 class InternalizedString : public String { 491 public: 492 DECL_CAST(InternalizedString) 493 // TODO(neis): Possibly move some stuff from String here. 494 495 private: 496 DISALLOW_IMPLICIT_CONSTRUCTORS(InternalizedString); 497 }; 498 499 // The OneByteString class captures sequential one-byte string objects. 500 // Each character in the OneByteString is an one-byte character. 501 class SeqOneByteString : public SeqString { 502 public: 503 static const bool kHasOneByteEncoding = true; 504 505 // Dispatched behavior. 506 inline uint16_t SeqOneByteStringGet(int index); 507 inline void SeqOneByteStringSet(int index, uint16_t value); 508 509 // Get the address of the characters in this string. 510 inline Address GetCharsAddress(); 511 512 inline uint8_t* GetChars(); 513 514 // Clear uninitialized padding space. This ensures that the snapshot content 515 // is deterministic. 516 void clear_padding(); 517 518 DECL_CAST(SeqOneByteString) 519 520 // Garbage collection support. This method is called by the 521 // garbage collector to compute the actual size of an OneByteString 522 // instance. 523 inline int SeqOneByteStringSize(InstanceType instance_type); 524 525 // Computes the size for an OneByteString instance of a given length. SizeFor(int length)526 static int SizeFor(int length) { 527 return OBJECT_POINTER_ALIGN(kHeaderSize + length * kCharSize); 528 } 529 530 // Maximal memory usage for a single sequential one-byte string. 531 static const int kMaxCharsSize = kMaxLength; 532 static const int kMaxSize = OBJECT_POINTER_ALIGN(kMaxCharsSize + kHeaderSize); 533 STATIC_ASSERT((kMaxSize - kHeaderSize) >= String::kMaxLength); 534 535 class BodyDescriptor; 536 // No weak fields. 537 typedef BodyDescriptor BodyDescriptorWeak; 538 539 private: 540 DISALLOW_IMPLICIT_CONSTRUCTORS(SeqOneByteString); 541 }; 542 543 // The TwoByteString class captures sequential unicode string objects. 544 // Each character in the TwoByteString is a two-byte uint16_t. 545 class SeqTwoByteString : public SeqString { 546 public: 547 static const bool kHasOneByteEncoding = false; 548 549 // Dispatched behavior. 550 inline uint16_t SeqTwoByteStringGet(int index); 551 inline void SeqTwoByteStringSet(int index, uint16_t value); 552 553 // Get the address of the characters in this string. 554 inline Address GetCharsAddress(); 555 556 inline uc16* GetChars(); 557 558 // Clear uninitialized padding space. This ensures that the snapshot content 559 // is deterministic. 560 void clear_padding(); 561 562 // For regexp code. 563 const uint16_t* SeqTwoByteStringGetData(unsigned start); 564 565 DECL_CAST(SeqTwoByteString) 566 567 // Garbage collection support. This method is called by the 568 // garbage collector to compute the actual size of a TwoByteString 569 // instance. 570 inline int SeqTwoByteStringSize(InstanceType instance_type); 571 572 // Computes the size for a TwoByteString instance of a given length. SizeFor(int length)573 static int SizeFor(int length) { 574 return OBJECT_POINTER_ALIGN(kHeaderSize + length * kShortSize); 575 } 576 577 // Maximal memory usage for a single sequential two-byte string. 578 static const int kMaxCharsSize = kMaxLength * 2; 579 static const int kMaxSize = OBJECT_POINTER_ALIGN(kMaxCharsSize + kHeaderSize); 580 STATIC_ASSERT(static_cast<int>((kMaxSize - kHeaderSize) / sizeof(uint16_t)) >= 581 String::kMaxLength); 582 583 class BodyDescriptor; 584 // No weak fields. 585 typedef BodyDescriptor BodyDescriptorWeak; 586 587 private: 588 DISALLOW_IMPLICIT_CONSTRUCTORS(SeqTwoByteString); 589 }; 590 591 // The ConsString class describes string values built by using the 592 // addition operator on strings. A ConsString is a pair where the 593 // first and second components are pointers to other string values. 594 // One or both components of a ConsString can be pointers to other 595 // ConsStrings, creating a binary tree of ConsStrings where the leaves 596 // are non-ConsString string values. The string value represented by 597 // a ConsString can be obtained by concatenating the leaf string 598 // values in a left-to-right depth-first traversal of the tree. 599 class ConsString : public String { 600 public: 601 // First string of the cons cell. 602 inline String* first(); 603 // Doesn't check that the result is a string, even in debug mode. This is 604 // useful during GC where the mark bits confuse the checks. 605 inline Object* unchecked_first(); 606 inline void set_first(Isolate* isolate, String* first, 607 WriteBarrierMode mode = UPDATE_WRITE_BARRIER); 608 609 // Second string of the cons cell. 610 inline String* second(); 611 // Doesn't check that the result is a string, even in debug mode. This is 612 // useful during GC where the mark bits confuse the checks. 613 inline Object* unchecked_second(); 614 inline void set_second(Isolate* isolate, String* second, 615 WriteBarrierMode mode = UPDATE_WRITE_BARRIER); 616 617 // Dispatched behavior. 618 V8_EXPORT_PRIVATE uint16_t ConsStringGet(int index); 619 620 DECL_CAST(ConsString) 621 622 // Layout description. 623 static const int kFirstOffset = POINTER_SIZE_ALIGN(String::kSize); 624 static const int kSecondOffset = kFirstOffset + kPointerSize; 625 static const int kSize = kSecondOffset + kPointerSize; 626 627 // Minimum length for a cons string. 628 static const int kMinLength = 13; 629 630 typedef FixedBodyDescriptor<kFirstOffset, kSecondOffset + kPointerSize, kSize> 631 BodyDescriptor; 632 // No weak fields. 633 typedef BodyDescriptor BodyDescriptorWeak; 634 635 DECL_VERIFIER(ConsString) 636 637 private: 638 DISALLOW_IMPLICIT_CONSTRUCTORS(ConsString); 639 }; 640 641 // The ThinString class describes string objects that are just references 642 // to another string object. They are used for in-place internalization when 643 // the original string cannot actually be internalized in-place: in these 644 // cases, the original string is converted to a ThinString pointing at its 645 // internalized version (which is allocated as a new object). 646 // In terms of memory layout and most algorithms operating on strings, 647 // ThinStrings can be thought of as "one-part cons strings". 648 class ThinString : public String { 649 public: 650 // Actual string that this ThinString refers to. 651 inline String* actual() const; 652 inline HeapObject* unchecked_actual() const; 653 inline void set_actual(String* s, 654 WriteBarrierMode mode = UPDATE_WRITE_BARRIER); 655 656 V8_EXPORT_PRIVATE uint16_t ThinStringGet(int index); 657 658 DECL_CAST(ThinString) 659 DECL_VERIFIER(ThinString) 660 661 // Layout description. 662 static const int kActualOffset = String::kSize; 663 static const int kSize = kActualOffset + kPointerSize; 664 665 typedef FixedBodyDescriptor<kActualOffset, kSize, kSize> BodyDescriptor; 666 // No weak fields. 667 typedef BodyDescriptor BodyDescriptorWeak; 668 669 private: 670 DISALLOW_COPY_AND_ASSIGN(ThinString); 671 }; 672 673 // The Sliced String class describes strings that are substrings of another 674 // sequential string. The motivation is to save time and memory when creating 675 // a substring. A Sliced String is described as a pointer to the parent, 676 // the offset from the start of the parent string and the length. Using 677 // a Sliced String therefore requires unpacking of the parent string and 678 // adding the offset to the start address. A substring of a Sliced String 679 // are not nested since the double indirection is simplified when creating 680 // such a substring. 681 // Currently missing features are: 682 // - handling externalized parent strings 683 // - external strings as parent 684 // - truncating sliced string to enable otherwise unneeded parent to be GC'ed. 685 class SlicedString : public String { 686 public: 687 inline String* parent(); 688 inline void set_parent(Isolate* isolate, String* parent, 689 WriteBarrierMode mode = UPDATE_WRITE_BARRIER); 690 inline int offset() const; 691 inline void set_offset(int offset); 692 693 // Dispatched behavior. 694 V8_EXPORT_PRIVATE uint16_t SlicedStringGet(int index); 695 696 DECL_CAST(SlicedString) 697 698 // Layout description. 699 static const int kParentOffset = POINTER_SIZE_ALIGN(String::kSize); 700 static const int kOffsetOffset = kParentOffset + kPointerSize; 701 static const int kSize = kOffsetOffset + kPointerSize; 702 703 // Minimum length for a sliced string. 704 static const int kMinLength = 13; 705 706 typedef FixedBodyDescriptor<kParentOffset, kOffsetOffset + kPointerSize, 707 kSize> 708 BodyDescriptor; 709 // No weak fields. 710 typedef BodyDescriptor BodyDescriptorWeak; 711 712 DECL_VERIFIER(SlicedString) 713 714 private: 715 DISALLOW_IMPLICIT_CONSTRUCTORS(SlicedString); 716 }; 717 718 // The ExternalString class describes string values that are backed by 719 // a string resource that lies outside the V8 heap. ExternalStrings 720 // consist of the length field common to all strings, a pointer to the 721 // external resource. It is important to ensure (externally) that the 722 // resource is not deallocated while the ExternalString is live in the 723 // V8 heap. 724 // 725 // The API expects that all ExternalStrings are created through the 726 // API. Therefore, ExternalStrings should not be used internally. 727 class ExternalString : public String { 728 public: 729 DECL_CAST(ExternalString) 730 731 // Layout description. 732 static const int kResourceOffset = POINTER_SIZE_ALIGN(String::kSize); 733 static const int kShortSize = kResourceOffset + kPointerSize; 734 static const int kResourceDataOffset = kResourceOffset + kPointerSize; 735 static const int kSize = kResourceDataOffset + kPointerSize; 736 737 // Return whether external string is short (data pointer is not cached). 738 inline bool is_short() const; 739 // Size in bytes of the external payload. 740 int ExternalPayloadSize() const; 741 742 // Used in the serializer/deserializer. 743 inline Address resource_as_address(); 744 inline void set_address_as_resource(Address address); 745 inline uint32_t resource_as_uint32(); 746 inline void set_uint32_as_resource(uint32_t value); 747 748 STATIC_ASSERT(kResourceOffset == Internals::kStringResourceOffset); 749 750 private: 751 DISALLOW_IMPLICIT_CONSTRUCTORS(ExternalString); 752 }; 753 754 // The ExternalOneByteString class is an external string backed by an 755 // one-byte string. 756 class ExternalOneByteString : public ExternalString { 757 public: 758 static const bool kHasOneByteEncoding = true; 759 760 typedef v8::String::ExternalOneByteStringResource Resource; 761 762 // The underlying resource. 763 inline const Resource* resource(); 764 765 // It is assumed that the previous resource is null. If it is not null, then 766 // it is the responsability of the caller the handle the previous resource. 767 inline void SetResource(Isolate* isolate, const Resource* buffer); 768 // Used only during serialization. 769 inline void set_resource(const Resource* buffer); 770 771 // Update the pointer cache to the external character array. 772 // The cached pointer is always valid, as the external character array does = 773 // not move during lifetime. Deserialization is the only exception, after 774 // which the pointer cache has to be refreshed. 775 inline void update_data_cache(); 776 777 inline const uint8_t* GetChars(); 778 779 // Dispatched behavior. 780 inline uint16_t ExternalOneByteStringGet(int index); 781 782 DECL_CAST(ExternalOneByteString) 783 784 class BodyDescriptor; 785 // No weak fields. 786 typedef BodyDescriptor BodyDescriptorWeak; 787 788 private: 789 DISALLOW_IMPLICIT_CONSTRUCTORS(ExternalOneByteString); 790 }; 791 792 // The ExternalTwoByteString class is an external string backed by a UTF-16 793 // encoded string. 794 class ExternalTwoByteString : public ExternalString { 795 public: 796 static const bool kHasOneByteEncoding = false; 797 798 typedef v8::String::ExternalStringResource Resource; 799 800 // The underlying string resource. 801 inline const Resource* resource(); 802 803 // It is assumed that the previous resource is null. If it is not null, then 804 // it is the responsability of the caller the handle the previous resource. 805 inline void SetResource(Isolate* isolate, const Resource* buffer); 806 // Used only during serialization. 807 inline void set_resource(const Resource* buffer); 808 809 // Update the pointer cache to the external character array. 810 // The cached pointer is always valid, as the external character array does = 811 // not move during lifetime. Deserialization is the only exception, after 812 // which the pointer cache has to be refreshed. 813 inline void update_data_cache(); 814 815 inline const uint16_t* GetChars(); 816 817 // Dispatched behavior. 818 inline uint16_t ExternalTwoByteStringGet(int index); 819 820 // For regexp code. 821 inline const uint16_t* ExternalTwoByteStringGetData(unsigned start); 822 823 DECL_CAST(ExternalTwoByteString) 824 825 class BodyDescriptor; 826 // No weak fields. 827 typedef BodyDescriptor BodyDescriptorWeak; 828 829 private: 830 DISALLOW_IMPLICIT_CONSTRUCTORS(ExternalTwoByteString); 831 }; 832 833 // A flat string reader provides random access to the contents of a 834 // string independent of the character width of the string. The handle 835 // must be valid as long as the reader is being used. 836 class FlatStringReader : public Relocatable { 837 public: 838 FlatStringReader(Isolate* isolate, Handle<String> str); 839 FlatStringReader(Isolate* isolate, Vector<const char> input); 840 void PostGarbageCollection(); 841 inline uc32 Get(int index); 842 template <typename Char> 843 inline Char Get(int index); length()844 int length() { return length_; } 845 846 private: 847 String** str_; 848 bool is_one_byte_; 849 int length_; 850 const void* start_; 851 }; 852 853 // This maintains an off-stack representation of the stack frames required 854 // to traverse a ConsString, allowing an entirely iterative and restartable 855 // traversal of the entire string 856 class ConsStringIterator { 857 public: ConsStringIterator()858 inline ConsStringIterator() {} 859 inline explicit ConsStringIterator(ConsString* cons_string, int offset = 0) { 860 Reset(cons_string, offset); 861 } 862 inline void Reset(ConsString* cons_string, int offset = 0) { 863 depth_ = 0; 864 // Next will always return nullptr. 865 if (cons_string == nullptr) return; 866 Initialize(cons_string, offset); 867 } 868 // Returns nullptr when complete. Next(int * offset_out)869 inline String* Next(int* offset_out) { 870 *offset_out = 0; 871 if (depth_ == 0) return nullptr; 872 return Continue(offset_out); 873 } 874 875 private: 876 static const int kStackSize = 32; 877 // Use a mask instead of doing modulo operations for stack wrapping. 878 static const int kDepthMask = kStackSize - 1; 879 static_assert(base::bits::IsPowerOfTwo(kStackSize), 880 "kStackSize must be power of two"); 881 static inline int OffsetForDepth(int depth); 882 883 inline void PushLeft(ConsString* string); 884 inline void PushRight(ConsString* string); 885 inline void AdjustMaximumDepth(); 886 inline void Pop(); StackBlown()887 inline bool StackBlown() { return maximum_depth_ - depth_ == kStackSize; } 888 void Initialize(ConsString* cons_string, int offset); 889 String* Continue(int* offset_out); 890 String* NextLeaf(bool* blew_stack); 891 String* Search(int* offset_out); 892 893 // Stack must always contain only frames for which right traversal 894 // has not yet been performed. 895 ConsString* frames_[kStackSize]; 896 ConsString* root_; 897 int depth_; 898 int maximum_depth_; 899 int consumed_; 900 DISALLOW_COPY_AND_ASSIGN(ConsStringIterator); 901 }; 902 903 class StringCharacterStream { 904 public: 905 inline explicit StringCharacterStream(String* string, int offset = 0); 906 inline uint16_t GetNext(); 907 inline bool HasMore(); 908 inline void Reset(String* string, int offset = 0); 909 inline void VisitOneByteString(const uint8_t* chars, int length); 910 inline void VisitTwoByteString(const uint16_t* chars, int length); 911 912 private: 913 ConsStringIterator iter_; 914 bool is_one_byte_; 915 union { 916 const uint8_t* buffer8_; 917 const uint16_t* buffer16_; 918 }; 919 const uint8_t* end_; 920 DISALLOW_COPY_AND_ASSIGN(StringCharacterStream); 921 }; 922 923 } // namespace internal 924 } // namespace v8 925 926 #include "src/objects/object-macros-undef.h" 927 928 #endif // V8_OBJECTS_STRING_H_ 929