// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4 
5 #ifndef V8_OBJECTS_STRING_H_
6 #define V8_OBJECTS_STRING_H_
7 
8 #include <memory>
9 
10 #include "src/base/bits.h"
11 #include "src/base/export-template.h"
12 #include "src/objects/instance-type.h"
13 #include "src/objects/name.h"
14 #include "src/objects/smi.h"
15 #include "src/strings/unicode-decoder.h"
16 #include "torque-generated/field-offsets.h"
17 
18 // Has to be the last include (doesn't have include guards):
19 #include "src/objects/object-macros.h"
20 
21 namespace v8 {
22 namespace internal {
23 
24 class SharedStringAccessGuardIfNeeded;
25 
26 enum InstanceType : uint16_t;
27 
// Passed to String::ToCString as |allow_nulls|.
// NOTE(review): presumably selects whether embedded NUL characters are kept
// in the produced C string -- confirm against the ToCString implementation.
enum AllowNullsFlag {
  ALLOW_NULLS,
  DISALLOW_NULLS
};

// Passed to String::ToCString as |robustness_flag|.  ToCString documents
// ROBUST_STRING_TRAVERSAL as the mode that handles unexpected data without
// causing assert failures and without doing any heap allocation.
enum RobustnessFlag {
  ROBUST_STRING_TRAVERSAL,
  FAST_STRING_TRAVERSAL
};
30 
31 // The characteristics of a string are stored in its map.  Retrieving these
32 // few bits of information is moderately expensive, involving two memory
33 // loads where the second is dependent on the first.  To improve efficiency
34 // the shape of the string is given its own class so that it can be retrieved
35 // once and used for several string operations.  A StringShape is small enough
36 // to be passed by value and is immutable, but be aware that flattening a
37 // string can potentially alter its shape.  Also be aware that a GC caused by
38 // something else can alter the shape of a string due to ConsString
39 // shortcutting.  Keeping these restrictions in mind has proven to be error-
40 // prone and so we no longer put StringShapes in variables unless there is a
41 // concrete performance benefit at that particular point in the code.
42 class StringShape {
43  public:
44   inline explicit StringShape(const String s);
45   inline explicit StringShape(Map s);
46   inline explicit StringShape(InstanceType t);
47   inline bool IsSequential();
48   inline bool IsExternal();
49   inline bool IsCons();
50   inline bool IsSliced();
51   inline bool IsThin();
52   inline bool IsIndirect();
53   inline bool IsExternalOneByte();
54   inline bool IsExternalTwoByte();
55   inline bool IsSequentialOneByte();
56   inline bool IsSequentialTwoByte();
57   inline bool IsInternalized();
58   inline StringRepresentationTag representation_tag();
59   inline uint32_t encoding_tag();
60   inline uint32_t full_representation_tag();
61 #ifdef DEBUG
type()62   inline uint32_t type() { return type_; }
invalidate()63   inline void invalidate() { valid_ = false; }
valid()64   inline bool valid() { return valid_; }
65 #else
invalidate()66   inline void invalidate() {}
67 #endif
68 
69   // Run different behavior for each concrete string class type, as defined by
70   // the dispatcher.
71   template <typename TDispatcher, typename TResult, typename... TArgs>
72   inline TResult DispatchToSpecificTypeWithoutCast(TArgs&&... args);
73   template <typename TDispatcher, typename TResult, typename... TArgs>
74   inline TResult DispatchToSpecificType(String str, TArgs&&... args);
75 
76  private:
77   uint32_t type_;
78 #ifdef DEBUG
set_valid()79   inline void set_valid() { valid_ = true; }
80   bool valid_;
81 #else
set_valid()82   inline void set_valid() {}
83 #endif
84 };
85 
86 #include "torque-generated/src/objects/string-tq.inc"
87 
88 // The String abstract class captures JavaScript string values:
89 //
90 // Ecma-262:
91 //  4.3.16 String Value
92 //    A string value is a member of the type String and is a finite
93 //    ordered sequence of zero or more 16-bit unsigned integer values.
94 //
95 // All string values have a length field.
96 class String : public TorqueGeneratedString<String, Name> {
97  public:
98   enum Encoding { ONE_BYTE_ENCODING, TWO_BYTE_ENCODING };
99 
100   // Representation of the flat content of a String.
101   // A non-flat string doesn't have flat content.
102   // A flat string has content that's encoded as a sequence of either
103   // one-byte chars or two-byte UC16.
104   // Returned by String::GetFlatContent().
105   // Not safe to use from concurrent background threads.
106   // TODO(solanes): Move FlatContent into FlatStringReader, and make it private.
107   // This would de-duplicate code, as well as taking advantage of the fact that
108   // FlatStringReader is relocatable.
109   class FlatContent {
110    public:
111     // Returns true if the string is flat and this structure contains content.
IsFlat()112     bool IsFlat() const { return state_ != NON_FLAT; }
113     // Returns true if the structure contains one-byte content.
IsOneByte()114     bool IsOneByte() const { return state_ == ONE_BYTE; }
115     // Returns true if the structure contains two-byte content.
IsTwoByte()116     bool IsTwoByte() const { return state_ == TWO_BYTE; }
117 
118     // Return the one byte content of the string. Only use if IsOneByte()
119     // returns true.
ToOneByteVector()120     Vector<const uint8_t> ToOneByteVector() const {
121       DCHECK_EQ(ONE_BYTE, state_);
122       return Vector<const uint8_t>(onebyte_start, length_);
123     }
124     // Return the two-byte content of the string. Only use if IsTwoByte()
125     // returns true.
ToUC16Vector()126     Vector<const uc16> ToUC16Vector() const {
127       DCHECK_EQ(TWO_BYTE, state_);
128       return Vector<const uc16>(twobyte_start, length_);
129     }
130 
Get(int i)131     uc16 Get(int i) const {
132       DCHECK(i < length_);
133       DCHECK(state_ != NON_FLAT);
134       if (state_ == ONE_BYTE) return onebyte_start[i];
135       return twobyte_start[i];
136     }
137 
UsesSameString(const FlatContent & other)138     bool UsesSameString(const FlatContent& other) const {
139       return onebyte_start == other.onebyte_start;
140     }
141 
142    private:
143     enum State { NON_FLAT, ONE_BYTE, TWO_BYTE };
144 
145     // Constructors only used by String::GetFlatContent().
FlatContent(const uint8_t * start,int length,const DisallowHeapAllocation & no_gc)146     FlatContent(const uint8_t* start, int length,
147                 const DisallowHeapAllocation& no_gc)
148         : onebyte_start(start),
149           length_(length),
150           state_(ONE_BYTE),
151           no_gc_(no_gc) {}
FlatContent(const uc16 * start,int length,const DisallowHeapAllocation & no_gc)152     FlatContent(const uc16* start, int length,
153                 const DisallowHeapAllocation& no_gc)
154         : twobyte_start(start),
155           length_(length),
156           state_(TWO_BYTE),
157           no_gc_(no_gc) {}
FlatContent(const DisallowHeapAllocation & no_gc)158     explicit FlatContent(const DisallowHeapAllocation& no_gc)
159         : onebyte_start(nullptr), length_(0), state_(NON_FLAT), no_gc_(no_gc) {}
160 
161     union {
162       const uint8_t* onebyte_start;
163       const uc16* twobyte_start;
164     };
165     int length_;
166     State state_;
167     const DisallowHeapAllocation& no_gc_;
168 
169     friend class String;
170     friend class IterableSubString;
171   };
172 
173   void MakeThin(Isolate* isolate, String canonical);
174 
175   template <typename Char>
176   V8_INLINE Vector<const Char> GetCharVector(
177       const DisallowHeapAllocation& no_gc);
178 
179   // Get chars from sequential or external strings. May only be called when a
180   // SharedStringAccessGuard is not needed (i.e. on the main thread or on
181   // read-only strings).
182   template <typename Char>
183   inline const Char* GetChars(const DisallowHeapAllocation& no_gc);
184 
185   // Get chars from sequential or external strings.
186   template <typename Char>
187   inline const Char* GetChars(
188       const DisallowHeapAllocation& no_gc,
189       const SharedStringAccessGuardIfNeeded& access_guard);
190 
191   // Returns the address of the character at an offset into this string.
192   // Requires: this->IsFlat()
193   const byte* AddressOfCharacterAt(int start_index,
194                                    const DisallowHeapAllocation& no_gc);
195 
196   // Get and set the length of the string using acquire loads and release
197   // stores.
198   DECL_SYNCHRONIZED_INT_ACCESSORS(length)
199 
200   // Returns whether this string has only one-byte chars, i.e. all of them can
201   // be one-byte encoded.  This might be the case even if the string is
202   // two-byte.  Such strings may appear when the embedder prefers
203   // two-byte external representations even for one-byte data.
204   DECL_GETTER(IsOneByteRepresentation, bool)
205   DECL_GETTER(IsTwoByteRepresentation, bool)
206 
207   // Cons and slices have an encoding flag that may not represent the actual
208   // encoding of the underlying string.  This is taken into account here.
209   // This function is static because that helps it get inlined.
210   // Requires: string.IsFlat()
211   static inline bool IsOneByteRepresentationUnderneath(String string);
212 
213   // Get and set individual two byte chars in the string.
214   inline void Set(int index, uint16_t value);
215   // Get individual two byte char in the string.  Repeated calls
216   // to this method are not efficient unless the string is flat.
217   V8_INLINE uint16_t Get(int index);
218 
219   // ES6 section 7.1.3.1 ToNumber Applied to the String Type
220   static Handle<Object> ToNumber(Isolate* isolate, Handle<String> subject);
221 
222   // Flattens the string.  Checks first inline to see if it is
223   // necessary.  Does nothing if the string is not a cons string.
224   // Flattening allocates a sequential string with the same data as
225   // the given string and mutates the cons string to a degenerate
226   // form, where the first component is the new sequential string and
227   // the second component is the empty string.  If allocation fails,
228   // this function returns a failure.  If flattening succeeds, this
229   // function returns the sequential string that is now the first
230   // component of the cons string.
231   //
232   // Degenerate cons strings are handled specially by the garbage
233   // collector (see IsShortcutCandidate).
234 
235   static inline Handle<String> Flatten(
236       Isolate* isolate, Handle<String> string,
237       AllocationType allocation = AllocationType::kYoung);
238   static inline Handle<String> Flatten(
239       LocalIsolate* isolate, Handle<String> string,
240       AllocationType allocation = AllocationType::kYoung);
241 
242   // Tries to return the content of a flat string as a structure holding either
243   // a flat vector of char or of uc16.
244   // If the string isn't flat, and therefore doesn't have flat content, the
245   // returned structure will report so, and can't provide a vector of either
246   // kind.
247   V8_EXPORT_PRIVATE FlatContent
248   GetFlatContent(const DisallowHeapAllocation& no_gc);
249 
250   // Returns the parent of a sliced string or first part of a flat cons string.
251   // Requires: StringShape(this).IsIndirect() && this->IsFlat()
252   inline String GetUnderlying();
253 
254   // String relational comparison, implemented according to ES6 section 7.2.11
255   // Abstract Relational Comparison (step 5): The comparison of Strings uses a
256   // simple lexicographic ordering on sequences of code unit values. There is no
257   // attempt to use the more complex, semantically oriented definitions of
258   // character or string equality and collating order defined in the Unicode
259   // specification. Therefore String values that are canonically equal according
260   // to the Unicode standard could test as unequal. In effect this algorithm
261   // assumes that both Strings are already in normalized form. Also, note that
262   // for strings containing supplementary characters, lexicographic ordering on
263   // sequences of UTF-16 code unit values differs from that on sequences of code
264   // point values.
265   V8_WARN_UNUSED_RESULT static ComparisonResult Compare(Isolate* isolate,
266                                                         Handle<String> x,
267                                                         Handle<String> y);
268 
269   // Perform ES6 21.1.3.8, including checking arguments.
270   static Object IndexOf(Isolate* isolate, Handle<Object> receiver,
271                         Handle<Object> search, Handle<Object> position);
272   // Perform string match of pattern on subject, starting at start index.
273   // Caller must ensure that 0 <= start_index <= sub->length(), as this does not
274   // check any arguments.
275   static int IndexOf(Isolate* isolate, Handle<String> receiver,
276                      Handle<String> search, int start_index);
277 
278   static Object LastIndexOf(Isolate* isolate, Handle<Object> receiver,
279                             Handle<Object> search, Handle<Object> position);
280 
281   // Encapsulates logic related to a match and its capture groups as required
282   // by GetSubstitution.
283   class Match {
284    public:
285     virtual Handle<String> GetMatch() = 0;
286     virtual Handle<String> GetPrefix() = 0;
287     virtual Handle<String> GetSuffix() = 0;
288 
289     // A named capture can be unmatched (either not specified in the pattern,
290     // or specified but unmatched in the current string), or matched.
291     enum CaptureState { UNMATCHED, MATCHED };
292 
293     virtual int CaptureCount() = 0;
294     virtual bool HasNamedCaptures() = 0;
295     virtual MaybeHandle<String> GetCapture(int i, bool* capture_exists) = 0;
296     virtual MaybeHandle<String> GetNamedCapture(Handle<String> name,
297                                                 CaptureState* state) = 0;
298 
299     virtual ~Match() = default;
300   };
301 
302   // ES#sec-getsubstitution
303   // GetSubstitution(matched, str, position, captures, replacement)
304   // Expand the $-expressions in the string and return a new string with
305   // the result.
306   // A {start_index} can be passed to specify where to start scanning the
307   // replacement string.
308   V8_WARN_UNUSED_RESULT static MaybeHandle<String> GetSubstitution(
309       Isolate* isolate, Match* match, Handle<String> replacement,
310       int start_index = 0);
311 
312   // String equality operations.
313   inline bool Equals(String other);
314   inline static bool Equals(Isolate* isolate, Handle<String> one,
315                             Handle<String> two);
316 
317   // Dispatches to Is{One,Two}ByteEqualTo.
318   template <typename Char>
319   bool IsEqualTo(Vector<const Char> str);
320 
321   V8_EXPORT_PRIVATE bool HasOneBytePrefix(Vector<const char> str);
322   V8_EXPORT_PRIVATE bool IsOneByteEqualTo(Vector<const uint8_t> str);
IsOneByteEqualTo(Vector<const char> str)323   V8_EXPORT_PRIVATE bool IsOneByteEqualTo(Vector<const char> str) {
324     return IsOneByteEqualTo(Vector<const uint8_t>::cast(str));
325   }
326   bool IsTwoByteEqualTo(Vector<const uc16> str);
327 
  // Return a UTF8 representation of the string.  The string is null
  // terminated but may optionally contain nulls.  Length is returned
  // in length_output if length_output is not a null pointer.  The string
  // should be nearly flat, otherwise the performance of this method may
  // be very slow (quadratic in the length).  Setting robustness_flag to
  // ROBUST_STRING_TRAVERSAL invokes behaviour that is robust.  This means it
  // handles unexpected data without causing assert failures and it does not
  // do any heap allocations.  This is useful when printing stack traces.
336   std::unique_ptr<char[]> ToCString(AllowNullsFlag allow_nulls,
337                                     RobustnessFlag robustness_flag, int offset,
338                                     int length, int* length_output = nullptr);
339   V8_EXPORT_PRIVATE std::unique_ptr<char[]> ToCString(
340       AllowNullsFlag allow_nulls = DISALLOW_NULLS,
341       RobustnessFlag robustness_flag = FAST_STRING_TRAVERSAL,
342       int* length_output = nullptr);
343 
344   // Externalization.
345   V8_EXPORT_PRIVATE bool MakeExternal(
346       v8::String::ExternalStringResource* resource);
347   V8_EXPORT_PRIVATE bool MakeExternal(
348       v8::String::ExternalOneByteStringResource* resource);
349   bool SupportsExternalization();
350 
351   // Conversion.
352   // "array index": an index allowed by the ES spec for JSArrays.
353   inline bool AsArrayIndex(uint32_t* index);
354 
355   // This is used for calculating array indices but differs from an
356   // Array Index in the regard that this does not support the full
357   // array index range. This only supports positive numbers less than
358   // or equal to INT_MAX.
359   //
360   // String::AsArrayIndex might be a better fit if you're looking to
361   // calculate the array index.
362   //
363   // if val < 0 or val > INT_MAX, returns -1
364   // if 0 <= val <= INT_MAX, returns val
365   static int32_t ToArrayIndex(Address addr);
366 
367   uint32_t inline ToValidIndex(Object number);
368   // "integer index": the string is the decimal representation of an
369   // integer in the range of a size_t. Useful for TypedArray accesses.
370   inline bool AsIntegerIndex(size_t* index);
371 
372   // Trimming.
373   enum TrimMode { kTrim, kTrimStart, kTrimEnd };
374   static Handle<String> Trim(Isolate* isolate, Handle<String> string,
375                              TrimMode mode);
376 
377   V8_EXPORT_PRIVATE void PrintOn(FILE* out);
378 
379   // For use during stack traces.  Performs rudimentary sanity check.
380   bool LooksValid();
381 
382   // Printing utility functions.
383   // - PrintUC16 prints the raw string contents to the given stream.
384   //   Non-printable characters are formatted as hex, but otherwise the string
385   //   is printed as-is.
386   // - StringShortPrint and StringPrint have extra formatting: they add a
387   //   prefix and suffix depending on the string kind, may add other information
388   //   such as the string heap object address, may truncate long strings, etc.
389   const char* PrefixForDebugPrint() const;
390   const char* SuffixForDebugPrint() const;
391   void StringShortPrint(StringStream* accumulator);
392   void PrintUC16(std::ostream& os, int start = 0, int end = -1);  // NOLINT
393   void PrintUC16(StringStream* accumulator, int start, int end);
394 
395   // Dispatched behavior.
396 #if defined(DEBUG) || defined(OBJECT_PRINT)
397   char* ToAsciiArray();
398 #endif
399   DECL_PRINTER(String)
400   DECL_VERIFIER(String)
401 
402   inline bool IsFlat();
403 
404   // Max char codes.
405   static const int32_t kMaxOneByteCharCode = unibrow::Latin1::kMaxChar;
406   static const uint32_t kMaxOneByteCharCodeU = unibrow::Latin1::kMaxChar;
407   static const int kMaxUtf16CodeUnit = 0xffff;
408   static const uint32_t kMaxUtf16CodeUnitU = kMaxUtf16CodeUnit;
409   static const uc32 kMaxCodePoint = 0x10ffff;
410 
411   // Maximal string length.
412   // The max length is different on 32 and 64 bit platforms. Max length for
413   // 32-bit platforms is ~268.4M chars. On 64-bit platforms, max length is
414   // ~536.8M chars.
415   // See include/v8.h for the definition.
416   static const int kMaxLength = v8::String::kMaxLength;
417   // There are several defining limits imposed by our current implementation:
418   // - any string's length must fit into a Smi.
419   static_assert(kMaxLength <= kSmiMaxValue,
420                 "String length must fit into a Smi");
421   // - adding two string lengths must still fit into a 32-bit int without
422   //   overflow
423   static_assert(kMaxLength * 2 <= kMaxInt,
424                 "String::kMaxLength * 2 must fit into an int32");
425   // - any heap object's size in bytes must be able to fit into a Smi, because
426   //   its space on the heap might be filled with a Filler; for strings this
427   //   means SeqTwoByteString::kMaxSize must be able to fit into a Smi.
428   static_assert(kMaxLength * 2 + kHeaderSize <= kSmiMaxValue,
429                 "String object size in bytes must fit into a Smi");
430   // - any heap object's size in bytes must be able to fit into an int, because
431   //   that's what our object handling code uses almost everywhere.
432   static_assert(kMaxLength * 2 + kHeaderSize <= kMaxInt,
433                 "String object size in bytes must fit into an int");
434 
435   // Max length for computing hash. For strings longer than this limit the
436   // string length is used as the hash value.
437   static const int kMaxHashCalcLength = 16383;
438 
439   // Limit for truncation in short printing.
440   static const int kMaxShortPrintLength = 1024;
441 
442   // Helper function for flattening strings.
443   template <typename sinkchar>
444   EXPORT_TEMPLATE_DECLARE(V8_EXPORT_PRIVATE)
445   static void WriteToFlat(String source, sinkchar* sink, int from, int to);
446 
IsAscii(const char * chars,int length)447   static inline bool IsAscii(const char* chars, int length) {
448     return IsAscii(reinterpret_cast<const uint8_t*>(chars), length);
449   }
450 
IsAscii(const uint8_t * chars,int length)451   static inline bool IsAscii(const uint8_t* chars, int length) {
452     return NonAsciiStart(chars, length) >= length;
453   }
454 
NonOneByteStart(const uc16 * chars,int length)455   static inline int NonOneByteStart(const uc16* chars, int length) {
456     DCHECK(IsAligned(reinterpret_cast<Address>(chars), sizeof(uc16)));
457     const uint16_t* start = chars;
458     const uint16_t* limit = chars + length;
459 
460     if (static_cast<size_t>(length) >= kUIntptrSize) {
461       // Check unaligned chars.
462       while (!IsAligned(reinterpret_cast<Address>(chars), kUIntptrSize)) {
463         if (*chars > unibrow::Latin1::kMaxChar) {
464           return static_cast<int>(chars - start);
465         }
466         ++chars;
467       }
468 
469       // Check aligned words.
470       STATIC_ASSERT(unibrow::Latin1::kMaxChar == 0xFF);
471 #ifdef V8_TARGET_LITTLE_ENDIAN
472       const uintptr_t non_one_byte_mask = kUintptrAllBitsSet / 0xFFFF * 0xFF00;
473 #else
474       const uintptr_t non_one_byte_mask = kUintptrAllBitsSet / 0xFFFF * 0x00FF;
475 #endif
476       while (chars + sizeof(uintptr_t) <= limit) {
477         if (*reinterpret_cast<const uintptr_t*>(chars) & non_one_byte_mask) {
478           break;
479         }
480         chars += (sizeof(uintptr_t) / sizeof(uc16));
481       }
482     }
483 
484     // Check remaining unaligned chars, or find non-one-byte char in word.
485     while (chars < limit) {
486       if (*chars > unibrow::Latin1::kMaxChar) {
487         return static_cast<int>(chars - start);
488       }
489       ++chars;
490     }
491 
492     return static_cast<int>(chars - start);
493   }
494 
IsOneByte(const uc16 * chars,int length)495   static inline bool IsOneByte(const uc16* chars, int length) {
496     return NonOneByteStart(chars, length) >= length;
497   }
498 
499   template <class Visitor>
500   static inline ConsString VisitFlat(Visitor* visitor, String string,
501                                      int offset = 0);
502 
503   template <typename LocalIsolate>
504   static Handle<FixedArray> CalculateLineEnds(LocalIsolate* isolate,
505                                               Handle<String> string,
506                                               bool include_ending_line);
507 
508  private:
509   friend class Name;
510   friend class StringTableInsertionKey;
511   friend class InternalizedStringKey;
512 
513   V8_EXPORT_PRIVATE static Handle<String> SlowFlatten(
514       Isolate* isolate, Handle<ConsString> cons, AllocationType allocation);
515 
516   // Slow case of String::Equals.  This implementation works on any strings
517   // but it is most efficient on strings that are almost flat.
518   V8_EXPORT_PRIVATE bool SlowEquals(String other);
519 
520   V8_EXPORT_PRIVATE static bool SlowEquals(Isolate* isolate, Handle<String> one,
521                                            Handle<String> two);
522 
523   // Slow case of AsArrayIndex.
524   V8_EXPORT_PRIVATE bool SlowAsArrayIndex(uint32_t* index);
525   V8_EXPORT_PRIVATE bool SlowAsIntegerIndex(size_t* index);
526 
527   // Compute and set the hash code.
528   V8_EXPORT_PRIVATE uint32_t ComputeAndSetHash();
529 
530   TQ_OBJECT_CONSTRUCTORS(String)
531 };
532 
533 // clang-format off
534 extern template EXPORT_TEMPLATE_DECLARE(V8_EXPORT_PRIVATE)
535 void String::WriteToFlat(String source, uint16_t* sink, int from, int to);
536 // clang-format on
537 
538 class SubStringRange {
539  public:
540   inline SubStringRange(String string, const DisallowHeapAllocation& no_gc,
541                         int first = 0, int length = -1);
542   class iterator;
543   inline iterator begin();
544   inline iterator end();
545 
546  private:
547   String string_;
548   int first_;
549   int length_;
550   const DisallowHeapAllocation& no_gc_;
551 };
552 
553 // The SeqString abstract class captures sequential string values.
554 class SeqString : public TorqueGeneratedSeqString<SeqString, String> {
555  public:
556   // Truncate the string in-place if possible and return the result.
557   // In case of new_length == 0, the empty string is returned without
558   // truncating the original string.
559   V8_WARN_UNUSED_RESULT static Handle<String> Truncate(Handle<SeqString> string,
560                                                        int new_length);
561 
562   TQ_OBJECT_CONSTRUCTORS(SeqString)
563 };
564 
565 class InternalizedString
566     : public TorqueGeneratedInternalizedString<InternalizedString, String> {
567  public:
568   // TODO(neis): Possibly move some stuff from String here.
569 
570   TQ_OBJECT_CONSTRUCTORS(InternalizedString)
571 };
572 
573 // The OneByteString class captures sequential one-byte string objects.
574 // Each character in the OneByteString is an one-byte character.
575 class SeqOneByteString
576     : public TorqueGeneratedSeqOneByteString<SeqOneByteString, SeqString> {
577  public:
578   static const bool kHasOneByteEncoding = true;
579   using Char = uint8_t;
580 
581   // Dispatched behavior.
582   inline uint8_t Get(int index);
583   inline void SeqOneByteStringSet(int index, uint16_t value);
584 
585   // Get the address of the characters in this string.
586   inline Address GetCharsAddress();
587 
588   // Get a pointer to the characters of the string. May only be called when a
589   // SharedStringAccessGuard is not needed (i.e. on the main thread or on
590   // read-only strings).
591   inline uint8_t* GetChars(const DisallowHeapAllocation& no_gc);
592 
593   // Get a pointer to the characters of the string.
594   inline uint8_t* GetChars(const DisallowHeapAllocation& no_gc,
595                            const SharedStringAccessGuardIfNeeded& access_guard);
596 
597   // Clear uninitialized padding space. This ensures that the snapshot content
598   // is deterministic.
599   void clear_padding();
600 
601   // Garbage collection support.  This method is called by the
602   // garbage collector to compute the actual size of an OneByteString
603   // instance.
604   inline int SeqOneByteStringSize(InstanceType instance_type);
605 
606   // Maximal memory usage for a single sequential one-byte string.
607   static const int kMaxCharsSize = kMaxLength;
608   static const int kMaxSize = OBJECT_POINTER_ALIGN(kMaxCharsSize + kHeaderSize);
609   STATIC_ASSERT((kMaxSize - kHeaderSize) >= String::kMaxLength);
610 
611   int AllocatedSize();
612 
613   class BodyDescriptor;
614 
615   TQ_OBJECT_CONSTRUCTORS(SeqOneByteString)
616 };
617 
618 // The TwoByteString class captures sequential unicode string objects.
619 // Each character in the TwoByteString is a two-byte uint16_t.
620 class SeqTwoByteString
621     : public TorqueGeneratedSeqTwoByteString<SeqTwoByteString, SeqString> {
622  public:
623   static const bool kHasOneByteEncoding = false;
624   using Char = uint16_t;
625 
626   // Dispatched behavior.
627   inline uint16_t Get(int index);
628   inline void SeqTwoByteStringSet(int index, uint16_t value);
629 
630   // Get the address of the characters in this string.
631   inline Address GetCharsAddress();
632 
633   // Get a pointer to the characters of the string. May only be called when a
634   // SharedStringAccessGuard is not needed (i.e. on the main thread or on
635   // read-only strings).
636   inline uc16* GetChars(const DisallowHeapAllocation& no_gc);
637 
638   // Get a pointer to the characters of the string.
639   inline uc16* GetChars(const DisallowHeapAllocation& no_gc,
640                         const SharedStringAccessGuardIfNeeded& access_guard);
641 
642   // Clear uninitialized padding space. This ensures that the snapshot content
643   // is deterministic.
644   void clear_padding();
645 
646   // Garbage collection support.  This method is called by the
647   // garbage collector to compute the actual size of a TwoByteString
648   // instance.
649   inline int SeqTwoByteStringSize(InstanceType instance_type);
650 
651   // Maximal memory usage for a single sequential two-byte string.
652   static const int kMaxCharsSize = kMaxLength * 2;
653   static const int kMaxSize = OBJECT_POINTER_ALIGN(kMaxCharsSize + kHeaderSize);
654   STATIC_ASSERT(static_cast<int>((kMaxSize - kHeaderSize) / sizeof(uint16_t)) >=
655                 String::kMaxLength);
656 
657   int AllocatedSize();
658 
659   class BodyDescriptor;
660 
661   TQ_OBJECT_CONSTRUCTORS(SeqTwoByteString)
662 };
663 
664 // The ConsString class describes string values built by using the
665 // addition operator on strings.  A ConsString is a pair where the
666 // first and second components are pointers to other string values.
667 // One or both components of a ConsString can be pointers to other
668 // ConsStrings, creating a binary tree of ConsStrings where the leaves
669 // are non-ConsString string values.  The string value represented by
670 // a ConsString can be obtained by concatenating the leaf string
671 // values in a left-to-right depth-first traversal of the tree.
672 class ConsString : public TorqueGeneratedConsString<ConsString, String> {
673  public:
674   // Doesn't check that the result is a string, even in debug mode.  This is
675   // useful during GC where the mark bits confuse the checks.
676   inline Object unchecked_first();
677 
678   // Doesn't check that the result is a string, even in debug mode.  This is
679   // useful during GC where the mark bits confuse the checks.
680   inline Object unchecked_second();
681 
682   // Dispatched behavior.
683   V8_EXPORT_PRIVATE uint16_t Get(int index);
684 
685   // Minimum length for a cons string.
686   static const int kMinLength = 13;
687 
688   class BodyDescriptor;
689 
690   DECL_VERIFIER(ConsString)
691 
692   TQ_OBJECT_CONSTRUCTORS(ConsString)
693 };
694 
695 // The ThinString class describes string objects that are just references
696 // to another string object. They are used for in-place internalization when
697 // the original string cannot actually be internalized in-place: in these
698 // cases, the original string is converted to a ThinString pointing at its
699 // internalized version (which is allocated as a new object).
700 // In terms of memory layout and most algorithms operating on strings,
701 // ThinStrings can be thought of as "one-part cons strings".
702 class ThinString : public TorqueGeneratedThinString<ThinString, String> {
703  public:
704   DECL_GETTER(unchecked_actual, HeapObject)
705 
706   V8_EXPORT_PRIVATE uint16_t Get(int index);
707 
708   DECL_VERIFIER(ThinString)
709 
710   class BodyDescriptor;
711 
712   TQ_OBJECT_CONSTRUCTORS(ThinString)
713 };
714 
715 // The Sliced String class describes strings that are substrings of another
716 // sequential string.  The motivation is to save time and memory when creating
717 // a substring.  A Sliced String is described as a pointer to the parent,
718 // the offset from the start of the parent string and the length.  Using
719 // a Sliced String therefore requires unpacking of the parent string and
720 // adding the offset to the start address.  A substring of a Sliced String
721 // are not nested since the double indirection is simplified when creating
722 // such a substring.
723 // Currently missing features are:
724 //  - handling externalized parent strings
725 //  - external strings as parent
726 //  - truncating sliced string to enable otherwise unneeded parent to be GC'ed.
727 class SlicedString : public TorqueGeneratedSlicedString<SlicedString, String> {
728  public:
729   inline void set_parent(String parent,
730                          WriteBarrierMode mode = UPDATE_WRITE_BARRIER);
731   // Dispatched behavior.
732   V8_EXPORT_PRIVATE uint16_t Get(int index);
733 
734   // Minimum length for a sliced string.
735   static const int kMinLength = 13;
736 
737   class BodyDescriptor;
738 
739   DECL_VERIFIER(SlicedString)
740 
741   TQ_OBJECT_CONSTRUCTORS(SlicedString)
742 };
743 
744 // The ExternalString class describes string values that are backed by
745 // a string resource that lies outside the V8 heap.  ExternalStrings
746 // consist of the length field common to all strings, a pointer to the
747 // external resource.  It is important to ensure (externally) that the
748 // resource is not deallocated while the ExternalString is live in the
749 // V8 heap.
750 //
751 // The API expects that all ExternalStrings are created through the
752 // API.  Therefore, ExternalStrings should not be used internally.
753 class ExternalString : public String {
754  public:
755   DECL_CAST(ExternalString)
756   DECL_VERIFIER(ExternalString)
757 
758   DEFINE_FIELD_OFFSET_CONSTANTS(String::kHeaderSize,
759                                 TORQUE_GENERATED_EXTERNAL_STRING_FIELDS)
760 
761   // Size of uncached external strings.
762   static const int kUncachedSize =
763       kResourceOffset + FIELD_SIZE(kResourceOffset);
764 
765   inline void AllocateExternalPointerEntries(Isolate* isolate);
766 
767   // Return whether the external string data pointer is not cached.
768   inline bool is_uncached() const;
769   // Size in bytes of the external payload.
770   int ExternalPayloadSize() const;
771 
772   // Used in the serializer/deserializer.
773   DECL_GETTER(resource_as_address, Address)
774   inline void set_address_as_resource(Isolate* isolate, Address address);
775   inline uint32_t GetResourceRefForDeserialization();
776   inline void SetResourceRefForSerialization(uint32_t ref);
777 
778   // Disposes string's resource object if it has not already been disposed.
779   inline void DisposeResource(Isolate* isolate);
780 
781   STATIC_ASSERT(kResourceOffset == Internals::kStringResourceOffset);
782   static const int kSizeOfAllExternalStrings = kHeaderSize;
783 
784   OBJECT_CONSTRUCTORS(ExternalString, String);
785 };
786 
787 // The ExternalOneByteString class is an external string backed by an
788 // one-byte string.
789 class ExternalOneByteString : public ExternalString {
790  public:
791   static const bool kHasOneByteEncoding = true;
792 
793   using Resource = v8::String::ExternalOneByteStringResource;
794 
795   // The underlying resource.
796   DECL_GETTER(resource, const Resource*)
797 
798   // It is assumed that the previous resource is null. If it is not null, then
799   // it is the responsability of the caller the handle the previous resource.
800   inline void SetResource(Isolate* isolate, const Resource* buffer);
801 
802   // Used only during serialization.
803   inline void set_resource(Isolate* isolate, const Resource* buffer);
804 
805   // Update the pointer cache to the external character array.
806   // The cached pointer is always valid, as the external character array does =
807   // not move during lifetime.  Deserialization is the only exception, after
808   // which the pointer cache has to be refreshed.
809   inline void update_data_cache(Isolate* isolate);
810 
811   inline const uint8_t* GetChars();
812 
813   // Dispatched behavior.
814   inline uint8_t Get(int index);
815 
816   DECL_CAST(ExternalOneByteString)
817 
818   class BodyDescriptor;
819 
820   DEFINE_FIELD_OFFSET_CONSTANTS(
821       ExternalString::kHeaderSize,
822       TORQUE_GENERATED_EXTERNAL_ONE_BYTE_STRING_FIELDS)
823 
824   STATIC_ASSERT(kSize == kSizeOfAllExternalStrings);
825 
826   OBJECT_CONSTRUCTORS(ExternalOneByteString, ExternalString);
827 };
828 
829 // The ExternalTwoByteString class is an external string backed by a UTF-16
830 // encoded string.
831 class ExternalTwoByteString : public ExternalString {
832  public:
833   static const bool kHasOneByteEncoding = false;
834 
835   using Resource = v8::String::ExternalStringResource;
836 
837   // The underlying string resource.
838   DECL_GETTER(resource, const Resource*)
839 
840   // It is assumed that the previous resource is null. If it is not null, then
841   // it is the responsability of the caller the handle the previous resource.
842   inline void SetResource(Isolate* isolate, const Resource* buffer);
843 
844   // Used only during serialization.
845   inline void set_resource(Isolate* isolate, const Resource* buffer);
846 
847   // Update the pointer cache to the external character array.
848   // The cached pointer is always valid, as the external character array does =
849   // not move during lifetime.  Deserialization is the only exception, after
850   // which the pointer cache has to be refreshed.
851   inline void update_data_cache(Isolate* isolate);
852 
853   inline const uint16_t* GetChars();
854 
855   // Dispatched behavior.
856   inline uint16_t Get(int index);
857 
858   // For regexp code.
859   inline const uint16_t* ExternalTwoByteStringGetData(unsigned start);
860 
861   DECL_CAST(ExternalTwoByteString)
862 
863   class BodyDescriptor;
864 
865   DEFINE_FIELD_OFFSET_CONSTANTS(
866       ExternalString::kHeaderSize,
867       TORQUE_GENERATED_EXTERNAL_TWO_BYTE_STRING_FIELDS)
868 
869   STATIC_ASSERT(kSize == kSizeOfAllExternalStrings);
870 
871   OBJECT_CONSTRUCTORS(ExternalTwoByteString, ExternalString);
872 };
873 
874 // A flat string reader provides random access to the contents of a
875 // string independent of the character width of the string. The handle
876 // must be valid as long as the reader is being used.
877 // Not safe to use from concurrent background threads.
878 class V8_EXPORT_PRIVATE FlatStringReader : public Relocatable {
879  public:
880   FlatStringReader(Isolate* isolate, Handle<String> str);
881   void PostGarbageCollection() override;
882   inline uc32 Get(int index);
883   template <typename Char>
884   inline Char Get(int index);
885   int length() { return length_; }
886 
887  private:
888   Handle<String> str_;
889   bool is_one_byte_;
890   int length_;
891   const void* start_;
892 };
893 
894 // This maintains an off-stack representation of the stack frames required
895 // to traverse a ConsString, allowing an entirely iterative and restartable
896 // traversal of the entire string
897 class ConsStringIterator {
898  public:
899   inline ConsStringIterator() = default;
900   inline explicit ConsStringIterator(ConsString cons_string, int offset = 0) {
901     Reset(cons_string, offset);
902   }
903   ConsStringIterator(const ConsStringIterator&) = delete;
904   ConsStringIterator& operator=(const ConsStringIterator&) = delete;
905   inline void Reset(ConsString cons_string, int offset = 0) {
906     depth_ = 0;
907     // Next will always return nullptr.
908     if (cons_string.is_null()) return;
909     Initialize(cons_string, offset);
910   }
911   // Returns nullptr when complete.
912   inline String Next(int* offset_out) {
913     *offset_out = 0;
914     if (depth_ == 0) return String();
915     return Continue(offset_out);
916   }
917 
918  private:
919   static const int kStackSize = 32;
920   // Use a mask instead of doing modulo operations for stack wrapping.
921   static const int kDepthMask = kStackSize - 1;
922   static_assert(base::bits::IsPowerOfTwo(kStackSize),
923                 "kStackSize must be power of two");
924   static inline int OffsetForDepth(int depth);
925 
926   inline void PushLeft(ConsString string);
927   inline void PushRight(ConsString string);
928   inline void AdjustMaximumDepth();
929   inline void Pop();
930   inline bool StackBlown() { return maximum_depth_ - depth_ == kStackSize; }
931   V8_EXPORT_PRIVATE void Initialize(ConsString cons_string, int offset);
932   V8_EXPORT_PRIVATE String Continue(int* offset_out);
933   String NextLeaf(bool* blew_stack);
934   String Search(int* offset_out);
935 
936   // Stack must always contain only frames for which right traversal
937   // has not yet been performed.
938   ConsString frames_[kStackSize];
939   ConsString root_;
940   int depth_;
941   int maximum_depth_;
942   int consumed_;
943 };
944 
945 class StringCharacterStream {
946  public:
947   inline explicit StringCharacterStream(String string, int offset = 0);
948   StringCharacterStream(const StringCharacterStream&) = delete;
949   StringCharacterStream& operator=(const StringCharacterStream&) = delete;
950   inline uint16_t GetNext();
951   inline bool HasMore();
952   inline void Reset(String string, int offset = 0);
953   inline void VisitOneByteString(const uint8_t* chars, int length);
954   inline void VisitTwoByteString(const uint16_t* chars, int length);
955 
956  private:
957   ConsStringIterator iter_;
958   bool is_one_byte_;
959   union {
960     const uint8_t* buffer8_;
961     const uint16_t* buffer16_;
962   };
963   const uint8_t* end_;
964 };
965 
966 template <typename Char>
967 struct CharTraits;
968 
969 template <>
970 struct CharTraits<uint8_t> {
971   using String = SeqOneByteString;
972   using ExternalString = ExternalOneByteString;
973 };
974 
975 template <>
976 struct CharTraits<uint16_t> {
977   using String = SeqTwoByteString;
978   using ExternalString = ExternalTwoByteString;
979 };
980 
981 }  // namespace internal
982 }  // namespace v8
983 
984 #include "src/objects/object-macros-undef.h"
985 
986 #endif  // V8_OBJECTS_STRING_H_
987