• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2017 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef V8_OBJECTS_STRING_H_
6 #define V8_OBJECTS_STRING_H_
7 
8 #include <memory>
9 
10 #include "src/base/bits.h"
11 #include "src/base/export-template.h"
12 #include "src/base/strings.h"
13 #include "src/common/globals.h"
14 #include "src/objects/instance-type.h"
15 #include "src/objects/map.h"
16 #include "src/objects/name.h"
17 #include "src/objects/smi.h"
18 #include "src/strings/unicode-decoder.h"
19 
20 // Has to be the last include (doesn't have include guards):
21 #include "src/objects/object-macros.h"
22 
23 namespace v8 {
24 namespace internal {
25 
26 class SharedStringAccessGuardIfNeeded;
27 
28 enum InstanceType : uint16_t;
29 
30 enum AllowNullsFlag { ALLOW_NULLS, DISALLOW_NULLS };
31 enum RobustnessFlag { ROBUST_STRING_TRAVERSAL, FAST_STRING_TRAVERSAL };
32 
33 // The characteristics of a string are stored in its map.  Retrieving these
34 // few bits of information is moderately expensive, involving two memory
35 // loads where the second is dependent on the first.  To improve efficiency
36 // the shape of the string is given its own class so that it can be retrieved
37 // once and used for several string operations.  A StringShape is small enough
38 // to be passed by value and is immutable, but be aware that flattening a
39 // string can potentially alter its shape.  Also be aware that a GC caused by
40 // something else can alter the shape of a string due to ConsString
41 // shortcutting.  Keeping these restrictions in mind has proven to be error-
42 // prone and so we no longer put StringShapes in variables unless there is a
43 // concrete performance benefit at that particular point in the code.
44 class StringShape {
45  public:
46   V8_INLINE explicit StringShape(const String s);
47   V8_INLINE explicit StringShape(const String s, PtrComprCageBase cage_base);
48   V8_INLINE explicit StringShape(Map s);
49   V8_INLINE explicit StringShape(InstanceType t);
50   V8_INLINE bool IsSequential() const;
51   V8_INLINE bool IsExternal() const;
52   V8_INLINE bool IsCons() const;
53   V8_INLINE bool IsSliced() const;
54   V8_INLINE bool IsThin() const;
55   V8_INLINE bool IsDirect() const;
56   V8_INLINE bool IsIndirect() const;
57   V8_INLINE bool IsUncachedExternal() const;
58   V8_INLINE bool IsExternalOneByte() const;
59   V8_INLINE bool IsExternalTwoByte() const;
60   V8_INLINE bool IsSequentialOneByte() const;
61   V8_INLINE bool IsSequentialTwoByte() const;
62   V8_INLINE bool IsInternalized() const;
63   V8_INLINE bool IsShared() const;
64   V8_INLINE bool CanMigrateInParallel() const;
65   V8_INLINE StringRepresentationTag representation_tag() const;
66   V8_INLINE uint32_t encoding_tag() const;
67   V8_INLINE uint32_t representation_and_encoding_tag() const;
68   V8_INLINE uint32_t representation_encoding_and_shared_tag() const;
69 #ifdef DEBUG
type()70   inline uint32_t type() const { return type_; }
invalidate()71   inline void invalidate() { valid_ = false; }
valid()72   inline bool valid() const { return valid_; }
73 #else
invalidate()74   inline void invalidate() {}
75 #endif
76 
77   // Run different behavior for each concrete string class type, as defined by
78   // the dispatcher.
79   template <typename TDispatcher, typename TResult, typename... TArgs>
80   inline TResult DispatchToSpecificTypeWithoutCast(TArgs&&... args);
81   template <typename TDispatcher, typename TResult, typename... TArgs>
82   inline TResult DispatchToSpecificType(String str, TArgs&&... args);
83 
84  private:
85   uint32_t type_;
86 #ifdef DEBUG
set_valid()87   inline void set_valid() { valid_ = true; }
88   bool valid_;
89 #else
set_valid()90   inline void set_valid() {}
91 #endif
92 };
93 
94 #include "torque-generated/src/objects/string-tq.inc"
95 
96 // The String abstract class captures JavaScript string values:
97 //
98 // Ecma-262:
99 //  4.3.16 String Value
100 //    A string value is a member of the type String and is a finite
101 //    ordered sequence of zero or more 16-bit unsigned integer values.
102 //
103 // All string values have a length field.
104 class String : public TorqueGeneratedString<String, Name> {
105  public:
106   enum Encoding { ONE_BYTE_ENCODING, TWO_BYTE_ENCODING };
107 
108   // Representation of the flat content of a String.
109   // A non-flat string doesn't have flat content.
110   // A flat string has content that's encoded as a sequence of either
111   // one-byte chars or two-byte UC16.
112   // Returned by String::GetFlatContent().
113   // Not safe to use from concurrent background threads.
114   // TODO(solanes): Move FlatContent into FlatStringReader, and make it private.
115   // This would de-duplicate code, as well as taking advantage of the fact that
116   // FlatStringReader is relocatable.
117   class FlatContent {
118    public:
119     inline ~FlatContent();
120 
121     // Returns true if the string is flat and this structure contains content.
IsFlat()122     bool IsFlat() const { return state_ != NON_FLAT; }
123     // Returns true if the structure contains one-byte content.
IsOneByte()124     bool IsOneByte() const { return state_ == ONE_BYTE; }
125     // Returns true if the structure contains two-byte content.
IsTwoByte()126     bool IsTwoByte() const { return state_ == TWO_BYTE; }
127 
128     // Return the one byte content of the string. Only use if IsOneByte()
129     // returns true.
ToOneByteVector()130     base::Vector<const uint8_t> ToOneByteVector() const {
131       DCHECK_EQ(ONE_BYTE, state_);
132       return base::Vector<const uint8_t>(onebyte_start, length_);
133     }
134     // Return the two-byte content of the string. Only use if IsTwoByte()
135     // returns true.
ToUC16Vector()136     base::Vector<const base::uc16> ToUC16Vector() const {
137       DCHECK_EQ(TWO_BYTE, state_);
138       return base::Vector<const base::uc16>(twobyte_start, length_);
139     }
140 
Get(int i)141     base::uc16 Get(int i) const {
142       DCHECK(i < length_);
143       DCHECK(state_ != NON_FLAT);
144       if (state_ == ONE_BYTE) return onebyte_start[i];
145       return twobyte_start[i];
146     }
147 
UsesSameString(const FlatContent & other)148     bool UsesSameString(const FlatContent& other) const {
149       return onebyte_start == other.onebyte_start;
150     }
151 
152     // It is almost always a bug if the contents of a FlatContent changes during
153     // its lifetime, which can happen due to GC or bugs in concurrent string
154     // access. Rarely, callers need the ability to GC and have ensured safety in
155     // other ways, such as in IrregexpInterpreter. Those callers can disable the
156     // checksum verification with this call.
UnsafeDisableChecksumVerification()157     void UnsafeDisableChecksumVerification() {
158 #ifdef ENABLE_SLOW_DCHECKS
159       checksum_ = kChecksumVerificationDisabled;
160 #endif
161     }
162 
length()163     int length() const { return length_; }
164 
165    private:
166     enum State { NON_FLAT, ONE_BYTE, TWO_BYTE };
167 
168     // Constructors only used by String::GetFlatContent().
169     inline FlatContent(const uint8_t* start, int length,
170                        const DisallowGarbageCollection& no_gc);
171     inline FlatContent(const base::uc16* start, int length,
172                        const DisallowGarbageCollection& no_gc);
FlatContent(const DisallowGarbageCollection & no_gc)173     explicit FlatContent(const DisallowGarbageCollection& no_gc)
174         : onebyte_start(nullptr), length_(0), state_(NON_FLAT), no_gc_(no_gc) {}
175 
176     union {
177       const uint8_t* onebyte_start;
178       const base::uc16* twobyte_start;
179     };
180     int length_;
181     State state_;
182     const DisallowGarbageCollection& no_gc_;
183 
184     static constexpr uint32_t kChecksumVerificationDisabled = 0;
185 
186 #ifdef ENABLE_SLOW_DCHECKS
187     inline uint32_t ComputeChecksum() const;
188 
189     uint32_t checksum_;
190 #endif
191 
192     friend class String;
193     friend class IterableSubString;
194   };
195 
196   template <typename IsolateT>
197   EXPORT_TEMPLATE_DECLARE(V8_EXPORT_PRIVATE)
198   void MakeThin(IsolateT* isolate, String canonical);
199 
200   template <typename Char>
201   V8_INLINE base::Vector<const Char> GetCharVector(
202       const DisallowGarbageCollection& no_gc);
203 
204   // Get chars from sequential or external strings. May only be called when a
205   // SharedStringAccessGuard is not needed (i.e. on the main thread or on
206   // read-only strings).
207   template <typename Char>
208   inline const Char* GetChars(PtrComprCageBase cage_base,
209                               const DisallowGarbageCollection& no_gc) const;
210 
211   // Get chars from sequential or external strings.
212   template <typename Char>
213   inline const Char* GetChars(
214       PtrComprCageBase cage_base, const DisallowGarbageCollection& no_gc,
215       const SharedStringAccessGuardIfNeeded& access_guard) const;
216 
217   // Returns the address of the character at an offset into this string.
218   // Requires: this->IsFlat()
219   const byte* AddressOfCharacterAt(int start_index,
220                                    const DisallowGarbageCollection& no_gc);
221 
222   // Forward declare the non-atomic (set_)length defined in torque.
223   using TorqueGeneratedString::length;
224   using TorqueGeneratedString::set_length;
225   DECL_RELEASE_ACQUIRE_INT_ACCESSORS(length)
226 
227   // Returns whether this string has only one-byte chars, i.e. all of them can
228   // be one-byte encoded.  This might be the case even if the string is
229   // two-byte.  Such strings may appear when the embedder prefers
230   // two-byte external representations even for one-byte data.
231   DECL_GETTER(IsOneByteRepresentation, bool)
232   DECL_GETTER(IsTwoByteRepresentation, bool)
233 
234   // Cons and slices have an encoding flag that may not represent the actual
235   // encoding of the underlying string.  This is taken into account here.
236   // This function is static because that helps it get inlined.
237   // Requires: string.IsFlat()
238   static inline bool IsOneByteRepresentationUnderneath(String string);
239 
240   // Get and set individual two byte chars in the string.
241   inline void Set(int index, uint16_t value);
242   // Get individual two byte char in the string.  Repeated calls
243   // to this method are not efficient unless the string is flat.
244   // If it is called from a background thread, the LocalIsolate version should
245   // be used.
246   V8_INLINE uint16_t Get(int index) const;
247   V8_INLINE uint16_t Get(int index, Isolate* isolate) const;
248   V8_INLINE uint16_t Get(int index, LocalIsolate* local_isolate) const;
249   // Method to pass down the access_guard. Useful for recursive calls such as
250   // ThinStrings where we go String::Get into ThinString::Get into String::Get
251   // again for the internalized string.
252   V8_INLINE uint16_t
253   Get(int index, PtrComprCageBase cage_base,
254       const SharedStringAccessGuardIfNeeded& access_guard) const;
255 
256   // ES6 section 7.1.3.1 ToNumber Applied to the String Type
257   static Handle<Object> ToNumber(Isolate* isolate, Handle<String> subject);
258 
259   // Flattens the string.  Checks first inline to see if it is
260   // necessary.  Does nothing if the string is not a cons string.
261   // Flattening allocates a sequential string with the same data as
262   // the given string and mutates the cons string to a degenerate
263   // form, where the first component is the new sequential string and
264   // the second component is the empty string.  If allocation fails,
265   // this function returns a failure.  If flattening succeeds, this
266   // function returns the sequential string that is now the first
267   // component of the cons string.
268   //
269   // Degenerate cons strings are handled specially by the garbage
270   // collector (see IsShortcutCandidate).
271 
272   static V8_INLINE Handle<String> Flatten(
273       Isolate* isolate, Handle<String> string,
274       AllocationType allocation = AllocationType::kYoung);
275   static V8_INLINE Handle<String> Flatten(
276       LocalIsolate* isolate, Handle<String> string,
277       AllocationType allocation = AllocationType::kYoung);
278 
279   // Tries to return the content of a flat string as a structure holding either
280   // a flat vector of char or of base::uc16.
281   // If the string isn't flat, and therefore doesn't have flat content, the
282   // returned structure will report so, and can't provide a vector of either
283   // kind.
284   // When using a SharedStringAccessGuard, the guard must outlive the
285   // returned FlatContent.
286   V8_EXPORT_PRIVATE V8_INLINE FlatContent
287   GetFlatContent(const DisallowGarbageCollection& no_gc);
288   V8_EXPORT_PRIVATE V8_INLINE FlatContent
289   GetFlatContent(const DisallowGarbageCollection& no_gc,
290                  const SharedStringAccessGuardIfNeeded&);
291 
292   // Returns the parent of a sliced string or first part of a flat cons string.
293   // Requires: StringShape(this).IsIndirect() && this->IsFlat()
294   inline String GetUnderlying() const;
295 
296   // Shares the string. Checks inline if the string is already shared or can be
297   // shared by transitioning its map in-place. If neither is possible, flattens
298   // and copies into a new shared sequential string.
299   static inline Handle<String> Share(Isolate* isolate, Handle<String> string);
300 
301   // String relational comparison, implemented according to ES6 section 7.2.11
302   // Abstract Relational Comparison (step 5): The comparison of Strings uses a
303   // simple lexicographic ordering on sequences of code unit values. There is no
304   // attempt to use the more complex, semantically oriented definitions of
305   // character or string equality and collating order defined in the Unicode
306   // specification. Therefore String values that are canonically equal according
307   // to the Unicode standard could test as unequal. In effect this algorithm
308   // assumes that both Strings are already in normalized form. Also, note that
309   // for strings containing supplementary characters, lexicographic ordering on
310   // sequences of UTF-16 code unit values differs from that on sequences of code
311   // point values.
312   V8_WARN_UNUSED_RESULT static ComparisonResult Compare(Isolate* isolate,
313                                                         Handle<String> x,
314                                                         Handle<String> y);
315 
316   // Perform ES6 21.1.3.8, including checking arguments.
317   static Object IndexOf(Isolate* isolate, Handle<Object> receiver,
318                         Handle<Object> search, Handle<Object> position);
319   // Perform string match of pattern on subject, starting at start index.
320   // Caller must ensure that 0 <= start_index <= receiver->length(), as this
321   // does not check any arguments.
322   static int IndexOf(Isolate* isolate, Handle<String> receiver,
323                      Handle<String> search, int start_index);
324 
325   static Object LastIndexOf(Isolate* isolate, Handle<Object> receiver,
326                             Handle<Object> search, Handle<Object> position);
327 
328   // Encapsulates logic related to a match and its capture groups as required
329   // by GetSubstitution.
330   class Match {
331    public:
332     virtual Handle<String> GetMatch() = 0;
333     virtual Handle<String> GetPrefix() = 0;
334     virtual Handle<String> GetSuffix() = 0;
335 
336     // A named capture can be unmatched (either not specified in the pattern,
337     // or specified but unmatched in the current string), or matched.
338     enum CaptureState { UNMATCHED, MATCHED };
339 
340     virtual int CaptureCount() = 0;
341     virtual bool HasNamedCaptures() = 0;
342     virtual MaybeHandle<String> GetCapture(int i, bool* capture_exists) = 0;
343     virtual MaybeHandle<String> GetNamedCapture(Handle<String> name,
344                                                 CaptureState* state) = 0;
345 
346     virtual ~Match() = default;
347   };
348 
349   // ES#sec-getsubstitution
350   // GetSubstitution(matched, str, position, captures, replacement)
351   // Expand the $-expressions in the string and return a new string with
352   // the result.
353   // A {start_index} can be passed to specify where to start scanning the
354   // replacement string.
355   V8_WARN_UNUSED_RESULT static MaybeHandle<String> GetSubstitution(
356       Isolate* isolate, Match* match, Handle<String> replacement,
357       int start_index = 0);
358 
359   // String equality operations.
360   inline bool Equals(String other) const;
361   inline static bool Equals(Isolate* isolate, Handle<String> one,
362                             Handle<String> two);
363 
364   enum class EqualityType { kWholeString, kPrefix, kNoLengthCheck };
365 
366   // Check if this string matches the given vector of characters, either as a
367   // whole string or just a prefix.
368   //
369   // The Isolate is passed as "evidence" that this call is on the main thread,
370   // and to distinguish from the LocalIsolate overload.
371   template <EqualityType kEqType = EqualityType::kWholeString, typename Char>
372   inline bool IsEqualTo(base::Vector<const Char> str, Isolate* isolate) const;
373 
374   // Check if this string matches the given vector of characters, either as a
375   // whole string or just a prefix.
376   //
377   // This is main-thread only, like the Isolate* overload, but additionally
378   // computes the PtrComprCageBase for IsEqualToImpl.
379   template <EqualityType kEqType = EqualityType::kWholeString, typename Char>
380   inline bool IsEqualTo(base::Vector<const Char> str) const;
381 
382   // Check if this string matches the given vector of characters, either as a
383   // whole string or just a prefix.
384   //
385   // The LocalIsolate is passed to provide access to the string access lock,
386   // which is taken when reading the string's contents on a background thread.
387   template <EqualityType kEqType = EqualityType::kWholeString, typename Char>
388   inline bool IsEqualTo(base::Vector<const Char> str,
389                         LocalIsolate* isolate) const;
390 
391   V8_EXPORT_PRIVATE bool HasOneBytePrefix(base::Vector<const char> str);
392   V8_EXPORT_PRIVATE inline bool IsOneByteEqualTo(base::Vector<const char> str);
393 
394   // Returns true if the |str| is a valid ECMAScript identifier.
395   static bool IsIdentifier(Isolate* isolate, Handle<String> str);
396 
397   // Return a UTF8 representation of the string.  The string is null
398   // terminated but may optionally contain nulls.  Length is returned
399   // in length_output if length_output is not a null pointer.  The string
400   // should be nearly flat, otherwise the performance of this method may
401   // be very slow (quadratic in the length).  Setting robustness_flag to
402   // ROBUST_STRING_TRAVERSAL invokes behaviour that is robust.  This means it
403   // handles unexpected data without causing assert failures and it does not
404   // do any heap allocations.  This is useful when printing stack traces.
405   std::unique_ptr<char[]> ToCString(AllowNullsFlag allow_nulls,
406                                     RobustnessFlag robustness_flag, int offset,
407                                     int length, int* length_output = nullptr);
408   V8_EXPORT_PRIVATE std::unique_ptr<char[]> ToCString(
409       AllowNullsFlag allow_nulls = DISALLOW_NULLS,
410       RobustnessFlag robustness_flag = FAST_STRING_TRAVERSAL,
411       int* length_output = nullptr);
412 
413   // Externalization.
414   V8_EXPORT_PRIVATE bool MakeExternal(
415       v8::String::ExternalStringResource* resource);
416   V8_EXPORT_PRIVATE bool MakeExternal(
417       v8::String::ExternalOneByteStringResource* resource);
418   bool SupportsExternalization();
419 
420   // Conversion.
421   // "array index": an index allowed by the ES spec for JSArrays.
422   inline bool AsArrayIndex(uint32_t* index);
423 
424   // This is used for calculating array indices but differs from an
425   // Array Index in the regard that this does not support the full
426   // array index range. This only supports positive numbers less than
427   // or equal to INT_MAX.
428   //
429   // String::AsArrayIndex might be a better fit if you're looking to
430   // calculate the array index.
431   //
432   // if val < 0 or val > INT_MAX, returns -1
433   // if 0 <= val <= INT_MAX, returns val
434   static int32_t ToArrayIndex(Address addr);
435 
436   // "integer index": the string is the decimal representation of an
437   // integer in the range of a size_t. Useful for TypedArray accesses.
438   inline bool AsIntegerIndex(size_t* index);
439 
440   // Trimming.
441   enum TrimMode { kTrim, kTrimStart, kTrimEnd };
442 
443   V8_EXPORT_PRIVATE void PrintOn(FILE* out);
444   V8_EXPORT_PRIVATE void PrintOn(std::ostream& out);
445 
446   // For use during stack traces.  Performs rudimentary sanity check.
447   bool LooksValid();
448 
449   // Printing utility functions.
450   // - PrintUC16 prints the raw string contents to the given stream.
451   //   Non-printable characters are formatted as hex, but otherwise the string
452   //   is printed as-is.
453   // - StringShortPrint and StringPrint have extra formatting: they add a
454   //   prefix and suffix depending on the string kind, may add other information
455   //   such as the string heap object address, may truncate long strings, etc.
456   const char* PrefixForDebugPrint() const;
457   const char* SuffixForDebugPrint() const;
458   void StringShortPrint(StringStream* accumulator);
459   void PrintUC16(std::ostream& os, int start = 0, int end = -1);
460   void PrintUC16(StringStream* accumulator, int start, int end);
461 
462   // Dispatched behavior.
463 #if defined(DEBUG) || defined(OBJECT_PRINT)
464   char* ToAsciiArray();
465 #endif
466   DECL_PRINTER(String)
467   DECL_VERIFIER(String)
468 
469   inline bool IsFlat() const;
470   inline bool IsFlat(PtrComprCageBase cage_base) const;
471 
472   inline bool IsShared() const;
473   inline bool IsShared(PtrComprCageBase cage_base) const;
474 
475   // Max char codes.
476   static const int32_t kMaxOneByteCharCode = unibrow::Latin1::kMaxChar;
477   static const uint32_t kMaxOneByteCharCodeU = unibrow::Latin1::kMaxChar;
478   static const int kMaxUtf16CodeUnit = 0xffff;
479   static const uint32_t kMaxUtf16CodeUnitU = kMaxUtf16CodeUnit;
480   static const base::uc32 kMaxCodePoint = 0x10ffff;
481 
482   // Maximal string length.
483   // The max length is different on 32 and 64 bit platforms. Max length for
484   // 32-bit platforms is ~268.4M chars. On 64-bit platforms, max length is
485   // ~536.8M chars.
486   // See include/v8.h for the definition.
487   static const int kMaxLength = v8::String::kMaxLength;
488   // There are several defining limits imposed by our current implementation:
489   // - any string's length must fit into a Smi.
490   static_assert(kMaxLength <= kSmiMaxValue,
491                 "String length must fit into a Smi");
492   // - adding two string lengths must still fit into a 32-bit int without
493   //   overflow
494   static_assert(kMaxLength * 2 <= kMaxInt,
495                 "String::kMaxLength * 2 must fit into an int32");
496   // - any heap object's size in bytes must be able to fit into a Smi, because
497   //   its space on the heap might be filled with a Filler; for strings this
498   //   means SeqTwoByteString::kMaxSize must be able to fit into a Smi.
499   static_assert(kMaxLength * 2 + kHeaderSize <= kSmiMaxValue,
500                 "String object size in bytes must fit into a Smi");
501   // - any heap object's size in bytes must be able to fit into an int, because
502   //   that's what our object handling code uses almost everywhere.
503   static_assert(kMaxLength * 2 + kHeaderSize <= kMaxInt,
504                 "String object size in bytes must fit into an int");
505 
506   // Max length for computing hash. For strings longer than this limit the
507   // string length is used as the hash value.
508   static const int kMaxHashCalcLength = 16383;
509 
510   // Limit for truncation in short printing.
511   static const int kMaxShortPrintLength = 1024;
512 
513   // Helper function for flattening strings.
514   template <typename sinkchar>
515   EXPORT_TEMPLATE_DECLARE(V8_EXPORT_PRIVATE)
516   static void WriteToFlat(String source, sinkchar* sink, int from, int to);
517   template <typename sinkchar>
518   static void WriteToFlat(String source, sinkchar* sink, int from, int to,
519                           PtrComprCageBase cage_base,
520                           const SharedStringAccessGuardIfNeeded&);
521 
IsAscii(const char * chars,int length)522   static inline bool IsAscii(const char* chars, int length) {
523     return IsAscii(reinterpret_cast<const uint8_t*>(chars), length);
524   }
525 
IsAscii(const uint8_t * chars,int length)526   static inline bool IsAscii(const uint8_t* chars, int length) {
527     return NonAsciiStart(chars, length) >= length;
528   }
529 
NonOneByteStart(const base::uc16 * chars,int length)530   static inline int NonOneByteStart(const base::uc16* chars, int length) {
531     DCHECK(IsAligned(reinterpret_cast<Address>(chars), sizeof(base::uc16)));
532     const uint16_t* start = chars;
533     const uint16_t* limit = chars + length;
534 
535     if (static_cast<size_t>(length) >= kUIntptrSize) {
536       // Check unaligned chars.
537       while (!IsAligned(reinterpret_cast<Address>(chars), kUIntptrSize)) {
538         if (*chars > unibrow::Latin1::kMaxChar) {
539           return static_cast<int>(chars - start);
540         }
541         ++chars;
542       }
543 
544       // Check aligned words.
545       STATIC_ASSERT(unibrow::Latin1::kMaxChar == 0xFF);
546 #ifdef V8_TARGET_LITTLE_ENDIAN
547       const uintptr_t non_one_byte_mask = kUintptrAllBitsSet / 0xFFFF * 0xFF00;
548 #else
549       const uintptr_t non_one_byte_mask = kUintptrAllBitsSet / 0xFFFF * 0x00FF;
550 #endif
551       while (chars + sizeof(uintptr_t) <= limit) {
552         if (*reinterpret_cast<const uintptr_t*>(chars) & non_one_byte_mask) {
553           break;
554         }
555         chars += (sizeof(uintptr_t) / sizeof(base::uc16));
556       }
557     }
558 
559     // Check remaining unaligned chars, or find non-one-byte char in word.
560     while (chars < limit) {
561       if (*chars > unibrow::Latin1::kMaxChar) {
562         return static_cast<int>(chars - start);
563       }
564       ++chars;
565     }
566 
567     return static_cast<int>(chars - start);
568   }
569 
IsOneByte(const base::uc16 * chars,int length)570   static inline bool IsOneByte(const base::uc16* chars, int length) {
571     return NonOneByteStart(chars, length) >= length;
572   }
573 
574   // May only be called when a SharedStringAccessGuard is not needed (i.e. on
575   // the main thread or on read-only strings).
576   template <class Visitor>
577   static inline ConsString VisitFlat(Visitor* visitor, String string,
578                                      int offset = 0);
579 
580   template <class Visitor>
581   static inline ConsString VisitFlat(
582       Visitor* visitor, String string, int offset,
583       const SharedStringAccessGuardIfNeeded& access_guard);
584 
585   template <typename IsolateT>
586   static Handle<FixedArray> CalculateLineEnds(IsolateT* isolate,
587                                               Handle<String> string,
588                                               bool include_ending_line);
589 
590   // Returns true if string can be internalized without copying. In such cases
591   // the string is inserted into the string table and its map is changed to an
592   // internalized equivalent.
593   static inline bool IsInPlaceInternalizable(String string);
594   static inline bool IsInPlaceInternalizable(InstanceType instance_type);
595 
596   static inline bool IsInPlaceInternalizableExcludingExternal(
597       InstanceType instance_type);
598 
599  private:
600   friend class Name;
601   friend class StringTableInsertionKey;
602   friend class SharedStringTableInsertionKey;
603   friend class InternalizedStringKey;
604 
605   // Implementation of the Get() public methods. Do not use directly.
606   V8_INLINE uint16_t
607   GetImpl(int index, PtrComprCageBase cage_base,
608           const SharedStringAccessGuardIfNeeded& access_guard) const;
609 
610   // Implementation of the IsEqualTo() public methods. Do not use directly.
611   template <EqualityType kEqType, typename Char>
612   V8_INLINE bool IsEqualToImpl(
613       base::Vector<const Char> str, PtrComprCageBase cage_base,
614       const SharedStringAccessGuardIfNeeded& access_guard) const;
615 
616   // Out-of-line IsEqualToImpl for ConsString.
617   template <typename Char>
618   V8_NOINLINE static bool IsConsStringEqualToImpl(
619       ConsString string, int slice_offset, base::Vector<const Char> str,
620       PtrComprCageBase cage_base,
621       const SharedStringAccessGuardIfNeeded& access_guard);
622 
623   V8_EXPORT_PRIVATE static Handle<String> SlowFlatten(
624       Isolate* isolate, Handle<ConsString> cons, AllocationType allocation);
625 
626   V8_EXPORT_PRIVATE V8_INLINE static base::Optional<FlatContent>
627   TryGetFlatContentFromDirectString(PtrComprCageBase cage_base,
628                                     const DisallowGarbageCollection& no_gc,
629                                     String string, int offset, int length,
630                                     const SharedStringAccessGuardIfNeeded&);
631   V8_EXPORT_PRIVATE FlatContent
632   SlowGetFlatContent(const DisallowGarbageCollection& no_gc,
633                      const SharedStringAccessGuardIfNeeded&);
634 
635   V8_EXPORT_PRIVATE static Handle<String> SlowShare(Isolate* isolate,
636                                                     Handle<String> source);
637 
638   // Slow case of String::Equals.  This implementation works on any strings
639   // but it is most efficient on strings that are almost flat.
640   V8_EXPORT_PRIVATE bool SlowEquals(String other) const;
641   V8_EXPORT_PRIVATE bool SlowEquals(
642       String other, const SharedStringAccessGuardIfNeeded&) const;
643 
644   V8_EXPORT_PRIVATE static bool SlowEquals(Isolate* isolate, Handle<String> one,
645                                            Handle<String> two);
646 
647   // Slow case of AsArrayIndex.
648   V8_EXPORT_PRIVATE bool SlowAsArrayIndex(uint32_t* index);
649   V8_EXPORT_PRIVATE bool SlowAsIntegerIndex(size_t* index);
650 
651   // Compute and set the hash code.
652   V8_EXPORT_PRIVATE uint32_t ComputeAndSetHash();
653   V8_EXPORT_PRIVATE uint32_t
654   ComputeAndSetHash(const SharedStringAccessGuardIfNeeded&);
655 
656   TQ_OBJECT_CONSTRUCTORS(String)
657 };
658 
659 // clang-format off
660 extern template EXPORT_TEMPLATE_DECLARE(V8_EXPORT_PRIVATE)
661 void String::WriteToFlat(String source, uint8_t* sink, int from, int to);
662 extern template EXPORT_TEMPLATE_DECLARE(V8_EXPORT_PRIVATE)
663 void String::WriteToFlat(String source, uint16_t* sink, int from, int to);
664 extern template EXPORT_TEMPLATE_DECLARE(V8_EXPORT_PRIVATE)
665 void String::WriteToFlat(String source, uint8_t* sink, int from, int to,
666                          PtrComprCageBase cage_base,
667                          const SharedStringAccessGuardIfNeeded&);
668 extern template EXPORT_TEMPLATE_DECLARE(V8_EXPORT_PRIVATE)
669 void String::WriteToFlat(String source, uint16_t* sink, int from, int to,
670                          PtrComprCageBase cage_base,
671                          const SharedStringAccessGuardIfNeeded&);
672 // clang-format on
673 
674 class SubStringRange {
      // Range-style view of a String: begin()/end() return iterators over it.
      // Keeps a reference to the caller's DisallowGarbageCollection scope, so an
      // instance must not outlive that scope.
675  public:
      // `first` and `length` select the covered part of `string`. The default
      // length of -1 presumably means "through the end of the string" -- TODO
      // confirm against the inline definition.
676   inline SubStringRange(String string, const DisallowGarbageCollection& no_gc,
677                         int first = 0, int length = -1);
678   class iterator;
679   inline iterator begin();
680   inline iterator end();
681 
682  private:
683   String string_;
684   int first_;
685   int length_;
686   const DisallowGarbageCollection& no_gc_;
687 };
688 
689 // The SeqString abstract class captures sequential string values.
    // SeqOneByteString and SeqTwoByteString pass it as the parent type to their
    // Torque-generated base classes.
690 class SeqString : public TorqueGeneratedSeqString<SeqString, String> {
691  public:
692   // Truncate the string in-place if possible and return the result.
693   // In case of new_length == 0, the empty string is returned without
694   // truncating the original string.
      // The result is marked V8_WARN_UNUSED_RESULT: callers must use the
      // returned handle rather than assume `string` itself was shortened.
695   V8_WARN_UNUSED_RESULT static Handle<String> Truncate(Handle<SeqString> string,
696                                                        int new_length);
697 
698   TQ_OBJECT_CONSTRUCTORS(SeqString)
699 };
700 
701 class InternalizedString
702     : public TorqueGeneratedInternalizedString<InternalizedString, String> {
703  public:
      // Currently declares nothing beyond what the Torque-generated base class
      // and the constructor macro below provide.
704   // TODO(neis): Possibly move some stuff from String here.
705 
706   TQ_OBJECT_CONSTRUCTORS(InternalizedString)
707 };
708 
709 // The SeqOneByteString class captures sequential one-byte string objects.
710 // Each character in a SeqOneByteString is a one-byte character.
711 class SeqOneByteString
712     : public TorqueGeneratedSeqOneByteString<SeqOneByteString, SeqString> {
713  public:
714   static const bool kHasOneByteEncoding = true;
715   using Char = uint8_t;
716 
717   // Dispatched behavior. The non SharedStringAccessGuardIfNeeded method is also
718   // defined for convenience and it will check that the access guard is not
719   // needed.
720   inline uint8_t Get(int index) const;
721   inline uint8_t Get(int index, PtrComprCageBase cage_base,
722                      const SharedStringAccessGuardIfNeeded& access_guard) const;
      // NOTE(review): `value` is declared as uint16_t although Char is uint8_t;
      // presumably the definition checks that it fits in one byte -- confirm.
723   inline void SeqOneByteStringSet(int index, uint16_t value);
724   inline void SeqOneByteStringSetChars(int index, const uint8_t* string,
725                                        int length);
726 
727   // Get the address of the characters in this string.
728   inline Address GetCharsAddress() const;
729 
730   // Get a pointer to the characters of the string. May only be called when a
731   // SharedStringAccessGuard is not needed (i.e. on the main thread or on
732   // read-only strings).
733   inline uint8_t* GetChars(const DisallowGarbageCollection& no_gc) const;
734 
735   // Get a pointer to the characters of the string.
736   inline uint8_t* GetChars(
737       const DisallowGarbageCollection& no_gc,
738       const SharedStringAccessGuardIfNeeded& access_guard) const;
739 
740   // Clear uninitialized padding space. This ensures that the snapshot content
741   // is deterministic.
742   void clear_padding();
743 
744   // Maximal memory usage for a single sequential one-byte string.
745   static const int kMaxCharsSize = kMaxLength;
746   static const int kMaxSize = OBJECT_POINTER_ALIGN(kMaxCharsSize + kHeaderSize);
      // The space after the header in a maximal object must be able to hold
      // String::kMaxLength one-byte characters.
747   STATIC_ASSERT((kMaxSize - kHeaderSize) >= String::kMaxLength);
748 
749   int AllocatedSize();
750 
751   // A SeqOneByteString has different maps depending on whether it is shared.
752   static inline bool IsCompatibleMap(Map map, ReadOnlyRoots roots);
753 
754   class BodyDescriptor;
755 
756   TQ_OBJECT_CONSTRUCTORS(SeqOneByteString)
757 };
758 
759 // The SeqTwoByteString class captures sequential unicode string objects.
760 // Each character in a SeqTwoByteString is a two-byte uint16_t.
761 class SeqTwoByteString
762     : public TorqueGeneratedSeqTwoByteString<SeqTwoByteString, SeqString> {
763  public:
764   static const bool kHasOneByteEncoding = false;
765   using Char = uint16_t;
766 
767   // Dispatched behavior.
      // Unlike SeqOneByteString, only the overload taking an access guard is
      // declared here.
768   inline uint16_t Get(
769       int index, PtrComprCageBase cage_base,
770       const SharedStringAccessGuardIfNeeded& access_guard) const;
771   inline void SeqTwoByteStringSet(int index, uint16_t value);
772 
773   // Get the address of the characters in this string.
774   inline Address GetCharsAddress() const;
775 
776   // Get a pointer to the characters of the string. May only be called when a
777   // SharedStringAccessGuard is not needed (i.e. on the main thread or on
778   // read-only strings).
779   inline base::uc16* GetChars(const DisallowGarbageCollection& no_gc) const;
780 
781   // Get a pointer to the characters of the string.
782   inline base::uc16* GetChars(
783       const DisallowGarbageCollection& no_gc,
784       const SharedStringAccessGuardIfNeeded& access_guard) const;
785 
786   // Clear uninitialized padding space. This ensures that the snapshot content
787   // is deterministic.
788   void clear_padding();
789 
790   // Maximal memory usage for a single sequential two-byte string.
791   static const int kMaxCharsSize = kMaxLength * 2;
792   static const int kMaxSize = OBJECT_POINTER_ALIGN(kMaxCharsSize + kHeaderSize);
      // The space after the header in a maximal object, counted in uint16_t
      // units, must be able to hold String::kMaxLength characters.
793   STATIC_ASSERT(static_cast<int>((kMaxSize - kHeaderSize) / sizeof(uint16_t)) >=
794                 String::kMaxLength);
795 
796   int AllocatedSize();
797 
798   // A SeqTwoByteString has different maps depending on whether it is shared.
799   static inline bool IsCompatibleMap(Map map, ReadOnlyRoots roots);
800 
801   class BodyDescriptor;
802 
803   TQ_OBJECT_CONSTRUCTORS(SeqTwoByteString)
804 };
805 
806 // The ConsString class describes string values built by using the
807 // addition operator on strings.  A ConsString is a pair where the
808 // first and second components are pointers to other string values.
809 // One or both components of a ConsString can be pointers to other
810 // ConsStrings, creating a binary tree of ConsStrings where the leaves
811 // are non-ConsString string values.  The string value represented by
812 // a ConsString can be obtained by concatenating the leaf string
813 // values in a left-to-right depth-first traversal of the tree.
814 class ConsString : public TorqueGeneratedConsString<ConsString, String> {
815  public:
816   // Doesn't check that the result is a string, even in debug mode.  This is
817   // useful during GC where the mark bits confuse the checks.
      // The result is therefore typed as Object rather than String.
818   inline Object unchecked_first() const;
819 
820   // Doesn't check that the result is a string, even in debug mode.  This is
821   // useful during GC where the mark bits confuse the checks.
      // The result is therefore typed as Object rather than String.
822   inline Object unchecked_second() const;
823 
      // NOTE(review): presumably true when the second component is empty, so
      // that the first component alone holds the contents -- confirm against
      // the inline definition.
824   V8_INLINE bool IsFlat(PtrComprCageBase cage_base) const;
825 
826   // Dispatched behavior.
827   V8_EXPORT_PRIVATE uint16_t
828   Get(int index, PtrComprCageBase cage_base,
829       const SharedStringAccessGuardIfNeeded& access_guard) const;
830 
831   // Minimum length for a cons string.
832   static const int kMinLength = 13;
833 
834   class BodyDescriptor;
835 
836   DECL_VERIFIER(ConsString)
837 
838   TQ_OBJECT_CONSTRUCTORS(ConsString)
839 };
840 
841 // The ThinString class describes string objects that are just references
842 // to another string object. They are used for in-place internalization when
843 // the original string cannot actually be internalized in-place: in these
844 // cases, the original string is converted to a ThinString pointing at its
845 // internalized version (which is allocated as a new object).
846 // In terms of memory layout and most algorithms operating on strings,
847 // ThinStrings can be thought of as "one-part cons strings".
848 class ThinString : public TorqueGeneratedThinString<ThinString, String> {
849  public:
      // Getter typed as HeapObject rather than String. NOTE(review): by analogy
      // with ConsString::unchecked_first this presumably skips the String type
      // check on the referenced object -- confirm against the definition.
850   DECL_GETTER(unchecked_actual, HeapObject)
851 
      // Dispatched behavior.
852   V8_EXPORT_PRIVATE uint16_t
853   Get(int index, PtrComprCageBase cage_base,
854       const SharedStringAccessGuardIfNeeded& access_guard) const;
855 
856   DECL_VERIFIER(ThinString)
857 
858   class BodyDescriptor;
859 
860   TQ_OBJECT_CONSTRUCTORS(ThinString)
861 };
862 
863 // The Sliced String class describes strings that are substrings of another
864 // sequential string.  The motivation is to save time and memory when creating
865 // a substring.  A Sliced String is described as a pointer to the parent,
866 // the offset from the start of the parent string and the length.  Using
867 // a Sliced String therefore requires unpacking of the parent string and
868 // adding the offset to the start address.  Substrings of a Sliced String
869 // are not nested since the double indirection is simplified when creating
870 // such a substring.
871 // Currently missing features are:
872 //  - truncating sliced string to enable otherwise unneeded parent to be GC'ed.
873 class SlicedString : public TorqueGeneratedSlicedString<SlicedString, String> {
874  public:
      // Sets the parent string. `mode` selects the write-barrier behavior and
      // defaults to UPDATE_WRITE_BARRIER.
875   inline void set_parent(String parent,
876                          WriteBarrierMode mode = UPDATE_WRITE_BARRIER);
877   // Dispatched behavior.
878   V8_EXPORT_PRIVATE uint16_t
879   Get(int index, PtrComprCageBase cage_base,
880       const SharedStringAccessGuardIfNeeded& access_guard) const;
881 
882   // Minimum length for a sliced string.
883   static const int kMinLength = 13;
884 
885   class BodyDescriptor;
886 
887   DECL_VERIFIER(SlicedString)
888 
889   TQ_OBJECT_CONSTRUCTORS(SlicedString)
890 };
891 
892 // The ExternalString class describes string values that are backed by
893 // a string resource that lies outside the V8 heap.  ExternalStrings
894 // consist of the length field common to all strings and a pointer to the
895 // external resource.  It is important to ensure (externally) that the
896 // resource is not deallocated while the ExternalString is live in the
897 // V8 heap.
898 //
899 // The API expects that all ExternalStrings are created through the
900 // API.  Therefore, ExternalStrings should not be used internally.
901 class ExternalString
902     : public TorqueGeneratedExternalString<ExternalString, String> {
903  public:
904   DECL_VERIFIER(ExternalString)
905 
906   // Size of uncached external strings.
      // Computed as the end of the resource field, i.e. an uncached object
      // stops right after that field.
907   static const int kUncachedSize =
908       kResourceOffset + FIELD_SIZE(kResourceOffset);
909 
910   inline void AllocateExternalPointerEntries(Isolate* isolate);
911 
912   // Return whether the external string data pointer is not cached.
913   inline bool is_uncached() const;
914   // Size in bytes of the external payload.
915   int ExternalPayloadSize() const;
916 
917   // Used in the serializer/deserializer.
918   DECL_GETTER(resource_as_address, Address)
919   inline void set_address_as_resource(Isolate* isolate, Address address);
920   inline uint32_t GetResourceRefForDeserialization();
921   inline void SetResourceRefForSerialization(uint32_t ref);
922 
923   // Disposes string's resource object if it has not already been disposed.
924   inline void DisposeResource(Isolate* isolate);
925 
      // The resource field offset must match Internals::kStringResourceOffset.
926   STATIC_ASSERT(kResourceOffset == Internals::kStringResourceOffset);
      // The one-byte and two-byte subclasses each assert that their kSize equals
      // this value.
927   static const int kSizeOfAllExternalStrings = kHeaderSize;
928 
929  private:
930   // Hide generated accessors.
931   DECL_ACCESSORS(resource, void*)
932   DECL_ACCESSORS(resource_data, void*)
933 
934   TQ_OBJECT_CONSTRUCTORS(ExternalString)
935 };
936 
937 // The ExternalOneByteString class is an external string backed by a
938 // one-byte string.
939 class ExternalOneByteString
940     : public TorqueGeneratedExternalOneByteString<ExternalOneByteString,
941                                                   ExternalString> {
942  public:
943   static const bool kHasOneByteEncoding = true;
944 
945   using Resource = v8::String::ExternalOneByteStringResource;
946 
947   // The underlying resource.
948   DECL_GETTER(resource, const Resource*)
949 
950   // It is assumed that the previous resource is null. If it is not null, then
951   // it is the responsibility of the caller to handle the previous resource.
952   inline void SetResource(Isolate* isolate, const Resource* buffer);
953 
954   // Used only during serialization.
955   inline void set_resource(Isolate* isolate, const Resource* buffer);
956 
957   // Update the pointer cache to the external character array.
958   // The cached pointer is always valid, as the external character array does
959   // not move during lifetime.  Deserialization is the only exception, after
960   // which the pointer cache has to be refreshed.
961   inline void update_data_cache(Isolate* isolate);
962 
963   inline const uint8_t* GetChars(PtrComprCageBase cage_base) const;
964 
965   // Dispatched behavior.
966   inline uint8_t Get(int index, PtrComprCageBase cage_base,
967                      const SharedStringAccessGuardIfNeeded& access_guard) const;
968 
969   class BodyDescriptor;
970 
      // This subclass must have the size shared by all external strings.
971   STATIC_ASSERT(kSize == kSizeOfAllExternalStrings);
972 
973   TQ_OBJECT_CONSTRUCTORS(ExternalOneByteString)
974 
975  private:
976   // The underlying resource as a non-const pointer.
977   DECL_GETTER(mutable_resource, Resource*)
978 };
979 
980 // The ExternalTwoByteString class is an external string backed by a UTF-16
981 // encoded string.
982 class ExternalTwoByteString
983     : public TorqueGeneratedExternalTwoByteString<ExternalTwoByteString,
984                                                   ExternalString> {
985  public:
986   static const bool kHasOneByteEncoding = false;
987 
988   using Resource = v8::String::ExternalStringResource;
989 
990   // The underlying string resource.
991   DECL_GETTER(resource, const Resource*)
992 
993   // It is assumed that the previous resource is null. If it is not null, then
994   // it is the responsibility of the caller to handle the previous resource.
995   inline void SetResource(Isolate* isolate, const Resource* buffer);
996 
997   // Used only during serialization.
998   inline void set_resource(Isolate* isolate, const Resource* buffer);
999 
1000   // Update the pointer cache to the external character array.
1001   // The cached pointer is always valid, as the external character array does
1002   // not move during lifetime.  Deserialization is the only exception, after
1003   // which the pointer cache has to be refreshed.
1004   inline void update_data_cache(Isolate* isolate);
1005 
1006   inline const uint16_t* GetChars(PtrComprCageBase cage_base) const;
1007 
1008   // Dispatched behavior.
1009   inline uint16_t Get(
1010       int index, PtrComprCageBase cage_base,
1011       const SharedStringAccessGuardIfNeeded& access_guard) const;
1012 
1013   // For regexp code.
1014   inline const uint16_t* ExternalTwoByteStringGetData(unsigned start);
1015 
1016   class BodyDescriptor;
1017 
       // This subclass must have the size shared by all external strings.
1018   STATIC_ASSERT(kSize == kSizeOfAllExternalStrings);
1019 
1020   TQ_OBJECT_CONSTRUCTORS(ExternalTwoByteString)
1021 
1022  private:
1023   // The underlying resource as a non-const pointer.
1024   DECL_GETTER(mutable_resource, Resource*)
1025 };
1026 
1027 // A flat string reader provides random access to the contents of a
1028 // string independent of the character width of the string. The handle
1029 // must be valid as long as the reader is being used.
1030 // Not safe to use from concurrent background threads.
1031 class V8_EXPORT_PRIVATE FlatStringReader : public Relocatable {
1032  public:
1033   FlatStringReader(Isolate* isolate, Handle<String> str);
       // Overrides the Relocatable hook. NOTE(review): presumably re-derives
       // start_ because the string's characters may have moved -- confirm
       // against the definition.
1034   void PostGarbageCollection() override;
       // Returns the character at `index` as a base::uc32.
1035   inline base::uc32 Get(int index) const;
       // Returns the character at `index` as the requested `Char` type.
1036   template <typename Char>
1037   inline Char Get(int index) const;
1038   int length() const { return length_; }
1039 
1040  private:
1041   Handle<String> str_;
1042   bool is_one_byte_;
1043   int const length_;
       // Untyped pointer to the character data; is_one_byte_ presumably tells
       // how to interpret it -- TODO confirm.
1044   const void* start_;
1045 };
1046 
1047 // This maintains an off-stack representation of the stack frames required
1048 // to traverse a ConsString, allowing an entirely iterative and restartable
1049 // traversal of the entire string.
1050 class ConsStringIterator {
1051  public:
       // A default-constructed iterator is empty: depth_ starts at 0, so Next()
       // returns a default-constructed (null) String until Reset() is called
       // with a non-null ConsString.
1052   inline ConsStringIterator() = default;
1053   inline explicit ConsStringIterator(ConsString cons_string, int offset = 0) {
1054     Reset(cons_string, offset);
1055   }
1056   ConsStringIterator(const ConsStringIterator&) = delete;
1057   ConsStringIterator& operator=(const ConsStringIterator&) = delete;
       // Restarts the traversal. `cons_string` and `offset` are forwarded to
       // Initialize() unless `cons_string` is null.
1058   inline void Reset(ConsString cons_string, int offset = 0) {
1059     depth_ = 0;
1060     // With a null cons_string, Next will always return a null String.
1061     if (cons_string.is_null()) return;
1062     Initialize(cons_string, offset);
1063   }
1064   // Returns a null String when complete. *offset_out is reset to 0 first.
1065   inline String Next(int* offset_out) {
1066     *offset_out = 0;
1067     if (depth_ == 0) return String();
1068     return Continue(offset_out);
1069   }
1070 
1071  private:
1072   static const int kStackSize = 32;
1073   // Use a mask instead of doing modulo operations for stack wrapping.
1074   static const int kDepthMask = kStackSize - 1;
1075   static_assert(base::bits::IsPowerOfTwo(kStackSize),
1076                 "kStackSize must be power of two");
1077   static inline int OffsetForDepth(int depth);
1078 
1079   inline void PushLeft(ConsString string);
1080   inline void PushRight(ConsString string);
1081   inline void AdjustMaximumDepth();
1082   inline void Pop();
       // True when the gap between the deepest level reached and the current
       // depth equals the number of frame slots.
1083   inline bool StackBlown() { return maximum_depth_ - depth_ == kStackSize; }
1084   V8_EXPORT_PRIVATE void Initialize(ConsString cons_string, int offset);
1085   V8_EXPORT_PRIVATE String Continue(int* offset_out);
1086   String NextLeaf(bool* blew_stack);
1087   String Search(int* offset_out);
1088 
1089   // Stack must always contain only frames for which right traversal
1090   // has not yet been performed.
1091   ConsString frames_[kStackSize];
1092   ConsString root_;
       // Zero-initialized so that a default-constructed iterator never reads an
       // indeterminate value (Next() inspects depth_ before anything else).
1093   int depth_ = 0;
1094   int maximum_depth_ = 0;
1095   int consumed_ = 0;
1096 };
1097 
1098 class StringCharacterStream;
1099 
     // Maps a character type to the sequential and external string classes
     // whose Get() returns that type. The primary template is only declared;
     // the uint8_t and uint16_t specializations below are the ones defined here.
1100 template <typename Char>
1101 struct CharTraits;
1102 
     // One-byte characters.
1103 template <>
1104 struct CharTraits<uint8_t> {
1105   using String = SeqOneByteString;
1106   using ExternalString = ExternalOneByteString;
1107 };
1108 
     // Two-byte characters.
1109 template <>
1110 struct CharTraits<uint16_t> {
1111   using String = SeqTwoByteString;
1112   using ExternalString = ExternalTwoByteString;
1113 };
1114 
1115 }  // namespace internal
1116 }  // namespace v8
1117 
1118 #include "src/objects/object-macros-undef.h"
1119 
1120 #endif  // V8_OBJECTS_STRING_H_
1121