• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #ifndef ECMASCRIPT_STRING_H
17 #define ECMASCRIPT_STRING_H
18 
19 #include <cstddef>
20 #include <cstdint>
21 #include <cstring>
22 
23 #include "ecmascript/base/utf_helper.h"
24 #include "ecmascript/common.h"
25 #include "ecmascript/ecma_macros.h"
26 #include "ecmascript/js_hclass.h"
27 #include "ecmascript/js_tagged_value.h"
28 #include "ecmascript/mem/barriers.h"
29 #include "ecmascript/mem/space.h"
30 #include "ecmascript/mem/tagged_object.h"
31 
32 #include "libpandabase/macros.h"
33 #include "securec.h"
34 #include "unicode/locid.h"
35 
36 namespace panda {
37 namespace test {
38     class EcmaStringEqualsTest;
39 }
40 namespace ecmascript {
41 template<typename T>
42 class JSHandle;
43 class JSPandaFile;
44 class EcmaVM;
45 class LineEcmaString;
46 class ConstantString;
47 class TreeEcmaString;
48 class SlicedString;
49 class FlatStringInfo;
50 
// Length guard used by string factory code: when `length` is not below MAX_STRING_LENGTH the macro
// hands "Invalid string length" to THROW_RANGE_ERROR_AND_RETURN together with nullptr as the value
// to return from the enclosing function. MAX_STRING_LENGTH is looked up unqualified at the
// expansion site.
// The body is wrapped in do { } while (0) so the macro expands to exactly one statement and composes
// safely with an unbraced if/else at the call site; invoke it with a trailing semicolon.
// NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
#define ECMA_STRING_CHECK_LENGTH_AND_TRHOW(vm, length)                                            \
    do {                                                                                          \
        if ((length) >= MAX_STRING_LENGTH) {                                                      \
            THROW_RANGE_ERROR_AND_RETURN((vm)->GetJSThread(), "Invalid string length", nullptr);  \
        }                                                                                         \
    } while (0)
56 
57 class EcmaString : public TaggedObject {
58     /* Mix Hash Code: --   { 0 | [31 bits raw hash code] }     computed through string
59                       \    { 1 | [31 bits integer numbers] }   fastpath for string to number
60     */
61 public:
62     CAST_CHECK(EcmaString, IsString);
63 
64     static constexpr uint32_t IS_INTEGER_MASK = 1U << 31;
65     static constexpr uint32_t STRING_COMPRESSED_BIT = 0x1;
66     static constexpr uint32_t STRING_INTERN_BIT = 0x2;
67     static constexpr size_t MAX_STRING_LENGTH = 0x40000000U; // 30 bits for string length, 2 bits for special meaning
68     static constexpr uint32_t STRING_LENGTH_SHIFT_COUNT = 2U;
69     static constexpr uint32_t MAX_INTEGER_HASH_NUMBER = 0x3B9AC9FF;
70     static constexpr uint32_t MAX_CACHED_INTEGER_SIZE = 9;
71 
72     static constexpr size_t MIX_LENGTH_OFFSET = TaggedObjectSize();
73     // In last bit of mix_length we store if this string is compressed or not.
74     ACCESSORS_PRIMITIVE_FIELD(MixLength, uint32_t, MIX_LENGTH_OFFSET, MIX_HASHCODE_OFFSET)
75     // In last bit of mix_hash we store if this string is small-integer number or not.
76     ACCESSORS_PRIMITIVE_FIELD(MixHashcode, uint32_t, MIX_HASHCODE_OFFSET, SIZE)
77 
78     enum CompressedStatus {
79         STRING_COMPRESSED,
80         STRING_UNCOMPRESSED,
81     };
82 
83     enum IsIntegerStatus {
84         NOT_INTEGER = 0,
85         IS_INTEGER,
86     };
87 
88     enum TrimMode : uint8_t {
89         TRIM,
90         TRIM_START,
91         TRIM_END,
92     };
93 
94     enum ConcatOptStatus {
95         BEGIN_STRING_ADD = 1,
96         IN_STRING_ADD,
97         CONFIRMED_IN_STRING_ADD,
98         END_STRING_ADD,
99         INVALID_STRING_ADD,
100         HAS_BACKING_STORE,
101     };
102 
103 private:
104     friend class EcmaStringAccessor;
105     friend class LineEcmaString;
106     friend class ConstantString;
107     friend class TreeEcmaString;
108     friend class SlicedString;
109     friend class FlatStringInfo;
110     friend class NameDictionary;
111     friend class panda::test::EcmaStringEqualsTest;
112 
113     static EcmaString *CreateEmptyString(const EcmaVM *vm);
114     static EcmaString *CreateFromUtf8(const EcmaVM *vm, const uint8_t *utf8Data, uint32_t utf8Len,
115         bool canBeCompress, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE, bool isConstantString = false,
116         uint32_t idOffset = 0);
117     static EcmaString *CreateFromUtf8CompressedSubString(const EcmaVM *vm, const JSHandle<EcmaString> &string,
118         uint32_t offset, uint32_t utf8Len, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE);
119     static EcmaString *CreateUtf16StringFromUtf8(const EcmaVM *vm, const uint8_t *utf8Data, uint32_t utf8Len,
120         MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE);
121     static EcmaString *CreateFromUtf16(const EcmaVM *vm, const uint16_t *utf16Data, uint32_t utf16Len,
122         bool canBeCompress, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE);
123     static SlicedString *CreateSlicedString(const EcmaVM *vm, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE);
124     static EcmaString *CreateLineString(const EcmaVM *vm, size_t length, bool compressed);
125     static EcmaString *CreateLineStringNoGC(const EcmaVM *vm, size_t length, bool compressed);
126     static EcmaString *CreateLineStringWithSpaceType(const EcmaVM *vm,
127         size_t length, bool compressed, MemSpaceType type);
128     static EcmaString *CreateTreeString(const EcmaVM *vm,
129         const JSHandle<EcmaString> &left, const JSHandle<EcmaString> &right, uint32_t length, bool compressed);
130     static EcmaString *CreateConstantString(const EcmaVM *vm, const uint8_t *utf8Data,
131         size_t length, bool compressed, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE, uint32_t idOffset = 0);
132     static EcmaString *Concat(const EcmaVM *vm, const JSHandle<EcmaString> &left,
133         const JSHandle<EcmaString> &right, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE);
134     static EcmaString *CopyStringToOldSpace(const EcmaVM *vm, const JSHandle<EcmaString> &original,
135         uint32_t length, bool compressed);
136     static EcmaString *FastSubString(const EcmaVM *vm,
137         const JSHandle<EcmaString> &src, uint32_t start, uint32_t length);
138     static EcmaString *GetSlicedString(const EcmaVM *vm,
139         const JSHandle<EcmaString> &src, uint32_t start, uint32_t length);
140     static EcmaString *GetSubString(const EcmaVM *vm,
141         const JSHandle<EcmaString> &src, uint32_t start, uint32_t length);
142     // require src is LineString
143     // not change src data structure
144     static inline EcmaString *FastSubUtf8String(const EcmaVM *vm,
145         const JSHandle<EcmaString> &src, uint32_t start, uint32_t length);
146     // require src is LineString
147     // not change src data structure
148     static inline EcmaString *FastSubUtf16String(const EcmaVM *vm,
149         const JSHandle<EcmaString> &src, uint32_t start, uint32_t length);
150     inline void TrimLineString(const JSThread *thread, uint32_t newLength);
IsUtf8()151     inline bool IsUtf8() const
152     {
153         return (GetMixLength() & STRING_COMPRESSED_BIT) == STRING_COMPRESSED;
154     }
155 
IsUtf16()156     inline bool IsUtf16() const
157     {
158         return (GetMixLength() & STRING_COMPRESSED_BIT) == STRING_UNCOMPRESSED;
159     }
160 
IsInteger()161     inline bool IsInteger()
162     {
163         return (GetHashcode() & IS_INTEGER_MASK) == IS_INTEGER_MASK;
164     }
165 
166     // require is LineString
167     inline uint16_t *GetData() const;
168     inline const uint8_t *GetDataUtf8() const;
169     inline const uint16_t *GetDataUtf16() const;
170 
171     // require is LineString
172     inline uint8_t *GetDataUtf8Writable();
173     inline uint16_t *GetDataUtf16Writable();
174 
GetLength()175     inline uint32_t GetLength() const
176     {
177         return GetMixLength() >> STRING_LENGTH_SHIFT_COUNT;
178     }
179 
180     inline void SetLength(uint32_t length, bool compressed = false)
181     {
182         ASSERT(length < MAX_STRING_LENGTH);
183         // Use 0u for compressed/utf8 expression
184         SetMixLength((length << STRING_LENGTH_SHIFT_COUNT) | (compressed ? STRING_COMPRESSED : STRING_UNCOMPRESSED));
185     }
186 
GetRawHashcode()187     inline uint32_t GetRawHashcode() const
188     {
189         return GetMixHashcode() & (~IS_INTEGER_MASK);
190     }
191 
MixHashcode(uint32_t hashcode,bool isInteger)192     static inline uint32_t MixHashcode(uint32_t hashcode, bool isInteger)
193     {
194         return isInteger ? (hashcode | IS_INTEGER_MASK) : (hashcode & (~IS_INTEGER_MASK));
195     }
196 
197     inline void SetRawHashcode(uint32_t hashcode, bool isInteger = false)
198     {
199         // Use 0u for not integer string's expression
200         SetMixHashcode(MixHashcode(hashcode, isInteger));
201     }
202 
203     inline size_t GetUtf8Length(bool modify = true, bool isGetBufferSize = false) const;
204 
SetIsInternString()205     inline void SetIsInternString()
206     {
207         SetMixLength(GetMixLength() | STRING_INTERN_BIT);
208     }
209 
IsInternString()210     inline bool IsInternString() const
211     {
212         return (GetMixLength() & STRING_INTERN_BIT) != 0;
213     }
214 
ClearInternStringFlag()215     inline void ClearInternStringFlag()
216     {
217         SetMixLength(GetMixLength() & ~STRING_INTERN_BIT);
218     }
219 
TryGetHashCode(uint32_t * hash)220     inline bool TryGetHashCode(uint32_t *hash)
221     {
222         uint32_t hashcode = GetMixHashcode();
223         if (hashcode == 0 && GetLength() != 0) {
224             return false;
225         }
226         *hash = hashcode;
227         return true;
228     }
229 
GetIntegerCode()230     inline uint32_t GetIntegerCode()
231     {
232         ASSERT(GetMixHashcode() & IS_INTEGER_MASK);
233         return GetRawHashcode();
234     }
235 
236     // not change this data structure.
237     // if string is not flat, this func has low efficiency.
GetHashcode()238     uint32_t PUBLIC_API GetHashcode()
239     {
240         uint32_t hashcode = GetMixHashcode();
241         // GetLength() == 0 means it's an empty array.No need to computeHashCode again when hashseed is 0.
242         if (hashcode == 0 && GetLength() != 0) {
243             hashcode = ComputeHashcode();
244             SetMixHashcode(hashcode);
245         }
246         return hashcode;
247     }
248 
249     template<typename T>
IsDecimalDigitChar(const T c)250     inline static bool IsDecimalDigitChar(const T c)
251     {
252         return (c >= '0' && c <= '9');
253     }
254 
ComputeIntegerHash(uint32_t * num,uint8_t c)255     static uint32_t ComputeIntegerHash(uint32_t *num, uint8_t c)
256     {
257         if (!IsDecimalDigitChar(c)) {
258             return false;
259         }
260         int charDate = c - '0';
261         *num = (*num) * 10 + charDate; // 10: decimal factor
262         return true;
263     }
264 
265     bool HashIntegerString(uint32_t length, uint32_t *hash, uint32_t hashSeed) const;
266 
267     template<typename T>
HashIntegerString(const T * data,size_t size,uint32_t * hash,uint32_t hashSeed)268     static bool HashIntegerString(const T *data, size_t size, uint32_t *hash, uint32_t hashSeed)
269     {
270         ASSERT(size >= 0);
271         if (hashSeed == 0) {
272             if (IsDecimalDigitChar(data[0]) && data[0] != '0') {
273                 uint32_t num = data[0] - '0';
274                 uint32_t i = 1;
275                 do {
276                     if (i == size) {
277                         // compute mix hash
278                         if (num <= MAX_INTEGER_HASH_NUMBER) {
279                             *hash = MixHashcode(num, IS_INTEGER);
280                             return true;
281                         }
282                         return false;
283                     }
284                 } while (ComputeIntegerHash(&num, data[i++]));
285             }
286             if (size == 1 && (data[0] == '0')) {
287                 *hash = MixHashcode(0, IS_INTEGER);
288                 return true;
289             }
290         } else {
291             if (IsDecimalDigitChar(data[0])) {
292                 uint32_t num = hashSeed * 10 + (data[0] - '0'); // 10: decimal factor
293                 uint32_t i = 1;
294                 do {
295                     if (i == size) {
296                         // compute mix hash
297                         if (num <= MAX_INTEGER_HASH_NUMBER) {
298                             *hash = MixHashcode(num, IS_INTEGER);
299                             return true;
300                         }
301                         return false;
302                     }
303                 } while (ComputeIntegerHash(&num, data[i++]));
304             }
305         }
306         return false;
307     }
308 
309     // not change this data structure.
310     // if string is not flat, this func has low efficiency.
311     uint32_t PUBLIC_API ComputeHashcode() const;
312     std::pair<uint32_t, bool> PUBLIC_API ComputeRawHashcode() const;
313     uint32_t PUBLIC_API ComputeHashcode(uint32_t rawHashSeed, bool isInteger) const;
314 
315     static uint32_t ComputeHashcodeUtf8(const uint8_t *utf8Data, size_t utf8Len, bool canBeCompress);
316     static uint32_t ComputeHashcodeUtf16(const uint16_t *utf16Data, uint32_t length);
317 
318     template<bool verify = true>
319     uint16_t At(int32_t index) const;
320 
321     // require is LineString
322     void WriteData(uint32_t index, uint16_t src);
323 
324     // can change left and right data structure
325     static int32_t Compare(const EcmaVM *vm, const JSHandle<EcmaString> &left, const JSHandle<EcmaString> &right);
326 
327     static bool IsSubStringAt(const EcmaVM *vm, const JSHandle<EcmaString>& left,
328         const JSHandle<EcmaString>& right, uint32_t offset);
329 
330     // Check that two spans are equal. Should have the same length.
331     /* static */
332     template<typename T, typename T1>
StringsAreEquals(Span<const T> & str1,Span<const T1> & str2)333     static bool StringsAreEquals(Span<const T> &str1, Span<const T1> &str2)
334     {
335         ASSERT(str1.Size() <= str2.Size());
336         size_t size = str1.Size();
337         if (!std::is_same_v<T, T1>) {
338             for (size_t i = 0; i < size; i++) {
339                 auto left = static_cast<uint16_t>(str1[i]);
340                 auto right = static_cast<uint16_t>(str2[i]);
341                 if (left != right) {
342                     return false;
343                 }
344             }
345             return true;
346         }
347 
348         return !memcmp(str1.data(), str2.data(), size * sizeof(T));
349     }
350 
351     // Converts utf8Data to utf16 and compare it with given utf16_data.
352     static bool IsUtf8EqualsUtf16(const uint8_t *utf8Data, size_t utf8Len, const uint16_t *utf16Data,
353                                   uint32_t utf16Len);
354     // Compares string1 + string2 by bytes, It doesn't check canonical unicode equivalence.
355     bool EqualToSplicedString(const EcmaString *str1, const EcmaString *str2);
356     // Compares strings by bytes, It doesn't check canonical unicode equivalence.
357     static PUBLIC_API bool StringsAreEqual(const EcmaVM *vm, const JSHandle<EcmaString> &str1,
358         const JSHandle<EcmaString> &str2);
359     // Compares strings by bytes, It doesn't check canonical unicode equivalence.
360     static PUBLIC_API bool StringsAreEqual(EcmaString *str1, EcmaString *str2);
361     // Two strings have the same type of utf encoding format.
362     static bool StringsAreEqualDiffUtfEncoding(EcmaString *str1, EcmaString *str2);
363     static bool StringsAreEqualDiffUtfEncoding(const FlatStringInfo &str1, const FlatStringInfo &str2);
364     // Compares strings by bytes, It doesn't check canonical unicode equivalence.
365     // not change str1 data structure.
366     // if str1 is not flat, this func has low efficiency.
367     static bool StringIsEqualUint8Data(const EcmaString *str1, const uint8_t *dataAddr, uint32_t dataLen,
368                                        bool canBeCompress);
369     // Compares strings by bytes, It doesn't check canonical unicode equivalence.
370     // not change str1 data structure.
371     // if str1 is not flat, this func has low efficiency.
372     static bool StringsAreEqualUtf16(const EcmaString *str1, const uint16_t *utf16Data, uint32_t utf16Len);
373 
374     // can change receiver and search data structure
375     static int32_t IndexOf(const EcmaVM *vm,
376         const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos = 0);
377 
378     // can change receiver and search data structure
379     static int32_t LastIndexOf(const EcmaVM *vm,
380         const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos = 0);
381 
382     inline size_t CopyDataUtf8(uint8_t *buf, size_t maxLength, bool modify = true) const
383     {
384         if (maxLength == 0) {
385             return 1; // maxLength was -1 at napi
386         }
387         size_t length = GetLength();
388         if (length > maxLength) {
389             return 0;
390         }
391         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
392         buf[maxLength - 1] = '\0';
393         // Put comparison here so that internal usage and napi can use the same CopyDataRegionUtf8
394         return CopyDataRegionUtf8(buf, 0, length, maxLength, modify) + 1;  // add place for zero in the end
395     }
396 
397     // It allows user to copy into buffer even if maxLength < length
398     inline size_t WriteUtf8(uint8_t *buf, size_t maxLength, bool isWriteBuffer = false) const
399     {
400         if (maxLength == 0) {
401             return 1; // maxLength was -1 at napi
402         }
403         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
404         buf[maxLength - 1] = '\0';
405         return CopyDataRegionUtf8(buf, 0, GetLength(), maxLength, true, isWriteBuffer) + 1;
406     }
407 
CopyDataToUtf16(uint16_t * buf,uint32_t length,uint32_t bufLength)408     size_t CopyDataToUtf16(uint16_t *buf, uint32_t length, uint32_t bufLength) const
409     {
410         if (IsUtf16()) {
411             CVector<uint16_t> tmpBuf;
412             const uint16_t *data = EcmaString::GetUtf16DataFlat(this, tmpBuf);
413             if (length > bufLength) {
414                 if (memcpy_s(buf, bufLength * sizeof(uint16_t), data, bufLength * sizeof(uint16_t)) != EOK) {
415                     LOG_FULL(FATAL) << "memcpy_s failed when length > bufLength";
416                     UNREACHABLE();
417                 }
418                 return bufLength;
419             }
420             if (memcpy_s(buf, bufLength * sizeof(uint16_t), data, length * sizeof(uint16_t)) != EOK) {
421                 LOG_FULL(FATAL) << "memcpy_s failed";
422                 UNREACHABLE();
423             }
424             return length;
425         }
426         CVector<uint8_t> tmpBuf;
427         const uint8_t *data = EcmaString::GetUtf8DataFlat(this, tmpBuf);
428         if (length > bufLength) {
429             return base::utf_helper::ConvertRegionUtf8ToUtf16(data, buf, bufLength, bufLength);
430         }
431         return base::utf_helper::ConvertRegionUtf8ToUtf16(data, buf, length, bufLength);
432     }
433 
434     // It allows user to copy into buffer even if maxLength < length
WriteUtf16(uint16_t * buf,uint32_t targetLength,uint32_t bufLength)435     inline size_t WriteUtf16(uint16_t *buf, uint32_t targetLength, uint32_t bufLength) const
436     {
437         if (bufLength == 0) {
438             return 0;
439         }
440         // Returns a number representing a valid backrest length.
441         return CopyDataToUtf16(buf, targetLength, bufLength);
442     }
443 
WriteOneByte(uint8_t * buf,size_t maxLength)444     size_t WriteOneByte(uint8_t *buf, size_t maxLength) const
445     {
446         if (maxLength == 0) {
447             return 0;
448         }
449         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
450         buf[maxLength - 1] = '\0';
451         uint32_t length = GetLength();
452         if (!IsUtf16()) {
453             CVector<uint8_t> tmpBuf;
454             const uint8_t *data = GetUtf8DataFlat(this, tmpBuf);
455             if (length > maxLength) {
456                 length = maxLength;
457             }
458             if (memcpy_s(buf, maxLength, data, length) != EOK) {
459                 LOG_FULL(FATAL) << "memcpy_s failed when write one byte";
460                 UNREACHABLE();
461             }
462             return length;
463         }
464 
465         CVector<uint16_t> tmpBuf;
466         const uint16_t *data = GetUtf16DataFlat(this, tmpBuf);
467         if (length > maxLength) {
468             return base::utf_helper::ConvertRegionUtf16ToLatin1(data, buf, maxLength, maxLength);
469         }
470         return base::utf_helper::ConvertRegionUtf16ToLatin1(data, buf, length, maxLength);
471     }
472 
473     size_t CopyDataRegionUtf8(uint8_t *buf, size_t start, size_t length, size_t maxLength,
474                               bool modify = true, bool isWriteBuffer = false) const
475     {
476         uint32_t len = GetLength();
477         if (start + length > len) {
478             return 0;
479         }
480         if (!IsUtf16()) {
481             if (length > std::numeric_limits<size_t>::max() / 2 - 1) {  // 2: half
482                 LOG_FULL(FATAL) << " length is higher than half of size_t::max";
483                 UNREACHABLE();
484             }
485             CVector<uint8_t> tmpBuf;
486             const uint8_t *data = GetUtf8DataFlat(this, tmpBuf) + start;
487             // Only copy maxLength number of chars into buffer if length > maxLength
488             auto dataLen = std::min(length, maxLength);
489             std::copy(data, data + dataLen, buf);
490             return dataLen;
491         }
492         CVector<uint16_t> tmpBuf;
493         const uint16_t *data = GetUtf16DataFlat(this, tmpBuf);
494         if (length > maxLength) {
495             return base::utf_helper::ConvertRegionUtf16ToUtf8(data, buf, maxLength, maxLength, start,
496                                                               modify, isWriteBuffer);
497         }
498         return base::utf_helper::ConvertRegionUtf16ToUtf8(data, buf, length, maxLength, start,
499                                                           modify, isWriteBuffer);
500     }
501 
CopyDataUtf16(uint16_t * buf,uint32_t maxLength)502     inline uint32_t CopyDataUtf16(uint16_t *buf, uint32_t maxLength) const
503     {
504         uint32_t length = GetLength();
505         if (length > maxLength) {
506             return 0;
507         }
508         if (IsUtf16()) {
509             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
510             CVector<uint16_t> tmpBuf;
511             const uint16_t *data = GetUtf16DataFlat(this, tmpBuf);
512             if (memcpy_s(buf, maxLength * sizeof(uint16_t), data, length * sizeof(uint16_t)) != EOK) {
513                 LOG_FULL(FATAL) << "memcpy_s failed";
514                 UNREACHABLE();
515             }
516             return length;
517         }
518         CVector<uint8_t> tmpBuf;
519         const uint8_t *data = GetUtf8DataFlat(this, tmpBuf);
520         return base::utf_helper::ConvertRegionUtf8ToUtf16(data, buf, length, maxLength);
521     }
522 
523     std::u16string ToU16String(uint32_t len = 0);
524 
ToOneByteDataForced()525     std::unique_ptr<uint8_t[]> ToOneByteDataForced()
526     {
527         uint8_t *buf = nullptr;
528         auto length = GetLength();
529         if (IsUtf16()) {
530             auto size = length * sizeof(uint16_t);
531             buf = new uint8_t[size]();
532             CopyDataUtf16(reinterpret_cast<uint16_t *>(buf), length);
533         } else {
534             buf = new uint8_t[length + 1]();
535             CopyDataUtf8(buf, length + 1);
536         }
537         return std::unique_ptr<uint8_t[]>(buf);
538     }
539 
540     Span<const uint8_t> ToUtf8Span(CVector<uint8_t> &buf, bool modify = true, bool cesu8 = false)
541     {
542         Span<const uint8_t> str;
543         uint32_t strLen = GetLength();
544         if (UNLIKELY(IsUtf16())) {
545             CVector<uint16_t> tmpBuf;
546             const uint16_t *data = EcmaString::GetUtf16DataFlat(this, tmpBuf);
547             ASSERT(base::utf_helper::Utf16ToUtf8Size(data, strLen, modify, false, cesu8) > 0);
548             size_t len = base::utf_helper::Utf16ToUtf8Size(data, strLen, modify, false, cesu8) - 1;
549             buf.reserve(len);
550             len = base::utf_helper::ConvertRegionUtf16ToUtf8(data, buf.data(), strLen, len, 0, modify, false, cesu8);
551             str = Span<const uint8_t>(buf.data(), len);
552         } else {
553             const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
554             str = Span<const uint8_t>(data, strLen);
555         }
556         return str;
557     }
558 
559     Span<const uint8_t> DebuggerToUtf8Span(CVector<uint8_t> &buf, bool modify = true)
560     {
561         Span<const uint8_t> str;
562         uint32_t strLen = GetLength();
563         if (UNLIKELY(IsUtf16())) {
564             CVector<uint16_t> tmpBuf;
565             const uint16_t *data = EcmaString::GetUtf16DataFlat(this, tmpBuf);
566             size_t len = base::utf_helper::Utf16ToUtf8Size(data, strLen, modify) - 1;
567             buf.reserve(len);
568             len = base::utf_helper::DebuggerConvertRegionUtf16ToUtf8(data, buf.data(), strLen, len, 0, modify);
569             str = Span<const uint8_t>(buf.data(), len);
570         } else {
571             const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
572             str = Span<const uint8_t>(data, strLen);
573         }
574         return str;
575     }
576 
577     inline Span<const uint8_t> FastToUtf8Span() const;
578 
TryToGetInteger(uint32_t * result)579     bool TryToGetInteger(uint32_t *result)
580     {
581         if (!IsInteger()) {
582             return false;
583         }
584         ASSERT(GetLength() <= MAX_CACHED_INTEGER_SIZE);
585         *result = GetIntegerCode();
586         return true;
587     }
588 
589     // using integer number set into hash
TryToSetIntegerHash(int32_t num)590     inline bool TryToSetIntegerHash(int32_t num)
591     {
592         uint32_t hashcode = GetMixHashcode();
593         if (hashcode == 0 && GetLength() != 0) {
594             SetRawHashcode(static_cast<uint32_t>(num), IS_INTEGER);
595             return true;
596         }
597         return false;
598     }
599 
600     void WriteData(EcmaString *src, uint32_t start, uint32_t destSize, uint32_t length);
601 
602     static bool CanBeCompressed(const uint8_t *utf8Data, uint32_t utf8Len);
603     static bool CanBeCompressed(const uint16_t *utf16Data, uint32_t utf16Len);
604     static bool CanBeCompressed(const EcmaString *string);
605 
606     bool PUBLIC_API ToElementIndex(uint32_t *index);
607 
608     bool ToInt(int32_t *index, bool *negative);
609 
610     bool ToUInt64FromLoopStart(uint64_t *index, uint32_t loopStart, const uint8_t *data);
611 
612     bool PUBLIC_API ToTypedArrayIndex(uint32_t *index);
613 
614     template<bool isLower>
615     static EcmaString *ConvertCase(const EcmaVM *vm, const JSHandle<EcmaString> &src);
616 
617     template<bool isLower>
618     static EcmaString *LocaleConvertCase(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale);
619 
620     template<typename T>
621     static EcmaString *TrimBody(const JSThread *thread, const JSHandle<EcmaString> &src, Span<T> &data, TrimMode mode);
622 
623     static EcmaString *Trim(const JSThread *thread, const JSHandle<EcmaString> &src, TrimMode mode = TrimMode::TRIM);
624 
625     // single char copy for loop
626     template<typename DstType, typename SrcType>
CopyChars(DstType * dst,SrcType * src,uint32_t count)627     static void CopyChars(DstType *dst, SrcType *src, uint32_t count)
628     {
629         Span<SrcType> srcSp(src, count);
630         Span<DstType> dstSp(dst, count);
631         for (uint32_t i = 0; i < count; i++) {
632             dstSp[i] = srcSp[i];
633         }
634     }
635 
636     // memory block copy
637     template<typename T>
638     static bool MemCopyChars(Span<T> &dst, size_t dstMax, Span<const T> &src, size_t count);
639 
640     template<typename T>
ComputeHashForData(const T * data,size_t size,uint32_t hashSeed)641     static uint32_t ComputeHashForData(const T *data, size_t size, uint32_t hashSeed)
642     {
643         uint32_t hash = hashSeed;
644         Span<const T> sp(data, size);
645         for (auto c : sp) {
646             constexpr size_t SHIFT = 5;
647             hash = (hash << SHIFT) - hash + c;
648         }
649         return hash;
650     }
651 
IsASCIICharacter(uint16_t data)652     static bool IsASCIICharacter(uint16_t data)
653     {
654         if (data == 0) {
655             return false;
656         }
657         // \0 is not considered ASCII in Ecma-Modified-UTF8 [only modify '\u0000']
658         return data <= base::utf_helper::UTF8_1B_MAX;
659     }
660 
661     template<typename T1, typename T2>
662     static int32_t IndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos, int32_t max);
663 
664     template<typename T1, typename T2>
665     static int32_t LastIndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos);
666 
667     bool IsFlat() const;
668 
IsLineString()669     bool IsLineString() const
670     {
671         return GetClass()->IsLineString();
672     }
IsConstantString()673     bool IsConstantString() const
674     {
675         return GetClass()->IsConstantString();
676     }
IsSlicedString()677     bool IsSlicedString() const
678     {
679         return GetClass()->IsSlicedString();
680     }
IsTreeString()681     bool IsTreeString() const
682     {
683         return GetClass()->IsTreeString();
684     }
NotTreeString()685     bool NotTreeString() const
686     {
687         return !IsTreeString();
688     }
IsLineOrConstantString()689     bool IsLineOrConstantString() const
690     {
691         auto hclass = GetClass();
692         return hclass->IsLineString() || hclass->IsConstantString();
693     }
694 
GetStringType()695     JSType GetStringType() const
696     {
697         JSType type = GetClass()->GetObjectType();
698         ASSERT(type >= JSType::STRING_FIRST && type <= JSType::STRING_LAST);
699         return type;
700     }
701 
702     template <typename Char>
703     static void WriteToFlat(EcmaString *src, Char *buf, uint32_t maxLength);
704 
705     template <typename Char>
706     static void WriteToFlatWithPos(EcmaString *src, Char *buf, uint32_t length, uint32_t pos);
707 
708     static const uint8_t *PUBLIC_API GetUtf8DataFlat(const EcmaString *src, CVector<uint8_t> &buf);
709 
710     static const uint8_t *PUBLIC_API GetNonTreeUtf8Data(const EcmaString *src);
711 
712     static const uint16_t *PUBLIC_API GetUtf16DataFlat(const EcmaString *src, CVector<uint16_t> &buf);
713 
714     static const uint16_t *PUBLIC_API GetNonTreeUtf16Data(const EcmaString *src);
715 
716     // string must be not flat
717     static EcmaString *SlowFlatten(const EcmaVM *vm, const JSHandle<EcmaString> &string, MemSpaceType type);
718 
719     PUBLIC_API static EcmaString *Flatten(const EcmaVM *vm, const JSHandle<EcmaString> &string,
720                                MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE);
721 
722     static FlatStringInfo FlattenAllString(const EcmaVM *vm, const JSHandle<EcmaString> &string,
723                                             MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE);
724 
725     static EcmaString *FlattenNoGC(const EcmaVM *vm, EcmaString *string);
726 
    // Case-conversion entry points (declarations only). Each takes the source string as a handle
    // and returns an EcmaString pointer.
727     static EcmaString *ToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src);
728 
729     static EcmaString *ToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src);
730 
    // Locale-aware variants; `locale` is an ICU locale.
731     static EcmaString *ToLocaleLower(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale);
732 
733     static EcmaString *ToLocaleUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale);
734 
    // NOTE(review): the "Try" variants are presumably fast paths with a fallback — confirm.
735     static EcmaString *TryToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src);
736 
737     static EcmaString *TryToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src);
738 
    // `toLower` selects the direction of the conversion; `startIndex` defaults to 0.
    // NOTE(review): presumably conversion begins at startIndex — confirm against the definition.
739     static EcmaString *ConvertUtf8ToLowerOrUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src,
740                                                  bool toLower, uint32_t startIndex = 0);
741 };
742 
743 // The LineEcmaString abstract class captures sequential string values, only LineEcmaString can store chars data
744 class LineEcmaString : public EcmaString {
745 public:
746     static constexpr uint32_t MAX_LENGTH = (1 << 28) - 16;
747     static constexpr uint32_t INIT_LENGTH_TIMES = 4;
748     // DATA_OFFSET: the string data stored after the string header.
749     // Data can be stored in utf8 or utf16 form according to compressed bit.
750     static constexpr size_t DATA_OFFSET = EcmaString::SIZE;  // DATA_OFFSET equal to Empty String size
751 
752     CAST_CHECK(LineEcmaString, IsLineString);
753 
754     DECL_VISIT_ARRAY(DATA_OFFSET, 0, GetPointerLength());
755 
Cast(EcmaString * str)756     static LineEcmaString *Cast(EcmaString *str)
757     {
758         return static_cast<LineEcmaString *>(str);
759     }
760 
Cast(const EcmaString * str)761     static LineEcmaString *Cast(const EcmaString *str)
762     {
763         return LineEcmaString::Cast(const_cast<EcmaString *>(str));
764     }
765 
ComputeSizeUtf8(uint32_t utf8Len)766     static size_t ComputeSizeUtf8(uint32_t utf8Len)
767     {
768         return DATA_OFFSET + utf8Len;
769     }
770 
ComputeSizeUtf16(uint32_t utf16Len)771     static size_t ComputeSizeUtf16(uint32_t utf16Len)
772     {
773         return DATA_OFFSET + utf16Len * sizeof(uint16_t);
774     }
775 
ObjectSize(EcmaString * str)776     static size_t ObjectSize(EcmaString *str)
777     {
778         uint32_t length = str->GetLength();
779         return str->IsUtf16() ? ComputeSizeUtf16(length) : ComputeSizeUtf8(length);
780     }
781 
DataSize(EcmaString * str)782     static size_t DataSize(EcmaString *str)
783     {
784         uint32_t length = str->GetLength();
785         return str->IsUtf16() ? length * sizeof(uint16_t) : length;
786     }
787 
GetPointerLength()788     size_t GetPointerLength()
789     {
790         size_t byteSize = DataSize(this);
791         return AlignUp(byteSize, static_cast<size_t>(MemAlignment::MEM_ALIGN_OBJECT)) / sizeof(JSTaggedType);
792     }
793 
GetData()794     uint16_t *GetData() const
795     {
796         return reinterpret_cast<uint16_t *>(ToUintPtr(this) + DATA_OFFSET);
797     }
798 
799     template<bool verify = true>
Get(int32_t index)800     uint16_t Get(int32_t index) const
801     {
802         int32_t length = static_cast<int32_t>(GetLength());
803         if (verify) {
804             if ((index < 0) || (index >= length)) {
805                 return 0;
806             }
807         }
808         if (!IsUtf16()) {
809             Span<const uint8_t> sp(GetDataUtf8(), length);
810             return sp[index];
811         }
812         Span<const uint16_t> sp(GetDataUtf16(), length);
813         return sp[index];
814     }
815 
Set(uint32_t index,uint16_t src)816     void Set(uint32_t index, uint16_t src)
817     {
818         ASSERT(index < GetLength());
819         if (IsUtf8()) {
820             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
821             *(reinterpret_cast<uint8_t *>(GetData()) + index) = static_cast<uint8_t>(src);
822         } else {
823             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
824             *(GetData() + index) = src;
825         }
826     }
827 };
828 static_assert((LineEcmaString::DATA_OFFSET % static_cast<uint8_t>(MemAlignment::MEM_ALIGN_OBJECT)) == 0);
829 
830 class ConstantString : public EcmaString {
831 public:
832     static constexpr size_t RELOCTAED_DATA_OFFSET = EcmaString::SIZE;
833     // ConstantData is the pointer of const string in the pandafile.
834     // String in pandafile is encoded by the utf8 format.
835     // EntityId is normally the uint32_t index in the pandafile.
836     // When the pandafile is to be removed, EntityId will become -1.
837     // The real string data will be reloacted into bytearray and stored in RelocatedData.
838     // ConstantData will also point at data of bytearray data.
839     ACCESSORS(RelocatedData, RELOCTAED_DATA_OFFSET, ENTITY_ID_OFFSET);
840     ACCESSORS_PRIMITIVE_FIELD(EntityId, int64_t, ENTITY_ID_OFFSET, CONSTANT_DATA_OFFSET);
841     ACCESSORS_NATIVE_FIELD(ConstantData, uint8_t, CONSTANT_DATA_OFFSET, LAST_OFFSET);
842     DEFINE_ALIGN_SIZE(LAST_OFFSET);
843 
844     CAST_CHECK(ConstantString, IsConstantString);
845     DECL_VISIT_OBJECT(RELOCTAED_DATA_OFFSET, ENTITY_ID_OFFSET);
846 
Cast(EcmaString * str)847     static ConstantString *Cast(EcmaString *str)
848     {
849         return static_cast<ConstantString *>(str);
850     }
851 
Cast(const EcmaString * str)852     static ConstantString *Cast(const EcmaString *str)
853     {
854         return ConstantString::Cast(const_cast<EcmaString *>(str));
855     }
856 
ObjectSize()857     static size_t ObjectSize()
858     {
859         return ConstantString::SIZE;
860     }
861 
GetEntityIdU32()862     uint32_t GetEntityIdU32() const
863     {
864         ASSERT(GetEntityId() >= 0);
865         return static_cast<uint32_t>(GetEntityId());
866     }
867 
868     template<bool verify = true>
Get(int32_t index)869     uint16_t Get(int32_t index) const
870     {
871         int32_t length = static_cast<int32_t>(GetLength());
872         if (verify) {
873             if ((index < 0) || (index >= length)) {
874                 return 0;
875             }
876         }
877         ASSERT(IsUtf8());
878         Span<const uint8_t> sp(GetConstantData(), length);
879         return sp[index];
880     }
881 };
882 
883 // The substrings of another string use SlicedString to describe.
884 class SlicedString : public EcmaString {
885 public:
886     static constexpr uint32_t MIN_SLICED_ECMASTRING_LENGTH = 13;
887     static constexpr size_t PARENT_OFFSET = EcmaString::SIZE;
888     ACCESSORS(Parent, PARENT_OFFSET, STARTINDEX_OFFSET);
889     ACCESSORS_PRIMITIVE_FIELD(StartIndex, uint32_t, STARTINDEX_OFFSET, BACKING_STORE_FLAG);
890     ACCESSORS_PRIMITIVE_FIELD(HasBackingStore, uint32_t, BACKING_STORE_FLAG, SIZE);
891 
892     DECL_VISIT_OBJECT(PARENT_OFFSET, STARTINDEX_OFFSET);
893 
894     CAST_CHECK(SlicedString, IsSlicedString);
895 private:
896     friend class EcmaString;
Cast(EcmaString * str)897     static SlicedString *Cast(EcmaString *str)
898     {
899         return static_cast<SlicedString *>(str);
900     }
901 
Cast(const EcmaString * str)902     static SlicedString *Cast(const EcmaString *str)
903     {
904         return SlicedString::Cast(const_cast<EcmaString *>(str));
905     }
906 
ObjectSize()907     static size_t ObjectSize()
908     {
909         return SlicedString::SIZE;
910     }
911 
912     // Minimum length for a sliced string
913     template<bool verify = true>
Get(int32_t index)914     uint16_t Get(int32_t index) const
915     {
916         int32_t length = static_cast<int32_t>(GetLength());
917         if (verify) {
918             if ((index < 0) || (index >= length)) {
919                 return 0;
920             }
921         }
922         EcmaString *parent = EcmaString::Cast(GetParent());
923         if (parent->IsLineString()) {
924             if (parent->IsUtf8()) {
925                 Span<const uint8_t> sp(parent->GetDataUtf8() + GetStartIndex(), length);
926                 return sp[index];
927             }
928             Span<const uint16_t> sp(parent->GetDataUtf16() + GetStartIndex(), length);
929             return sp[index];
930         }
931         Span<const uint8_t> sp(ConstantString::Cast(parent)->GetConstantData() + GetStartIndex(), length);
932         return sp[index];
933     }
934 };
935 
936 class TreeEcmaString : public EcmaString {
937 public:
938     // Minimum length for a tree string
939     static constexpr uint32_t MIN_TREE_ECMASTRING_LENGTH = 13;
940 
941     static constexpr size_t FIRST_OFFSET = EcmaString::SIZE;
942     ACCESSORS(First, FIRST_OFFSET, SECOND_OFFSET);
943     ACCESSORS(Second, SECOND_OFFSET, SIZE);
944 
945     DECL_VISIT_OBJECT(FIRST_OFFSET, SIZE);
946 
947     CAST_CHECK(TreeEcmaString, IsTreeString);
948 
Cast(EcmaString * str)949     static TreeEcmaString *Cast(EcmaString *str)
950     {
951         return static_cast<TreeEcmaString *>(str);
952     }
953 
Cast(const EcmaString * str)954     static TreeEcmaString *Cast(const EcmaString *str)
955     {
956         return TreeEcmaString::Cast(const_cast<EcmaString *>(str));
957     }
958 
IsFlat()959     bool IsFlat() const
960     {
961         auto strSecond = EcmaString::Cast(GetSecond());
962         return strSecond->GetLength() == 0;
963     }
964 
965     template<bool verify = true>
Get(int32_t index)966     uint16_t Get(int32_t index) const
967     {
968         int32_t length = static_cast<int32_t>(GetLength());
969         if (verify) {
970             if ((index < 0) || (index >= length)) {
971                 return 0;
972             }
973         }
974 
975         if (IsFlat()) {
976             EcmaString *first = EcmaString::Cast(GetFirst());
977             return first->At<verify>(index);
978         }
979         EcmaString *string = const_cast<TreeEcmaString *>(this);
980         while (true) {
981             if (string->IsTreeString()) {
982                 EcmaString *first = EcmaString::Cast(TreeEcmaString::Cast(string)->GetFirst());
983                 if (static_cast<int32_t>(first->GetLength()) > index) {
984                     string = first;
985                 } else {
986                     index -= static_cast<int32_t>(first->GetLength());
987                     string = EcmaString::Cast(TreeEcmaString::Cast(string)->GetSecond());
988                 }
989             } else {
990                 return string->At<verify>(index);
991             }
992         }
993         UNREACHABLE();
994     }
995 };
996 
997 class FlatStringInfo {
998 public:
FlatStringInfo(EcmaString * string,uint32_t startIndex,uint32_t length)999     FlatStringInfo(EcmaString *string, uint32_t startIndex, uint32_t length) : string_(string),
1000                                                                                startIndex_(startIndex),
1001                                                                                length_(length) {}
IsUtf8()1002     bool IsUtf8() const
1003     {
1004         return string_->IsUtf8();
1005     }
1006 
IsUtf16()1007     bool IsUtf16() const
1008     {
1009         return string_->IsUtf16();
1010     }
1011 
GetString()1012     EcmaString *GetString() const
1013     {
1014         return string_;
1015     }
1016 
SetString(EcmaString * string)1017     void SetString(EcmaString *string)
1018     {
1019         string_ = string;
1020     }
1021 
GetStartIndex()1022     uint32_t GetStartIndex() const
1023     {
1024         return startIndex_;
1025     }
1026 
SetStartIndex(uint32_t index)1027     void SetStartIndex(uint32_t index)
1028     {
1029         startIndex_ = index;
1030     }
1031 
GetLength()1032     uint32_t GetLength() const
1033     {
1034         return length_;
1035     }
1036 
1037     const uint8_t *GetDataUtf8() const;
1038     const uint16_t *GetDataUtf16() const;
1039     uint8_t *GetDataUtf8Writable() const;
1040     uint16_t *GetDataUtf16Writable() const;
1041     std::u16string ToU16String(uint32_t len = 0);
1042 private:
1043     EcmaString *string_ {nullptr};
1044     uint32_t startIndex_ {0};
1045     uint32_t length_ {0};
1046 };
1047 
1048 // Do not call the member functions of EcmaString directly;
1049 // use the corresponding functions of EcmaStringAccessor instead.
1050 // e.g.: EcmaString *str = ***; str->GetLength() ----->  EcmaStringAccessor(str).GetLength()
1051 class PUBLIC_API EcmaStringAccessor {
1052 public:
EcmaStringAccessor(EcmaString * string)1053     explicit inline EcmaStringAccessor(EcmaString *string)
1054     {
1055         ASSERT(string != nullptr);
1056         string_ = string;
1057     }
1058 
    // Further constructors, declared only: from a TaggedObject pointer, from a JSTaggedValue,
    // and from a JSHandle<EcmaString>. All are explicit.
1059     explicit EcmaStringAccessor(TaggedObject *obj);
1060 
1061     explicit EcmaStringAccessor(JSTaggedValue value);
1062 
1063     explicit EcmaStringAccessor(const JSHandle<EcmaString> &strHandle);
1064 
    // Declared only. NOTE(review): presumably allocates a line string of `length` characters,
    // utf8 when `compressed` is true — confirm against the definition.
1065     static EcmaString *CreateLineString(const EcmaVM *vm, size_t length, bool compressed);
1066 
CreateEmptyString(const EcmaVM * vm)1067     static EcmaString *CreateEmptyString(const EcmaVM *vm)
1068     {
1069         return EcmaString::CreateEmptyString(vm);
1070     }
1071 
1072     static EcmaString *CreateFromUtf8(const EcmaVM *vm, const uint8_t *utf8Data, uint32_t utf8Len, bool canBeCompress,
1073                                       MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE, bool isConstantString = false,
1074                                       uint32_t idOffset = 0)
1075     {
1076         return EcmaString::CreateFromUtf8(vm, utf8Data, utf8Len, canBeCompress, type, isConstantString, idOffset);
1077     }
1078 
1079     static EcmaString *CreateFromUtf8CompressedSubString(const EcmaVM *vm, const JSHandle<EcmaString> &string,
1080                                                          uint32_t offset, uint32_t utf8Len,
1081                                                          MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE)
1082     {
1083         return EcmaString::CreateFromUtf8CompressedSubString(vm, string, offset, utf8Len, type);
1084     }
1085 
1086     static EcmaString *CreateConstantString(const EcmaVM *vm, const uint8_t *utf8Data, size_t length,
1087         bool compressed, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE, uint32_t idOffset = 0)
1088     {
1089         return EcmaString::CreateConstantString(vm, utf8Data, length, compressed, type, idOffset);
1090     }
1091 
1092     static EcmaString *CreateUtf16StringFromUtf8(const EcmaVM *vm, const uint8_t *utf8Data, uint32_t utf8Len,
1093         MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE)
1094     {
1095         return EcmaString::CreateUtf16StringFromUtf8(vm, utf8Data, utf8Len, type);
1096     }
1097 
1098     static EcmaString *CreateFromUtf16(const EcmaVM *vm, const uint16_t *utf16Data, uint32_t utf16Len,
1099                                        bool canBeCompress, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE)
1100     {
1101         return EcmaString::CreateFromUtf16(vm, utf16Data, utf16Len, canBeCompress, type);
1102     }
1103 
1104     static EcmaString *Concat(const EcmaVM *vm, const JSHandle<EcmaString> &str1Handle,
1105         const JSHandle<EcmaString> &str2Handle, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE)
1106     {
1107         return EcmaString::Concat(vm, str1Handle, str2Handle, type);
1108     }
1109 
CopyStringToOldSpace(const EcmaVM * vm,const JSHandle<EcmaString> & original,uint32_t length,bool compressed)1110     static EcmaString *CopyStringToOldSpace(const EcmaVM *vm, const JSHandle<EcmaString> &original,
1111         uint32_t length, bool compressed)
1112     {
1113         return EcmaString::CopyStringToOldSpace(vm, original, length, compressed);
1114     }
1115 
1116     // can change src data structure
FastSubString(const EcmaVM * vm,const JSHandle<EcmaString> & src,uint32_t start,uint32_t length)1117     static EcmaString *FastSubString(const EcmaVM *vm,
1118         const JSHandle<EcmaString> &src, uint32_t start, uint32_t length)
1119     {
1120         return EcmaString::FastSubString(vm, src, start, length);
1121     }
1122 
1123     // get
GetSubString(const EcmaVM * vm,const JSHandle<EcmaString> & src,uint32_t start,uint32_t length)1124     static EcmaString *GetSubString(const EcmaVM *vm,
1125         const JSHandle<EcmaString> &src, uint32_t start, uint32_t length)
1126     {
1127         return EcmaString::GetSubString(vm, src, start, length);
1128     }
1129 
IsUtf8()1130     bool IsUtf8() const
1131     {
1132         return string_->IsUtf8();
1133     }
1134 
IsUtf16()1135     bool IsUtf16() const
1136     {
1137         return string_->IsUtf16();
1138     }
1139 
GetLength()1140     uint32_t GetLength() const
1141     {
1142         return string_->GetLength();
1143     }
1144 
1145     // Precondition: the wrapped string must be a LineString.
    // Declared only; `isGetBufferSize` defaults to false.
    // NOTE(review): its exact effect on the result is not visible here — confirm against the definition.
1146     inline size_t GetUtf8Length(bool isGetBufferSize = false) const;
1147 
ObjectSize()1148     size_t ObjectSize() const
1149     {
1150         if (string_->IsLineString()) {
1151             return LineEcmaString::ObjectSize(string_);
1152         } if (string_->IsConstantString()) {
1153             return ConstantString::ObjectSize();
1154         } else {
1155             return TreeEcmaString::SIZE;
1156         }
1157     }
1158 
1159     // For TreeString, the calculation result is size of LineString correspondingly.
GetFlatStringSize()1160     size_t GetFlatStringSize() const
1161     {
1162         if (string_->IsConstantString()) {
1163             return ConstantString::ObjectSize();
1164         }
1165         return LineEcmaString::ObjectSize(string_);
1166     }
1167 
IsInternString()1168     bool IsInternString() const
1169     {
1170         return string_->IsInternString();
1171     }
1172 
SetInternString()1173     void SetInternString()
1174     {
1175         string_->SetIsInternString();
1176     }
1177 
ClearInternString()1178     void ClearInternString()
1179     {
1180         string_->ClearInternStringFlag();
1181     }
1182 
1183     // Precondition: the wrapped string must be a LineString.
1184     // The data is in utf8 format, but without a terminating 0.
1185     inline const uint8_t *GetDataUtf8();
1186 
1187     // Precondition: the wrapped string must be a LineString.
1188     inline const uint16_t *GetDataUtf16();
1189 
1190     // not change string data structure.
1191     // if string is not flat, this func has low efficiency.
1192     std::u16string ToU16String(uint32_t len = 0)
1193     {
1194         return string_->ToU16String(len);
1195     }
1196 
1197     // not change string data structure.
1198     // if string is not flat, this func has low efficiency.
ToOneByteDataForced()1199     std::unique_ptr<uint8_t[]> ToOneByteDataForced()
1200     {
1201         return string_->ToOneByteDataForced();
1202     }
1203 
1204     // not change string data structure.
1205     // if string is not flat, this func has low efficiency.
ToUtf8Span(CVector<uint8_t> & buf)1206     Span<const uint8_t> ToUtf8Span(CVector<uint8_t> &buf)
1207     {
1208         return string_->ToUtf8Span(buf);
1209     }
1210 
1211     // Only valid when the string is flat and uses UTF8 encoding.
1212     inline Span<const uint8_t> FastToUtf8Span();
1213 
1214     // Using string's hash to figure out whether the string can be converted to integer
TryToGetInteger(uint32_t * result)1215     inline bool TryToGetInteger(uint32_t *result)
1216     {
1217         return string_->TryToGetInteger(result);
1218     }
1219 
TryToSetIntegerHash(int32_t num)1220     inline bool TryToSetIntegerHash(int32_t num)
1221     {
1222         return string_->TryToSetIntegerHash(num);
1223     }
1224 
1225     // Does not change the string's data structure.
1226     // If the string is not flat, this function is inefficient.
1227     std::string ToStdString(StringConvertedUsage usage = StringConvertedUsage::PRINT);
1228 
1229     // This function converts for Utf8.
1230     CString Utf8ConvertToString();
1231 
    // Declared only; takes the same `usage` parameter and default as ToStdString.
1232     std::string DebuggerToStdString(StringConvertedUsage usage = StringConvertedUsage::PRINT);
1233     // Does not change the string's data structure.
1234     // If the string is not flat, this function is inefficient.
1235     CString ToCString(StringConvertedUsage usage = StringConvertedUsage::LOGICOPERATION, bool cesu8 = false);
1236 
1237     // not change string data structure.
1238     // if string is not flat, this func has low efficiency.
1239     uint32_t WriteToFlatUtf8(uint8_t *buf, uint32_t maxLength, bool isWriteBuffer = false)
1240     {
1241         return string_->WriteUtf8(buf, maxLength, isWriteBuffer);
1242     }
1243 
WriteToUtf16(uint16_t * buf,uint32_t bufLength)1244     uint32_t WriteToUtf16(uint16_t *buf, uint32_t bufLength)
1245     {
1246         return string_->WriteUtf16(buf, GetLength(), bufLength);
1247     }
1248 
WriteToOneByte(uint8_t * buf,uint32_t maxLength)1249     uint32_t WriteToOneByte(uint8_t *buf, uint32_t maxLength)
1250     {
1251         return string_->WriteOneByte(buf, maxLength);
1252     }
1253 
1254     // not change string data structure.
1255     // if string is not flat, this func has low efficiency.
WriteToFlatUtf16(uint16_t * buf,uint32_t maxLength)1256     uint32_t WriteToFlatUtf16(uint16_t *buf, uint32_t maxLength) const
1257     {
1258         return string_->CopyDataUtf16(buf, maxLength);
1259     }
1260 
1261     template <typename Char>
WriteToFlatWithPos(EcmaString * src,Char * buf,uint32_t length,uint32_t pos)1262     static void WriteToFlatWithPos(EcmaString *src, Char *buf, uint32_t length, uint32_t pos)
1263     {
1264         src->WriteToFlatWithPos(src, buf, length, pos);
1265     }
1266 
1267     template <typename Char>
WriteToFlat(EcmaString * src,Char * buf,uint32_t maxLength)1268     static void WriteToFlat(EcmaString *src, Char *buf, uint32_t maxLength)
1269     {
1270         src->WriteToFlat(src, buf, maxLength);
1271     }
1272 
1273     // Precondition: dst must be a LineString.
1274     // Does not change the data structure of src.
1275     // If src is not flat, this function is inefficient.
1276     inline static void ReadData(EcmaString * dst, EcmaString *src, uint32_t start, uint32_t destSize, uint32_t length);
1277 
1278     // not change src data structure.
1279     // if src is not flat, this func has low efficiency.
1280     template<bool verify = true>
Get(uint32_t index)1281     uint16_t Get(uint32_t index) const
1282     {
1283         return string_->At<verify>(index);
1284     }
1285 
1286     // require string is LineString.
Set(uint32_t index,uint16_t src)1287     void Set(uint32_t index, uint16_t src)
1288     {
1289         return string_->WriteData(index, src);
1290     }
1291 
1292     // not change src data structure.
1293     // if src is not flat, this func has low efficiency.
GetHashcode()1294     uint32_t GetHashcode()
1295     {
1296         return string_->GetHashcode();
1297     }
1298 
GetRawHashcode()1299     uint32_t GetRawHashcode()
1300     {
1301         return string_->GetRawHashcode();
1302     }
1303 
1304     // not change src data structure.
1305     // if src is not flat, this func has low efficiency.
ComputeRawHashcode()1306     std::pair<uint32_t, bool> ComputeRawHashcode()
1307     {
1308         return string_->ComputeRawHashcode();
1309     }
1310 
ComputeHashcode()1311     uint32_t ComputeHashcode()
1312     {
1313         return string_->ComputeHashcode();
1314     }
1315 
ComputeHashcode(uint32_t rawHashSeed,bool isInteger)1316     uint32_t ComputeHashcode(uint32_t rawHashSeed, bool isInteger)
1317     {
1318         return string_->ComputeHashcode(rawHashSeed, isInteger);
1319     }
1320 
ComputeHashcodeUtf8(const uint8_t * utf8Data,size_t utf8Len,bool canBeCompress)1321     static uint32_t ComputeHashcodeUtf8(const uint8_t *utf8Data, size_t utf8Len, bool canBeCompress)
1322     {
1323         return EcmaString::ComputeHashcodeUtf8(utf8Data, utf8Len, canBeCompress);
1324     }
1325 
ComputeHashcodeUtf16(const uint16_t * utf16Data,uint32_t length)1326     static uint32_t ComputeHashcodeUtf16(const uint16_t *utf16Data, uint32_t length)
1327     {
1328         return EcmaString::ComputeHashcodeUtf16(utf16Data, length);
1329     }
1330 
1331     // can change receiver and search data structure
1332     static int32_t IndexOf(const EcmaVM *vm,
1333         const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos = 0)
1334     {
1335         return EcmaString::IndexOf(vm, receiver, search, pos);
1336     }
1337 
1338     // can change receiver and search data structure
1339     static int32_t LastIndexOf(const EcmaVM *vm,
1340         const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos = 0)
1341     {
1342         return EcmaString::LastIndexOf(vm, receiver, search, pos);
1343     }
1344 
1345     // can change receiver and search data structure
Compare(const EcmaVM * vm,const JSHandle<EcmaString> & left,const JSHandle<EcmaString> & right)1346     static int32_t Compare(const EcmaVM *vm, const JSHandle<EcmaString>& left, const JSHandle<EcmaString>& right)
1347     {
1348         return EcmaString::Compare(vm, left, right);
1349     }
1350 
1351 
1352     // can change receiver and search data structure
1353     static bool IsSubStringAt(const EcmaVM *vm, const JSHandle<EcmaString>& left,
1354         const JSHandle<EcmaString>& right, uint32_t offset = 0)
1355     {
1356         return EcmaString::IsSubStringAt(vm, left, right, offset);
1357     }
1358 
1359     // can change str1 and str2 data structure
StringsAreEqual(const EcmaVM * vm,const JSHandle<EcmaString> & str1,const JSHandle<EcmaString> & str2)1360     static bool StringsAreEqual(const EcmaVM *vm, const JSHandle<EcmaString> &str1, const JSHandle<EcmaString> &str2)
1361     {
1362         return EcmaString::StringsAreEqual(vm, str1, str2);
1363     }
1364 
1365     // not change str1 and str2 data structure.
1366     // if str1 or str2 is not flat, this func has low efficiency.
StringsAreEqual(EcmaString * str1,EcmaString * str2)1367     static bool StringsAreEqual(EcmaString *str1, EcmaString *str2)
1368     {
1369         return EcmaString::StringsAreEqual(str1, str2);
1370     }
1371 
1372     // not change str1 and str2 data structure.
1373     // if str1 or str2 is not flat, this func has low efficiency.
StringsAreEqualDiffUtfEncoding(EcmaString * str1,EcmaString * str2)1374     static bool StringsAreEqualDiffUtfEncoding(EcmaString *str1, EcmaString *str2)
1375     {
1376         return EcmaString::StringsAreEqualDiffUtfEncoding(str1, str2);
1377     }
1378 
1379     // not change str1 data structure.
1380     // if str1 is not flat, this func has low efficiency.
StringIsEqualUint8Data(const EcmaString * str1,const uint8_t * dataAddr,uint32_t dataLen,bool canBeCompress)1381     static bool StringIsEqualUint8Data(const EcmaString *str1, const uint8_t *dataAddr, uint32_t dataLen,
1382                                        bool canBeCompress)
1383     {
1384         return EcmaString::StringIsEqualUint8Data(str1, dataAddr, dataLen, canBeCompress);
1385     }
1386 
1387     // not change str1 data structure.
1388     // if str1 is not flat, this func has low efficiency.
StringsAreEqualUtf16(const EcmaString * str1,const uint16_t * utf16Data,uint32_t utf16Len)1389     static bool StringsAreEqualUtf16(const EcmaString *str1, const uint16_t *utf16Data, uint32_t utf16Len)
1390     {
1391         return EcmaString::StringsAreEqualUtf16(str1, utf16Data, utf16Len);
1392     }
1393 
1394     // require str1 and str2 are LineString.
1395     // not change string data structure.
1396     // if string is not flat, this func has low efficiency.
EqualToSplicedString(const EcmaString * str1,const EcmaString * str2)1397     bool EqualToSplicedString(const EcmaString *str1, const EcmaString *str2)
1398     {
1399         return string_->EqualToSplicedString(str1, str2);
1400     }
1401 
CanBeCompressed(const uint8_t * utf8Data,uint32_t utf8Len)1402     static bool CanBeCompressed(const uint8_t *utf8Data, uint32_t utf8Len)
1403     {
1404         return EcmaString::CanBeCompressed(utf8Data, utf8Len);
1405     }
1406 
CanBeCompressed(const uint16_t * utf16Data,uint32_t utf16Len)1407     static bool CanBeCompressed(const uint16_t *utf16Data, uint32_t utf16Len)
1408     {
1409         return EcmaString::CanBeCompressed(utf16Data, utf16Len);
1410     }
1411 
1412     // require string is LineString
CanBeCompressed(const EcmaString * string)1413     static bool CanBeCompressed(const EcmaString *string)
1414     {
1415         return EcmaString::CanBeCompressed(string);
1416     }
1417 
1418     // not change string data structure.
1419     // if string is not flat, this func has low efficiency.
ToElementIndex(uint32_t * index)1420     bool ToElementIndex(uint32_t *index)
1421     {
1422         return string_->ToElementIndex(index);
1423     }
1424 
1425     // not change string data structure.
1426     // if string is not flat, this func has low efficiency.
ToInt(int32_t * index,bool * negative)1427     bool ToInt(int32_t *index, bool *negative)
1428     {
1429         return string_->ToInt(index, negative);
1430     }
1431 
1432     // not change string data structure.
1433     // if string is not flat, this func has low efficiency.
ToTypedArrayIndex(uint32_t * index)1434     bool PUBLIC_API ToTypedArrayIndex(uint32_t *index)
1435     {
1436         return string_->ToTypedArrayIndex(index);
1437     }
1438 
ToLower(const EcmaVM * vm,const JSHandle<EcmaString> & src)1439     static EcmaString *ToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src)
1440     {
1441         return EcmaString::ToLower(vm, src);
1442     }
1443 
TryToLower(const EcmaVM * vm,const JSHandle<EcmaString> & src)1444     static EcmaString *TryToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src)
1445     {
1446         return EcmaString::TryToLower(vm, src);
1447     }
1448 
TryToUpper(const EcmaVM * vm,const JSHandle<EcmaString> & src)1449     static EcmaString *TryToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src)
1450     {
1451         return EcmaString::TryToUpper(vm, src);
1452     }
1453 
ToUpper(const EcmaVM * vm,const JSHandle<EcmaString> & src)1454     static EcmaString *ToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src)
1455     {
1456         return EcmaString::ToUpper(vm, src);
1457     }
1458 
ToLocaleLower(const EcmaVM * vm,const JSHandle<EcmaString> & src,const icu::Locale & locale)1459     static EcmaString *ToLocaleLower(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale)
1460     {
1461         return EcmaString::ToLocaleLower(vm, src, locale);
1462     }
1463 
ToLocaleUpper(const EcmaVM * vm,const JSHandle<EcmaString> & src,const icu::Locale & locale)1464     static EcmaString *ToLocaleUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale)
1465     {
1466         return EcmaString::ToLocaleUpper(vm, src, locale);
1467     }
1468 
1469     static EcmaString *Trim(const JSThread *thread,
1470         const JSHandle<EcmaString> &src, EcmaString::TrimMode mode = EcmaString::TrimMode::TRIM)
1471     {
1472         return EcmaString::Trim(thread, src, mode);
1473     }
1474 
IsASCIICharacter(uint16_t data)1475     static bool IsASCIICharacter(uint16_t data)
1476     {
1477         if (data == 0) {
1478             return false;
1479         }
1480         // \0 is not considered ASCII in Ecma-Modified-UTF8 [only modify '\u0000']
1481         return data <= base::utf_helper::UTF8_1B_MAX;
1482     }
1483 
IsFlat()1484     bool IsFlat() const
1485     {
1486         return string_->IsFlat();
1487     }
1488 
IsLineString()1489     bool IsLineString() const
1490     {
1491         return string_->IsLineString();
1492     }
1493 
IsConstantString()1494     bool IsConstantString() const
1495     {
1496         return string_->IsConstantString();
1497     }
1498 
IsSlicedString()1499     bool IsSlicedString() const
1500     {
1501         return string_->IsSlicedString();
1502     }
1503 
IsLineOrConstantString()1504     bool IsLineOrConstantString() const
1505     {
1506         return string_->IsLineOrConstantString();
1507     }
1508 
GetStringType()1509     JSType GetStringType() const
1510     {
1511         return string_->GetStringType();
1512     }
1513 
IsTreeString()1514     bool IsTreeString() const
1515     {
1516         return string_->IsTreeString();
1517     }
1518 
NotTreeString()1519     bool NotTreeString() const
1520     {
1521         return string_->NotTreeString();
1522     }
1523 
1524     // the returned string may be a linestring, constantstring, or slicestring!!
1525     PUBLIC_API static EcmaString *Flatten(const EcmaVM *vm, const JSHandle<EcmaString> &string,
1526         MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE)
1527     {
1528         return EcmaString::Flatten(vm, string, type);
1529     }
1530 
1531     static FlatStringInfo FlattenAllString(const EcmaVM *vm, const JSHandle<EcmaString> &string,
1532         MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE)
1533     {
1534         return EcmaString::FlattenAllString(vm, string, type);
1535     }
1536 
1537     static EcmaString *SlowFlatten(const EcmaVM *vm, const JSHandle<EcmaString> &string,
1538         MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE)
1539     {
1540         return EcmaString::SlowFlatten(vm, string, type);
1541     }
1542 
FlattenNoGC(const EcmaVM * vm,EcmaString * string)1543     static EcmaString *FlattenNoGC(const EcmaVM *vm, EcmaString *string)
1544     {
1545         return EcmaString::FlattenNoGC(vm, string);
1546     }
1547 
GetUtf8DataFlat(const EcmaString * src,CVector<uint8_t> & buf)1548     static const uint8_t *GetUtf8DataFlat(const EcmaString *src, CVector<uint8_t> &buf)
1549     {
1550         return EcmaString::GetUtf8DataFlat(src, buf);
1551     }
1552 
GetNonTreeUtf8Data(const EcmaString * src)1553     static const uint8_t *GetNonTreeUtf8Data(const EcmaString *src)
1554     {
1555         return EcmaString::GetNonTreeUtf8Data(src);
1556     }
1557 
GetUtf16DataFlat(const EcmaString * src,CVector<uint16_t> & buf)1558     static const uint16_t *GetUtf16DataFlat(const EcmaString *src, CVector<uint16_t> &buf)
1559     {
1560         return EcmaString::GetUtf16DataFlat(src, buf);
1561     }
1562 
GetNonTreeUtf16Data(const EcmaString * src)1563     static const uint16_t *GetNonTreeUtf16Data(const EcmaString *src)
1564     {
1565         return EcmaString::GetNonTreeUtf16Data(src);
1566     }
1567 
1568     static JSTaggedValue StringToList(JSThread *thread, JSHandle<JSTaggedValue> &str);
1569 
1570 private:
1571     EcmaString *string_ {nullptr};
1572 };
1573 }  // namespace ecmascript
1574 }  // namespace panda
1575 #endif  // ECMASCRIPT_STRING_H
1576