• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #ifndef ECMASCRIPT_STRING_H
17 #define ECMASCRIPT_STRING_H
18 
19 #include <cstddef>
20 #include <cstdint>
21 #include <cstring>
22 
23 #include "ecmascript/base/utf_helper.h"
24 #include "ecmascript/common.h"
25 #include "ecmascript/ecma_macros.h"
26 #include "ecmascript/js_hclass.h"
27 #include "ecmascript/js_tagged_value.h"
28 #include "ecmascript/mem/barriers.h"
29 #include "ecmascript/mem/space.h"
30 #include "ecmascript/mem/tagged_object.h"
31 
32 #include "libpandabase/macros.h"
33 #include "securec.h"
34 #include "unicode/locid.h"
35 
36 namespace panda {
37 namespace test {
38     class EcmaStringEqualsTest;
39 }
40 namespace ecmascript {
41 template<typename T>
42 class JSHandle;
43 class JSPandaFile;
44 class EcmaVM;
45 class LineEcmaString;
46 class ConstantString;
47 class TreeEcmaString;
48 class SlicedString;
49 class FlatStringInfo;
50 
51 // NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
52 #define ECMA_STRING_CHECK_LENGTH_AND_TRHOW(vm, length)                                        \
53     if ((length) >= MAX_STRING_LENGTH) {                                                      \
54         THROW_RANGE_ERROR_AND_RETURN((vm)->GetJSThread(), "Invalid string length", nullptr);  \
55     }
56 
57 class EcmaString : public TaggedObject {
58     /* Mix Hash Code: --   { 0 | [31 bits raw hash code] }     computed through string
59                       \    { 1 | [31 bits integer numbers] }   fastpath for string to number
60     */
61 public:
62     CAST_CHECK(EcmaString, IsString);
63 
64     static constexpr uint32_t IS_INTEGER_MASK = 1U << 31;
65     static constexpr uint32_t STRING_COMPRESSED_BIT = 0x1;
66     static constexpr uint32_t STRING_INTERN_BIT = 0x2;
67     static constexpr size_t MAX_STRING_LENGTH = 0x40000000U; // 30 bits for string length, 2 bits for special meaning
68     static constexpr uint32_t STRING_LENGTH_SHIFT_COUNT = 2U;
69     static constexpr uint32_t MAX_INTEGER_HASH_NUMBER = 0x3B9AC9FF;
70     static constexpr uint32_t MAX_CACHED_INTEGER_SIZE = 9;
71 
72     static constexpr size_t MIX_LENGTH_OFFSET = TaggedObjectSize();
73     // In last bit of mix_length we store if this string is compressed or not.
74     ACCESSORS_PRIMITIVE_FIELD(MixLength, uint32_t, MIX_LENGTH_OFFSET, MIX_HASHCODE_OFFSET)
75     // In last bit of mix_hash we store if this string is small-integer number or not.
76     ACCESSORS_PRIMITIVE_FIELD(MixHashcode, uint32_t, MIX_HASHCODE_OFFSET, SIZE)
77 
78     enum CompressedStatus {
79         STRING_COMPRESSED,
80         STRING_UNCOMPRESSED,
81     };
82 
83     enum IsIntegerStatus {
84         NOT_INTEGER = 0,
85         IS_INTEGER,
86     };
87 
88     enum TrimMode : uint8_t {
89         TRIM,
90         TRIM_START,
91         TRIM_END,
92     };
93 
94     enum ConcatOptStatus {
95         BEGIN_STRING_ADD = 1,
96         IN_STRING_ADD,
97         CONFIRMED_IN_STRING_ADD,
98         END_STRING_ADD,
99         INVALID_STRING_ADD,
100         HAS_BACKING_STORE,
101     };
102 
103 private:
104     friend class EcmaStringAccessor;
105     friend class LineEcmaString;
106     friend class ConstantString;
107     friend class TreeEcmaString;
108     friend class SlicedString;
109     friend class FlatStringInfo;
110     friend class NameDictionary;
111     friend class panda::test::EcmaStringEqualsTest;
112 
113     static EcmaString *CreateEmptyString(const EcmaVM *vm);
114     static EcmaString *CreateFromUtf8(const EcmaVM *vm, const uint8_t *utf8Data, uint32_t utf8Len,
115         bool canBeCompress, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE, bool isConstantString = false,
116         uint32_t idOffset = 0);
117     static EcmaString *CreateFromUtf8CompressedSubString(const EcmaVM *vm, const JSHandle<EcmaString> &string,
118         uint32_t offset, uint32_t utf8Len, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE);
119     static EcmaString *CreateUtf16StringFromUtf8(const EcmaVM *vm, const uint8_t *utf8Data, uint32_t utf8Len,
120         MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE);
121     static EcmaString *CreateFromUtf16(const EcmaVM *vm, const uint16_t *utf16Data, uint32_t utf16Len,
122         bool canBeCompress, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE);
123     static SlicedString *CreateSlicedString(const EcmaVM *vm, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE);
124     static EcmaString *CreateLineString(const EcmaVM *vm, size_t length, bool compressed);
125     static EcmaString *CreateLineStringNoGC(const EcmaVM *vm, size_t length, bool compressed);
126     static EcmaString *CreateLineStringWithSpaceType(const EcmaVM *vm,
127         size_t length, bool compressed, MemSpaceType type);
128     static EcmaString *CreateTreeString(const EcmaVM *vm,
129         const JSHandle<EcmaString> &left, const JSHandle<EcmaString> &right, uint32_t length, bool compressed);
130     static EcmaString *CreateConstantString(const EcmaVM *vm, const uint8_t *utf8Data,
131         size_t length, bool compressed, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE, uint32_t idOffset = 0);
132     static EcmaString *Concat(const EcmaVM *vm, const JSHandle<EcmaString> &left,
133         const JSHandle<EcmaString> &right, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE);
134     static EcmaString *CopyStringToOldSpace(const EcmaVM *vm, const JSHandle<EcmaString> &original,
135         uint32_t length, bool compressed);
136     static EcmaString *FastSubString(const EcmaVM *vm,
137         const JSHandle<EcmaString> &src, uint32_t start, uint32_t length);
138     static EcmaString *GetSlicedString(const EcmaVM *vm,
139         const JSHandle<EcmaString> &src, uint32_t start, uint32_t length);
140     static EcmaString *GetSubString(const EcmaVM *vm,
141         const JSHandle<EcmaString> &src, uint32_t start, uint32_t length);
142     // require src is LineString
143     // not change src data structure
144     static inline EcmaString *FastSubUtf8String(const EcmaVM *vm,
145         const JSHandle<EcmaString> &src, uint32_t start, uint32_t length);
146     // require src is LineString
147     // not change src data structure
148     static inline EcmaString *FastSubUtf16String(const EcmaVM *vm,
149         const JSHandle<EcmaString> &src, uint32_t start, uint32_t length);
150     inline void TrimLineString(const JSThread *thread, uint32_t newLength);
IsUtf8()151     inline bool IsUtf8() const
152     {
153         return (GetMixLength() & STRING_COMPRESSED_BIT) == STRING_COMPRESSED;
154     }
155 
IsUtf16()156     inline bool IsUtf16() const
157     {
158         return (GetMixLength() & STRING_COMPRESSED_BIT) == STRING_UNCOMPRESSED;
159     }
160 
IsInteger()161     inline bool IsInteger()
162     {
163         return (GetHashcode() & IS_INTEGER_MASK) == IS_INTEGER_MASK;
164     }
165 
166     // require is LineString
167     inline uint16_t *GetData() const;
168     inline const uint8_t *GetDataUtf8() const;
169     inline const uint16_t *GetDataUtf16() const;
170 
171     // require is LineString
172     inline uint8_t *GetDataUtf8Writable();
173     inline uint16_t *GetDataUtf16Writable();
174 
GetLength()175     inline uint32_t GetLength() const
176     {
177         return GetMixLength() >> STRING_LENGTH_SHIFT_COUNT;
178     }
179 
180     inline void SetLength(uint32_t length, bool compressed = false)
181     {
182         ASSERT(length < MAX_STRING_LENGTH);
183         // Use 0u for compressed/utf8 expression
184         SetMixLength((length << STRING_LENGTH_SHIFT_COUNT) | (compressed ? STRING_COMPRESSED : STRING_UNCOMPRESSED));
185     }
186 
GetRawHashcode()187     inline uint32_t GetRawHashcode() const
188     {
189         return GetMixHashcode() & (~IS_INTEGER_MASK);
190     }
191 
MixHashcode(uint32_t hashcode,bool isInteger)192     static inline uint32_t MixHashcode(uint32_t hashcode, bool isInteger)
193     {
194         return isInteger ? (hashcode | IS_INTEGER_MASK) : (hashcode & (~IS_INTEGER_MASK));
195     }
196 
197     inline void SetRawHashcode(uint32_t hashcode, bool isInteger = false)
198     {
199         // Use 0u for not integer string's expression
200         SetMixHashcode(MixHashcode(hashcode, isInteger));
201     }
202 
203     inline size_t GetUtf8Length(bool modify = true, bool isGetBufferSize = false) const;
204 
SetIsInternString()205     inline void SetIsInternString()
206     {
207         SetMixLength(GetMixLength() | STRING_INTERN_BIT);
208     }
209 
IsInternString()210     inline bool IsInternString() const
211     {
212         return (GetMixLength() & STRING_INTERN_BIT) != 0;
213     }
214 
ClearInternStringFlag()215     inline void ClearInternStringFlag()
216     {
217         SetMixLength(GetMixLength() & ~STRING_INTERN_BIT);
218     }
219 
TryGetHashCode(uint32_t * hash)220     inline bool TryGetHashCode(uint32_t *hash)
221     {
222         uint32_t hashcode = GetMixHashcode();
223         if (hashcode == 0 && GetLength() != 0) {
224             return false;
225         }
226         *hash = hashcode;
227         return true;
228     }
229 
GetIntegerCode()230     inline uint32_t GetIntegerCode()
231     {
232         ASSERT(GetMixHashcode() & IS_INTEGER_MASK);
233         return GetRawHashcode();
234     }
235 
236     // not change this data structure.
237     // if string is not flat, this func has low efficiency.
GetHashcode()238     uint32_t PUBLIC_API GetHashcode()
239     {
240         uint32_t hashcode = GetMixHashcode();
241         // GetLength() == 0 means it's an empty array.No need to computeHashCode again when hashseed is 0.
242         if (hashcode == 0 && GetLength() != 0) {
243             hashcode = ComputeHashcode();
244             SetMixHashcode(hashcode);
245         }
246         return hashcode;
247     }
248 
249     template<typename T>
IsDecimalDigitChar(const T c)250     inline static bool IsDecimalDigitChar(const T c)
251     {
252         return (c >= '0' && c <= '9');
253     }
254 
ComputeIntegerHash(uint32_t * num,uint8_t c)255     static uint32_t ComputeIntegerHash(uint32_t *num, uint8_t c)
256     {
257         if (!IsDecimalDigitChar(c)) {
258             return false;
259         }
260         int charDate = c - '0';
261         *num = (*num) * 10 + charDate; // 10: decimal factor
262         return true;
263     }
264 
265     bool HashIntegerString(uint32_t length, uint32_t *hash, uint32_t hashSeed) const;
266 
267     template<typename T>
HashIntegerString(const T * data,size_t size,uint32_t * hash,uint32_t hashSeed)268     static bool HashIntegerString(const T *data, size_t size, uint32_t *hash, uint32_t hashSeed)
269     {
270         ASSERT(size >= 0);
271         if (hashSeed == 0) {
272             if (IsDecimalDigitChar(data[0]) && data[0] != '0') {
273                 uint32_t num = data[0] - '0';
274                 uint32_t i = 1;
275                 do {
276                     if (i == size) {
277                         // compute mix hash
278                         if (num <= MAX_INTEGER_HASH_NUMBER) {
279                             *hash = MixHashcode(num, IS_INTEGER);
280                             return true;
281                         }
282                         return false;
283                     }
284                 } while (ComputeIntegerHash(&num, data[i++]));
285             }
286             if (size == 1 && (data[0] == '0')) {
287                 *hash = MixHashcode(0, IS_INTEGER);
288                 return true;
289             }
290         } else {
291             if (IsDecimalDigitChar(data[0])) {
292                 uint32_t num = hashSeed * 10 + (data[0] - '0'); // 10: decimal factor
293                 uint32_t i = 1;
294                 do {
295                     if (i == size) {
296                         // compute mix hash
297                         if (num <= MAX_INTEGER_HASH_NUMBER) {
298                             *hash = MixHashcode(num, IS_INTEGER);
299                             return true;
300                         }
301                         return false;
302                     }
303                 } while (ComputeIntegerHash(&num, data[i++]));
304             }
305         }
306         return false;
307     }
308 
309     // not change this data structure.
310     // if string is not flat, this func has low efficiency.
311     uint32_t PUBLIC_API ComputeHashcode() const;
312     std::pair<uint32_t, bool> PUBLIC_API ComputeRawHashcode() const;
313     uint32_t PUBLIC_API ComputeHashcode(uint32_t rawHashSeed, bool isInteger) const;
314 
315     static uint32_t ComputeHashcodeUtf8(const uint8_t *utf8Data, size_t utf8Len, bool canBeCompress);
316     static uint32_t ComputeHashcodeUtf16(const uint16_t *utf16Data, uint32_t length);
317 
318     template<bool verify = true>
319     uint16_t At(int32_t index) const;
320 
321     // require is LineString
322     void WriteData(uint32_t index, uint16_t src);
323 
324     // can change left and right data structure
325     static int32_t Compare(const EcmaVM *vm, const JSHandle<EcmaString> &left, const JSHandle<EcmaString> &right);
326 
327     static bool IsSubStringAt(const EcmaVM *vm, const JSHandle<EcmaString>& left,
328         const JSHandle<EcmaString>& right, uint32_t offset);
329 
330     // Check that two spans are equal. Should have the same length.
331     /* static */
332     template<typename T, typename T1>
StringsAreEquals(Span<const T> & str1,Span<const T1> & str2)333     static bool StringsAreEquals(Span<const T> &str1, Span<const T1> &str2)
334     {
335         ASSERT(str1.Size() <= str2.Size());
336         size_t size = str1.Size();
337         if (!std::is_same_v<T, T1>) {
338             for (size_t i = 0; i < size; i++) {
339                 auto left = static_cast<uint16_t>(str1[i]);
340                 auto right = static_cast<uint16_t>(str2[i]);
341                 if (left != right) {
342                     return false;
343                 }
344             }
345             return true;
346         }
347 
348         return !memcmp(str1.data(), str2.data(), size * sizeof(T));
349     }
350 
351     // Converts utf8Data to utf16 and compare it with given utf16_data.
352     static bool IsUtf8EqualsUtf16(const uint8_t *utf8Data, size_t utf8Len, const uint16_t *utf16Data,
353                                   uint32_t utf16Len);
354     // Compares string1 + string2 by bytes, It doesn't check canonical unicode equivalence.
355     bool EqualToSplicedString(const EcmaString *str1, const EcmaString *str2);
356     // Compares strings by bytes, It doesn't check canonical unicode equivalence.
357     static PUBLIC_API bool StringsAreEqual(const EcmaVM *vm, const JSHandle<EcmaString> &str1,
358         const JSHandle<EcmaString> &str2);
359     // Compares strings by bytes, It doesn't check canonical unicode equivalence.
360     static PUBLIC_API bool StringsAreEqual(EcmaString *str1, EcmaString *str2);
361     // Two strings have the same type of utf encoding format.
362     static bool StringsAreEqualDiffUtfEncoding(EcmaString *str1, EcmaString *str2);
363     static bool StringsAreEqualDiffUtfEncoding(const FlatStringInfo &str1, const FlatStringInfo &str2);
364     // Compares strings by bytes, It doesn't check canonical unicode equivalence.
365     // not change str1 data structure.
366     // if str1 is not flat, this func has low efficiency.
367     static bool StringIsEqualUint8Data(const EcmaString *str1, const uint8_t *dataAddr, uint32_t dataLen,
368                                        bool canBeCompress);
369     // Compares strings by bytes, It doesn't check canonical unicode equivalence.
370     // not change str1 data structure.
371     // if str1 is not flat, this func has low efficiency.
372     static bool StringsAreEqualUtf16(const EcmaString *str1, const uint16_t *utf16Data, uint32_t utf16Len);
373 
374     // can change receiver and search data structure
375     static int32_t IndexOf(const EcmaVM *vm,
376         const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos = 0);
377 
378     // can change receiver and search data structure
379     static int32_t LastIndexOf(const EcmaVM *vm,
380         const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos = 0);
381 
382     inline size_t CopyDataUtf8(uint8_t *buf, size_t maxLength, bool modify = true) const
383     {
384         if (maxLength == 0) {
385             return 1; // maxLength was -1 at napi
386         }
387         size_t length = GetLength();
388         if (length > maxLength) {
389             return 0;
390         }
391         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
392         buf[maxLength - 1] = '\0';
393         // Put comparison here so that internal usage and napi can use the same CopyDataRegionUtf8
394         return CopyDataRegionUtf8(buf, 0, length, maxLength, modify) + 1;  // add place for zero in the end
395     }
396 
397     // It allows user to copy into buffer even if maxLength < length
398     inline size_t WriteUtf8(uint8_t *buf, size_t maxLength, bool isWriteBuffer = false) const
399     {
400         if (maxLength == 0) {
401             return 1; // maxLength was -1 at napi
402         }
403         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
404         buf[maxLength - 1] = '\0';
405         return CopyDataRegionUtf8(buf, 0, GetLength(), maxLength, true, isWriteBuffer) + 1;
406     }
407 
CopyDataToUtf16(uint16_t * buf,uint32_t length,uint32_t bufLength)408     size_t CopyDataToUtf16(uint16_t *buf, uint32_t length, uint32_t bufLength) const
409     {
410         if (IsUtf16()) {
411             CVector<uint16_t> tmpBuf;
412             const uint16_t *data = EcmaString::GetUtf16DataFlat(this, tmpBuf);
413             if (length > bufLength) {
414                 if (memcpy_s(buf, bufLength * sizeof(uint16_t), data, bufLength * sizeof(uint16_t)) != EOK) {
415                     LOG_FULL(FATAL) << "memcpy_s failed when length > bufLength";
416                     UNREACHABLE();
417                 }
418                 return bufLength;
419             }
420             if (memcpy_s(buf, bufLength * sizeof(uint16_t), data, length * sizeof(uint16_t)) != EOK) {
421                 LOG_FULL(FATAL) << "memcpy_s failed";
422                 UNREACHABLE();
423             }
424             return length;
425         }
426         CVector<uint8_t> tmpBuf;
427         const uint8_t *data = EcmaString::GetUtf8DataFlat(this, tmpBuf);
428         if (length > bufLength) {
429             return base::utf_helper::ConvertRegionUtf8ToUtf16(data, buf, bufLength, bufLength);
430         }
431         return base::utf_helper::ConvertRegionUtf8ToUtf16(data, buf, length, bufLength);
432     }
433 
434     // It allows user to copy into buffer even if maxLength < length
WriteUtf16(uint16_t * buf,uint32_t targetLength,uint32_t bufLength)435     inline size_t WriteUtf16(uint16_t *buf, uint32_t targetLength, uint32_t bufLength) const
436     {
437         if (bufLength == 0) {
438             return 0;
439         }
440         // Returns a number representing a valid backrest length.
441         return CopyDataToUtf16(buf, targetLength, bufLength);
442     }
443 
WriteOneByte(uint8_t * buf,size_t maxLength)444     size_t WriteOneByte(uint8_t *buf, size_t maxLength) const
445     {
446         if (maxLength == 0) {
447             return 0;
448         }
449         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
450         buf[maxLength - 1] = '\0';
451         uint32_t length = GetLength();
452         if (!IsUtf16()) {
453             CVector<uint8_t> tmpBuf;
454             const uint8_t *data = GetUtf8DataFlat(this, tmpBuf);
455             if (length > maxLength) {
456                 length = maxLength;
457             }
458             if (memcpy_s(buf, maxLength, data, length) != EOK) {
459                 LOG_FULL(FATAL) << "memcpy_s failed when write one byte";
460                 UNREACHABLE();
461             }
462             return length;
463         }
464 
465         CVector<uint16_t> tmpBuf;
466         const uint16_t *data = GetUtf16DataFlat(this, tmpBuf);
467         if (length > maxLength) {
468             return base::utf_helper::ConvertRegionUtf16ToLatin1(data, buf, maxLength, maxLength);
469         }
470         return base::utf_helper::ConvertRegionUtf16ToLatin1(data, buf, length, maxLength);
471     }
472 
473     size_t CopyDataRegionUtf8(uint8_t *buf, size_t start, size_t length, size_t maxLength,
474                               bool modify = true, bool isWriteBuffer = false) const
475     {
476         uint32_t len = GetLength();
477         if (start + length > len) {
478             return 0;
479         }
480         if (!IsUtf16()) {
481             if (length > std::numeric_limits<size_t>::max() / 2 - 1) {  // 2: half
482                 LOG_FULL(FATAL) << " length is higher than half of size_t::max";
483                 UNREACHABLE();
484             }
485             CVector<uint8_t> tmpBuf;
486             const uint8_t *data = GetUtf8DataFlat(this, tmpBuf) + start;
487             // Only copy maxLength number of chars into buffer if length > maxLength
488             auto dataLen = std::min(length, maxLength);
489             std::copy(data, data + dataLen, buf);
490             return dataLen;
491         }
492         CVector<uint16_t> tmpBuf;
493         const uint16_t *data = GetUtf16DataFlat(this, tmpBuf);
494         if (length > maxLength) {
495             return base::utf_helper::ConvertRegionUtf16ToUtf8(data, buf, maxLength, maxLength, start,
496                                                               modify, isWriteBuffer);
497         }
498         return base::utf_helper::ConvertRegionUtf16ToUtf8(data, buf, length, maxLength, start,
499                                                           modify, isWriteBuffer);
500     }
501 
CopyDataUtf16(uint16_t * buf,uint32_t maxLength)502     inline uint32_t CopyDataUtf16(uint16_t *buf, uint32_t maxLength) const
503     {
504         uint32_t length = GetLength();
505         if (length > maxLength) {
506             return 0;
507         }
508         if (IsUtf16()) {
509             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
510             CVector<uint16_t> tmpBuf;
511             const uint16_t *data = GetUtf16DataFlat(this, tmpBuf);
512             if (memcpy_s(buf, maxLength * sizeof(uint16_t), data, length * sizeof(uint16_t)) != EOK) {
513                 LOG_FULL(FATAL) << "memcpy_s failed";
514                 UNREACHABLE();
515             }
516             return length;
517         }
518         CVector<uint8_t> tmpBuf;
519         const uint8_t *data = GetUtf8DataFlat(this, tmpBuf);
520         return base::utf_helper::ConvertRegionUtf8ToUtf16(data, buf, length, maxLength);
521     }
522 
523     std::u16string ToU16String(uint32_t len = 0);
524 
ToOneByteDataForced()525     std::unique_ptr<uint8_t[]> ToOneByteDataForced()
526     {
527         uint8_t *buf = nullptr;
528         auto length = GetLength();
529         if (IsUtf16()) {
530             auto size = length * sizeof(uint16_t);
531             buf = new uint8_t[size]();
532             CopyDataUtf16(reinterpret_cast<uint16_t *>(buf), length);
533         } else {
534             buf = new uint8_t[length + 1]();
535             CopyDataUtf8(buf, length + 1);
536         }
537         return std::unique_ptr<uint8_t[]>(buf);
538     }
539 
540     Span<const uint8_t> ToUtf8Span(CVector<uint8_t> &buf, bool modify = true, bool cesu8 = false)
541     {
542         Span<const uint8_t> str;
543         uint32_t strLen = GetLength();
544         if (UNLIKELY(IsUtf16())) {
545             CVector<uint16_t> tmpBuf;
546             const uint16_t *data = EcmaString::GetUtf16DataFlat(this, tmpBuf);
547             ASSERT(base::utf_helper::Utf16ToUtf8Size(data, strLen, modify, false, cesu8) > 0);
548             size_t len = base::utf_helper::Utf16ToUtf8Size(data, strLen, modify, false, cesu8) - 1;
549             buf.reserve(len);
550             len = base::utf_helper::ConvertRegionUtf16ToUtf8(data, buf.data(), strLen, len, 0, modify, false, cesu8);
551             str = Span<const uint8_t>(buf.data(), len);
552         } else {
553             const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
554             str = Span<const uint8_t>(data, strLen);
555         }
556         return str;
557     }
558 
559     Span<const uint8_t> DebuggerToUtf8Span(CVector<uint8_t> &buf, bool modify = true)
560     {
561         Span<const uint8_t> str;
562         uint32_t strLen = GetLength();
563         if (UNLIKELY(IsUtf16())) {
564             CVector<uint16_t> tmpBuf;
565             const uint16_t *data = EcmaString::GetUtf16DataFlat(this, tmpBuf);
566             size_t len = base::utf_helper::Utf16ToUtf8Size(data, strLen, modify) - 1;
567             buf.reserve(len);
568             len = base::utf_helper::DebuggerConvertRegionUtf16ToUtf8(data, buf.data(), strLen, len, 0, modify);
569             str = Span<const uint8_t>(buf.data(), len);
570         } else {
571             const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
572             str = Span<const uint8_t>(data, strLen);
573         }
574         return str;
575     }
576 
577     inline Span<const uint8_t> FastToUtf8Span() const;
578 
TryToGetInteger(uint32_t * result)579     bool TryToGetInteger(uint32_t *result)
580     {
581         if (!IsInteger()) {
582             return false;
583         }
584         ASSERT(GetLength() <= MAX_CACHED_INTEGER_SIZE);
585         *result = GetIntegerCode();
586         return true;
587     }
588 
589     // using integer number set into hash
TryToSetIntegerHash(int32_t num)590     inline bool TryToSetIntegerHash(int32_t num)
591     {
592         uint32_t hashcode = GetMixHashcode();
593         if (hashcode == 0 && GetLength() != 0) {
594             SetRawHashcode(static_cast<uint32_t>(num), IS_INTEGER);
595             return true;
596         }
597         return false;
598     }
599 
600     void WriteData(EcmaString *src, uint32_t start, uint32_t destSize, uint32_t length);
601 
602     static bool CanBeCompressed(const uint8_t *utf8Data, uint32_t utf8Len);
603     static bool CanBeCompressed(const uint16_t *utf16Data, uint32_t utf16Len);
604     static bool CanBeCompressed(const EcmaString *string);
605 
606     bool PUBLIC_API ToElementIndex(uint32_t *index);
607 
608     bool ToInt(int32_t *index, bool *negative);
609 
610     bool ToUInt64FromLoopStart(uint64_t *index, uint32_t loopStart, const uint8_t *data);
611 
612     bool PUBLIC_API ToTypedArrayIndex(uint32_t *index);
613 
614     template<bool isLower>
615     static EcmaString *ConvertCase(const EcmaVM *vm, const JSHandle<EcmaString> &src);
616 
617     template<bool isLower>
618     static EcmaString *LocaleConvertCase(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale);
619 
620     template<typename T>
621     static EcmaString *TrimBody(const JSThread *thread, const JSHandle<EcmaString> &src, Span<T> &data, TrimMode mode);
622 
623     static EcmaString *Trim(const JSThread *thread, const JSHandle<EcmaString> &src, TrimMode mode = TrimMode::TRIM);
624 
625     // single char copy for loop
626     template<typename DstType, typename SrcType>
CopyChars(DstType * dst,SrcType * src,uint32_t count)627     static void CopyChars(DstType *dst, SrcType *src, uint32_t count)
628     {
629         Span<SrcType> srcSp(src, count);
630         Span<DstType> dstSp(dst, count);
631         for (uint32_t i = 0; i < count; i++) {
632             dstSp[i] = srcSp[i];
633         }
634     }
635 
636     // memory block copy
637     template<typename T>
638     static bool MemCopyChars(Span<T> &dst, size_t dstMax, Span<const T> &src, size_t count);
639 
640     template<typename T>
ComputeHashForData(const T * data,size_t size,uint32_t hashSeed)641     static uint32_t ComputeHashForData(const T *data, size_t size, uint32_t hashSeed)
642     {
643         uint32_t hash = hashSeed;
644         Span<const T> sp(data, size);
645         for (auto c : sp) {
646             constexpr size_t SHIFT = 5;
647             hash = (hash << SHIFT) - hash + c;
648         }
649         return hash;
650     }
651 
IsASCIICharacter(uint16_t data)652     static bool IsASCIICharacter(uint16_t data)
653     {
654         if (data == 0) {
655             return false;
656         }
657         // \0 is not considered ASCII in Ecma-Modified-UTF8 [only modify '\u0000']
658         return data <= base::utf_helper::UTF8_1B_MAX;
659     }
660 
661     template<typename T1, typename T2>
662     static int32_t IndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos, int32_t max);
663 
664     template<typename T1, typename T2>
665     static int32_t LastIndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos);
666 
667     bool IsFlat() const;
668 
IsLineString()669     bool IsLineString() const
670     {
671         return GetClass()->IsLineString();
672     }
IsConstantString()673     bool IsConstantString() const
674     {
675         return GetClass()->IsConstantString();
676     }
IsSlicedString()677     bool IsSlicedString() const
678     {
679         return GetClass()->IsSlicedString();
680     }
IsTreeString()681     bool IsTreeString() const
682     {
683         return GetClass()->IsTreeString();
684     }
NotTreeString()685     bool NotTreeString() const
686     {
687         return !IsTreeString();
688     }
IsLineOrConstantString()689     bool IsLineOrConstantString() const
690     {
691         auto hclass = GetClass();
692         return hclass->IsLineString() || hclass->IsConstantString();
693     }
694 
GetStringType()695     JSType GetStringType() const
696     {
697         JSType type = GetClass()->GetObjectType();
698         ASSERT(type >= JSType::STRING_FIRST && type <= JSType::STRING_LAST);
699         return type;
700     }
701 
702     template <typename Char>
703     static void WriteToFlat(EcmaString *src, Char *buf, uint32_t maxLength);
704 
705     template <typename Char>
706     static void WriteToFlatWithPos(EcmaString *src, Char *buf, uint32_t length, uint32_t pos);
707 
708     static const uint8_t *PUBLIC_API GetUtf8DataFlat(const EcmaString *src, CVector<uint8_t> &buf);
709 
710     static const uint8_t *PUBLIC_API GetNonTreeUtf8Data(const EcmaString *src);
711 
712     static const uint16_t *PUBLIC_API GetUtf16DataFlat(const EcmaString *src, CVector<uint16_t> &buf);
713 
714     static const uint16_t *PUBLIC_API GetNonTreeUtf16Data(const EcmaString *src);
715 
716     // string must be not flat
717     static EcmaString *SlowFlatten(const EcmaVM *vm, const JSHandle<EcmaString> &string, MemSpaceType type);
718 
719     PUBLIC_API static EcmaString *Flatten(const EcmaVM *vm, const JSHandle<EcmaString> &string,
720                                MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE);
721 
722     static FlatStringInfo FlattenAllString(const EcmaVM *vm, const JSHandle<EcmaString> &string,
723                                             MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE);
724 
725     static EcmaString *FlattenNoGC(const EcmaVM *vm, EcmaString *string);
726 
727     static EcmaString *ToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src);
728 
729     static EcmaString *ToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src);
730 
731     static EcmaString *ToLocaleLower(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale);
732 
733     static EcmaString *ToLocaleUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale);
734 
735     static EcmaString *TryToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src);
736 
737     static EcmaString *TryToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src);
738 
739     static EcmaString *ConvertUtf8ToLowerOrUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src,
740                                                  bool toLower, uint32_t startIndex = 0);
741 };
742 
743 // The LineEcmaString abstract class captures sequential string values, only LineEcmaString can store chars data
744 class LineEcmaString : public EcmaString {
745 public:
746     static constexpr uint32_t MAX_LENGTH = (1 << 28) - 16;
747     static constexpr uint32_t INIT_LENGTH_TIMES = 4;
748     // DATA_OFFSET: the string data stored after the string header.
749     // Data can be stored in utf8 or utf16 form according to compressed bit.
750     static constexpr size_t DATA_OFFSET = EcmaString::SIZE;  // DATA_OFFSET equal to Empty String size
751 
752     CAST_CHECK(LineEcmaString, IsLineString);
753 
754     DECL_VISIT_ARRAY(DATA_OFFSET, 0, GetPointerLength());
755 
Cast(EcmaString * str)756     static LineEcmaString *Cast(EcmaString *str)
757     {
758         return static_cast<LineEcmaString *>(str);
759     }
760 
Cast(const EcmaString * str)761     static LineEcmaString *Cast(const EcmaString *str)
762     {
763         return LineEcmaString::Cast(const_cast<EcmaString *>(str));
764     }
765 
ComputeSizeUtf8(uint32_t utf8Len)766     static size_t ComputeSizeUtf8(uint32_t utf8Len)
767     {
768         return DATA_OFFSET + utf8Len;
769     }
770 
ComputeSizeUtf16(uint32_t utf16Len)771     static size_t ComputeSizeUtf16(uint32_t utf16Len)
772     {
773         return DATA_OFFSET + utf16Len * sizeof(uint16_t);
774     }
775 
ObjectSize(EcmaString * str)776     static size_t ObjectSize(EcmaString *str)
777     {
778         uint32_t length = str->GetLength();
779         return str->IsUtf16() ? ComputeSizeUtf16(length) : ComputeSizeUtf8(length);
780     }
781 
DataSize(EcmaString * str)782     static size_t DataSize(EcmaString *str)
783     {
784         uint32_t length = str->GetLength();
785         return str->IsUtf16() ? length * sizeof(uint16_t) : length;
786     }
787 
GetPointerLength()788     size_t GetPointerLength()
789     {
790         size_t byteSize = DataSize(this);
791         return AlignUp(byteSize, static_cast<size_t>(MemAlignment::MEM_ALIGN_OBJECT)) / sizeof(JSTaggedType);
792     }
793 
GetData()794     uint16_t *GetData() const
795     {
796         return reinterpret_cast<uint16_t *>(ToUintPtr(this) + DATA_OFFSET);
797     }
798 
799     template<bool verify = true>
Get(int32_t index)800     uint16_t Get(int32_t index) const
801     {
802         int32_t length = static_cast<int32_t>(GetLength());
803         if (verify) {
804             if ((index < 0) || (index >= length)) {
805                 return 0;
806             }
807         }
808         if (!IsUtf16()) {
809             Span<const uint8_t> sp(GetDataUtf8(), length);
810             return sp[index];
811         }
812         Span<const uint16_t> sp(GetDataUtf16(), length);
813         return sp[index];
814     }
815 
Set(uint32_t index,uint16_t src)816     void Set(uint32_t index, uint16_t src)
817     {
818         ASSERT(index < GetLength());
819         if (IsUtf8()) {
820             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
821             *(reinterpret_cast<uint8_t *>(GetData()) + index) = static_cast<uint8_t>(src);
822         } else {
823             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
824             *(GetData() + index) = src;
825         }
826     }
827 };
828 static_assert((LineEcmaString::DATA_OFFSET % static_cast<uint8_t>(MemAlignment::MEM_ALIGN_OBJECT)) == 0);
829 
830 class ConstantString : public EcmaString {
831 public:
832     static constexpr size_t RELOCTAED_DATA_OFFSET = EcmaString::SIZE;
833     // ConstantData is the pointer of const string in the pandafile.
834     // String in pandafile is encoded by the utf8 format.
835     // EntityId is normally the uint32_t index in the pandafile.
836     // When the pandafile is to be removed, EntityId will become -1.
837     // The real string data will be reloacted into bytearray and stored in RelocatedData.
838     // ConstantData will also point at data of bytearray data.
839     ACCESSORS(RelocatedData, RELOCTAED_DATA_OFFSET, ENTITY_ID_OFFSET);
840     ACCESSORS_PRIMITIVE_FIELD(EntityId, int64_t, ENTITY_ID_OFFSET, CONSTANT_DATA_OFFSET);
841     ACCESSORS_NATIVE_FIELD(ConstantData, uint8_t, CONSTANT_DATA_OFFSET, LAST_OFFSET);
842     DEFINE_ALIGN_SIZE(LAST_OFFSET);
843 
844     CAST_CHECK(ConstantString, IsConstantString);
845     DECL_VISIT_OBJECT(RELOCTAED_DATA_OFFSET, ENTITY_ID_OFFSET);
846 
Cast(EcmaString * str)847     static ConstantString *Cast(EcmaString *str)
848     {
849         return static_cast<ConstantString *>(str);
850     }
851 
Cast(const EcmaString * str)852     static ConstantString *Cast(const EcmaString *str)
853     {
854         return ConstantString::Cast(const_cast<EcmaString *>(str));
855     }
856 
ObjectSize()857     static size_t ObjectSize()
858     {
859         return ConstantString::SIZE;
860     }
861 
GetEntityIdU32()862     uint32_t GetEntityIdU32() const
863     {
864         ASSERT(GetEntityId() >= 0);
865         return static_cast<uint32_t>(GetEntityId());
866     }
867 
868     template<bool verify = true>
Get(int32_t index)869     uint16_t Get(int32_t index) const
870     {
871         int32_t length = static_cast<int32_t>(GetLength());
872         if (verify) {
873             if ((index < 0) || (index >= length)) {
874                 return 0;
875             }
876         }
877         ASSERT(IsUtf8());
878         Span<const uint8_t> sp(GetConstantData(), length);
879         return sp[index];
880     }
881 };
882 
883 // The substrings of another string use SlicedString to describe.
884 class SlicedString : public EcmaString {
885 public:
886     static constexpr uint32_t MIN_SLICED_ECMASTRING_LENGTH = 13;
887     static constexpr size_t PARENT_OFFSET = EcmaString::SIZE;
888     ACCESSORS(Parent, PARENT_OFFSET, STARTINDEX_OFFSET);
889     ACCESSORS_PRIMITIVE_FIELD(StartIndex, uint32_t, STARTINDEX_OFFSET, BACKING_STORE_FLAG);
890     ACCESSORS_PRIMITIVE_FIELD(HasBackingStore, uint32_t, BACKING_STORE_FLAG, SIZE);
891 
892     DECL_VISIT_OBJECT(PARENT_OFFSET, STARTINDEX_OFFSET);
893 
894     CAST_CHECK(SlicedString, IsSlicedString);
895 private:
896     friend class EcmaString;
Cast(EcmaString * str)897     static SlicedString *Cast(EcmaString *str)
898     {
899         return static_cast<SlicedString *>(str);
900     }
901 
Cast(const EcmaString * str)902     static SlicedString *Cast(const EcmaString *str)
903     {
904         return SlicedString::Cast(const_cast<EcmaString *>(str));
905     }
906 
ObjectSize()907     static size_t ObjectSize()
908     {
909         return SlicedString::SIZE;
910     }
911 
912     // Minimum length for a sliced string
913     template<bool verify = true>
Get(int32_t index)914     uint16_t Get(int32_t index) const
915     {
916         int32_t length = static_cast<int32_t>(GetLength());
917         if (verify) {
918             if ((index < 0) || (index >= length)) {
919                 return 0;
920             }
921         }
922         EcmaString *parent = EcmaString::Cast(GetParent());
923         if (parent->IsLineString()) {
924             if (parent->IsUtf8()) {
925                 Span<const uint8_t> sp(parent->GetDataUtf8() + GetStartIndex(), length);
926                 return sp[index];
927             }
928             Span<const uint16_t> sp(parent->GetDataUtf16() + GetStartIndex(), length);
929             return sp[index];
930         }
931         Span<const uint8_t> sp(ConstantString::Cast(parent)->GetConstantData() + GetStartIndex(), length);
932         return sp[index];
933     }
934 };
935 
936 class TreeEcmaString : public EcmaString {
937 public:
938     // Minimum length for a tree string
939     static constexpr uint32_t MIN_TREE_ECMASTRING_LENGTH = 13;
940 
941     static constexpr size_t FIRST_OFFSET = EcmaString::SIZE;
942     ACCESSORS(First, FIRST_OFFSET, SECOND_OFFSET);
943     ACCESSORS(Second, SECOND_OFFSET, SIZE);
944 
945     DECL_VISIT_OBJECT(FIRST_OFFSET, SIZE);
946 
947     CAST_CHECK(TreeEcmaString, IsTreeString);
948 
Cast(EcmaString * str)949     static TreeEcmaString *Cast(EcmaString *str)
950     {
951         return static_cast<TreeEcmaString *>(str);
952     }
953 
Cast(const EcmaString * str)954     static TreeEcmaString *Cast(const EcmaString *str)
955     {
956         return TreeEcmaString::Cast(const_cast<EcmaString *>(str));
957     }
958 
IsFlat()959     bool IsFlat() const
960     {
961         auto strSecond = EcmaString::Cast(GetSecond());
962         return strSecond->GetLength() == 0;
963     }
964 
965     template<bool verify = true>
Get(int32_t index)966     uint16_t Get(int32_t index) const
967     {
968         int32_t length = static_cast<int32_t>(GetLength());
969         if (verify) {
970             if ((index < 0) || (index >= length)) {
971                 return 0;
972             }
973         }
974 
975         if (IsFlat()) {
976             EcmaString *first = EcmaString::Cast(GetFirst());
977             return first->At<verify>(index);
978         }
979         EcmaString *string = const_cast<TreeEcmaString *>(this);
980         while (true) {
981             if (string->IsTreeString()) {
982                 EcmaString *first = EcmaString::Cast(TreeEcmaString::Cast(string)->GetFirst());
983                 if (static_cast<int32_t>(first->GetLength()) > index) {
984                     string = first;
985                 } else {
986                     index -= static_cast<int32_t>(first->GetLength());
987                     string = EcmaString::Cast(TreeEcmaString::Cast(string)->GetSecond());
988                 }
989             } else {
990                 return string->At<verify>(index);
991             }
992         }
993         UNREACHABLE();
994     }
995 };
996 
997 // FlatStringInfo holds an EcmaString* instead of a JSHandle. If a GC occurs during its usage period,
998 // it may cause the pointer to become invalid, necessitating the pointer to be reset.
999 class FlatStringInfo {
1000 public:
FlatStringInfo(EcmaString * string,uint32_t startIndex,uint32_t length)1001     FlatStringInfo(EcmaString *string, uint32_t startIndex, uint32_t length) : string_(string),
1002                                                                                startIndex_(startIndex),
1003                                                                                length_(length) {}
IsUtf8()1004     bool IsUtf8() const
1005     {
1006         return string_->IsUtf8();
1007     }
1008 
IsUtf16()1009     bool IsUtf16() const
1010     {
1011         return string_->IsUtf16();
1012     }
1013 
GetString()1014     EcmaString *GetString() const
1015     {
1016         return string_;
1017     }
1018 
SetString(EcmaString * string)1019     void SetString(EcmaString *string)
1020     {
1021         string_ = string;
1022     }
1023 
GetStartIndex()1024     uint32_t GetStartIndex() const
1025     {
1026         return startIndex_;
1027     }
1028 
SetStartIndex(uint32_t index)1029     void SetStartIndex(uint32_t index)
1030     {
1031         startIndex_ = index;
1032     }
1033 
GetLength()1034     uint32_t GetLength() const
1035     {
1036         return length_;
1037     }
1038 
1039     const uint8_t *GetDataUtf8() const;
1040     const uint16_t *GetDataUtf16() const;
1041     uint8_t *GetDataUtf8Writable() const;
1042     uint16_t *GetDataUtf16Writable() const;
1043     std::u16string ToU16String(uint32_t len = 0);
1044 private:
1045     EcmaString *string_ {nullptr};
1046     uint32_t startIndex_ {0};
1047     uint32_t length_ {0};
1048 };
1049 
// Do not call the member functions of EcmaString directly;
// use the corresponding functions of EcmaStringAccessor instead.
// e.g.: EcmaString *str = ***; str->GetLength() ----->  EcmaStringAccessor(str).GetLength()
1053 class PUBLIC_API EcmaStringAccessor {
1054 public:
EcmaStringAccessor(EcmaString * string)1055     explicit inline EcmaStringAccessor(EcmaString *string)
1056     {
1057         ASSERT(string != nullptr);
1058         string_ = string;
1059     }
1060 
1061     explicit EcmaStringAccessor(TaggedObject *obj);
1062 
1063     explicit EcmaStringAccessor(JSTaggedValue value);
1064 
1065     explicit EcmaStringAccessor(const JSHandle<EcmaString> &strHandle);
1066 
1067     static EcmaString *CreateLineString(const EcmaVM *vm, size_t length, bool compressed);
1068 
CreateEmptyString(const EcmaVM * vm)1069     static EcmaString *CreateEmptyString(const EcmaVM *vm)
1070     {
1071         return EcmaString::CreateEmptyString(vm);
1072     }
1073 
1074     static EcmaString *CreateFromUtf8(const EcmaVM *vm, const uint8_t *utf8Data, uint32_t utf8Len, bool canBeCompress,
1075                                       MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE, bool isConstantString = false,
1076                                       uint32_t idOffset = 0)
1077     {
1078         return EcmaString::CreateFromUtf8(vm, utf8Data, utf8Len, canBeCompress, type, isConstantString, idOffset);
1079     }
1080 
1081     static EcmaString *CreateFromUtf8CompressedSubString(const EcmaVM *vm, const JSHandle<EcmaString> &string,
1082                                                          uint32_t offset, uint32_t utf8Len,
1083                                                          MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE)
1084     {
1085         return EcmaString::CreateFromUtf8CompressedSubString(vm, string, offset, utf8Len, type);
1086     }
1087 
1088     static EcmaString *CreateConstantString(const EcmaVM *vm, const uint8_t *utf8Data, size_t length,
1089         bool compressed, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE, uint32_t idOffset = 0)
1090     {
1091         return EcmaString::CreateConstantString(vm, utf8Data, length, compressed, type, idOffset);
1092     }
1093 
1094     static EcmaString *CreateUtf16StringFromUtf8(const EcmaVM *vm, const uint8_t *utf8Data, uint32_t utf8Len,
1095         MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE)
1096     {
1097         return EcmaString::CreateUtf16StringFromUtf8(vm, utf8Data, utf8Len, type);
1098     }
1099 
1100     static EcmaString *CreateFromUtf16(const EcmaVM *vm, const uint16_t *utf16Data, uint32_t utf16Len,
1101                                        bool canBeCompress, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE)
1102     {
1103         return EcmaString::CreateFromUtf16(vm, utf16Data, utf16Len, canBeCompress, type);
1104     }
1105 
1106     static EcmaString *Concat(const EcmaVM *vm, const JSHandle<EcmaString> &str1Handle,
1107         const JSHandle<EcmaString> &str2Handle, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE)
1108     {
1109         return EcmaString::Concat(vm, str1Handle, str2Handle, type);
1110     }
1111 
CopyStringToOldSpace(const EcmaVM * vm,const JSHandle<EcmaString> & original,uint32_t length,bool compressed)1112     static EcmaString *CopyStringToOldSpace(const EcmaVM *vm, const JSHandle<EcmaString> &original,
1113         uint32_t length, bool compressed)
1114     {
1115         return EcmaString::CopyStringToOldSpace(vm, original, length, compressed);
1116     }
1117 
1118     // can change src data structure
FastSubString(const EcmaVM * vm,const JSHandle<EcmaString> & src,uint32_t start,uint32_t length)1119     static EcmaString *FastSubString(const EcmaVM *vm,
1120         const JSHandle<EcmaString> &src, uint32_t start, uint32_t length)
1121     {
1122         return EcmaString::FastSubString(vm, src, start, length);
1123     }
1124 
1125     // get
GetSubString(const EcmaVM * vm,const JSHandle<EcmaString> & src,uint32_t start,uint32_t length)1126     static EcmaString *GetSubString(const EcmaVM *vm,
1127         const JSHandle<EcmaString> &src, uint32_t start, uint32_t length)
1128     {
1129         return EcmaString::GetSubString(vm, src, start, length);
1130     }
1131 
IsUtf8()1132     bool IsUtf8() const
1133     {
1134         return string_->IsUtf8();
1135     }
1136 
IsUtf16()1137     bool IsUtf16() const
1138     {
1139         return string_->IsUtf16();
1140     }
1141 
GetLength()1142     uint32_t GetLength() const
1143     {
1144         return string_->GetLength();
1145     }
1146 
1147     // require is LineString
1148     inline size_t GetUtf8Length(bool isGetBufferSize = false) const;
1149 
ObjectSize()1150     size_t ObjectSize() const
1151     {
1152         if (string_->IsLineString()) {
1153             return LineEcmaString::ObjectSize(string_);
1154         } if (string_->IsConstantString()) {
1155             return ConstantString::ObjectSize();
1156         } else {
1157             return TreeEcmaString::SIZE;
1158         }
1159     }
1160 
1161     // For TreeString, the calculation result is size of LineString correspondingly.
GetFlatStringSize()1162     size_t GetFlatStringSize() const
1163     {
1164         if (string_->IsConstantString()) {
1165             return ConstantString::ObjectSize();
1166         }
1167         return LineEcmaString::ObjectSize(string_);
1168     }
1169 
IsInternString()1170     bool IsInternString() const
1171     {
1172         return string_->IsInternString();
1173     }
1174 
SetInternString()1175     void SetInternString()
1176     {
1177         string_->SetIsInternString();
1178     }
1179 
ClearInternString()1180     void ClearInternString()
1181     {
1182         string_->ClearInternStringFlag();
1183     }
1184 
1185     // require is LineString
1186     // It's Utf8 format, but without 0 in the end.
1187     inline const uint8_t *GetDataUtf8();
1188 
1189     // require is LineString
1190     inline const uint16_t *GetDataUtf16();
1191 
1192     // not change string data structure.
1193     // if string is not flat, this func has low efficiency.
1194     std::u16string ToU16String(uint32_t len = 0)
1195     {
1196         return string_->ToU16String(len);
1197     }
1198 
1199     // not change string data structure.
1200     // if string is not flat, this func has low efficiency.
ToOneByteDataForced()1201     std::unique_ptr<uint8_t[]> ToOneByteDataForced()
1202     {
1203         return string_->ToOneByteDataForced();
1204     }
1205 
1206     // not change string data structure.
1207     // if string is not flat, this func has low efficiency.
ToUtf8Span(CVector<uint8_t> & buf)1208     Span<const uint8_t> ToUtf8Span(CVector<uint8_t> &buf)
1209     {
1210         return string_->ToUtf8Span(buf);
1211     }
1212 
1213     // only for string is flat and using UTF8 encoding
1214     inline Span<const uint8_t> FastToUtf8Span();
1215 
1216     // Using string's hash to figure out whether the string can be converted to integer
TryToGetInteger(uint32_t * result)1217     inline bool TryToGetInteger(uint32_t *result)
1218     {
1219         return string_->TryToGetInteger(result);
1220     }
1221 
TryToSetIntegerHash(int32_t num)1222     inline bool TryToSetIntegerHash(int32_t num)
1223     {
1224         return string_->TryToSetIntegerHash(num);
1225     }
1226 
1227     // not change string data structure.
1228     // if string is not flat, this func has low efficiency.
1229     std::string ToStdString(StringConvertedUsage usage = StringConvertedUsage::PRINT);
1230 
1231     // this function convert for Utf8
1232     CString Utf8ConvertToString();
1233 
1234     std::string DebuggerToStdString(StringConvertedUsage usage = StringConvertedUsage::PRINT);
1235     // not change string data structure.
1236     // if string is not flat, this func has low efficiency.
1237     CString ToCString(StringConvertedUsage usage = StringConvertedUsage::LOGICOPERATION, bool cesu8 = false);
1238 
1239     // not change string data structure.
1240     // if string is not flat, this func has low efficiency.
1241     uint32_t WriteToFlatUtf8(uint8_t *buf, uint32_t maxLength, bool isWriteBuffer = false)
1242     {
1243         return string_->WriteUtf8(buf, maxLength, isWriteBuffer);
1244     }
1245 
WriteToUtf16(uint16_t * buf,uint32_t bufLength)1246     uint32_t WriteToUtf16(uint16_t *buf, uint32_t bufLength)
1247     {
1248         return string_->WriteUtf16(buf, GetLength(), bufLength);
1249     }
1250 
WriteToOneByte(uint8_t * buf,uint32_t maxLength)1251     uint32_t WriteToOneByte(uint8_t *buf, uint32_t maxLength)
1252     {
1253         return string_->WriteOneByte(buf, maxLength);
1254     }
1255 
1256     // not change string data structure.
1257     // if string is not flat, this func has low efficiency.
WriteToFlatUtf16(uint16_t * buf,uint32_t maxLength)1258     uint32_t WriteToFlatUtf16(uint16_t *buf, uint32_t maxLength) const
1259     {
1260         return string_->CopyDataUtf16(buf, maxLength);
1261     }
1262 
1263     template <typename Char>
WriteToFlatWithPos(EcmaString * src,Char * buf,uint32_t length,uint32_t pos)1264     static void WriteToFlatWithPos(EcmaString *src, Char *buf, uint32_t length, uint32_t pos)
1265     {
1266         src->WriteToFlatWithPos(src, buf, length, pos);
1267     }
1268 
1269     template <typename Char>
WriteToFlat(EcmaString * src,Char * buf,uint32_t maxLength)1270     static void WriteToFlat(EcmaString *src, Char *buf, uint32_t maxLength)
1271     {
1272         src->WriteToFlat(src, buf, maxLength);
1273     }
1274 
1275     // require dst is LineString
1276     // not change src data structure.
1277     // if src is not flat, this func has low efficiency.
1278     inline static void ReadData(EcmaString * dst, EcmaString *src, uint32_t start, uint32_t destSize, uint32_t length);
1279 
1280     // not change src data structure.
1281     // if src is not flat, this func has low efficiency.
1282     template<bool verify = true>
Get(uint32_t index)1283     uint16_t Get(uint32_t index) const
1284     {
1285         return string_->At<verify>(index);
1286     }
1287 
1288     // require string is LineString.
Set(uint32_t index,uint16_t src)1289     void Set(uint32_t index, uint16_t src)
1290     {
1291         return string_->WriteData(index, src);
1292     }
1293 
1294     // not change src data structure.
1295     // if src is not flat, this func has low efficiency.
GetHashcode()1296     uint32_t GetHashcode()
1297     {
1298         return string_->GetHashcode();
1299     }
1300 
GetRawHashcode()1301     uint32_t GetRawHashcode()
1302     {
1303         return string_->GetRawHashcode();
1304     }
1305 
1306     // not change src data structure.
1307     // if src is not flat, this func has low efficiency.
ComputeRawHashcode()1308     std::pair<uint32_t, bool> ComputeRawHashcode()
1309     {
1310         return string_->ComputeRawHashcode();
1311     }
1312 
ComputeHashcode()1313     uint32_t ComputeHashcode()
1314     {
1315         return string_->ComputeHashcode();
1316     }
1317 
ComputeHashcode(uint32_t rawHashSeed,bool isInteger)1318     uint32_t ComputeHashcode(uint32_t rawHashSeed, bool isInteger)
1319     {
1320         return string_->ComputeHashcode(rawHashSeed, isInteger);
1321     }
1322 
ComputeHashcodeUtf8(const uint8_t * utf8Data,size_t utf8Len,bool canBeCompress)1323     static uint32_t ComputeHashcodeUtf8(const uint8_t *utf8Data, size_t utf8Len, bool canBeCompress)
1324     {
1325         return EcmaString::ComputeHashcodeUtf8(utf8Data, utf8Len, canBeCompress);
1326     }
1327 
ComputeHashcodeUtf16(const uint16_t * utf16Data,uint32_t length)1328     static uint32_t ComputeHashcodeUtf16(const uint16_t *utf16Data, uint32_t length)
1329     {
1330         return EcmaString::ComputeHashcodeUtf16(utf16Data, length);
1331     }
1332 
1333     // can change receiver and search data structure
1334     static int32_t IndexOf(const EcmaVM *vm,
1335         const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos = 0)
1336     {
1337         return EcmaString::IndexOf(vm, receiver, search, pos);
1338     }
1339 
1340     // can change receiver and search data structure
1341     static int32_t LastIndexOf(const EcmaVM *vm,
1342         const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos = 0)
1343     {
1344         return EcmaString::LastIndexOf(vm, receiver, search, pos);
1345     }
1346 
1347     // can change receiver and search data structure
Compare(const EcmaVM * vm,const JSHandle<EcmaString> & left,const JSHandle<EcmaString> & right)1348     static int32_t Compare(const EcmaVM *vm, const JSHandle<EcmaString>& left, const JSHandle<EcmaString>& right)
1349     {
1350         return EcmaString::Compare(vm, left, right);
1351     }
1352 
1353 
1354     // can change receiver and search data structure
1355     static bool IsSubStringAt(const EcmaVM *vm, const JSHandle<EcmaString>& left,
1356         const JSHandle<EcmaString>& right, uint32_t offset = 0)
1357     {
1358         return EcmaString::IsSubStringAt(vm, left, right, offset);
1359     }
1360 
1361     // can change str1 and str2 data structure
StringsAreEqual(const EcmaVM * vm,const JSHandle<EcmaString> & str1,const JSHandle<EcmaString> & str2)1362     static bool StringsAreEqual(const EcmaVM *vm, const JSHandle<EcmaString> &str1, const JSHandle<EcmaString> &str2)
1363     {
1364         return EcmaString::StringsAreEqual(vm, str1, str2);
1365     }
1366 
1367     // not change str1 and str2 data structure.
1368     // if str1 or str2 is not flat, this func has low efficiency.
StringsAreEqual(EcmaString * str1,EcmaString * str2)1369     static bool StringsAreEqual(EcmaString *str1, EcmaString *str2)
1370     {
1371         return EcmaString::StringsAreEqual(str1, str2);
1372     }
1373 
1374     // not change str1 and str2 data structure.
1375     // if str1 or str2 is not flat, this func has low efficiency.
StringsAreEqualDiffUtfEncoding(EcmaString * str1,EcmaString * str2)1376     static bool StringsAreEqualDiffUtfEncoding(EcmaString *str1, EcmaString *str2)
1377     {
1378         return EcmaString::StringsAreEqualDiffUtfEncoding(str1, str2);
1379     }
1380 
1381     // not change str1 data structure.
1382     // if str1 is not flat, this func has low efficiency.
StringIsEqualUint8Data(const EcmaString * str1,const uint8_t * dataAddr,uint32_t dataLen,bool canBeCompress)1383     static bool StringIsEqualUint8Data(const EcmaString *str1, const uint8_t *dataAddr, uint32_t dataLen,
1384                                        bool canBeCompress)
1385     {
1386         return EcmaString::StringIsEqualUint8Data(str1, dataAddr, dataLen, canBeCompress);
1387     }
1388 
1389     // not change str1 data structure.
1390     // if str1 is not flat, this func has low efficiency.
StringsAreEqualUtf16(const EcmaString * str1,const uint16_t * utf16Data,uint32_t utf16Len)1391     static bool StringsAreEqualUtf16(const EcmaString *str1, const uint16_t *utf16Data, uint32_t utf16Len)
1392     {
1393         return EcmaString::StringsAreEqualUtf16(str1, utf16Data, utf16Len);
1394     }
1395 
1396     // require str1 and str2 are LineString.
1397     // not change string data structure.
1398     // if string is not flat, this func has low efficiency.
EqualToSplicedString(const EcmaString * str1,const EcmaString * str2)1399     bool EqualToSplicedString(const EcmaString *str1, const EcmaString *str2)
1400     {
1401         return string_->EqualToSplicedString(str1, str2);
1402     }
1403 
CanBeCompressed(const uint8_t * utf8Data,uint32_t utf8Len)1404     static bool CanBeCompressed(const uint8_t *utf8Data, uint32_t utf8Len)
1405     {
1406         return EcmaString::CanBeCompressed(utf8Data, utf8Len);
1407     }
1408 
CanBeCompressed(const uint16_t * utf16Data,uint32_t utf16Len)1409     static bool CanBeCompressed(const uint16_t *utf16Data, uint32_t utf16Len)
1410     {
1411         return EcmaString::CanBeCompressed(utf16Data, utf16Len);
1412     }
1413 
1414     // require string is LineString
CanBeCompressed(const EcmaString * string)1415     static bool CanBeCompressed(const EcmaString *string)
1416     {
1417         return EcmaString::CanBeCompressed(string);
1418     }
1419 
1420     // not change string data structure.
1421     // if string is not flat, this func has low efficiency.
ToElementIndex(uint32_t * index)1422     bool ToElementIndex(uint32_t *index)
1423     {
1424         return string_->ToElementIndex(index);
1425     }
1426 
1427     // not change string data structure.
1428     // if string is not flat, this func has low efficiency.
ToInt(int32_t * index,bool * negative)1429     bool ToInt(int32_t *index, bool *negative)
1430     {
1431         return string_->ToInt(index, negative);
1432     }
1433 
1434     // not change string data structure.
1435     // if string is not flat, this func has low efficiency.
ToTypedArrayIndex(uint32_t * index)1436     bool PUBLIC_API ToTypedArrayIndex(uint32_t *index)
1437     {
1438         return string_->ToTypedArrayIndex(index);
1439     }
1440 
ToLower(const EcmaVM * vm,const JSHandle<EcmaString> & src)1441     static EcmaString *ToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src)
1442     {
1443         return EcmaString::ToLower(vm, src);
1444     }
1445 
TryToLower(const EcmaVM * vm,const JSHandle<EcmaString> & src)1446     static EcmaString *TryToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src)
1447     {
1448         return EcmaString::TryToLower(vm, src);
1449     }
1450 
TryToUpper(const EcmaVM * vm,const JSHandle<EcmaString> & src)1451     static EcmaString *TryToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src)
1452     {
1453         return EcmaString::TryToUpper(vm, src);
1454     }
1455 
ToUpper(const EcmaVM * vm,const JSHandle<EcmaString> & src)1456     static EcmaString *ToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src)
1457     {
1458         return EcmaString::ToUpper(vm, src);
1459     }
1460 
ToLocaleLower(const EcmaVM * vm,const JSHandle<EcmaString> & src,const icu::Locale & locale)1461     static EcmaString *ToLocaleLower(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale)
1462     {
1463         return EcmaString::ToLocaleLower(vm, src, locale);
1464     }
1465 
ToLocaleUpper(const EcmaVM * vm,const JSHandle<EcmaString> & src,const icu::Locale & locale)1466     static EcmaString *ToLocaleUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale)
1467     {
1468         return EcmaString::ToLocaleUpper(vm, src, locale);
1469     }
1470 
1471     static EcmaString *Trim(const JSThread *thread,
1472         const JSHandle<EcmaString> &src, EcmaString::TrimMode mode = EcmaString::TrimMode::TRIM)
1473     {
1474         return EcmaString::Trim(thread, src, mode);
1475     }
1476 
IsASCIICharacter(uint16_t data)1477     static bool IsASCIICharacter(uint16_t data)
1478     {
1479         if (data == 0) {
1480             return false;
1481         }
1482         // \0 is not considered ASCII in Ecma-Modified-UTF8 [only modify '\u0000']
1483         return data <= base::utf_helper::UTF8_1B_MAX;
1484     }
1485 
IsFlat()1486     bool IsFlat() const
1487     {
1488         return string_->IsFlat();
1489     }
1490 
IsLineString()1491     bool IsLineString() const
1492     {
1493         return string_->IsLineString();
1494     }
1495 
IsConstantString()1496     bool IsConstantString() const
1497     {
1498         return string_->IsConstantString();
1499     }
1500 
IsSlicedString()1501     bool IsSlicedString() const
1502     {
1503         return string_->IsSlicedString();
1504     }
1505 
IsLineOrConstantString()1506     bool IsLineOrConstantString() const
1507     {
1508         return string_->IsLineOrConstantString();
1509     }
1510 
GetStringType()1511     JSType GetStringType() const
1512     {
1513         return string_->GetStringType();
1514     }
1515 
IsTreeString()1516     bool IsTreeString() const
1517     {
1518         return string_->IsTreeString();
1519     }
1520 
NotTreeString()1521     bool NotTreeString() const
1522     {
1523         return string_->NotTreeString();
1524     }
1525 
1526     // the returned string may be a linestring, constantstring, or slicestring!!
1527     PUBLIC_API static EcmaString *Flatten(const EcmaVM *vm, const JSHandle<EcmaString> &string,
1528         MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE)
1529     {
1530         return EcmaString::Flatten(vm, string, type);
1531     }
1532 
1533     static FlatStringInfo FlattenAllString(const EcmaVM *vm, const JSHandle<EcmaString> &string,
1534         MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE)
1535     {
1536         return EcmaString::FlattenAllString(vm, string, type);
1537     }
1538 
1539     static EcmaString *SlowFlatten(const EcmaVM *vm, const JSHandle<EcmaString> &string,
1540         MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE)
1541     {
1542         return EcmaString::SlowFlatten(vm, string, type);
1543     }
1544 
FlattenNoGC(const EcmaVM * vm,EcmaString * string)1545     static EcmaString *FlattenNoGC(const EcmaVM *vm, EcmaString *string)
1546     {
1547         return EcmaString::FlattenNoGC(vm, string);
1548     }
1549 
GetUtf8DataFlat(const EcmaString * src,CVector<uint8_t> & buf)1550     static const uint8_t *GetUtf8DataFlat(const EcmaString *src, CVector<uint8_t> &buf)
1551     {
1552         return EcmaString::GetUtf8DataFlat(src, buf);
1553     }
1554 
GetNonTreeUtf8Data(const EcmaString * src)1555     static const uint8_t *GetNonTreeUtf8Data(const EcmaString *src)
1556     {
1557         return EcmaString::GetNonTreeUtf8Data(src);
1558     }
1559 
GetUtf16DataFlat(const EcmaString * src,CVector<uint16_t> & buf)1560     static const uint16_t *GetUtf16DataFlat(const EcmaString *src, CVector<uint16_t> &buf)
1561     {
1562         return EcmaString::GetUtf16DataFlat(src, buf);
1563     }
1564 
GetNonTreeUtf16Data(const EcmaString * src)1565     static const uint16_t *GetNonTreeUtf16Data(const EcmaString *src)
1566     {
1567         return EcmaString::GetNonTreeUtf16Data(src);
1568     }
1569 
1570     static JSTaggedValue StringToList(JSThread *thread, JSHandle<JSTaggedValue> &str);
1571 
1572 private:
1573     EcmaString *string_ {nullptr};
1574 };
1575 }  // namespace ecmascript
1576 }  // namespace panda
1577 #endif  // ECMASCRIPT_STRING_H
1578