• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #ifndef ECMASCRIPT_STRING_H
17 #define ECMASCRIPT_STRING_H
18 
19 #include <cstddef>
20 #include <cstdint>
21 #include <cstring>
22 
23 #include "ecmascript/base/utf_helper.h"
24 #include "ecmascript/common.h"
25 #include "ecmascript/ecma_macros.h"
26 #include "ecmascript/js_hclass.h"
27 #include "ecmascript/js_tagged_value.h"
28 #include "ecmascript/mem/barriers.h"
29 #include "ecmascript/mem/space.h"
30 #include "ecmascript/mem/tagged_object.h"
31 #include "ecmascript/platform/ecma_string_hash_helper.h"
32 
33 #include "libpandabase/macros.h"
34 #include "securec.h"
35 #include "unicode/locid.h"
36 
37 namespace panda {
38 namespace test {
39     class EcmaStringEqualsTest;
40 }
41 namespace ecmascript {
42 template<typename T>
43 class JSHandle;
44 class JSPandaFile;
45 class EcmaVM;
46 class LineEcmaString;
47 class ConstantString;
48 class TreeEcmaString;
49 class SlicedString;
50 class FlatStringInfo;
51 
// Length guard for string construction paths: when `length` is not below MAX_STRING_LENGTH the macro
// hands "Invalid string length" to THROW_RANGE_ERROR_AND_RETURN together with the VM's JSThread and a
// nullptr return value (presumably raising a RangeError and returning nullptr from the enclosing function).
// NOTE(review): the expansion is a bare `if` statement, so avoid using it as the unbraced body of an if/else.
// NOTE(review): "TRHOW" is a misspelling of "THROW"; the name is kept because callers reference it.
52 // NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
53 #define ECMA_STRING_CHECK_LENGTH_AND_TRHOW(vm, length)                                        \
54     if ((length) >= MAX_STRING_LENGTH) {                                                      \
55         THROW_RANGE_ERROR_AND_RETURN((vm)->GetJSThread(), "Invalid string length", nullptr);  \
56     }
57 
58 class EcmaString : public TaggedObject {
59     /* Mix Hash Code: --   { 0 | [31 bits raw hash code] }     computed through string
60                       \    { 1 | [31 bits integer numbers] }   fastpath for string to number
61     */
62 public:
63     CAST_CHECK(EcmaString, IsString);
64 
65     static constexpr uint32_t IS_INTEGER_MASK = 1U << 31;
66     static constexpr uint32_t STRING_COMPRESSED_BIT = 0x1;
67     static constexpr uint32_t STRING_INTERN_BIT = 0x2;
68     static constexpr size_t MAX_STRING_LENGTH = 0x40000000U; // 30 bits for string length, 2 bits for special meaning
69     static constexpr uint32_t STRING_LENGTH_SHIFT_COUNT = 2U;
70     static constexpr uint32_t MAX_INTEGER_HASH_NUMBER = 0x3B9AC9FF;
71     static constexpr uint32_t MAX_CACHED_INTEGER_SIZE = 9;
72 
73     static constexpr size_t MIX_LENGTH_OFFSET = TaggedObjectSize();
74     // In last bit of mix_length we store if this string is compressed or not.
75     ACCESSORS_PRIMITIVE_FIELD(MixLength, uint32_t, MIX_LENGTH_OFFSET, MIX_HASHCODE_OFFSET)
76     // In last bit of mix_hash we store if this string is small-integer number or not.
77     ACCESSORS_PRIMITIVE_FIELD(MixHashcode, uint32_t, MIX_HASHCODE_OFFSET, SIZE)
78 
79     enum CompressedStatus {
80         STRING_COMPRESSED,
81         STRING_UNCOMPRESSED,
82     };
83 
84     enum IsIntegerStatus {
85         NOT_INTEGER = 0,
86         IS_INTEGER,
87     };
88 
89     enum TrimMode : uint8_t {
90         TRIM,
91         TRIM_START,
92         TRIM_END,
93     };
94 
95     enum ConcatOptStatus {
96         BEGIN_STRING_ADD = 1,
97         IN_STRING_ADD,
98         CONFIRMED_IN_STRING_ADD,
99         END_STRING_ADD,
100         INVALID_STRING_ADD,
101         HAS_BACKING_STORE,
102     };
103 
104 private:
105     friend class EcmaStringAccessor;
106     friend class LineEcmaString;
107     friend class ConstantString;
108     friend class TreeEcmaString;
109     friend class SlicedString;
110     friend class FlatStringInfo;
111     friend class NameDictionary;
112     friend class panda::test::EcmaStringEqualsTest;
113 
114     static EcmaString *CreateEmptyString(const EcmaVM *vm);
115     static EcmaString *CreateFromUtf8(const EcmaVM *vm, const uint8_t *utf8Data, uint32_t utf8Len,
116         bool canBeCompress, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE, bool isConstantString = false,
117         uint32_t idOffset = 0);
118     static EcmaString *CreateFromUtf8CompressedSubString(const EcmaVM *vm, const JSHandle<EcmaString> &string,
119         uint32_t offset, uint32_t utf8Len, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE);
120     static EcmaString *CreateUtf16StringFromUtf8(const EcmaVM *vm, const uint8_t *utf8Data, uint32_t utf8Len,
121         MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE);
122     static EcmaString *CreateFromUtf16(const EcmaVM *vm, const uint16_t *utf16Data, uint32_t utf16Len,
123         bool canBeCompress, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE);
124     static SlicedString *CreateSlicedString(const EcmaVM *vm, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE);
125     static EcmaString *CreateLineString(const EcmaVM *vm, size_t length, bool compressed);
126     static EcmaString *CreateLineStringNoGC(const EcmaVM *vm, size_t length, bool compressed);
127     static EcmaString *CreateLineStringWithSpaceType(const EcmaVM *vm,
128         size_t length, bool compressed, MemSpaceType type);
129     static EcmaString *CreateTreeString(const EcmaVM *vm,
130         const JSHandle<EcmaString> &left, const JSHandle<EcmaString> &right, uint32_t length, bool compressed);
131     static EcmaString *CreateConstantString(const EcmaVM *vm, const uint8_t *utf8Data,
132         size_t length, bool compressed, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE, uint32_t idOffset = 0);
133     static EcmaString *Concat(const EcmaVM *vm, const JSHandle<EcmaString> &left,
134         const JSHandle<EcmaString> &right, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE);
135     template<typename T1, typename T2>
136     static uint32_t CalculateDataConcatHashCode(const T1 *dataFirst, size_t sizeFirst,
137                                                 const T2 *dataSecond, size_t sizeSecond);
138     static uint32_t CalculateAllConcatHashCode(const JSHandle<EcmaString> &firstString,
139                                                const JSHandle<EcmaString> &secondString);
140     static uint32_t CalculateConcatHashCode(const JSHandle<EcmaString> &firstString,
141                                             const JSHandle<EcmaString> &secondString);
142     static EcmaString *CopyStringToOldSpace(const EcmaVM *vm, const JSHandle<EcmaString> &original,
143         uint32_t length, bool compressed);
144     static EcmaString *FastSubString(const EcmaVM *vm,
145         const JSHandle<EcmaString> &src, uint32_t start, uint32_t length);
146     static bool SubStringIsUtf8(const EcmaVM *vm,
147         const JSHandle<EcmaString> &src, uint32_t start, uint32_t length);
148     static EcmaString *GetSlicedString(const EcmaVM *vm,
149         const JSHandle<EcmaString> &src, uint32_t start, uint32_t length);
150     static EcmaString *GetSubString(const EcmaVM *vm,
151         const JSHandle<EcmaString> &src, uint32_t start, uint32_t length);
152     // require src is LineString
153     // not change src data structure
154     static inline EcmaString *FastSubUtf8String(const EcmaVM *vm,
155         const JSHandle<EcmaString> &src, uint32_t start, uint32_t length);
156     // require src is LineString
157     // not change src data structure
158     static inline EcmaString *FastSubUtf16String(const EcmaVM *vm,
159         const JSHandle<EcmaString> &src, uint32_t start, uint32_t length);
160     inline void TrimLineString(const JSThread *thread, uint32_t newLength);
IsUtf8()161     inline bool IsUtf8() const
162     {
163         return (GetMixLength() & STRING_COMPRESSED_BIT) == STRING_COMPRESSED;
164     }
165 
IsUtf16()166     inline bool IsUtf16() const
167     {
168         return (GetMixLength() & STRING_COMPRESSED_BIT) == STRING_UNCOMPRESSED;
169     }
170 
IsInteger()171     inline bool IsInteger()
172     {
173         return (GetHashcode() & IS_INTEGER_MASK) == IS_INTEGER_MASK;
174     }
175 
176     // require is LineString
177     inline uint16_t *GetData() const;
178     inline const uint8_t *GetDataUtf8() const;
179     inline const uint16_t *GetDataUtf16() const;
180 
181     // require is LineString
182     inline uint8_t *GetDataUtf8Writable();
183     inline uint16_t *GetDataUtf16Writable();
184 
GetLength()185     inline uint32_t GetLength() const
186     {
187         return GetMixLength() >> STRING_LENGTH_SHIFT_COUNT;
188     }
189 
190     inline void SetLength(uint32_t length, bool compressed = false)
191     {
192         ASSERT(length < MAX_STRING_LENGTH);
193         // Use 0u for compressed/utf8 expression
194         SetMixLength((length << STRING_LENGTH_SHIFT_COUNT) | (compressed ? STRING_COMPRESSED : STRING_UNCOMPRESSED));
195     }
196 
GetRawHashcode()197     inline uint32_t GetRawHashcode() const
198     {
199         return GetMixHashcode() & (~IS_INTEGER_MASK);
200     }
201 
MixHashcode(uint32_t hashcode,bool isInteger)202     static inline uint32_t MixHashcode(uint32_t hashcode, bool isInteger)
203     {
204         return isInteger ? (hashcode | IS_INTEGER_MASK) : (hashcode & (~IS_INTEGER_MASK));
205     }
206 
207     inline void SetRawHashcode(uint32_t hashcode, bool isInteger = false)
208     {
209         // Use 0u for not integer string's expression
210         SetMixHashcode(MixHashcode(hashcode, isInteger));
211     }
212 
213     inline size_t GetUtf8Length(bool modify = true, bool isGetBufferSize = false) const;
214 
SetIsInternString()215     inline void SetIsInternString()
216     {
217         SetMixLength(GetMixLength() | STRING_INTERN_BIT);
218     }
219 
IsInternString()220     inline bool IsInternString() const
221     {
222         return (GetMixLength() & STRING_INTERN_BIT) != 0;
223     }
224 
ClearInternStringFlag()225     inline void ClearInternStringFlag()
226     {
227         SetMixLength(GetMixLength() & ~STRING_INTERN_BIT);
228     }
229 
TryGetHashCode(uint32_t * hash)230     inline bool TryGetHashCode(uint32_t *hash)
231     {
232         uint32_t hashcode = GetMixHashcode();
233         if (hashcode == 0 && GetLength() != 0) {
234             return false;
235         }
236         *hash = hashcode;
237         return true;
238     }
239 
GetIntegerCode()240     inline uint32_t GetIntegerCode()
241     {
242         ASSERT(GetMixHashcode() & IS_INTEGER_MASK);
243         return GetRawHashcode();
244     }
245 
246     // not change this data structure.
247     // if string is not flat, this func has low efficiency.
GetHashcode()248     uint32_t PUBLIC_API GetHashcode()
249     {
250         uint32_t hashcode = GetMixHashcode();
251         // GetLength() == 0 means it's an empty array.No need to computeHashCode again when hashseed is 0.
252         if (hashcode == 0 && GetLength() != 0) {
253             hashcode = ComputeHashcode();
254             SetMixHashcode(hashcode);
255         }
256         return hashcode;
257     }
258 
259     template<typename T>
IsDecimalDigitChar(const T c)260     inline static bool IsDecimalDigitChar(const T c)
261     {
262         return (c >= '0' && c <= '9');
263     }
264 
ComputeIntegerHash(uint32_t * num,uint8_t c)265     static uint32_t ComputeIntegerHash(uint32_t *num, uint8_t c)
266     {
267         if (!IsDecimalDigitChar(c)) {
268             return false;
269         }
270         int charDate = c - '0';
271         *num = (*num) * 10 + charDate; // 10: decimal factor
272         return true;
273     }
274 
275     bool HashIntegerString(uint32_t length, uint32_t *hash, uint32_t hashSeed) const;
276 
277     template<typename T>
HashIntegerString(const T * data,size_t size,uint32_t * hash,uint32_t hashSeed)278     static bool HashIntegerString(const T *data, size_t size, uint32_t *hash, uint32_t hashSeed)
279     {
280         ASSERT(size >= 0);
281         if (hashSeed == 0) {
282             if (IsDecimalDigitChar(data[0]) && data[0] != '0') {
283                 uint32_t num = data[0] - '0';
284                 uint32_t i = 1;
285                 do {
286                     if (i == size) {
287                         // compute mix hash
288                         if (num <= MAX_INTEGER_HASH_NUMBER) {
289                             *hash = MixHashcode(num, IS_INTEGER);
290                             return true;
291                         }
292                         return false;
293                     }
294                 } while (ComputeIntegerHash(&num, data[i++]));
295             }
296             if (size == 1 && (data[0] == '0')) {
297                 *hash = MixHashcode(0, IS_INTEGER);
298                 return true;
299             }
300         } else {
301             if (IsDecimalDigitChar(data[0])) {
302                 uint32_t num = hashSeed * 10 + (data[0] - '0'); // 10: decimal factor
303                 uint32_t i = 1;
304                 do {
305                     if (i == size) {
306                         // compute mix hash
307                         if (num <= MAX_INTEGER_HASH_NUMBER) {
308                             *hash = MixHashcode(num, IS_INTEGER);
309                             return true;
310                         }
311                         return false;
312                     }
313                 } while (ComputeIntegerHash(&num, data[i++]));
314             }
315         }
316         return false;
317     }
318 
319     // not change this data structure.
320     // if string is not flat, this func has low efficiency.
321     uint32_t PUBLIC_API ComputeHashcode() const;
322     std::pair<uint32_t, bool> PUBLIC_API ComputeRawHashcode() const;
323     uint32_t PUBLIC_API ComputeHashcode(uint32_t rawHashSeed, bool isInteger) const;
324 
325     static uint32_t ComputeHashcodeUtf8(const uint8_t *utf8Data, size_t utf8Len, bool canBeCompress);
326     static uint32_t ComputeHashcodeUtf16(const uint16_t *utf16Data, uint32_t length);
327 
328     template<bool verify = true>
329     uint16_t At(int32_t index) const;
330 
331     // require is LineString
332     void WriteData(uint32_t index, uint16_t src);
333 
334     // can change left and right data structure
335     static int32_t Compare(const EcmaVM *vm, const JSHandle<EcmaString> &left, const JSHandle<EcmaString> &right);
336 
337     static bool IsSubStringAt(const EcmaVM *vm, const JSHandle<EcmaString>& left,
338         const JSHandle<EcmaString>& right, uint32_t offset);
339 
340     // Check that two spans are equal. Should have the same length.
341     /* static */
342     template<typename T, typename T1>
StringsAreEquals(Span<const T> & str1,Span<const T1> & str2)343     static bool StringsAreEquals(Span<const T> &str1, Span<const T1> &str2)
344     {
345         ASSERT(str1.Size() <= str2.Size());
346         size_t size = str1.Size();
347         if constexpr (std::is_same_v<T, T1>) {
348             return !memcmp(str1.data(), str2.data(), size * sizeof(T));
349         } else {
350             for (size_t i = 0; i < size; i++) {
351                 auto left = static_cast<uint16_t>(str1[i]);
352                 auto right = static_cast<uint16_t>(str2[i]);
353                 if (left != right) {
354                     return false;
355                 }
356             }
357             return true;
358         }
359     }
360 
361     // Converts utf8Data to utf16 and compare it with given utf16_data.
362     static bool IsUtf8EqualsUtf16(const uint8_t *utf8Data, size_t utf8Len, const uint16_t *utf16Data,
363                                   uint32_t utf16Len);
364     // Compares string1 + string2 by bytes, It doesn't check canonical unicode equivalence.
365     bool EqualToSplicedString(const EcmaString *str1, const EcmaString *str2);
366     // Compares strings by bytes, It doesn't check canonical unicode equivalence.
367     static PUBLIC_API bool StringsAreEqual(const EcmaVM *vm, const JSHandle<EcmaString> &str1,
368         const JSHandle<EcmaString> &str2);
369     // Compares strings by bytes, It doesn't check canonical unicode equivalence.
370     static PUBLIC_API bool StringsAreEqual(EcmaString *str1, EcmaString *str2);
371     // Two strings have the same type of utf encoding format.
372     static bool StringsAreEqualDiffUtfEncoding(EcmaString *str1, EcmaString *str2);
373     static bool StringsAreEqualDiffUtfEncoding(const FlatStringInfo &str1, const FlatStringInfo &str2);
374     // Compares strings by bytes, It doesn't check canonical unicode equivalence.
375     // not change str1 data structure.
376     // if str1 is not flat, this func has low efficiency.
377     static bool StringIsEqualUint8Data(const EcmaString *str1, const uint8_t *dataAddr, uint32_t dataLen,
378                                        bool canBeCompress);
379     // Compares strings by bytes, It doesn't check canonical unicode equivalence.
380     // not change str1 data structure.
381     // if str1 is not flat, this func has low efficiency.
382     static bool StringsAreEqualUtf16(const EcmaString *str1, const uint16_t *utf16Data, uint32_t utf16Len);
383 
384     // can change receiver and search data structure
385     static int32_t IndexOf(const EcmaVM *vm,
386         const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos = 0);
387 
388     // can change receiver and search data structure
389     static int32_t LastIndexOf(const EcmaVM *vm,
390         const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos = 0);
391 
392     inline size_t CopyDataUtf8(uint8_t *buf, size_t maxLength, bool modify = true) const
393     {
394         if (maxLength == 0) {
395             return 1; // maxLength was -1 at napi
396         }
397         size_t length = GetLength();
398         if (length > maxLength) {
399             return 0;
400         }
401         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
402         buf[maxLength - 1] = '\0';
403         // Put comparison here so that internal usage and napi can use the same CopyDataRegionUtf8
404         return CopyDataRegionUtf8(buf, 0, length, maxLength, modify) + 1;  // add place for zero in the end
405     }
406 
407     // It allows user to copy into buffer even if maxLength < length
408     inline size_t WriteUtf8(uint8_t *buf, size_t maxLength, bool isWriteBuffer = false) const
409     {
410         if (maxLength == 0) {
411             return 1; // maxLength was -1 at napi
412         }
413         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
414         buf[maxLength - 1] = '\0';
415         return CopyDataRegionUtf8(buf, 0, GetLength(), maxLength, true, isWriteBuffer) + 1;
416     }
417 
CopyDataToUtf16(uint16_t * buf,uint32_t length,uint32_t bufLength)418     size_t CopyDataToUtf16(uint16_t *buf, uint32_t length, uint32_t bufLength) const
419     {
420         if (IsUtf16()) {
421             CVector<uint16_t> tmpBuf;
422             const uint16_t *data = EcmaString::GetUtf16DataFlat(this, tmpBuf);
423             if (length > bufLength) {
424                 if (memcpy_s(buf, bufLength * sizeof(uint16_t), data, bufLength * sizeof(uint16_t)) != EOK) {
425                     LOG_FULL(FATAL) << "memcpy_s failed when length > bufLength";
426                     UNREACHABLE();
427                 }
428                 return bufLength;
429             }
430             if (memcpy_s(buf, bufLength * sizeof(uint16_t), data, length * sizeof(uint16_t)) != EOK) {
431                 LOG_FULL(FATAL) << "memcpy_s failed";
432                 UNREACHABLE();
433             }
434             return length;
435         }
436         CVector<uint8_t> tmpBuf;
437         const uint8_t *data = EcmaString::GetUtf8DataFlat(this, tmpBuf);
438         if (length > bufLength) {
439             return base::utf_helper::ConvertRegionUtf8ToUtf16(data, buf, bufLength, bufLength);
440         }
441         return base::utf_helper::ConvertRegionUtf8ToUtf16(data, buf, length, bufLength);
442     }
443 
444     // It allows user to copy into buffer even if maxLength < length
WriteUtf16(uint16_t * buf,uint32_t targetLength,uint32_t bufLength)445     inline size_t WriteUtf16(uint16_t *buf, uint32_t targetLength, uint32_t bufLength) const
446     {
447         if (bufLength == 0) {
448             return 0;
449         }
450         // Returns a number representing a valid backrest length.
451         return CopyDataToUtf16(buf, targetLength, bufLength);
452     }
453 
WriteOneByte(uint8_t * buf,size_t maxLength)454     size_t WriteOneByte(uint8_t *buf, size_t maxLength) const
455     {
456         if (maxLength == 0) {
457             return 0;
458         }
459         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
460         buf[maxLength - 1] = '\0';
461         uint32_t length = GetLength();
462         if (!IsUtf16()) {
463             CVector<uint8_t> tmpBuf;
464             const uint8_t *data = GetUtf8DataFlat(this, tmpBuf);
465             if (length > maxLength) {
466                 length = maxLength;
467             }
468             if (memcpy_s(buf, maxLength, data, length) != EOK) {
469                 LOG_FULL(FATAL) << "memcpy_s failed when write one byte";
470                 UNREACHABLE();
471             }
472             return length;
473         }
474 
475         CVector<uint16_t> tmpBuf;
476         const uint16_t *data = GetUtf16DataFlat(this, tmpBuf);
477         if (length > maxLength) {
478             return base::utf_helper::ConvertRegionUtf16ToLatin1(data, buf, maxLength, maxLength);
479         }
480         return base::utf_helper::ConvertRegionUtf16ToLatin1(data, buf, length, maxLength);
481     }
482 
483     size_t CopyDataRegionUtf8(uint8_t *buf, size_t start, size_t length, size_t maxLength,
484                               bool modify = true, bool isWriteBuffer = false) const
485     {
486         uint32_t len = GetLength();
487         if (start + length > len) {
488             return 0;
489         }
490         if (!IsUtf16()) {
491             if (length > std::numeric_limits<size_t>::max() / 2 - 1) {  // 2: half
492                 LOG_FULL(FATAL) << " length is higher than half of size_t::max";
493                 UNREACHABLE();
494             }
495             CVector<uint8_t> tmpBuf;
496             const uint8_t *data = GetUtf8DataFlat(this, tmpBuf) + start;
497             // Only copy maxLength number of chars into buffer if length > maxLength
498             auto dataLen = std::min(length, maxLength);
499             std::copy(data, data + dataLen, buf);
500             return dataLen;
501         }
502         CVector<uint16_t> tmpBuf;
503         const uint16_t *data = GetUtf16DataFlat(this, tmpBuf);
504         if (length > maxLength) {
505             return base::utf_helper::ConvertRegionUtf16ToUtf8(data, buf, maxLength, maxLength, start,
506                                                               modify, isWriteBuffer);
507         }
508         return base::utf_helper::ConvertRegionUtf16ToUtf8(data, buf, length, maxLength, start,
509                                                           modify, isWriteBuffer);
510     }
511 
CopyDataUtf16(uint16_t * buf,uint32_t maxLength)512     inline uint32_t CopyDataUtf16(uint16_t *buf, uint32_t maxLength) const
513     {
514         uint32_t length = GetLength();
515         if (length > maxLength) {
516             return 0;
517         }
518         if (IsUtf16()) {
519             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
520             CVector<uint16_t> tmpBuf;
521             const uint16_t *data = GetUtf16DataFlat(this, tmpBuf);
522             if (memcpy_s(buf, maxLength * sizeof(uint16_t), data, length * sizeof(uint16_t)) != EOK) {
523                 LOG_FULL(FATAL) << "memcpy_s failed";
524                 UNREACHABLE();
525             }
526             return length;
527         }
528         CVector<uint8_t> tmpBuf;
529         const uint8_t *data = GetUtf8DataFlat(this, tmpBuf);
530         return base::utf_helper::ConvertRegionUtf8ToUtf16(data, buf, length, maxLength);
531     }
532 
533     std::u16string ToU16String(uint32_t len = 0);
534 
ToOneByteDataForced()535     std::unique_ptr<uint8_t[]> ToOneByteDataForced()
536     {
537         uint8_t *buf = nullptr;
538         auto length = GetLength();
539         if (IsUtf16()) {
540             auto size = length * sizeof(uint16_t);
541             buf = new uint8_t[size]();
542             CopyDataUtf16(reinterpret_cast<uint16_t *>(buf), length);
543         } else {
544             buf = new uint8_t[length + 1]();
545             CopyDataUtf8(buf, length + 1);
546         }
547         return std::unique_ptr<uint8_t[]>(buf);
548     }
549 
550     Span<const uint8_t> ToUtf8Span(CVector<uint8_t> &buf, bool modify = true, bool cesu8 = false)
551     {
552         Span<const uint8_t> str;
553         uint32_t strLen = GetLength();
554         if (UNLIKELY(IsUtf16())) {
555             CVector<uint16_t> tmpBuf;
556             const uint16_t *data = EcmaString::GetUtf16DataFlat(this, tmpBuf);
557             ASSERT(base::utf_helper::Utf16ToUtf8Size(data, strLen, modify, false, cesu8) > 0);
558             size_t len = base::utf_helper::Utf16ToUtf8Size(data, strLen, modify, false, cesu8) - 1;
559             buf.reserve(len);
560             len = base::utf_helper::ConvertRegionUtf16ToUtf8(data, buf.data(), strLen, len, 0, modify, false, cesu8);
561             str = Span<const uint8_t>(buf.data(), len);
562         } else {
563             const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
564             str = Span<const uint8_t>(data, strLen);
565         }
566         return str;
567     }
568 
569     Span<const uint8_t> DebuggerToUtf8Span(CVector<uint8_t> &buf, bool modify = true)
570     {
571         Span<const uint8_t> str;
572         uint32_t strLen = GetLength();
573         if (UNLIKELY(IsUtf16())) {
574             CVector<uint16_t> tmpBuf;
575             const uint16_t *data = EcmaString::GetUtf16DataFlat(this, tmpBuf);
576             size_t len = base::utf_helper::Utf16ToUtf8Size(data, strLen, modify) - 1;
577             buf.reserve(len);
578             len = base::utf_helper::DebuggerConvertRegionUtf16ToUtf8(data, buf.data(), strLen, len, 0, modify);
579             str = Span<const uint8_t>(buf.data(), len);
580         } else {
581             const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
582             str = Span<const uint8_t>(data, strLen);
583         }
584         return str;
585     }
586 
587     inline Span<const uint8_t> FastToUtf8Span() const;
588 
TryToGetInteger(uint32_t * result)589     bool TryToGetInteger(uint32_t *result)
590     {
591         if (!IsInteger()) {
592             return false;
593         }
594         ASSERT(GetLength() <= MAX_CACHED_INTEGER_SIZE);
595         *result = GetIntegerCode();
596         return true;
597     }
598 
599     // using integer number set into hash
TryToSetIntegerHash(int32_t num)600     inline bool TryToSetIntegerHash(int32_t num)
601     {
602         uint32_t hashcode = GetMixHashcode();
603         if (hashcode == 0 && GetLength() != 0) {
604             SetRawHashcode(static_cast<uint32_t>(num), IS_INTEGER);
605             return true;
606         }
607         return false;
608     }
609 
610     void WriteData(EcmaString *src, uint32_t start, uint32_t destSize, uint32_t length);
611 
612     static bool CanBeCompressed(const uint8_t *utf8Data, uint32_t utf8Len);
613     static bool CanBeCompressed(const uint16_t *utf16Data, uint32_t utf16Len);
614     static bool CanBeCompressed(const EcmaString *string);
615 
616     bool PUBLIC_API ToElementIndex(uint32_t *index);
617 
618     bool ToInt(int32_t *index, bool *negative);
619 
620     bool ToUInt64FromLoopStart(uint64_t *index, uint32_t loopStart, const uint8_t *data);
621 
622     bool PUBLIC_API ToTypedArrayIndex(uint32_t *index);
623 
624     template<bool isLower>
625     static EcmaString *ConvertCase(const EcmaVM *vm, const JSHandle<EcmaString> &src);
626 
627     template<bool isLower>
628     static EcmaString *LocaleConvertCase(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale);
629 
630     template<typename T>
631     static EcmaString *TrimBody(const JSThread *thread, const JSHandle<EcmaString> &src, Span<T> &data, TrimMode mode);
632 
633     static EcmaString *Trim(const JSThread *thread, const JSHandle<EcmaString> &src, TrimMode mode = TrimMode::TRIM);
634 
635     // single char copy for loop
636     template<typename DstType, typename SrcType>
CopyChars(DstType * dst,SrcType * src,uint32_t count)637     static void CopyChars(DstType *dst, SrcType *src, uint32_t count)
638     {
639         Span<SrcType> srcSp(src, count);
640         Span<DstType> dstSp(dst, count);
641         for (uint32_t i = 0; i < count; i++) {
642             dstSp[i] = srcSp[i];
643         }
644     }
645 
646     // memory block copy
647     template<typename T>
648     static bool MemCopyChars(Span<T> &dst, size_t dstMax, Span<const T> &src, size_t count);
649 
650     // To change the hash algorithm of EcmaString, please modify EcmaString::CalculateConcatHashCode
651     // and EcmaStringHashHelper::ComputeHashForDataPlatform simultaneously!!
652     template <typename T>
ComputeHashForData(const T * data,size_t size,uint32_t hashSeed)653     static uint32_t ComputeHashForData(const T *data, size_t size,
654                                        uint32_t hashSeed)
655     {
656         if (size <= static_cast<size_t>(EcmaStringHash::MIN_SIZE_FOR_UNROLLING)) {
657             uint32_t hash = hashSeed;
658             for (uint32_t i = 0; i < size ; i++) {
659                 hash = (hash << static_cast<uint32_t>(EcmaStringHash::HASH_SHIFT)) - hash + data[i];
660             }
661             return hash;
662         }
663         return EcmaStringHashHelper::ComputeHashForDataPlatform(data, size, hashSeed);
664     }
665 
IsASCIICharacter(uint16_t data)666     static bool IsASCIICharacter(uint16_t data)
667     {
668         if (data == 0) {
669             return false;
670         }
671         // \0 is not considered ASCII in Ecma-Modified-UTF8 [only modify '\u0000']
672         return data <= base::utf_helper::UTF8_1B_MAX;
673     }
674 
675     template<typename T1, typename T2>
676     static int32_t IndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos, int32_t max);
677 
678     template<typename T1, typename T2>
679     static int32_t LastIndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos);
680 
681     bool IsFlat() const;
682 
IsLineString()683     bool IsLineString() const
684     {
685         return GetClass()->IsLineString();
686     }
IsConstantString()687     bool IsConstantString() const
688     {
689         return GetClass()->IsConstantString();
690     }
IsSlicedString()691     bool IsSlicedString() const
692     {
693         return GetClass()->IsSlicedString();
694     }
IsTreeString()695     bool IsTreeString() const
696     {
697         return GetClass()->IsTreeString();
698     }
NotTreeString()699     bool NotTreeString() const
700     {
701         return !IsTreeString();
702     }
IsLineOrConstantString()703     bool IsLineOrConstantString() const
704     {
705         auto hclass = GetClass();
706         return hclass->IsLineString() || hclass->IsConstantString();
707     }
708 
GetStringType()709     JSType GetStringType() const
710     {
711         JSType type = GetClass()->GetObjectType();
712         ASSERT(type >= JSType::STRING_FIRST && type <= JSType::STRING_LAST);
713         return type;
714     }
715 
    // All members below are declared here and defined out of line.

    // Writes `src` into the caller-provided buffer `buf`; `Char` is the buffer's element type.
    // NOTE(review): `maxLength` presumably bounds the number of elements written — confirm.
    template <typename Char>
    static void WriteToFlat(EcmaString *src, Char *buf, uint32_t maxLength);

    // Variant of WriteToFlat that takes an explicit `length` and `pos`.
    // NOTE(review): presumably `pos` is the start offset within `src` and `length` the element count — confirm.
    template <typename Char>
    static void WriteToFlatWithPos(EcmaString *src, Char *buf, uint32_t length, uint32_t pos);

    // Returns a pointer to 8-bit data for `src`; `buf` is caller-owned scratch storage.
    // NOTE(review): presumably `buf` is only used when `src` has no contiguous data of its own — confirm.
    static const uint8_t *PUBLIC_API GetUtf8DataFlat(const EcmaString *src, CVector<uint8_t> &buf);

    // Returns a pointer to 8-bit data for `src`.
    // NOTE(review): the name suggests `src` must not be a tree string — confirm.
    static const uint8_t *PUBLIC_API GetNonTreeUtf8Data(const EcmaString *src);

    // 16-bit counterpart of GetUtf8DataFlat.
    static const uint16_t *PUBLIC_API GetUtf16DataFlat(const EcmaString *src, CVector<uint16_t> &buf);

    // 16-bit counterpart of GetNonTreeUtf8Data.
    static const uint16_t *PUBLIC_API GetNonTreeUtf16Data(const EcmaString *src);

    // Precondition: `string` must not be flat.
    static EcmaString *SlowFlatten(const EcmaVM *vm, const JSHandle<EcmaString> &string, MemSpaceType type);

    // Flattens `string`; `type` selects the memory space and defaults to SHARED_OLD_SPACE.
    PUBLIC_API static EcmaString *Flatten(const EcmaVM *vm, const JSHandle<EcmaString> &string,
                               MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE);

    // Like Flatten, but the result is returned as a FlatStringInfo instead of an EcmaString pointer.
    static FlatStringInfo FlattenAllString(const EcmaVM *vm, const JSHandle<EcmaString> &string,
                                            MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE);

    // Takes a raw EcmaString pointer rather than a handle.
    // NOTE(review): the name suggests no GC may be triggered and that it is snapshot-only — confirm.
    static EcmaString *FlattenNoGCForSnapshot(const EcmaVM *vm, EcmaString *string);

    // Case conversion of `src`.
    static EcmaString *ToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src);

    static EcmaString *ToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src);

    // Locale-aware case conversion using the given ICU locale.
    static EcmaString *ToLocaleLower(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale);

    static EcmaString *ToLocaleUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale);

    // NOTE(review): presumably fast-path variants of ToLower / ToUpper — confirm how they differ.
    static EcmaString *TryToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src);

    static EcmaString *TryToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src);

    // Shared helper for 8-bit strings: `toLower` selects the direction, `startIndex` defaults to 0.
    // NOTE(review): presumably characters before `startIndex` are copied unchanged — confirm.
    static EcmaString *ConvertUtf8ToLowerOrUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src,
                                                 bool toLower, uint32_t startIndex = 0);
755 };
756 
757 // The LineEcmaString abstract class captures sequential string values, only LineEcmaString can store chars data
758 class LineEcmaString : public EcmaString {
759 public:
760     static constexpr uint32_t MAX_LENGTH = (1 << 28) - 16;
761     static constexpr uint32_t INIT_LENGTH_TIMES = 4;
762     // DATA_OFFSET: the string data stored after the string header.
763     // Data can be stored in utf8 or utf16 form according to compressed bit.
764     static constexpr size_t DATA_OFFSET = EcmaString::SIZE;  // DATA_OFFSET equal to Empty String size
765 
766     CAST_CHECK(LineEcmaString, IsLineString);
767 
768     DECL_VISIT_ARRAY(DATA_OFFSET, 0, GetPointerLength());
769 
Cast(EcmaString * str)770     static LineEcmaString *Cast(EcmaString *str)
771     {
772         return static_cast<LineEcmaString *>(str);
773     }
774 
Cast(const EcmaString * str)775     static LineEcmaString *Cast(const EcmaString *str)
776     {
777         return LineEcmaString::Cast(const_cast<EcmaString *>(str));
778     }
779 
ComputeSizeUtf8(uint32_t utf8Len)780     static size_t ComputeSizeUtf8(uint32_t utf8Len)
781     {
782         return DATA_OFFSET + utf8Len;
783     }
784 
ComputeSizeUtf16(uint32_t utf16Len)785     static size_t ComputeSizeUtf16(uint32_t utf16Len)
786     {
787         return DATA_OFFSET + utf16Len * sizeof(uint16_t);
788     }
789 
ObjectSize(EcmaString * str)790     static size_t ObjectSize(EcmaString *str)
791     {
792         uint32_t length = str->GetLength();
793         return str->IsUtf16() ? ComputeSizeUtf16(length) : ComputeSizeUtf8(length);
794     }
795 
DataSize(EcmaString * str)796     static size_t DataSize(EcmaString *str)
797     {
798         uint32_t length = str->GetLength();
799         return str->IsUtf16() ? length * sizeof(uint16_t) : length;
800     }
801 
GetPointerLength()802     size_t GetPointerLength()
803     {
804         size_t byteSize = DataSize(this);
805         return AlignUp(byteSize, static_cast<size_t>(MemAlignment::MEM_ALIGN_OBJECT)) / sizeof(JSTaggedType);
806     }
807 
GetData()808     uint16_t *GetData() const
809     {
810         return reinterpret_cast<uint16_t *>(ToUintPtr(this) + DATA_OFFSET);
811     }
812 
813     template<bool verify = true>
Get(int32_t index)814     uint16_t Get(int32_t index) const
815     {
816         int32_t length = static_cast<int32_t>(GetLength());
817         if (verify) {
818             if ((index < 0) || (index >= length)) {
819                 return 0;
820             }
821         }
822         if (!IsUtf16()) {
823             Span<const uint8_t> sp(GetDataUtf8(), length);
824             return sp[index];
825         }
826         Span<const uint16_t> sp(GetDataUtf16(), length);
827         return sp[index];
828     }
829 
Set(uint32_t index,uint16_t src)830     void Set(uint32_t index, uint16_t src)
831     {
832         ASSERT(index < GetLength());
833         if (IsUtf8()) {
834             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
835             *(reinterpret_cast<uint8_t *>(GetData()) + index) = static_cast<uint8_t>(src);
836         } else {
837             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
838             *(GetData() + index) = src;
839         }
840     }
841 };
842 static_assert((LineEcmaString::DATA_OFFSET % static_cast<uint8_t>(MemAlignment::MEM_ALIGN_OBJECT)) == 0);
843 
844 class ConstantString : public EcmaString {
845 public:
846     static constexpr size_t RELOCTAED_DATA_OFFSET = EcmaString::SIZE;
847     // ConstantData is the pointer of const string in the pandafile.
848     // String in pandafile is encoded by the utf8 format.
849     // EntityId is normally the uint32_t index in the pandafile.
850     // When the pandafile is to be removed, EntityId will become -1.
851     // The real string data will be reloacted into bytearray and stored in RelocatedData.
852     // ConstantData will also point at data of bytearray data.
853     ACCESSORS(RelocatedData, RELOCTAED_DATA_OFFSET, ENTITY_ID_OFFSET);
854     ACCESSORS_PRIMITIVE_FIELD(EntityId, int64_t, ENTITY_ID_OFFSET, CONSTANT_DATA_OFFSET);
855     ACCESSORS_NATIVE_FIELD(ConstantData, uint8_t, CONSTANT_DATA_OFFSET, LAST_OFFSET);
856     DEFINE_ALIGN_SIZE(LAST_OFFSET);
857 
858     CAST_CHECK(ConstantString, IsConstantString);
859     DECL_VISIT_OBJECT(RELOCTAED_DATA_OFFSET, ENTITY_ID_OFFSET);
860 
Cast(EcmaString * str)861     static ConstantString *Cast(EcmaString *str)
862     {
863         return static_cast<ConstantString *>(str);
864     }
865 
Cast(const EcmaString * str)866     static ConstantString *Cast(const EcmaString *str)
867     {
868         return ConstantString::Cast(const_cast<EcmaString *>(str));
869     }
870 
ObjectSize()871     static size_t ObjectSize()
872     {
873         return ConstantString::SIZE;
874     }
875 
GetEntityIdU32()876     uint32_t GetEntityIdU32() const
877     {
878         ASSERT(GetEntityId() >= 0);
879         return static_cast<uint32_t>(GetEntityId());
880     }
881 
882     template<bool verify = true>
Get(int32_t index)883     uint16_t Get(int32_t index) const
884     {
885         int32_t length = static_cast<int32_t>(GetLength());
886         if (verify) {
887             if ((index < 0) || (index >= length)) {
888                 return 0;
889             }
890         }
891         ASSERT(IsUtf8());
892         Span<const uint8_t> sp(GetConstantData(), length);
893         return sp[index];
894     }
895 };
896 
897 // The substrings of another string use SlicedString to describe.
898 class SlicedString : public EcmaString {
899 public:
900     static constexpr uint32_t MIN_SLICED_ECMASTRING_LENGTH = 13;
901     static constexpr size_t PARENT_OFFSET = EcmaString::SIZE;
902     ACCESSORS(Parent, PARENT_OFFSET, STARTINDEX_OFFSET);
903     ACCESSORS_PRIMITIVE_FIELD(StartIndex, uint32_t, STARTINDEX_OFFSET, BACKING_STORE_FLAG);
904     ACCESSORS_PRIMITIVE_FIELD(HasBackingStore, uint32_t, BACKING_STORE_FLAG, SIZE);
905 
906     DECL_VISIT_OBJECT(PARENT_OFFSET, STARTINDEX_OFFSET);
907 
908     CAST_CHECK(SlicedString, IsSlicedString);
909 private:
910     friend class EcmaString;
Cast(EcmaString * str)911     static SlicedString *Cast(EcmaString *str)
912     {
913         return static_cast<SlicedString *>(str);
914     }
915 
Cast(const EcmaString * str)916     static SlicedString *Cast(const EcmaString *str)
917     {
918         return SlicedString::Cast(const_cast<EcmaString *>(str));
919     }
920 
ObjectSize()921     static size_t ObjectSize()
922     {
923         return SlicedString::SIZE;
924     }
925 
926     // Minimum length for a sliced string
927     template<bool verify = true>
Get(int32_t index)928     uint16_t Get(int32_t index) const
929     {
930         int32_t length = static_cast<int32_t>(GetLength());
931         if (verify) {
932             if ((index < 0) || (index >= length)) {
933                 return 0;
934             }
935         }
936         EcmaString *parent = EcmaString::Cast(GetParent());
937         if (parent->IsLineString()) {
938             if (parent->IsUtf8()) {
939                 Span<const uint8_t> sp(parent->GetDataUtf8() + GetStartIndex(), length);
940                 return sp[index];
941             }
942             Span<const uint16_t> sp(parent->GetDataUtf16() + GetStartIndex(), length);
943             return sp[index];
944         }
945         Span<const uint8_t> sp(ConstantString::Cast(parent)->GetConstantData() + GetStartIndex(), length);
946         return sp[index];
947     }
948 };
949 
950 class TreeEcmaString : public EcmaString {
951 public:
952     // Minimum length for a tree string
953     static constexpr uint32_t MIN_TREE_ECMASTRING_LENGTH = 13;
954 
955     static constexpr size_t FIRST_OFFSET = EcmaString::SIZE;
956     ACCESSORS(First, FIRST_OFFSET, SECOND_OFFSET);
957     ACCESSORS(Second, SECOND_OFFSET, SIZE);
958 
959     DECL_VISIT_OBJECT(FIRST_OFFSET, SIZE);
960 
961     CAST_CHECK(TreeEcmaString, IsTreeString);
962 
Cast(EcmaString * str)963     static TreeEcmaString *Cast(EcmaString *str)
964     {
965         return static_cast<TreeEcmaString *>(str);
966     }
967 
Cast(const EcmaString * str)968     static TreeEcmaString *Cast(const EcmaString *str)
969     {
970         return TreeEcmaString::Cast(const_cast<EcmaString *>(str));
971     }
972 
IsFlat()973     bool IsFlat() const
974     {
975         auto strSecond = EcmaString::Cast(GetSecond());
976         return strSecond->GetLength() == 0;
977     }
978 
979     template<bool verify = true>
Get(int32_t index)980     uint16_t Get(int32_t index) const
981     {
982         int32_t length = static_cast<int32_t>(GetLength());
983         if (verify) {
984             if ((index < 0) || (index >= length)) {
985                 return 0;
986             }
987         }
988 
989         if (IsFlat()) {
990             EcmaString *first = EcmaString::Cast(GetFirst());
991             return first->At<verify>(index);
992         }
993         EcmaString *string = const_cast<TreeEcmaString *>(this);
994         while (true) {
995             if (string->IsTreeString()) {
996                 EcmaString *first = EcmaString::Cast(TreeEcmaString::Cast(string)->GetFirst());
997                 if (static_cast<int32_t>(first->GetLength()) > index) {
998                     string = first;
999                 } else {
1000                     index -= static_cast<int32_t>(first->GetLength());
1001                     string = EcmaString::Cast(TreeEcmaString::Cast(string)->GetSecond());
1002                 }
1003             } else {
1004                 return string->At<verify>(index);
1005             }
1006         }
1007         UNREACHABLE();
1008     }
1009 };
1010 
1011 // FlatStringInfo holds an EcmaString* instead of a JSHandle. If a GC occurs during its usage period,
1012 // it may cause the pointer to become invalid, necessitating the pointer to be reset.
1013 class FlatStringInfo {
1014 public:
FlatStringInfo(EcmaString * string,uint32_t startIndex,uint32_t length)1015     FlatStringInfo(EcmaString *string, uint32_t startIndex, uint32_t length) : string_(string),
1016                                                                                startIndex_(startIndex),
1017                                                                                length_(length) {}
IsUtf8()1018     bool IsUtf8() const
1019     {
1020         return string_->IsUtf8();
1021     }
1022 
IsUtf16()1023     bool IsUtf16() const
1024     {
1025         return string_->IsUtf16();
1026     }
1027 
GetString()1028     EcmaString *GetString() const
1029     {
1030         return string_;
1031     }
1032 
SetString(EcmaString * string)1033     void SetString(EcmaString *string)
1034     {
1035         string_ = string;
1036     }
1037 
GetStartIndex()1038     uint32_t GetStartIndex() const
1039     {
1040         return startIndex_;
1041     }
1042 
SetStartIndex(uint32_t index)1043     void SetStartIndex(uint32_t index)
1044     {
1045         startIndex_ = index;
1046     }
1047 
GetLength()1048     uint32_t GetLength() const
1049     {
1050         return length_;
1051     }
1052 
1053     const uint8_t *GetDataUtf8() const;
1054     const uint16_t *GetDataUtf16() const;
1055     uint8_t *GetDataUtf8Writable() const;
1056     uint16_t *GetDataUtf16Writable() const;
1057     std::u16string ToU16String(uint32_t len = 0);
1058 private:
1059     EcmaString *string_ {nullptr};
1060     uint32_t startIndex_ {0};
1061     uint32_t length_ {0};
1062 };
1063 
// Do not call the member functions of EcmaString directly;
// use the corresponding functions of EcmaStringAccessor instead.
// e.g.: EcmaString *str = ***; str->GetLength() ----->  EcmaStringAccessor(str).GetLength()
1067 class PUBLIC_API EcmaStringAccessor {
1068 public:
EcmaStringAccessor(EcmaString * string)1069     explicit inline EcmaStringAccessor(EcmaString *string)
1070     {
1071         ASSERT(string != nullptr);
1072         string_ = string;
1073     }
1074 
    // Further explicit constructors, defined out of line, that build an accessor from other
    // representations of a string.
    // NOTE(review): presumably `obj` / `value` must actually refer to an EcmaString — confirm.
    explicit EcmaStringAccessor(TaggedObject *obj);

    explicit EcmaStringAccessor(JSTaggedValue value);

    explicit EcmaStringAccessor(const JSHandle<EcmaString> &strHandle);
1080 
    // Forwards to EcmaString::CalculateAllConcatHashCode.
    static uint32_t CalculateAllConcatHashCode(const JSHandle<EcmaString> &firstString,
                                               const JSHandle<EcmaString> &secondString)
    {
        return EcmaString::CalculateAllConcatHashCode(firstString, secondString);
    }

    // Defined out of line.
    static EcmaString *CreateLineString(const EcmaVM *vm, size_t length, bool compressed);

    // Forwards to EcmaString::CreateEmptyString.
    static EcmaString *CreateEmptyString(const EcmaVM *vm)
    {
        return EcmaString::CreateEmptyString(vm);
    }

    // Forwards to EcmaString::CreateFromUtf8.
    // Defaults: SHARED_OLD_SPACE, not a constant string, idOffset 0.
    static EcmaString *CreateFromUtf8(const EcmaVM *vm, const uint8_t *utf8Data, uint32_t utf8Len, bool canBeCompress,
                                      MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE, bool isConstantString = false,
                                      uint32_t idOffset = 0)
    {
        return EcmaString::CreateFromUtf8(vm, utf8Data, utf8Len, canBeCompress, type, isConstantString, idOffset);
    }

    // Forwards to EcmaString::CreateFromUtf8CompressedSubString; `type` defaults to SHARED_OLD_SPACE.
    static EcmaString *CreateFromUtf8CompressedSubString(const EcmaVM *vm, const JSHandle<EcmaString> &string,
                                                         uint32_t offset, uint32_t utf8Len,
                                                         MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE)
    {
        return EcmaString::CreateFromUtf8CompressedSubString(vm, string, offset, utf8Len, type);
    }

    // Forwards to EcmaString::CreateConstantString; defaults: SHARED_OLD_SPACE, idOffset 0.
    static EcmaString *CreateConstantString(const EcmaVM *vm, const uint8_t *utf8Data, size_t length,
        bool compressed, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE, uint32_t idOffset = 0)
    {
        return EcmaString::CreateConstantString(vm, utf8Data, length, compressed, type, idOffset);
    }

    // Forwards to EcmaString::CreateUtf16StringFromUtf8; `type` defaults to SHARED_OLD_SPACE.
    static EcmaString *CreateUtf16StringFromUtf8(const EcmaVM *vm, const uint8_t *utf8Data, uint32_t utf8Len,
        MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE)
    {
        return EcmaString::CreateUtf16StringFromUtf8(vm, utf8Data, utf8Len, type);
    }

    // Forwards to EcmaString::CreateFromUtf16; `type` defaults to SHARED_OLD_SPACE.
    static EcmaString *CreateFromUtf16(const EcmaVM *vm, const uint16_t *utf16Data, uint32_t utf16Len,
                                       bool canBeCompress, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE)
    {
        return EcmaString::CreateFromUtf16(vm, utf16Data, utf16Len, canBeCompress, type);
    }

    // Forwards to EcmaString::Concat; `type` defaults to SHARED_OLD_SPACE.
    static EcmaString *Concat(const EcmaVM *vm, const JSHandle<EcmaString> &str1Handle,
        const JSHandle<EcmaString> &str2Handle, MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE)
    {
        return EcmaString::Concat(vm, str1Handle, str2Handle, type);
    }
1131 
    // Forwards to EcmaString::CopyStringToOldSpace.
    static EcmaString *CopyStringToOldSpace(const EcmaVM *vm, const JSHandle<EcmaString> &original,
        uint32_t length, bool compressed)
    {
        return EcmaString::CopyStringToOldSpace(vm, original, length, compressed);
    }

    // May change the data structure of `src`.
    // Forwards to EcmaString::FastSubString.
    static EcmaString *FastSubString(const EcmaVM *vm,
        const JSHandle<EcmaString> &src, uint32_t start, uint32_t length)
    {
        return EcmaString::FastSubString(vm, src, start, length);
    }
    // Forwards to EcmaString::SubStringIsUtf8.
    static bool SubStringIsUtf8(const EcmaVM *vm,
        const JSHandle<EcmaString> &src, uint32_t start, uint32_t length)
    {
        return EcmaString::SubStringIsUtf8(vm, src, start, length);
    }
    // Forwards to EcmaString::GetSubString.
    static EcmaString *GetSubString(const EcmaVM *vm,
        const JSHandle<EcmaString> &src, uint32_t start, uint32_t length)
    {
        return EcmaString::GetSubString(vm, src, start, length);
    }
1155 
    // Encoding and length queries, answered by the wrapped string.
    bool IsUtf8() const
    {
        return string_->IsUtf8();
    }

    bool IsUtf16() const
    {
        return string_->IsUtf16();
    }

    uint32_t GetLength() const
    {
        return string_->GetLength();
    }

    // Requires the string to be a LineString. Defined out of line.
    inline size_t GetUtf8Length(bool isGetBufferSize = false) const;
1173 
ObjectSize()1174     size_t ObjectSize() const
1175     {
1176         if (string_->IsLineString()) {
1177             return LineEcmaString::ObjectSize(string_);
1178         } if (string_->IsConstantString()) {
1179             return ConstantString::ObjectSize();
1180         } else {
1181             return TreeEcmaString::SIZE;
1182         }
1183     }
1184 
1185     // For TreeString, the calculation result is size of LineString correspondingly.
GetFlatStringSize()1186     size_t GetFlatStringSize() const
1187     {
1188         if (string_->IsConstantString()) {
1189             return ConstantString::ObjectSize();
1190         }
1191         return LineEcmaString::ObjectSize(string_);
1192     }
1193 
    // Intern-flag accessors; each forwards to the wrapped string.

    // Forwards to EcmaString::IsInternString.
    bool IsInternString() const
    {
        return string_->IsInternString();
    }

    // Forwards to EcmaString::SetIsInternString.
    void SetInternString()
    {
        string_->SetIsInternString();
    }

    // Forwards to EcmaString::ClearInternStringFlag.
    void ClearInternString()
    {
        string_->ClearInternStringFlag();
    }
1208 
    // Requires the string to be a LineString.
    // The data is in utf8 format, but without a terminating 0. Defined out of line.
    inline const uint8_t *GetDataUtf8();

    // Requires the string to be a LineString. Defined out of line.
    inline const uint16_t *GetDataUtf16();

    // Does not change the string's data structure.
    // Inefficient when the string is not flat.
    // Forwards to EcmaString::ToU16String; `len` defaults to 0.
    std::u16string ToU16String(uint32_t len = 0)
    {
        return string_->ToU16String(len);
    }

    // Does not change the string's data structure.
    // Inefficient when the string is not flat.
    // Forwards to EcmaString::ToOneByteDataForced; the caller owns the returned buffer.
    std::unique_ptr<uint8_t[]> ToOneByteDataForced()
    {
        return string_->ToOneByteDataForced();
    }

    // Does not change the string's data structure.
    // Inefficient when the string is not flat.
    // Forwards to EcmaString::ToUtf8Span with the caller-provided buffer `buf`.
    Span<const uint8_t> ToUtf8Span(CVector<uint8_t> &buf)
    {
        return string_->ToUtf8Span(buf);
    }

    // Only for strings that are flat and use UTF8 encoding. Defined out of line.
    inline Span<const uint8_t> FastToUtf8Span();
1239 
    // Uses the string's hash to figure out whether the string can be converted to an integer.
    // Forwards to EcmaString::TryToGetInteger; the value is passed back through `result`.
    inline bool TryToGetInteger(uint32_t *result)
    {
        return string_->TryToGetInteger(result);
    }

    // Forwards to EcmaString::TryToSetIntegerHash.
    inline bool TryToSetIntegerHash(int32_t num)
    {
        return string_->TryToSetIntegerHash(num);
    }
1250 
    // All conversions below are defined out of line.

    // Does not change the string's data structure.
    // Inefficient when the string is not flat.
    std::string ToStdString(StringConvertedUsage usage = StringConvertedUsage::PRINT);

    // Conversion for Utf8 strings.
    CString Utf8ConvertToString();

    std::string DebuggerToStdString(StringConvertedUsage usage = StringConvertedUsage::PRINT);
    // Does not change the string's data structure.
    // Inefficient when the string is not flat.
    CString ToCString(StringConvertedUsage usage = StringConvertedUsage::LOGICOPERATION, bool cesu8 = false);

    // Appends to the caller's `str` instead of returning a new CString.
    void AppendToCString(CString &str, StringConvertedUsage usage = StringConvertedUsage::LOGICOPERATION,
                         bool cesu8 = false);

    // NOTE(review): presumably appends the string surrounded by quotes — confirm in the definition.
    void AppendQuotedStringToCString(CString &str, StringConvertedUsage usage = StringConvertedUsage::LOGICOPERATION,
                                     bool cesu8 = false);
1268 
    // Does not change the string's data structure.
    // Inefficient when the string is not flat.
    // Forwards to EcmaString::WriteUtf8.
    uint32_t WriteToFlatUtf8(uint8_t *buf, uint32_t maxLength, bool isWriteBuffer = false)
    {
        return string_->WriteUtf8(buf, maxLength, isWriteBuffer);
    }

    // Forwards to EcmaString::WriteUtf16, passing the string's own length together with `bufLength`.
    uint32_t WriteToUtf16(uint16_t *buf, uint32_t bufLength)
    {
        return string_->WriteUtf16(buf, GetLength(), bufLength);
    }

    // Forwards to EcmaString::WriteOneByte.
    uint32_t WriteToOneByte(uint8_t *buf, uint32_t maxLength)
    {
        return string_->WriteOneByte(buf, maxLength);
    }

    // Does not change the string's data structure.
    // Inefficient when the string is not flat.
    // Forwards to EcmaString::CopyDataUtf16.
    uint32_t WriteToFlatUtf16(uint16_t *buf, uint32_t maxLength) const
    {
        return string_->CopyDataUtf16(buf, maxLength);
    }
1292 
1293     template <typename Char>
WriteToFlatWithPos(EcmaString * src,Char * buf,uint32_t length,uint32_t pos)1294     static void WriteToFlatWithPos(EcmaString *src, Char *buf, uint32_t length, uint32_t pos)
1295     {
1296         src->WriteToFlatWithPos(src, buf, length, pos);
1297     }
1298 
1299     template <typename Char>
WriteToFlat(EcmaString * src,Char * buf,uint32_t maxLength)1300     static void WriteToFlat(EcmaString *src, Char *buf, uint32_t maxLength)
1301     {
1302         src->WriteToFlat(src, buf, maxLength);
1303     }
1304 
    // Requires `dst` to be a LineString.
    // Does not change the data structure of `src`.
    // Inefficient when `src` is not flat. Defined out of line.
    inline static void ReadData(EcmaString * dst, EcmaString *src, uint32_t start, uint32_t destSize, uint32_t length);
1309 
1310     // not change src data structure.
1311     // if src is not flat, this func has low efficiency.
1312     template<bool verify = true>
Get(uint32_t index)1313     uint16_t Get(uint32_t index) const
1314     {
1315         return string_->At<verify>(index);
1316     }
1317 
1318     // require string is LineString.
Set(uint32_t index,uint16_t src)1319     void Set(uint32_t index, uint16_t src)
1320     {
1321         return string_->WriteData(index, src);
1322     }
1323 
    // Does not change the string's data structure.
    // Inefficient when the string is not flat.
    // Forwards to EcmaString::GetHashcode.
    uint32_t GetHashcode()
    {
        return string_->GetHashcode();
    }

    // Forwards to EcmaString::GetRawHashcode.
    uint32_t GetRawHashcode()
    {
        return string_->GetRawHashcode();
    }

    // Does not change the string's data structure.
    // Inefficient when the string is not flat.
    // Forwards to EcmaString::ComputeRawHashcode.
    // NOTE(review): the bool of the pair presumably marks an integer-like string — confirm.
    std::pair<uint32_t, bool> ComputeRawHashcode()
    {
        return string_->ComputeRawHashcode();
    }

    // Forwards to EcmaString::ComputeHashcode().
    uint32_t ComputeHashcode()
    {
        return string_->ComputeHashcode();
    }

    // Forwards to EcmaString::ComputeHashcode(rawHashSeed, isInteger).
    uint32_t ComputeHashcode(uint32_t rawHashSeed, bool isInteger)
    {
        return string_->ComputeHashcode(rawHashSeed, isInteger);
    }

    // Forwards to EcmaString::ComputeHashcodeUtf8 for a raw utf8 buffer.
    static uint32_t ComputeHashcodeUtf8(const uint8_t *utf8Data, size_t utf8Len, bool canBeCompress)
    {
        return EcmaString::ComputeHashcodeUtf8(utf8Data, utf8Len, canBeCompress);
    }

    // Forwards to EcmaString::ComputeHashcodeUtf16 for a raw utf16 buffer.
    static uint32_t ComputeHashcodeUtf16(const uint16_t *utf16Data, uint32_t length)
    {
        return EcmaString::ComputeHashcodeUtf16(utf16Data, length);
    }
1362 
    // May change the data structure of `receiver` and `search`.
    // Forwards to EcmaString::IndexOf; `pos` defaults to 0.
    static int32_t IndexOf(const EcmaVM *vm,
        const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos = 0)
    {
        return EcmaString::IndexOf(vm, receiver, search, pos);
    }

    // May change the data structure of `receiver` and `search`.
    // Forwards to EcmaString::LastIndexOf; `pos` defaults to 0.
    static int32_t LastIndexOf(const EcmaVM *vm,
        const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos = 0)
    {
        return EcmaString::LastIndexOf(vm, receiver, search, pos);
    }

    // May change the data structure of `left` and `right`.
    // Forwards to EcmaString::Compare.
    static int32_t Compare(const EcmaVM *vm, const JSHandle<EcmaString>& left, const JSHandle<EcmaString>& right)
    {
        return EcmaString::Compare(vm, left, right);
    }

    // May change the data structure of `left` and `right`.
    // Forwards to EcmaString::IsSubStringAt; `offset` defaults to 0.
    static bool IsSubStringAt(const EcmaVM *vm, const JSHandle<EcmaString>& left,
        const JSHandle<EcmaString>& right, uint32_t offset = 0)
    {
        return EcmaString::IsSubStringAt(vm, left, right, offset);
    }
1390 
    // May change the data structure of `str1` and `str2`.
    // Forwards to the handle-based EcmaString::StringsAreEqual.
    static bool StringsAreEqual(const EcmaVM *vm, const JSHandle<EcmaString> &str1, const JSHandle<EcmaString> &str2)
    {
        return EcmaString::StringsAreEqual(vm, str1, str2);
    }

    // Does not change the data structure of `str1` and `str2`.
    // Inefficient when `str1` or `str2` is not flat.
    // Forwards to the raw-pointer EcmaString::StringsAreEqual.
    static bool StringsAreEqual(EcmaString *str1, EcmaString *str2)
    {
        return EcmaString::StringsAreEqual(str1, str2);
    }

    // Does not change the data structure of `str1` and `str2`.
    // Inefficient when `str1` or `str2` is not flat.
    // Forwards to EcmaString::StringsAreEqualDiffUtfEncoding.
    static bool StringsAreEqualDiffUtfEncoding(EcmaString *str1, EcmaString *str2)
    {
        return EcmaString::StringsAreEqualDiffUtfEncoding(str1, str2);
    }

    // Does not change the data structure of `str1`.
    // Inefficient when `str1` is not flat.
    // Forwards to EcmaString::StringIsEqualUint8Data.
    static bool StringIsEqualUint8Data(const EcmaString *str1, const uint8_t *dataAddr, uint32_t dataLen,
                                       bool canBeCompress)
    {
        return EcmaString::StringIsEqualUint8Data(str1, dataAddr, dataLen, canBeCompress);
    }

    // Does not change the data structure of `str1`.
    // Inefficient when `str1` is not flat.
    // Forwards to EcmaString::StringsAreEqualUtf16.
    static bool StringsAreEqualUtf16(const EcmaString *str1, const uint16_t *utf16Data, uint32_t utf16Len)
    {
        return EcmaString::StringsAreEqualUtf16(str1, utf16Data, utf16Len);
    }

    // Requires `str1` and `str2` to be LineStrings.
    // Does not change the string's data structure.
    // Inefficient when the string is not flat.
    // Forwards to EcmaString::EqualToSplicedString on the wrapped string.
    bool EqualToSplicedString(const EcmaString *str1, const EcmaString *str2)
    {
        return string_->EqualToSplicedString(str1, str2);
    }
1433 
    // Forwards to EcmaString::CanBeCompressed for a raw utf8 buffer.
    static bool CanBeCompressed(const uint8_t *utf8Data, uint32_t utf8Len)
    {
        return EcmaString::CanBeCompressed(utf8Data, utf8Len);
    }

    // Forwards to EcmaString::CanBeCompressed for a raw utf16 buffer.
    static bool CanBeCompressed(const uint16_t *utf16Data, uint32_t utf16Len)
    {
        return EcmaString::CanBeCompressed(utf16Data, utf16Len);
    }

    // Requires `string` to be a LineString.
    // Forwards to EcmaString::CanBeCompressed for an existing string.
    static bool CanBeCompressed(const EcmaString *string)
    {
        return EcmaString::CanBeCompressed(string);
    }
1449 
    // Does not change the string's data structure.
    // Inefficient when the string is not flat.
    // Forwards to EcmaString::ToElementIndex; the value is passed back through `index`.
    bool ToElementIndex(uint32_t *index)
    {
        return string_->ToElementIndex(index);
    }

    // Does not change the string's data structure.
    // Inefficient when the string is not flat.
    // Forwards to EcmaString::ToInt; results are passed back through `index` and `negative`.
    bool ToInt(int32_t *index, bool *negative)
    {
        return string_->ToInt(index, negative);
    }

    // Does not change the string's data structure.
    // Inefficient when the string is not flat.
    // Forwards to EcmaString::ToTypedArrayIndex; the value is passed back through `index`.
    bool PUBLIC_API ToTypedArrayIndex(uint32_t *index)
    {
        return string_->ToTypedArrayIndex(index);
    }
1470 
    // Case-conversion and trimming entry points; each forwards to the EcmaString static of the same name.

    static EcmaString *ToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src)
    {
        return EcmaString::ToLower(vm, src);
    }

    static EcmaString *TryToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src)
    {
        return EcmaString::TryToLower(vm, src);
    }

    static EcmaString *TryToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src)
    {
        return EcmaString::TryToUpper(vm, src);
    }

    static EcmaString *ToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src)
    {
        return EcmaString::ToUpper(vm, src);
    }

    // Locale-aware variants using the given ICU locale.
    static EcmaString *ToLocaleLower(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale)
    {
        return EcmaString::ToLocaleLower(vm, src, locale);
    }

    static EcmaString *ToLocaleUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale)
    {
        return EcmaString::ToLocaleUpper(vm, src, locale);
    }

    // `mode` defaults to EcmaString::TrimMode::TRIM.
    static EcmaString *Trim(const JSThread *thread,
        const JSHandle<EcmaString> &src, EcmaString::TrimMode mode = EcmaString::TrimMode::TRIM)
    {
        return EcmaString::Trim(thread, src, mode);
    }
1506 
IsASCIICharacter(uint16_t data)1507     static bool IsASCIICharacter(uint16_t data)
1508     {
1509         if (data == 0) {
1510             return false;
1511         }
1512         // \0 is not considered ASCII in Ecma-Modified-UTF8 [only modify '\u0000']
1513         return data <= base::utf_helper::UTF8_1B_MAX;
1514     }
1515 
IsFlat()1516     bool IsFlat() const
1517     {
1518         return string_->IsFlat();
1519     }
1520 
IsLineString()1521     bool IsLineString() const
1522     {
1523         return string_->IsLineString();
1524     }
1525 
IsConstantString()1526     bool IsConstantString() const
1527     {
1528         return string_->IsConstantString();
1529     }
1530 
IsSlicedString()1531     bool IsSlicedString() const
1532     {
1533         return string_->IsSlicedString();
1534     }
1535 
IsLineOrConstantString()1536     bool IsLineOrConstantString() const
1537     {
1538         return string_->IsLineOrConstantString();
1539     }
1540 
IsInteger()1541     bool IsInteger() const
1542     {
1543         return string_->IsInteger();
1544     }
1545 
GetIntegerCode()1546     uint32_t GetIntegerCode() const
1547     {
1548         return string_->GetIntegerCode();
1549     }
1550 
GetStringType()1551     JSType GetStringType() const
1552     {
1553         return string_->GetStringType();
1554     }
1555 
IsTreeString()1556     bool IsTreeString() const
1557     {
1558         return string_->IsTreeString();
1559     }
1560 
NotTreeString()1561     bool NotTreeString() const
1562     {
1563         return string_->NotTreeString();
1564     }
1565 
1566     // the returned string may be a linestring, constantstring, or slicestring!!
1567     PUBLIC_API static EcmaString *Flatten(const EcmaVM *vm, const JSHandle<EcmaString> &string,
1568         MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE)
1569     {
1570         return EcmaString::Flatten(vm, string, type);
1571     }
1572 
1573     static FlatStringInfo FlattenAllString(const EcmaVM *vm, const JSHandle<EcmaString> &string,
1574         MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE)
1575     {
1576         return EcmaString::FlattenAllString(vm, string, type);
1577     }
1578 
1579     static EcmaString *SlowFlatten(const EcmaVM *vm, const JSHandle<EcmaString> &string,
1580         MemSpaceType type = MemSpaceType::SHARED_OLD_SPACE)
1581     {
1582         return EcmaString::SlowFlatten(vm, string, type);
1583     }
1584 
FlattenNoGCForSnapshot(const EcmaVM * vm,EcmaString * string)1585     static EcmaString *FlattenNoGCForSnapshot(const EcmaVM *vm, EcmaString *string)
1586     {
1587         return EcmaString::FlattenNoGCForSnapshot(vm, string);
1588     }
1589 
GetUtf8DataFlat(const EcmaString * src,CVector<uint8_t> & buf)1590     static const uint8_t *GetUtf8DataFlat(const EcmaString *src, CVector<uint8_t> &buf)
1591     {
1592         return EcmaString::GetUtf8DataFlat(src, buf);
1593     }
1594 
GetNonTreeUtf8Data(const EcmaString * src)1595     static const uint8_t *GetNonTreeUtf8Data(const EcmaString *src)
1596     {
1597         return EcmaString::GetNonTreeUtf8Data(src);
1598     }
1599 
GetUtf16DataFlat(const EcmaString * src,CVector<uint16_t> & buf)1600     static const uint16_t *GetUtf16DataFlat(const EcmaString *src, CVector<uint16_t> &buf)
1601     {
1602         return EcmaString::GetUtf16DataFlat(src, buf);
1603     }
1604 
GetNonTreeUtf16Data(const EcmaString * src)1605     static const uint16_t *GetNonTreeUtf16Data(const EcmaString *src)
1606     {
1607         return EcmaString::GetNonTreeUtf16Data(src);
1608     }
1609 
1610     static JSTaggedValue StringToList(JSThread *thread, JSHandle<JSTaggedValue> &str);
1611 
1612 private:
1613     EcmaString *string_ {nullptr};
1614 };
1615 }  // namespace ecmascript
1616 }  // namespace panda
1617 #endif  // ECMASCRIPT_STRING_H
1618