• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #ifndef ECMASCRIPT_STRING_H
17 #define ECMASCRIPT_STRING_H
18 
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <memory>
22 
23 #include "ecmascript/base/utf_helper.h"
24 #include "ecmascript/common.h"
25 #include "ecmascript/ecma_macros.h"
26 #include "ecmascript/js_hclass.h"
27 #include "ecmascript/js_tagged_value.h"
28 #include "ecmascript/mem/barriers.h"
29 #include "ecmascript/mem/space.h"
30 #include "ecmascript/mem/tagged_object.h"
31 
32 #include "libpandabase/macros.h"
33 #include "securec.h"
34 #include "unicode/locid.h"
35 
36 namespace panda {
37 namespace ecmascript {
38 template<typename T>
39 class JSHandle;
40 class JSPandaFile;
41 class EcmaVM;
42 class LineEcmaString;
43 class ConstantString;
44 class TreeEcmaString;
45 class SlicedString;
46 class FlatStringInfo;
47 
// Length guard for string creation: when `length` is not below MAX_STRING_LENGTH, hands over to
// THROW_RANGE_ERROR_AND_RETURN with the VM's JSThread, the message "Invalid string length" and
// nullptr as the return value.
// MAX_STRING_LENGTH is referenced unqualified, so it must be visible where the macro is expanded.
// The body is wrapped in do { ... } while (false) so the expansion is exactly one statement and
// stays correct inside an unbraced if/else; invoke it with a trailing semicolon.
// NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
#define ECMA_STRING_CHECK_LENGTH_AND_TRHOW(vm, length)                                            \
    do {                                                                                          \
        if ((length) >= MAX_STRING_LENGTH) {                                                      \
            THROW_RANGE_ERROR_AND_RETURN((vm)->GetJSThread(), "Invalid string length", nullptr);  \
        }                                                                                         \
    } while (false)
53 
54 class EcmaString : public TaggedObject {
55     /* Mix Hash Code: --   { 0 | [31 bits raw hash code] }     computed through string
56                       \    { 1 | [31 bits integer numbers] }   fastpath for string to number
57     */
58 public:
59     CAST_CHECK(EcmaString, IsString);
60 
61     static constexpr uint32_t IS_INTEGER_MASK = 1U << 31;
62     static constexpr uint32_t STRING_COMPRESSED_BIT = 0x1;
63     static constexpr uint32_t STRING_INTERN_BIT = 0x2;
64     static constexpr size_t MAX_STRING_LENGTH = 0x40000000U; // 30 bits for string length, 2 bits for special meaning
65     static constexpr uint32_t STRING_LENGTH_SHIFT_COUNT = 2U;
66     static constexpr uint32_t MAX_INTEGER_HASH_NUMBER = 0x3B9AC9FF;
67     static constexpr uint32_t MAX_CACHED_INTEGER_SIZE = 9;
68 
69     static constexpr size_t MIX_LENGTH_OFFSET = TaggedObjectSize();
70     // In last bit of mix_length we store if this string is compressed or not.
71     ACCESSORS_PRIMITIVE_FIELD(MixLength, uint32_t, MIX_LENGTH_OFFSET, MIX_HASHCODE_OFFSET)
72     // In last bit of mix_hash we store if this string is small-integer number or not.
73     ACCESSORS_PRIMITIVE_FIELD(MixHashcode, uint32_t, MIX_HASHCODE_OFFSET, SIZE)
74 
75     enum CompressedStatus {
76         STRING_COMPRESSED,
77         STRING_UNCOMPRESSED,
78     };
79 
80     enum IsIntegerStatus {
81         NOT_INTEGER = 0,
82         IS_INTEGER,
83     };
84 
85     enum TrimMode : uint8_t {
86         TRIM,
87         TRIM_START,
88         TRIM_END,
89     };
90 
91     enum ConcatOptStatus {
92         BEGIN_STRING_ADD = 1,
93         IN_STRING_ADD,
94         CONFIRMED_IN_STRING_ADD,
95         END_STRING_ADD,
96         INVALID_STRING_ADD,
97         HAS_BACKING_STORE,
98     };
99 
100 private:
101     friend class EcmaStringAccessor;
102     friend class LineEcmaString;
103     friend class ConstantString;
104     friend class TreeEcmaString;
105     friend class SlicedString;
106     friend class FlatStringInfo;
107     friend class NameDictionary;
108 
109     static constexpr int SMALL_STRING_SIZE = 128;
110 
111     static EcmaString *CreateEmptyString(const EcmaVM *vm);
112     static EcmaString *CreateFromUtf8(const EcmaVM *vm, const uint8_t *utf8Data, uint32_t utf8Len,
113         bool canBeCompress, MemSpaceType type = MemSpaceType::SEMI_SPACE, bool isConstantString = false,
114         uint32_t idOffset = 0);
115     static EcmaString *CreateUtf16StringFromUtf8(const EcmaVM *vm, const uint8_t *utf8Data, uint32_t utf8Len,
116         MemSpaceType type = MemSpaceType::SEMI_SPACE);
117     static EcmaString *CreateFromUtf16(const EcmaVM *vm, const uint16_t *utf16Data, uint32_t utf16Len,
118         bool canBeCompress, MemSpaceType type = MemSpaceType::SEMI_SPACE);
119     static SlicedString *CreateSlicedString(const EcmaVM *vm, MemSpaceType type = MemSpaceType::SEMI_SPACE);
120     static EcmaString *CreateLineString(const EcmaVM *vm, size_t length, bool compressed);
121     static EcmaString *CreateLineStringNoGC(const EcmaVM *vm, size_t length, bool compressed);
122     static EcmaString *CreateLineStringWithSpaceType(const EcmaVM *vm,
123         size_t length, bool compressed, MemSpaceType type);
124     static EcmaString *CreateTreeString(const EcmaVM *vm,
125         const JSHandle<EcmaString> &left, const JSHandle<EcmaString> &right, uint32_t length, bool compressed);
126     static EcmaString *CreateConstantString(const EcmaVM *vm, const uint8_t *utf8Data,
127         size_t length, bool compressed, MemSpaceType type = MemSpaceType::SEMI_SPACE, uint32_t idOffset = 0);
128     static EcmaString *Concat(const EcmaVM *vm, const JSHandle<EcmaString> &left,
129         const JSHandle<EcmaString> &right, MemSpaceType type = MemSpaceType::SEMI_SPACE);
130     static EcmaString *CopyStringToOldSpace(const EcmaVM *vm, const JSHandle<EcmaString> &original,
131         uint32_t length, bool compressed);
132     static EcmaString *FastSubString(const EcmaVM *vm,
133         const JSHandle<EcmaString> &src, uint32_t start, uint32_t length);
134     static EcmaString *GetSlicedString(const EcmaVM *vm,
135         const JSHandle<EcmaString> &src, uint32_t start, uint32_t length);
136     static EcmaString *GetSubString(const EcmaVM *vm,
137         const JSHandle<EcmaString> &src, uint32_t start, uint32_t length);
138     // require src is LineString
139     // not change src data structure
140     static inline EcmaString *FastSubUtf8String(const EcmaVM *vm,
141         const JSHandle<EcmaString> &src, uint32_t start, uint32_t length);
142     // require src is LineString
143     // not change src data structure
144     static inline EcmaString *FastSubUtf16String(const EcmaVM *vm,
145         const JSHandle<EcmaString> &src, uint32_t start, uint32_t length);
146     inline void TrimLineString(const JSThread *thread, uint32_t newLength);
IsUtf8()147     inline bool IsUtf8() const
148     {
149         return (GetMixLength() & STRING_COMPRESSED_BIT) == STRING_COMPRESSED;
150     }
151 
IsUtf16()152     inline bool IsUtf16() const
153     {
154         return (GetMixLength() & STRING_COMPRESSED_BIT) == STRING_UNCOMPRESSED;
155     }
156 
IsInteger()157     inline bool IsInteger()
158     {
159         return (GetHashcode() & IS_INTEGER_MASK) == IS_INTEGER_MASK;
160     }
161 
162     // require is LineString
163     inline uint16_t *GetData() const;
164     inline const uint8_t *GetDataUtf8() const;
165     inline const uint16_t *GetDataUtf16() const;
166 
167     // require is LineString
168     inline uint8_t *GetDataUtf8Writable();
169     inline uint16_t *GetDataUtf16Writable();
170 
GetLength()171     inline uint32_t GetLength() const
172     {
173         return GetMixLength() >> STRING_LENGTH_SHIFT_COUNT;
174     }
175 
176     inline void SetLength(uint32_t length, bool compressed = false)
177     {
178         ASSERT(length < MAX_STRING_LENGTH);
179         // Use 0u for compressed/utf8 expression
180         SetMixLength((length << STRING_LENGTH_SHIFT_COUNT) | (compressed ? STRING_COMPRESSED : STRING_UNCOMPRESSED));
181     }
182 
GetRawHashcode()183     inline uint32_t GetRawHashcode() const
184     {
185         return GetMixHashcode() & (~IS_INTEGER_MASK);
186     }
187 
MixHashcode(uint32_t hashcode,bool isInteger)188     static inline uint32_t MixHashcode(uint32_t hashcode, bool isInteger)
189     {
190         return isInteger ? (hashcode | IS_INTEGER_MASK) : (hashcode & (~IS_INTEGER_MASK));
191     }
192 
193     inline void SetRawHashcode(uint32_t hashcode, bool isInteger = false)
194     {
195         // Use 0u for not integer string's expression
196         SetMixHashcode(MixHashcode(hashcode, isInteger));
197     }
198 
199     inline size_t GetUtf8Length(bool modify = true) const;
200 
SetIsInternString()201     inline void SetIsInternString()
202     {
203         SetMixLength(GetMixLength() | STRING_INTERN_BIT);
204     }
205 
IsInternString()206     inline bool IsInternString() const
207     {
208         return (GetMixLength() & STRING_INTERN_BIT) != 0;
209     }
210 
ClearInternStringFlag()211     inline void ClearInternStringFlag()
212     {
213         SetMixLength(GetMixLength() & ~STRING_INTERN_BIT);
214     }
215 
TryGetHashCode(uint32_t * hash)216     inline bool TryGetHashCode(uint32_t *hash)
217     {
218         uint32_t hashcode = GetMixHashcode();
219         if (hashcode == 0 && GetLength() != 0) {
220             return false;
221         }
222         *hash = hashcode;
223         return true;
224     }
225 
GetIntegerCode()226     inline uint32_t GetIntegerCode()
227     {
228         ASSERT(GetMixHashcode() & IS_INTEGER_MASK);
229         return GetRawHashcode();
230     }
231 
232     // not change this data structure.
233     // if string is not flat, this func has low efficiency.
GetHashcode()234     uint32_t PUBLIC_API GetHashcode()
235     {
236         uint32_t hashcode = GetMixHashcode();
237         // GetLength() == 0 means it's an empty array.No need to computeHashCode again when hashseed is 0.
238         if (hashcode == 0 && GetLength() != 0) {
239             hashcode = ComputeHashcode();
240             SetMixHashcode(hashcode);
241         }
242         return hashcode;
243     }
244 
245     template<typename T>
IsDecimalDigitChar(const T c)246     inline static bool IsDecimalDigitChar(const T c)
247     {
248         return (c >= '0' && c <= '9');
249     }
250 
ComputeIntegerHash(uint32_t * num,uint8_t c)251     static uint32_t ComputeIntegerHash(uint32_t *num, uint8_t c)
252     {
253         if (!IsDecimalDigitChar(c)) {
254             return false;
255         }
256         int charDate = c - '0';
257         *num = (*num) * 10 + charDate; // 10: decimal factor
258         return true;
259     }
260 
261     bool HashIntegerString(uint32_t length, uint32_t *hash, uint32_t hashSeed) const;
262 
263     template<typename T>
HashIntegerString(const T * data,size_t size,uint32_t * hash,uint32_t hashSeed)264     static bool HashIntegerString(const T *data, size_t size, uint32_t *hash, uint32_t hashSeed)
265     {
266         ASSERT(size >= 0);
267         if (hashSeed == 0) {
268             if (IsDecimalDigitChar(data[0]) && data[0] != '0') {
269                 uint32_t num = data[0] - '0';
270                 uint32_t i = 1;
271                 do {
272                     if (i == size) {
273                         // compute mix hash
274                         if (num <= MAX_INTEGER_HASH_NUMBER) {
275                             *hash = MixHashcode(num, IS_INTEGER);
276                             return true;
277                         }
278                         return false;
279                     }
280                 } while (ComputeIntegerHash(&num, data[i++]));
281             }
282             if (size == 1 && (data[0] == '0')) {
283                 *hash = MixHashcode(0, IS_INTEGER);
284                 return true;
285             }
286         } else {
287             if (IsDecimalDigitChar(data[0])) {
288                 uint32_t num = hashSeed * 10 + (data[0] - '0'); // 10: decimal factor
289                 uint32_t i = 1;
290                 do {
291                     if (i == size) {
292                         // compute mix hash
293                         if (num <= MAX_INTEGER_HASH_NUMBER) {
294                             *hash = MixHashcode(num, IS_INTEGER);
295                             return true;
296                         }
297                         return false;
298                     }
299                 } while (ComputeIntegerHash(&num, data[i++]));
300             }
301         }
302         return false;
303     }
304 
305     // not change this data structure.
306     // if string is not flat, this func has low efficiency.
307     uint32_t PUBLIC_API ComputeHashcode() const;
308     std::pair<uint32_t, bool> PUBLIC_API ComputeRawHashcode() const;
309     uint32_t PUBLIC_API ComputeHashcode(uint32_t rawHashSeed, bool isInteger) const;
310 
311     static uint32_t ComputeHashcodeUtf8(const uint8_t *utf8Data, size_t utf8Len, bool canBeCompress);
312     static uint32_t ComputeHashcodeUtf16(const uint16_t *utf16Data, uint32_t length);
313 
314     template<bool verify = true>
315     uint16_t At(int32_t index) const;
316 
317     // require is LineString
318     void WriteData(uint32_t index, uint16_t src);
319 
320     // can change left and right data structure
321     static int32_t Compare(const EcmaVM *vm, const JSHandle<EcmaString> &left, const JSHandle<EcmaString> &right);
322 
323     // Check that two spans are equal. Should have the same length.
324     /* static */
325     template<typename T, typename T1>
StringsAreEquals(Span<const T> & str1,Span<const T1> & str2)326     static bool StringsAreEquals(Span<const T> &str1, Span<const T1> &str2)
327     {
328         ASSERT(str1.Size() <= str2.Size());
329         size_t size = str1.Size();
330         if (!std::is_same_v<T, T1>) {
331             for (size_t i = 0; i < size; i++) {
332                 auto left = static_cast<uint16_t>(str1[i]);
333                 auto right = static_cast<uint16_t>(str2[i]);
334                 if (left != right) {
335                     return false;
336                 }
337             }
338             return true;
339         }
340         if (size < SMALL_STRING_SIZE) {
341             for (size_t i = 0; i < size; i++) {
342                 if (str1[i] != str2[i]) {
343                     return false;
344                 }
345             }
346             return true;
347         }
348         return memcmp(str1.data(), str2.data(), size * sizeof(T)) == 0;
349     }
350 
351     // Converts utf8Data to utf16 and compare it with given utf16_data.
352     static bool IsUtf8EqualsUtf16(const uint8_t *utf8Data, size_t utf8Len, const uint16_t *utf16Data,
353                                   uint32_t utf16Len);
354     // Compares string1 + string2 by bytes, It doesn't check canonical unicode equivalence.
355     bool EqualToSplicedString(const EcmaString *str1, const EcmaString *str2);
356     // Compares strings by bytes, It doesn't check canonical unicode equivalence.
357     static bool StringsAreEqual(const EcmaVM *vm, const JSHandle<EcmaString> &str1, const JSHandle<EcmaString> &str2);
358     // Compares strings by bytes, It doesn't check canonical unicode equivalence.
359     static bool StringsAreEqual(EcmaString *str1, EcmaString *str2);
360     // Two strings have the same type of utf encoding format.
361     static bool StringsAreEqualDiffUtfEncoding(EcmaString *str1, EcmaString *str2);
362     static bool StringsAreEqualDiffUtfEncoding(const FlatStringInfo &str1, const FlatStringInfo &str2);
363     // Compares strings by bytes, It doesn't check canonical unicode equivalence.
364     // not change str1 data structure.
365     // if str1 is not flat, this func has low efficiency.
366     static bool StringIsEqualUint8Data(const EcmaString *str1, const uint8_t *dataAddr, uint32_t dataLen,
367                                        bool canBeCompress);
368     // Compares strings by bytes, It doesn't check canonical unicode equivalence.
369     // not change str1 data structure.
370     // if str1 is not flat, this func has low efficiency.
371     static bool StringsAreEqualUtf16(const EcmaString *str1, const uint16_t *utf16Data, uint32_t utf16Len);
372 
373     // can change receiver and search data structure
374     static int32_t IndexOf(const EcmaVM *vm,
375         const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos = 0);
376 
377     // can change receiver and search data structure
378     static int32_t LastIndexOf(const EcmaVM *vm,
379         const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos = 0);
380 
381     inline size_t CopyDataUtf8(uint8_t *buf, size_t maxLength, bool modify = true) const
382     {
383         if (maxLength == 0) {
384             return 1; // maxLength was -1 at napi
385         }
386         size_t length = GetLength();
387         if (length > maxLength) {
388             return 0;
389         }
390         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
391         buf[maxLength - 1] = '\0';
392         // Put comparison here so that internal usage and napi can use the same CopyDataRegionUtf8
393         return CopyDataRegionUtf8(buf, 0, length, maxLength, modify) + 1;  // add place for zero in the end
394     }
395 
396     // It allows user to copy into buffer even if maxLength < length
397     inline size_t WriteUtf8(uint8_t *buf, size_t maxLength, bool isWriteBuffer = false) const
398     {
399         if (maxLength == 0) {
400             return 1; // maxLength was -1 at napi
401         }
402         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
403         buf[maxLength - 1] = '\0';
404         return CopyDataRegionUtf8(buf, 0, GetLength(), maxLength, true, isWriteBuffer) + 1;
405     }
406 
CopyDataToUtf16(uint16_t * buf,uint32_t length,uint32_t bufLength)407     size_t CopyDataToUtf16(uint16_t *buf, uint32_t length, uint32_t bufLength) const
408     {
409         if (IsUtf16()) {
410             CVector<uint16_t> tmpBuf;
411             const uint16_t *data = EcmaString::GetUtf16DataFlat(this, tmpBuf);
412             if (length > bufLength) {
413                 if (memcpy_s(buf, bufLength * sizeof(uint16_t), data, bufLength * sizeof(uint16_t)) != EOK) {
414                     LOG_FULL(FATAL) << "memcpy_s failed when length > bufLength";
415                     UNREACHABLE();
416                 }
417                 return bufLength;
418             }
419             if (memcpy_s(buf, bufLength * sizeof(uint16_t), data, length * sizeof(uint16_t)) != EOK) {
420                 LOG_FULL(FATAL) << "memcpy_s failed";
421                 UNREACHABLE();
422             }
423             return length;
424         }
425         CVector<uint8_t> tmpBuf;
426         const uint8_t *data = EcmaString::GetUtf8DataFlat(this, tmpBuf);
427         if (length > bufLength) {
428             return base::utf_helper::ConvertRegionUtf8ToUtf16(data, buf, bufLength, bufLength, 0);
429         }
430         return base::utf_helper::ConvertRegionUtf8ToUtf16(data, buf, length, bufLength, 0);
431     }
432 
433     // It allows user to copy into buffer even if maxLength < length
WriteUtf16(uint16_t * buf,uint32_t targetLength,uint32_t bufLength)434     inline size_t WriteUtf16(uint16_t *buf, uint32_t targetLength, uint32_t bufLength) const
435     {
436         if (bufLength == 0) {
437             return 0;
438         }
439         // Returns a number representing a valid backrest length.
440         return CopyDataToUtf16(buf, targetLength, bufLength);
441     }
442 
WriteOneByte(uint8_t * buf,size_t maxLength)443     size_t WriteOneByte(uint8_t *buf, size_t maxLength) const
444     {
445         if (maxLength == 0) {
446             return 0;
447         }
448         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
449         buf[maxLength - 1] = '\0';
450         uint32_t length = GetLength();
451         if (!IsUtf16()) {
452             CVector<uint8_t> tmpBuf;
453             const uint8_t *data = GetUtf8DataFlat(this, tmpBuf);
454             if (length > maxLength) {
455                 length = maxLength;
456             }
457             if (memcpy_s(buf, maxLength, data, length) != EOK) {
458                 LOG_FULL(FATAL) << "memcpy_s failed when write one byte";
459                 UNREACHABLE();
460             }
461             return length;
462         }
463 
464         CVector<uint16_t> tmpBuf;
465         const uint16_t *data = GetUtf16DataFlat(this, tmpBuf);
466         if (length > maxLength) {
467             return base::utf_helper::ConvertRegionUtf16ToLatin1(data, buf, maxLength, maxLength);
468         }
469         return base::utf_helper::ConvertRegionUtf16ToLatin1(data, buf, length, maxLength);
470     }
471 
472     size_t CopyDataRegionUtf8(uint8_t *buf, size_t start, size_t length, size_t maxLength,
473                               bool modify = true, bool isWriteBuffer = false) const
474     {
475         uint32_t len = GetLength();
476         if (start + length > len) {
477             return 0;
478         }
479         if (!IsUtf16()) {
480             if (length > std::numeric_limits<size_t>::max() / 2 - 1) {  // 2: half
481                 LOG_FULL(FATAL) << " length is higher than half of size_t::max";
482                 UNREACHABLE();
483             }
484             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
485             // Only memcpy_s maxLength number of chars into buffer if length > maxLength
486             CVector<uint8_t> tmpBuf;
487             const uint8_t *data = GetUtf8DataFlat(this, tmpBuf);
488             if (length > maxLength) {
489                 if (memcpy_s(buf, maxLength, data + start, maxLength) != EOK) {
490                     LOG_FULL(FATAL) << "memcpy_s failed when length > maxlength";
491                     UNREACHABLE();
492                 }
493                 return maxLength;
494             }
495             if (memcpy_s(buf, maxLength, data + start, length) != EOK) {
496                 LOG_FULL(FATAL) << "memcpy_s failed when length <= maxlength";
497                 UNREACHABLE();
498             }
499             return length;
500         }
501         CVector<uint16_t> tmpBuf;
502         const uint16_t *data = GetUtf16DataFlat(this, tmpBuf);
503         if (length > maxLength) {
504             return base::utf_helper::ConvertRegionUtf16ToUtf8(data, buf, maxLength, maxLength, start,
505                                                               modify, isWriteBuffer);
506         }
507         return base::utf_helper::ConvertRegionUtf16ToUtf8(data, buf, length, maxLength, start,
508                                                           modify, isWriteBuffer);
509     }
510 
CopyDataUtf16(uint16_t * buf,uint32_t maxLength)511     inline uint32_t CopyDataUtf16(uint16_t *buf, uint32_t maxLength) const
512     {
513         return CopyDataRegionUtf16(buf, 0, GetLength(), maxLength);
514     }
515 
CopyDataRegionUtf16(uint16_t * buf,uint32_t start,uint32_t length,uint32_t maxLength)516     uint32_t CopyDataRegionUtf16(uint16_t *buf, uint32_t start, uint32_t length, uint32_t maxLength) const
517     {
518         if (length > maxLength) {
519             return 0;
520         }
521         uint32_t len = GetLength();
522         if (start + length > len) {
523             return 0;
524         }
525         if (IsUtf16()) {
526             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
527             CVector<uint16_t> tmpBuf;
528             const uint16_t *data = EcmaString::GetUtf16DataFlat(this, tmpBuf);
529             if (memcpy_s(buf, maxLength * sizeof(uint16_t), data + start, length * sizeof(uint16_t)) != EOK) {
530                 LOG_FULL(FATAL) << "memcpy_s failed";
531                 UNREACHABLE();
532             }
533             return length;
534         }
535         CVector<uint8_t> tmpBuf;
536         const uint8_t *data = EcmaString::GetUtf8DataFlat(this, tmpBuf);
537         return base::utf_helper::ConvertRegionUtf8ToUtf16(data, buf, len, maxLength, start);
538     }
539 
540     std::u16string ToU16String(uint32_t len = 0);
541 
ToOneByteDataForced()542     std::unique_ptr<uint8_t[]> ToOneByteDataForced()
543     {
544         uint8_t *buf = nullptr;
545         auto length = GetLength();
546         if (IsUtf16()) {
547             auto size = length * sizeof(uint16_t);
548             buf = new uint8_t[size]();
549             CopyDataUtf16(reinterpret_cast<uint16_t *>(buf), length);
550         } else {
551             buf = new uint8_t[length + 1]();
552             CopyDataUtf8(buf, length + 1);
553         }
554         return std::unique_ptr<uint8_t[]>(buf);
555     }
556 
557     Span<const uint8_t> ToUtf8Span(CVector<uint8_t> &buf, bool modify = true)
558     {
559         Span<const uint8_t> str;
560         uint32_t strLen = GetLength();
561         if (UNLIKELY(IsUtf16())) {
562             CVector<uint16_t> tmpBuf;
563             const uint16_t *data = EcmaString::GetUtf16DataFlat(this, tmpBuf);
564             size_t len = base::utf_helper::Utf16ToUtf8Size(data, strLen, modify) - 1;
565             buf.reserve(len);
566             len = base::utf_helper::ConvertRegionUtf16ToUtf8(data, buf.data(), strLen, len, 0, modify);
567             str = Span<const uint8_t>(buf.data(), len);
568         } else {
569             const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
570             str = Span<const uint8_t>(data, strLen);
571         }
572         return str;
573     }
574 
575     Span<const uint8_t> DebuggerToUtf8Span(CVector<uint8_t> &buf, bool modify = true)
576     {
577         Span<const uint8_t> str;
578         uint32_t strLen = GetLength();
579         if (UNLIKELY(IsUtf16())) {
580             CVector<uint16_t> tmpBuf;
581             const uint16_t *data = EcmaString::GetUtf16DataFlat(this, tmpBuf);
582             size_t len = base::utf_helper::Utf16ToUtf8Size(data, strLen, modify) - 1;
583             buf.reserve(len);
584             len = base::utf_helper::DebuggerConvertRegionUtf16ToUtf8(data, buf.data(), strLen, len, 0, modify);
585             str = Span<const uint8_t>(buf.data(), len);
586         } else {
587             const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
588             str = Span<const uint8_t>(data, strLen);
589         }
590         return str;
591     }
592 
593     inline Span<const uint8_t> FastToUtf8Span() const;
594 
TryToGetInteger(uint32_t * result)595     bool TryToGetInteger(uint32_t *result)
596     {
597         if (!IsInteger()) {
598             return false;
599         }
600         ASSERT(GetLength() <= MAX_CACHED_INTEGER_SIZE);
601         *result = GetIntegerCode();
602         return true;
603     }
604 
605     // using integer number set into hash
TryToSetIntegerHash(int32_t num)606     inline bool TryToSetIntegerHash(int32_t num)
607     {
608         uint32_t hashcode = GetMixHashcode();
609         if (hashcode == 0 && GetLength() != 0) {
610             SetRawHashcode(static_cast<uint32_t>(num), IS_INTEGER);
611             return true;
612         }
613         return false;
614     }
615 
616     void WriteData(EcmaString *src, uint32_t start, uint32_t destSize, uint32_t length);
617 
618     static bool CanBeCompressed(const uint8_t *utf8Data, uint32_t utf8Len);
619     static bool CanBeCompressed(const uint16_t *utf16Data, uint32_t utf16Len);
620     static bool CanBeCompressed(const EcmaString *string);
621 
622     bool ToElementIndex(uint32_t *index);
623 
624     bool ToInt(int32_t *index, bool *negative);
625 
626     bool ToUInt64FromLoopStart(uint64_t *index, uint32_t loopStart, const uint8_t *data);
627 
628     bool ToTypedArrayIndex(uint32_t *index);
629 
630     template<bool isLower>
631     static EcmaString *ConvertCase(const EcmaVM *vm, const JSHandle<EcmaString> &src);
632 
633     template<bool isLower>
634     static EcmaString *LocaleConvertCase(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale);
635 
636     template<typename T>
637     static EcmaString *TrimBody(const JSThread *thread, const JSHandle<EcmaString> &src, Span<T> &data, TrimMode mode);
638 
639     static EcmaString *Trim(const JSThread *thread, const JSHandle<EcmaString> &src, TrimMode mode = TrimMode::TRIM);
640 
641     // single char copy for loop
642     template<typename DstType, typename SrcType>
CopyChars(DstType * dst,SrcType * src,uint32_t count)643     static void CopyChars(DstType *dst, SrcType *src, uint32_t count)
644     {
645         Span<SrcType> srcSp(src, count);
646         Span<DstType> dstSp(dst, count);
647         for (uint32_t i = 0; i < count; i++) {
648             dstSp[i] = srcSp[i];
649         }
650     }
651 
652     // memory block copy
653     template<typename T>
654     static bool MemCopyChars(Span<T> &dst, size_t dstMax, Span<const T> &src, size_t count);
655 
656     template<typename T>
ComputeHashForData(const T * data,size_t size,uint32_t hashSeed)657     static uint32_t ComputeHashForData(const T *data, size_t size, uint32_t hashSeed)
658     {
659         uint32_t hash = hashSeed;
660         Span<const T> sp(data, size);
661         for (auto c : sp) {
662             constexpr size_t SHIFT = 5;
663             hash = (hash << SHIFT) - hash + c;
664         }
665         return hash;
666     }
667 
IsASCIICharacter(uint16_t data)668     static bool IsASCIICharacter(uint16_t data)
669     {
670         // \0 is not considered ASCII in Ecma-Modified-UTF8 [only modify '\u0000']
671         return data - 1U < base::utf_helper::UTF8_1B_MAX;
672     }
673 
674     template<typename T1, typename T2>
675     static int32_t IndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos, int32_t max);
676 
677     template<typename T1, typename T2>
678     static int32_t LastIndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos);
679 
680     bool IsFlat() const;
681 
IsLineString()682     bool IsLineString() const
683     {
684         return GetClass()->IsLineString();
685     }
IsConstantString()686     bool IsConstantString() const
687     {
688         return GetClass()->IsConstantString();
689     }
IsSlicedString()690     bool IsSlicedString() const
691     {
692         return GetClass()->IsSlicedString();
693     }
IsTreeString()694     bool IsTreeString() const
695     {
696         return GetClass()->IsTreeString();
697     }
NotTreeString()698     bool NotTreeString() const
699     {
700         return !IsTreeString();
701     }
IsLineOrConstantString()702     bool IsLineOrConstantString() const
703     {
704         auto hclass = GetClass();
705         return hclass->IsLineString() || hclass->IsConstantString();
706     }
707 
GetStringType()708     JSType GetStringType() const
709     {
710         JSType type = GetClass()->GetObjectType();
711         ASSERT(type >= JSType::STRING_FIRST && type <= JSType::STRING_LAST);
712         return type;
713     }
714 
715     template <typename Char>
716     static void WriteToFlat(EcmaString *src, Char *buf, uint32_t maxLength);
717 
718     static const uint8_t *GetUtf8DataFlat(const EcmaString *src, CVector<uint8_t> &buf);
719 
720     static const uint16_t *GetUtf16DataFlat(const EcmaString *src, CVector<uint16_t> &buf);
721 
722     // string must be not flat
723     static EcmaString *SlowFlatten(const EcmaVM *vm, const JSHandle<EcmaString> &string, MemSpaceType type);
724 
725     static EcmaString *Flatten(const EcmaVM *vm, const JSHandle<EcmaString> &string,
726                                MemSpaceType type = MemSpaceType::SEMI_SPACE);
727 
728     static FlatStringInfo FlattenAllString(const EcmaVM *vm, const JSHandle<EcmaString> &string,
729                                             MemSpaceType type = MemSpaceType::SEMI_SPACE);
730 
731     static EcmaString *FlattenNoGC(const EcmaVM *vm, EcmaString *string);
732 
733     static EcmaString *ToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src);
734 
735     static EcmaString *ToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src);
736 
737     static EcmaString *ToLocaleLower(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale);
738 
739     static EcmaString *ToLocaleUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale);
740 
741     static EcmaString *TryToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src);
742 
743     static EcmaString *ConvertUtf8ToLowerOrUpper(const EcmaVM *vm, const JSHandle<EcmaString> &srcFlat,
744                                                  bool toLower, uint32_t startIndex = 0);
745 };
746 
747 // The LineEcmaString abstract class captures sequential string values, only LineEcmaString can store chars data
748 class LineEcmaString : public EcmaString {
749 public:
750     static constexpr uint32_t MAX_LENGTH = (1 << 28) - 16;
751     static constexpr uint32_t INIT_LENGTH_TIMES = 4;
752     // DATA_OFFSET: the string data stored after the string header.
753     // Data can be stored in utf8 or utf16 form according to compressed bit.
754     static constexpr size_t DATA_OFFSET = EcmaString::SIZE;  // DATA_OFFSET equal to Empty String size
755 
756     CAST_CHECK(LineEcmaString, IsLineString);
757 
758     DECL_VISIT_ARRAY(DATA_OFFSET, 0, GetPointerLength());
759 
Cast(EcmaString * str)760     static LineEcmaString *Cast(EcmaString *str)
761     {
762         return static_cast<LineEcmaString *>(str);
763     }
764 
Cast(const EcmaString * str)765     static LineEcmaString *Cast(const EcmaString *str)
766     {
767         return LineEcmaString::Cast(const_cast<EcmaString *>(str));
768     }
769 
ComputeSizeUtf8(uint32_t utf8Len)770     static size_t ComputeSizeUtf8(uint32_t utf8Len)
771     {
772         return DATA_OFFSET + utf8Len;
773     }
774 
ComputeSizeUtf16(uint32_t utf16Len)775     static size_t ComputeSizeUtf16(uint32_t utf16Len)
776     {
777         return DATA_OFFSET + utf16Len * sizeof(uint16_t);
778     }
779 
ObjectSize(EcmaString * str)780     static size_t ObjectSize(EcmaString *str)
781     {
782         uint32_t length = str->GetLength();
783         return str->IsUtf16() ? ComputeSizeUtf16(length) : ComputeSizeUtf8(length);
784     }
785 
DataSize(EcmaString * str)786     static size_t DataSize(EcmaString *str)
787     {
788         uint32_t length = str->GetLength();
789         return str->IsUtf16() ? length * sizeof(uint16_t) : length;
790     }
791 
GetPointerLength()792     size_t GetPointerLength()
793     {
794         size_t byteSize = DataSize(this);
795         return AlignUp(byteSize, static_cast<size_t>(MemAlignment::MEM_ALIGN_OBJECT)) / sizeof(JSTaggedType);
796     }
797 
GetData()798     uint16_t *GetData() const
799     {
800         return reinterpret_cast<uint16_t *>(ToUintPtr(this) + DATA_OFFSET);
801     }
802 
803     template<bool verify = true>
Get(int32_t index)804     uint16_t Get(int32_t index) const
805     {
806         int32_t length = static_cast<int32_t>(GetLength());
807         if (verify) {
808             if ((index < 0) || (index >= length)) {
809                 return 0;
810             }
811         }
812         if (!IsUtf16()) {
813             Span<const uint8_t> sp(GetDataUtf8(), length);
814             return sp[index];
815         }
816         Span<const uint16_t> sp(GetDataUtf16(), length);
817         return sp[index];
818     }
819 
Set(uint32_t index,uint16_t src)820     void Set(uint32_t index, uint16_t src)
821     {
822         ASSERT(index < GetLength());
823         if (IsUtf8()) {
824             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
825             *(reinterpret_cast<uint8_t *>(GetData()) + index) = static_cast<uint8_t>(src);
826         } else {
827             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
828             *(GetData() + index) = src;
829         }
830     }
831 };
832 static_assert((LineEcmaString::DATA_OFFSET % static_cast<uint8_t>(MemAlignment::MEM_ALIGN_OBJECT)) == 0);
833 
834 class ConstantString : public EcmaString {
835 public:
836     static constexpr size_t RELOCTAED_DATA_OFFSET = EcmaString::SIZE;
837     // ConstantData is the pointer of const string in the pandafile.
838     // String in pandafile is encoded by the utf8 format.
839     // EntityId is normally the uint32_t index in the pandafile.
840     // When the pandafile is to be removed, EntityId will become -1.
841     // The real string data will be reloacted into bytearray and stored in RelocatedData.
842     // ConstantData will also point at data of bytearray data.
843     ACCESSORS(RelocatedData, RELOCTAED_DATA_OFFSET, ENTITY_ID_OFFSET);
844     ACCESSORS_PRIMITIVE_FIELD(EntityId, int64_t, ENTITY_ID_OFFSET, CONSTANT_DATA_OFFSET);
845     ACCESSORS_NATIVE_FIELD(ConstantData, uint8_t, CONSTANT_DATA_OFFSET, LAST_OFFSET);
846     DEFINE_ALIGN_SIZE(LAST_OFFSET);
847 
848     CAST_CHECK(ConstantString, IsConstantString);
849     DECL_VISIT_OBJECT(RELOCTAED_DATA_OFFSET, ENTITY_ID_OFFSET);
850 
Cast(EcmaString * str)851     static ConstantString *Cast(EcmaString *str)
852     {
853         return static_cast<ConstantString *>(str);
854     }
855 
Cast(const EcmaString * str)856     static ConstantString *Cast(const EcmaString *str)
857     {
858         return ConstantString::Cast(const_cast<EcmaString *>(str));
859     }
860 
ObjectSize()861     static size_t ObjectSize()
862     {
863         return ConstantString::SIZE;
864     }
865 
GetEntityIdU32()866     uint32_t GetEntityIdU32() const
867     {
868         ASSERT(GetEntityId() >= 0);
869         return static_cast<uint32_t>(GetEntityId());
870     }
871 
872     template<bool verify = true>
Get(int32_t index)873     uint16_t Get(int32_t index) const
874     {
875         int32_t length = static_cast<int32_t>(GetLength());
876         if (verify) {
877             if ((index < 0) || (index >= length)) {
878                 return 0;
879             }
880         }
881         ASSERT(IsUtf8());
882         Span<const uint8_t> sp(GetConstantData(), length);
883         return sp[index];
884     }
885 };
886 
887 // The substrings of another string use SlicedString to describe.
888 class SlicedString : public EcmaString {
889 public:
890     static constexpr uint32_t MIN_SLICED_ECMASTRING_LENGTH = 13;
891     static constexpr size_t PARENT_OFFSET = EcmaString::SIZE;
892     ACCESSORS(Parent, PARENT_OFFSET, STARTINDEX_OFFSET);
893     ACCESSORS_PRIMITIVE_FIELD(StartIndex, uint32_t, STARTINDEX_OFFSET, BACKING_STORE_FLAG);
894     ACCESSORS_PRIMITIVE_FIELD(HasBackingStore, uint32_t, BACKING_STORE_FLAG, SIZE);
895 
896     DECL_VISIT_OBJECT(PARENT_OFFSET, STARTINDEX_OFFSET);
897 
898     CAST_CHECK(SlicedString, IsSlicedString);
899 private:
900     friend class EcmaString;
Cast(EcmaString * str)901     static SlicedString *Cast(EcmaString *str)
902     {
903         return static_cast<SlicedString *>(str);
904     }
905 
Cast(const EcmaString * str)906     static SlicedString *Cast(const EcmaString *str)
907     {
908         return SlicedString::Cast(const_cast<EcmaString *>(str));
909     }
910 
ObjectSize()911     static size_t ObjectSize()
912     {
913         return SlicedString::SIZE;
914     }
915 
916     // Minimum length for a sliced string
917     template<bool verify = true>
Get(int32_t index)918     uint16_t Get(int32_t index) const
919     {
920         int32_t length = static_cast<int32_t>(GetLength());
921         if (verify) {
922             if ((index < 0) || (index >= length)) {
923                 return 0;
924             }
925         }
926         EcmaString *parent = EcmaString::Cast(GetParent());
927         if (parent->IsLineString()) {
928             if (parent->IsUtf8()) {
929                 Span<const uint8_t> sp(parent->GetDataUtf8() + GetStartIndex(), length);
930                 return sp[index];
931             }
932             Span<const uint16_t> sp(parent->GetDataUtf16() + GetStartIndex(), length);
933             return sp[index];
934         }
935         Span<const uint8_t> sp(ConstantString::Cast(parent)->GetConstantData() + GetStartIndex(), length);
936         return sp[index];
937     }
938 };
939 
940 class TreeEcmaString : public EcmaString {
941 public:
942     // Minimum length for a tree string
943     static constexpr uint32_t MIN_TREE_ECMASTRING_LENGTH = 13;
944 
945     static constexpr size_t FIRST_OFFSET = EcmaString::SIZE;
946     ACCESSORS(First, FIRST_OFFSET, SECOND_OFFSET);
947     ACCESSORS(Second, SECOND_OFFSET, SIZE);
948 
949     DECL_VISIT_OBJECT(FIRST_OFFSET, SIZE);
950 
951     CAST_CHECK(TreeEcmaString, IsTreeString);
952 
Cast(EcmaString * str)953     static TreeEcmaString *Cast(EcmaString *str)
954     {
955         return static_cast<TreeEcmaString *>(str);
956     }
957 
Cast(const EcmaString * str)958     static TreeEcmaString *Cast(const EcmaString *str)
959     {
960         return TreeEcmaString::Cast(const_cast<EcmaString *>(str));
961     }
962 
IsFlat()963     bool IsFlat() const
964     {
965         auto strSecond = EcmaString::Cast(GetSecond());
966         return strSecond->GetLength() == 0;
967     }
968 
969     template<bool verify = true>
Get(int32_t index)970     uint16_t Get(int32_t index) const
971     {
972         int32_t length = static_cast<int32_t>(GetLength());
973         if (verify) {
974             if ((index < 0) || (index >= length)) {
975                 return 0;
976             }
977         }
978 
979         if (IsFlat()) {
980             EcmaString *first = EcmaString::Cast(GetFirst());
981             return first->At<verify>(index);
982         }
983         EcmaString *string = const_cast<TreeEcmaString *>(this);
984         while (true) {
985             if (string->IsTreeString()) {
986                 EcmaString *first = EcmaString::Cast(TreeEcmaString::Cast(string)->GetFirst());
987                 if (static_cast<int32_t>(first->GetLength()) > index) {
988                     string = first;
989                 } else {
990                     index -= static_cast<int32_t>(first->GetLength());
991                     string = EcmaString::Cast(TreeEcmaString::Cast(string)->GetSecond());
992                 }
993             } else {
994                 return string->At<verify>(index);
995             }
996         }
997         UNREACHABLE();
998     }
999 };
1000 
1001 class FlatStringInfo {
1002 public:
FlatStringInfo(EcmaString * string,uint32_t startIndex,uint32_t length)1003     FlatStringInfo(EcmaString *string, uint32_t startIndex, uint32_t length) : string_(string),
1004                                                                                startIndex_(startIndex),
1005                                                                                length_(length) {}
IsUtf8()1006     bool IsUtf8() const
1007     {
1008         return string_->IsUtf8();
1009     }
1010 
IsUtf16()1011     bool IsUtf16() const
1012     {
1013         return string_->IsUtf16();
1014     }
1015 
GetString()1016     EcmaString *GetString() const
1017     {
1018         return string_;
1019     }
1020 
SetString(EcmaString * string)1021     void SetString(EcmaString *string)
1022     {
1023         string_ = string;
1024     }
1025 
GetStartIndex()1026     uint32_t GetStartIndex() const
1027     {
1028         return startIndex_;
1029     }
1030 
GetLength()1031     uint32_t GetLength() const
1032     {
1033         return length_;
1034     }
1035 
1036     const uint8_t *GetDataUtf8() const;
1037     const uint16_t *GetDataUtf16() const;
1038     uint8_t *GetDataUtf8Writable() const;
1039     std::u16string ToU16String(uint32_t len = 0);
1040 private:
1041     EcmaString *string_ {nullptr};
1042     uint32_t startIndex_ {0};
1043     uint32_t length_ {0};
1044 };
1045 
1046 // if you want to use functions of EcmaString, please not use directly,
1047 // and use functions of EcmaStringAccessor alternatively.
1048 // eg: EcmaString *str = ***; str->GetLength() ----->  EcmaStringAccessor(str).GetLength()
1049 class PUBLIC_API EcmaStringAccessor {
1050 public:
1051     explicit EcmaStringAccessor(EcmaString *string);
1052 
1053     explicit EcmaStringAccessor(TaggedObject *obj);
1054 
1055     explicit EcmaStringAccessor(JSTaggedValue value);
1056 
1057     explicit EcmaStringAccessor(const JSHandle<EcmaString> &strHandle);
1058 
CreateLineString(const EcmaVM * vm,size_t length,bool compressed)1059     static EcmaString *CreateLineString(const EcmaVM *vm, size_t length, bool compressed)
1060     {
1061         return EcmaString::CreateLineString(vm, length, compressed);
1062     }
1063 
CreateEmptyString(const EcmaVM * vm)1064     static EcmaString *CreateEmptyString(const EcmaVM *vm)
1065     {
1066         return EcmaString::CreateEmptyString(vm);
1067     }
1068 
1069     static EcmaString *CreateFromUtf8(const EcmaVM *vm, const uint8_t *utf8Data, uint32_t utf8Len, bool canBeCompress,
1070                                       MemSpaceType type = MemSpaceType::SEMI_SPACE, bool isConstantString = false,
1071                                       uint32_t idOffset = 0)
1072     {
1073         return EcmaString::CreateFromUtf8(vm, utf8Data, utf8Len, canBeCompress, type, isConstantString, idOffset);
1074     }
1075 
1076     static EcmaString *CreateConstantString(const EcmaVM *vm, const uint8_t *utf8Data, size_t length,
1077         bool compressed, MemSpaceType type = MemSpaceType::SEMI_SPACE, uint32_t idOffset = 0)
1078     {
1079         return EcmaString::CreateConstantString(vm, utf8Data, length, compressed, type, idOffset);
1080     }
1081 
1082     static EcmaString *CreateUtf16StringFromUtf8(const EcmaVM *vm, const uint8_t *utf8Data, uint32_t utf8Len,
1083         MemSpaceType type = MemSpaceType::SEMI_SPACE)
1084     {
1085         return EcmaString::CreateUtf16StringFromUtf8(vm, utf8Data, utf8Len, type);
1086     }
1087 
1088     static EcmaString *CreateFromUtf16(const EcmaVM *vm, const uint16_t *utf16Data, uint32_t utf16Len,
1089                                        bool canBeCompress, MemSpaceType type = MemSpaceType::SEMI_SPACE)
1090     {
1091         return EcmaString::CreateFromUtf16(vm, utf16Data, utf16Len, canBeCompress, type);
1092     }
1093 
1094     static EcmaString *Concat(const EcmaVM *vm, const JSHandle<EcmaString> &str1Handle,
1095         const JSHandle<EcmaString> &str2Handle, MemSpaceType type = MemSpaceType::SEMI_SPACE)
1096     {
1097         return EcmaString::Concat(vm, str1Handle, str2Handle, type);
1098     }
1099 
CopyStringToOldSpace(const EcmaVM * vm,const JSHandle<EcmaString> & original,uint32_t length,bool compressed)1100     static EcmaString *CopyStringToOldSpace(const EcmaVM *vm, const JSHandle<EcmaString> &original,
1101         uint32_t length, bool compressed)
1102     {
1103         return EcmaString::CopyStringToOldSpace(vm, original, length, compressed);
1104     }
1105 
1106     // can change src data structure
FastSubString(const EcmaVM * vm,const JSHandle<EcmaString> & src,uint32_t start,uint32_t length)1107     static EcmaString *FastSubString(const EcmaVM *vm,
1108         const JSHandle<EcmaString> &src, uint32_t start, uint32_t length)
1109     {
1110         return EcmaString::FastSubString(vm, src, start, length);
1111     }
1112 
1113     // get
GetSubString(const EcmaVM * vm,const JSHandle<EcmaString> & src,uint32_t start,uint32_t length)1114     static EcmaString *GetSubString(const EcmaVM *vm,
1115         const JSHandle<EcmaString> &src, uint32_t start, uint32_t length)
1116     {
1117         return EcmaString::GetSubString(vm, src, start, length);
1118     }
1119 
IsUtf8()1120     bool IsUtf8() const
1121     {
1122         return string_->IsUtf8();
1123     }
1124 
IsUtf16()1125     bool IsUtf16() const
1126     {
1127         return string_->IsUtf16();
1128     }
1129 
GetLength()1130     uint32_t GetLength() const
1131     {
1132         return string_->GetLength();
1133     }
1134 
1135     // require is LineString
1136     inline size_t GetUtf8Length() const;
1137 
ObjectSize()1138     size_t ObjectSize() const
1139     {
1140         if (string_->IsLineString()) {
1141             return LineEcmaString::ObjectSize(string_);
1142         } if (string_->IsConstantString()) {
1143             return ConstantString::ObjectSize();
1144         } else {
1145             return TreeEcmaString::SIZE;
1146         }
1147     }
1148 
1149     // For TreeString, the calculation result is size of LineString correspondingly.
GetFlatStringSize()1150     size_t GetFlatStringSize() const
1151     {
1152         if (string_->IsConstantString()) {
1153             return ConstantString::ObjectSize();
1154         }
1155         return LineEcmaString::ObjectSize(string_);
1156     }
1157 
IsInternString()1158     bool IsInternString() const
1159     {
1160         return string_->IsInternString();
1161     }
1162 
SetInternString()1163     void SetInternString()
1164     {
1165         string_->SetIsInternString();
1166     }
1167 
ClearInternString()1168     void ClearInternString()
1169     {
1170         string_->ClearInternStringFlag();
1171     }
1172 
1173     // require is LineString
1174     // It's Utf8 format, but without 0 in the end.
1175     inline const uint8_t *GetDataUtf8();
1176 
1177     // require is LineString
1178     inline const uint16_t *GetDataUtf16();
1179 
1180     // not change string data structure.
1181     // if string is not flat, this func has low efficiency.
1182     std::u16string ToU16String(uint32_t len = 0)
1183     {
1184         return string_->ToU16String(len);
1185     }
1186 
1187     // not change string data structure.
1188     // if string is not flat, this func has low efficiency.
ToOneByteDataForced()1189     std::unique_ptr<uint8_t[]> ToOneByteDataForced()
1190     {
1191         return string_->ToOneByteDataForced();
1192     }
1193 
1194     // not change string data structure.
1195     // if string is not flat, this func has low efficiency.
ToUtf8Span(CVector<uint8_t> & buf)1196     Span<const uint8_t> ToUtf8Span(CVector<uint8_t> &buf)
1197     {
1198         return string_->ToUtf8Span(buf);
1199     }
1200 
1201     // only for string is flat and using UTF8 encoding
1202     inline Span<const uint8_t> FastToUtf8Span();
1203 
1204     // Using string's hash to figure out whether the string can be converted to integer
TryToGetInteger(uint32_t * result)1205     inline bool TryToGetInteger(uint32_t *result)
1206     {
1207         return string_->TryToGetInteger(result);
1208     }
1209 
TryToSetIntegerHash(int32_t num)1210     inline bool TryToSetIntegerHash(int32_t num)
1211     {
1212         return string_->TryToSetIntegerHash(num);
1213     }
1214 
1215     // not change string data structure.
1216     // if string is not flat, this func has low efficiency.
1217     std::string ToStdString(StringConvertedUsage usage = StringConvertedUsage::PRINT);
1218 
1219     std::string DebuggerToStdString(StringConvertedUsage usage = StringConvertedUsage::PRINT);
1220     // not change string data structure.
1221     // if string is not flat, this func has low efficiency.
1222     CString ToCString(StringConvertedUsage usage = StringConvertedUsage::LOGICOPERATION);
1223 
1224     // not change string data structure.
1225     // if string is not flat, this func has low efficiency.
1226     uint32_t WriteToFlatUtf8(uint8_t *buf, uint32_t maxLength, bool isWriteBuffer = false)
1227     {
1228         return string_->WriteUtf8(buf, maxLength, isWriteBuffer);
1229     }
1230 
WriteToUtf16(uint16_t * buf,uint32_t bufLength)1231     uint32_t WriteToUtf16(uint16_t *buf, uint32_t bufLength)
1232     {
1233         return string_->WriteUtf16(buf, GetLength(), bufLength);
1234     }
1235 
WriteToOneByte(uint8_t * buf,uint32_t maxLength)1236     uint32_t WriteToOneByte(uint8_t *buf, uint32_t maxLength)
1237     {
1238         return string_->WriteOneByte(buf, maxLength);
1239     }
1240 
1241     // not change string data structure.
1242     // if string is not flat, this func has low efficiency.
WriteToFlatUtf16(uint16_t * buf,uint32_t maxLength)1243     uint32_t WriteToFlatUtf16(uint16_t *buf, uint32_t maxLength) const
1244     {
1245         return string_->CopyDataUtf16(buf, maxLength);
1246     }
1247 
1248     // require dst is LineString
1249     // not change src data structure.
1250     // if src is not flat, this func has low efficiency.
1251     inline static void ReadData(EcmaString * dst, EcmaString *src, uint32_t start, uint32_t destSize, uint32_t length);
1252 
1253     // not change src data structure.
1254     // if src is not flat, this func has low efficiency.
1255     template<bool verify = true>
Get(uint32_t index)1256     uint16_t Get(uint32_t index) const
1257     {
1258         return string_->At<verify>(index);
1259     }
1260 
1261     // require string is LineString.
Set(uint32_t index,uint16_t src)1262     void Set(uint32_t index, uint16_t src)
1263     {
1264         return string_->WriteData(index, src);
1265     }
1266 
1267     // not change src data structure.
1268     // if src is not flat, this func has low efficiency.
GetHashcode()1269     uint32_t GetHashcode()
1270     {
1271         return string_->GetHashcode();
1272     }
1273 
GetRawHashcode()1274     uint32_t GetRawHashcode()
1275     {
1276         return string_->GetRawHashcode();
1277     }
1278 
1279     // not change src data structure.
1280     // if src is not flat, this func has low efficiency.
ComputeRawHashcode()1281     std::pair<uint32_t, bool> ComputeRawHashcode()
1282     {
1283         return string_->ComputeRawHashcode();
1284     }
1285 
ComputeHashcode()1286     uint32_t ComputeHashcode()
1287     {
1288         return string_->ComputeHashcode();
1289     }
1290 
ComputeHashcode(uint32_t rawHashSeed,bool isInteger)1291     uint32_t ComputeHashcode(uint32_t rawHashSeed, bool isInteger)
1292     {
1293         return string_->ComputeHashcode(rawHashSeed, isInteger);
1294     }
1295 
ComputeHashcodeUtf8(const uint8_t * utf8Data,size_t utf8Len,bool canBeCompress)1296     static uint32_t ComputeHashcodeUtf8(const uint8_t *utf8Data, size_t utf8Len, bool canBeCompress)
1297     {
1298         return EcmaString::ComputeHashcodeUtf8(utf8Data, utf8Len, canBeCompress);
1299     }
1300 
ComputeHashcodeUtf16(const uint16_t * utf16Data,uint32_t length)1301     static uint32_t ComputeHashcodeUtf16(const uint16_t *utf16Data, uint32_t length)
1302     {
1303         return EcmaString::ComputeHashcodeUtf16(utf16Data, length);
1304     }
1305 
1306     // can change receiver and search data structure
1307     static int32_t IndexOf(const EcmaVM *vm,
1308         const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos = 0)
1309     {
1310         return EcmaString::IndexOf(vm, receiver, search, pos);
1311     }
1312 
1313     // can change receiver and search data structure
1314     static int32_t LastIndexOf(const EcmaVM *vm,
1315         const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos = 0)
1316     {
1317         return EcmaString::LastIndexOf(vm, receiver, search, pos);
1318     }
1319 
1320     // can change receiver and search data structure
Compare(const EcmaVM * vm,const JSHandle<EcmaString> & left,const JSHandle<EcmaString> & right)1321     static int32_t Compare(const EcmaVM *vm, const JSHandle<EcmaString>& left, const JSHandle<EcmaString>& right)
1322     {
1323         return EcmaString::Compare(vm, left, right);
1324     }
1325 
1326     // can change str1 and str2 data structure
StringsAreEqual(const EcmaVM * vm,const JSHandle<EcmaString> & str1,const JSHandle<EcmaString> & str2)1327     static bool StringsAreEqual(const EcmaVM *vm, const JSHandle<EcmaString> &str1, const JSHandle<EcmaString> &str2)
1328     {
1329         return EcmaString::StringsAreEqual(vm, str1, str2);
1330     }
1331 
1332     // not change str1 and str2 data structure.
1333     // if str1 or str2 is not flat, this func has low efficiency.
StringsAreEqual(EcmaString * str1,EcmaString * str2)1334     static bool StringsAreEqual(EcmaString *str1, EcmaString *str2)
1335     {
1336         return EcmaString::StringsAreEqual(str1, str2);
1337     }
1338 
1339     // not change str1 and str2 data structure.
1340     // if str1 or str2 is not flat, this func has low efficiency.
StringsAreEqualDiffUtfEncoding(EcmaString * str1,EcmaString * str2)1341     static bool StringsAreEqualDiffUtfEncoding(EcmaString *str1, EcmaString *str2)
1342     {
1343         return EcmaString::StringsAreEqualDiffUtfEncoding(str1, str2);
1344     }
1345 
1346     // not change str1 data structure.
1347     // if str1 is not flat, this func has low efficiency.
StringIsEqualUint8Data(const EcmaString * str1,const uint8_t * dataAddr,uint32_t dataLen,bool canBeCompress)1348     static bool StringIsEqualUint8Data(const EcmaString *str1, const uint8_t *dataAddr, uint32_t dataLen,
1349                                        bool canBeCompress)
1350     {
1351         return EcmaString::StringIsEqualUint8Data(str1, dataAddr, dataLen, canBeCompress);
1352     }
1353 
1354     // not change str1 data structure.
1355     // if str1 is not flat, this func has low efficiency.
StringsAreEqualUtf16(const EcmaString * str1,const uint16_t * utf16Data,uint32_t utf16Len)1356     static bool StringsAreEqualUtf16(const EcmaString *str1, const uint16_t *utf16Data, uint32_t utf16Len)
1357     {
1358         return EcmaString::StringsAreEqualUtf16(str1, utf16Data, utf16Len);
1359     }
1360 
1361     // require str1 and str2 are LineString.
1362     // not change string data structure.
1363     // if string is not flat, this func has low efficiency.
EqualToSplicedString(const EcmaString * str1,const EcmaString * str2)1364     bool EqualToSplicedString(const EcmaString *str1, const EcmaString *str2)
1365     {
1366         return string_->EqualToSplicedString(str1, str2);
1367     }
1368 
CanBeCompressed(const uint8_t * utf8Data,uint32_t utf8Len)1369     static bool CanBeCompressed(const uint8_t *utf8Data, uint32_t utf8Len)
1370     {
1371         return EcmaString::CanBeCompressed(utf8Data, utf8Len);
1372     }
1373 
CanBeCompressed(const uint16_t * utf16Data,uint32_t utf16Len)1374     static bool CanBeCompressed(const uint16_t *utf16Data, uint32_t utf16Len)
1375     {
1376         return EcmaString::CanBeCompressed(utf16Data, utf16Len);
1377     }
1378 
1379     // require string is LineString
CanBeCompressed(const EcmaString * string)1380     static bool CanBeCompressed(const EcmaString *string)
1381     {
1382         return EcmaString::CanBeCompressed(string);
1383     }
1384 
1385     // not change string data structure.
1386     // if string is not flat, this func has low efficiency.
ToElementIndex(uint32_t * index)1387     bool ToElementIndex(uint32_t *index)
1388     {
1389         return string_->ToElementIndex(index);
1390     }
1391 
1392     // not change string data structure.
1393     // if string is not flat, this func has low efficiency.
ToInt(int32_t * index,bool * negative)1394     bool ToInt(int32_t *index, bool *negative)
1395     {
1396         return string_->ToInt(index, negative);
1397     }
1398 
1399     // not change string data structure.
1400     // if string is not flat, this func has low efficiency.
ToTypedArrayIndex(uint32_t * index)1401     bool ToTypedArrayIndex(uint32_t *index)
1402     {
1403         return string_->ToTypedArrayIndex(index);
1404     }
1405 
ToLower(const EcmaVM * vm,const JSHandle<EcmaString> & src)1406     static EcmaString *ToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src)
1407     {
1408         return EcmaString::ToLower(vm, src);
1409     }
1410 
TryToLower(const EcmaVM * vm,const JSHandle<EcmaString> & src)1411     static EcmaString *TryToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src)
1412     {
1413         return EcmaString::TryToLower(vm, src);
1414     }
1415 
ToUpper(const EcmaVM * vm,const JSHandle<EcmaString> & src)1416     static EcmaString *ToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src)
1417     {
1418         return EcmaString::ToUpper(vm, src);
1419     }
1420 
ToLocaleLower(const EcmaVM * vm,const JSHandle<EcmaString> & src,const icu::Locale & locale)1421     static EcmaString *ToLocaleLower(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale)
1422     {
1423         return EcmaString::ToLocaleLower(vm, src, locale);
1424     }
1425 
ToLocaleUpper(const EcmaVM * vm,const JSHandle<EcmaString> & src,const icu::Locale & locale)1426     static EcmaString *ToLocaleUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale)
1427     {
1428         return EcmaString::ToLocaleUpper(vm, src, locale);
1429     }
1430 
1431     static EcmaString *Trim(const JSThread *thread,
1432         const JSHandle<EcmaString> &src, EcmaString::TrimMode mode = EcmaString::TrimMode::TRIM)
1433     {
1434         return EcmaString::Trim(thread, src, mode);
1435     }
1436 
IsFlat()1437     bool IsFlat() const
1438     {
1439         return string_->IsFlat();
1440     }
1441 
IsLineString()1442     bool IsLineString() const
1443     {
1444         return string_->IsLineString();
1445     }
1446 
IsConstantString()1447     bool IsConstantString() const
1448     {
1449         return string_->IsConstantString();
1450     }
1451 
IsLineOrConstantString()1452     bool IsLineOrConstantString() const
1453     {
1454         return string_->IsLineOrConstantString();
1455     }
1456 
IsTreeString()1457     bool IsTreeString() const
1458     {
1459         return string_->IsTreeString();
1460     }
1461 
NotTreeString()1462     bool NotTreeString() const
1463     {
1464         return string_->NotTreeString();
1465     }
1466 
1467     static EcmaString *Flatten(const EcmaVM *vm, const JSHandle<EcmaString> &string,
1468         MemSpaceType type = MemSpaceType::SEMI_SPACE)
1469     {
1470         return EcmaString::Flatten(vm, string, type);
1471     }
1472 
1473     static FlatStringInfo FlattenAllString(const EcmaVM *vm, const JSHandle<EcmaString> &string,
1474         MemSpaceType type = MemSpaceType::SEMI_SPACE)
1475     {
1476         return EcmaString::FlattenAllString(vm, string, type);
1477     }
1478 
1479     static EcmaString *SlowFlatten(const EcmaVM *vm, const JSHandle<EcmaString> &string,
1480         MemSpaceType type = MemSpaceType::SEMI_SPACE)
1481     {
1482         return EcmaString::SlowFlatten(vm, string, type);
1483     }
1484 
FlattenNoGC(const EcmaVM * vm,EcmaString * string)1485     static EcmaString *FlattenNoGC(const EcmaVM *vm, EcmaString *string)
1486     {
1487         return EcmaString::FlattenNoGC(vm, string);
1488     }
1489 
GetUtf8DataFlat(const EcmaString * src,CVector<uint8_t> & buf)1490     static const uint8_t *GetUtf8DataFlat(const EcmaString *src, CVector<uint8_t> &buf)
1491     {
1492         return EcmaString::GetUtf8DataFlat(src, buf);
1493     }
1494 
GetUtf16DataFlat(const EcmaString * src,CVector<uint16_t> & buf)1495     static const uint16_t *GetUtf16DataFlat(const EcmaString *src, CVector<uint16_t> &buf)
1496     {
1497         return EcmaString::GetUtf16DataFlat(src, buf);
1498     }
1499 
1500     static JSTaggedValue StringToList(JSThread *thread, JSHandle<JSTaggedValue> &str);
1501 
1502 private:
1503     EcmaString *string_ {nullptr};
1504 };
1505 }  // namespace ecmascript
1506 }  // namespace panda
1507 #endif  // ECMASCRIPT_STRING_H