• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "ecmascript/ecma_string-inl.h"
17 
18 #include "common_interfaces/objects/base_string.h"
19 #include "ecmascript/base/json_helper.h"
20 
21 namespace panda::ecmascript {
22 
// Bit masks that keep the low 3, 4, 5 and 6 bits of a value respectively.
23 constexpr size_t LOW_3BITS = 0x7;
24 constexpr size_t LOW_4BITS = 0xF;
25 constexpr size_t LOW_5BITS = 0x1F;
26 constexpr size_t LOW_6BITS = 0x3F;
// First code unit of the UTF-16 low (0xDC00) and high (0xD800) surrogate ranges.
27 constexpr size_t L_SURROGATE_START = 0xDC00;
28 constexpr size_t H_SURROGATE_START = 0xD800;
// First code point above the 16-bit range (0x10000).
// NOTE(review): "RAIR" looks like a typo for "PAIR"; left unchanged because uses of this
// constant are not visible in this part of the file.
29 constexpr size_t SURROGATE_RAIR_START = 0x10000;
// Bit-shift distances; presumably used when packing/unpacking encoded characters
// (their uses are not visible in this part of the file — confirm before relying on this).
30 constexpr size_t OFFSET_18POS = 18;
31 constexpr size_t OFFSET_12POS = 12;
32 constexpr size_t OFFSET_10POS = 10;
33 constexpr size_t OFFSET_6POS = 6;
34 
35 using NumberHelper = base::NumberHelper;
36 
Concat(const EcmaVM * vm,const JSHandle<EcmaString> & left,const JSHandle<EcmaString> & right,MemSpaceType type)37 EcmaString *EcmaString::Concat(const EcmaVM *vm,
38     const JSHandle<EcmaString> &left, const JSHandle<EcmaString> &right, MemSpaceType type)
39 {
40     ASSERT(IsSMemSpace(type));
41     // allocator may trig gc and move src, need to hold it
42     EcmaString *strLeft = *left;
43     EcmaString *strRight = *right;
44     uint32_t leftLength = strLeft->GetLength();
45     uint32_t rightLength = strRight->GetLength();
46     uint32_t newLength = leftLength + rightLength;
47     if (newLength == 0) {
48         return vm->GetFactory()->GetEmptyString().GetObject<EcmaString>();
49     }
50 
51     if (leftLength == 0) {
52         return strRight;
53     }
54     if (rightLength == 0) {
55         return strLeft;
56     }
57     // if the result string is small, make a LineString
58     bool compressed = (strLeft->IsUtf8() && strRight->IsUtf8());
59     if (newLength < TreeString::MIN_TREE_STRING_LENGTH) {
60         ASSERT(strLeft->IsLineString());
61         ASSERT(strRight->IsLineString());
62         auto newString = CreateLineStringWithSpaceType(vm, newLength, compressed, type);
63         // retrieve strings after gc
64         strLeft = *left;
65         strRight = *right;
66         if (compressed) {
67             // copy left part
68             Span<uint8_t> sp(newString->GetDataUtf8Writable(), newLength);
69             Span<const uint8_t> srcLeft(strLeft->GetDataUtf8(), leftLength);
70             EcmaString::MemCopyChars(sp, newLength, srcLeft, leftLength);
71             // copy right part
72             sp = sp.SubSpan(leftLength);
73             Span<const uint8_t> srcRight(strRight->GetDataUtf8(), rightLength);
74             EcmaString::MemCopyChars(sp, rightLength, srcRight, rightLength);
75         } else {
76             // copy left part
77             Span<uint16_t> sp(newString->GetDataUtf16Writable(), newLength);
78             if (strLeft->IsUtf8()) {
79                 BaseString::CopyChars(sp.data(), strLeft->GetDataUtf8(), leftLength);
80             } else {
81                 Span<const uint16_t> srcLeft(strLeft->GetDataUtf16(), leftLength);
82                 EcmaString::MemCopyChars(sp, newLength << 1U, srcLeft, leftLength << 1U);
83             }
84             // copy right part
85             sp = sp.SubSpan(leftLength);
86             if (strRight->IsUtf8()) {
87                 BaseString::CopyChars(sp.data(), strRight->GetDataUtf8(), rightLength);
88             } else {
89                 Span<const uint16_t> srcRight(strRight->GetDataUtf16(), rightLength);
90                 EcmaString::MemCopyChars(sp, rightLength << 1U, srcRight, rightLength << 1U);
91             }
92         }
93         ASSERT_PRINT(compressed == CanBeCompressed(newString), "compressed does not match the real value!");
94         return newString;
95     }
96     return CreateTreeString(vm, left, right, newLength, compressed);
97 }
98 
99 /* static */
CopyStringToOldSpace(const EcmaVM * vm,const JSHandle<EcmaString> & original,uint32_t length,bool compressed)100 EcmaString *EcmaString::CopyStringToOldSpace(const EcmaVM *vm, const JSHandle<EcmaString> &original,
101     uint32_t length, bool compressed)
102 {
103     JSHandle<EcmaString> newString(vm->GetJSThread(),
104         CreateLineStringWithSpaceType(vm, length, compressed, MemSpaceType::OLD_SPACE));
105     auto strOrigin = FlattenAllString(vm, original);
106     if (compressed) {
107         // copy
108         Span<uint8_t> sp(newString->GetDataUtf8Writable(), length);
109         Span<const uint8_t> srcSp(strOrigin.GetDataUtf8(), length);
110         EcmaString::MemCopyChars(sp, length, srcSp, length);
111     } else {
112         // copy left part
113         Span<uint16_t> sp(newString->GetDataUtf16Writable(), length);
114         if (strOrigin.IsUtf8()) {
115             BaseString::CopyChars(sp.data(), strOrigin.GetDataUtf8(), length);
116         } else {
117             Span<const uint16_t> srcSp(strOrigin.GetDataUtf16(), length);
118             EcmaString::MemCopyChars(sp, length << 1U, srcSp, length << 1U);
119         }
120     }
121     ASSERT_PRINT(compressed == CanBeCompressed(*newString), "compressed does not match the real value!");
122     return *newString;
123 }
124 
125 /* static */
FastSubString(const EcmaVM * vm,const JSHandle<EcmaString> & src,uint32_t start,uint32_t length)126 EcmaString *EcmaString::FastSubString(const EcmaVM *vm,
127     const JSHandle<EcmaString> &src, uint32_t start, uint32_t length)
128 {
129     ASSERT((start + length) <= src->GetLength());
130     if (length == 0) {
131         return *vm->GetFactory()->GetEmptyString();
132     }
133     if (start == 0 && length == src->GetLength()) {
134         return *src;
135     }
136     if (src->IsUtf8()) {
137         return FastSubUtf8String(vm, src, start, length);
138     }
139     return FastSubUtf16String(vm, src, start, length);
140 }
141 
142 /* static */
GetSlicedString(const EcmaVM * vm,const JSHandle<EcmaString> & src,uint32_t start,uint32_t length)143 EcmaString *EcmaString::GetSlicedString(const EcmaVM *vm,
144     const JSHandle<EcmaString> &src, uint32_t start, uint32_t length)
145 {
146     ASSERT((start + length) <= src->GetLength());
147     FlatStringInfo srcFlat = FlattenAllString(vm, src);
148     JSHandle<EcmaString> flatString(vm->GetJSThread(), srcFlat.GetString());
149     SlicedEcmaString *slicedString = CreateSlicedString(vm, flatString);
150     slicedString->InitLengthAndFlags(length, flatString->IsUtf8());
151     slicedString->SetStartIndex(start + srcFlat.GetStartIndex());
152     return slicedString;
153 }
154 
155 /* static */
GetSubString(const EcmaVM * vm,const JSHandle<EcmaString> & src,uint32_t start,uint32_t length)156 EcmaString *EcmaString::GetSubString(const EcmaVM *vm,
157     const JSHandle<EcmaString> &src, uint32_t start, uint32_t length)
158 {
159     ASSERT((start + length) <= src->GetLength());
160     if (length == 1) {
161         JSThread *thread = vm->GetJSThread();
162         uint16_t res = EcmaStringAccessor(src).Get<false>(thread, start);
163         if (EcmaStringAccessor::CanBeCompressed(&res, 1)) {
164             JSHandle<SingleCharTable> singleCharTable(thread, thread->GetSingleCharTable());
165             return EcmaString::Cast(singleCharTable->GetStringFromSingleCharTable(thread, res).GetTaggedObject());
166         }
167     }
168     if (static_cast<uint32_t>(length) >= SlicedString::MIN_SLICED_STRING_LENGTH) {
169         if (start == 0 && length == src->GetLength()) {
170             return *src;
171         }
172         if (src->IsUtf16()) {
173             FlatStringInfo srcFlat = FlattenAllString(vm, src);
174             bool canBeCompressed = CanBeCompressed(srcFlat.GetDataUtf16() + start, length);
175             if (canBeCompressed) {
176                 JSHandle<EcmaString> string(vm->GetJSThread(), CreateLineString(vm, length, canBeCompressed));
177                 srcFlat = FlattenAllString(vm, src);
178                 BaseString::CopyChars(string->GetDataUtf8Writable(), srcFlat.GetDataUtf16() + start, length);
179                 return *string;
180             }
181         }
182         return GetSlicedString(vm, src, start, length);
183     }
184     return FastSubString(vm, src, start, length);
185 }
186 
SubStringIsUtf8(const EcmaVM * vm,const JSHandle<EcmaString> & src,uint32_t start,uint32_t length)187 bool EcmaString::SubStringIsUtf8(const EcmaVM *vm,
188     const JSHandle<EcmaString> &src, uint32_t start, uint32_t length)
189 {
190     ASSERT((start + length) <= src->GetLength());
191     if (length == 0) {
192         return true;
193     }
194     if (src->IsUtf8()) {
195         return true;
196     }
197     FlatStringInfo srcFlat = FlattenAllString(vm, src);
198     return CanBeCompressed(srcFlat.GetDataUtf16() + start, length);
199 }
200 
Compare(const EcmaVM * vm,const JSHandle<EcmaString> & left,const JSHandle<EcmaString> & right)201 int32_t EcmaString::Compare(const EcmaVM *vm, const JSHandle<EcmaString> &left, const JSHandle<EcmaString> &right)
202 {
203     if (*left == *right) {
204         return 0;
205     }
206     FlatStringInfo lhs = FlattenAllString(vm, left);
207     JSHandle<EcmaString> string(vm->GetJSThread(), lhs.GetString());
208     FlatStringInfo rhs = FlattenAllString(vm, right);
209     lhs.SetString(*string);
210     int32_t lhsCount = static_cast<int32_t>(lhs.GetLength());
211     int32_t rhsCount = static_cast<int32_t>(rhs.GetLength());
212     int32_t countDiff = lhsCount - rhsCount;
213     int32_t minCount = (countDiff < 0) ? lhsCount : rhsCount;
214     if (!lhs.IsUtf16() && !rhs.IsUtf16()) {
215         Span<const uint8_t> lhsSp(lhs.GetDataUtf8(), lhsCount);
216         Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
217         int32_t charDiff = common::CompareStringSpan(lhsSp, rhsSp, minCount);
218         if (charDiff != 0) {
219             return charDiff;
220         }
221     } else if (!lhs.IsUtf16()) {
222         Span<const uint8_t> lhsSp(lhs.GetDataUtf8(), lhsCount);
223         Span<const uint16_t> rhsSp(rhs.GetDataUtf16(), rhsCount);
224         int32_t charDiff = common::CompareStringSpan(lhsSp, rhsSp, minCount);
225         if (charDiff != 0) {
226             return charDiff;
227         }
228     } else if (!rhs.IsUtf16()) {
229         Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), rhsCount);
230         Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), lhsCount);
231         int32_t charDiff = common::CompareStringSpan(lhsSp, rhsSp, minCount);
232         if (charDiff != 0) {
233             return charDiff;
234         }
235     } else {
236         Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
237         Span<const uint16_t> rhsSp(rhs.GetDataUtf16(), rhsCount);
238         int32_t charDiff = common::CompareStringSpan(lhsSp, rhsSp, minCount);
239         if (charDiff != 0) {
240             return charDiff;
241         }
242     }
243     return countDiff;
244 }
245 
246 /**
247  * left: text string
248  * right: pattern string
249  * example 1: IsSubStringAt("IsSubStringAt", "Is", 0) return true
250  * example 2: IsSubStringAt("IsSubStringAt", "It", 0) return false
251 */
IsSubStringAt(const EcmaVM * vm,const JSHandle<EcmaString> & left,const JSHandle<EcmaString> & right,uint32_t offset)252 bool EcmaString::IsSubStringAt(const EcmaVM *vm, const JSHandle<EcmaString>& left,
253     const JSHandle<EcmaString>& right, uint32_t offset)
254 {
255     FlatStringInfo lhs = FlattenAllString(vm, left);
256     JSHandle<EcmaString> string(vm->GetJSThread(), lhs.GetString());
257     FlatStringInfo rhs = FlattenAllString(vm, right);
258     lhs.SetString(*string);
259     int32_t lhsCount = static_cast<int32_t>(lhs.GetLength());
260     int32_t rhsCount = static_cast<int32_t>(rhs.GetLength());
261     if (!lhs.IsUtf16() && !rhs.IsUtf16()) {
262         Span<const uint8_t> lhsSp(lhs.GetDataUtf8(), lhsCount);
263         Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
264         return common::IsSubStringAtSpan(lhsSp, rhsSp, offset);
265     } else if (!lhs.IsUtf16()) {
266         Span<const uint8_t> lhsSp(lhs.GetDataUtf8(), lhsCount);
267         Span<const uint16_t> rhsSp(rhs.GetDataUtf16(), rhsCount);
268         return common::IsSubStringAtSpan(lhsSp, rhsSp, offset);
269     } else if (!rhs.IsUtf16()) {
270         Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
271         Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
272         return common::IsSubStringAtSpan(lhsSp, rhsSp, offset);
273     } else {
274         Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
275         Span<const uint16_t> rhsSp(rhs.GetDataUtf16(), rhsCount);
276         return common::IsSubStringAtSpan(lhsSp, rhsSp, offset);
277     }
278     return false;
279 }
280 
281 /* static */
282 template<typename T1, typename T2>
IndexOf(Span<const T1> & lhsSp,Span<const T2> & rhsSp,int32_t pos,int32_t max)283 int32_t EcmaString::IndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos, int32_t max)
284 {
285    return BaseString::IndexOf(lhsSp, rhsSp, pos, max);
286 }
287 
288 template<typename T1, typename T2>
LastIndexOf(Span<const T1> & lhsSp,Span<const T2> & rhsSp,int32_t pos)289 int32_t EcmaString::LastIndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos)
290 {
291     return BaseString::LastIndexOf(lhsSp, rhsSp, pos);
292 }
293 
IndexOf(const EcmaVM * vm,const JSHandle<EcmaString> & receiver,const JSHandle<EcmaString> & search,int pos)294 int32_t EcmaString::IndexOf(const EcmaVM *vm,
295     const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos)
296 {
297     EcmaString *lhstring = *receiver;
298     EcmaString *rhstring = *search;
299     if (lhstring == nullptr || rhstring == nullptr) {
300         return -1;
301     }
302     int32_t lhsCount = static_cast<int32_t>(lhstring->GetLength());
303     int32_t rhsCount = static_cast<int32_t>(rhstring->GetLength());
304 
305     if (pos > lhsCount) {
306         return -1;
307     }
308 
309     if (rhsCount == 0) {
310         return pos;
311     }
312 
313     if (pos < 0) {
314         pos = 0;
315     }
316 
317     int32_t max = lhsCount - rhsCount;
318     if (max < 0) {
319         return -1;
320     }
321 
322     if (pos + rhsCount > lhsCount) {
323         return -1;
324     }
325 
326     FlatStringInfo lhs = FlattenAllString(vm, receiver);
327     JSHandle<EcmaString> string(vm->GetJSThread(), lhs.GetString());
328     FlatStringInfo rhs = FlattenAllString(vm, search);
329     lhs.SetString(*string);
330 
331     if (rhs.IsUtf8() && lhs.IsUtf8()) {
332         Span<const uint8_t> lhsSp(lhs.GetDataUtf8(), lhsCount);
333         Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
334         return EcmaString::IndexOf(lhsSp, rhsSp, pos, max);
335     } else if (rhs.IsUtf16() && lhs.IsUtf16()) {  // NOLINT(readability-else-after-return)
336         Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
337         Span<const uint16_t> rhsSp(rhs.GetDataUtf16(), rhsCount);
338         return EcmaString::IndexOf(lhsSp, rhsSp, pos, max);
339     } else if (rhs.IsUtf16()) {
340         return -1;
341     } else {  // NOLINT(readability-else-after-return)
342         Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
343         Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
344         return EcmaString::IndexOf(lhsSp, rhsSp, pos, max);
345     }
346 }
347 
LastIndexOf(const EcmaVM * vm,const JSHandle<EcmaString> & receiver,const JSHandle<EcmaString> & search,int pos)348 int32_t EcmaString::LastIndexOf(const EcmaVM *vm,
349     const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos)
350 {
351     EcmaString *lhstring = *receiver;
352     EcmaString *rhstring = *search;
353     if (lhstring == nullptr || rhstring == nullptr) {
354         return -1;
355     }
356 
357     int32_t lhsCount = static_cast<int32_t>(lhstring->GetLength());
358     int32_t rhsCount = static_cast<int32_t>(rhstring->GetLength());
359     if (lhsCount < rhsCount) {
360         return -1;
361     }
362 
363     if (pos < 0) {
364         pos = 0;
365     }
366 
367     if (pos > lhsCount) {
368         pos = lhsCount;
369     }
370 
371     if (pos + rhsCount > lhsCount) {
372         pos = lhsCount - rhsCount;
373     }
374 
375     if (rhsCount == 0) {
376         return pos;
377     }
378 
379     FlatStringInfo lhs = FlattenAllString(vm, receiver);
380     JSHandle<EcmaString> string(vm->GetJSThread(), lhs.GetString());
381     FlatStringInfo rhs = FlattenAllString(vm, search);
382     lhs.SetString(*string);
383     if (rhs.IsUtf8() && lhs.IsUtf8()) {
384         Span<const uint8_t> lhsSp(lhs.GetDataUtf8(), lhsCount);
385         Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
386         return EcmaString::LastIndexOf(lhsSp, rhsSp, pos);
387     } else if (rhs.IsUtf16() && lhs.IsUtf16()) {  // NOLINT(readability-else-after-return)
388         Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
389         Span<const uint16_t> rhsSp(rhs.GetDataUtf16(), rhsCount);
390         return EcmaString::LastIndexOf(lhsSp, rhsSp, pos);
391     } else if (rhs.IsUtf16()) {
392         return -1;
393     } else {  // NOLINT(readability-else-after-return)
394         Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
395         Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
396         return EcmaString::LastIndexOf(lhsSp, rhsSp, pos);
397     }
398 }
399 
ToU16String(const JSThread * thread,uint32_t len)400 std::u16string EcmaString::ToU16String(const JSThread *thread, uint32_t len)
401 {
402     auto readBarrier = [thread](const void *obj, size_t offset) -> TaggedObject * {
403         return Barriers::GetTaggedObject(thread, obj, offset);
404     };
405     return ToBaseString()->ToU16String(std::move(readBarrier), len);
406 }
407 
408 // static
409 template<typename T1, typename T2>
CalculateDataConcatHashCode(const T1 * dataFirst,size_t sizeFirst,const T2 * dataSecond,size_t sizeSecond)410 uint32_t EcmaString::CalculateDataConcatHashCode(const T1 *dataFirst, size_t sizeFirst,
411                                                  const T2 *dataSecond, size_t sizeSecond)
412 {
413     return BaseString::CalculateDataConcatHashCode(dataFirst, sizeFirst, dataSecond, sizeSecond);
414 }
415 
416 // static
CalculateConcatHashCode(const JSThread * thread,const JSHandle<EcmaString> & firstString,const JSHandle<EcmaString> & secondString)417 uint32_t EcmaString::CalculateConcatHashCode(const JSThread *thread, const JSHandle<EcmaString> &firstString,
418                                              const JSHandle<EcmaString> &secondString)
419 {
420     uint32_t hashCode;
421     uint32_t firstLength = firstString->GetLength();
422     uint32_t secondLength = secondString->GetLength();
423     if ((firstLength + secondLength < BaseString::MAX_ELEMENT_INDEX_LEN) &&
424         firstString->IsUtf8() && secondString->IsUtf8() &&
425         firstString->IsInteger(thread) && secondString->IsInteger(thread)) {
426         firstString->HashIntegerString(firstLength, &hashCode, 0);
427         secondString->HashIntegerString(secondLength, &hashCode, hashCode);
428         return hashCode;
429     }
430     bool isFirstStringUtf8 = EcmaStringAccessor(firstString).IsUtf8();
431     bool isSecondStringUtf8 = EcmaStringAccessor(secondString).IsUtf8();
432     EcmaString *firstStr = *firstString;
433     EcmaString *secondStr = *secondString;
434     CVector<uint8_t> bufFirstUint8;
435     CVector<uint8_t> bufSecondUint8;
436     CVector<uint16_t> bufFirstUint16;
437     CVector<uint16_t> bufSecondUint16;
438     if (isFirstStringUtf8 && isSecondStringUtf8) {
439         const uint8_t *dataFirst = EcmaString::GetUtf8DataFlat(thread, firstStr, bufFirstUint8);
440         const uint8_t *dataSecond = EcmaString::GetUtf8DataFlat(thread, secondStr, bufSecondUint8);
441         return CalculateDataConcatHashCode(dataFirst, firstStr->GetLength(),
442                                            dataSecond, secondStr->GetLength());
443     }
444     if (!isFirstStringUtf8 && isSecondStringUtf8) {
445         const uint16_t *dataFirst = EcmaString::GetUtf16DataFlat(thread, firstStr, bufFirstUint16);
446         const uint8_t *dataSecond = EcmaString::GetUtf8DataFlat(thread, secondStr, bufSecondUint8);
447         return CalculateDataConcatHashCode(dataFirst, firstStr->GetLength(),
448                                            dataSecond, secondStr->GetLength());
449     }
450     if (isFirstStringUtf8 && !isSecondStringUtf8) {
451         const uint8_t *dataFirst = EcmaString::GetUtf8DataFlat(thread, firstStr, bufFirstUint8);
452         const uint16_t *dataSecond = EcmaString::GetUtf16DataFlat(thread, secondStr, bufSecondUint16);
453         return CalculateDataConcatHashCode(dataFirst, firstStr->GetLength(),
454                                            dataSecond, secondStr->GetLength());
455     }
456     {
457         const uint16_t *dataFirst = EcmaString::GetUtf16DataFlat(thread, firstStr, bufFirstUint16);
458         const uint16_t *dataSecond = EcmaString::GetUtf16DataFlat(thread, secondStr, bufSecondUint16);
459         return  CalculateDataConcatHashCode(dataFirst, firstStr->GetLength(),
460                                             dataSecond, secondStr->GetLength());
461     }
462 }
463 
HashIntegerString(uint32_t length,uint32_t * hash,const uint32_t hashSeed) const464 bool EcmaString::HashIntegerString(uint32_t length, uint32_t *hash, const uint32_t hashSeed) const
465 {
466     ASSERT(length >= 0);
467     Span<const uint8_t> str = FastToUtf8Span();
468     return BaseString::HashIntegerString(str.data(), length, hash, hashSeed);
469 }
470 
471 // static
CanBeCompressed(const EcmaString * string)472 bool EcmaString::CanBeCompressed(const EcmaString *string)
473 {
474     return BaseString::CanBeCompressed(string->ToBaseString());
475 }
476 
477 // static
CanBeCompressed(const uint8_t * utf8Data,uint32_t utf8Len)478 bool EcmaString::CanBeCompressed(const uint8_t *utf8Data, uint32_t utf8Len)
479 {
480     return BaseString::CanBeCompressed(utf8Data, utf8Len);
481 }
482 
483 /* static */
CanBeCompressed(const uint16_t * utf16Data,uint32_t utf16Len)484 bool EcmaString::CanBeCompressed(const uint16_t *utf16Data, uint32_t utf16Len)
485 {
486     return BaseString::CanBeCompressed(utf16Data, utf16Len);
487 }
488 
EqualToSplicedString(const JSThread * thread,const EcmaString * str1,const EcmaString * str2)489 bool EcmaString::EqualToSplicedString(const JSThread *thread, const EcmaString *str1, const EcmaString *str2)
490 {
491     auto readBarrier = [thread](const void *obj, size_t offset) -> TaggedObject * {
492         return Barriers::GetTaggedObject(thread, obj, offset);
493     };
494     return ToBaseString()->EqualToSplicedString(std::move(readBarrier), str1->ToBaseString(), str2->ToBaseString());
495 }
496 
497 /* static */
StringsAreEqualDiffUtfEncoding(const JSThread * thread,EcmaString * left,EcmaString * right)498 bool EcmaString::StringsAreEqualDiffUtfEncoding(const JSThread *thread, EcmaString *left, EcmaString *right)
499 {
500     auto readBarrier = [thread](const void *obj, size_t offset) -> TaggedObject * {
501         return Barriers::GetTaggedObject(thread, obj, offset);
502     };
503     return BaseString::StringsAreEqualDiffUtfEncoding(std::move(readBarrier), left->ToBaseString(),
504                                                       right->ToBaseString());
505 }
506 
507 /* static */
StringsAreEqualDiffUtfEncoding(const FlatStringInfo & left,const FlatStringInfo & right)508 bool EcmaString::StringsAreEqualDiffUtfEncoding(const FlatStringInfo &left, const FlatStringInfo &right)
509 {
510     int32_t lhsCount = static_cast<int32_t>(left.GetLength());
511     int32_t rhsCount = static_cast<int32_t>(right.GetLength());
512     if (!left.IsUtf16() && !right.IsUtf16()) {
513         Span<const uint8_t> lhsSp(left.GetDataUtf8(), lhsCount);
514         Span<const uint8_t> rhsSp(right.GetDataUtf8(), rhsCount);
515         return EcmaString::StringsAreEquals(lhsSp, rhsSp);
516     } else if (!left.IsUtf16()) {
517         Span<const uint8_t> lhsSp(left.GetDataUtf8(), lhsCount);
518         Span<const uint16_t> rhsSp(right.GetDataUtf16(), rhsCount);
519         return EcmaString::StringsAreEquals(lhsSp, rhsSp);
520     } else if (!right.IsUtf16()) {
521         Span<const uint16_t> lhsSp(left.GetDataUtf16(), rhsCount);
522         Span<const uint8_t> rhsSp(right.GetDataUtf8(), lhsCount);
523         return EcmaString::StringsAreEquals(lhsSp, rhsSp);
524     } else {
525         Span<const uint16_t> lhsSp(left.GetDataUtf16(), lhsCount);
526         Span<const uint16_t> rhsSp(right.GetDataUtf16(), rhsCount);
527         return EcmaString::StringsAreEquals(lhsSp, rhsSp);
528     }
529 }
530 
StringsAreEqual(const EcmaVM * vm,const JSHandle<EcmaString> & str1,const JSHandle<EcmaString> & str2)531 bool EcmaString::StringsAreEqual(const EcmaVM *vm, const JSHandle<EcmaString> &str1, const JSHandle<EcmaString> &str2)
532 {
533     if (str1 == str2) {
534         return true;
535     }
536     if (str1->IsInternString() && str2->IsInternString()) {
537         return false;
538     }
539     uint32_t str1Len = str1->GetLength();
540     if (str1Len != str2->GetLength()) {
541         return false;
542     }
543     if (str1Len == 0) {
544         return true;
545     }
546 
547     uint32_t str1Hash;
548     uint32_t str2Hash;
549     if (str1->TryGetHashCode(&str1Hash) && str2->TryGetHashCode(&str2Hash)) {
550         if (str1Hash != str2Hash) {
551             return false;
552         }
553     }
554     FlatStringInfo str1Flat = FlattenAllString(vm, str1);
555     JSHandle<EcmaString> string(vm->GetJSThread(), str1Flat.GetString());
556     FlatStringInfo str2Flat = FlattenAllString(vm, str2);
557     str1Flat.SetString(*string);
558     return StringsAreEqualDiffUtfEncoding(str1Flat, str2Flat);
559 }
560 
561 /* static */
StringIsEqualUint8Data(const JSThread * thread,const EcmaString * str1,const uint8_t * dataAddr,uint32_t dataLen,bool canBeCompressToUtf8)562 bool EcmaString::StringIsEqualUint8Data(const JSThread *thread, const EcmaString *str1, const uint8_t *dataAddr,
563                                         uint32_t dataLen, bool canBeCompressToUtf8)
564 {
565     auto readBarrier = [thread](const void *obj, size_t offset) -> TaggedObject * {
566         return Barriers::GetTaggedObject(thread, obj, offset);
567     };
568     return BaseString::StringIsEqualUint8Data(std::move(readBarrier), str1->ToBaseString(), dataAddr, dataLen,
569                                               canBeCompressToUtf8);
570 }
571 
572 /* static */
StringsAreEqualUtf16(const JSThread * thread,const EcmaString * str1,const uint16_t * utf16Data,uint32_t utf16Len)573 bool EcmaString::StringsAreEqualUtf16(const JSThread *thread, const EcmaString *str1, const uint16_t *utf16Data,
574                                       uint32_t utf16Len)
575 {
576     auto readBarrier = [thread](const void *obj, size_t offset) -> TaggedObject * {
577         return Barriers::GetTaggedObject(thread, obj, offset);
578     };
579     return BaseString::StringsAreEqualUtf16(std::move(readBarrier), str1->ToBaseString(), utf16Data, utf16Len);
580 }
581 
582 template<typename T>
MemCopyChars(Span<T> & dst,size_t dstMax,Span<const T> & src,size_t count)583 bool EcmaString::MemCopyChars(Span<T> &dst, size_t dstMax, Span<const T> &src, size_t count)
584 {
585     ASSERT(dstMax >= count);
586     ASSERT(dst.Size() >= src.Size());
587     if (memcpy_s(dst.data(), dstMax, src.data(), count) != EOK) {
588         LOG_FULL(FATAL) << "memcpy_s failed";
589         UNREACHABLE();
590     }
591     return true;
592 }
593 
594 /* static */
ComputeHashcodeUtf8(const uint8_t * utf8Data,size_t utf8Len,bool canBeCompress)595 uint32_t EcmaString::ComputeHashcodeUtf8(const uint8_t *utf8Data, size_t utf8Len, bool canBeCompress)
596 {
597     return BaseString::ComputeHashcodeUtf8(utf8Data, utf8Len, canBeCompress);
598 }
599 
600 /* static */
ComputeHashcodeUtf16(const uint16_t * utf16Data,uint32_t length)601 uint32_t EcmaString::ComputeHashcodeUtf16(const uint16_t *utf16Data, uint32_t length)
602 {
603     return BaseString::ComputeHashcodeUtf16(utf16Data, length);
604 }
605 
ToElementIndex(const JSThread * thread,uint32_t * index)606 bool EcmaString::ToElementIndex(const JSThread *thread, uint32_t *index)
607 {
608     uint32_t len = GetLength();
609     if (UNLIKELY(len == 0 || len > MAX_ELEMENT_INDEX_LEN)) {  // NOLINTNEXTLINEreadability-magic-numbers)
610         return false;
611     }
612     if (UNLIKELY(IsUtf16())) {
613         return false;
614     }
615 
616     CVector<uint8_t> buf;
617     const uint8_t *data = EcmaString::GetUtf8DataFlat(thread, this, buf);
618     constexpr uint64_t maxValue = std::numeric_limits<uint32_t>::max() - 1;
619     if (NumberHelper::StringToUint<uint32_t, uint8_t>(std::basic_string_view(data, GetLength()), *index, maxValue)) {
620         return true;
621     }
622     return false;
623 }
624 
ToInt(const JSThread * thread,int32_t * index,bool * negative)625 bool EcmaString::ToInt(const JSThread *thread, int32_t *index, bool *negative)
626 {
627     uint32_t len = GetLength();
628     if (UNLIKELY(len == 0 || len > MAX_ELEMENT_INDEX_LEN)) {  // NOLINTNEXTLINEreadability-magic-numbers)
629         return false;
630     }
631     if (UNLIKELY(IsUtf16())) {
632         return false;
633     }
634     CVector<uint8_t> buf;
635     const uint8_t *data = EcmaString::GetUtf8DataFlat(thread, this, buf);
636     uint32_t c = data[0];
637     uint32_t loopStart = 0;
638     uint64_t n = 0;
639     if (c == '0') {
640         *index = 0;
641         return len == 1;
642     }
643     if (c == '-' && len > 1) {
644         *negative = true;
645         loopStart = 1;
646     }
647 
648     if (ToUInt64FromLoopStart(&n, loopStart, data) && n <= std::numeric_limits<int32_t>::max()) {
649         *index = *negative ? -n : n;
650         return true;
651     }
652     return false;
653 }
654 
ToUInt64FromLoopStart(uint64_t * index,uint32_t loopStart,const uint8_t * data)655 bool EcmaString::ToUInt64FromLoopStart(uint64_t *index, uint32_t loopStart, const uint8_t *data)
656 {
657     uint64_t n = 0;
658     uint32_t len = GetLength();
659     if (UNLIKELY(loopStart >= len)) {
660         return false;
661     }
662     for (uint32_t i = loopStart; i < len; i++) {
663         uint32_t c = data[i];  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
664         if (c < '0' || c > '9') {
665             return false;
666         }
667         // NOLINTNEXTLINE(readability-magic-numbers)
668         n = n * 10 + (c - '0');  // 10: decimal factor
669     }
670     *index = n;
671     return true;
672 }
673 
ToTypedArrayIndex(const JSThread * thread,uint32_t * index)674 bool EcmaString::ToTypedArrayIndex(const JSThread *thread, uint32_t *index)
675 {
676     uint32_t len = GetLength();
677     if (UNLIKELY(len == 0 || len > MAX_ELEMENT_INDEX_LEN)) {
678         return false;
679     }
680     if (UNLIKELY(IsUtf16())) {
681         return false;
682     }
683 
684     CVector<uint8_t> buf;
685     const uint8_t *data = EcmaString::GetUtf8DataFlat(thread, this, buf);
686     uint32_t c = data[0];  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
687     uint64_t n = 0;
688     if (c == '0') {
689         *index = 0;
690         return len == 1;
691     }
692     if (c > '0' && c <= '9') {
693         n = c - '0';
694         for (uint32_t i = 1; i < len; i++) {
695             c = data[i];  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
696             if (c >= '0' && c <= '9') {
697                 // NOLINTNEXTLINE(readability-magic-numbers)
698                 n = n * 10 + (c - '0');  // 10: decimal factor
699             } else if (c == '.') {
700                 n = JSObject::MAX_ELEMENT_INDEX;
701                 break;
702             } else {
703                 return false;
704             }
705         }
706         if (n < JSObject::MAX_ELEMENT_INDEX) {
707             *index = n;
708             return true;
709         } else {
710             *index = JSObject::MAX_ELEMENT_INDEX;
711             return true;
712         }
713     } else if (c == '-') {
714         *index = JSObject::MAX_ELEMENT_INDEX;
715         return true;
716     }
717     return false;
718 }
719 
720 template<typename T>
TrimBody(const JSThread * thread,const JSHandle<EcmaString> & src,Span<T> & data,TrimMode mode)721 EcmaString *EcmaString::TrimBody(const JSThread *thread, const JSHandle<EcmaString> &src, Span<T> &data, TrimMode mode)
722 {
723     uint32_t srcLen = src->GetLength();
724     int32_t start = 0;
725     int32_t end = static_cast<int32_t>(srcLen) - 1;
726 
727     if (mode == TrimMode::TRIM || mode == TrimMode::TRIM_START) {
728         start = static_cast<int32_t>(base::StringHelper::GetStart(data, srcLen));
729     }
730     if (mode == TrimMode::TRIM || mode == TrimMode::TRIM_END) {
731         end = base::StringHelper::GetEnd(data, start, srcLen);
732     }
733     EcmaString *res = FastSubString(thread->GetEcmaVM(), src, start, static_cast<uint32_t>(end - start + 1));
734     return res;
735 }
736 
737 /* static */
ToLower(const EcmaVM * vm,const JSHandle<EcmaString> & src)738 EcmaString *EcmaString::ToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src)
739 {
740     auto srcFlat = FlattenAllString(vm, src);
741     uint32_t srcLength = srcFlat.GetLength();
742     auto factory = vm->GetFactory();
743     if (srcFlat.IsUtf16()) {
744         std::u16string u16str = base::StringHelper::Utf16ToU16String(srcFlat.GetDataUtf16(), srcLength);
745         std::string res = base::StringHelper::ToLower(u16str);
746         return *(factory->NewFromStdString(res));
747     } else {
748         return ConvertUtf8ToLowerOrUpper(vm, src, true);
749     }
750 }
751 
752 /* static */
TryToLower(const EcmaVM * vm,const JSHandle<EcmaString> & src)753 EcmaString *EcmaString::TryToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src)
754 {
755     auto srcFlat = FlattenAllString(vm, src);
756     uint32_t srcLength = srcFlat.GetLength();
757     const char start = 'A';
758     const char end = 'Z';
759     uint32_t upperIndex = srcLength;
760     Span<uint8_t> data(srcFlat.GetDataUtf8Writable(), srcLength);
761     for (uint32_t index = 0; index < srcLength; ++index) {
762         if (base::StringHelper::Utf8CharInRange(data[index], start, end)) {
763             upperIndex = index;
764             break;
765         }
766     }
767     if (upperIndex == srcLength) {
768         return *src;
769     }
770     return ConvertUtf8ToLowerOrUpper(vm, src, true, upperIndex);
771 }
772 
773 /* static */
TryToUpper(const EcmaVM * vm,const JSHandle<EcmaString> & src)774 EcmaString *EcmaString::TryToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src)
775 {
776     auto srcFlat = FlattenAllString(vm, src);
777     uint32_t srcLength = srcFlat.GetLength();
778     const char start = 'a';
779     const char end = 'z';
780     uint32_t lowerIndex = srcLength;
781     Span<uint8_t> data(srcFlat.GetDataUtf8Writable(), srcLength);
782     for (uint32_t index = 0; index < srcLength; ++index) {
783         if (base::StringHelper::Utf8CharInRange(data[index], start, end)) {
784             lowerIndex = index;
785             break;
786         }
787     }
788     if (lowerIndex == srcLength) {
789         return *src;
790     }
791     return ConvertUtf8ToLowerOrUpper(vm, src, false, lowerIndex);
792 }
793 
794 /* static */
ConvertUtf8ToLowerOrUpper(const EcmaVM * vm,const JSHandle<EcmaString> & src,bool toLower,uint32_t startIndex)795 EcmaString *EcmaString::ConvertUtf8ToLowerOrUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src,
796                                                   bool toLower, uint32_t startIndex)
797 {
798     const char start = toLower ? 'A' : 'a';
799     const char end = toLower ? 'Z' : 'z';
800     uint32_t srcLength = src->GetLength();
801     JSHandle<EcmaString> newString(vm->GetJSThread(), CreateLineString(vm, srcLength, true));
802     auto srcFlat = FlattenAllString(vm, src);
803     Span<uint8_t> data(srcFlat.GetDataUtf8Writable(), srcLength);
804     auto newStringPtr = newString->GetDataUtf8Writable();
805     if (startIndex > 0) {
806         if (memcpy_s(newStringPtr, startIndex * sizeof(uint8_t), data.data(), startIndex * sizeof(uint8_t)) != EOK) {
807             LOG_FULL(FATAL) << "memcpy_s failed";
808             UNREACHABLE();
809         }
810     }
811     for (uint32_t index = startIndex; index < srcLength; ++index) {
812         if (base::StringHelper::Utf8CharInRange(data[index], start, end)) {
813             *(newStringPtr + index) = data[index] ^ (1 << 5);   // 1 and 5 means lower to upper or upper to lower
814         } else {
815             *(newStringPtr + index) = data[index];
816         }
817     }
818     return *newString;
819 }
820 
821 /* static */
ToUpper(const EcmaVM * vm,const JSHandle<EcmaString> & src)822 EcmaString *EcmaString::ToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src)
823 {
824     FlatStringInfo srcFlat = FlattenAllString(vm, src);
825     uint32_t srcLength = srcFlat.GetLength();
826     auto factory = vm->GetFactory();
827     if (srcFlat.IsUtf16()) {
828         std::u16string u16str = base::StringHelper::Utf16ToU16String(srcFlat.GetDataUtf16(), srcLength);
829         std::string res = base::StringHelper::ToUpper(u16str);
830         return *(factory->NewFromStdString(res));
831     } else {
832         return ConvertUtf8ToLowerOrUpper(vm, src, false);
833     }
834 }
835 
836 /* static */
ToLocaleLower(const EcmaVM * vm,const JSHandle<EcmaString> & src,const icu::Locale & locale)837 EcmaString *EcmaString::ToLocaleLower(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale)
838 {
839     auto factory = vm->GetFactory();
840     FlatStringInfo srcFlat = FlattenAllString(vm, src);
841     std::u16string utf16 = srcFlat.ToU16String();
842     std::string res = base::StringHelper::ToLocaleLower(utf16, locale);
843     return *(factory->NewFromStdString(res));
844 }
845 
846 /* static */
ToLocaleUpper(const EcmaVM * vm,const JSHandle<EcmaString> & src,const icu::Locale & locale)847 EcmaString *EcmaString::ToLocaleUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale)
848 {
849     auto factory = vm->GetFactory();
850     FlatStringInfo srcFlat = FlattenAllString(vm, src);
851     std::u16string utf16 = srcFlat.ToU16String();
852     std::string res = base::StringHelper::ToLocaleUpper(utf16, locale);
853     return *(factory->NewFromStdString(res));
854 }
855 
Trim(const JSThread * thread,const JSHandle<EcmaString> & src,TrimMode mode)856 EcmaString *EcmaString::Trim(const JSThread *thread, const JSHandle<EcmaString> &src, TrimMode mode)
857 {
858     FlatStringInfo srcFlat = FlattenAllString(thread->GetEcmaVM(), src);
859     uint32_t srcLen = srcFlat.GetLength();
860     if (UNLIKELY(srcLen == 0)) {
861         return EcmaString::Cast(thread->GlobalConstants()->GetEmptyString().GetTaggedObject());
862     }
863     if (srcFlat.IsUtf8()) {
864         Span<const uint8_t> data(srcFlat.GetDataUtf8(), srcLen);
865         return TrimBody(thread, src, data, mode);
866     } else {
867         Span<const uint16_t> data(srcFlat.GetDataUtf16(), srcLen);
868         return TrimBody(thread, src, data, mode);
869     }
870 }
871 
SlowFlatten(const EcmaVM * vm,const JSHandle<EcmaString> & string,MemSpaceType type)872 EcmaString *EcmaString::SlowFlatten(const EcmaVM *vm, const JSHandle<EcmaString> &string, MemSpaceType type)
873 {
874     ASSERT(string->IsTreeString() || string->IsSlicedString());
875     ASSERT(IsSMemSpace(type));
876     auto thread = vm->GetJSThread();
877     uint32_t length = string->GetLength();
878     EcmaString *result = nullptr;
879     if (string->IsUtf8()) {
880         result = CreateLineStringWithSpaceType(vm, length, true, type);
881         WriteToFlat<uint8_t>(thread, *string, result->GetDataUtf8Writable(), length);
882     } else {
883         result = CreateLineStringWithSpaceType(vm, length, false, type);
884         WriteToFlat<uint16_t>(thread, *string, result->GetDataUtf16Writable(), length);
885     }
886     if (string->IsTreeString()) {
887         JSHandle<TreeEcmaString> tree(string);
888         ASSERT(EcmaString::Cast(tree->GetSecond(thread))->GetLength() != 0);
889         tree->SetFirst(thread, JSTaggedValue(result));
890         tree->SetSecond(thread, JSTaggedValue(*vm->GetFactory()->GetEmptyString()));
891     }
892     return result;
893 }
894 
Flatten(const EcmaVM * vm,const JSHandle<EcmaString> & string,MemSpaceType type)895 EcmaString *EcmaString::Flatten(const EcmaVM *vm, const JSHandle<EcmaString> &string, MemSpaceType type)
896 {
897     EcmaString *s = *string;
898     if (!s->IsTreeString()) {
899         return s;
900     }
901     JSThread *thread = vm->GetJSThread();
902     JSHandle<TreeEcmaString> tree = JSHandle<TreeEcmaString>::Cast(string);
903     if (!tree->IsFlat(thread)) {
904         return SlowFlatten(vm, string, type);
905     }
906     return EcmaString::Cast(tree->GetFirst(thread));
907 }
908 
FlattenAllString(const EcmaVM * vm,const JSHandle<EcmaString> & string,MemSpaceType type)909 FlatStringInfo EcmaString::FlattenAllString(const EcmaVM *vm, const JSHandle<EcmaString> &string, MemSpaceType type)
910 {
911     ASSERT(IsSMemSpace(type));
912     EcmaString *s = *string;
913     uint32_t startIndex = 0;
914     if (s->IsLineString()) {
915         return FlatStringInfo(s, startIndex, s->GetLength());
916     }
917     JSThread *thread = vm->GetJSThread();
918     if (string->IsTreeString()) {
919         JSHandle<TreeEcmaString> tree = JSHandle<TreeEcmaString>::Cast(string);
920         if (!tree->IsFlat(thread)) {
921             s = SlowFlatten(vm, string, type);
922         } else {
923             s = EcmaString::Cast(tree->GetFirst(thread));
924         }
925     } else if (string->IsSlicedString()) {
926         s = EcmaString::Cast(SlicedEcmaString::Cast(*string)->GetParent(thread));
927         startIndex = SlicedEcmaString::Cast(*string)->GetStartIndex();
928     }
929     return FlatStringInfo(s, startIndex, string->GetLength());
930 }
931 
FlattenNoGCForSnapshot(const EcmaVM * vm,EcmaString * string)932 EcmaString *EcmaString::FlattenNoGCForSnapshot(const EcmaVM *vm, EcmaString *string)
933 {
934     DISALLOW_GARBAGE_COLLECTION;
935     if (string->IsLineString()) {
936         return string;
937     }
938     if (string->IsTreeString()) {
939         TreeEcmaString *tree = TreeEcmaString::Cast(string);
940         JSThread *thread = vm->GetJSThread();
941         if (tree->IsFlat(thread)) {
942             string = EcmaString::Cast(tree->GetFirst(thread));
943         } else {
944             uint32_t length = tree->GetLength();
945             EcmaString *result = nullptr;
946             if (tree->IsUtf8()) {
947                 result = CreateLineStringNoGC(vm, length, true);
948                 WriteToFlat<uint8_t>(thread, tree, result->GetDataUtf8Writable(), length);
949             } else {
950                 result = CreateLineStringNoGC(vm, length, false);
951                 WriteToFlat<uint16_t>(thread, tree, result->GetDataUtf16Writable(), length);
952             }
953             tree->SetFirst(vm->GetJSThread(), JSTaggedValue(result));
954             tree->SetSecond(vm->GetJSThread(), JSTaggedValue(*vm->GetFactory()->GetEmptyString()));
955             return result;
956         }
957     } else if (string->IsSlicedString()) {
958         SlicedEcmaString *str = SlicedEcmaString::Cast(string);
959         uint32_t length = str->GetLength();
960         JSThread *thread = vm->GetJSThread();
961         EcmaString *result = nullptr;
962         if (str->IsUtf8()) {
963             result = CreateLineStringNoGC(vm, length, true);
964             WriteToFlat<uint8_t>(thread, str, result->GetDataUtf8Writable(), length);
965         } else {
966             result = CreateLineStringNoGC(vm, length, false);
967             WriteToFlat<uint16_t>(thread, str, result->GetDataUtf16Writable(), length);
968         }
969         return result;
970     }
971     return string;
972 }
973 
GetUtf8DataFlat(const JSThread * thread,const EcmaString * src,CVector<uint8_t> & buf)974 const uint8_t *EcmaString::GetUtf8DataFlat(const JSThread *thread, const EcmaString *src, CVector<uint8_t> &buf)
975 {
976     auto readBarrier = [thread](const void *obj, size_t offset) -> TaggedObject * {
977         return Barriers::GetTaggedObject(thread, obj, offset);
978     };
979     return BaseString::GetUtf8DataFlat(std::move(readBarrier), src->ToBaseString(), buf);
980 }
981 
GetNonTreeUtf8Data(const JSThread * thread,const EcmaString * src)982 const uint8_t *EcmaString::GetNonTreeUtf8Data(const JSThread *thread, const EcmaString *src)
983 {
984     auto readBarrier = [thread](const void *obj, size_t offset) -> TaggedObject * {
985         return Barriers::GetTaggedObject(thread, obj, offset);
986     };
987     return BaseString::GetNonTreeUtf8Data(std::move(readBarrier), src->ToBaseString());
988 }
989 
GetUtf16DataFlat(const JSThread * thread,const EcmaString * src,CVector<uint16_t> & buf)990 const uint16_t *EcmaString::GetUtf16DataFlat(const JSThread *thread, const EcmaString *src, CVector<uint16_t> &buf)
991 {
992     auto readBarrier = [thread](const void *obj, size_t offset) -> TaggedObject * {
993         return Barriers::GetTaggedObject(thread, obj, offset);
994     };
995     return BaseString::GetUtf16DataFlat(std::move(readBarrier), src->ToBaseString(), buf);
996 }
997 
GetNonTreeUtf16Data(const JSThread * thread,const EcmaString * src)998 const uint16_t *EcmaString::GetNonTreeUtf16Data(const JSThread *thread, const EcmaString *src)
999 {
1000     auto readBarrier = [thread](const void *obj, size_t offset) -> TaggedObject * {
1001         return Barriers::GetTaggedObject(thread, obj, offset);
1002     };
1003     return BaseString::GetNonTreeUtf16Data(std::move(readBarrier), src->ToBaseString());
1004 }
1005 
ToU16String(uint32_t len)1006 std::u16string FlatStringInfo::ToU16String(uint32_t len)
1007 {
1008     uint32_t length = len > 0 ? len : GetLength();
1009     std::u16string result;
1010     if (IsUtf16()) {
1011         const uint16_t *data = this->GetDataUtf16();
1012         result = base::StringHelper::Utf16ToU16String(data, length);
1013     } else {
1014         const uint8_t *data = this->GetDataUtf8();
1015         result = base::StringHelper::Utf8ToU16String(data, length);
1016     }
1017     return result;
1018 }
1019 
// Wraps the string stored in `obj`. `obj` must not be null (checked by ASSERT only).
EcmaStringAccessor::EcmaStringAccessor(TaggedObject *obj)
{
    ASSERT(obj != nullptr);
    string_ = EcmaString::Cast(obj);
}
1025 
EcmaStringAccessor(JSTaggedValue value)1026 EcmaStringAccessor::EcmaStringAccessor(JSTaggedValue value)
1027 {
1028     ASSERT(value.IsString());
1029     string_ = EcmaString::Cast(value.GetTaggedObject());
1030 }
1031 
// Wraps the string currently referenced by `strHandle`. Only the raw pointer obtained by
// dereferencing the handle is stored, not the handle itself.
EcmaStringAccessor::EcmaStringAccessor(const JSHandle<EcmaString> &strHandle)
    : string_(*strHandle)
{
}
1036 
ToStdString(const JSThread * thread,StringConvertedUsage usage)1037 std::string EcmaStringAccessor::ToStdString(const JSThread *thread, StringConvertedUsage usage)
1038 {
1039     if (string_ == nullptr) {
1040         return "";
1041     }
1042     bool modify = (usage != StringConvertedUsage::PRINT);
1043     CVector<uint8_t> buf;
1044     Span<const uint8_t> sp = string_->ToUtf8Span(thread, buf, modify);
1045 #if ENABLE_NEXT_OPTIMIZATION
1046     return std::string(reinterpret_cast<const char*>(sp.data()), sp.size());
1047 #else
1048     std::string res;
1049     res.reserve(sp.size());
1050     for (const auto &c : sp) {
1051         res.push_back(c);
1052     }
1053     return res;
1054 #endif
1055 }
1056 
Utf8ConvertToString(const JSThread * thread)1057 CString EcmaStringAccessor::Utf8ConvertToString(const JSThread *thread)
1058 {
1059     if (string_ == nullptr) {
1060         return CString("");
1061     }
1062     if (IsUtf8()) {
1063         std::string stdStr;
1064         if (IsLineString()) {
1065             return base::StringHelper::Utf8ToCString(GetDataUtf8(), GetLength());
1066         }
1067         CVector<uint8_t> buf;
1068         const uint8_t *data = EcmaString::GetUtf8DataFlat(thread, string_, buf);
1069         return base::StringHelper::Utf8ToCString(data, GetLength());
1070     } else {
1071         return ToCString(thread);
1072     }
1073 }
1074 
DebuggerToStdString(const JSThread * thread,StringConvertedUsage usage)1075 std::string EcmaStringAccessor::DebuggerToStdString(const JSThread *thread, StringConvertedUsage usage)
1076 {
1077     if (string_ == nullptr) {
1078         return "";
1079     }
1080 
1081     bool modify = (usage != StringConvertedUsage::PRINT);
1082     CVector<uint8_t> buf;
1083     Span<const uint8_t> sp = string_->DebuggerToUtf8Span(thread, buf, modify);
1084 #if ENABLE_NEXT_OPTIMIZATION
1085     return std::string(reinterpret_cast<const char*>(sp.data()), sp.size());
1086 #else
1087     std::string res;
1088     res.reserve(sp.size());
1089     for (const auto &c : sp) {
1090         res.push_back(c);
1091     }
1092     return res;
1093 #endif
1094 }
1095 
ToCString(const JSThread * thread,StringConvertedUsage usage,bool cesu8)1096 CString EcmaStringAccessor::ToCString(const JSThread *thread, StringConvertedUsage usage, bool cesu8)
1097 {
1098     if (string_ == nullptr) {
1099         return "";
1100     }
1101     bool modify = (usage != StringConvertedUsage::PRINT);
1102     CVector<uint8_t> buf;
1103     Span<const uint8_t> sp = string_->ToUtf8Span(thread, buf, modify, cesu8);
1104 #if ENABLE_NEXT_OPTIMIZATION
1105     return CString(reinterpret_cast<const char*>(sp.data()), sp.size());
1106 #else
1107     CString res;
1108     res.reserve(sp.size());
1109     for (const auto &c : sp) {
1110         res.push_back(c);
1111     }
1112     return res;
1113 #endif
1114 }
1115 
1116 #if ENABLE_NEXT_OPTIMIZATION
AppendToCString(const JSThread * thread,CString & str)1117 void EcmaStringAccessor::AppendToCString(const JSThread *thread, CString &str)
1118 {
1119     if (string_ == nullptr) {
1120         return;
1121     }
1122 
1123     size_t strLen = GetLength();
1124     CVector<uint8_t> buf;
1125     const uint8_t *data = EcmaString::GetUtf8DataFlat(thread, string_, buf);
1126     str.append(reinterpret_cast<const char *>(data), strLen);
1127 }
1128 
AppendToC16String(const JSThread * thread,C16String & str)1129 void EcmaStringAccessor::AppendToC16String(const JSThread *thread, C16String &str)
1130 {
1131     if (string_ == nullptr) {
1132         return;
1133     }
1134     // used to append utf8 space to utf16 gap by stringify
1135     // In real world, space is usually utf8.
1136     if LIKELY(string_->IsUtf8()) {
1137         CVector<uint8_t> buf;
1138         const uint8_t *data = EcmaString::GetUtf8DataFlat(thread, string_, buf);
1139         // only ascii codes, no need to convert to UTF-16, just append.
1140         AppendString(str, reinterpret_cast<const char*>(data), GetLength());
1141     } else {
1142         CVector<uint16_t> buf;
1143         const uint16_t *data = EcmaString::GetUtf16DataFlat(thread, string_, buf);
1144         str.append(reinterpret_cast<const char16_t *>(data), GetLength());
1145     }
1146 }
1147 #endif
// Accessor-level entry point that forwards unchanged to EcmaString::CreateLineString and
// returns its result.
// static
EcmaString *EcmaStringAccessor::CreateLineString(const EcmaVM *vm, size_t length, bool compressed)
{
    return EcmaString::CreateLineString(vm, length, compressed);
}
1153 }  // namespace panda::ecmascript
1154