• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "ecmascript/ecma_string-inl.h"
17 
18 #include "ecmascript/base/json_helper.h"
19 
20 namespace panda::ecmascript {
21 
// Bit masks selecting the lowest 3, 4, 5 and 6 bits of a value.
// NOTE(review): presumably used by UTF-8/UTF-16 conversion code later in this file - not visible here.
constexpr size_t LOW_3BITS = 0x7;
constexpr size_t LOW_4BITS = 0xF;
constexpr size_t LOW_5BITS = 0x1F;
constexpr size_t LOW_6BITS = 0x3F;
// 0xDC00: first code unit of the UTF-16 low (trailing) surrogate range.
constexpr size_t L_SURROGATE_START = 0xDC00;
// 0xD800: first code unit of the UTF-16 high (leading) surrogate range.
constexpr size_t H_SURROGATE_START = 0xD800;
// 0x10000: first code point that needs a surrogate pair in UTF-16.
// NOTE(review): "RAIR" looks like a typo for "PAIR"; the name is kept because it may be used outside this view.
constexpr size_t SURROGATE_RAIR_START = 0x10000;
// Shift distances of 18, 12, 10 and 6 bit positions.
constexpr size_t OFFSET_18POS = 18;
constexpr size_t OFFSET_12POS = 12;
constexpr size_t OFFSET_10POS = 10;
constexpr size_t OFFSET_6POS = 6;
33 
Concat(const EcmaVM * vm,const JSHandle<EcmaString> & left,const JSHandle<EcmaString> & right,MemSpaceType type)34 EcmaString *EcmaString::Concat(const EcmaVM *vm,
35     const JSHandle<EcmaString> &left, const JSHandle<EcmaString> &right, MemSpaceType type)
36 {
37     ASSERT(IsSMemSpace(type));
38     // allocator may trig gc and move src, need to hold it
39     EcmaString *strLeft = *left;
40     EcmaString *strRight = *right;
41     uint32_t leftLength = strLeft->GetLength();
42     uint32_t rightLength = strRight->GetLength();
43     uint32_t newLength = leftLength + rightLength;
44     if (newLength == 0) {
45         return vm->GetFactory()->GetEmptyString().GetObject<EcmaString>();
46     }
47 
48     if (leftLength == 0) {
49         return strRight;
50     }
51     if (rightLength == 0) {
52         return strLeft;
53     }
54     // if the result string is small, make a LineString
55     bool compressed = (strLeft->IsUtf8() && strRight->IsUtf8());
56     if (newLength < TreeEcmaString::MIN_TREE_ECMASTRING_LENGTH) {
57         ASSERT(strLeft->IsLineOrConstantString());
58         ASSERT(strRight->IsLineOrConstantString());
59         auto newString = CreateLineStringWithSpaceType(vm, newLength, compressed, type);
60         // retrieve strings after gc
61         strLeft = *left;
62         strRight = *right;
63         if (compressed) {
64             // copy left part
65             Span<uint8_t> sp(newString->GetDataUtf8Writable(), newLength);
66             Span<const uint8_t> srcLeft(strLeft->GetDataUtf8(), leftLength);
67             EcmaString::MemCopyChars(sp, newLength, srcLeft, leftLength);
68             // copy right part
69             sp = sp.SubSpan(leftLength);
70             Span<const uint8_t> srcRight(strRight->GetDataUtf8(), rightLength);
71             EcmaString::MemCopyChars(sp, rightLength, srcRight, rightLength);
72         } else {
73             // copy left part
74             Span<uint16_t> sp(newString->GetDataUtf16Writable(), newLength);
75             if (strLeft->IsUtf8()) {
76                 EcmaString::CopyChars(sp.data(), strLeft->GetDataUtf8(), leftLength);
77             } else {
78                 Span<const uint16_t> srcLeft(strLeft->GetDataUtf16(), leftLength);
79                 EcmaString::MemCopyChars(sp, newLength << 1U, srcLeft, leftLength << 1U);
80             }
81             // copy right part
82             sp = sp.SubSpan(leftLength);
83             if (strRight->IsUtf8()) {
84                 EcmaString::CopyChars(sp.data(), strRight->GetDataUtf8(), rightLength);
85             } else {
86                 Span<const uint16_t> srcRight(strRight->GetDataUtf16(), rightLength);
87                 EcmaString::MemCopyChars(sp, rightLength << 1U, srcRight, rightLength << 1U);
88             }
89         }
90         ASSERT_PRINT(compressed == CanBeCompressed(newString), "compressed does not match the real value!");
91         return newString;
92     }
93     return CreateTreeString(vm, left, right, newLength, compressed);
94 }
95 
96 /* static */
CopyStringToOldSpace(const EcmaVM * vm,const JSHandle<EcmaString> & original,uint32_t length,bool compressed)97 EcmaString *EcmaString::CopyStringToOldSpace(const EcmaVM *vm, const JSHandle<EcmaString> &original,
98     uint32_t length, bool compressed)
99 {
100     if (original->IsConstantString()) {
101         return CreateConstantString(vm, original->GetDataUtf8(), length, MemSpaceType::OLD_SPACE);
102     }
103     JSHandle<EcmaString> newString(vm->GetJSThread(),
104         CreateLineStringWithSpaceType(vm, length, compressed, MemSpaceType::OLD_SPACE));
105     auto strOrigin = FlattenAllString(vm, original);
106     if (compressed) {
107         // copy
108         Span<uint8_t> sp(newString->GetDataUtf8Writable(), length);
109         Span<const uint8_t> srcSp(strOrigin.GetDataUtf8(), length);
110         EcmaString::MemCopyChars(sp, length, srcSp, length);
111     } else {
112         // copy left part
113         Span<uint16_t> sp(newString->GetDataUtf16Writable(), length);
114         if (strOrigin.IsUtf8()) {
115             EcmaString::CopyChars(sp.data(), strOrigin.GetDataUtf8(), length);
116         } else {
117             Span<const uint16_t> srcSp(strOrigin.GetDataUtf16(), length);
118             EcmaString::MemCopyChars(sp, length << 1U, srcSp, length << 1U);
119         }
120     }
121     ASSERT_PRINT(compressed == CanBeCompressed(*newString), "compressed does not match the real value!");
122     return *newString;
123 }
124 
125 /* static */
FastSubString(const EcmaVM * vm,const JSHandle<EcmaString> & src,uint32_t start,uint32_t length)126 EcmaString *EcmaString::FastSubString(const EcmaVM *vm,
127     const JSHandle<EcmaString> &src, uint32_t start, uint32_t length)
128 {
129     ASSERT((start + length) <= src->GetLength());
130     if (length == 0) {
131         return *vm->GetFactory()->GetEmptyString();
132     }
133     if (start == 0 && length == src->GetLength()) {
134         return *src;
135     }
136     if (src->IsUtf8()) {
137         return FastSubUtf8String(vm, src, start, length);
138     }
139     return FastSubUtf16String(vm, src, start, length);
140 }
141 
142 /* static */
GetSlicedString(const EcmaVM * vm,const JSHandle<EcmaString> & src,uint32_t start,uint32_t length)143 EcmaString *EcmaString::GetSlicedString(const EcmaVM *vm,
144     const JSHandle<EcmaString> &src, uint32_t start, uint32_t length)
145 {
146     ASSERT((start + length) <= src->GetLength());
147     JSHandle<SlicedString> slicedString(vm->GetJSThread(), CreateSlicedString(vm));
148     FlatStringInfo srcFlat = FlattenAllString(vm, src);
149     slicedString->SetLength(length, srcFlat.GetString()->IsUtf8());
150     slicedString->SetParent(vm->GetJSThread(), JSTaggedValue(srcFlat.GetString()));
151     slicedString->SetStartIndex(start + srcFlat.GetStartIndex());
152     return *slicedString;
153 }
154 
155 /* static */
GetSubString(const EcmaVM * vm,const JSHandle<EcmaString> & src,uint32_t start,uint32_t length)156 EcmaString *EcmaString::GetSubString(const EcmaVM *vm,
157     const JSHandle<EcmaString> &src, uint32_t start, uint32_t length)
158 {
159     ASSERT((start + length) <= src->GetLength());
160     if (length == 1) {
161         JSThread *thread = vm->GetJSThread();
162         uint16_t res = EcmaStringAccessor(src).Get<false>(start);
163         if (EcmaStringAccessor::CanBeCompressed(&res, 1)) {
164             JSHandle<SingleCharTable> singleCharTable(thread, thread->GetSingleCharTable());
165             return EcmaString::Cast(singleCharTable->GetStringFromSingleCharTable(res).GetTaggedObject());
166         }
167     }
168     if (static_cast<uint32_t>(length) >= SlicedString::MIN_SLICED_ECMASTRING_LENGTH) {
169         if (start == 0 && length == src->GetLength()) {
170             return *src;
171         }
172         if (src->IsUtf16()) {
173             FlatStringInfo srcFlat = FlattenAllString(vm, src);
174             bool canBeCompressed = CanBeCompressed(srcFlat.GetDataUtf16() + start, length);
175             if (canBeCompressed) {
176                 JSHandle<EcmaString> string(vm->GetJSThread(), CreateLineString(vm, length, canBeCompressed));
177                 srcFlat = FlattenAllString(vm, src);
178                 CopyChars(string->GetDataUtf8Writable(), srcFlat.GetDataUtf16() + start, length);
179                 return *string;
180             }
181         }
182         return GetSlicedString(vm, src, start, length);
183     }
184     return FastSubString(vm, src, start, length);
185 }
186 
SubStringIsUtf8(const EcmaVM * vm,const JSHandle<EcmaString> & src,uint32_t start,uint32_t length)187 bool EcmaString::SubStringIsUtf8(const EcmaVM *vm,
188     const JSHandle<EcmaString> &src, uint32_t start, uint32_t length)
189 {
190     ASSERT((start + length) <= src->GetLength());
191     if (length == 0) {
192         return true;
193     }
194     if (src->IsUtf8()) {
195         return true;
196     }
197     FlatStringInfo srcFlat = FlattenAllString(vm, src);
198     return CanBeCompressed(srcFlat.GetDataUtf16() + start, length);
199 }
200 
WriteData(EcmaString * src,uint32_t start,uint32_t destSize,uint32_t length)201 void EcmaString::WriteData(EcmaString *src, uint32_t start, uint32_t destSize, uint32_t length)
202 {
203     ASSERT(IsLineString() && !IsConstantString());
204     if (IsUtf8()) {
205         ASSERT(src->IsUtf8());
206         CVector<uint8_t> buf;
207         const uint8_t *data = EcmaString::GetUtf8DataFlat(src, buf);
208         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
209         if (length != 0 && memcpy_s(GetDataUtf8Writable() + start, destSize, data, length) != EOK) {
210             LOG_FULL(FATAL) << "memcpy_s failed";
211             UNREACHABLE();
212         }
213     } else if (src->IsUtf8()) {
214         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
215         CVector<uint8_t> buf;
216         const uint8_t *data = EcmaString::GetUtf8DataFlat(src, buf);
217         Span<uint16_t> to(GetDataUtf16Writable() + start, length);
218         Span<const uint8_t> from(data, length);
219         for (uint32_t i = 0; i < length; i++) {
220             to[i] = from[i];
221         }
222     } else {
223         CVector<uint16_t> buf;
224         const uint16_t *data = EcmaString::GetUtf16DataFlat(src, buf);
225         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
226         if (length != 0 && memcpy_s(GetDataUtf16Writable() + start,
227             destSize * sizeof(uint16_t), data, length * sizeof(uint16_t)) != EOK) {
228             LOG_FULL(FATAL) << "memcpy_s failed";
229             UNREACHABLE();
230         }
231     }
232 }
233 
234 template<typename T1, typename T2>
CompareStringSpan(Span<T1> & lhsSp,Span<T2> & rhsSp,int32_t count)235 int32_t CompareStringSpan(Span<T1> &lhsSp, Span<T2> &rhsSp, int32_t count)
236 {
237     for (int32_t i = 0; i < count; ++i) {
238         auto left = static_cast<int32_t>(lhsSp[i]);
239         auto right = static_cast<int32_t>(rhsSp[i]);
240         if (left != right) {
241             return left - right;
242         }
243     }
244     return 0;
245 }
246 
Compare(const EcmaVM * vm,const JSHandle<EcmaString> & left,const JSHandle<EcmaString> & right)247 int32_t EcmaString::Compare(const EcmaVM *vm, const JSHandle<EcmaString> &left, const JSHandle<EcmaString> &right)
248 {
249     if (*left == *right) {
250         return 0;
251     }
252     FlatStringInfo lhs = FlattenAllString(vm, left);
253     JSHandle<EcmaString> string(vm->GetJSThread(), lhs.GetString());
254     FlatStringInfo rhs = FlattenAllString(vm, right);
255     lhs.SetString(*string);
256     int32_t lhsCount = static_cast<int32_t>(lhs.GetLength());
257     int32_t rhsCount = static_cast<int32_t>(rhs.GetLength());
258     int32_t countDiff = lhsCount - rhsCount;
259     int32_t minCount = (countDiff < 0) ? lhsCount : rhsCount;
260     if (!lhs.IsUtf16() && !rhs.IsUtf16()) {
261         Span<const uint8_t> lhsSp(lhs.GetDataUtf8(), lhsCount);
262         Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
263         int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
264         if (charDiff != 0) {
265             return charDiff;
266         }
267     } else if (!lhs.IsUtf16()) {
268         Span<const uint8_t> lhsSp(lhs.GetDataUtf8(), lhsCount);
269         Span<const uint16_t> rhsSp(rhs.GetDataUtf16(), rhsCount);
270         int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
271         if (charDiff != 0) {
272             return charDiff;
273         }
274     } else if (!rhs.IsUtf16()) {
275         Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), rhsCount);
276         Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), lhsCount);
277         int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
278         if (charDiff != 0) {
279             return charDiff;
280         }
281     } else {
282         Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
283         Span<const uint16_t> rhsSp(rhs.GetDataUtf16(), rhsCount);
284         int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
285         if (charDiff != 0) {
286             return charDiff;
287         }
288     }
289     return countDiff;
290 }
291 
292 template<typename T1, typename T2>
IsSubStringAtSpan(Span<T1> & lhsSp,Span<T2> & rhsSp,uint32_t offset)293 bool IsSubStringAtSpan(Span<T1> &lhsSp, Span<T2> &rhsSp, uint32_t offset)
294 {
295     int rhsSize = static_cast<int>(rhsSp.size());
296     ASSERT(rhsSize + offset <= lhsSp.size());
297     for (int i = 0; i < rhsSize; ++i) {
298         auto left = static_cast<int32_t>(lhsSp[offset + static_cast<uint32_t>(i)]);
299         auto right = static_cast<int32_t>(rhsSp[i]);
300         if (left != right) {
301             return false;
302         }
303     }
304     return true;
305 }
306 
307 
308 /**
309  * left: text string
310  * right: pattern string
311  * example 1: IsSubStringAt("IsSubStringAt", "Is", 0) return true
312  * example 2: IsSubStringAt("IsSubStringAt", "It", 0) return false
313 */
IsSubStringAt(const EcmaVM * vm,const JSHandle<EcmaString> & left,const JSHandle<EcmaString> & right,uint32_t offset)314 bool EcmaString::IsSubStringAt(const EcmaVM *vm, const JSHandle<EcmaString>& left,
315     const JSHandle<EcmaString>& right, uint32_t offset)
316 {
317     FlatStringInfo lhs = FlattenAllString(vm, left);
318     JSHandle<EcmaString> string(vm->GetJSThread(), lhs.GetString());
319     FlatStringInfo rhs = FlattenAllString(vm, right);
320     lhs.SetString(*string);
321     int32_t lhsCount = static_cast<int32_t>(lhs.GetLength());
322     int32_t rhsCount = static_cast<int32_t>(rhs.GetLength());
323     if (!lhs.IsUtf16() && !rhs.IsUtf16()) {
324         Span<const uint8_t> lhsSp(lhs.GetDataUtf8(), lhsCount);
325         Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
326         return IsSubStringAtSpan(lhsSp, rhsSp, offset);
327     } else if (!lhs.IsUtf16()) {
328         Span<const uint8_t> lhsSp(lhs.GetDataUtf8(), lhsCount);
329         Span<const uint16_t> rhsSp(rhs.GetDataUtf16(), rhsCount);
330         return IsSubStringAtSpan(lhsSp, rhsSp, offset);
331     } else if (!rhs.IsUtf16()) {
332         Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
333         Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
334         return IsSubStringAtSpan(lhsSp, rhsSp, offset);
335     } else {
336         Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
337         Span<const uint16_t> rhsSp(rhs.GetDataUtf16(), rhsCount);
338         return IsSubStringAtSpan(lhsSp, rhsSp, offset);
339     }
340     return false;
341 }
342 
343 /* static */
344 template<typename T1, typename T2>
IndexOf(Span<const T1> & lhsSp,Span<const T2> & rhsSp,int32_t pos,int32_t max)345 int32_t EcmaString::IndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos, int32_t max)
346 {
347     ASSERT(rhsSp.size() > 0);
348     auto first = static_cast<int32_t>(rhsSp[0]);
349     for (int32_t i = pos; i <= max; i++) {
350         if (static_cast<int32_t>(lhsSp[i]) != first) {
351             i++;
352             while (i <= max && static_cast<int32_t>(lhsSp[i]) != first) {
353                 i++;
354             }
355         }
356         /* Found first character, now look at the rest of rhsSp */
357         if (i <= max) {
358             int j = i + 1;
359             int end = j + static_cast<int>(rhsSp.size()) - 1;
360 
361             for (int k = 1; j < end && static_cast<int32_t>(lhsSp[j]) == static_cast<int32_t>(rhsSp[k]); j++, k++) {
362             }
363             if (j == end) {
364                 /* Found whole string. */
365                 return i;
366             }
367         }
368     }
369     return -1;
370 }
371 
372 template<typename T1, typename T2>
LastIndexOf(Span<const T1> & lhsSp,Span<const T2> & rhsSp,int32_t pos)373 int32_t EcmaString::LastIndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos)
374 {
375     int rhsSize = static_cast<int>(rhsSp.size());
376     ASSERT(rhsSize > 0);
377     auto first = rhsSp[0];
378     for (int32_t i = pos; i >= 0; i--) {
379         if (lhsSp[i] != first) {
380             continue;
381         }
382         /* Found first character, now look at the rest of rhsSp */
383         int j = 1;
384         while (j < rhsSize) {
385             if (rhsSp[j] != lhsSp[i + j]) {
386                 break;
387             }
388             j++;
389         }
390         if (j == rhsSize) {
391             return i;
392         }
393     }
394     return -1;
395 }
396 
IndexOf(const EcmaVM * vm,const JSHandle<EcmaString> & receiver,const JSHandle<EcmaString> & search,int pos)397 int32_t EcmaString::IndexOf(const EcmaVM *vm,
398     const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos)
399 {
400     EcmaString *lhstring = *receiver;
401     EcmaString *rhstring = *search;
402     if (lhstring == nullptr || rhstring == nullptr) {
403         return -1;
404     }
405     int32_t lhsCount = static_cast<int32_t>(lhstring->GetLength());
406     int32_t rhsCount = static_cast<int32_t>(rhstring->GetLength());
407 
408     if (pos > lhsCount) {
409         return -1;
410     }
411 
412     if (rhsCount == 0) {
413         return pos;
414     }
415 
416     if (pos < 0) {
417         pos = 0;
418     }
419 
420     int32_t max = lhsCount - rhsCount;
421     if (max < 0) {
422         return -1;
423     }
424 
425     if (pos + rhsCount > lhsCount) {
426         return -1;
427     }
428 
429     FlatStringInfo lhs = FlattenAllString(vm, receiver);
430     JSHandle<EcmaString> string(vm->GetJSThread(), lhs.GetString());
431     FlatStringInfo rhs = FlattenAllString(vm, search);
432     lhs.SetString(*string);
433 
434     if (rhs.IsUtf8() && lhs.IsUtf8()) {
435         Span<const uint8_t> lhsSp(lhs.GetDataUtf8(), lhsCount);
436         Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
437         return EcmaString::IndexOf(lhsSp, rhsSp, pos, max);
438     } else if (rhs.IsUtf16() && lhs.IsUtf16()) {  // NOLINT(readability-else-after-return)
439         Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
440         Span<const uint16_t> rhsSp(rhs.GetDataUtf16(), rhsCount);
441         return EcmaString::IndexOf(lhsSp, rhsSp, pos, max);
442     } else if (rhs.IsUtf16()) {
443         return -1;
444     } else {  // NOLINT(readability-else-after-return)
445         Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
446         Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
447         return EcmaString::IndexOf(lhsSp, rhsSp, pos, max);
448     }
449 }
450 
LastIndexOf(const EcmaVM * vm,const JSHandle<EcmaString> & receiver,const JSHandle<EcmaString> & search,int pos)451 int32_t EcmaString::LastIndexOf(const EcmaVM *vm,
452     const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos)
453 {
454     EcmaString *lhstring = *receiver;
455     EcmaString *rhstring = *search;
456     if (lhstring == nullptr || rhstring == nullptr) {
457         return -1;
458     }
459 
460     int32_t lhsCount = static_cast<int32_t>(lhstring->GetLength());
461     int32_t rhsCount = static_cast<int32_t>(rhstring->GetLength());
462     if (lhsCount < rhsCount) {
463         return -1;
464     }
465 
466     if (pos < 0) {
467         pos = 0;
468     }
469 
470     if (pos > lhsCount) {
471         pos = lhsCount;
472     }
473 
474     if (pos + rhsCount > lhsCount) {
475         pos = lhsCount - rhsCount;
476     }
477 
478     if (rhsCount == 0) {
479         return pos;
480     }
481 
482     FlatStringInfo lhs = FlattenAllString(vm, receiver);
483     JSHandle<EcmaString> string(vm->GetJSThread(), lhs.GetString());
484     FlatStringInfo rhs = FlattenAllString(vm, search);
485     lhs.SetString(*string);
486     if (rhs.IsUtf8() && lhs.IsUtf8()) {
487         Span<const uint8_t> lhsSp(lhs.GetDataUtf8(), lhsCount);
488         Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
489         return EcmaString::LastIndexOf(lhsSp, rhsSp, pos);
490     } else if (rhs.IsUtf16() && lhs.IsUtf16()) {  // NOLINT(readability-else-after-return)
491         Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
492         Span<const uint16_t> rhsSp(rhs.GetDataUtf16(), rhsCount);
493         return EcmaString::LastIndexOf(lhsSp, rhsSp, pos);
494     } else if (rhs.IsUtf16()) {
495         return -1;
496     } else {  // NOLINT(readability-else-after-return)
497         Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
498         Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
499         return EcmaString::LastIndexOf(lhsSp, rhsSp, pos);
500     }
501 }
502 
ToU16String(uint32_t len)503 std::u16string EcmaString::ToU16String(uint32_t len)
504 {
505     uint32_t length = len > 0 ? len : GetLength();
506     std::u16string result;
507     if (IsUtf16()) {
508         CVector<uint16_t> buf;
509         const uint16_t *data = EcmaString::GetUtf16DataFlat(this, buf);
510         result = base::StringHelper::Utf16ToU16String(data, length);
511     } else {
512         CVector<uint8_t> buf;
513         const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
514         result = base::StringHelper::Utf8ToU16String(data, length);
515     }
516     return result;
517 }
518 
519 //static
CalculateAllConcatHashCode(const JSHandle<EcmaString> & firstString,const JSHandle<EcmaString> & secondString)520 uint32_t EcmaString::CalculateAllConcatHashCode(const JSHandle<EcmaString> &firstString,
521                                                 const JSHandle<EcmaString> &secondString)
522 {
523     uint32_t hashCode;
524     uint32_t firstLength = firstString->GetLength();
525     uint32_t secondLength = secondString->GetLength();
526     if ((firstLength + secondLength < MAX_ELEMENT_INDEX_LEN) &&
527         firstString->IsUtf8() && secondString->IsUtf8() &&
528         firstString->IsInteger() && secondString->IsInteger()) {
529             firstString->HashIntegerString(firstLength, &hashCode, 0);
530             secondString->HashIntegerString(secondLength, &hashCode, hashCode);
531             return hashCode;
532     }
533     hashCode = EcmaString::CalculateConcatHashCode(firstString, secondString);
534     hashCode = MixHashcode(hashCode, NOT_INTEGER);
535     return hashCode;
536 }
537 
538 // static
/**
 * Hashes the character sequence formed by `dataFirst[0..sizeFirst)` followed by
 * `dataSecond[0..sizeSecond)` without materialising the concatenation.
 *
 * Every accumulation step has the form h = (h << hashShift) - h + c.
 * Short inputs (combined size <= MIN_SIZE_FOR_UNROLLING) are folded into one accumulator.
 * Longer inputs are consumed in blocks of `blockSize` characters, one accumulator ("lane")
 * per position inside the block; characters left over after the last whole block are folded
 * into lane 0, and finally the lanes are folded into the result.
 *
 * NOTE(review): the unrolled loops index hash[0..3] directly, so this presumably relies on
 * EcmaStringHash::BLOCK_SIZE being 4 - confirm against the enum.
 * NOTE(review): the block boundaries are laid out over the combined sequence, presumably so the
 * result equals the hash of the already-concatenated string - confirm against the single-string hash.
 */
template<typename T1, typename T2>
uint32_t EcmaString::CalculateDataConcatHashCode(const T1 *dataFirst, size_t sizeFirst,
                                                 const T2 *dataSecond, size_t sizeSecond)
{
    uint32_t totalHash = 0;
    constexpr uint32_t hashShift = static_cast<uint32_t>(EcmaStringHash::HASH_SHIFT);
    constexpr uint32_t blockSize = static_cast<size_t>(EcmaStringHash::BLOCK_SIZE);
    // Short case: the combined length does not exceed MIN_SIZE_FOR_UNROLLING, use a single accumulator.
    if (sizeFirst + sizeSecond <= static_cast<size_t>(EcmaStringHash::MIN_SIZE_FOR_UNROLLING)) {
        for (uint32_t i = 0; i < sizeFirst; i++) {
            totalHash = (totalHash << hashShift) - totalHash + dataFirst[i];
        }
        for (uint32_t i = 0; i < sizeSecond; i++) {
            totalHash = (totalHash << hashShift) - totalHash + dataSecond[i];
        }
        return totalHash;
    }
    // Consume all whole blocks of the first string, one lane per position in the block.
    uint32_t hash[blockSize] = {0};
    uint32_t index = 0;
    for (; index + blockSize <= sizeFirst; index += blockSize) {
        hash[0] = (hash[0] << hashShift) - hash[0] + dataFirst[index];
        hash[1] = (hash[1] << hashShift) - hash[1] + dataFirst[index + 1]; // 1: the second element
        hash[2] = (hash[2] << hashShift) - hash[2] + dataFirst[index + 2]; // 2: the third element
        hash[3] = (hash[3] << hashShift) - hash[3] + dataFirst[index + 3]; // 3: the fourth element
    }
    // The tail of the first string plus the whole second string is shorter than one block:
    // everything that is left is leftover data and goes into lane 0.
    if ((sizeFirst % blockSize) + sizeSecond < blockSize) {
        for (; index < sizeFirst; ++index) {
            hash[0] = (hash[0] << hashShift) - hash[0] + dataFirst[index];
        }
        // Restart the index for the second string; the final leftover loop below consumes all of it.
        index = 0;
    } else {
        // The tail of the first string starts a block that the second string completes:
        // put each tail character into the lane of its position inside that block.
        for (; index < sizeFirst; ++index) {
            hash[index % blockSize] = (hash[index % blockSize] << hashShift) -
                                        hash[index % blockSize] + dataFirst[index];
        }
        // Number of characters of the second string needed to complete that straddling block
        // (0 when the first string ended exactly on a block boundary).
        uint32_t wholeBlockRemain = (blockSize - sizeFirst % blockSize) % blockSize;
        index = 0;
        for (; index < wholeBlockRemain && index < sizeSecond; ++index) {
            // Lane index continues where the tail of the first string stopped.
            uint32_t nowHashIndex = sizeFirst % blockSize + index;
            hash[nowHashIndex] = (hash[nowHashIndex] << hashShift) - hash[nowHashIndex] + dataSecond[index];
        }
        // Consume the remaining whole blocks of the second string.
        for (; index + blockSize <= sizeSecond; index += blockSize) {
            hash[0] = (hash[0] << hashShift) - hash[0] + dataSecond[index];
            hash[1] = (hash[1] << hashShift) - hash[1] + dataSecond[index + 1]; // 1: the second element
            hash[2] = (hash[2] << hashShift) - hash[2] + dataSecond[index + 2]; // 2: the third element
            hash[3] = (hash[3] << hashShift) - hash[3] + dataSecond[index + 3]; // 3: the fourth element
        }
    }
    // Leftover characters of the second string (less than one block) go into lane 0.
    for (; index < sizeSecond; ++index) {
        hash[0] = (hash[0] << hashShift) - hash[0] + dataSecond[index];
    }
    // Fold the lanes into the final hash.
    for (uint32_t i = 0; i < blockSize; ++i) {
        totalHash = (totalHash << hashShift) - totalHash + hash[i];
    }
    return totalHash;
}
601 
602 // static
CalculateConcatHashCode(const JSHandle<EcmaString> & firstString,const JSHandle<EcmaString> & secondString)603 uint32_t EcmaString::CalculateConcatHashCode(const JSHandle<EcmaString> &firstString,
604                                              const JSHandle<EcmaString> &secondString)
605 {
606     bool isFirstStringUtf8 = EcmaStringAccessor(firstString).IsUtf8();
607     bool isSecondStringUtf8 = EcmaStringAccessor(secondString).IsUtf8();
608     EcmaString *firstStr = *firstString;
609     EcmaString *secondStr = *secondString;
610     CVector<uint8_t> bufFirstUint8;
611     CVector<uint8_t> bufSecondUint8;
612     CVector<uint16_t> bufFirstUint16;
613     CVector<uint16_t> bufSecondUint16;
614     if (isFirstStringUtf8 && isSecondStringUtf8) {
615         const uint8_t *dataFirst = EcmaString::GetUtf8DataFlat(firstStr, bufFirstUint8);
616         const uint8_t *dataSecond = EcmaString::GetUtf8DataFlat(secondStr, bufSecondUint8);
617         return CalculateDataConcatHashCode(dataFirst, firstStr->GetLength(),
618                                            dataSecond, secondStr->GetLength());
619     }
620     if (!isFirstStringUtf8 && isSecondStringUtf8) {
621         const uint16_t *dataFirst = EcmaString::GetUtf16DataFlat(firstStr, bufFirstUint16);
622         const uint8_t *dataSecond = EcmaString::GetUtf8DataFlat(secondStr, bufSecondUint8);
623         return CalculateDataConcatHashCode(dataFirst, firstStr->GetLength(),
624                                            dataSecond, secondStr->GetLength());
625     }
626     if (isFirstStringUtf8 && !isSecondStringUtf8) {
627         const uint8_t *dataFirst = EcmaString::GetUtf8DataFlat(firstStr, bufFirstUint8);
628         const uint16_t *dataSecond = EcmaString::GetUtf16DataFlat(secondStr, bufSecondUint16);
629         return CalculateDataConcatHashCode(dataFirst, firstStr->GetLength(),
630                                            dataSecond, secondStr->GetLength());
631     }
632     {
633         const uint16_t *dataFirst = EcmaString::GetUtf16DataFlat(firstStr, bufFirstUint16);
634         const uint16_t *dataSecond = EcmaString::GetUtf16DataFlat(secondStr, bufSecondUint16);
635         return  CalculateDataConcatHashCode(dataFirst, firstStr->GetLength(),
636                                             dataSecond, secondStr->GetLength());
637     }
638 }
639 
640 // static
CanBeCompressed(const EcmaString * string)641 bool EcmaString::CanBeCompressed(const EcmaString *string)
642 {
643     ASSERT(string->IsLineOrConstantString());
644     if (string->IsUtf8()) {
645         return CanBeCompressed(string->GetDataUtf8(), string->GetLength());
646     }
647     return CanBeCompressed(string->GetDataUtf16(), string->GetLength());
648 }
649 
650 // static
CanBeCompressed(const uint8_t * utf8Data,uint32_t utf8Len)651 bool EcmaString::CanBeCompressed(const uint8_t *utf8Data, uint32_t utf8Len)
652 {
653     uint32_t index = 0;
654     for (; index + 4 <= utf8Len; index += 4) { // 4: process the data in chunks of 4 elements to improve speed
655         // Check if all four characters in the current block are ASCII characters
656         if (!IsASCIICharacter(utf8Data[index]) ||
657             !IsASCIICharacter(utf8Data[index + 1]) || // 1: the second element of the block
658             !IsASCIICharacter(utf8Data[index + 2]) || // 2: the third element of the block
659             !IsASCIICharacter(utf8Data[index + 3])) { // 3: the fourth element of the block
660             return false;
661         }
662     }
663     // Check remaining characters if they are ASCII
664     for (; index < utf8Len; ++index) {
665         if (!IsASCIICharacter(utf8Data[index])) {
666             return false;
667         }
668     }
669     return true;
670 }
671 
672 /* static */
CanBeCompressed(const uint16_t * utf16Data,uint32_t utf16Len)673 bool EcmaString::CanBeCompressed(const uint16_t *utf16Data, uint32_t utf16Len)
674 {
675     uint32_t index = 0;
676     for (; index + 4 <= utf16Len; index += 4) { // 4: process the data in chunks of 4 elements to improve speed
677         // Check if all four characters in the current block are ASCII characters
678         if (!IsASCIICharacter(utf16Data[index]) ||
679             !IsASCIICharacter(utf16Data[index + 1]) || // 1: the second element of the block
680             !IsASCIICharacter(utf16Data[index + 2]) || // 2: the third element of the block
681             !IsASCIICharacter(utf16Data[index + 3])) { // 3: the fourth element of the block
682             return false;
683         }
684     }
685     // Check remaining characters if they are ASCII
686     for (; index < utf16Len; ++index) {
687         if (!IsASCIICharacter(utf16Data[index])) {
688             return false;
689         }
690     }
691     return true;
692 }
693 
EqualToSplicedString(const EcmaString * str1,const EcmaString * str2)694 bool EcmaString::EqualToSplicedString(const EcmaString *str1, const EcmaString *str2)
695 {
696     ASSERT(NotTreeString());
697     ASSERT(str1->NotTreeString() && str2->NotTreeString());
698     if (GetLength() != str1->GetLength() + str2->GetLength()) {
699         return false;
700     }
701     if (IsUtf16()) {
702         CVector<uint16_t> buf;
703         const uint16_t *data = EcmaString::GetUtf16DataFlat(this, buf);
704         if (EcmaString::StringsAreEqualUtf16(str1, data, str1->GetLength())) {
705             return EcmaString::StringsAreEqualUtf16(str2, data + str1->GetLength(), str2->GetLength());
706         }
707     } else {
708         CVector<uint8_t> buf;
709         const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
710         if (EcmaString::StringIsEqualUint8Data(str1, data, str1->GetLength(), this->IsUtf8())) {
711             return EcmaString::StringIsEqualUint8Data(str2, data + str1->GetLength(),
712                                                       str2->GetLength(), this->IsUtf8());
713         }
714     }
715     return false;
716 }
717 
718 /* static */
StringsAreEqualDiffUtfEncoding(EcmaString * left,EcmaString * right)719 bool EcmaString::StringsAreEqualDiffUtfEncoding(EcmaString *left, EcmaString *right)
720 {
721     CVector<uint16_t> bufLeftUft16;
722     CVector<uint16_t> bufRightUft16;
723     CVector<uint8_t> bufLeftUft8;
724     CVector<uint8_t> bufRightUft8;
725     int32_t lhsCount = static_cast<int32_t>(left->GetLength());
726     int32_t rhsCount = static_cast<int32_t>(right->GetLength());
727     if (!left->IsUtf16() && !right->IsUtf16()) {
728         const uint8_t *data1 = EcmaString::GetUtf8DataFlat(left, bufLeftUft8);
729         const uint8_t *data2 = EcmaString::GetUtf8DataFlat(right, bufRightUft8);
730         Span<const uint8_t> lhsSp(data1, lhsCount);
731         Span<const uint8_t> rhsSp(data2, rhsCount);
732         return EcmaString::StringsAreEquals(lhsSp, rhsSp);
733     } else if (!left->IsUtf16()) {
734         const uint8_t *data1 = EcmaString::GetUtf8DataFlat(left, bufLeftUft8);
735         const uint16_t *data2 = EcmaString::GetUtf16DataFlat(right, bufRightUft16);
736         Span<const uint8_t> lhsSp(data1, lhsCount);
737         Span<const uint16_t> rhsSp(data2, rhsCount);
738         return EcmaString::StringsAreEquals(lhsSp, rhsSp);
739     } else if (!right->IsUtf16()) {
740         const uint16_t *data1 = EcmaString::GetUtf16DataFlat(left, bufLeftUft16);
741         const uint8_t *data2 = EcmaString::GetUtf8DataFlat(right, bufRightUft8);
742         Span<const uint16_t> lhsSp(data1, lhsCount);
743         Span<const uint8_t> rhsSp(data2, rhsCount);
744         return EcmaString::StringsAreEquals(lhsSp, rhsSp);
745     } else {
746         const uint16_t *data1 = EcmaString::GetUtf16DataFlat(left, bufLeftUft16);
747         const uint16_t *data2 = EcmaString::GetUtf16DataFlat(right, bufRightUft16);
748         Span<const uint16_t> lhsSp(data1, lhsCount);
749         Span<const uint16_t> rhsSp(data2, rhsCount);
750         return EcmaString::StringsAreEquals(lhsSp, rhsSp);
751     }
752 }
753 
754 /* static */
StringsAreEqualDiffUtfEncoding(const FlatStringInfo & left,const FlatStringInfo & right)755 bool EcmaString::StringsAreEqualDiffUtfEncoding(const FlatStringInfo &left, const FlatStringInfo &right)
756 {
757     int32_t lhsCount = static_cast<int32_t>(left.GetLength());
758     int32_t rhsCount = static_cast<int32_t>(right.GetLength());
759     if (!left.IsUtf16() && !right.IsUtf16()) {
760         Span<const uint8_t> lhsSp(left.GetDataUtf8(), lhsCount);
761         Span<const uint8_t> rhsSp(right.GetDataUtf8(), rhsCount);
762         return EcmaString::StringsAreEquals(lhsSp, rhsSp);
763     } else if (!left.IsUtf16()) {
764         Span<const uint8_t> lhsSp(left.GetDataUtf8(), lhsCount);
765         Span<const uint16_t> rhsSp(right.GetDataUtf16(), rhsCount);
766         return EcmaString::StringsAreEquals(lhsSp, rhsSp);
767     } else if (!right.IsUtf16()) {
768         Span<const uint16_t> lhsSp(left.GetDataUtf16(), rhsCount);
769         Span<const uint8_t> rhsSp(right.GetDataUtf8(), lhsCount);
770         return EcmaString::StringsAreEquals(lhsSp, rhsSp);
771     } else {
772         Span<const uint16_t> lhsSp(left.GetDataUtf16(), lhsCount);
773         Span<const uint16_t> rhsSp(right.GetDataUtf16(), rhsCount);
774         return EcmaString::StringsAreEquals(lhsSp, rhsSp);
775     }
776 }
777 
StringsAreEqual(const EcmaVM * vm,const JSHandle<EcmaString> & str1,const JSHandle<EcmaString> & str2)778 bool EcmaString::StringsAreEqual(const EcmaVM *vm, const JSHandle<EcmaString> &str1, const JSHandle<EcmaString> &str2)
779 {
780     if (str1 == str2) {
781         return true;
782     }
783     if (str1->IsInternString() && str2->IsInternString()) {
784         return false;
785     }
786     uint32_t str1Len = str1->GetLength();
787     if (str1Len != str2->GetLength()) {
788         return false;
789     }
790     if (str1Len == 0) {
791         return true;
792     }
793 
794     uint32_t str1Hash;
795     uint32_t str2Hash;
796     if (str1->TryGetHashCode(&str1Hash) && str2->TryGetHashCode(&str2Hash)) {
797         if (str1Hash != str2Hash) {
798             return false;
799         }
800     }
801     FlatStringInfo str1Flat = FlattenAllString(vm, str1);
802     JSHandle<EcmaString> string(vm->GetJSThread(), str1Flat.GetString());
803     FlatStringInfo str2Flat = FlattenAllString(vm, str2);
804     str1Flat.SetString(*string);
805     return StringsAreEqualDiffUtfEncoding(str1Flat, str2Flat);
806 }
807 
808 /* static */
StringsAreEqual(EcmaString * str1,EcmaString * str2)809 bool EcmaString::StringsAreEqual(EcmaString *str1, EcmaString *str2)
810 {
811     ASSERT(str1 != nullptr && str2 != nullptr);
812     if (str1 == str2) {
813         return true;
814     }
815     uint32_t str1Len = str1->GetLength();
816     if (str1Len != str2->GetLength()) {
817         return false;
818     }
819     if (str1Len == 0) {
820         return true;
821     }
822 
823     uint32_t str1Hash;
824     uint32_t str2Hash;
825     if (str1->TryGetHashCode(&str1Hash) && str2->TryGetHashCode(&str2Hash)) {
826         if (str1Hash != str2Hash) {
827             return false;
828         }
829     }
830     return StringsAreEqualDiffUtfEncoding(str1, str2);
831 }
832 
833 /* static */
StringIsEqualUint8Data(const EcmaString * str1,const uint8_t * dataAddr,uint32_t dataLen,bool canBeCompressToUtf8)834 bool EcmaString::StringIsEqualUint8Data(const EcmaString *str1, const uint8_t *dataAddr, uint32_t dataLen,
835                                         bool canBeCompressToUtf8)
836 {
837     if (!str1->IsSlicedString() && canBeCompressToUtf8 != str1->IsUtf8()) {
838         return false;
839     }
840     if (canBeCompressToUtf8 && str1->GetLength() != dataLen) {
841         return false;
842     }
843     if (str1->IsUtf8()) {
844         CVector<uint8_t> buf;
845         Span<const uint8_t> data1(EcmaString::GetUtf8DataFlat(str1, buf), dataLen);
846         Span<const uint8_t> data2(dataAddr, dataLen);
847         return EcmaString::StringsAreEquals(data1, data2);
848     }
849     CVector<uint16_t> buf;
850     uint32_t length = str1->GetLength();
851     const uint16_t *data = EcmaString::GetUtf16DataFlat(str1, buf);
852     return IsUtf8EqualsUtf16(dataAddr, dataLen, data, length);
853 }
854 
855 /* static */
StringsAreEqualUtf16(const EcmaString * str1,const uint16_t * utf16Data,uint32_t utf16Len)856 bool EcmaString::StringsAreEqualUtf16(const EcmaString *str1, const uint16_t *utf16Data, uint32_t utf16Len)
857 {
858     uint32_t length = str1->GetLength();
859     if (length != utf16Len) {
860         return false;
861     }
862     if (str1->IsUtf8()) {
863         CVector<uint8_t> buf;
864         const uint8_t *data = EcmaString::GetUtf8DataFlat(str1, buf);
865         return IsUtf8EqualsUtf16(data, length, utf16Data, utf16Len);
866     } else {
867         CVector<uint16_t> buf;
868         Span<const uint16_t> data1(EcmaString::GetUtf16DataFlat(str1, buf), length);
869         Span<const uint16_t> data2(utf16Data, utf16Len);
870         return EcmaString::StringsAreEquals(data1, data2);
871     }
872 }
873 
874 template<typename T>
MemCopyChars(Span<T> & dst,size_t dstMax,Span<const T> & src,size_t count)875 bool EcmaString::MemCopyChars(Span<T> &dst, size_t dstMax, Span<const T> &src, size_t count)
876 {
877     ASSERT(dstMax >= count);
878     ASSERT(dst.Size() >= src.Size());
879     if (memcpy_s(dst.data(), dstMax, src.data(), count) != EOK) {
880         LOG_FULL(FATAL) << "memcpy_s failed";
881         UNREACHABLE();
882     }
883     return true;
884 }
885 
HashIntegerString(uint32_t length,uint32_t * hash,const uint32_t hashSeed) const886 bool EcmaString::HashIntegerString(uint32_t length, uint32_t *hash, const uint32_t hashSeed) const
887 {
888     ASSERT(length >= 0);
889     Span<const uint8_t> str = FastToUtf8Span();
890     return HashIntegerString(str.data(), length, hash, hashSeed);
891 }
892 
ComputeHashcode() const893 uint32_t EcmaString::ComputeHashcode() const
894 {
895     auto [hash, isInteger] = ComputeRawHashcode();
896     return MixHashcode(hash, isInteger);
897 }
898 
899 // hashSeed only be used when computing two separate strings merged hashcode.
ComputeRawHashcode() const900 std::pair<uint32_t, bool> EcmaString::ComputeRawHashcode() const
901 {
902     uint32_t hash = 0;
903     uint32_t length = GetLength();
904     if (length == 0) {
905         return {hash, false};
906     }
907 
908     if (IsUtf8()) {
909         // String using UTF8 encoding, and length smaller than 10, try to compute integer hash.
910         if (length < MAX_ELEMENT_INDEX_LEN && this->HashIntegerString(length, &hash, 0)) {
911             return {hash, true};
912         }
913         CVector<uint8_t> buf;
914         const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
915         // String can not convert to integer number, using normal hashcode computing algorithm.
916         hash = this->ComputeHashForData(data, length, 0);
917         return {hash, false};
918     } else {
919         CVector<uint16_t> buf;
920         const uint16_t *data = EcmaString::GetUtf16DataFlat(this, buf);
921         // If rawSeed has certain value, and second string uses UTF16 encoding,
922         // then merged string can not be small integer number.
923         hash = this->ComputeHashForData(data, length, 0);
924         return {hash, false};
925     }
926 }
927 
928 // hashSeed only be used when computing two separate strings merged hashcode.
ComputeHashcode(uint32_t rawHashSeed,bool isInteger) const929 uint32_t EcmaString::ComputeHashcode(uint32_t rawHashSeed, bool isInteger) const
930 {
931     uint32_t hash;
932     uint32_t length = GetLength();
933     if (length == 0) {
934         return MixHashcode(rawHashSeed, isInteger);
935     }
936 
937     if (IsUtf8()) {
938         // String using UTF8 encoding, and length smaller than 10, try to compute integer hash.
939         if ((rawHashSeed == 0 || isInteger) &&
940              length < MAX_ELEMENT_INDEX_LEN && this->HashIntegerString(length, &hash, rawHashSeed)) {
941             return hash;
942         }
943         CVector<uint8_t> buf;
944         const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
945         // String can not convert to integer number, using normal hashcode computing algorithm.
946         hash = this->ComputeHashForData(data, length, rawHashSeed);
947         return MixHashcode(hash, NOT_INTEGER);
948     } else {
949         CVector<uint16_t> buf;
950         const uint16_t *data = EcmaString::GetUtf16DataFlat(this, buf);
951         // If rawSeed has certain value, and second string uses UTF16 encoding,
952         // then merged string can not be small integer number.
953         hash = this->ComputeHashForData(data, length, rawHashSeed);
954         return MixHashcode(hash, NOT_INTEGER);
955     }
956 }
957 
958 /* static */
ComputeHashcodeUtf8(const uint8_t * utf8Data,size_t utf8Len,bool canBeCompress)959 uint32_t EcmaString::ComputeHashcodeUtf8(const uint8_t *utf8Data, size_t utf8Len, bool canBeCompress)
960 {
961     uint32_t mixHash = 0;
962     if (canBeCompress) {
963         // String using UTF8 encoding, and length smaller than 10, try to compute integer hash.
964         if (utf8Len < MAX_ELEMENT_INDEX_LEN && HashIntegerString(utf8Data, utf8Len, &mixHash, 0)) {
965             return mixHash;
966         }
967         uint32_t hash = ComputeHashForData(utf8Data, utf8Len, 0);
968         return MixHashcode(hash, NOT_INTEGER);
969     } else {
970         auto utf16Len = base::utf_helper::Utf8ToUtf16Size(utf8Data, utf8Len);
971         CVector<uint16_t> tmpBuffer(utf16Len);
972         [[maybe_unused]] auto len = base::utf_helper::ConvertRegionUtf8ToUtf16(utf8Data, tmpBuffer.data(), utf8Len,
973                                                                                utf16Len);
974         ASSERT(len == utf16Len);
975         uint32_t hash = ComputeHashForData(tmpBuffer.data(), utf16Len, 0);
976         return MixHashcode(hash, NOT_INTEGER);
977     }
978     LOG_ECMA(FATAL) << "this branch is unreachable";
979     UNREACHABLE();
980 }
981 
982 /* static */
ComputeHashcodeUtf16(const uint16_t * utf16Data,uint32_t length)983 uint32_t EcmaString::ComputeHashcodeUtf16(const uint16_t *utf16Data, uint32_t length)
984 {
985     uint32_t mixHash = 0;
986     // String length smaller than 10, try to compute integer hash.
987     if (length < MAX_ELEMENT_INDEX_LEN && HashIntegerString(utf16Data, length, &mixHash, 0)) {
988         return mixHash;
989     }
990     uint32_t hash = ComputeHashForData(utf16Data, length, 0);
991     return MixHashcode(hash, NOT_INTEGER);
992 }
993 
// Drops the tail bytes when the remaining length can't hold the sequence its lead byte announces.
// Looks at the last three bytes: a byte that sits `k` positions from the end and whose value is at
// least the lead-byte threshold for more than `k - 1` following bytes marks the last `k` bytes as
// incomplete. The largest matching `k` wins. Returns utf8Len minus the number of dropped bytes.
static size_t FixUtf8Len(const uint8_t* utf8, size_t utf8Len)
{
    // LEAD_THRESHOLDS[k - 1]: smallest byte value that claims at least `k` bytes follow it.
    constexpr uint8_t LEAD_THRESHOLDS[] = {0xC0, 0xE0, 0xF0};
    constexpr size_t MAX_LOOKBACK = sizeof(LEAD_THRESHOLDS) / sizeof(LEAD_THRESHOLDS[0]);
    size_t dropCount = 0;
    for (size_t back = 1; back <= MAX_LOOKBACK && back <= utf8Len; ++back) {
        if (utf8[utf8Len - back] >= LEAD_THRESHOLDS[back - 1]) {
            // A later (further back) match overrides an earlier one.
            dropCount = back;
        }
    }
    return utf8Len - dropCount;
}
1014 
1015 
1016 /* static */
IsUtf8EqualsUtf16(const uint8_t * utf8Data,size_t utf8Len,const uint16_t * utf16Data,uint32_t utf16Len)1017 bool EcmaString::IsUtf8EqualsUtf16(const uint8_t *utf8Data, size_t utf8Len,
1018                                    const uint16_t *utf16Data, uint32_t utf16Len)
1019 {
1020     size_t safeUtf8Len = FixUtf8Len(utf8Data, utf8Len);
1021     const uint8_t *utf8End = utf8Data + utf8Len;
1022     const uint8_t *utf8SafeEnd = utf8Data + safeUtf8Len;
1023     const uint16_t *utf16End = utf16Data + utf16Len;
1024     while (utf8Data < utf8SafeEnd && utf16Data < utf16End) {
1025         uint8_t src = *utf8Data;
1026         switch (src & 0xF0) {
1027             case 0xF0: {
1028                 const uint8_t c2 = *(++utf8Data);
1029                 const uint8_t c3 = *(++utf8Data);
1030                 const uint8_t c4 = *(++utf8Data);
1031                 uint32_t codePoint = ((src & LOW_3BITS) << OFFSET_18POS) | ((c2 & LOW_6BITS) << OFFSET_12POS) |
1032                                      ((c3 & LOW_6BITS) << OFFSET_6POS) | (c4 & LOW_6BITS);
1033                 if (codePoint >= SURROGATE_RAIR_START) {
1034                     if (utf16Data >= utf16End - 1) {
1035                         return false;
1036                     }
1037                     codePoint -= SURROGATE_RAIR_START;
1038                     if (*utf16Data++ != static_cast<uint16_t>((codePoint >> OFFSET_10POS) | H_SURROGATE_START)) {
1039                         return false;
1040                     } else if (*utf16Data++ != static_cast<uint16_t>((codePoint & 0x3FF) | L_SURROGATE_START)) {
1041                         return false;
1042                     }
1043                 } else {
1044                     if (*utf16Data++ != static_cast<uint16_t>(codePoint)) {
1045                         return false;
1046                     }
1047                 }
1048                 utf8Data++;
1049                 break;
1050             }
1051             case 0xE0: {
1052                 const uint8_t c2 = *(++utf8Data);
1053                 const uint8_t c3 = *(++utf8Data);
1054                 if (*utf16Data++ != static_cast<uint16_t>(((src & LOW_4BITS) << OFFSET_12POS) |
1055                     ((c2 & LOW_6BITS) << OFFSET_6POS) | (c3 & LOW_6BITS))) {
1056                     return false;
1057                 }
1058                 utf8Data++;
1059                 break;
1060             }
1061             case 0xD0:
1062             case 0xC0: {
1063                 const uint8_t c2 = *(++utf8Data);
1064                 if (*utf16Data++ != static_cast<uint16_t>(((src & LOW_5BITS) << OFFSET_6POS) | (c2 & LOW_6BITS))) {
1065                     return false;
1066                 }
1067                 utf8Data++;
1068                 break;
1069             }
1070             default:
1071                 do {
1072                     if (*utf16Data++ != static_cast<uint16_t>(*utf8Data++)) {
1073                         return false;
1074                     }
1075                 } while (utf8Data < utf8SafeEnd && utf16Data < utf16End && *utf8Data < 0x80);
1076                 break;
1077         }
1078     }
1079     // The remain chars should be treated as single byte char.
1080     while (utf8Data < utf8End && utf16Data < utf16End) {
1081         if (*utf16Data++ != static_cast<uint16_t>(*utf8Data++)) {
1082             return false;
1083         }
1084     }
1085     return utf8Data == utf8End && utf16Data == utf16End;
1086 }
1087 
ToElementIndex(uint32_t * index)1088 bool EcmaString::ToElementIndex(uint32_t *index)
1089 {
1090     uint32_t len = GetLength();
1091     if (UNLIKELY(len == 0 || len > MAX_ELEMENT_INDEX_LEN)) {  // NOLINTNEXTLINEreadability-magic-numbers)
1092         return false;
1093     }
1094     if (UNLIKELY(IsUtf16())) {
1095         return false;
1096     }
1097 
1098     // fast path: get integer from string's hash value
1099     if (TryToGetInteger(index)) {
1100         return true;
1101     }
1102 
1103     CVector<uint8_t> buf;
1104     const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
1105     uint32_t c = data[0];
1106     uint64_t n = 0;
1107     if (c == '0') {
1108         *index = 0;
1109         return len == 1;
1110     }
1111     uint32_t loopStart = 0;
1112     if (ToUInt64FromLoopStart(&n, loopStart, data) && n < JSObject::MAX_ELEMENT_INDEX) {
1113         *index = n;
1114         return true;
1115     }
1116     return false;
1117 }
1118 
ToInt(int32_t * index,bool * negative)1119 bool EcmaString::ToInt(int32_t *index, bool *negative)
1120 {
1121     uint32_t len = GetLength();
1122     if (UNLIKELY(len == 0 || len > MAX_ELEMENT_INDEX_LEN)) {  // NOLINTNEXTLINEreadability-magic-numbers)
1123         return false;
1124     }
1125     if (UNLIKELY(IsUtf16())) {
1126         return false;
1127     }
1128     CVector<uint8_t> buf;
1129     const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
1130     uint32_t c = data[0];
1131     uint32_t loopStart = 0;
1132     uint64_t n = 0;
1133     if (c == '0') {
1134         *index = 0;
1135         return len == 1;
1136     }
1137     if (c == '-' && len > 1) {
1138         *negative = true;
1139         loopStart = 1;
1140     }
1141 
1142     if (ToUInt64FromLoopStart(&n, loopStart, data) && n <= std::numeric_limits<int32_t>::max()) {
1143         *index = *negative ? -n : n;
1144         return true;
1145     }
1146     return false;
1147 }
1148 
ToUInt64FromLoopStart(uint64_t * index,uint32_t loopStart,const uint8_t * data)1149 bool EcmaString::ToUInt64FromLoopStart(uint64_t *index, uint32_t loopStart, const uint8_t *data)
1150 {
1151     uint64_t n = 0;
1152     uint32_t len = GetLength();
1153     if (UNLIKELY(loopStart >= len)) {
1154         return false;
1155     }
1156     for (uint32_t i = loopStart; i < len; i++) {
1157         uint32_t c = data[i];  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
1158         if (c < '0' || c > '9') {
1159             return false;
1160         }
1161         // NOLINTNEXTLINE(readability-magic-numbers)
1162         n = n * 10 + (c - '0');  // 10: decimal factor
1163     }
1164     *index = n;
1165     return true;
1166 }
1167 
ToTypedArrayIndex(uint32_t * index)1168 bool EcmaString::ToTypedArrayIndex(uint32_t *index)
1169 {
1170     uint32_t len = GetLength();
1171     if (UNLIKELY(len == 0 || len > MAX_ELEMENT_INDEX_LEN)) {
1172         return false;
1173     }
1174     if (UNLIKELY(IsUtf16())) {
1175         return false;
1176     }
1177 
1178     CVector<uint8_t> buf;
1179     const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
1180     uint32_t c = data[0];  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
1181     uint64_t n = 0;
1182     if (c == '0') {
1183         *index = 0;
1184         return len == 1;
1185     }
1186     if (c > '0' && c <= '9') {
1187         n = c - '0';
1188         for (uint32_t i = 1; i < len; i++) {
1189             c = data[i];  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
1190             if (c >= '0' && c <= '9') {
1191                 // NOLINTNEXTLINE(readability-magic-numbers)
1192                 n = n * 10 + (c - '0');  // 10: decimal factor
1193             } else if (c == '.') {
1194                 n = JSObject::MAX_ELEMENT_INDEX;
1195                 break;
1196             } else {
1197                 return false;
1198             }
1199         }
1200         if (n < JSObject::MAX_ELEMENT_INDEX) {
1201             *index = n;
1202             return true;
1203         } else {
1204             *index = JSObject::MAX_ELEMENT_INDEX;
1205             return true;
1206         }
1207     } else if (c == '-') {
1208         *index = JSObject::MAX_ELEMENT_INDEX;
1209         return true;
1210     }
1211     return false;
1212 }
1213 
1214 template<typename T>
TrimBody(const JSThread * thread,const JSHandle<EcmaString> & src,Span<T> & data,TrimMode mode)1215 EcmaString *EcmaString::TrimBody(const JSThread *thread, const JSHandle<EcmaString> &src, Span<T> &data, TrimMode mode)
1216 {
1217     uint32_t srcLen = src->GetLength();
1218     int32_t start = 0;
1219     int32_t end = static_cast<int32_t>(srcLen) - 1;
1220 
1221     if (mode == TrimMode::TRIM || mode == TrimMode::TRIM_START) {
1222         start = static_cast<int32_t>(base::StringHelper::GetStart(data, srcLen));
1223     }
1224     if (mode == TrimMode::TRIM || mode == TrimMode::TRIM_END) {
1225         end = base::StringHelper::GetEnd(data, start, srcLen);
1226     }
1227     EcmaString *res = FastSubString(thread->GetEcmaVM(), src, start, static_cast<uint32_t>(end - start + 1));
1228     return res;
1229 }
1230 
1231 /* static */
ToLower(const EcmaVM * vm,const JSHandle<EcmaString> & src)1232 EcmaString *EcmaString::ToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src)
1233 {
1234     auto srcFlat = FlattenAllString(vm, src);
1235     uint32_t srcLength = srcFlat.GetLength();
1236     auto factory = vm->GetFactory();
1237     if (srcFlat.IsUtf16()) {
1238         std::u16string u16str = base::StringHelper::Utf16ToU16String(srcFlat.GetDataUtf16(), srcLength);
1239         std::string res = base::StringHelper::ToLower(u16str);
1240         return *(factory->NewFromStdString(res));
1241     } else {
1242         return ConvertUtf8ToLowerOrUpper(vm, src, true);
1243     }
1244 }
1245 
1246 /* static */
TryToLower(const EcmaVM * vm,const JSHandle<EcmaString> & src)1247 EcmaString *EcmaString::TryToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src)
1248 {
1249     auto srcFlat = FlattenAllString(vm, src);
1250     uint32_t srcLength = srcFlat.GetLength();
1251     const char start = 'A';
1252     const char end = 'Z';
1253     uint32_t upperIndex = srcLength;
1254     Span<uint8_t> data(srcFlat.GetDataUtf8Writable(), srcLength);
1255     for (uint32_t index = 0; index < srcLength; ++index) {
1256         if (base::StringHelper::Utf8CharInRange(data[index], start, end)) {
1257             upperIndex = index;
1258             break;
1259         }
1260     }
1261     if (upperIndex == srcLength) {
1262         return *src;
1263     }
1264     return ConvertUtf8ToLowerOrUpper(vm, src, true, upperIndex);
1265 }
1266 
1267 /* static */
TryToUpper(const EcmaVM * vm,const JSHandle<EcmaString> & src)1268 EcmaString *EcmaString::TryToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src)
1269 {
1270     auto srcFlat = FlattenAllString(vm, src);
1271     uint32_t srcLength = srcFlat.GetLength();
1272     const char start = 'a';
1273     const char end = 'z';
1274     uint32_t lowerIndex = srcLength;
1275     Span<uint8_t> data(srcFlat.GetDataUtf8Writable(), srcLength);
1276     for (uint32_t index = 0; index < srcLength; ++index) {
1277         if (base::StringHelper::Utf8CharInRange(data[index], start, end)) {
1278             lowerIndex = index;
1279             break;
1280         }
1281     }
1282     if (lowerIndex == srcLength) {
1283         return *src;
1284     }
1285     return ConvertUtf8ToLowerOrUpper(vm, src, false, lowerIndex);
1286 }
1287 
1288 /* static */
ConvertUtf8ToLowerOrUpper(const EcmaVM * vm,const JSHandle<EcmaString> & src,bool toLower,uint32_t startIndex)1289 EcmaString *EcmaString::ConvertUtf8ToLowerOrUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src,
1290                                                   bool toLower, uint32_t startIndex)
1291 {
1292     const char start = toLower ? 'A' : 'a';
1293     const char end = toLower ? 'Z' : 'z';
1294     uint32_t srcLength = src->GetLength();
1295     JSHandle<EcmaString> newString(vm->GetJSThread(), CreateLineString(vm, srcLength, true));
1296     auto srcFlat = FlattenAllString(vm, src);
1297     Span<uint8_t> data(srcFlat.GetDataUtf8Writable(), srcLength);
1298     auto newStringPtr = newString->GetDataUtf8Writable();
1299     if (startIndex > 0) {
1300         if (memcpy_s(newStringPtr, startIndex * sizeof(uint8_t), data.data(), startIndex * sizeof(uint8_t)) != EOK) {
1301             LOG_FULL(FATAL) << "memcpy_s failed";
1302             UNREACHABLE();
1303         }
1304     }
1305     for (uint32_t index = startIndex; index < srcLength; ++index) {
1306         if (base::StringHelper::Utf8CharInRange(data[index], start, end)) {
1307             *(newStringPtr + index) = data[index] ^ (1 << 5);   // 1 and 5 means lower to upper or upper to lower
1308         } else {
1309             *(newStringPtr + index) = data[index];
1310         }
1311     }
1312     return *newString;
1313 }
1314 
1315 /* static */
ToUpper(const EcmaVM * vm,const JSHandle<EcmaString> & src)1316 EcmaString *EcmaString::ToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src)
1317 {
1318     FlatStringInfo srcFlat = FlattenAllString(vm, src);
1319     uint32_t srcLength = srcFlat.GetLength();
1320     auto factory = vm->GetFactory();
1321     if (srcFlat.IsUtf16()) {
1322         std::u16string u16str = base::StringHelper::Utf16ToU16String(srcFlat.GetDataUtf16(), srcLength);
1323         std::string res = base::StringHelper::ToUpper(u16str);
1324         return *(factory->NewFromStdString(res));
1325     } else {
1326         return ConvertUtf8ToLowerOrUpper(vm, src, false);
1327     }
1328 }
1329 
1330 /* static */
ToLocaleLower(const EcmaVM * vm,const JSHandle<EcmaString> & src,const icu::Locale & locale)1331 EcmaString *EcmaString::ToLocaleLower(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale)
1332 {
1333     auto factory = vm->GetFactory();
1334     FlatStringInfo srcFlat = FlattenAllString(vm, src);
1335     std::u16string utf16 = srcFlat.ToU16String();
1336     std::string res = base::StringHelper::ToLocaleLower(utf16, locale);
1337     return *(factory->NewFromStdString(res));
1338 }
1339 
1340 /* static */
ToLocaleUpper(const EcmaVM * vm,const JSHandle<EcmaString> & src,const icu::Locale & locale)1341 EcmaString *EcmaString::ToLocaleUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale)
1342 {
1343     auto factory = vm->GetFactory();
1344     FlatStringInfo srcFlat = FlattenAllString(vm, src);
1345     std::u16string utf16 = srcFlat.ToU16String();
1346     std::string res = base::StringHelper::ToLocaleUpper(utf16, locale);
1347     return *(factory->NewFromStdString(res));
1348 }
1349 
Trim(const JSThread * thread,const JSHandle<EcmaString> & src,TrimMode mode)1350 EcmaString *EcmaString::Trim(const JSThread *thread, const JSHandle<EcmaString> &src, TrimMode mode)
1351 {
1352     FlatStringInfo srcFlat = FlattenAllString(thread->GetEcmaVM(), src);
1353     uint32_t srcLen = srcFlat.GetLength();
1354     if (UNLIKELY(srcLen == 0)) {
1355         return EcmaString::Cast(thread->GlobalConstants()->GetEmptyString().GetTaggedObject());
1356     }
1357     if (srcFlat.IsUtf8()) {
1358         Span<const uint8_t> data(srcFlat.GetDataUtf8(), srcLen);
1359         return TrimBody(thread, src, data, mode);
1360     } else {
1361         Span<const uint16_t> data(srcFlat.GetDataUtf16(), srcLen);
1362         return TrimBody(thread, src, data, mode);
1363     }
1364 }
1365 
SlowFlatten(const EcmaVM * vm,const JSHandle<EcmaString> & string,MemSpaceType type)1366 EcmaString *EcmaString::SlowFlatten(const EcmaVM *vm, const JSHandle<EcmaString> &string, MemSpaceType type)
1367 {
1368     ASSERT(string->IsTreeString() || string->IsSlicedString());
1369     ASSERT(IsSMemSpace(type));
1370     auto thread = vm->GetJSThread();
1371     uint32_t length = string->GetLength();
1372     EcmaString *result = nullptr;
1373     if (string->IsUtf8()) {
1374         result = CreateLineStringWithSpaceType(vm, length, true, type);
1375         WriteToFlat<uint8_t>(*string, result->GetDataUtf8Writable(), length);
1376     } else {
1377         result = CreateLineStringWithSpaceType(vm, length, false, type);
1378         WriteToFlat<uint16_t>(*string, result->GetDataUtf16Writable(), length);
1379     }
1380     if (string->IsTreeString()) {
1381         JSHandle<TreeEcmaString> tree(string);
1382         ASSERT(EcmaString::Cast(tree->GetSecond())->GetLength() != 0);
1383         tree->SetFirst(thread, JSTaggedValue(result));
1384         tree->SetSecond(thread, JSTaggedValue(*vm->GetFactory()->GetEmptyString()));
1385     }
1386     return result;
1387 }
1388 
Flatten(const EcmaVM * vm,const JSHandle<EcmaString> & string,MemSpaceType type)1389 EcmaString *EcmaString::Flatten(const EcmaVM *vm, const JSHandle<EcmaString> &string, MemSpaceType type)
1390 {
1391     EcmaString *s = *string;
1392     if (!s->IsTreeString()) {
1393         return s;
1394     }
1395     JSHandle<TreeEcmaString> tree = JSHandle<TreeEcmaString>::Cast(string);
1396     if (!tree->IsFlat()) {
1397         return SlowFlatten(vm, string, type);
1398     }
1399     return EcmaString::Cast(tree->GetFirst());
1400 }
1401 
FlattenAllString(const EcmaVM * vm,const JSHandle<EcmaString> & string,MemSpaceType type)1402 FlatStringInfo EcmaString::FlattenAllString(const EcmaVM *vm, const JSHandle<EcmaString> &string, MemSpaceType type)
1403 {
1404     ASSERT(IsSMemSpace(type));
1405     EcmaString *s = *string;
1406     uint32_t startIndex = 0;
1407     if (s->IsLineOrConstantString()) {
1408         return FlatStringInfo(s, startIndex, s->GetLength());
1409     }
1410     if (string->IsTreeString()) {
1411         JSHandle<TreeEcmaString> tree = JSHandle<TreeEcmaString>::Cast(string);
1412         if (!tree->IsFlat()) {
1413             s = SlowFlatten(vm, string, type);
1414         } else {
1415             s = EcmaString::Cast(tree->GetFirst());
1416         }
1417     } else if (string->IsSlicedString()) {
1418         s = EcmaString::Cast(SlicedString::Cast(*string)->GetParent());
1419         startIndex = SlicedString::Cast(*string)->GetStartIndex();
1420     }
1421     return FlatStringInfo(s, startIndex, string->GetLength());
1422 }
1423 
FlattenNoGCForSnapshot(const EcmaVM * vm,EcmaString * string)1424 EcmaString *EcmaString::FlattenNoGCForSnapshot(const EcmaVM *vm, EcmaString *string)
1425 {
1426     DISALLOW_GARBAGE_COLLECTION;
1427     if (string->IsLineOrConstantString()) {
1428         return string;
1429     }
1430     if (string->IsTreeString()) {
1431         TreeEcmaString *tree = TreeEcmaString::Cast(string);
1432         if (tree->IsFlat()) {
1433             string = EcmaString::Cast(tree->GetFirst());
1434         } else {
1435             uint32_t length = tree->GetLength();
1436             EcmaString *result = nullptr;
1437             if (tree->IsUtf8()) {
1438                 result = CreateLineStringNoGC(vm, length, true);
1439                 WriteToFlat<uint8_t>(tree, result->GetDataUtf8Writable(), length);
1440             } else {
1441                 result = CreateLineStringNoGC(vm, length, false);
1442                 WriteToFlat<uint16_t>(tree, result->GetDataUtf16Writable(), length);
1443             }
1444             tree->SetFirst(vm->GetJSThread(), JSTaggedValue(result));
1445             tree->SetSecond(vm->GetJSThread(), JSTaggedValue(*vm->GetFactory()->GetEmptyString()));
1446             return result;
1447         }
1448     } else if (string->IsSlicedString()) {
1449         SlicedString *str = SlicedString::Cast(string);
1450         uint32_t length = str->GetLength();
1451         EcmaString *result = nullptr;
1452         if (str->IsUtf8()) {
1453             result = CreateLineStringNoGC(vm, length, true);
1454             WriteToFlat<uint8_t>(str, result->GetDataUtf8Writable(), length);
1455         } else {
1456             result = CreateLineStringNoGC(vm, length, false);
1457             WriteToFlat<uint16_t>(str, result->GetDataUtf16Writable(), length);
1458         }
1459         return result;
1460     }
1461     return string;
1462 }
1463 
GetUtf8DataFlat(const EcmaString * src,CVector<uint8_t> & buf)1464 const uint8_t *EcmaString::GetUtf8DataFlat(const EcmaString *src, CVector<uint8_t> &buf)
1465 {
1466     ASSERT(src->IsUtf8());
1467     uint32_t length = src->GetLength();
1468     EcmaString *string = const_cast<EcmaString *>(src);
1469     if (string->IsTreeString()) {
1470         if (string->IsFlat()) {
1471             string = EcmaString::Cast(TreeEcmaString::Cast(string)->GetFirst());
1472         } else {
1473             buf.reserve(length);
1474             WriteToFlat(string, buf.data(), length);
1475             return buf.data();
1476         }
1477     } else if (string->IsSlicedString()) {
1478         SlicedString *str = SlicedString::Cast(string);
1479         return EcmaString::Cast(str->GetParent())->GetDataUtf8() + str->GetStartIndex();
1480     }
1481     return string->GetDataUtf8();
1482 }
1483 
GetNonTreeUtf8Data(const EcmaString * src)1484 const uint8_t *EcmaString::GetNonTreeUtf8Data(const EcmaString *src)
1485 {
1486     ASSERT(src->IsUtf8());
1487     ASSERT(!src->IsTreeString());
1488     EcmaString *string = const_cast<EcmaString *>(src);
1489     if (string->IsSlicedString()) {
1490         SlicedString *str = SlicedString::Cast(string);
1491         return EcmaString::Cast(str->GetParent())->GetDataUtf8() + str->GetStartIndex();
1492     }
1493     ASSERT(src->IsLineOrConstantString());
1494     return string->GetDataUtf8();
1495 }
1496 
GetUtf16DataFlat(const EcmaString * src,CVector<uint16_t> & buf)1497 const uint16_t *EcmaString::GetUtf16DataFlat(const EcmaString *src, CVector<uint16_t> &buf)
1498 {
1499     ASSERT(src->IsUtf16());
1500     uint32_t length = src->GetLength();
1501     EcmaString *string = const_cast<EcmaString *>(src);
1502     if (string->IsTreeString()) {
1503         if (string->IsFlat()) {
1504             string = EcmaString::Cast(TreeEcmaString::Cast(string)->GetFirst());
1505         } else {
1506             buf.reserve(length);
1507             WriteToFlat(string, buf.data(), length);
1508             return buf.data();
1509         }
1510     } else if (string->IsSlicedString()) {
1511         SlicedString *str = SlicedString::Cast(string);
1512         return EcmaString::Cast(str->GetParent())->GetDataUtf16() + str->GetStartIndex();
1513     }
1514     return string->GetDataUtf16();
1515 }
1516 
GetNonTreeUtf16Data(const EcmaString * src)1517 const uint16_t *EcmaString::GetNonTreeUtf16Data(const EcmaString *src)
1518 {
1519     ASSERT(src->IsUtf16());
1520     ASSERT(!src->IsTreeString());
1521     EcmaString *string = const_cast<EcmaString *>(src);
1522     if (string->IsSlicedString()) {
1523         SlicedString *str = SlicedString::Cast(string);
1524         return EcmaString::Cast(str->GetParent())->GetDataUtf16() + str->GetStartIndex();
1525     }
1526     ASSERT(src->IsLineOrConstantString());
1527     return string->GetDataUtf16();
1528 }
1529 
ToU16String(uint32_t len)1530 std::u16string FlatStringInfo::ToU16String(uint32_t len)
1531 {
1532     uint32_t length = len > 0 ? len : GetLength();
1533     std::u16string result;
1534     if (IsUtf16()) {
1535         const uint16_t *data = this->GetDataUtf16();
1536         result = base::StringHelper::Utf16ToU16String(data, length);
1537     } else {
1538         const uint8_t *data = this->GetDataUtf8();
1539         result = base::StringHelper::Utf8ToU16String(data, length);
1540     }
1541     return result;
1542 }
1543 
EcmaStringAccessor(TaggedObject * obj)1544 EcmaStringAccessor::EcmaStringAccessor(TaggedObject *obj)
1545 {
1546     ASSERT(obj != nullptr);
1547     string_ = EcmaString::Cast(obj);
1548 }
1549 
EcmaStringAccessor(JSTaggedValue value)1550 EcmaStringAccessor::EcmaStringAccessor(JSTaggedValue value)
1551 {
1552     ASSERT(value.IsString());
1553     string_ = EcmaString::Cast(value.GetTaggedObject());
1554 }
1555 
EcmaStringAccessor(const JSHandle<EcmaString> & strHandle)1556 EcmaStringAccessor::EcmaStringAccessor(const JSHandle<EcmaString> &strHandle)
1557     : string_(*strHandle)
1558 {
1559 }
1560 
ToStdString(StringConvertedUsage usage)1561 std::string EcmaStringAccessor::ToStdString(StringConvertedUsage usage)
1562 {
1563     if (string_ == nullptr) {
1564         return "";
1565     }
1566     bool modify = (usage != StringConvertedUsage::PRINT);
1567     CVector<uint8_t> buf;
1568     Span<const uint8_t> sp = string_->ToUtf8Span(buf, modify);
1569 #if ENABLE_NEXT_OPTIMIZATION
1570     return std::string(reinterpret_cast<const char*>(sp.data()), sp.size());
1571 #else
1572     std::string res;
1573     res.reserve(sp.size());
1574     for (const auto &c : sp) {
1575         res.push_back(c);
1576     }
1577     return res;
1578 #endif
1579 }
1580 
Utf8ConvertToString()1581 CString EcmaStringAccessor::Utf8ConvertToString()
1582 {
1583     if (string_ == nullptr) {
1584         return CString("");
1585     }
1586     if (IsUtf8()) {
1587         std::string stdStr;
1588         if (IsLineString()) {
1589             return base::StringHelper::Utf8ToCString(GetDataUtf8(), GetLength());
1590         }
1591         CVector<uint8_t> buf;
1592         const uint8_t *data = EcmaString::GetUtf8DataFlat(string_, buf);
1593         return base::StringHelper::Utf8ToCString(data, GetLength());
1594     } else {
1595         return ToCString();
1596     }
1597 }
1598 
DebuggerToStdString(StringConvertedUsage usage)1599 std::string EcmaStringAccessor::DebuggerToStdString(StringConvertedUsage usage)
1600 {
1601     if (string_ == nullptr) {
1602         return "";
1603     }
1604 
1605     bool modify = (usage != StringConvertedUsage::PRINT);
1606     CVector<uint8_t> buf;
1607     Span<const uint8_t> sp = string_->DebuggerToUtf8Span(buf, modify);
1608 #if ENABLE_NEXT_OPTIMIZATION
1609     return std::string(reinterpret_cast<const char*>(sp.data()), sp.size());
1610 #else
1611     std::string res;
1612     res.reserve(sp.size());
1613     for (const auto &c : sp) {
1614         res.push_back(c);
1615     }
1616     return res;
1617 #endif
1618 }
1619 
ToCString(StringConvertedUsage usage,bool cesu8)1620 CString EcmaStringAccessor::ToCString(StringConvertedUsage usage, bool cesu8)
1621 {
1622     if (string_ == nullptr) {
1623         return "";
1624     }
1625     bool modify = (usage != StringConvertedUsage::PRINT);
1626     CVector<uint8_t> buf;
1627     Span<const uint8_t> sp = string_->ToUtf8Span(buf, modify, cesu8);
1628 #if ENABLE_NEXT_OPTIMIZATION
1629     return CString(reinterpret_cast<const char*>(sp.data()), sp.size());
1630 #else
1631     CString res;
1632     res.reserve(sp.size());
1633     for (const auto &c : sp) {
1634         res.push_back(c);
1635     }
1636     return res;
1637 #endif
1638 }
1639 
AppendToCString(CString & str,StringConvertedUsage usage,bool cesu8)1640 void EcmaStringAccessor::AppendToCString(CString &str, StringConvertedUsage usage, bool cesu8)
1641 {
1642     if (string_ == nullptr) {
1643         return;
1644     }
1645     bool modify = (usage != StringConvertedUsage::PRINT);
1646     CVector<uint8_t> buf;
1647     Span<const uint8_t> sp = string_->ToUtf8Span(buf, modify, cesu8);
1648     str.append(reinterpret_cast<const char*>(sp.data()), sp.size());
1649 }
1650 
AppendQuotedStringToCString(CString & str,StringConvertedUsage usage,bool cesu8)1651 void EcmaStringAccessor::AppendQuotedStringToCString(CString &str, StringConvertedUsage usage, bool cesu8)
1652 {
1653     if (string_ == nullptr) {
1654         return;
1655     }
1656     bool modify = (usage != StringConvertedUsage::PRINT);
1657     CVector<uint8_t> buf;
1658     Span<const uint8_t> sp = string_->ToUtf8Span(buf, modify, cesu8);
1659     base::JsonHelper::AppendValueToQuotedString(sp, str);
1660 }
1661 
1662 // static
CreateLineString(const EcmaVM * vm,size_t length,bool compressed)1663 EcmaString *EcmaStringAccessor::CreateLineString(const EcmaVM *vm, size_t length, bool compressed)
1664 {
1665     return EcmaString::CreateLineString(vm, length, compressed);
1666 }
1667 }  // namespace panda::ecmascript
1668