• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "ecmascript/ecma_string-inl.h"
17 
18 #include "ecmascript/ecma_string_table.h"
19 
20 namespace panda::ecmascript {
21 
// Masks that keep only the lowest 3, 4, 5 or 6 bits of a value.
constexpr size_t LOW_3BITS = (size_t{1} << 3U) - 1;  // 0x07
constexpr size_t LOW_4BITS = (size_t{1} << 4U) - 1;  // 0x0F
constexpr size_t LOW_5BITS = (size_t{1} << 5U) - 1;  // 0x1F
constexpr size_t LOW_6BITS = (size_t{1} << 6U) - 1;  // 0x3F
// First code unit of the UTF-16 low (trail) and high (lead) surrogate ranges.
constexpr size_t L_SURROGATE_START = 0xDC00;
constexpr size_t H_SURROGATE_START = 0xD800;
// First code point above the Basic Multilingual Plane, i.e. the first one that needs a surrogate pair.
constexpr size_t SURROGATE_RAIR_START = 0x10000;
// Bit positions; presumably shift distances for packing/unpacking code points - confirm at the use sites.
constexpr size_t OFFSET_18POS = 18;
constexpr size_t OFFSET_12POS = 12;
constexpr size_t OFFSET_10POS = 10;
constexpr size_t OFFSET_6POS = 6;
33 
Concat(const EcmaVM * vm,const JSHandle<EcmaString> & left,const JSHandle<EcmaString> & right,MemSpaceType type)34 EcmaString *EcmaString::Concat(const EcmaVM *vm,
35     const JSHandle<EcmaString> &left, const JSHandle<EcmaString> &right, MemSpaceType type)
36 {
37     ASSERT(IsSMemSpace(type));
38     // allocator may trig gc and move src, need to hold it
39     EcmaString *strLeft = *left;
40     EcmaString *strRight = *right;
41     uint32_t leftLength = strLeft->GetLength();
42     uint32_t rightLength = strRight->GetLength();
43     uint32_t newLength = leftLength + rightLength;
44     if (newLength == 0) {
45         return vm->GetFactory()->GetEmptyString().GetObject<EcmaString>();
46     }
47 
48     if (leftLength == 0) {
49         return strRight;
50     }
51     if (rightLength == 0) {
52         return strLeft;
53     }
54     // if the result string is small, make a LineString
55     bool compressed = (strLeft->IsUtf8() && strRight->IsUtf8());
56     if (newLength < TreeEcmaString::MIN_TREE_ECMASTRING_LENGTH) {
57         ASSERT(strLeft->IsLineOrConstantString());
58         ASSERT(strRight->IsLineOrConstantString());
59         auto newString = CreateLineStringWithSpaceType(vm, newLength, compressed, type);
60         // retrieve strings after gc
61         strLeft = *left;
62         strRight = *right;
63         if (compressed) {
64             // copy left part
65             Span<uint8_t> sp(newString->GetDataUtf8Writable(), newLength);
66             Span<const uint8_t> srcLeft(strLeft->GetDataUtf8(), leftLength);
67             EcmaString::MemCopyChars(sp, newLength, srcLeft, leftLength);
68             // copy right part
69             sp = sp.SubSpan(leftLength);
70             Span<const uint8_t> srcRight(strRight->GetDataUtf8(), rightLength);
71             EcmaString::MemCopyChars(sp, rightLength, srcRight, rightLength);
72         } else {
73             // copy left part
74             Span<uint16_t> sp(newString->GetDataUtf16Writable(), newLength);
75             if (strLeft->IsUtf8()) {
76                 EcmaString::CopyChars(sp.data(), strLeft->GetDataUtf8(), leftLength);
77             } else {
78                 Span<const uint16_t> srcLeft(strLeft->GetDataUtf16(), leftLength);
79                 EcmaString::MemCopyChars(sp, newLength << 1U, srcLeft, leftLength << 1U);
80             }
81             // copy right part
82             sp = sp.SubSpan(leftLength);
83             if (strRight->IsUtf8()) {
84                 EcmaString::CopyChars(sp.data(), strRight->GetDataUtf8(), rightLength);
85             } else {
86                 Span<const uint16_t> srcRight(strRight->GetDataUtf16(), rightLength);
87                 EcmaString::MemCopyChars(sp, rightLength << 1U, srcRight, rightLength << 1U);
88             }
89         }
90         ASSERT_PRINT(compressed == CanBeCompressed(newString), "compressed does not match the real value!");
91         return newString;
92     }
93     return CreateTreeString(vm, left, right, newLength, compressed);
94 }
95 
96 /* static */
CopyStringToOldSpace(const EcmaVM * vm,const JSHandle<EcmaString> & original,uint32_t length,bool compressed)97 EcmaString *EcmaString::CopyStringToOldSpace(const EcmaVM *vm, const JSHandle<EcmaString> &original,
98     uint32_t length, bool compressed)
99 {
100     if (original->IsConstantString()) {
101         return CreateConstantString(vm, original->GetDataUtf8(), length, MemSpaceType::OLD_SPACE);
102     }
103     JSHandle<EcmaString> newString(vm->GetJSThread(),
104         CreateLineStringWithSpaceType(vm, length, compressed, MemSpaceType::OLD_SPACE));
105     auto strOrigin = FlattenAllString(vm, original);
106     if (compressed) {
107         // copy
108         Span<uint8_t> sp(newString->GetDataUtf8Writable(), length);
109         Span<const uint8_t> srcSp(strOrigin.GetDataUtf8(), length);
110         EcmaString::MemCopyChars(sp, length, srcSp, length);
111     } else {
112         // copy left part
113         Span<uint16_t> sp(newString->GetDataUtf16Writable(), length);
114         if (strOrigin.IsUtf8()) {
115             EcmaString::CopyChars(sp.data(), strOrigin.GetDataUtf8(), length);
116         } else {
117             Span<const uint16_t> srcSp(strOrigin.GetDataUtf16(), length);
118             EcmaString::MemCopyChars(sp, length << 1U, srcSp, length << 1U);
119         }
120     }
121     ASSERT_PRINT(compressed == CanBeCompressed(*newString), "compressed does not match the real value!");
122     return *newString;
123 }
124 
125 /* static */
FastSubString(const EcmaVM * vm,const JSHandle<EcmaString> & src,uint32_t start,uint32_t length)126 EcmaString *EcmaString::FastSubString(const EcmaVM *vm,
127     const JSHandle<EcmaString> &src, uint32_t start, uint32_t length)
128 {
129     ASSERT((start + length) <= src->GetLength());
130     if (length == 0) {
131         return *vm->GetFactory()->GetEmptyString();
132     }
133     if (start == 0 && length == src->GetLength()) {
134         return *src;
135     }
136     if (src->IsUtf8()) {
137         return FastSubUtf8String(vm, src, start, length);
138     }
139     return FastSubUtf16String(vm, src, start, length);
140 }
141 
142 /* static */
GetSlicedString(const EcmaVM * vm,const JSHandle<EcmaString> & src,uint32_t start,uint32_t length)143 EcmaString *EcmaString::GetSlicedString(const EcmaVM *vm,
144     const JSHandle<EcmaString> &src, uint32_t start, uint32_t length)
145 {
146     ASSERT((start + length) <= src->GetLength());
147     JSHandle<SlicedString> slicedString(vm->GetJSThread(), CreateSlicedString(vm));
148     FlatStringInfo srcFlat = FlattenAllString(vm, src);
149     slicedString->SetLength(length, srcFlat.GetString()->IsUtf8());
150     slicedString->SetParent(vm->GetJSThread(), JSTaggedValue(srcFlat.GetString()));
151     slicedString->SetStartIndex(start + srcFlat.GetStartIndex());
152     return *slicedString;
153 }
154 
155 /* static */
GetSubString(const EcmaVM * vm,const JSHandle<EcmaString> & src,uint32_t start,uint32_t length)156 EcmaString *EcmaString::GetSubString(const EcmaVM *vm,
157     const JSHandle<EcmaString> &src, uint32_t start, uint32_t length)
158 {
159     ASSERT((start + length) <= src->GetLength());
160     if (length == 1) {
161         JSThread *thread = vm->GetJSThread();
162         uint16_t res = EcmaStringAccessor(src).Get<false>(start);
163         if (EcmaStringAccessor::CanBeCompressed(&res, 1)) {
164             JSHandle<SingleCharTable> singleCharTable(thread, thread->GetSingleCharTable());
165             return EcmaString::Cast(singleCharTable->GetStringFromSingleCharTable(res).GetTaggedObject());
166         }
167     }
168     if (static_cast<uint32_t>(length) >= SlicedString::MIN_SLICED_ECMASTRING_LENGTH) {
169         if (start == 0 && length == src->GetLength()) {
170             return *src;
171         }
172         if (src->IsUtf16()) {
173             FlatStringInfo srcFlat = FlattenAllString(vm, src);
174             bool canBeCompressed = CanBeCompressed(srcFlat.GetDataUtf16() + start, length);
175             if (canBeCompressed) {
176                 JSHandle<EcmaString> string(vm->GetJSThread(), CreateLineString(vm, length, canBeCompressed));
177                 srcFlat = FlattenAllString(vm, src);
178                 CopyChars(string->GetDataUtf8Writable(), srcFlat.GetDataUtf16() + start, length);
179                 return *string;
180             }
181         }
182         return GetSlicedString(vm, src, start, length);
183     }
184     return FastSubString(vm, src, start, length);
185 }
186 
WriteData(EcmaString * src,uint32_t start,uint32_t destSize,uint32_t length)187 void EcmaString::WriteData(EcmaString *src, uint32_t start, uint32_t destSize, uint32_t length)
188 {
189     ASSERT(IsLineString() && !IsConstantString());
190     if (IsUtf8()) {
191         ASSERT(src->IsUtf8());
192         CVector<uint8_t> buf;
193         const uint8_t *data = EcmaString::GetUtf8DataFlat(src, buf);
194         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
195         if (length != 0 && memcpy_s(GetDataUtf8Writable() + start, destSize, data, length) != EOK) {
196             LOG_FULL(FATAL) << "memcpy_s failed";
197             UNREACHABLE();
198         }
199     } else if (src->IsUtf8()) {
200         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
201         CVector<uint8_t> buf;
202         const uint8_t *data = EcmaString::GetUtf8DataFlat(src, buf);
203         Span<uint16_t> to(GetDataUtf16Writable() + start, length);
204         Span<const uint8_t> from(data, length);
205         for (uint32_t i = 0; i < length; i++) {
206             to[i] = from[i];
207         }
208     } else {
209         CVector<uint16_t> buf;
210         const uint16_t *data = EcmaString::GetUtf16DataFlat(src, buf);
211         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
212         if (length != 0 && memcpy_s(GetDataUtf16Writable() + start,
213             destSize * sizeof(uint16_t), data, length * sizeof(uint16_t)) != EOK) {
214             LOG_FULL(FATAL) << "memcpy_s failed";
215             UNREACHABLE();
216         }
217     }
218 }
219 
220 template<typename T1, typename T2>
CompareStringSpan(Span<T1> & lhsSp,Span<T2> & rhsSp,int32_t count)221 int32_t CompareStringSpan(Span<T1> &lhsSp, Span<T2> &rhsSp, int32_t count)
222 {
223     for (int32_t i = 0; i < count; ++i) {
224         auto left = static_cast<int32_t>(lhsSp[i]);
225         auto right = static_cast<int32_t>(rhsSp[i]);
226         if (left != right) {
227             return left - right;
228         }
229     }
230     return 0;
231 }
232 
Compare(const EcmaVM * vm,const JSHandle<EcmaString> & left,const JSHandle<EcmaString> & right)233 int32_t EcmaString::Compare(const EcmaVM *vm, const JSHandle<EcmaString> &left, const JSHandle<EcmaString> &right)
234 {
235     if (*left == *right) {
236         return 0;
237     }
238     FlatStringInfo lhs = FlattenAllString(vm, left);
239     JSHandle<EcmaString> string(vm->GetJSThread(), lhs.GetString());
240     FlatStringInfo rhs = FlattenAllString(vm, right);
241     lhs.SetString(*string);
242     int32_t lhsCount = static_cast<int32_t>(lhs.GetLength());
243     int32_t rhsCount = static_cast<int32_t>(rhs.GetLength());
244     int32_t countDiff = lhsCount - rhsCount;
245     int32_t minCount = (countDiff < 0) ? lhsCount : rhsCount;
246     if (!lhs.IsUtf16() && !rhs.IsUtf16()) {
247         Span<const uint8_t> lhsSp(lhs.GetDataUtf8(), lhsCount);
248         Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
249         int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
250         if (charDiff != 0) {
251             return charDiff;
252         }
253     } else if (!lhs.IsUtf16()) {
254         Span<const uint8_t> lhsSp(lhs.GetDataUtf8(), lhsCount);
255         Span<const uint16_t> rhsSp(rhs.GetDataUtf16(), rhsCount);
256         int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
257         if (charDiff != 0) {
258             return charDiff;
259         }
260     } else if (!rhs.IsUtf16()) {
261         Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), rhsCount);
262         Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), lhsCount);
263         int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
264         if (charDiff != 0) {
265             return charDiff;
266         }
267     } else {
268         Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
269         Span<const uint16_t> rhsSp(rhs.GetDataUtf16(), rhsCount);
270         int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
271         if (charDiff != 0) {
272             return charDiff;
273         }
274     }
275     return countDiff;
276 }
277 
278 template<typename T1, typename T2>
IsSubStringAtSpan(Span<T1> & lhsSp,Span<T2> & rhsSp,uint32_t offset)279 bool IsSubStringAtSpan(Span<T1> &lhsSp, Span<T2> &rhsSp, uint32_t offset)
280 {
281     int rhsSize = static_cast<int>(rhsSp.size());
282     ASSERT(rhsSize + offset <= lhsSp.size());
283     for (int i = 0; i < rhsSize; ++i) {
284         auto left = static_cast<int32_t>(lhsSp[offset + static_cast<uint32_t>(i)]);
285         auto right = static_cast<int32_t>(rhsSp[i]);
286         if (left != right) {
287             return false;
288         }
289     }
290     return true;
291 }
292 
293 
294 /**
295  * left: text string
296  * right: pattern string
297  * example 1: IsSubStringAt("IsSubStringAt", "Is", 0) return true
298  * example 2: IsSubStringAt("IsSubStringAt", "It", 0) return false
299 */
IsSubStringAt(const EcmaVM * vm,const JSHandle<EcmaString> & left,const JSHandle<EcmaString> & right,uint32_t offset)300 bool EcmaString::IsSubStringAt(const EcmaVM *vm, const JSHandle<EcmaString>& left,
301     const JSHandle<EcmaString>& right, uint32_t offset)
302 {
303     FlatStringInfo lhs = FlattenAllString(vm, left);
304     JSHandle<EcmaString> string(vm->GetJSThread(), lhs.GetString());
305     FlatStringInfo rhs = FlattenAllString(vm, right);
306     lhs.SetString(*string);
307     int32_t lhsCount = static_cast<int32_t>(lhs.GetLength());
308     int32_t rhsCount = static_cast<int32_t>(rhs.GetLength());
309     if (!lhs.IsUtf16() && !rhs.IsUtf16()) {
310         Span<const uint8_t> lhsSp(lhs.GetDataUtf8(), lhsCount);
311         Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
312         return IsSubStringAtSpan(lhsSp, rhsSp, offset);
313     } else if (!lhs.IsUtf16()) {
314         Span<const uint8_t> lhsSp(lhs.GetDataUtf8(), lhsCount);
315         Span<const uint16_t> rhsSp(rhs.GetDataUtf16(), rhsCount);
316         return IsSubStringAtSpan(lhsSp, rhsSp, offset);
317     } else if (!rhs.IsUtf16()) {
318         Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
319         Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
320         return IsSubStringAtSpan(lhsSp, rhsSp, offset);
321     } else {
322         Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
323         Span<const uint16_t> rhsSp(rhs.GetDataUtf16(), rhsCount);
324         return IsSubStringAtSpan(lhsSp, rhsSp, offset);
325     }
326     return false;
327 }
328 
329 /* static */
330 template<typename T1, typename T2>
IndexOf(Span<const T1> & lhsSp,Span<const T2> & rhsSp,int32_t pos,int32_t max)331 int32_t EcmaString::IndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos, int32_t max)
332 {
333     ASSERT(rhsSp.size() > 0);
334     auto first = static_cast<int32_t>(rhsSp[0]);
335     for (int32_t i = pos; i <= max; i++) {
336         if (static_cast<int32_t>(lhsSp[i]) != first) {
337             i++;
338             while (i <= max && static_cast<int32_t>(lhsSp[i]) != first) {
339                 i++;
340             }
341         }
342         /* Found first character, now look at the rest of rhsSp */
343         if (i <= max) {
344             int j = i + 1;
345             int end = j + static_cast<int>(rhsSp.size()) - 1;
346 
347             for (int k = 1; j < end && static_cast<int32_t>(lhsSp[j]) == static_cast<int32_t>(rhsSp[k]); j++, k++) {
348             }
349             if (j == end) {
350                 /* Found whole string. */
351                 return i;
352             }
353         }
354     }
355     return -1;
356 }
357 
358 template<typename T1, typename T2>
LastIndexOf(Span<const T1> & lhsSp,Span<const T2> & rhsSp,int32_t pos)359 int32_t EcmaString::LastIndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos)
360 {
361     int rhsSize = static_cast<int>(rhsSp.size());
362     ASSERT(rhsSize > 0);
363     auto first = rhsSp[0];
364     for (int32_t i = pos; i >= 0; i--) {
365         if (lhsSp[i] != first) {
366             continue;
367         }
368         /* Found first character, now look at the rest of rhsSp */
369         int j = 1;
370         while (j < rhsSize) {
371             if (rhsSp[j] != lhsSp[i + j]) {
372                 break;
373             }
374             j++;
375         }
376         if (j == rhsSize) {
377             return i;
378         }
379     }
380     return -1;
381 }
382 
IndexOf(const EcmaVM * vm,const JSHandle<EcmaString> & receiver,const JSHandle<EcmaString> & search,int pos)383 int32_t EcmaString::IndexOf(const EcmaVM *vm,
384     const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos)
385 {
386     EcmaString *lhstring = *receiver;
387     EcmaString *rhstring = *search;
388     if (lhstring == nullptr || rhstring == nullptr) {
389         return -1;
390     }
391     int32_t lhsCount = static_cast<int32_t>(lhstring->GetLength());
392     int32_t rhsCount = static_cast<int32_t>(rhstring->GetLength());
393 
394     if (pos > lhsCount) {
395         return -1;
396     }
397 
398     if (rhsCount == 0) {
399         return pos;
400     }
401 
402     if (pos < 0) {
403         pos = 0;
404     }
405 
406     int32_t max = lhsCount - rhsCount;
407     if (max < 0) {
408         return -1;
409     }
410 
411     if (pos + rhsCount > lhsCount) {
412         return -1;
413     }
414 
415     FlatStringInfo lhs = FlattenAllString(vm, receiver);
416     JSHandle<EcmaString> string(vm->GetJSThread(), lhs.GetString());
417     FlatStringInfo rhs = FlattenAllString(vm, search);
418     lhs.SetString(*string);
419 
420     if (rhs.IsUtf8() && lhs.IsUtf8()) {
421         Span<const uint8_t> lhsSp(lhs.GetDataUtf8(), lhsCount);
422         Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
423         return EcmaString::IndexOf(lhsSp, rhsSp, pos, max);
424     } else if (rhs.IsUtf16() && lhs.IsUtf16()) {  // NOLINT(readability-else-after-return)
425         Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
426         Span<const uint16_t> rhsSp(rhs.GetDataUtf16(), rhsCount);
427         return EcmaString::IndexOf(lhsSp, rhsSp, pos, max);
428     } else if (rhs.IsUtf16()) {
429         return -1;
430     } else {  // NOLINT(readability-else-after-return)
431         Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
432         Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
433         return EcmaString::IndexOf(lhsSp, rhsSp, pos, max);
434     }
435 }
436 
LastIndexOf(const EcmaVM * vm,const JSHandle<EcmaString> & receiver,const JSHandle<EcmaString> & search,int pos)437 int32_t EcmaString::LastIndexOf(const EcmaVM *vm,
438     const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos)
439 {
440     EcmaString *lhstring = *receiver;
441     EcmaString *rhstring = *search;
442     if (lhstring == nullptr || rhstring == nullptr) {
443         return -1;
444     }
445 
446     int32_t lhsCount = static_cast<int32_t>(lhstring->GetLength());
447     int32_t rhsCount = static_cast<int32_t>(rhstring->GetLength());
448     if (lhsCount < rhsCount) {
449         return -1;
450     }
451 
452     if (pos < 0) {
453         pos = 0;
454     }
455 
456     if (pos > lhsCount) {
457         pos = lhsCount;
458     }
459 
460     if (pos + rhsCount > lhsCount) {
461         pos = lhsCount - rhsCount;
462     }
463 
464     if (rhsCount == 0) {
465         return pos;
466     }
467 
468     FlatStringInfo lhs = FlattenAllString(vm, receiver);
469     JSHandle<EcmaString> string(vm->GetJSThread(), lhs.GetString());
470     FlatStringInfo rhs = FlattenAllString(vm, search);
471     lhs.SetString(*string);
472     if (rhs.IsUtf8() && lhs.IsUtf8()) {
473         Span<const uint8_t> lhsSp(lhs.GetDataUtf8(), lhsCount);
474         Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
475         return EcmaString::LastIndexOf(lhsSp, rhsSp, pos);
476     } else if (rhs.IsUtf16() && lhs.IsUtf16()) {  // NOLINT(readability-else-after-return)
477         Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
478         Span<const uint16_t> rhsSp(rhs.GetDataUtf16(), rhsCount);
479         return EcmaString::LastIndexOf(lhsSp, rhsSp, pos);
480     } else if (rhs.IsUtf16()) {
481         return -1;
482     } else {  // NOLINT(readability-else-after-return)
483         Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
484         Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
485         return EcmaString::LastIndexOf(lhsSp, rhsSp, pos);
486     }
487 }
488 
ToU16String(uint32_t len)489 std::u16string EcmaString::ToU16String(uint32_t len)
490 {
491     uint32_t length = len > 0 ? len : GetLength();
492     std::u16string result;
493     if (IsUtf16()) {
494         CVector<uint16_t> buf;
495         const uint16_t *data = EcmaString::GetUtf16DataFlat(this, buf);
496         result = base::StringHelper::Utf16ToU16String(data, length);
497     } else {
498         CVector<uint8_t> buf;
499         const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
500         result = base::StringHelper::Utf8ToU16String(data, length);
501     }
502     return result;
503 }
504 
505 // static
CanBeCompressed(const EcmaString * string)506 bool EcmaString::CanBeCompressed(const EcmaString *string)
507 {
508     ASSERT(string->IsLineOrConstantString());
509     if (string->IsUtf8()) {
510         return CanBeCompressed(string->GetDataUtf8(), string->GetLength());
511     }
512     return CanBeCompressed(string->GetDataUtf16(), string->GetLength());
513 }
514 
515 // static
CanBeCompressed(const uint8_t * utf8Data,uint32_t utf8Len)516 bool EcmaString::CanBeCompressed(const uint8_t *utf8Data, uint32_t utf8Len)
517 {
518     bool isCompressed = true;
519     uint32_t index = 0;
520     // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
521     while (index < utf8Len) {
522         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
523         if (!IsASCIICharacter(utf8Data[index])) {
524             isCompressed = false;
525             break;
526         }
527         ++index;
528     }
529     return isCompressed;
530 }
531 
532 /* static */
CanBeCompressed(const uint16_t * utf16Data,uint32_t utf16Len)533 bool EcmaString::CanBeCompressed(const uint16_t *utf16Data, uint32_t utf16Len)
534 {
535     bool isCompressed = true;
536     Span<const uint16_t> data(utf16Data, utf16Len);
537     for (uint32_t i = 0; i < utf16Len; i++) {
538         if (!IsASCIICharacter(data[i])) {
539             isCompressed = false;
540             break;
541         }
542     }
543     return isCompressed;
544 }
545 
EqualToSplicedString(const EcmaString * str1,const EcmaString * str2)546 bool EcmaString::EqualToSplicedString(const EcmaString *str1, const EcmaString *str2)
547 {
548     ASSERT(NotTreeString());
549     ASSERT(str1->NotTreeString() && str2->NotTreeString());
550     if (GetLength() != str1->GetLength() + str2->GetLength()) {
551         return false;
552     }
553     if (IsUtf16()) {
554         CVector<uint16_t> buf;
555         const uint16_t *data = EcmaString::GetUtf16DataFlat(this, buf);
556         if (EcmaString::StringsAreEqualUtf16(str1, data, str1->GetLength())) {
557             return EcmaString::StringsAreEqualUtf16(str2, data + str1->GetLength(), str2->GetLength());
558         }
559     } else {
560         CVector<uint8_t> buf;
561         const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
562         if (EcmaString::StringIsEqualUint8Data(str1, data, str1->GetLength(), this->IsUtf8())) {
563             return EcmaString::StringIsEqualUint8Data(str2, data + str1->GetLength(),
564                                                       str2->GetLength(), this->IsUtf8());
565         }
566     }
567     return false;
568 }
569 
570 /* static */
StringsAreEqualDiffUtfEncoding(EcmaString * left,EcmaString * right)571 bool EcmaString::StringsAreEqualDiffUtfEncoding(EcmaString *left, EcmaString *right)
572 {
573     CVector<uint16_t> bufLeftUft16;
574     CVector<uint16_t> bufRightUft16;
575     CVector<uint8_t> bufLeftUft8;
576     CVector<uint8_t> bufRightUft8;
577     int32_t lhsCount = static_cast<int32_t>(left->GetLength());
578     int32_t rhsCount = static_cast<int32_t>(right->GetLength());
579     if (!left->IsUtf16() && !right->IsUtf16()) {
580         const uint8_t *data1 = EcmaString::GetUtf8DataFlat(left, bufLeftUft8);
581         const uint8_t *data2 = EcmaString::GetUtf8DataFlat(right, bufRightUft8);
582         Span<const uint8_t> lhsSp(data1, lhsCount);
583         Span<const uint8_t> rhsSp(data2, rhsCount);
584         return EcmaString::StringsAreEquals(lhsSp, rhsSp);
585     } else if (!left->IsUtf16()) {
586         const uint8_t *data1 = EcmaString::GetUtf8DataFlat(left, bufLeftUft8);
587         const uint16_t *data2 = EcmaString::GetUtf16DataFlat(right, bufRightUft16);
588         Span<const uint8_t> lhsSp(data1, lhsCount);
589         Span<const uint16_t> rhsSp(data2, rhsCount);
590         return EcmaString::StringsAreEquals(lhsSp, rhsSp);
591     } else if (!right->IsUtf16()) {
592         const uint16_t *data1 = EcmaString::GetUtf16DataFlat(left, bufLeftUft16);
593         const uint8_t *data2 = EcmaString::GetUtf8DataFlat(right, bufRightUft8);
594         Span<const uint16_t> lhsSp(data1, lhsCount);
595         Span<const uint8_t> rhsSp(data2, rhsCount);
596         return EcmaString::StringsAreEquals(lhsSp, rhsSp);
597     } else {
598         const uint16_t *data1 = EcmaString::GetUtf16DataFlat(left, bufLeftUft16);
599         const uint16_t *data2 = EcmaString::GetUtf16DataFlat(right, bufRightUft16);
600         Span<const uint16_t> lhsSp(data1, lhsCount);
601         Span<const uint16_t> rhsSp(data2, rhsCount);
602         return EcmaString::StringsAreEquals(lhsSp, rhsSp);
603     }
604 }
605 
606 /* static */
StringsAreEqualDiffUtfEncoding(const FlatStringInfo & left,const FlatStringInfo & right)607 bool EcmaString::StringsAreEqualDiffUtfEncoding(const FlatStringInfo &left, const FlatStringInfo &right)
608 {
609     int32_t lhsCount = static_cast<int32_t>(left.GetLength());
610     int32_t rhsCount = static_cast<int32_t>(right.GetLength());
611     if (!left.IsUtf16() && !right.IsUtf16()) {
612         Span<const uint8_t> lhsSp(left.GetDataUtf8(), lhsCount);
613         Span<const uint8_t> rhsSp(right.GetDataUtf8(), rhsCount);
614         return EcmaString::StringsAreEquals(lhsSp, rhsSp);
615     } else if (!left.IsUtf16()) {
616         Span<const uint8_t> lhsSp(left.GetDataUtf8(), lhsCount);
617         Span<const uint16_t> rhsSp(right.GetDataUtf16(), rhsCount);
618         return EcmaString::StringsAreEquals(lhsSp, rhsSp);
619     } else if (!right.IsUtf16()) {
620         Span<const uint16_t> lhsSp(left.GetDataUtf16(), rhsCount);
621         Span<const uint8_t> rhsSp(right.GetDataUtf8(), lhsCount);
622         return EcmaString::StringsAreEquals(lhsSp, rhsSp);
623     } else {
624         Span<const uint16_t> lhsSp(left.GetDataUtf16(), lhsCount);
625         Span<const uint16_t> rhsSp(right.GetDataUtf16(), rhsCount);
626         return EcmaString::StringsAreEquals(lhsSp, rhsSp);
627     }
628 }
629 
StringsAreEqual(const EcmaVM * vm,const JSHandle<EcmaString> & str1,const JSHandle<EcmaString> & str2)630 bool EcmaString::StringsAreEqual(const EcmaVM *vm, const JSHandle<EcmaString> &str1, const JSHandle<EcmaString> &str2)
631 {
632     if (str1 == str2) {
633         return true;
634     }
635     if (str1->IsInternString() && str2->IsInternString()) {
636         return false;
637     }
638     uint32_t str1Len = str1->GetLength();
639     if (str1Len != str2->GetLength()) {
640         return false;
641     }
642     if (str1Len == 0) {
643         return true;
644     }
645 
646     uint32_t str1Hash;
647     uint32_t str2Hash;
648     if (str1->TryGetHashCode(&str1Hash) && str2->TryGetHashCode(&str2Hash)) {
649         if (str1Hash != str2Hash) {
650             return false;
651         }
652     }
653     FlatStringInfo str1Flat = FlattenAllString(vm, str1);
654     JSHandle<EcmaString> string(vm->GetJSThread(), str1Flat.GetString());
655     FlatStringInfo str2Flat = FlattenAllString(vm, str2);
656     str1Flat.SetString(*string);
657     return StringsAreEqualDiffUtfEncoding(str1Flat, str2Flat);
658 }
659 
660 /* static */
StringsAreEqual(EcmaString * str1,EcmaString * str2)661 bool EcmaString::StringsAreEqual(EcmaString *str1, EcmaString *str2)
662 {
663     if (str1 == str2) {
664         return true;
665     }
666     uint32_t str1Len = str1->GetLength();
667     if (str1Len != str2->GetLength()) {
668         return false;
669     }
670     if (str1Len == 0) {
671         return true;
672     }
673 
674     uint32_t str1Hash;
675     uint32_t str2Hash;
676     if (str1->TryGetHashCode(&str1Hash) && str2->TryGetHashCode(&str2Hash)) {
677         if (str1Hash != str2Hash) {
678             return false;
679         }
680     }
681     return StringsAreEqualDiffUtfEncoding(str1, str2);
682 }
683 
684 /* static */
StringIsEqualUint8Data(const EcmaString * str1,const uint8_t * dataAddr,uint32_t dataLen,bool canBeCompressToUtf8)685 bool EcmaString::StringIsEqualUint8Data(const EcmaString *str1, const uint8_t *dataAddr, uint32_t dataLen,
686                                         bool canBeCompressToUtf8)
687 {
688     if (!str1->IsSlicedString() && canBeCompressToUtf8 != str1->IsUtf8()) {
689         return false;
690     }
691     if (canBeCompressToUtf8 && str1->GetLength() != dataLen) {
692         return false;
693     }
694     if (str1->IsUtf8()) {
695         CVector<uint8_t> buf;
696         Span<const uint8_t> data1(EcmaString::GetUtf8DataFlat(str1, buf), dataLen);
697         Span<const uint8_t> data2(dataAddr, dataLen);
698         return EcmaString::StringsAreEquals(data1, data2);
699     }
700     CVector<uint16_t> buf;
701     uint32_t length = str1->GetLength();
702     const uint16_t *data = EcmaString::GetUtf16DataFlat(str1, buf);
703     return IsUtf8EqualsUtf16(dataAddr, dataLen, data, length);
704 }
705 
706 /* static */
StringsAreEqualUtf16(const EcmaString * str1,const uint16_t * utf16Data,uint32_t utf16Len)707 bool EcmaString::StringsAreEqualUtf16(const EcmaString *str1, const uint16_t *utf16Data, uint32_t utf16Len)
708 {
709     uint32_t length = str1->GetLength();
710     if (length != utf16Len) {
711         return false;
712     }
713     if (str1->IsUtf8()) {
714         CVector<uint8_t> buf;
715         const uint8_t *data = EcmaString::GetUtf8DataFlat(str1, buf);
716         return IsUtf8EqualsUtf16(data, length, utf16Data, utf16Len);
717     } else {
718         CVector<uint16_t> buf;
719         Span<const uint16_t> data1(EcmaString::GetUtf16DataFlat(str1, buf), length);
720         Span<const uint16_t> data2(utf16Data, utf16Len);
721         return EcmaString::StringsAreEquals(data1, data2);
722     }
723 }
724 
725 template<typename T>
MemCopyChars(Span<T> & dst,size_t dstMax,Span<const T> & src,size_t count)726 bool EcmaString::MemCopyChars(Span<T> &dst, size_t dstMax, Span<const T> &src, size_t count)
727 {
728     ASSERT(dstMax >= count);
729     ASSERT(dst.Size() >= src.Size());
730     if (memcpy_s(dst.data(), dstMax, src.data(), count) != EOK) {
731         LOG_FULL(FATAL) << "memcpy_s failed";
732         UNREACHABLE();
733     }
734     return true;
735 }
736 
HashIntegerString(uint32_t length,uint32_t * hash,const uint32_t hashSeed) const737 bool EcmaString::HashIntegerString(uint32_t length, uint32_t *hash, const uint32_t hashSeed) const
738 {
739     ASSERT(length >= 0);
740     Span<const uint8_t> str = FastToUtf8Span();
741     return HashIntegerString(str.data(), length, hash, hashSeed);
742 }
743 
ComputeHashcode() const744 uint32_t EcmaString::ComputeHashcode() const
745 {
746     auto [hash, isInteger] = ComputeRawHashcode();
747     return MixHashcode(hash, isInteger);
748 }
749 
750 // hashSeed only be used when computing two separate strings merged hashcode.
ComputeRawHashcode() const751 std::pair<uint32_t, bool> EcmaString::ComputeRawHashcode() const
752 {
753     uint32_t hash = 0;
754     uint32_t length = GetLength();
755     if (length == 0) {
756         return {hash, false};
757     }
758 
759     if (IsUtf8()) {
760         // String using UTF8 encoding, and length smaller than 10, try to compute integer hash.
761         if (length < MAX_ELEMENT_INDEX_LEN && this->HashIntegerString(length, &hash, 0)) {
762             return {hash, true};
763         }
764         CVector<uint8_t> buf;
765         const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
766         // String can not convert to integer number, using normal hashcode computing algorithm.
767         hash = this->ComputeHashForData(data, length, 0);
768         return {hash, false};
769     } else {
770         CVector<uint16_t> buf;
771         const uint16_t *data = EcmaString::GetUtf16DataFlat(this, buf);
772         // If rawSeed has certain value, and second string uses UTF16 encoding,
773         // then merged string can not be small integer number.
774         hash = this->ComputeHashForData(data, length, 0);
775         return {hash, false};
776     }
777 }
778 
779 // hashSeed only be used when computing two separate strings merged hashcode.
ComputeHashcode(uint32_t rawHashSeed,bool isInteger) const780 uint32_t EcmaString::ComputeHashcode(uint32_t rawHashSeed, bool isInteger) const
781 {
782     uint32_t hash;
783     uint32_t length = GetLength();
784     if (length == 0) {
785         return MixHashcode(rawHashSeed, isInteger);
786     }
787 
788     if (IsUtf8()) {
789         // String using UTF8 encoding, and length smaller than 10, try to compute integer hash.
790         if ((rawHashSeed == 0 || isInteger) &&
791              length < MAX_ELEMENT_INDEX_LEN && this->HashIntegerString(length, &hash, rawHashSeed)) {
792             return hash;
793         }
794         CVector<uint8_t> buf;
795         const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
796         // String can not convert to integer number, using normal hashcode computing algorithm.
797         hash = this->ComputeHashForData(data, length, rawHashSeed);
798         return MixHashcode(hash, NOT_INTEGER);
799     } else {
800         CVector<uint16_t> buf;
801         const uint16_t *data = EcmaString::GetUtf16DataFlat(this, buf);
802         // If rawSeed has certain value, and second string uses UTF16 encoding,
803         // then merged string can not be small integer number.
804         hash = this->ComputeHashForData(data, length, rawHashSeed);
805         return MixHashcode(hash, NOT_INTEGER);
806     }
807 }
808 
809 /* static */
ComputeHashcodeUtf8(const uint8_t * utf8Data,size_t utf8Len,bool canBeCompress)810 uint32_t EcmaString::ComputeHashcodeUtf8(const uint8_t *utf8Data, size_t utf8Len, bool canBeCompress)
811 {
812     uint32_t mixHash = 0;
813     if (canBeCompress) {
814         // String using UTF8 encoding, and length smaller than 10, try to compute integer hash.
815         if (utf8Len < MAX_ELEMENT_INDEX_LEN && HashIntegerString(utf8Data, utf8Len, &mixHash, 0)) {
816             return mixHash;
817         }
818         uint32_t hash = ComputeHashForData(utf8Data, utf8Len, 0);
819         return MixHashcode(hash, NOT_INTEGER);
820     } else {
821         auto utf16Len = base::utf_helper::Utf8ToUtf16Size(utf8Data, utf8Len);
822         CVector<uint16_t> tmpBuffer(utf16Len);
823         [[maybe_unused]] auto len = base::utf_helper::ConvertRegionUtf8ToUtf16(utf8Data, tmpBuffer.data(), utf8Len,
824                                                                                utf16Len);
825         ASSERT(len == utf16Len);
826         uint32_t hash = ComputeHashForData(tmpBuffer.data(), utf16Len, 0);
827         return MixHashcode(hash, NOT_INTEGER);
828     }
829     LOG_ECMA(FATAL) << "this branch is unreachable";
830     UNREACHABLE();
831 }
832 
833 /* static */
ComputeHashcodeUtf16(const uint16_t * utf16Data,uint32_t length)834 uint32_t EcmaString::ComputeHashcodeUtf16(const uint16_t *utf16Data, uint32_t length)
835 {
836     uint32_t mixHash = 0;
837     // String length smaller than 10, try to compute integer hash.
838     if (length < MAX_ELEMENT_INDEX_LEN && HashIntegerString(utf16Data, length, &mixHash, 0)) {
839         return mixHash;
840     }
841     uint32_t hash = ComputeHashForData(utf16Data, length, 0);
842     return MixHashcode(hash, NOT_INTEGER);
843 }
844 
// Returns the length of `utf8` with an incomplete trailing multi-byte sequence cut off.
// Only the last three bytes are inspected: a byte found k positions before the end (k = 1..3) whose
// value announces a sequence longer than k bytes means the final k bytes cannot form a complete
// character, so they are dropped. When several positions match, the largest k wins.
static size_t FixUtf8Len(const uint8_t* utf8, size_t utf8Len)
{
    // LEAD_MIN[k - 1]: smallest byte value that announces a sequence of more than k bytes.
    constexpr uint8_t LEAD_MIN[] = {0xC0, 0xE0, 0xF0};
    constexpr size_t MAX_TAIL = sizeof(LEAD_MIN) / sizeof(LEAD_MIN[0]);
    size_t dropped = 0;
    for (size_t back = 1; back <= MAX_TAIL && back <= utf8Len; ++back) {
        if (utf8[utf8Len - back] >= LEAD_MIN[back - 1]) {
            dropped = back;
        }
    }
    return utf8Len - dropped;
}
865 
866 
867 /* static */
IsUtf8EqualsUtf16(const uint8_t * utf8Data,size_t utf8Len,const uint16_t * utf16Data,uint32_t utf16Len)868 bool EcmaString::IsUtf8EqualsUtf16(const uint8_t *utf8Data, size_t utf8Len,
869                                    const uint16_t *utf16Data, uint32_t utf16Len)
870 {
871     size_t safeUtf8Len = FixUtf8Len(utf8Data, utf8Len);
872     const uint8_t *utf8End = utf8Data + utf8Len;
873     const uint8_t *utf8SafeEnd = utf8Data + safeUtf8Len;
874     const uint16_t *utf16End = utf16Data + utf16Len;
875     while (utf8Data < utf8SafeEnd && utf16Data < utf16End) {
876         uint8_t src = *utf8Data;
877         switch (src & 0xF0) {
878             case 0xF0: {
879                 const uint8_t c2 = *(++utf8Data);
880                 const uint8_t c3 = *(++utf8Data);
881                 const uint8_t c4 = *(++utf8Data);
882                 uint32_t codePoint = ((src & LOW_3BITS) << OFFSET_18POS) | ((c2 & LOW_6BITS) << OFFSET_12POS) |
883                                      ((c3 & LOW_6BITS) << OFFSET_6POS) | (c4 & LOW_6BITS);
884                 if (codePoint >= SURROGATE_RAIR_START) {
885                     if (utf16Data >= utf16End - 1) {
886                         return false;
887                     }
888                     codePoint -= SURROGATE_RAIR_START;
889                     if (*utf16Data++ != static_cast<uint16_t>((codePoint >> OFFSET_10POS) | H_SURROGATE_START)) {
890                         return false;
891                     } else if (*utf16Data++ != static_cast<uint16_t>((codePoint & 0x3FF) | L_SURROGATE_START)) {
892                         return false;
893                     }
894                 } else {
895                     if (*utf16Data++ != static_cast<uint16_t>(codePoint)) {
896                         return false;
897                     }
898                 }
899                 utf8Data++;
900                 break;
901             }
902             case 0xE0: {
903                 const uint8_t c2 = *(++utf8Data);
904                 const uint8_t c3 = *(++utf8Data);
905                 if (*utf16Data++ != static_cast<uint16_t>(((src & LOW_4BITS) << OFFSET_12POS) |
906                     ((c2 & LOW_6BITS) << OFFSET_6POS) | (c3 & LOW_6BITS))) {
907                     return false;
908                 }
909                 utf8Data++;
910                 break;
911             }
912             case 0xD0:
913             case 0xC0: {
914                 const uint8_t c2 = *(++utf8Data);
915                 if (*utf16Data++ != static_cast<uint16_t>(((src & LOW_5BITS) << OFFSET_6POS) | (c2 & LOW_6BITS))) {
916                     return false;
917                 }
918                 utf8Data++;
919                 break;
920             }
921             default:
922                 do {
923                     if (*utf16Data++ != static_cast<uint16_t>(*utf8Data++)) {
924                         return false;
925                     }
926                 } while (utf8Data < utf8SafeEnd && utf16Data < utf16End && *utf8Data < 0x80);
927                 break;
928         }
929     }
930     // The remain chars should be treated as single byte char.
931     while (utf8Data < utf8End && utf16Data < utf16End) {
932         if (*utf16Data++ != static_cast<uint16_t>(*utf8Data++)) {
933             return false;
934         }
935     }
936     return utf8Data == utf8End && utf16Data == utf16End;
937 }
938 
ToElementIndex(uint32_t * index)939 bool EcmaString::ToElementIndex(uint32_t *index)
940 {
941     uint32_t len = GetLength();
942     if (UNLIKELY(len == 0 || len > MAX_ELEMENT_INDEX_LEN)) {  // NOLINTNEXTLINEreadability-magic-numbers)
943         return false;
944     }
945     if (UNLIKELY(IsUtf16())) {
946         return false;
947     }
948 
949     // fast path: get integer from string's hash value
950     if (TryToGetInteger(index)) {
951         return true;
952     }
953 
954     CVector<uint8_t> buf;
955     const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
956     uint32_t c = data[0];
957     uint64_t n = 0;
958     if (c == '0') {
959         *index = 0;
960         return len == 1;
961     }
962     uint32_t loopStart = 0;
963     if (ToUInt64FromLoopStart(&n, loopStart, data) && n < JSObject::MAX_ELEMENT_INDEX) {
964         *index = n;
965         return true;
966     }
967     return false;
968 }
969 
ToInt(int32_t * index,bool * negative)970 bool EcmaString::ToInt(int32_t *index, bool *negative)
971 {
972     uint32_t len = GetLength();
973     if (UNLIKELY(len == 0 || len > MAX_ELEMENT_INDEX_LEN)) {  // NOLINTNEXTLINEreadability-magic-numbers)
974         return false;
975     }
976     if (UNLIKELY(IsUtf16())) {
977         return false;
978     }
979     CVector<uint8_t> buf;
980     const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
981     uint32_t c = data[0];
982     uint32_t loopStart = 0;
983     uint64_t n = 0;
984     if (c == '0') {
985         *index = 0;
986         return len == 1;
987     }
988     if (c == '-' && len > 1) {
989         *negative = true;
990         loopStart = 1;
991     }
992 
993     if (ToUInt64FromLoopStart(&n, loopStart, data) && n <= std::numeric_limits<int32_t>::max()) {
994         *index = *negative ? -n : n;
995         return true;
996     }
997     return false;
998 }
999 
ToUInt64FromLoopStart(uint64_t * index,uint32_t loopStart,const uint8_t * data)1000 bool EcmaString::ToUInt64FromLoopStart(uint64_t *index, uint32_t loopStart, const uint8_t *data)
1001 {
1002     uint64_t n = 0;
1003     uint32_t len = GetLength();
1004     if (UNLIKELY(loopStart >= len)) {
1005         return false;
1006     }
1007     for (uint32_t i = loopStart; i < len; i++) {
1008         uint32_t c = data[i];  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
1009         if (c < '0' || c > '9') {
1010             return false;
1011         }
1012         // NOLINTNEXTLINE(readability-magic-numbers)
1013         n = n * 10 + (c - '0');  // 10: decimal factor
1014     }
1015     *index = n;
1016     return true;
1017 }
1018 
ToTypedArrayIndex(uint32_t * index)1019 bool EcmaString::ToTypedArrayIndex(uint32_t *index)
1020 {
1021     uint32_t len = GetLength();
1022     if (UNLIKELY(len == 0 || len > MAX_ELEMENT_INDEX_LEN)) {
1023         return false;
1024     }
1025     if (UNLIKELY(IsUtf16())) {
1026         return false;
1027     }
1028 
1029     CVector<uint8_t> buf;
1030     const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
1031     uint32_t c = data[0];  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
1032     uint64_t n = 0;
1033     if (c == '0') {
1034         *index = 0;
1035         return len == 1;
1036     }
1037     if (c > '0' && c <= '9') {
1038         n = c - '0';
1039         for (uint32_t i = 1; i < len; i++) {
1040             c = data[i];  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
1041             if (c >= '0' && c <= '9') {
1042                 // NOLINTNEXTLINE(readability-magic-numbers)
1043                 n = n * 10 + (c - '0');  // 10: decimal factor
1044             } else if (c == '.') {
1045                 n = JSObject::MAX_ELEMENT_INDEX;
1046                 break;
1047             } else {
1048                 return false;
1049             }
1050         }
1051         if (n < JSObject::MAX_ELEMENT_INDEX) {
1052             *index = n;
1053             return true;
1054         } else {
1055             *index = JSObject::MAX_ELEMENT_INDEX;
1056             return true;
1057         }
1058     } else if (c == '-') {
1059         *index = JSObject::MAX_ELEMENT_INDEX;
1060         return true;
1061     }
1062     return false;
1063 }
1064 
1065 template<typename T>
TrimBody(const JSThread * thread,const JSHandle<EcmaString> & src,Span<T> & data,TrimMode mode)1066 EcmaString *EcmaString::TrimBody(const JSThread *thread, const JSHandle<EcmaString> &src, Span<T> &data, TrimMode mode)
1067 {
1068     uint32_t srcLen = src->GetLength();
1069     int32_t start = 0;
1070     int32_t end = static_cast<int32_t>(srcLen) - 1;
1071 
1072     if (mode == TrimMode::TRIM || mode == TrimMode::TRIM_START) {
1073         start = static_cast<int32_t>(base::StringHelper::GetStart(data, srcLen));
1074     }
1075     if (mode == TrimMode::TRIM || mode == TrimMode::TRIM_END) {
1076         end = base::StringHelper::GetEnd(data, start, srcLen);
1077     }
1078     EcmaString *res = FastSubString(thread->GetEcmaVM(), src, start, static_cast<uint32_t>(end - start + 1));
1079     return res;
1080 }
1081 
1082 /* static */
ToLower(const EcmaVM * vm,const JSHandle<EcmaString> & src)1083 EcmaString *EcmaString::ToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src)
1084 {
1085     auto srcFlat = FlattenAllString(vm, src);
1086     uint32_t srcLength = srcFlat.GetLength();
1087     auto factory = vm->GetFactory();
1088     if (srcFlat.IsUtf16()) {
1089         std::u16string u16str = base::StringHelper::Utf16ToU16String(srcFlat.GetDataUtf16(), srcLength);
1090         std::string res = base::StringHelper::ToLower(u16str);
1091         return *(factory->NewFromStdString(res));
1092     } else {
1093         return ConvertUtf8ToLowerOrUpper(vm, src, true);
1094     }
1095 }
1096 
1097 /* static */
TryToLower(const EcmaVM * vm,const JSHandle<EcmaString> & src)1098 EcmaString *EcmaString::TryToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src)
1099 {
1100     auto srcFlat = FlattenAllString(vm, src);
1101     uint32_t srcLength = srcFlat.GetLength();
1102     const char start = 'A';
1103     const char end = 'Z';
1104     uint32_t upperIndex = srcLength;
1105     Span<uint8_t> data(srcFlat.GetDataUtf8Writable(), srcLength);
1106     for (uint32_t index = 0; index < srcLength; ++index) {
1107         if (base::StringHelper::Utf8CharInRange(data[index], start, end)) {
1108             upperIndex = index;
1109             break;
1110         }
1111     }
1112     if (upperIndex == srcLength) {
1113         return *src;
1114     }
1115     return ConvertUtf8ToLowerOrUpper(vm, src, true, upperIndex);
1116 }
1117 
1118 /* static */
TryToUpper(const EcmaVM * vm,const JSHandle<EcmaString> & src)1119 EcmaString *EcmaString::TryToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src)
1120 {
1121     auto srcFlat = FlattenAllString(vm, src);
1122     uint32_t srcLength = srcFlat.GetLength();
1123     const char start = 'a';
1124     const char end = 'z';
1125     uint32_t lowerIndex = srcLength;
1126     Span<uint8_t> data(srcFlat.GetDataUtf8Writable(), srcLength);
1127     for (uint32_t index = 0; index < srcLength; ++index) {
1128         if (base::StringHelper::Utf8CharInRange(data[index], start, end)) {
1129             lowerIndex = index;
1130             break;
1131         }
1132     }
1133     if (lowerIndex == srcLength) {
1134         return *src;
1135     }
1136     return ConvertUtf8ToLowerOrUpper(vm, src, false, lowerIndex);
1137 }
1138 
1139 /* static */
ConvertUtf8ToLowerOrUpper(const EcmaVM * vm,const JSHandle<EcmaString> & src,bool toLower,uint32_t startIndex)1140 EcmaString *EcmaString::ConvertUtf8ToLowerOrUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src,
1141                                                   bool toLower, uint32_t startIndex)
1142 {
1143     const char start = toLower ? 'A' : 'a';
1144     const char end = toLower ? 'Z' : 'z';
1145     uint32_t srcLength = src->GetLength();
1146     JSHandle<EcmaString> newString(vm->GetJSThread(), CreateLineString(vm, srcLength, true));
1147     auto srcFlat = FlattenAllString(vm, src);
1148     Span<uint8_t> data(srcFlat.GetDataUtf8Writable(), srcLength);
1149     auto newStringPtr = newString->GetDataUtf8Writable();
1150     if (startIndex > 0) {
1151         if (memcpy_s(newStringPtr, startIndex * sizeof(uint8_t), data.data(), startIndex * sizeof(uint8_t)) != EOK) {
1152             LOG_FULL(FATAL) << "memcpy_s failed";
1153             UNREACHABLE();
1154         }
1155     }
1156     for (uint32_t index = startIndex; index < srcLength; ++index) {
1157         if (base::StringHelper::Utf8CharInRange(data[index], start, end)) {
1158             *(newStringPtr + index) = data[index] ^ (1 << 5);   // 1 and 5 means lower to upper or upper to lower
1159         } else {
1160             *(newStringPtr + index) = data[index];
1161         }
1162     }
1163     return *newString;
1164 }
1165 
1166 /* static */
ToUpper(const EcmaVM * vm,const JSHandle<EcmaString> & src)1167 EcmaString *EcmaString::ToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src)
1168 {
1169     FlatStringInfo srcFlat = FlattenAllString(vm, src);
1170     uint32_t srcLength = srcFlat.GetLength();
1171     auto factory = vm->GetFactory();
1172     if (srcFlat.IsUtf16()) {
1173         std::u16string u16str = base::StringHelper::Utf16ToU16String(srcFlat.GetDataUtf16(), srcLength);
1174         std::string res = base::StringHelper::ToUpper(u16str);
1175         return *(factory->NewFromStdString(res));
1176     } else {
1177         return ConvertUtf8ToLowerOrUpper(vm, src, false);
1178     }
1179 }
1180 
1181 /* static */
ToLocaleLower(const EcmaVM * vm,const JSHandle<EcmaString> & src,const icu::Locale & locale)1182 EcmaString *EcmaString::ToLocaleLower(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale)
1183 {
1184     auto factory = vm->GetFactory();
1185     FlatStringInfo srcFlat = FlattenAllString(vm, src);
1186     std::u16string utf16 = srcFlat.ToU16String();
1187     std::string res = base::StringHelper::ToLocaleLower(utf16, locale);
1188     return *(factory->NewFromStdString(res));
1189 }
1190 
1191 /* static */
ToLocaleUpper(const EcmaVM * vm,const JSHandle<EcmaString> & src,const icu::Locale & locale)1192 EcmaString *EcmaString::ToLocaleUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale)
1193 {
1194     auto factory = vm->GetFactory();
1195     FlatStringInfo srcFlat = FlattenAllString(vm, src);
1196     std::u16string utf16 = srcFlat.ToU16String();
1197     std::string res = base::StringHelper::ToLocaleUpper(utf16, locale);
1198     return *(factory->NewFromStdString(res));
1199 }
1200 
Trim(const JSThread * thread,const JSHandle<EcmaString> & src,TrimMode mode)1201 EcmaString *EcmaString::Trim(const JSThread *thread, const JSHandle<EcmaString> &src, TrimMode mode)
1202 {
1203     FlatStringInfo srcFlat = FlattenAllString(thread->GetEcmaVM(), src);
1204     uint32_t srcLen = srcFlat.GetLength();
1205     if (UNLIKELY(srcLen == 0)) {
1206         return EcmaString::Cast(thread->GlobalConstants()->GetEmptyString().GetTaggedObject());
1207     }
1208     if (srcFlat.IsUtf8()) {
1209         Span<const uint8_t> data(srcFlat.GetDataUtf8(), srcLen);
1210         return TrimBody(thread, src, data, mode);
1211     } else {
1212         Span<const uint16_t> data(srcFlat.GetDataUtf16(), srcLen);
1213         return TrimBody(thread, src, data, mode);
1214     }
1215 }
1216 
SlowFlatten(const EcmaVM * vm,const JSHandle<EcmaString> & string,MemSpaceType type)1217 EcmaString *EcmaString::SlowFlatten(const EcmaVM *vm, const JSHandle<EcmaString> &string, MemSpaceType type)
1218 {
1219     ASSERT(string->IsTreeString() || string->IsSlicedString());
1220     ASSERT(IsSMemSpace(type));
1221     auto thread = vm->GetJSThread();
1222     uint32_t length = string->GetLength();
1223     EcmaString *result = nullptr;
1224     if (string->IsUtf8()) {
1225         result = CreateLineStringWithSpaceType(vm, length, true, type);
1226         WriteToFlat<uint8_t>(*string, result->GetDataUtf8Writable(), length);
1227     } else {
1228         result = CreateLineStringWithSpaceType(vm, length, false, type);
1229         WriteToFlat<uint16_t>(*string, result->GetDataUtf16Writable(), length);
1230     }
1231     if (string->IsTreeString()) {
1232         JSHandle<TreeEcmaString> tree(string);
1233         ASSERT(EcmaString::Cast(tree->GetSecond())->GetLength() != 0);
1234         tree->SetFirst(thread, JSTaggedValue(result));
1235         tree->SetSecond(thread, JSTaggedValue(*vm->GetFactory()->GetEmptyString()));
1236     }
1237     return result;
1238 }
1239 
Flatten(const EcmaVM * vm,const JSHandle<EcmaString> & string,MemSpaceType type)1240 EcmaString *EcmaString::Flatten(const EcmaVM *vm, const JSHandle<EcmaString> &string, MemSpaceType type)
1241 {
1242     EcmaString *s = *string;
1243     if (!s->IsTreeString()) {
1244         return s;
1245     }
1246     JSHandle<TreeEcmaString> tree = JSHandle<TreeEcmaString>::Cast(string);
1247     if (!tree->IsFlat()) {
1248         return SlowFlatten(vm, string, type);
1249     }
1250     return EcmaString::Cast(tree->GetFirst());
1251 }
1252 
FlattenAllString(const EcmaVM * vm,const JSHandle<EcmaString> & string,MemSpaceType type)1253 FlatStringInfo EcmaString::FlattenAllString(const EcmaVM *vm, const JSHandle<EcmaString> &string, MemSpaceType type)
1254 {
1255     ASSERT(IsSMemSpace(type));
1256     EcmaString *s = *string;
1257     uint32_t startIndex = 0;
1258     if (s->IsLineOrConstantString()) {
1259         return FlatStringInfo(s, startIndex, s->GetLength());
1260     }
1261     if (string->IsTreeString()) {
1262         JSHandle<TreeEcmaString> tree = JSHandle<TreeEcmaString>::Cast(string);
1263         if (!tree->IsFlat()) {
1264             s = SlowFlatten(vm, string, type);
1265         } else {
1266             s = EcmaString::Cast(tree->GetFirst());
1267         }
1268     } else if (string->IsSlicedString()) {
1269         s = EcmaString::Cast(SlicedString::Cast(*string)->GetParent());
1270         startIndex = SlicedString::Cast(*string)->GetStartIndex();
1271     }
1272     return FlatStringInfo(s, startIndex, string->GetLength());
1273 }
1274 
FlattenNoGC(const EcmaVM * vm,EcmaString * string)1275 EcmaString *EcmaString::FlattenNoGC(const EcmaVM *vm, EcmaString *string)
1276 {
1277     DISALLOW_GARBAGE_COLLECTION;
1278     if (string->IsLineOrConstantString()) {
1279         return string;
1280     }
1281     if (string->IsTreeString()) {
1282         TreeEcmaString *tree = TreeEcmaString::Cast(string);
1283         if (tree->IsFlat()) {
1284             string = EcmaString::Cast(tree->GetFirst());
1285         } else {
1286             uint32_t length = tree->GetLength();
1287             EcmaString *result = nullptr;
1288             if (tree->IsUtf8()) {
1289                 result = CreateLineStringNoGC(vm, length, true);
1290                 WriteToFlat<uint8_t>(tree, result->GetDataUtf8Writable(), length);
1291             } else {
1292                 result = CreateLineStringNoGC(vm, length, false);
1293                 WriteToFlat<uint16_t>(tree, result->GetDataUtf16Writable(), length);
1294             }
1295             tree->SetFirst(vm->GetJSThread(), JSTaggedValue(result));
1296             tree->SetSecond(vm->GetJSThread(), JSTaggedValue(*vm->GetFactory()->GetEmptyString()));
1297             return result;
1298         }
1299     } else if (string->IsSlicedString()) {
1300         SlicedString *str = SlicedString::Cast(string);
1301         uint32_t length = str->GetLength();
1302         EcmaString *result = nullptr;
1303         if (str->IsUtf8()) {
1304             result = CreateLineStringNoGC(vm, length, true);
1305             WriteToFlat<uint8_t>(str, result->GetDataUtf8Writable(), length);
1306         } else {
1307             result = CreateLineStringNoGC(vm, length, false);
1308             WriteToFlat<uint16_t>(str, result->GetDataUtf16Writable(), length);
1309         }
1310         return result;
1311     }
1312     return string;
1313 }
1314 
GetUtf8DataFlat(const EcmaString * src,CVector<uint8_t> & buf)1315 const uint8_t *EcmaString::GetUtf8DataFlat(const EcmaString *src, CVector<uint8_t> &buf)
1316 {
1317     ASSERT(src->IsUtf8());
1318     uint32_t length = src->GetLength();
1319     EcmaString *string = const_cast<EcmaString *>(src);
1320     if (string->IsTreeString()) {
1321         if (string->IsFlat()) {
1322             string = EcmaString::Cast(TreeEcmaString::Cast(string)->GetFirst());
1323         } else {
1324             buf.reserve(length);
1325             WriteToFlat(string, buf.data(), length);
1326             return buf.data();
1327         }
1328     } else if (string->IsSlicedString()) {
1329         SlicedString *str = SlicedString::Cast(string);
1330         return EcmaString::Cast(str->GetParent())->GetDataUtf8() + str->GetStartIndex();
1331     }
1332     return string->GetDataUtf8();
1333 }
1334 
GetNonTreeUtf8Data(const EcmaString * src)1335 const uint8_t *EcmaString::GetNonTreeUtf8Data(const EcmaString *src)
1336 {
1337     ASSERT(src->IsUtf8());
1338     ASSERT(!src->IsTreeString());
1339     EcmaString *string = const_cast<EcmaString *>(src);
1340     if (string->IsSlicedString()) {
1341         SlicedString *str = SlicedString::Cast(string);
1342         return EcmaString::Cast(str->GetParent())->GetDataUtf8() + str->GetStartIndex();
1343     }
1344     ASSERT(src->IsLineOrConstantString());
1345     return string->GetDataUtf8();
1346 }
1347 
GetUtf16DataFlat(const EcmaString * src,CVector<uint16_t> & buf)1348 const uint16_t *EcmaString::GetUtf16DataFlat(const EcmaString *src, CVector<uint16_t> &buf)
1349 {
1350     ASSERT(src->IsUtf16());
1351     uint32_t length = src->GetLength();
1352     EcmaString *string = const_cast<EcmaString *>(src);
1353     if (string->IsTreeString()) {
1354         if (string->IsFlat()) {
1355             string = EcmaString::Cast(TreeEcmaString::Cast(string)->GetFirst());
1356         } else {
1357             buf.reserve(length);
1358             WriteToFlat(string, buf.data(), length);
1359             return buf.data();
1360         }
1361     } else if (string->IsSlicedString()) {
1362         SlicedString *str = SlicedString::Cast(string);
1363         return EcmaString::Cast(str->GetParent())->GetDataUtf16() + str->GetStartIndex();
1364     }
1365     return string->GetDataUtf16();
1366 }
1367 
GetNonTreeUtf16Data(const EcmaString * src)1368 const uint16_t *EcmaString::GetNonTreeUtf16Data(const EcmaString *src)
1369 {
1370     ASSERT(src->IsUtf16());
1371     ASSERT(!src->IsTreeString());
1372     EcmaString *string = const_cast<EcmaString *>(src);
1373     if (string->IsSlicedString()) {
1374         SlicedString *str = SlicedString::Cast(string);
1375         return EcmaString::Cast(str->GetParent())->GetDataUtf16() + str->GetStartIndex();
1376     }
1377     ASSERT(src->IsLineOrConstantString());
1378     return string->GetDataUtf16();
1379 }
1380 
ToU16String(uint32_t len)1381 std::u16string FlatStringInfo::ToU16String(uint32_t len)
1382 {
1383     uint32_t length = len > 0 ? len : GetLength();
1384     std::u16string result;
1385     if (IsUtf16()) {
1386         const uint16_t *data = this->GetDataUtf16();
1387         result = base::StringHelper::Utf16ToU16String(data, length);
1388     } else {
1389         const uint8_t *data = this->GetDataUtf8();
1390         result = base::StringHelper::Utf8ToU16String(data, length);
1391     }
1392     return result;
1393 }
1394 
EcmaStringAccessor(TaggedObject * obj)1395 EcmaStringAccessor::EcmaStringAccessor(TaggedObject *obj)
1396 {
1397     ASSERT(obj != nullptr);
1398     string_ = EcmaString::Cast(obj);
1399 }
1400 
EcmaStringAccessor(JSTaggedValue value)1401 EcmaStringAccessor::EcmaStringAccessor(JSTaggedValue value)
1402 {
1403     ASSERT(value.IsString());
1404     string_ = EcmaString::Cast(value.GetTaggedObject());
1405 }
1406 
EcmaStringAccessor(const JSHandle<EcmaString> & strHandle)1407 EcmaStringAccessor::EcmaStringAccessor(const JSHandle<EcmaString> &strHandle)
1408     : string_(*strHandle)
1409 {
1410 }
1411 
ToStdString(StringConvertedUsage usage)1412 std::string EcmaStringAccessor::ToStdString(StringConvertedUsage usage)
1413 {
1414     if (string_ == nullptr) {
1415         return "";
1416     }
1417     bool modify = (usage != StringConvertedUsage::PRINT);
1418     CVector<uint8_t> buf;
1419     Span<const uint8_t> sp = string_->ToUtf8Span(buf, modify);
1420     std::string res;
1421     res.reserve(sp.size());
1422     for (const auto &c : sp) {
1423         res.push_back(c);
1424     }
1425     return res;
1426 }
1427 
Utf8ConvertToString()1428 CString EcmaStringAccessor::Utf8ConvertToString()
1429 {
1430     if (string_ == nullptr) {
1431         return CString("");
1432     }
1433     if (IsUtf8()) {
1434         std::string stdStr;
1435         if (IsLineString()) {
1436             return base::StringHelper::Utf8ToString(GetDataUtf8(), GetLength()).c_str();
1437         }
1438         CVector<uint8_t> buf;
1439         const uint8_t *data = EcmaString::GetUtf8DataFlat(string_, buf);
1440         return base::StringHelper::Utf8ToString(data, GetLength()).c_str();
1441     } else {
1442         return ToCString();
1443     }
1444 }
1445 
DebuggerToStdString(StringConvertedUsage usage)1446 std::string EcmaStringAccessor::DebuggerToStdString(StringConvertedUsage usage)
1447 {
1448     if (string_ == nullptr) {
1449         return "";
1450     }
1451 
1452     bool modify = (usage != StringConvertedUsage::PRINT);
1453     CVector<uint8_t> buf;
1454     Span<const uint8_t> sp = string_->DebuggerToUtf8Span(buf, modify);
1455     std::string res;
1456     res.reserve(sp.size());
1457     for (const auto &c : sp) {
1458         res.push_back(c);
1459     }
1460     return res;
1461 }
1462 
ToCString(StringConvertedUsage usage,bool cesu8)1463 CString EcmaStringAccessor::ToCString(StringConvertedUsage usage, bool cesu8)
1464 {
1465     if (string_ == nullptr) {
1466         return "";
1467     }
1468     bool modify = (usage != StringConvertedUsage::PRINT);
1469     CVector<uint8_t> buf;
1470     Span<const uint8_t> sp = string_->ToUtf8Span(buf, modify, cesu8);
1471     CString res;
1472     res.reserve(sp.size());
1473     for (const auto &c : sp) {
1474         res.push_back(c);
1475     }
1476     return res;
1477 }
1478 
1479 // static
CreateLineString(const EcmaVM * vm,size_t length,bool compressed)1480 EcmaString *EcmaStringAccessor::CreateLineString(const EcmaVM *vm, size_t length, bool compressed)
1481 {
1482     return EcmaString::CreateLineString(vm, length, compressed);
1483 }
1484 }  // namespace panda::ecmascript
1485