• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "ecmascript/ecma_string-inl.h"
17 
18 #include "ecmascript/js_symbol.h"
19 #include "ecmascript/mem/c_containers.h"
20 
21 namespace panda::ecmascript {
22 
Concat(const EcmaVM * vm,const JSHandle<EcmaString> & left,const JSHandle<EcmaString> & right,MemSpaceType type)23 EcmaString *EcmaString::Concat(const EcmaVM *vm,
24     const JSHandle<EcmaString> &left, const JSHandle<EcmaString> &right, MemSpaceType type)
25 {
26     // allocator may trig gc and move src, need to hold it
27     EcmaString *strLeft = *left;
28     EcmaString *strRight = *right;
29     uint32_t leftLength = strLeft->GetLength();
30     uint32_t rightLength = strRight->GetLength();
31     uint32_t newLength = leftLength + rightLength;
32     if (newLength == 0) {
33         return vm->GetFactory()->GetEmptyString().GetObject<EcmaString>();
34     }
35 
36     if (leftLength == 0) {
37         if (type == MemSpaceType::OLD_SPACE) {
38             Region *objectRegion = Region::ObjectAddressToRange(reinterpret_cast<TaggedObject *>(*right));
39             if (objectRegion->InYoungSpace()) {
40                 return CopyStringToOldSpace(vm, right, rightLength, strRight->IsUtf8());
41             }
42         }
43         return strRight;
44     }
45     if (rightLength == 0) {
46         if (type == MemSpaceType::OLD_SPACE) {
47             Region *objectRegion = Region::ObjectAddressToRange(reinterpret_cast<TaggedObject *>(*left));
48             if (objectRegion->InYoungSpace()) {
49                 return CopyStringToOldSpace(vm, left, leftLength, strLeft->IsUtf8());
50             }
51         }
52         return strLeft;
53     }
54     // if the result string is small, make a LineString
55     bool compressed = (strLeft->IsUtf8() && strRight->IsUtf8());
56     if (newLength < TreeEcmaString::MIN_TREE_ECMASTRING_LENGTH) {
57         ASSERT(strLeft->IsLineOrConstantString());
58         ASSERT(strRight->IsLineOrConstantString());
59         auto newString = CreateLineStringWithSpaceType(vm, newLength, compressed, type);
60         // retrieve strings after gc
61         strLeft = *left;
62         strRight = *right;
63         if (compressed) {
64             // copy left part
65             Span<uint8_t> sp(newString->GetDataUtf8Writable(), newLength);
66             Span<const uint8_t> srcLeft(strLeft->GetDataUtf8(), leftLength);
67             EcmaString::MemCopyChars(sp, newLength, srcLeft, leftLength);
68             // copy right part
69             sp = sp.SubSpan(leftLength);
70             Span<const uint8_t> srcRight(strRight->GetDataUtf8(), rightLength);
71             EcmaString::MemCopyChars(sp, rightLength, srcRight, rightLength);
72         } else {
73             // copy left part
74             Span<uint16_t> sp(newString->GetDataUtf16Writable(), newLength);
75             if (strLeft->IsUtf8()) {
76                 EcmaString::CopyChars(sp.data(), strLeft->GetDataUtf8(), leftLength);
77             } else {
78                 Span<const uint16_t> srcLeft(strLeft->GetDataUtf16(), leftLength);
79                 EcmaString::MemCopyChars(sp, newLength << 1U, srcLeft, leftLength << 1U);
80             }
81             // copy right part
82             sp = sp.SubSpan(leftLength);
83             if (strRight->IsUtf8()) {
84                 EcmaString::CopyChars(sp.data(), strRight->GetDataUtf8(), rightLength);
85             } else {
86                 Span<const uint16_t> srcRight(strRight->GetDataUtf16(), rightLength);
87                 EcmaString::MemCopyChars(sp, rightLength << 1U, srcRight, rightLength << 1U);
88             }
89         }
90         ASSERT_PRINT(compressed == CanBeCompressed(newString), "compressed does not match the real value!");
91         return newString;
92     }
93     return CreateTreeString(vm, left, right, newLength, compressed);
94 }
95 
96 /* static */
CopyStringToOldSpace(const EcmaVM * vm,const JSHandle<EcmaString> & original,uint32_t length,bool compressed)97 EcmaString *EcmaString::CopyStringToOldSpace(const EcmaVM *vm, const JSHandle<EcmaString> &original,
98     uint32_t length, bool compressed)
99 {
100     if (original->IsConstantString()) {
101         return CreateConstantString(vm, original->GetDataUtf8(), length, MemSpaceType::OLD_SPACE);
102     }
103     JSHandle<EcmaString> newString(vm->GetJSThread(),
104         CreateLineStringWithSpaceType(vm, length, compressed, MemSpaceType::OLD_SPACE));
105     auto strOrigin = FlattenAllString(vm, original);
106     if (compressed) {
107         // copy
108         Span<uint8_t> sp(newString->GetDataUtf8Writable(), length);
109         Span<const uint8_t> srcSp(strOrigin.GetDataUtf8(), length);
110         EcmaString::MemCopyChars(sp, length, srcSp, length);
111     } else {
112         // copy left part
113         Span<uint16_t> sp(newString->GetDataUtf16Writable(), length);
114         if (strOrigin.IsUtf8()) {
115             EcmaString::CopyChars(sp.data(), strOrigin.GetDataUtf8(), length);
116         } else {
117             Span<const uint16_t> srcSp(strOrigin.GetDataUtf16(), length);
118             EcmaString::MemCopyChars(sp, length << 1U, srcSp, length << 1U);
119         }
120     }
121     ASSERT_PRINT(compressed == CanBeCompressed(*newString), "compressed does not match the real value!");
122     return *newString;
123 }
124 
125 /* static */
FastSubString(const EcmaVM * vm,const JSHandle<EcmaString> & src,uint32_t start,uint32_t length)126 EcmaString *EcmaString::FastSubString(const EcmaVM *vm,
127     const JSHandle<EcmaString> &src, uint32_t start, uint32_t length)
128 {
129     ASSERT((start + length) <= src->GetLength());
130     if (length == 0) {
131         return *vm->GetFactory()->GetEmptyString();
132     }
133     if (start == 0 && length == src->GetLength()) {
134         return *src;
135     }
136     if (src->IsUtf8()) {
137         return FastSubUtf8String(vm, src, start, length);
138     }
139     return FastSubUtf16String(vm, src, start, length);
140 }
141 
142 /* static */
GetSlicedString(const EcmaVM * vm,const JSHandle<EcmaString> & src,uint32_t start,uint32_t length)143 EcmaString *EcmaString::GetSlicedString(const EcmaVM *vm,
144     const JSHandle<EcmaString> &src, uint32_t start, uint32_t length)
145 {
146     ASSERT((start + length) <= src->GetLength());
147     JSHandle<SlicedString> slicedString(vm->GetJSThread(), CreateSlicedString(vm));
148     FlatStringInfo srcFlat = FlattenAllString(vm, src);
149     slicedString->SetLength(length, srcFlat.GetString()->IsUtf8());
150     slicedString->SetParent(vm->GetJSThread(), JSTaggedValue(srcFlat.GetString()));
151     slicedString->SetStartIndex(start + srcFlat.GetStartIndex());
152     return *slicedString;
153 }
154 
155 /* static */
GetSubString(const EcmaVM * vm,const JSHandle<EcmaString> & src,uint32_t start,uint32_t length)156 EcmaString *EcmaString::GetSubString(const EcmaVM *vm,
157     const JSHandle<EcmaString> &src, uint32_t start, uint32_t length)
158 {
159     ASSERT((start + length) <= src->GetLength());
160     if (static_cast<uint32_t>(length) >= SlicedString::MIN_SLICED_ECMASTRING_LENGTH) {
161         if (start == 0 && length == src->GetLength()) {
162             return *src;
163         }
164         if (src->IsUtf16()) {
165             FlatStringInfo srcFlat = FlattenAllString(vm, src);
166             bool canBeCompressed = CanBeCompressed(srcFlat.GetDataUtf16() + start, length);
167             if (canBeCompressed) {
168                 JSHandle<EcmaString> string(vm->GetJSThread(), CreateLineString(vm, length, canBeCompressed));
169                 srcFlat = FlattenAllString(vm, src);
170                 CopyChars(string->GetDataUtf8Writable(), srcFlat.GetDataUtf16() + start, length);
171                 return *string;
172             }
173         }
174         return GetSlicedString(vm, src, start, length);
175     }
176     return FastSubString(vm, src, start, length);
177 }
178 
WriteData(EcmaString * src,uint32_t start,uint32_t destSize,uint32_t length)179 void EcmaString::WriteData(EcmaString *src, uint32_t start, uint32_t destSize, uint32_t length)
180 {
181     ASSERT(IsLineString() && !IsConstantString());
182     if (IsUtf8()) {
183         ASSERT(src->IsUtf8());
184         CVector<uint8_t> buf;
185         const uint8_t *data = EcmaString::GetUtf8DataFlat(src, buf);
186         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
187         if (length != 0 && memcpy_s(GetDataUtf8Writable() + start, destSize, data, length) != EOK) {
188             LOG_FULL(FATAL) << "memcpy_s failed";
189             UNREACHABLE();
190         }
191     } else if (src->IsUtf8()) {
192         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
193         CVector<uint8_t> buf;
194         const uint8_t *data = EcmaString::GetUtf8DataFlat(src, buf);
195         Span<uint16_t> to(GetDataUtf16Writable() + start, length);
196         Span<const uint8_t> from(data, length);
197         for (uint32_t i = 0; i < length; i++) {
198             to[i] = from[i];
199         }
200     } else {
201         CVector<uint16_t> buf;
202         const uint16_t *data = EcmaString::GetUtf16DataFlat(src, buf);
203         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
204         if (length != 0 && memcpy_s(GetDataUtf16Writable() + start,
205             destSize * sizeof(uint16_t), data, length * sizeof(uint16_t)) != EOK) {
206             LOG_FULL(FATAL) << "memcpy_s failed";
207             UNREACHABLE();
208         }
209     }
210 }
211 
212 template<typename T1, typename T2>
CompareStringSpan(Span<T1> & lhsSp,Span<T2> & rhsSp,int32_t count)213 int32_t CompareStringSpan(Span<T1> &lhsSp, Span<T2> &rhsSp, int32_t count)
214 {
215     for (int32_t i = 0; i < count; ++i) {
216         auto left = static_cast<int32_t>(lhsSp[i]);
217         auto right = static_cast<int32_t>(rhsSp[i]);
218         if (left != right) {
219             return left - right;
220         }
221     }
222     return 0;
223 }
224 
Compare(const EcmaVM * vm,const JSHandle<EcmaString> & left,const JSHandle<EcmaString> & right)225 int32_t EcmaString::Compare(const EcmaVM *vm, const JSHandle<EcmaString> &left, const JSHandle<EcmaString> &right)
226 {
227     if (*left == *right) {
228         return 0;
229     }
230     FlatStringInfo lhs = FlattenAllString(vm, left);
231     JSHandle<EcmaString> string(vm->GetJSThread(), lhs.GetString());
232     FlatStringInfo rhs = FlattenAllString(vm, right);
233     lhs.SetString(*string);
234     int32_t lhsCount = static_cast<int32_t>(lhs.GetLength());
235     int32_t rhsCount = static_cast<int32_t>(rhs.GetLength());
236     int32_t countDiff = lhsCount - rhsCount;
237     int32_t minCount = (countDiff < 0) ? lhsCount : rhsCount;
238     if (!lhs.IsUtf16() && !rhs.IsUtf16()) {
239         Span<const uint8_t> lhsSp(lhs.GetDataUtf8(), lhsCount);
240         Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
241         int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
242         if (charDiff != 0) {
243             return charDiff;
244         }
245     } else if (!lhs.IsUtf16()) {
246         Span<const uint8_t> lhsSp(lhs.GetDataUtf8(), lhsCount);
247         Span<const uint16_t> rhsSp(rhs.GetDataUtf16(), rhsCount);
248         int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
249         if (charDiff != 0) {
250             return charDiff;
251         }
252     } else if (!rhs.IsUtf16()) {
253         Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), rhsCount);
254         Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), lhsCount);
255         int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
256         if (charDiff != 0) {
257             return charDiff;
258         }
259     } else {
260         Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
261         Span<const uint16_t> rhsSp(rhs.GetDataUtf16(), rhsCount);
262         int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
263         if (charDiff != 0) {
264             return charDiff;
265         }
266     }
267     return countDiff;
268 }
269 
270 /* static */
271 template<typename T1, typename T2>
IndexOf(Span<const T1> & lhsSp,Span<const T2> & rhsSp,int32_t pos,int32_t max)272 int32_t EcmaString::IndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos, int32_t max)
273 {
274     ASSERT(rhsSp.size() > 0);
275     auto first = static_cast<int32_t>(rhsSp[0]);
276     for (int32_t i = pos; i <= max; i++) {
277         if (static_cast<int32_t>(lhsSp[i]) != first) {
278             i++;
279             while (i <= max && static_cast<int32_t>(lhsSp[i]) != first) {
280                 i++;
281             }
282         }
283         /* Found first character, now look at the rest of rhsSp */
284         if (i <= max) {
285             int j = i + 1;
286             int end = j + static_cast<int>(rhsSp.size()) - 1;
287 
288             for (int k = 1; j < end && static_cast<int32_t>(lhsSp[j]) == static_cast<int32_t>(rhsSp[k]); j++, k++) {
289             }
290             if (j == end) {
291                 /* Found whole string. */
292                 return i;
293             }
294         }
295     }
296     return -1;
297 }
298 
299 template<typename T1, typename T2>
LastIndexOf(Span<const T1> & lhsSp,Span<const T2> & rhsSp,int32_t pos)300 int32_t EcmaString::LastIndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos)
301 {
302     int rhsSize = static_cast<int>(rhsSp.size());
303     ASSERT(rhsSize > 0);
304     auto first = rhsSp[0];
305     for (int32_t i = pos; i >= 0; i--) {
306         if (lhsSp[i] != first) {
307             continue;
308         }
309         /* Found first character, now look at the rest of rhsSp */
310         int j = 1;
311         while (j < rhsSize) {
312             if (rhsSp[j] != lhsSp[i + j]) {
313                 break;
314             }
315             j++;
316         }
317         if (j == rhsSize) {
318             return i;
319         }
320     }
321     return -1;
322 }
323 
IndexOf(const EcmaVM * vm,const JSHandle<EcmaString> & receiver,const JSHandle<EcmaString> & search,int pos)324 int32_t EcmaString::IndexOf(const EcmaVM *vm,
325     const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos)
326 {
327     EcmaString *lhstring = *receiver;
328     EcmaString *rhstring = *search;
329     if (lhstring == nullptr || rhstring == nullptr) {
330         return -1;
331     }
332     int32_t lhsCount = static_cast<int32_t>(lhstring->GetLength());
333     int32_t rhsCount = static_cast<int32_t>(rhstring->GetLength());
334 
335     if (pos > lhsCount) {
336         return -1;
337     }
338 
339     if (rhsCount == 0) {
340         return pos;
341     }
342 
343     if (pos < 0) {
344         pos = 0;
345     }
346 
347     int32_t max = lhsCount - rhsCount;
348     if (max < 0) {
349         return -1;
350     }
351 
352     if (pos + rhsCount > lhsCount) {
353         return -1;
354     }
355 
356     FlatStringInfo lhs = FlattenAllString(vm, receiver);
357     JSHandle<EcmaString> string(vm->GetJSThread(), lhs.GetString());
358     FlatStringInfo rhs = FlattenAllString(vm, search);
359     lhs.SetString(*string);
360 
361     if (rhs.IsUtf8() && lhs.IsUtf8()) {
362         Span<const uint8_t> lhsSp(lhs.GetDataUtf8(), lhsCount);
363         Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
364         return EcmaString::IndexOf(lhsSp, rhsSp, pos, max);
365     } else if (rhs.IsUtf16() && lhs.IsUtf16()) {  // NOLINT(readability-else-after-return)
366         Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
367         Span<const uint16_t> rhsSp(rhs.GetDataUtf16(), rhsCount);
368         return EcmaString::IndexOf(lhsSp, rhsSp, pos, max);
369     } else if (rhs.IsUtf16()) {
370         return -1;
371     } else {  // NOLINT(readability-else-after-return)
372         Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
373         Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
374         return EcmaString::IndexOf(lhsSp, rhsSp, pos, max);
375     }
376 }
377 
LastIndexOf(const EcmaVM * vm,const JSHandle<EcmaString> & receiver,const JSHandle<EcmaString> & search,int pos)378 int32_t EcmaString::LastIndexOf(const EcmaVM *vm,
379     const JSHandle<EcmaString> &receiver, const JSHandle<EcmaString> &search, int pos)
380 {
381     EcmaString *lhstring = *receiver;
382     EcmaString *rhstring = *search;
383     if (lhstring == nullptr || rhstring == nullptr) {
384         return -1;
385     }
386 
387     int32_t lhsCount = static_cast<int32_t>(lhstring->GetLength());
388     int32_t rhsCount = static_cast<int32_t>(rhstring->GetLength());
389     if (lhsCount < rhsCount) {
390         return -1;
391     }
392 
393     if (pos < 0) {
394         pos = 0;
395     }
396 
397     if (pos > lhsCount) {
398         pos = lhsCount;
399     }
400 
401     if (pos + rhsCount > lhsCount) {
402         pos = lhsCount - rhsCount;
403     }
404 
405     if (rhsCount == 0) {
406         return pos;
407     }
408 
409     FlatStringInfo lhs = FlattenAllString(vm, receiver);
410     JSHandle<EcmaString> string(vm->GetJSThread(), lhs.GetString());
411     FlatStringInfo rhs = FlattenAllString(vm, search);
412     lhs.SetString(*string);
413     if (rhs.IsUtf8() && lhs.IsUtf8()) {
414         Span<const uint8_t> lhsSp(lhs.GetDataUtf8(), lhsCount);
415         Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
416         return EcmaString::LastIndexOf(lhsSp, rhsSp, pos);
417     } else if (rhs.IsUtf16() && lhs.IsUtf16()) {  // NOLINT(readability-else-after-return)
418         Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
419         Span<const uint16_t> rhsSp(rhs.GetDataUtf16(), rhsCount);
420         return EcmaString::LastIndexOf(lhsSp, rhsSp, pos);
421     } else if (rhs.IsUtf16()) {
422         return -1;
423     } else {  // NOLINT(readability-else-after-return)
424         Span<const uint16_t> lhsSp(lhs.GetDataUtf16(), lhsCount);
425         Span<const uint8_t> rhsSp(rhs.GetDataUtf8(), rhsCount);
426         return EcmaString::LastIndexOf(lhsSp, rhsSp, pos);
427     }
428 }
429 
ToU16String(uint32_t len)430 std::u16string EcmaString::ToU16String(uint32_t len)
431 {
432     uint32_t length = len > 0 ? len : GetLength();
433     std::u16string result;
434     if (IsUtf16()) {
435         CVector<uint16_t> buf;
436         const uint16_t *data = EcmaString::GetUtf16DataFlat(this, buf);
437         result = base::StringHelper::Utf16ToU16String(data, length);
438     } else {
439         CVector<uint8_t> buf;
440         const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
441         result = base::StringHelper::Utf8ToU16String(data, length);
442     }
443     return result;
444 }
445 
446 // static
CanBeCompressed(const EcmaString * string)447 bool EcmaString::CanBeCompressed(const EcmaString *string)
448 {
449     ASSERT(string->IsLineOrConstantString());
450     if (string->IsUtf8()) {
451         return CanBeCompressed(string->GetDataUtf8(), string->GetLength());
452     }
453     return CanBeCompressed(string->GetDataUtf16(), string->GetLength());
454 }
455 
456 // static
CanBeCompressed(const uint8_t * utf8Data,uint32_t utf8Len)457 bool EcmaString::CanBeCompressed(const uint8_t *utf8Data, uint32_t utf8Len)
458 {
459     bool isCompressed = true;
460     uint32_t index = 0;
461     // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
462     while (index < utf8Len) {
463         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
464         if (!IsASCIICharacter(utf8Data[index])) {
465             isCompressed = false;
466             break;
467         }
468         ++index;
469     }
470     return isCompressed;
471 }
472 
473 /* static */
CanBeCompressed(const uint16_t * utf16Data,uint32_t utf16Len)474 bool EcmaString::CanBeCompressed(const uint16_t *utf16Data, uint32_t utf16Len)
475 {
476     bool isCompressed = true;
477     Span<const uint16_t> data(utf16Data, utf16Len);
478     for (uint32_t i = 0; i < utf16Len; i++) {
479         if (!IsASCIICharacter(data[i])) {
480             isCompressed = false;
481             break;
482         }
483     }
484     return isCompressed;
485 }
486 
EqualToSplicedString(const EcmaString * str1,const EcmaString * str2)487 bool EcmaString::EqualToSplicedString(const EcmaString *str1, const EcmaString *str2)
488 {
489     ASSERT(NotTreeString());
490     ASSERT(str1->NotTreeString() && str2->NotTreeString());
491     if (GetLength() != str1->GetLength() + str2->GetLength()) {
492         return false;
493     }
494     if (IsUtf16()) {
495         CVector<uint16_t> buf;
496         const uint16_t *data = EcmaString::GetUtf16DataFlat(this, buf);
497         if (EcmaString::StringsAreEqualUtf16(str1, data, str1->GetLength())) {
498             return EcmaString::StringsAreEqualUtf16(str2, data + str1->GetLength(), str2->GetLength());
499         }
500     } else {
501         CVector<uint8_t> buf;
502         const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
503         if (EcmaString::StringIsEqualUint8Data(str1, data, str1->GetLength(), this->IsUtf8())) {
504             return EcmaString::StringIsEqualUint8Data(str2, data + str1->GetLength(),
505                                                       str2->GetLength(), this->IsUtf8());
506         }
507     }
508     return false;
509 }
510 
511 /* static */
StringsAreEqualDiffUtfEncoding(EcmaString * left,EcmaString * right)512 bool EcmaString::StringsAreEqualDiffUtfEncoding(EcmaString *left, EcmaString *right)
513 {
514     CVector<uint16_t> bufLeftUft16;
515     CVector<uint16_t> bufRightUft16;
516     CVector<uint8_t> bufLeftUft8;
517     CVector<uint8_t> bufRightUft8;
518     int32_t lhsCount = static_cast<int32_t>(left->GetLength());
519     int32_t rhsCount = static_cast<int32_t>(right->GetLength());
520     if (!left->IsUtf16() && !right->IsUtf16()) {
521         const uint8_t *data1 = EcmaString::GetUtf8DataFlat(left, bufLeftUft8);
522         const uint8_t *data2 = EcmaString::GetUtf8DataFlat(right, bufRightUft8);
523         Span<const uint8_t> lhsSp(data1, lhsCount);
524         Span<const uint8_t> rhsSp(data2, rhsCount);
525         return EcmaString::StringsAreEquals(lhsSp, rhsSp);
526     } else if (!left->IsUtf16()) {
527         const uint8_t *data1 = EcmaString::GetUtf8DataFlat(left, bufLeftUft8);
528         const uint16_t *data2 = EcmaString::GetUtf16DataFlat(right, bufRightUft16);
529         Span<const uint8_t> lhsSp(data1, lhsCount);
530         Span<const uint16_t> rhsSp(data2, rhsCount);
531         return EcmaString::StringsAreEquals(lhsSp, rhsSp);
532     } else if (!right->IsUtf16()) {
533         const uint16_t *data1 = EcmaString::GetUtf16DataFlat(left, bufLeftUft16);
534         const uint8_t *data2 = EcmaString::GetUtf8DataFlat(right, bufRightUft8);
535         Span<const uint16_t> lhsSp(data1, lhsCount);
536         Span<const uint8_t> rhsSp(data2, rhsCount);
537         return EcmaString::StringsAreEquals(lhsSp, rhsSp);
538     } else {
539         const uint16_t *data1 = EcmaString::GetUtf16DataFlat(left, bufLeftUft16);
540         const uint16_t *data2 = EcmaString::GetUtf16DataFlat(right, bufRightUft16);
541         Span<const uint16_t> lhsSp(data1, lhsCount);
542         Span<const uint16_t> rhsSp(data2, rhsCount);
543         return EcmaString::StringsAreEquals(lhsSp, rhsSp);
544     }
545 }
546 
547 /* static */
StringsAreEqualDiffUtfEncoding(const FlatStringInfo & left,const FlatStringInfo & right)548 bool EcmaString::StringsAreEqualDiffUtfEncoding(const FlatStringInfo &left, const FlatStringInfo &right)
549 {
550     int32_t lhsCount = static_cast<int32_t>(left.GetLength());
551     int32_t rhsCount = static_cast<int32_t>(right.GetLength());
552     if (!left.IsUtf16() && !right.IsUtf16()) {
553         Span<const uint8_t> lhsSp(left.GetDataUtf8(), lhsCount);
554         Span<const uint8_t> rhsSp(right.GetDataUtf8(), rhsCount);
555         return EcmaString::StringsAreEquals(lhsSp, rhsSp);
556     } else if (!left.IsUtf16()) {
557         Span<const uint8_t> lhsSp(left.GetDataUtf8(), lhsCount);
558         Span<const uint16_t> rhsSp(right.GetDataUtf16(), rhsCount);
559         return EcmaString::StringsAreEquals(lhsSp, rhsSp);
560     } else if (!right.IsUtf16()) {
561         Span<const uint16_t> lhsSp(left.GetDataUtf16(), rhsCount);
562         Span<const uint8_t> rhsSp(right.GetDataUtf8(), lhsCount);
563         return EcmaString::StringsAreEquals(lhsSp, rhsSp);
564     } else {
565         Span<const uint16_t> lhsSp(left.GetDataUtf16(), lhsCount);
566         Span<const uint16_t> rhsSp(right.GetDataUtf16(), rhsCount);
567         return EcmaString::StringsAreEquals(lhsSp, rhsSp);
568     }
569 }
570 
StringsAreEqual(const EcmaVM * vm,const JSHandle<EcmaString> & str1,const JSHandle<EcmaString> & str2)571 bool EcmaString::StringsAreEqual(const EcmaVM *vm, const JSHandle<EcmaString> &str1, const JSHandle<EcmaString> &str2)
572 {
573     if (str1 == str2) {
574         return true;
575     }
576     if (str1->IsInternString() && str2->IsInternString()) {
577         return false;
578     }
579     uint32_t str1Len = str1->GetLength();
580     if (str1Len != str2->GetLength()) {
581         return false;
582     }
583     if (str1Len == 0) {
584         return true;
585     }
586 
587     uint32_t str1Hash;
588     uint32_t str2Hash;
589     if (str1->TryGetHashCode(&str1Hash) && str2->TryGetHashCode(&str2Hash)) {
590         if (str1Hash != str2Hash) {
591             return false;
592         }
593     }
594     FlatStringInfo str1Flat = FlattenAllString(vm, str1);
595     JSHandle<EcmaString> string(vm->GetJSThread(), str1Flat.GetString());
596     FlatStringInfo str2Flat = FlattenAllString(vm, str2);
597     str1Flat.SetString(*string);
598     return StringsAreEqualDiffUtfEncoding(str1Flat, str2Flat);
599 }
600 
601 /* static */
StringsAreEqual(EcmaString * str1,EcmaString * str2)602 bool EcmaString::StringsAreEqual(EcmaString *str1, EcmaString *str2)
603 {
604     if (str1 == str2) {
605         return true;
606     }
607     uint32_t str1Len = str1->GetLength();
608     if (str1Len != str2->GetLength()) {
609         return false;
610     }
611     if (str1Len == 0) {
612         return true;
613     }
614 
615     uint32_t str1Hash;
616     uint32_t str2Hash;
617     if (str1->TryGetHashCode(&str1Hash) && str2->TryGetHashCode(&str2Hash)) {
618         if (str1Hash != str2Hash) {
619             return false;
620         }
621     }
622     return StringsAreEqualDiffUtfEncoding(str1, str2);
623 }
624 
625 /* static */
StringIsEqualUint8Data(const EcmaString * str1,const uint8_t * dataAddr,uint32_t dataLen,bool canBeCompressToUtf8)626 bool EcmaString::StringIsEqualUint8Data(const EcmaString *str1, const uint8_t *dataAddr, uint32_t dataLen,
627                                         bool canBeCompressToUtf8)
628 {
629     if (!str1->IsSlicedString() && canBeCompressToUtf8 != str1->IsUtf8()) {
630         return false;
631     }
632     if (canBeCompressToUtf8 && str1->GetLength() != dataLen) {
633         return false;
634     }
635     if (str1->IsUtf8()) {
636         CVector<uint8_t> buf;
637         Span<const uint8_t> data1(EcmaString::GetUtf8DataFlat(str1, buf), dataLen);
638         Span<const uint8_t> data2(dataAddr, dataLen);
639         return EcmaString::StringsAreEquals(data1, data2);
640     }
641     CVector<uint16_t> buf;
642     uint32_t length = str1->GetLength();
643     const uint16_t *data = EcmaString::GetUtf16DataFlat(str1, buf);
644     return IsUtf8EqualsUtf16(dataAddr, dataLen, data, length);
645 }
646 
647 /* static */
StringsAreEqualUtf16(const EcmaString * str1,const uint16_t * utf16Data,uint32_t utf16Len)648 bool EcmaString::StringsAreEqualUtf16(const EcmaString *str1, const uint16_t *utf16Data, uint32_t utf16Len)
649 {
650     uint32_t length = str1->GetLength();
651     if (length != utf16Len) {
652         return false;
653     }
654     if (str1->IsUtf8()) {
655         CVector<uint8_t> buf;
656         const uint8_t *data = EcmaString::GetUtf8DataFlat(str1, buf);
657         return IsUtf8EqualsUtf16(data, length, utf16Data, utf16Len);
658     } else {
659         CVector<uint16_t> buf;
660         Span<const uint16_t> data1(EcmaString::GetUtf16DataFlat(str1, buf), length);
661         Span<const uint16_t> data2(utf16Data, utf16Len);
662         return EcmaString::StringsAreEquals(data1, data2);
663     }
664 }
665 
666 template<typename T>
MemCopyChars(Span<T> & dst,size_t dstMax,Span<const T> & src,size_t count)667 bool EcmaString::MemCopyChars(Span<T> &dst, size_t dstMax, Span<const T> &src, size_t count)
668 {
669     ASSERT(dstMax >= count);
670     ASSERT(dst.Size() >= src.Size());
671     if (memcpy_s(dst.data(), dstMax, src.data(), count) != EOK) {
672         LOG_FULL(FATAL) << "memcpy_s failed";
673         UNREACHABLE();
674     }
675     return true;
676 }
677 
HashIntegerString(uint32_t length,uint32_t * hash,const uint32_t hashSeed) const678 bool EcmaString::HashIntegerString(uint32_t length, uint32_t *hash, const uint32_t hashSeed) const
679 {
680     ASSERT(length >= 0);
681     Span<const uint8_t> str = FastToUtf8Span();
682     return HashIntegerString(str.data(), length, hash, hashSeed);
683 }
684 
ComputeHashcode() const685 uint32_t EcmaString::ComputeHashcode() const
686 {
687     auto [hash, isInteger] = ComputeRawHashcode();
688     return MixHashcode(hash, isInteger);
689 }
690 
691 // hashSeed only be used when computing two separate strings merged hashcode.
ComputeRawHashcode() const692 std::pair<uint32_t, bool> EcmaString::ComputeRawHashcode() const
693 {
694     uint32_t hash = 0;
695     uint32_t length = GetLength();
696     if (length == 0) {
697         return {hash, false};
698     }
699 
700     if (IsUtf8()) {
701         // String using UTF8 encoding, and length smaller than 10, try to compute integer hash.
702         if (length < MAX_ELEMENT_INDEX_LEN && this->HashIntegerString(length, &hash, 0)) {
703             return {hash, true};
704         }
705         CVector<uint8_t> buf;
706         const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
707         // String can not convert to integer number, using normal hashcode computing algorithm.
708         hash = this->ComputeHashForData(data, length, 0);
709         return {hash, false};
710     } else {
711         CVector<uint16_t> buf;
712         const uint16_t *data = EcmaString::GetUtf16DataFlat(this, buf);
713         // If rawSeed has certain value, and second string uses UTF16 encoding,
714         // then merged string can not be small integer number.
715         hash = this->ComputeHashForData(data, length, 0);
716         return {hash, false};
717     }
718 }
719 
720 // hashSeed only be used when computing two separate strings merged hashcode.
ComputeHashcode(uint32_t rawHashSeed,bool isInteger) const721 uint32_t EcmaString::ComputeHashcode(uint32_t rawHashSeed, bool isInteger) const
722 {
723     uint32_t hash;
724     uint32_t length = GetLength();
725     if (length == 0) {
726         return MixHashcode(rawHashSeed, isInteger);
727     }
728 
729     if (IsUtf8()) {
730         // String using UTF8 encoding, and length smaller than 10, try to compute integer hash.
731         if ((rawHashSeed == 0 || isInteger) &&
732              length < MAX_ELEMENT_INDEX_LEN && this->HashIntegerString(length, &hash, rawHashSeed)) {
733             return hash;
734         }
735         CVector<uint8_t> buf;
736         const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
737         // String can not convert to integer number, using normal hashcode computing algorithm.
738         hash = this->ComputeHashForData(data, length, rawHashSeed);
739         return MixHashcode(hash, NOT_INTEGER);
740     } else {
741         CVector<uint16_t> buf;
742         const uint16_t *data = EcmaString::GetUtf16DataFlat(this, buf);
743         // If rawSeed has certain value, and second string uses UTF16 encoding,
744         // then merged string can not be small integer number.
745         hash = this->ComputeHashForData(data, length, rawHashSeed);
746         return MixHashcode(hash, NOT_INTEGER);
747     }
748 }
749 
750 /* static */
ComputeHashcodeUtf8(const uint8_t * utf8Data,size_t utf8Len,bool canBeCompress)751 uint32_t EcmaString::ComputeHashcodeUtf8(const uint8_t *utf8Data, size_t utf8Len, bool canBeCompress)
752 {
753     uint32_t mixHash = 0;
754     if (canBeCompress) {
755         // String using UTF8 encoding, and length smaller than 10, try to compute integer hash.
756         if (utf8Len < MAX_ELEMENT_INDEX_LEN && HashIntegerString(utf8Data, utf8Len, &mixHash, 0)) {
757             return mixHash;
758         }
759         uint32_t hash = ComputeHashForData(utf8Data, utf8Len, 0);
760         return MixHashcode(hash, NOT_INTEGER);
761     } else {
762         auto utf16Len = base::utf_helper::Utf8ToUtf16Size(utf8Data, utf8Len);
763         CVector<uint16_t> tmpBuffer(utf16Len);
764         [[maybe_unused]] auto len = base::utf_helper::ConvertRegionUtf8ToUtf16(utf8Data, tmpBuffer.data(), utf8Len,
765                                                                                utf16Len, 0);
766         ASSERT(len == utf16Len);
767         uint32_t hash = ComputeHashForData(tmpBuffer.data(), utf16Len, 0);
768         return MixHashcode(hash, NOT_INTEGER);
769     }
770     LOG_ECMA(FATAL) << "this branch is unreachable";
771     UNREACHABLE();
772 }
773 
774 /* static */
ComputeHashcodeUtf16(const uint16_t * utf16Data,uint32_t length)775 uint32_t EcmaString::ComputeHashcodeUtf16(const uint16_t *utf16Data, uint32_t length)
776 {
777     uint32_t mixHash = 0;
778     // String length smaller than 10, try to compute integer hash.
779     if (length < MAX_ELEMENT_INDEX_LEN && HashIntegerString(utf16Data, length, &mixHash, 0)) {
780         return mixHash;
781     }
782     uint32_t hash = ComputeHashForData(utf16Data, length, 0);
783     return MixHashcode(hash, NOT_INTEGER);
784 }
785 
786 /* static */
IsUtf8EqualsUtf16(const uint8_t * utf8Data,size_t utf8Len,const uint16_t * utf16Data,uint32_t utf16Len)787 bool EcmaString::IsUtf8EqualsUtf16(const uint8_t *utf8Data, size_t utf8Len, const uint16_t *utf16Data,
788                                    uint32_t utf16Len)
789 {
790     size_t utf8Pos = 0;
791     size_t utf16Pos = 0;
792     while (utf8Pos < utf8Len) {
793         auto [pair, nbytes] = utf::ConvertMUtf8ToUtf16Pair(utf8Data, utf8Len - utf8Pos);
794         auto [pHigh, pLow] = utf::SplitUtf16Pair(pair);
795         utf8Data += nbytes;
796         utf8Pos += nbytes;
797         if (pHigh != 0) {
798             if (utf16Pos >= utf16Len - 1 || *utf16Data != pHigh) {
799                 return false;
800             }
801             ++utf16Pos;
802             ++utf16Data;
803         }
804         if (utf16Pos >= utf16Len || *utf16Data != pLow) {
805             return false;
806         }
807         ++utf16Pos;
808         ++utf16Data;
809     }
810     return true;
811 }
812 
ToElementIndex(uint32_t * index)813 bool EcmaString::ToElementIndex(uint32_t *index)
814 {
815     uint32_t len = GetLength();
816     if (UNLIKELY(len == 0 || len > MAX_ELEMENT_INDEX_LEN)) {  // NOLINTNEXTLINEreadability-magic-numbers)
817         return false;
818     }
819     if (UNLIKELY(IsUtf16())) {
820         return false;
821     }
822 
823     // fast path: get integer from string's hash value
824     if (TryToGetInteger(index)) {
825         return true;
826     }
827 
828     CVector<uint8_t> buf;
829     const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
830     uint32_t c = data[0];
831     uint64_t n = 0;
832     if (c == '0') {
833         *index = 0;
834         return len == 1;
835     }
836     uint32_t loopStart = 0;
837     if (ToUInt64FromLoopStart(&n, loopStart, data) && n < JSObject::MAX_ELEMENT_INDEX) {
838         *index = n;
839         return true;
840     }
841     return false;
842 }
843 
ToInt(int32_t * index,bool * negative)844 bool EcmaString::ToInt(int32_t *index, bool *negative)
845 {
846     uint32_t len = GetLength();
847     if (UNLIKELY(len == 0 || len > MAX_ELEMENT_INDEX_LEN)) {  // NOLINTNEXTLINEreadability-magic-numbers)
848         return false;
849     }
850     if (UNLIKELY(IsUtf16())) {
851         return false;
852     }
853     CVector<uint8_t> buf;
854     const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
855     uint32_t c = data[0];
856     uint32_t loopStart = 0;
857     uint64_t n = 0;
858     if (c == '0') {
859         *index = 0;
860         return len == 1;
861     }
862     if (c == '-' && len > 1) {
863         *negative = true;
864         loopStart = 1;
865     }
866 
867     if (ToUInt64FromLoopStart(&n, loopStart, data) && n <= std::numeric_limits<int32_t>::max()) {
868         *index = *negative ? -n : n;
869         return true;
870     }
871     return false;
872 }
873 
ToUInt64FromLoopStart(uint64_t * index,uint32_t loopStart,const uint8_t * data)874 bool EcmaString::ToUInt64FromLoopStart(uint64_t *index, uint32_t loopStart, const uint8_t *data)
875 {
876     uint64_t n = 0;
877     uint32_t len = GetLength();
878     if (UNLIKELY(loopStart >= len)) {
879         return false;
880     }
881     for (uint32_t i = loopStart; i < len; i++) {
882         uint32_t c = data[i];  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
883         if (c < '0' || c > '9') {
884             return false;
885         }
886         // NOLINTNEXTLINE(readability-magic-numbers)
887         n = n * 10 + (c - '0');  // 10: decimal factor
888     }
889     *index = n;
890     return true;
891 }
892 
ToTypedArrayIndex(uint32_t * index)893 bool EcmaString::ToTypedArrayIndex(uint32_t *index)
894 {
895     uint32_t len = GetLength();
896     if (UNLIKELY(len == 0 || len > MAX_ELEMENT_INDEX_LEN)) {
897         return false;
898     }
899     if (UNLIKELY(IsUtf16())) {
900         return false;
901     }
902 
903     CVector<uint8_t> buf;
904     const uint8_t *data = EcmaString::GetUtf8DataFlat(this, buf);
905     uint32_t c = data[0];  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
906     uint64_t n = 0;
907     if (c == '0') {
908         *index = 0;
909         return len == 1;
910     }
911     if (c > '0' && c <= '9') {
912         n = c - '0';
913         for (uint32_t i = 1; i < len; i++) {
914             c = data[i];  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
915             if (c >= '0' && c <= '9') {
916                 // NOLINTNEXTLINE(readability-magic-numbers)
917                 n = n * 10 + (c - '0');  // 10: decimal factor
918             } else if (c == '.') {
919                 n = JSObject::MAX_ELEMENT_INDEX;
920                 break;
921             } else {
922                 return false;
923             }
924         }
925         if (n < JSObject::MAX_ELEMENT_INDEX) {
926             *index = n;
927             return true;
928         } else {
929             *index = JSObject::MAX_ELEMENT_INDEX;
930             return true;
931         }
932     } else if (c == '-') {
933         *index = JSObject::MAX_ELEMENT_INDEX;
934         return true;
935     }
936     return false;
937 }
938 
939 template<typename T>
TrimBody(const JSThread * thread,const JSHandle<EcmaString> & src,Span<T> & data,TrimMode mode)940 EcmaString *EcmaString::TrimBody(const JSThread *thread, const JSHandle<EcmaString> &src, Span<T> &data, TrimMode mode)
941 {
942     uint32_t srcLen = src->GetLength();
943     int32_t start = 0;
944     int32_t end = static_cast<int32_t>(srcLen) - 1;
945 
946     if (mode == TrimMode::TRIM || mode == TrimMode::TRIM_START) {
947         start = static_cast<int32_t>(base::StringHelper::GetStart(data, srcLen));
948     }
949     if (mode == TrimMode::TRIM || mode == TrimMode::TRIM_END) {
950         end = base::StringHelper::GetEnd(data, start, srcLen);
951     }
952     EcmaString *res = FastSubString(thread->GetEcmaVM(), src, start, static_cast<uint32_t>(end - start + 1));
953     return res;
954 }
955 
956 /* static */
ToLower(const EcmaVM * vm,const JSHandle<EcmaString> & src)957 EcmaString *EcmaString::ToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src)
958 {
959     auto srcFlat = FlattenAllString(vm, src);
960     uint32_t srcLength = srcFlat.GetLength();
961     auto factory = vm->GetFactory();
962     if (srcFlat.IsUtf16()) {
963         std::u16string u16str = base::StringHelper::Utf16ToU16String(srcFlat.GetDataUtf16(), srcLength);
964         std::string res = base::StringHelper::ToLower(u16str);
965         return *(factory->NewFromStdString(res));
966     } else {
967         return ConvertUtf8ToLowerOrUpper(vm, src, true);
968     }
969 }
970 
971 /* static */
TryToLower(const EcmaVM * vm,const JSHandle<EcmaString> & src)972 EcmaString *EcmaString::TryToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src)
973 {
974     auto srcFlat = FlattenAllString(vm, src);
975     uint32_t srcLength = srcFlat.GetLength();
976     const char start = 'A';
977     const char end = 'Z';
978     uint32_t upperIndex = srcLength;
979     Span<uint8_t> data(srcFlat.GetDataUtf8Writable(), srcLength);
980     for (uint32_t index = 0; index < srcLength; ++index) {
981         if (base::StringHelper::Utf8CharInRange(data[index], start, end)) {
982             upperIndex = index;
983             break;
984         }
985     }
986     if (upperIndex == srcLength) {
987         return *src;
988     }
989     return ConvertUtf8ToLowerOrUpper(vm, src, true, upperIndex);
990 }
991 
992 /* static */
ConvertUtf8ToLowerOrUpper(const EcmaVM * vm,const JSHandle<EcmaString> & src,bool toLower,uint32_t startIndex)993 EcmaString *EcmaString::ConvertUtf8ToLowerOrUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src,
994                                                   bool toLower, uint32_t startIndex)
995 {
996     const char start = toLower ? 'A' : 'a';
997     const char end = toLower ? 'Z' : 'z';
998     uint32_t srcLength = src->GetLength();
999     JSHandle<EcmaString> newString(vm->GetJSThread(), CreateLineString(vm, srcLength, true));
1000     auto srcFlat = FlattenAllString(vm, src);
1001     Span<uint8_t> data(srcFlat.GetDataUtf8Writable(), srcLength);
1002     auto newStringPtr = newString->GetDataUtf8Writable();
1003     if (startIndex > 0) {
1004         if (memcpy_s(newStringPtr, startIndex * sizeof(uint8_t), data.data(), startIndex * sizeof(uint8_t)) != EOK) {
1005             LOG_FULL(FATAL) << "memcpy_s failed";
1006             UNREACHABLE();
1007         }
1008     }
1009     for (uint32_t index = startIndex; index < srcLength; ++index) {
1010         if (base::StringHelper::Utf8CharInRange(data[index], start, end)) {
1011             *(newStringPtr + index) = data[index] ^ (1 << 5);   // 1 and 5 means lower to upper or upper to lower
1012         } else {
1013             *(newStringPtr + index) = data[index];
1014         }
1015     }
1016     return *newString;
1017 }
1018 
1019 /* static */
ToUpper(const EcmaVM * vm,const JSHandle<EcmaString> & src)1020 EcmaString *EcmaString::ToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src)
1021 {
1022     FlatStringInfo srcFlat = FlattenAllString(vm, src);
1023     uint32_t srcLength = srcFlat.GetLength();
1024     auto factory = vm->GetFactory();
1025     if (srcFlat.IsUtf16()) {
1026         std::u16string u16str = base::StringHelper::Utf16ToU16String(srcFlat.GetDataUtf16(), srcLength);
1027         std::string res = base::StringHelper::ToUpper(u16str);
1028         return *(factory->NewFromStdString(res));
1029     } else {
1030         return ConvertUtf8ToLowerOrUpper(vm, src, false);
1031     }
1032 }
1033 
1034 /* static */
ToLocaleLower(const EcmaVM * vm,const JSHandle<EcmaString> & src,const icu::Locale & locale)1035 EcmaString *EcmaString::ToLocaleLower(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale)
1036 {
1037     auto factory = vm->GetFactory();
1038     FlatStringInfo srcFlat = FlattenAllString(vm, src);
1039     std::u16string utf16 = srcFlat.ToU16String();
1040     std::string res = base::StringHelper::ToLocaleLower(utf16, locale);
1041     return *(factory->NewFromStdString(res));
1042 }
1043 
1044 /* static */
ToLocaleUpper(const EcmaVM * vm,const JSHandle<EcmaString> & src,const icu::Locale & locale)1045 EcmaString *EcmaString::ToLocaleUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale)
1046 {
1047     auto factory = vm->GetFactory();
1048     FlatStringInfo srcFlat = FlattenAllString(vm, src);
1049     std::u16string utf16 = srcFlat.ToU16String();
1050     std::string res = base::StringHelper::ToLocaleUpper(utf16, locale);
1051     return *(factory->NewFromStdString(res));
1052 }
1053 
Trim(const JSThread * thread,const JSHandle<EcmaString> & src,TrimMode mode)1054 EcmaString *EcmaString::Trim(const JSThread *thread, const JSHandle<EcmaString> &src, TrimMode mode)
1055 {
1056     FlatStringInfo srcFlat = FlattenAllString(thread->GetEcmaVM(), src);
1057     uint32_t srcLen = srcFlat.GetLength();
1058     if (UNLIKELY(srcLen == 0)) {
1059         return EcmaString::Cast(thread->GlobalConstants()->GetEmptyString().GetTaggedObject());
1060     }
1061     if (srcFlat.IsUtf8()) {
1062         Span<const uint8_t> data(srcFlat.GetDataUtf8(), srcLen);
1063         return TrimBody(thread, src, data, mode);
1064     } else {
1065         Span<const uint16_t> data(srcFlat.GetDataUtf16(), srcLen);
1066         return TrimBody(thread, src, data, mode);
1067     }
1068 }
1069 
SlowFlatten(const EcmaVM * vm,const JSHandle<EcmaString> & string,MemSpaceType type)1070 EcmaString *EcmaString::SlowFlatten(const EcmaVM *vm, const JSHandle<EcmaString> &string, MemSpaceType type)
1071 {
1072     ASSERT(string->IsTreeString() || string->IsSlicedString());
1073     auto thread = vm->GetJSThread();
1074     uint32_t length = string->GetLength();
1075     EcmaString *result = nullptr;
1076     if (string->IsUtf8()) {
1077         result = CreateLineStringWithSpaceType(vm, length, true, type);
1078         WriteToFlat<uint8_t>(*string, result->GetDataUtf8Writable(), length);
1079     } else {
1080         result = CreateLineStringWithSpaceType(vm, length, false, type);
1081         WriteToFlat<uint16_t>(*string, result->GetDataUtf16Writable(), length);
1082     }
1083     if (string->IsTreeString()) {
1084         JSHandle<TreeEcmaString> tree(string);
1085         ASSERT(EcmaString::Cast(tree->GetSecond())->GetLength() != 0);
1086         tree->SetFirst(thread, JSTaggedValue(result));
1087         tree->SetSecond(thread, JSTaggedValue(*vm->GetFactory()->GetEmptyString()));
1088     }
1089     return result;
1090 }
1091 
Flatten(const EcmaVM * vm,const JSHandle<EcmaString> & string,MemSpaceType type)1092 EcmaString *EcmaString::Flatten(const EcmaVM *vm, const JSHandle<EcmaString> &string, MemSpaceType type)
1093 {
1094     EcmaString *s = *string;
1095     if (s->IsLineOrConstantString() || s->IsSlicedString()) {
1096         return s;
1097     }
1098     if (s->IsTreeString()) {
1099         JSHandle<TreeEcmaString> tree = JSHandle<TreeEcmaString>::Cast(string);
1100         if (!tree->IsFlat()) {
1101             return SlowFlatten(vm, string, type);
1102         }
1103         s = EcmaString::Cast(tree->GetFirst());
1104     }
1105     return s;
1106 }
1107 
FlattenAllString(const EcmaVM * vm,const JSHandle<EcmaString> & string,MemSpaceType type)1108 FlatStringInfo EcmaString::FlattenAllString(const EcmaVM *vm, const JSHandle<EcmaString> &string, MemSpaceType type)
1109 {
1110     EcmaString *s = *string;
1111     uint32_t startIndex = 0;
1112     if (s->IsLineOrConstantString()) {
1113         return FlatStringInfo(s, startIndex, s->GetLength());
1114     }
1115     if (string->IsTreeString()) {
1116         JSHandle<TreeEcmaString> tree = JSHandle<TreeEcmaString>::Cast(string);
1117         if (!tree->IsFlat()) {
1118             s = SlowFlatten(vm, string, type);
1119         } else {
1120             s = EcmaString::Cast(tree->GetFirst());
1121         }
1122     } else if (string->IsSlicedString()) {
1123         s = EcmaString::Cast(SlicedString::Cast(*string)->GetParent());
1124         startIndex = SlicedString::Cast(*string)->GetStartIndex();
1125     }
1126     return FlatStringInfo(s, startIndex, string->GetLength());
1127 }
1128 
FlattenNoGC(const EcmaVM * vm,EcmaString * string)1129 EcmaString *EcmaString::FlattenNoGC(const EcmaVM *vm, EcmaString *string)
1130 {
1131     DISALLOW_GARBAGE_COLLECTION;
1132     if (string->IsLineOrConstantString()) {
1133         return string;
1134     }
1135     if (string->IsTreeString()) {
1136         TreeEcmaString *tree = TreeEcmaString::Cast(string);
1137         if (tree->IsFlat()) {
1138             string = EcmaString::Cast(tree->GetFirst());
1139         } else {
1140             uint32_t length = tree->GetLength();
1141             EcmaString *result = nullptr;
1142             if (tree->IsUtf8()) {
1143                 result = CreateLineStringNoGC(vm, length, true);
1144                 WriteToFlat<uint8_t>(tree, result->GetDataUtf8Writable(), length);
1145             } else {
1146                 result = CreateLineStringNoGC(vm, length, false);
1147                 WriteToFlat<uint16_t>(tree, result->GetDataUtf16Writable(), length);
1148             }
1149             tree->SetFirst(vm->GetJSThread(), JSTaggedValue(result));
1150             tree->SetSecond(vm->GetJSThread(), JSTaggedValue(*vm->GetFactory()->GetEmptyString()));
1151             return result;
1152         }
1153     } else if (string->IsSlicedString()) {
1154         SlicedString *str = SlicedString::Cast(string);
1155         uint32_t length = str->GetLength();
1156         EcmaString *result = nullptr;
1157         if (str->IsUtf8()) {
1158             result = CreateLineStringNoGC(vm, length, true);
1159             WriteToFlat<uint8_t>(str, result->GetDataUtf8Writable(), length);
1160         } else {
1161             result = CreateLineStringNoGC(vm, length, false);
1162             WriteToFlat<uint16_t>(str, result->GetDataUtf16Writable(), length);
1163         }
1164         return result;
1165     }
1166     return string;
1167 }
1168 
GetUtf8DataFlat(const EcmaString * src,CVector<uint8_t> & buf)1169 const uint8_t *EcmaString::GetUtf8DataFlat(const EcmaString *src, CVector<uint8_t> &buf)
1170 {
1171     ASSERT(src->IsUtf8());
1172     uint32_t length = src->GetLength();
1173     EcmaString *string = const_cast<EcmaString *>(src);
1174     if (string->IsTreeString()) {
1175         if (string->IsFlat()) {
1176             string = EcmaString::Cast(TreeEcmaString::Cast(string)->GetFirst());
1177         } else {
1178             buf.reserve(length);
1179             WriteToFlat(string, buf.data(), length);
1180             return buf.data();
1181         }
1182     } else if (string->IsSlicedString()) {
1183         SlicedString *str = SlicedString::Cast(string);
1184         return EcmaString::Cast(str->GetParent())->GetDataUtf8() + str->GetStartIndex();
1185     }
1186     return string->GetDataUtf8();
1187 }
1188 
GetUtf16DataFlat(const EcmaString * src,CVector<uint16_t> & buf)1189 const uint16_t *EcmaString::GetUtf16DataFlat(const EcmaString *src, CVector<uint16_t> &buf)
1190 {
1191     ASSERT(src->IsUtf16());
1192     uint32_t length = src->GetLength();
1193     EcmaString *string = const_cast<EcmaString *>(src);
1194     if (string->IsTreeString()) {
1195         if (string->IsFlat()) {
1196             string = EcmaString::Cast(TreeEcmaString::Cast(string)->GetFirst());
1197         } else {
1198             buf.reserve(length);
1199             WriteToFlat(string, buf.data(), length);
1200             return buf.data();
1201         }
1202     } else if (string->IsSlicedString()) {
1203         SlicedString *str = SlicedString::Cast(string);
1204         return EcmaString::Cast(str->GetParent())->GetDataUtf16() + str->GetStartIndex();
1205     }
1206     return string->GetDataUtf16();
1207 }
1208 
ToU16String(uint32_t len)1209 std::u16string FlatStringInfo::ToU16String(uint32_t len)
1210 {
1211     uint32_t length = len > 0 ? len : GetLength();
1212     std::u16string result;
1213     if (IsUtf16()) {
1214         const uint16_t *data = this->GetDataUtf16();
1215         result = base::StringHelper::Utf16ToU16String(data, length);
1216     } else {
1217         const uint8_t *data = this->GetDataUtf8();
1218         result = base::StringHelper::Utf8ToU16String(data, length);
1219     }
1220     return result;
1221 }
1222 
EcmaStringAccessor(EcmaString * string)1223 EcmaStringAccessor::EcmaStringAccessor(EcmaString *string)
1224 {
1225     ASSERT(string != nullptr);
1226     string_ = string;
1227 }
1228 
EcmaStringAccessor(TaggedObject * obj)1229 EcmaStringAccessor::EcmaStringAccessor(TaggedObject *obj)
1230 {
1231     ASSERT(obj != nullptr);
1232     string_ = EcmaString::Cast(obj);
1233 }
1234 
EcmaStringAccessor(JSTaggedValue value)1235 EcmaStringAccessor::EcmaStringAccessor(JSTaggedValue value)
1236 {
1237     ASSERT(value.IsString());
1238     string_ = EcmaString::Cast(value.GetTaggedObject());
1239 }
1240 
EcmaStringAccessor(const JSHandle<EcmaString> & strHandle)1241 EcmaStringAccessor::EcmaStringAccessor(const JSHandle<EcmaString> &strHandle)
1242     : string_(*strHandle)
1243 {
1244 }
1245 
ToStdString(StringConvertedUsage usage)1246 std::string EcmaStringAccessor::ToStdString(StringConvertedUsage usage)
1247 {
1248     if (string_ == nullptr) {
1249         return "";
1250     }
1251     bool modify = (usage != StringConvertedUsage::PRINT);
1252     CVector<uint8_t> buf;
1253     Span<const uint8_t> sp = string_->ToUtf8Span(buf, modify);
1254     std::string res;
1255     res.reserve(sp.size());
1256     for (const auto &c : sp) {
1257         res.push_back(c);
1258     }
1259     return res;
1260 }
1261 
DebuggerToStdString(StringConvertedUsage usage)1262 std::string EcmaStringAccessor::DebuggerToStdString(StringConvertedUsage usage)
1263 {
1264     if (string_ == nullptr) {
1265         return "";
1266     }
1267 
1268     bool modify = (usage != StringConvertedUsage::PRINT);
1269     CVector<uint8_t> buf;
1270     Span<const uint8_t> sp = string_->DebuggerToUtf8Span(buf, modify);
1271     std::string res;
1272     res.reserve(sp.size());
1273     for (const auto &c : sp) {
1274         res.push_back(c);
1275     }
1276     return res;
1277 }
1278 
ToCString(StringConvertedUsage usage)1279 CString EcmaStringAccessor::ToCString(StringConvertedUsage usage)
1280 {
1281     if (string_ == nullptr) {
1282         return "";
1283     }
1284     bool modify = (usage != StringConvertedUsage::PRINT);
1285     CVector<uint8_t> buf;
1286     Span<const uint8_t> sp = string_->ToUtf8Span(buf, modify);
1287     CString res;
1288     res.reserve(sp.size());
1289     for (const auto &c : sp) {
1290         res.push_back(c);
1291     }
1292     return res;
1293 }
1294 }  // namespace panda::ecmascript
1295