• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "ecmascript/ecma_string-inl.h"
17 
18 #include "ecmascript/js_symbol.h"
19 #include "ecmascript/mem/c_containers.h"
20 
21 namespace panda::ecmascript {
// Element-count threshold used by EcmaString::StringsAreEquals: spans shorter than this are
// compared element by element, longer ones are handed to memcmp.
22 static constexpr int SMALL_STRING_SIZE = 128;
23 
Concat(const EcmaVM * vm,const JSHandle<EcmaString> & str1Handle,const JSHandle<EcmaString> & str2Handle)24 EcmaString *EcmaString::Concat(const EcmaVM *vm,
25     const JSHandle<EcmaString> &str1Handle, const JSHandle<EcmaString> &str2Handle)
26 {
27     // allocator may trig gc and move src, need to hold it
28     EcmaString *string1 = *str1Handle;
29     EcmaString *string2 = *str2Handle;
30 
31     uint32_t length1 = string1->GetLength();
32 
33     uint32_t length2 = string2->GetLength();
34     uint32_t newLength = length1 + length2;
35     if (newLength == 0) {
36         return vm->GetFactory()->GetEmptyString().GetObject<EcmaString>();
37     } else if (length1 == 0) {
38         return string2;
39     } else if (length2 == 0) {
40         return string1;
41     }
42     bool compressed = (!string1->IsUtf16() && !string2->IsUtf16());
43     auto newString = AllocStringObject(vm, newLength, compressed);
44 
45     // retrieve strings after gc
46     string1 = *str1Handle;
47     string2 = *str2Handle;
48     if (compressed) {
49         Span<uint8_t> sp(newString->GetDataUtf8Writable(), newLength);
50         Span<const uint8_t> src1(string1->GetDataUtf8(), length1);
51         EcmaString::StringCopy(sp, newLength, src1, length1);
52 
53         sp = sp.SubSpan(length1);
54         Span<const uint8_t> src2(string2->GetDataUtf8(), length2);
55         EcmaString::StringCopy(sp, newLength - length1, src2, length2);
56     } else {
57         Span<uint16_t> sp(newString->GetDataUtf16Writable(), newLength);
58         if (!string1->IsUtf16()) {
59             for (uint32_t i = 0; i < length1; ++i) {
60                 sp[i] = string1->At<false>(i);
61             }
62         } else {
63             Span<const uint16_t> src1(string1->GetDataUtf16(), length1);
64             EcmaString::StringCopy(sp, newLength << 1U, src1, length1 << 1U);
65         }
66         sp = sp.SubSpan(length1);
67         if (!string2->IsUtf16()) {
68             for (uint32_t i = 0; i < length2; ++i) {
69                 sp[i] = string2->At<false>(i);
70             }
71         } else {
72             uint32_t length = length2 << 1U;
73             Span<const uint16_t> src2(string2->GetDataUtf16(), length2);
74             EcmaString::StringCopy(sp, length, src2, length);
75         }
76     }
77 
78     ASSERT_PRINT(compressed == CanBeCompressed(newString), "compressed does not match the real value!");
79     return newString;
80 }
81 
82 /* static */
FastSubString(const EcmaVM * vm,const JSHandle<EcmaString> & src,uint32_t start,uint32_t utf16Len)83 EcmaString *EcmaString::FastSubString(const EcmaVM *vm,
84     const JSHandle<EcmaString> &src, uint32_t start, uint32_t utf16Len)
85 {
86     if (src->IsUtf8()) {
87         return FastSubUtf8String(vm, src, start, utf16Len);
88     }
89     return FastSubUtf16String(vm, src, start, utf16Len);
90 }
91 
92 template<typename T1, typename T2>
CompareStringSpan(Span<T1> & lhsSp,Span<T2> & rhsSp,int32_t count)93 int32_t CompareStringSpan(Span<T1> &lhsSp, Span<T2> &rhsSp, int32_t count)
94 {
95     for (int32_t i = 0; i < count; ++i) {
96         auto left = static_cast<int32_t>(lhsSp[i]);
97         auto right = static_cast<int32_t>(rhsSp[i]);
98         if (left != right) {
99             return left - right;
100         }
101     }
102     return 0;
103 }
104 
Compare(EcmaString * lhs,EcmaString * rhs)105 int32_t EcmaString::Compare(EcmaString *lhs, EcmaString *rhs)
106 {
107     if (lhs == rhs) {
108         return 0;
109     }
110     int32_t lhsCount = static_cast<int32_t>(lhs->GetLength());
111     int32_t rhsCount = static_cast<int32_t>(rhs->GetLength());
112     int32_t countDiff = lhsCount - rhsCount;
113     int32_t minCount = (countDiff < 0) ? lhsCount : rhsCount;
114     if (!lhs->IsUtf16() && !rhs->IsUtf16()) {
115         Span<const uint8_t> lhsSp(lhs->GetDataUtf8(), lhsCount);
116         Span<const uint8_t> rhsSp(rhs->GetDataUtf8(), rhsCount);
117         int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
118         if (charDiff != 0) {
119             return charDiff;
120         }
121     } else if (!lhs->IsUtf16()) {
122         Span<const uint8_t> lhsSp(lhs->GetDataUtf8(), lhsCount);
123         Span<const uint16_t> rhsSp(rhs->GetDataUtf16(), rhsCount);
124         int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
125         if (charDiff != 0) {
126             return charDiff;
127         }
128     } else if (!rhs->IsUtf16()) {
129         Span<const uint16_t> lhsSp(lhs->GetDataUtf16(), rhsCount);
130         Span<const uint8_t> rhsSp(rhs->GetDataUtf8(), lhsCount);
131         int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
132         if (charDiff != 0) {
133             return charDiff;
134         }
135     } else {
136         Span<const uint16_t> lhsSp(lhs->GetDataUtf16(), lhsCount);
137         Span<const uint16_t> rhsSp(rhs->GetDataUtf16(), rhsCount);
138         int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
139         if (charDiff != 0) {
140             return charDiff;
141         }
142     }
143     return countDiff;
144 }
145 
146 /* static */
147 template<typename T1, typename T2>
IndexOf(Span<const T1> & lhsSp,Span<const T2> & rhsSp,int32_t pos,int32_t max)148 int32_t EcmaString::IndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos, int32_t max)
149 {
150     ASSERT(rhsSp.size() > 0);
151     auto first = static_cast<int32_t>(rhsSp[0]);
152     int32_t i;
153     for (i = pos; i <= max; i++) {
154         if (static_cast<int32_t>(lhsSp[i]) != first) {
155             i++;
156             while (i <= max && static_cast<int32_t>(lhsSp[i]) != first) {
157                 i++;
158             }
159         }
160         /* Found first character, now look at the rest of rhsSp */
161         if (i <= max) {
162             int j = i + 1;
163             int end = j + static_cast<int>(rhsSp.size()) - 1;
164 
165             for (int k = 1; j < end && static_cast<int32_t>(lhsSp[j]) == static_cast<int32_t>(rhsSp[k]); j++, k++) {
166             }
167             if (j == end) {
168                 /* Found whole string. */
169                 return i;
170             }
171         }
172     }
173     return -1;
174 }
175 
176 template<typename T1, typename T2>
LastIndexOf(Span<const T1> & lhsSp,Span<const T2> & rhsSp,int32_t pos)177 int32_t EcmaString::LastIndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos)
178 {
179     int rhsSize = static_cast<int>(rhsSp.size());
180     ASSERT(rhsSize > 0);
181     auto first = rhsSp[0];
182     for (int32_t i = pos; i >= 0; i--) {
183         if (lhsSp[i] != first) {
184             continue;
185         }
186         /* Found first character, now look at the rest of rhsSp */
187         int j = 1;
188         while (j < rhsSize) {
189             if (rhsSp[j] != lhsSp[i + j]) {
190                 break;
191             }
192             j++;
193         }
194         if (j == rhsSize) {
195             return i;
196         }
197     }
198     return -1;
199 }
200 
IndexOf(EcmaString * lhs,EcmaString * rhs,int pos)201 int32_t EcmaString::IndexOf(EcmaString *lhs, EcmaString *rhs, int pos)
202 {
203     if (lhs == nullptr || rhs == nullptr) {
204         return -1;
205     }
206     int32_t lhsCount = static_cast<int32_t>(lhs->GetLength());
207     int32_t rhsCount = static_cast<int32_t>(rhs->GetLength());
208 
209     if (pos > lhsCount) {
210         return -1;
211     }
212 
213     if (rhsCount == 0) {
214         return pos;
215     }
216 
217     if (pos < 0) {
218         pos = 0;
219     }
220 
221     int32_t max = lhsCount - rhsCount;
222     if (max < 0) {
223         return -1;
224     }
225     if (rhs->IsUtf8() && lhs->IsUtf8()) {
226         Span<const uint8_t> lhsSp(lhs->GetDataUtf8(), lhsCount);
227         Span<const uint8_t> rhsSp(rhs->GetDataUtf8(), rhsCount);
228         return EcmaString::IndexOf(lhsSp, rhsSp, pos, max);
229     } else if (rhs->IsUtf16() && lhs->IsUtf16()) {  // NOLINT(readability-else-after-return)
230         Span<const uint16_t> lhsSp(lhs->GetDataUtf16(), lhsCount);
231         Span<const uint16_t> rhsSp(rhs->GetDataUtf16(), rhsCount);
232         return EcmaString::IndexOf(lhsSp, rhsSp, pos, max);
233     } else if (rhs->IsUtf16()) {
234         return -1;
235     } else {  // NOLINT(readability-else-after-return)
236         Span<const uint16_t> lhsSp(lhs->GetDataUtf16(), lhsCount);
237         Span<const uint8_t> rhsSp(rhs->GetDataUtf8(), rhsCount);
238         return EcmaString::IndexOf(lhsSp, rhsSp, pos, max);
239     }
240 }
241 
LastIndexOf(EcmaString * lhs,EcmaString * rhs,int pos)242 int32_t EcmaString::LastIndexOf(EcmaString *lhs, EcmaString *rhs, int pos)
243 {
244     if (lhs == nullptr || rhs == nullptr) {
245         return -1;
246     }
247 
248     int32_t lhsCount = static_cast<int32_t>(lhs->GetLength());
249     int32_t rhsCount = static_cast<int32_t>(rhs->GetLength());
250     if (lhsCount < rhsCount) {
251         return -1;
252     }
253 
254     if (pos < 0) {
255         pos = 0;
256     }
257 
258     if (pos > lhsCount) {
259         pos = lhsCount;
260     }
261 
262     if (pos + rhsCount > lhsCount) {
263         pos = lhsCount - rhsCount;
264     }
265 
266     if (rhsCount == 0) {
267         return pos;
268     }
269 
270     if (rhs->IsUtf8() && lhs->IsUtf8()) {
271         Span<const uint8_t> lhsSp(lhs->GetDataUtf8(), lhsCount);
272         Span<const uint8_t> rhsSp(rhs->GetDataUtf8(), rhsCount);
273         return EcmaString::LastIndexOf(lhsSp, rhsSp, pos);
274     } else if (rhs->IsUtf16() && lhs->IsUtf16()) {  // NOLINT(readability-else-after-return)
275         Span<const uint16_t> lhsSp(lhs->GetDataUtf16(), lhsCount);
276         Span<const uint16_t> rhsSp(rhs->GetDataUtf16(), rhsCount);
277         return EcmaString::LastIndexOf(lhsSp, rhsSp, pos);
278     } else if (rhs->IsUtf16()) {
279         return -1;
280     } else {  // NOLINT(readability-else-after-return)
281         Span<const uint16_t> lhsSp(lhs->GetDataUtf16(), lhsCount);
282         Span<const uint8_t> rhsSp(rhs->GetDataUtf8(), rhsCount);
283         return EcmaString::LastIndexOf(lhsSp, rhsSp, pos);
284     }
285 }
286 
ToU16String(uint32_t len)287 std::u16string EcmaString::ToU16String(uint32_t len)
288 {
289     uint32_t length = len > 0 ? len : GetLength();
290     std::u16string result;
291     if (IsUtf16()) {
292         result = base::StringHelper::Utf16ToU16String(GetDataUtf16(), length);
293     } else {
294         result = base::StringHelper::Utf8ToU16String(GetDataUtf8(), length);
295     }
296     return result;
297 }
298 
299 // static
CanBeCompressed(const EcmaString * string)300 bool EcmaString::CanBeCompressed(const EcmaString *string)
301 {
302     if (string->IsUtf8()) {
303         return CanBeCompressed(string->GetDataUtf8(), string->GetLength());
304     }
305     return CanBeCompressed(string->GetDataUtf16(), string->GetLength());
306 }
307 
308 // static
CanBeCompressed(const uint8_t * utf8Data,uint32_t utf8Len)309 bool EcmaString::CanBeCompressed(const uint8_t *utf8Data, uint32_t utf8Len)
310 {
311     bool isCompressed = true;
312     uint32_t index = 0;
313     // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
314     while (index < utf8Len) {
315         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
316         if (!IsASCIICharacter(utf8Data[index])) {
317             isCompressed = false;
318             break;
319         }
320         ++index;
321     }
322     return isCompressed;
323 }
324 
325 /* static */
CanBeCompressed(const uint16_t * utf16Data,uint32_t utf16Len)326 bool EcmaString::CanBeCompressed(const uint16_t *utf16Data, uint32_t utf16Len)
327 {
328     bool isCompressed = true;
329     Span<const uint16_t> data(utf16Data, utf16Len);
330     for (uint32_t i = 0; i < utf16Len; i++) {
331         if (!IsASCIICharacter(data[i])) {
332             isCompressed = false;
333             break;
334         }
335     }
336     return isCompressed;
337 }
338 
339 /* static */
CopyUtf16AsUtf8(const uint16_t * utf16From,uint8_t * utf8To,uint32_t utf16Len)340 void EcmaString::CopyUtf16AsUtf8(const uint16_t *utf16From, uint8_t *utf8To, uint32_t utf16Len)
341 {
342     Span<const uint16_t> from(utf16From, utf16Len);
343     Span<uint8_t> to(utf8To, utf16Len);
344     for (uint32_t i = 0; i < utf16Len; i++) {
345         to[i] = from[i];
346     }
347 }
348 
EqualToSplicedString(const EcmaString * str1,const EcmaString * str2)349 bool EcmaString::EqualToSplicedString(const EcmaString *str1, const EcmaString *str2)
350 {
351     if (GetLength() != str1->GetLength() + str2->GetLength()) {
352         return false;
353     }
354     if (IsUtf16()) {
355         if (str1->IsUtf8() && str2->IsUtf8()) {
356             return false;
357         }
358         if (EcmaString::StringsAreEqualUtf16(str1, GetDataUtf16(), str1->GetLength())) {
359             return EcmaString::StringsAreEqualUtf16(str2, GetDataUtf16() + str1->GetLength(), str2->GetLength());
360         }
361     } else {
362         if (str1->IsUtf16() || str2->IsUtf16()) {
363             return false;
364         }
365         Span<const uint8_t> concatData(GetDataUtf8(), str1->GetLength());
366         Span<const uint8_t> data1(str1->GetDataUtf8(), str1->GetLength());
367         if (EcmaString::StringsAreEquals(concatData, data1)) {
368             concatData = Span<const uint8_t>(GetDataUtf8() + str1->GetLength(), str2->GetLength());
369             Span<const uint8_t> data2(str2->GetDataUtf8(), str2->GetLength());
370             return EcmaString::StringsAreEquals(concatData, data2);
371         }
372     }
373     return false;
374 }
375 
376 /* static */
StringsAreEqualSameUtfEncoding(EcmaString * str1,EcmaString * str2)377 bool EcmaString::StringsAreEqualSameUtfEncoding(EcmaString *str1, EcmaString *str2)
378 {
379     if (str1->IsUtf16()) {
380         Span<const uint16_t> data1(str1->GetDataUtf16(), str1->GetLength());
381         Span<const uint16_t> data2(str2->GetDataUtf16(), str1->GetLength());
382         return EcmaString::StringsAreEquals(data1, data2);
383     } else {  // NOLINT(readability-else-after-return)
384         Span<const uint8_t> data1(str1->GetDataUtf8(), str1->GetLength());
385         Span<const uint8_t> data2(str2->GetDataUtf8(), str1->GetLength());
386         return EcmaString::StringsAreEquals(data1, data2);
387     }
388 }
389 
390 /* static */
StringsAreEqual(EcmaString * str1,EcmaString * str2)391 bool EcmaString::StringsAreEqual(EcmaString *str1, EcmaString *str2)
392 {
393     if ((str1->IsUtf16() != str2->IsUtf16()) || (str1->GetLength() != str2->GetLength()) ||
394         (str1->GetHashcode() != str2->GetHashcode())) {
395         return false;
396     }
397     return StringsAreEqualSameUtfEncoding(str1, str2);
398 }
399 
400 /* static */
StringsAreEqualUtf8(const EcmaString * str1,const uint8_t * utf8Data,uint32_t utf8Len,bool canBeCompress)401 bool EcmaString::StringsAreEqualUtf8(const EcmaString *str1, const uint8_t *utf8Data, uint32_t utf8Len,
402                                      bool canBeCompress)
403 {
404     if (canBeCompress != str1->IsUtf8()) {
405         return false;
406     }
407 
408     if (canBeCompress && str1->GetLength() != utf8Len) {
409         return false;
410     }
411 
412     if (canBeCompress) {
413         Span<const uint8_t> data1(str1->GetDataUtf8(), utf8Len);
414         Span<const uint8_t> data2(utf8Data, utf8Len);
415         return EcmaString::StringsAreEquals(data1, data2);
416     }
417     return IsUtf8EqualsUtf16(utf8Data, utf8Len, str1->GetDataUtf16(), str1->GetLength());
418 }
419 
420 /* static */
StringsAreEqualUtf16(const EcmaString * str1,const uint16_t * utf16Data,uint32_t utf16Len)421 bool EcmaString::StringsAreEqualUtf16(const EcmaString *str1, const uint16_t *utf16Data, uint32_t utf16Len)
422 {
423     bool result = false;
424     if (str1->GetLength() != utf16Len) {
425         result = false;
426     } else if (!str1->IsUtf16()) {
427         result = IsUtf8EqualsUtf16(str1->GetDataUtf8(), str1->GetLength(), utf16Data, utf16Len);
428     } else {
429         Span<const uint16_t> data1(str1->GetDataUtf16(), str1->GetLength());
430         Span<const uint16_t> data2(utf16Data, utf16Len);
431         result = EcmaString::StringsAreEquals(data1, data2);
432     }
433     return result;
434 }
435 
436 /* static */
437 template<typename T>
StringsAreEquals(Span<const T> & str1,Span<const T> & str2)438 bool EcmaString::StringsAreEquals(Span<const T> &str1, Span<const T> &str2)
439 {
440     ASSERT(str1.Size() <= str2.Size());
441     size_t size = str1.Size();
442     if (size < SMALL_STRING_SIZE) {
443         for (size_t i = 0; i < size; i++) {
444             if (str1[i] != str2[i]) {
445                 return false;
446             }
447         }
448         return true;
449     }
450     return !memcmp(str1.data(), str2.data(), size);
451 }
452 
453 template<typename T>
StringCopy(Span<T> & dst,size_t dstMax,Span<const T> & src,size_t count)454 bool EcmaString::StringCopy(Span<T> &dst, size_t dstMax, Span<const T> &src, size_t count)
455 {
456     ASSERT(dstMax >= count);
457     ASSERT(dst.Size() >= src.Size());
458     if (memcpy_s(dst.data(), dstMax, src.data(), count) != EOK) {
459         LOG_FULL(FATAL) << "memcpy_s failed";
460         UNREACHABLE();
461     }
462     return true;
463 }
464 
465 template<class T>
ComputeHashForData(const T * data,size_t size,uint32_t hashSeed)466 static int32_t ComputeHashForData(const T *data, size_t size, uint32_t hashSeed)
467 {
468     uint32_t hash = hashSeed;
469     Span<const T> sp(data, size);
470     for (auto c : sp) {
471         constexpr size_t SHIFT = 5;
472         hash = (hash << SHIFT) - hash + c;
473     }
474     return static_cast<int32_t>(hash);
475 }
476 
// Hashes utf8DataLength bytes with the hash * 31 + byte recurrence, starting from 0.
// A null data pointer yields 0. The 32-bit unsigned result is returned reinterpreted as int32_t.
static int32_t ComputeHashForUtf8(const uint8_t *utf8Data, uint32_t utf8DataLength)
{
    if (utf8Data == nullptr) {
        return 0;
    }
    constexpr size_t SHIFT = 5;
    uint32_t hash = 0;
    for (uint32_t i = 0; i < utf8DataLength; ++i) {
        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
        hash = (hash << SHIFT) - hash + utf8Data[i];
    }
    return static_cast<int32_t>(hash);
}
490 
ComputeHashcode(uint32_t hashSeed) const491 uint32_t EcmaString::ComputeHashcode(uint32_t hashSeed) const
492 {
493     int32_t hash;
494     if (!IsUtf16()) {
495         hash = ComputeHashForData(GetDataUtf8(), GetLength(), hashSeed);
496     } else {
497         hash = ComputeHashForData(GetDataUtf16(), GetLength(), hashSeed);
498     }
499     return static_cast<uint32_t>(hash);
500 }
501 
502 /* static */
ComputeHashcodeUtf8(const uint8_t * utf8Data,size_t utf8Len,bool canBeCompress)503 uint32_t EcmaString::ComputeHashcodeUtf8(const uint8_t *utf8Data, size_t utf8Len, bool canBeCompress)
504 {
505     int32_t hash;
506     if (canBeCompress) {
507         hash = ComputeHashForUtf8(utf8Data, utf8Len);
508     } else {
509         auto utf16Len = base::utf_helper::Utf8ToUtf16Size(utf8Data, utf8Len);
510         CVector<uint16_t> tmpBuffer(utf16Len);
511         [[maybe_unused]] auto len = base::utf_helper::ConvertRegionUtf8ToUtf16(utf8Data, tmpBuffer.data(), utf8Len,
512                                                                                utf16Len, 0);
513         ASSERT(len == utf16Len);
514         hash = ComputeHashForData(tmpBuffer.data(), utf16Len, 0);
515     }
516     return static_cast<uint32_t>(hash);
517 }
518 
519 /* static */
ComputeHashcodeUtf16(const uint16_t * utf16Data,uint32_t length)520 uint32_t EcmaString::ComputeHashcodeUtf16(const uint16_t *utf16Data, uint32_t length)
521 {
522     return ComputeHashForData(utf16Data, length, 0);
523 }
524 
525 /* static */
IsUtf8EqualsUtf16(const uint8_t * utf8Data,size_t utf8Len,const uint16_t * utf16Data,uint32_t utf16Len)526 bool EcmaString::IsUtf8EqualsUtf16(const uint8_t *utf8Data, size_t utf8Len, const uint16_t *utf16Data,
527                                    uint32_t utf16Len)
528 {
529     // length is one more than compared utf16Data, don't need convert all utf8Data to utf16Data
530     uint32_t utf8ConvertLength = utf16Len + 1;
531     CVector<uint16_t> tmpBuffer(utf8ConvertLength);
532     auto len = base::utf_helper::ConvertRegionUtf8ToUtf16(utf8Data, tmpBuffer.data(), utf8Len, utf8ConvertLength, 0);
533     if (len != utf16Len) {
534         return false;
535     }
536 
537     Span<const uint16_t> data1(tmpBuffer.data(), len);
538     Span<const uint16_t> data2(utf16Data, utf16Len);
539     return EcmaString::StringsAreEquals(data1, data2);
540 }
541 
ToElementIndex(uint32_t * index)542 bool EcmaString::ToElementIndex(uint32_t *index)
543 {
544     uint32_t len = GetLength();
545     if (UNLIKELY(len == 0 || len > MAX_ELEMENT_INDEX_LEN)) {  // NOLINTNEXTLINEreadability-magic-numbers)
546         return false;
547     }
548     if (UNLIKELY(IsUtf16())) {
549         return false;
550     }
551 
552     uint32_t c = GetDataUtf8()[0];
553     uint64_t n = 0;
554     if (c == '0') {
555         *index = 0;
556         return len == 1;
557     }
558     if (c > '0' && c <= '9') {
559         n = c - '0';
560         for (uint32_t i = 1; i < len; i++) {
561             c = GetDataUtf8()[i];  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
562             if (c < '0' || c > '9') {
563                 return false;
564             }
565             // NOLINTNEXTLINE(readability-magic-numbers)
566             n = n * 10 + (c - '0');  // 10: decimal factor
567         }
568         if (n < JSObject::MAX_ELEMENT_INDEX) {
569             *index = n;
570             return true;
571         }
572     }
573     return false;
574 }
575 
ToTypedArrayIndex(uint32_t * index)576 bool EcmaString::ToTypedArrayIndex(uint32_t *index)
577 {
578     uint32_t len = GetLength();
579     if (UNLIKELY(len == 0 || len > MAX_ELEMENT_INDEX_LEN)) {
580         return false;
581     }
582     if (UNLIKELY(IsUtf16())) {
583         return false;
584     }
585 
586     uint32_t c = GetDataUtf8()[0];  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
587     uint64_t n  = 0;
588     if (c == '0') {
589         *index = 0;
590         return len == 1;
591     }
592     if (c > '0' && c <= '9') {
593         n = c - '0';
594         for (uint32_t i = 1; i < len; i++) {
595             c = GetDataUtf8()[i];  // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
596             if (c >= '0' && c <= '9') {
597                 // NOLINTNEXTLINE(readability-magic-numbers)
598                 n = n * 10 + (c - '0');  // 10: decimal factor
599             } else if (c == '.') {
600                 n = JSObject::MAX_ELEMENT_INDEX;
601                 break;
602             } else {
603                 return false;
604             }
605         }
606         if (n < JSObject::MAX_ELEMENT_INDEX) {
607             *index = n;
608             return true;
609         } else {
610             *index = JSObject::MAX_ELEMENT_INDEX;
611             return true;
612         }
613     } else if (c == '-') {
614         *index = JSObject::MAX_ELEMENT_INDEX;
615         return true;
616     }
617     return false;
618 }
619 
620 template<typename T>
TrimBody(const JSThread * thread,const JSHandle<EcmaString> & src,Span<T> & data,TrimMode mode)621 EcmaString *EcmaString::TrimBody(const JSThread *thread, const JSHandle<EcmaString> &src, Span<T> &data, TrimMode mode)
622 {
623     uint32_t srcLen = src->GetLength();
624     uint32_t start = 0;
625     uint32_t end = srcLen - 1;
626 
627     if (mode == TrimMode::TRIM || mode == TrimMode::TRIM_START) {
628         start = base::StringHelper::GetStart(data, srcLen);
629     }
630     if (mode == TrimMode::TRIM || mode == TrimMode::TRIM_END) {
631         end = base::StringHelper::GetEnd(data, start, srcLen);
632     }
633     EcmaString *res = FastSubString(thread->GetEcmaVM(), src, start, end - start + 1);
634     return res;
635 }
636 
637 /* static */
ToLower(const EcmaVM * vm,const JSHandle<EcmaString> & src)638 EcmaString *EcmaString::ToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src)
639 {
640     uint32_t srcLength = src->GetLength();
641     auto factory = vm->GetFactory();
642     if (src->IsUtf16()) {
643         std::u16string u16str = base::StringHelper::Utf16ToU16String(src->GetDataUtf16(), srcLength);
644         std::string res = base::StringHelper::ToLower(u16str);
645         return *(factory->NewFromStdString(res));
646     } else {
647         const char start = 'A';
648         const char end = 'Z';
649         auto newString = AllocStringObject(vm, srcLength, true);
650         Span<uint8_t> data(src->GetDataUtf8Writable(), srcLength);
651         auto newStringPtr = newString->GetDataUtf8Writable();
652         for (uint32_t index = 0; index < srcLength; ++index) {
653             if (base::StringHelper::Utf8CharInRange(data[index], start, end)) {
654                 *(newStringPtr + index) = data[index] ^ (1 << 5);   // 1 and 5 means lower to upper or upper to lower
655             } else {
656                 *(newStringPtr + index) = data[index];
657             }
658         }
659         return newString;
660     }
661 }
662 
663 /* static */
ToUpper(const EcmaVM * vm,const JSHandle<EcmaString> & src)664 EcmaString *EcmaString::ToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src)
665 {
666     uint32_t srcLength = src->GetLength();
667     auto factory = vm->GetFactory();
668     if (src->IsUtf16()) {
669         std::u16string u16str = base::StringHelper::Utf16ToU16String(src->GetDataUtf16(), srcLength);
670         std::string res = base::StringHelper::ToUpper(u16str);
671         return *(factory->NewFromStdString(res));
672     } else {
673         const char start = 'a';
674         const char end = 'z';
675         auto newString = AllocStringObject(vm, srcLength, true);
676         Span<uint8_t> data(src->GetDataUtf8Writable(), srcLength);
677         auto newStringPtr = newString->GetDataUtf8Writable();
678         for (uint32_t index = 0; index < srcLength; ++index) {
679             if (base::StringHelper::Utf8CharInRange(data[index], start, end)) {
680                 *(newStringPtr + index) = data[index] ^ (1 << 5);   // 1 and 5 means lower to upper or upper to lower
681             } else {
682                 *(newStringPtr + index) = data[index];
683             }
684         }
685         return newString;
686     }
687 }
688 
689 /* static */
ToLocaleLower(const EcmaVM * vm,const JSHandle<EcmaString> & src,const icu::Locale & locale)690 EcmaString *EcmaString::ToLocaleLower(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale)
691 {
692     auto factory = vm->GetFactory();
693     std::u16string utf16 = src->ToU16String();
694     std::string res = base::StringHelper::ToLocaleLower(utf16, locale);
695     return *(factory->NewFromStdString(res));
696 }
697 
698 /* static */
ToLocaleUpper(const EcmaVM * vm,const JSHandle<EcmaString> & src,const icu::Locale & locale)699 EcmaString *EcmaString::ToLocaleUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale)
700 {
701     auto factory = vm->GetFactory();
702     std::u16string utf16 = src->ToU16String();
703     std::string res = base::StringHelper::ToLocaleUpper(utf16, locale);
704     return *(factory->NewFromStdString(res));
705 }
706 
Trim(const JSThread * thread,const JSHandle<EcmaString> & src,TrimMode mode)707 EcmaString *EcmaString::Trim(const JSThread *thread, const JSHandle<EcmaString> &src, TrimMode mode)
708 {
709     uint32_t srcLen = src->GetLength();
710     if (UNLIKELY(srcLen == 0)) {
711         return EcmaString::Cast(thread->GlobalConstants()->GetEmptyString().GetTaggedObject());
712     }
713     if (src->IsUtf8()) {
714         Span<const uint8_t> data(src->GetDataUtf8(), srcLen);
715         return TrimBody(thread, src, data, mode);
716     } else {
717         Span<const uint16_t> data(src->GetDataUtf16(), srcLen);
718         return TrimBody(thread, src, data, mode);
719     }
720 }
721 
EcmaStringAccessor(EcmaString * string)722 EcmaStringAccessor::EcmaStringAccessor(EcmaString *string)
723 {
724     ASSERT(string != nullptr);
725     string_ = string;
726 }
727 
EcmaStringAccessor(TaggedObject * obj)728 EcmaStringAccessor::EcmaStringAccessor(TaggedObject *obj)
729 {
730     ASSERT(obj != nullptr);
731     string_ = EcmaString::Cast(obj);
732 }
733 
EcmaStringAccessor(JSTaggedValue value)734 EcmaStringAccessor::EcmaStringAccessor(JSTaggedValue value)
735 {
736     ASSERT(value.IsString());
737     string_ = EcmaString::Cast(value.GetTaggedObject());
738 }
739 
EcmaStringAccessor(const JSHandle<EcmaString> & strHandle)740 EcmaStringAccessor::EcmaStringAccessor(const JSHandle<EcmaString> &strHandle)
741     : string_(*strHandle)
742 {
743 }
744 
ToStdString(StringConvertedUsage usage)745 std::string EcmaStringAccessor::ToStdString(StringConvertedUsage usage)
746 {
747     if (string_ == nullptr) {
748         return "";
749     }
750     bool modify = (usage != StringConvertedUsage::PRINT);
751     [[maybe_unused]] CVector<uint8_t> buf;
752     Span<const uint8_t> sp = string_->ToUtf8Span(buf, modify);
753     std::string res;
754     res.reserve(sp.size());
755     for (const auto &c : sp) {
756         res.push_back(c);
757     }
758     return res;
759 }
760 
ToCString(StringConvertedUsage usage)761 CString EcmaStringAccessor::ToCString(StringConvertedUsage usage)
762 {
763     if (string_ == nullptr) {
764         return "";
765     }
766     bool modify = (usage != StringConvertedUsage::PRINT);
767     [[maybe_unused]] CVector<uint8_t> buf;
768     Span<const uint8_t> sp = string_->ToUtf8Span(buf, modify);
769     CString res;
770     res.reserve(sp.size());
771     for (const auto &c : sp) {
772         res.push_back(c);
773     }
774     return res;
775 }
776 }  // namespace panda::ecmascript
777