1 /*
2 * Copyright (c) 2021 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "ecmascript/ecma_string-inl.h"
17
18 #include "ecmascript/js_symbol.h"
19 #include "ecmascript/mem/c_containers.h"
20
21 namespace panda::ecmascript {
// Element-count threshold used by EcmaString::StringsAreEquals: spans shorter than this are
// compared one element at a time, longer ones are handed to memcmp.
static constexpr int SMALL_STRING_SIZE = 128;
23
Concat(const EcmaVM * vm,const JSHandle<EcmaString> & str1Handle,const JSHandle<EcmaString> & str2Handle)24 EcmaString *EcmaString::Concat(const EcmaVM *vm,
25 const JSHandle<EcmaString> &str1Handle, const JSHandle<EcmaString> &str2Handle)
26 {
27 // allocator may trig gc and move src, need to hold it
28 EcmaString *string1 = *str1Handle;
29 EcmaString *string2 = *str2Handle;
30
31 uint32_t length1 = string1->GetLength();
32
33 uint32_t length2 = string2->GetLength();
34 uint32_t newLength = length1 + length2;
35 if (newLength == 0) {
36 return vm->GetFactory()->GetEmptyString().GetObject<EcmaString>();
37 } else if (length1 == 0) {
38 return string2;
39 } else if (length2 == 0) {
40 return string1;
41 }
42 bool compressed = (!string1->IsUtf16() && !string2->IsUtf16());
43 auto newString = AllocStringObject(vm, newLength, compressed);
44
45 // retrieve strings after gc
46 string1 = *str1Handle;
47 string2 = *str2Handle;
48 if (compressed) {
49 Span<uint8_t> sp(newString->GetDataUtf8Writable(), newLength);
50 Span<const uint8_t> src1(string1->GetDataUtf8(), length1);
51 EcmaString::StringCopy(sp, newLength, src1, length1);
52
53 sp = sp.SubSpan(length1);
54 Span<const uint8_t> src2(string2->GetDataUtf8(), length2);
55 EcmaString::StringCopy(sp, newLength - length1, src2, length2);
56 } else {
57 Span<uint16_t> sp(newString->GetDataUtf16Writable(), newLength);
58 if (!string1->IsUtf16()) {
59 for (uint32_t i = 0; i < length1; ++i) {
60 sp[i] = string1->At<false>(i);
61 }
62 } else {
63 Span<const uint16_t> src1(string1->GetDataUtf16(), length1);
64 EcmaString::StringCopy(sp, newLength << 1U, src1, length1 << 1U);
65 }
66 sp = sp.SubSpan(length1);
67 if (!string2->IsUtf16()) {
68 for (uint32_t i = 0; i < length2; ++i) {
69 sp[i] = string2->At<false>(i);
70 }
71 } else {
72 uint32_t length = length2 << 1U;
73 Span<const uint16_t> src2(string2->GetDataUtf16(), length2);
74 EcmaString::StringCopy(sp, length, src2, length);
75 }
76 }
77
78 ASSERT_PRINT(compressed == CanBeCompressed(newString), "compressed does not match the real value!");
79 return newString;
80 }
81
82 /* static */
FastSubString(const EcmaVM * vm,const JSHandle<EcmaString> & src,uint32_t start,uint32_t utf16Len)83 EcmaString *EcmaString::FastSubString(const EcmaVM *vm,
84 const JSHandle<EcmaString> &src, uint32_t start, uint32_t utf16Len)
85 {
86 if (src->IsUtf8()) {
87 return FastSubUtf8String(vm, src, start, utf16Len);
88 }
89 return FastSubUtf16String(vm, src, start, utf16Len);
90 }
91
92 template<typename T1, typename T2>
CompareStringSpan(Span<T1> & lhsSp,Span<T2> & rhsSp,int32_t count)93 int32_t CompareStringSpan(Span<T1> &lhsSp, Span<T2> &rhsSp, int32_t count)
94 {
95 for (int32_t i = 0; i < count; ++i) {
96 auto left = static_cast<int32_t>(lhsSp[i]);
97 auto right = static_cast<int32_t>(rhsSp[i]);
98 if (left != right) {
99 return left - right;
100 }
101 }
102 return 0;
103 }
104
Compare(EcmaString * lhs,EcmaString * rhs)105 int32_t EcmaString::Compare(EcmaString *lhs, EcmaString *rhs)
106 {
107 if (lhs == rhs) {
108 return 0;
109 }
110 int32_t lhsCount = static_cast<int32_t>(lhs->GetLength());
111 int32_t rhsCount = static_cast<int32_t>(rhs->GetLength());
112 int32_t countDiff = lhsCount - rhsCount;
113 int32_t minCount = (countDiff < 0) ? lhsCount : rhsCount;
114 if (!lhs->IsUtf16() && !rhs->IsUtf16()) {
115 Span<const uint8_t> lhsSp(lhs->GetDataUtf8(), lhsCount);
116 Span<const uint8_t> rhsSp(rhs->GetDataUtf8(), rhsCount);
117 int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
118 if (charDiff != 0) {
119 return charDiff;
120 }
121 } else if (!lhs->IsUtf16()) {
122 Span<const uint8_t> lhsSp(lhs->GetDataUtf8(), lhsCount);
123 Span<const uint16_t> rhsSp(rhs->GetDataUtf16(), rhsCount);
124 int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
125 if (charDiff != 0) {
126 return charDiff;
127 }
128 } else if (!rhs->IsUtf16()) {
129 Span<const uint16_t> lhsSp(lhs->GetDataUtf16(), rhsCount);
130 Span<const uint8_t> rhsSp(rhs->GetDataUtf8(), lhsCount);
131 int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
132 if (charDiff != 0) {
133 return charDiff;
134 }
135 } else {
136 Span<const uint16_t> lhsSp(lhs->GetDataUtf16(), lhsCount);
137 Span<const uint16_t> rhsSp(rhs->GetDataUtf16(), rhsCount);
138 int32_t charDiff = CompareStringSpan(lhsSp, rhsSp, minCount);
139 if (charDiff != 0) {
140 return charDiff;
141 }
142 }
143 return countDiff;
144 }
145
146 /* static */
147 template<typename T1, typename T2>
IndexOf(Span<const T1> & lhsSp,Span<const T2> & rhsSp,int32_t pos,int32_t max)148 int32_t EcmaString::IndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos, int32_t max)
149 {
150 ASSERT(rhsSp.size() > 0);
151 auto first = static_cast<int32_t>(rhsSp[0]);
152 int32_t i;
153 for (i = pos; i <= max; i++) {
154 if (static_cast<int32_t>(lhsSp[i]) != first) {
155 i++;
156 while (i <= max && static_cast<int32_t>(lhsSp[i]) != first) {
157 i++;
158 }
159 }
160 /* Found first character, now look at the rest of rhsSp */
161 if (i <= max) {
162 int j = i + 1;
163 int end = j + static_cast<int>(rhsSp.size()) - 1;
164
165 for (int k = 1; j < end && static_cast<int32_t>(lhsSp[j]) == static_cast<int32_t>(rhsSp[k]); j++, k++) {
166 }
167 if (j == end) {
168 /* Found whole string. */
169 return i;
170 }
171 }
172 }
173 return -1;
174 }
175
176 template<typename T1, typename T2>
LastIndexOf(Span<const T1> & lhsSp,Span<const T2> & rhsSp,int32_t pos)177 int32_t EcmaString::LastIndexOf(Span<const T1> &lhsSp, Span<const T2> &rhsSp, int32_t pos)
178 {
179 int rhsSize = static_cast<int>(rhsSp.size());
180 ASSERT(rhsSize > 0);
181 auto first = rhsSp[0];
182 for (int32_t i = pos; i >= 0; i--) {
183 if (lhsSp[i] != first) {
184 continue;
185 }
186 /* Found first character, now look at the rest of rhsSp */
187 int j = 1;
188 while (j < rhsSize) {
189 if (rhsSp[j] != lhsSp[i + j]) {
190 break;
191 }
192 j++;
193 }
194 if (j == rhsSize) {
195 return i;
196 }
197 }
198 return -1;
199 }
200
IndexOf(EcmaString * lhs,EcmaString * rhs,int pos)201 int32_t EcmaString::IndexOf(EcmaString *lhs, EcmaString *rhs, int pos)
202 {
203 if (lhs == nullptr || rhs == nullptr) {
204 return -1;
205 }
206 int32_t lhsCount = static_cast<int32_t>(lhs->GetLength());
207 int32_t rhsCount = static_cast<int32_t>(rhs->GetLength());
208
209 if (pos > lhsCount) {
210 return -1;
211 }
212
213 if (rhsCount == 0) {
214 return pos;
215 }
216
217 if (pos < 0) {
218 pos = 0;
219 }
220
221 int32_t max = lhsCount - rhsCount;
222 if (max < 0) {
223 return -1;
224 }
225 if (rhs->IsUtf8() && lhs->IsUtf8()) {
226 Span<const uint8_t> lhsSp(lhs->GetDataUtf8(), lhsCount);
227 Span<const uint8_t> rhsSp(rhs->GetDataUtf8(), rhsCount);
228 return EcmaString::IndexOf(lhsSp, rhsSp, pos, max);
229 } else if (rhs->IsUtf16() && lhs->IsUtf16()) { // NOLINT(readability-else-after-return)
230 Span<const uint16_t> lhsSp(lhs->GetDataUtf16(), lhsCount);
231 Span<const uint16_t> rhsSp(rhs->GetDataUtf16(), rhsCount);
232 return EcmaString::IndexOf(lhsSp, rhsSp, pos, max);
233 } else if (rhs->IsUtf16()) {
234 return -1;
235 } else { // NOLINT(readability-else-after-return)
236 Span<const uint16_t> lhsSp(lhs->GetDataUtf16(), lhsCount);
237 Span<const uint8_t> rhsSp(rhs->GetDataUtf8(), rhsCount);
238 return EcmaString::IndexOf(lhsSp, rhsSp, pos, max);
239 }
240 }
241
LastIndexOf(EcmaString * lhs,EcmaString * rhs,int pos)242 int32_t EcmaString::LastIndexOf(EcmaString *lhs, EcmaString *rhs, int pos)
243 {
244 if (lhs == nullptr || rhs == nullptr) {
245 return -1;
246 }
247
248 int32_t lhsCount = static_cast<int32_t>(lhs->GetLength());
249 int32_t rhsCount = static_cast<int32_t>(rhs->GetLength());
250 if (lhsCount < rhsCount) {
251 return -1;
252 }
253
254 if (pos < 0) {
255 pos = 0;
256 }
257
258 if (pos > lhsCount) {
259 pos = lhsCount;
260 }
261
262 if (pos + rhsCount > lhsCount) {
263 pos = lhsCount - rhsCount;
264 }
265
266 if (rhsCount == 0) {
267 return pos;
268 }
269
270 if (rhs->IsUtf8() && lhs->IsUtf8()) {
271 Span<const uint8_t> lhsSp(lhs->GetDataUtf8(), lhsCount);
272 Span<const uint8_t> rhsSp(rhs->GetDataUtf8(), rhsCount);
273 return EcmaString::LastIndexOf(lhsSp, rhsSp, pos);
274 } else if (rhs->IsUtf16() && lhs->IsUtf16()) { // NOLINT(readability-else-after-return)
275 Span<const uint16_t> lhsSp(lhs->GetDataUtf16(), lhsCount);
276 Span<const uint16_t> rhsSp(rhs->GetDataUtf16(), rhsCount);
277 return EcmaString::LastIndexOf(lhsSp, rhsSp, pos);
278 } else if (rhs->IsUtf16()) {
279 return -1;
280 } else { // NOLINT(readability-else-after-return)
281 Span<const uint16_t> lhsSp(lhs->GetDataUtf16(), lhsCount);
282 Span<const uint8_t> rhsSp(rhs->GetDataUtf8(), rhsCount);
283 return EcmaString::LastIndexOf(lhsSp, rhsSp, pos);
284 }
285 }
286
ToU16String(uint32_t len)287 std::u16string EcmaString::ToU16String(uint32_t len)
288 {
289 uint32_t length = len > 0 ? len : GetLength();
290 std::u16string result;
291 if (IsUtf16()) {
292 result = base::StringHelper::Utf16ToU16String(GetDataUtf16(), length);
293 } else {
294 result = base::StringHelper::Utf8ToU16String(GetDataUtf8(), length);
295 }
296 return result;
297 }
298
299 // static
CanBeCompressed(const EcmaString * string)300 bool EcmaString::CanBeCompressed(const EcmaString *string)
301 {
302 if (string->IsUtf8()) {
303 return CanBeCompressed(string->GetDataUtf8(), string->GetLength());
304 }
305 return CanBeCompressed(string->GetDataUtf16(), string->GetLength());
306 }
307
308 // static
CanBeCompressed(const uint8_t * utf8Data,uint32_t utf8Len)309 bool EcmaString::CanBeCompressed(const uint8_t *utf8Data, uint32_t utf8Len)
310 {
311 bool isCompressed = true;
312 uint32_t index = 0;
313 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
314 while (index < utf8Len) {
315 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
316 if (!IsASCIICharacter(utf8Data[index])) {
317 isCompressed = false;
318 break;
319 }
320 ++index;
321 }
322 return isCompressed;
323 }
324
325 /* static */
CanBeCompressed(const uint16_t * utf16Data,uint32_t utf16Len)326 bool EcmaString::CanBeCompressed(const uint16_t *utf16Data, uint32_t utf16Len)
327 {
328 bool isCompressed = true;
329 Span<const uint16_t> data(utf16Data, utf16Len);
330 for (uint32_t i = 0; i < utf16Len; i++) {
331 if (!IsASCIICharacter(data[i])) {
332 isCompressed = false;
333 break;
334 }
335 }
336 return isCompressed;
337 }
338
339 /* static */
CopyUtf16AsUtf8(const uint16_t * utf16From,uint8_t * utf8To,uint32_t utf16Len)340 void EcmaString::CopyUtf16AsUtf8(const uint16_t *utf16From, uint8_t *utf8To, uint32_t utf16Len)
341 {
342 Span<const uint16_t> from(utf16From, utf16Len);
343 Span<uint8_t> to(utf8To, utf16Len);
344 for (uint32_t i = 0; i < utf16Len; i++) {
345 to[i] = from[i];
346 }
347 }
348
EqualToSplicedString(const EcmaString * str1,const EcmaString * str2)349 bool EcmaString::EqualToSplicedString(const EcmaString *str1, const EcmaString *str2)
350 {
351 if (GetLength() != str1->GetLength() + str2->GetLength()) {
352 return false;
353 }
354 if (IsUtf16()) {
355 if (str1->IsUtf8() && str2->IsUtf8()) {
356 return false;
357 }
358 if (EcmaString::StringsAreEqualUtf16(str1, GetDataUtf16(), str1->GetLength())) {
359 return EcmaString::StringsAreEqualUtf16(str2, GetDataUtf16() + str1->GetLength(), str2->GetLength());
360 }
361 } else {
362 if (str1->IsUtf16() || str2->IsUtf16()) {
363 return false;
364 }
365 Span<const uint8_t> concatData(GetDataUtf8(), str1->GetLength());
366 Span<const uint8_t> data1(str1->GetDataUtf8(), str1->GetLength());
367 if (EcmaString::StringsAreEquals(concatData, data1)) {
368 concatData = Span<const uint8_t>(GetDataUtf8() + str1->GetLength(), str2->GetLength());
369 Span<const uint8_t> data2(str2->GetDataUtf8(), str2->GetLength());
370 return EcmaString::StringsAreEquals(concatData, data2);
371 }
372 }
373 return false;
374 }
375
376 /* static */
StringsAreEqualSameUtfEncoding(EcmaString * str1,EcmaString * str2)377 bool EcmaString::StringsAreEqualSameUtfEncoding(EcmaString *str1, EcmaString *str2)
378 {
379 if (str1->IsUtf16()) {
380 Span<const uint16_t> data1(str1->GetDataUtf16(), str1->GetLength());
381 Span<const uint16_t> data2(str2->GetDataUtf16(), str1->GetLength());
382 return EcmaString::StringsAreEquals(data1, data2);
383 } else { // NOLINT(readability-else-after-return)
384 Span<const uint8_t> data1(str1->GetDataUtf8(), str1->GetLength());
385 Span<const uint8_t> data2(str2->GetDataUtf8(), str1->GetLength());
386 return EcmaString::StringsAreEquals(data1, data2);
387 }
388 }
389
390 /* static */
StringsAreEqual(EcmaString * str1,EcmaString * str2)391 bool EcmaString::StringsAreEqual(EcmaString *str1, EcmaString *str2)
392 {
393 if ((str1->IsUtf16() != str2->IsUtf16()) || (str1->GetLength() != str2->GetLength()) ||
394 (str1->GetHashcode() != str2->GetHashcode())) {
395 return false;
396 }
397 return StringsAreEqualSameUtfEncoding(str1, str2);
398 }
399
400 /* static */
StringsAreEqualUtf8(const EcmaString * str1,const uint8_t * utf8Data,uint32_t utf8Len,bool canBeCompress)401 bool EcmaString::StringsAreEqualUtf8(const EcmaString *str1, const uint8_t *utf8Data, uint32_t utf8Len,
402 bool canBeCompress)
403 {
404 if (canBeCompress != str1->IsUtf8()) {
405 return false;
406 }
407
408 if (canBeCompress && str1->GetLength() != utf8Len) {
409 return false;
410 }
411
412 if (canBeCompress) {
413 Span<const uint8_t> data1(str1->GetDataUtf8(), utf8Len);
414 Span<const uint8_t> data2(utf8Data, utf8Len);
415 return EcmaString::StringsAreEquals(data1, data2);
416 }
417 return IsUtf8EqualsUtf16(utf8Data, utf8Len, str1->GetDataUtf16(), str1->GetLength());
418 }
419
420 /* static */
StringsAreEqualUtf16(const EcmaString * str1,const uint16_t * utf16Data,uint32_t utf16Len)421 bool EcmaString::StringsAreEqualUtf16(const EcmaString *str1, const uint16_t *utf16Data, uint32_t utf16Len)
422 {
423 bool result = false;
424 if (str1->GetLength() != utf16Len) {
425 result = false;
426 } else if (!str1->IsUtf16()) {
427 result = IsUtf8EqualsUtf16(str1->GetDataUtf8(), str1->GetLength(), utf16Data, utf16Len);
428 } else {
429 Span<const uint16_t> data1(str1->GetDataUtf16(), str1->GetLength());
430 Span<const uint16_t> data2(utf16Data, utf16Len);
431 result = EcmaString::StringsAreEquals(data1, data2);
432 }
433 return result;
434 }
435
436 /* static */
437 template<typename T>
StringsAreEquals(Span<const T> & str1,Span<const T> & str2)438 bool EcmaString::StringsAreEquals(Span<const T> &str1, Span<const T> &str2)
439 {
440 ASSERT(str1.Size() <= str2.Size());
441 size_t size = str1.Size();
442 if (size < SMALL_STRING_SIZE) {
443 for (size_t i = 0; i < size; i++) {
444 if (str1[i] != str2[i]) {
445 return false;
446 }
447 }
448 return true;
449 }
450 return !memcmp(str1.data(), str2.data(), size);
451 }
452
453 template<typename T>
StringCopy(Span<T> & dst,size_t dstMax,Span<const T> & src,size_t count)454 bool EcmaString::StringCopy(Span<T> &dst, size_t dstMax, Span<const T> &src, size_t count)
455 {
456 ASSERT(dstMax >= count);
457 ASSERT(dst.Size() >= src.Size());
458 if (memcpy_s(dst.data(), dstMax, src.data(), count) != EOK) {
459 LOG_FULL(FATAL) << "memcpy_s failed";
460 UNREACHABLE();
461 }
462 return true;
463 }
464
465 template<class T>
ComputeHashForData(const T * data,size_t size,uint32_t hashSeed)466 static int32_t ComputeHashForData(const T *data, size_t size, uint32_t hashSeed)
467 {
468 uint32_t hash = hashSeed;
469 Span<const T> sp(data, size);
470 for (auto c : sp) {
471 constexpr size_t SHIFT = 5;
472 hash = (hash << SHIFT) - hash + c;
473 }
474 return static_cast<int32_t>(hash);
475 }
476
/**
 * Hashes utf8DataLength bytes starting at utf8Data with seed 0.
 * Each step computes hash * 31 + byte in unsigned 32-bit arithmetic.
 *
 * @return the hash reinterpreted as int32_t; 0 for a null pointer or an empty range.
 */
static int32_t ComputeHashForUtf8(const uint8_t *utf8Data, uint32_t utf8DataLength)
{
    if (utf8Data == nullptr) {
        return 0;
    }
    constexpr uint32_t HASH_MULTIPLIER = 31U;  // same as (hash << 5) - hash
    uint32_t hash = 0;
    for (uint32_t pos = 0; pos < utf8DataLength; ++pos) {
        // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
        hash = hash * HASH_MULTIPLIER + utf8Data[pos];
    }
    return static_cast<int32_t>(hash);
}
490
ComputeHashcode(uint32_t hashSeed) const491 uint32_t EcmaString::ComputeHashcode(uint32_t hashSeed) const
492 {
493 int32_t hash;
494 if (!IsUtf16()) {
495 hash = ComputeHashForData(GetDataUtf8(), GetLength(), hashSeed);
496 } else {
497 hash = ComputeHashForData(GetDataUtf16(), GetLength(), hashSeed);
498 }
499 return static_cast<uint32_t>(hash);
500 }
501
502 /* static */
ComputeHashcodeUtf8(const uint8_t * utf8Data,size_t utf8Len,bool canBeCompress)503 uint32_t EcmaString::ComputeHashcodeUtf8(const uint8_t *utf8Data, size_t utf8Len, bool canBeCompress)
504 {
505 int32_t hash;
506 if (canBeCompress) {
507 hash = ComputeHashForUtf8(utf8Data, utf8Len);
508 } else {
509 auto utf16Len = base::utf_helper::Utf8ToUtf16Size(utf8Data, utf8Len);
510 CVector<uint16_t> tmpBuffer(utf16Len);
511 [[maybe_unused]] auto len = base::utf_helper::ConvertRegionUtf8ToUtf16(utf8Data, tmpBuffer.data(), utf8Len,
512 utf16Len, 0);
513 ASSERT(len == utf16Len);
514 hash = ComputeHashForData(tmpBuffer.data(), utf16Len, 0);
515 }
516 return static_cast<uint32_t>(hash);
517 }
518
519 /* static */
ComputeHashcodeUtf16(const uint16_t * utf16Data,uint32_t length)520 uint32_t EcmaString::ComputeHashcodeUtf16(const uint16_t *utf16Data, uint32_t length)
521 {
522 return ComputeHashForData(utf16Data, length, 0);
523 }
524
525 /* static */
IsUtf8EqualsUtf16(const uint8_t * utf8Data,size_t utf8Len,const uint16_t * utf16Data,uint32_t utf16Len)526 bool EcmaString::IsUtf8EqualsUtf16(const uint8_t *utf8Data, size_t utf8Len, const uint16_t *utf16Data,
527 uint32_t utf16Len)
528 {
529 // length is one more than compared utf16Data, don't need convert all utf8Data to utf16Data
530 uint32_t utf8ConvertLength = utf16Len + 1;
531 CVector<uint16_t> tmpBuffer(utf8ConvertLength);
532 auto len = base::utf_helper::ConvertRegionUtf8ToUtf16(utf8Data, tmpBuffer.data(), utf8Len, utf8ConvertLength, 0);
533 if (len != utf16Len) {
534 return false;
535 }
536
537 Span<const uint16_t> data1(tmpBuffer.data(), len);
538 Span<const uint16_t> data2(utf16Data, utf16Len);
539 return EcmaString::StringsAreEquals(data1, data2);
540 }
541
ToElementIndex(uint32_t * index)542 bool EcmaString::ToElementIndex(uint32_t *index)
543 {
544 uint32_t len = GetLength();
545 if (UNLIKELY(len == 0 || len > MAX_ELEMENT_INDEX_LEN)) { // NOLINTNEXTLINEreadability-magic-numbers)
546 return false;
547 }
548 if (UNLIKELY(IsUtf16())) {
549 return false;
550 }
551
552 uint32_t c = GetDataUtf8()[0];
553 uint64_t n = 0;
554 if (c == '0') {
555 *index = 0;
556 return len == 1;
557 }
558 if (c > '0' && c <= '9') {
559 n = c - '0';
560 for (uint32_t i = 1; i < len; i++) {
561 c = GetDataUtf8()[i]; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
562 if (c < '0' || c > '9') {
563 return false;
564 }
565 // NOLINTNEXTLINE(readability-magic-numbers)
566 n = n * 10 + (c - '0'); // 10: decimal factor
567 }
568 if (n < JSObject::MAX_ELEMENT_INDEX) {
569 *index = n;
570 return true;
571 }
572 }
573 return false;
574 }
575
ToTypedArrayIndex(uint32_t * index)576 bool EcmaString::ToTypedArrayIndex(uint32_t *index)
577 {
578 uint32_t len = GetLength();
579 if (UNLIKELY(len == 0 || len > MAX_ELEMENT_INDEX_LEN)) {
580 return false;
581 }
582 if (UNLIKELY(IsUtf16())) {
583 return false;
584 }
585
586 uint32_t c = GetDataUtf8()[0]; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
587 uint64_t n = 0;
588 if (c == '0') {
589 *index = 0;
590 return len == 1;
591 }
592 if (c > '0' && c <= '9') {
593 n = c - '0';
594 for (uint32_t i = 1; i < len; i++) {
595 c = GetDataUtf8()[i]; // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
596 if (c >= '0' && c <= '9') {
597 // NOLINTNEXTLINE(readability-magic-numbers)
598 n = n * 10 + (c - '0'); // 10: decimal factor
599 } else if (c == '.') {
600 n = JSObject::MAX_ELEMENT_INDEX;
601 break;
602 } else {
603 return false;
604 }
605 }
606 if (n < JSObject::MAX_ELEMENT_INDEX) {
607 *index = n;
608 return true;
609 } else {
610 *index = JSObject::MAX_ELEMENT_INDEX;
611 return true;
612 }
613 } else if (c == '-') {
614 *index = JSObject::MAX_ELEMENT_INDEX;
615 return true;
616 }
617 return false;
618 }
619
620 template<typename T>
TrimBody(const JSThread * thread,const JSHandle<EcmaString> & src,Span<T> & data,TrimMode mode)621 EcmaString *EcmaString::TrimBody(const JSThread *thread, const JSHandle<EcmaString> &src, Span<T> &data, TrimMode mode)
622 {
623 uint32_t srcLen = src->GetLength();
624 uint32_t start = 0;
625 uint32_t end = srcLen - 1;
626
627 if (mode == TrimMode::TRIM || mode == TrimMode::TRIM_START) {
628 start = base::StringHelper::GetStart(data, srcLen);
629 }
630 if (mode == TrimMode::TRIM || mode == TrimMode::TRIM_END) {
631 end = base::StringHelper::GetEnd(data, start, srcLen);
632 }
633 EcmaString *res = FastSubString(thread->GetEcmaVM(), src, start, end - start + 1);
634 return res;
635 }
636
637 /* static */
ToLower(const EcmaVM * vm,const JSHandle<EcmaString> & src)638 EcmaString *EcmaString::ToLower(const EcmaVM *vm, const JSHandle<EcmaString> &src)
639 {
640 uint32_t srcLength = src->GetLength();
641 auto factory = vm->GetFactory();
642 if (src->IsUtf16()) {
643 std::u16string u16str = base::StringHelper::Utf16ToU16String(src->GetDataUtf16(), srcLength);
644 std::string res = base::StringHelper::ToLower(u16str);
645 return *(factory->NewFromStdString(res));
646 } else {
647 const char start = 'A';
648 const char end = 'Z';
649 auto newString = AllocStringObject(vm, srcLength, true);
650 Span<uint8_t> data(src->GetDataUtf8Writable(), srcLength);
651 auto newStringPtr = newString->GetDataUtf8Writable();
652 for (uint32_t index = 0; index < srcLength; ++index) {
653 if (base::StringHelper::Utf8CharInRange(data[index], start, end)) {
654 *(newStringPtr + index) = data[index] ^ (1 << 5); // 1 and 5 means lower to upper or upper to lower
655 } else {
656 *(newStringPtr + index) = data[index];
657 }
658 }
659 return newString;
660 }
661 }
662
663 /* static */
ToUpper(const EcmaVM * vm,const JSHandle<EcmaString> & src)664 EcmaString *EcmaString::ToUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src)
665 {
666 uint32_t srcLength = src->GetLength();
667 auto factory = vm->GetFactory();
668 if (src->IsUtf16()) {
669 std::u16string u16str = base::StringHelper::Utf16ToU16String(src->GetDataUtf16(), srcLength);
670 std::string res = base::StringHelper::ToUpper(u16str);
671 return *(factory->NewFromStdString(res));
672 } else {
673 const char start = 'a';
674 const char end = 'z';
675 auto newString = AllocStringObject(vm, srcLength, true);
676 Span<uint8_t> data(src->GetDataUtf8Writable(), srcLength);
677 auto newStringPtr = newString->GetDataUtf8Writable();
678 for (uint32_t index = 0; index < srcLength; ++index) {
679 if (base::StringHelper::Utf8CharInRange(data[index], start, end)) {
680 *(newStringPtr + index) = data[index] ^ (1 << 5); // 1 and 5 means lower to upper or upper to lower
681 } else {
682 *(newStringPtr + index) = data[index];
683 }
684 }
685 return newString;
686 }
687 }
688
689 /* static */
ToLocaleLower(const EcmaVM * vm,const JSHandle<EcmaString> & src,const icu::Locale & locale)690 EcmaString *EcmaString::ToLocaleLower(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale)
691 {
692 auto factory = vm->GetFactory();
693 std::u16string utf16 = src->ToU16String();
694 std::string res = base::StringHelper::ToLocaleLower(utf16, locale);
695 return *(factory->NewFromStdString(res));
696 }
697
698 /* static */
ToLocaleUpper(const EcmaVM * vm,const JSHandle<EcmaString> & src,const icu::Locale & locale)699 EcmaString *EcmaString::ToLocaleUpper(const EcmaVM *vm, const JSHandle<EcmaString> &src, const icu::Locale &locale)
700 {
701 auto factory = vm->GetFactory();
702 std::u16string utf16 = src->ToU16String();
703 std::string res = base::StringHelper::ToLocaleUpper(utf16, locale);
704 return *(factory->NewFromStdString(res));
705 }
706
Trim(const JSThread * thread,const JSHandle<EcmaString> & src,TrimMode mode)707 EcmaString *EcmaString::Trim(const JSThread *thread, const JSHandle<EcmaString> &src, TrimMode mode)
708 {
709 uint32_t srcLen = src->GetLength();
710 if (UNLIKELY(srcLen == 0)) {
711 return EcmaString::Cast(thread->GlobalConstants()->GetEmptyString().GetTaggedObject());
712 }
713 if (src->IsUtf8()) {
714 Span<const uint8_t> data(src->GetDataUtf8(), srcLen);
715 return TrimBody(thread, src, data, mode);
716 } else {
717 Span<const uint16_t> data(src->GetDataUtf16(), srcLen);
718 return TrimBody(thread, src, data, mode);
719 }
720 }
721
EcmaStringAccessor(EcmaString * string)722 EcmaStringAccessor::EcmaStringAccessor(EcmaString *string)
723 {
724 ASSERT(string != nullptr);
725 string_ = string;
726 }
727
EcmaStringAccessor(TaggedObject * obj)728 EcmaStringAccessor::EcmaStringAccessor(TaggedObject *obj)
729 {
730 ASSERT(obj != nullptr);
731 string_ = EcmaString::Cast(obj);
732 }
733
EcmaStringAccessor(JSTaggedValue value)734 EcmaStringAccessor::EcmaStringAccessor(JSTaggedValue value)
735 {
736 ASSERT(value.IsString());
737 string_ = EcmaString::Cast(value.GetTaggedObject());
738 }
739
EcmaStringAccessor(const JSHandle<EcmaString> & strHandle)740 EcmaStringAccessor::EcmaStringAccessor(const JSHandle<EcmaString> &strHandle)
741 : string_(*strHandle)
742 {
743 }
744
ToStdString(StringConvertedUsage usage)745 std::string EcmaStringAccessor::ToStdString(StringConvertedUsage usage)
746 {
747 if (string_ == nullptr) {
748 return "";
749 }
750 bool modify = (usage != StringConvertedUsage::PRINT);
751 [[maybe_unused]] CVector<uint8_t> buf;
752 Span<const uint8_t> sp = string_->ToUtf8Span(buf, modify);
753 std::string res;
754 res.reserve(sp.size());
755 for (const auto &c : sp) {
756 res.push_back(c);
757 }
758 return res;
759 }
760
ToCString(StringConvertedUsage usage)761 CString EcmaStringAccessor::ToCString(StringConvertedUsage usage)
762 {
763 if (string_ == nullptr) {
764 return "";
765 }
766 bool modify = (usage != StringConvertedUsage::PRINT);
767 [[maybe_unused]] CVector<uint8_t> buf;
768 Span<const uint8_t> sp = string_->ToUtf8Span(buf, modify);
769 CString res;
770 res.reserve(sp.size());
771 for (const auto &c : sp) {
772 res.push_back(c);
773 }
774 return res;
775 }
776 } // namespace panda::ecmascript
777